mirror of
https://github.com/janet-lang/janet
synced 2025-01-10 15:40:30 +00:00
More work on peg. Disable indexed backrefs and replace substitution
with accumulation.
This commit is contained in:
parent
4b8edef58c
commit
612a245961
3
.gitignore
vendored
3
.gitignore
vendored
@ -12,6 +12,9 @@ janet
|
|||||||
janet-*.tar.gz
|
janet-*.tar.gz
|
||||||
dist
|
dist
|
||||||
|
|
||||||
|
# Local directory for testing
|
||||||
|
local
|
||||||
|
|
||||||
# Emscripten
|
# Emscripten
|
||||||
*.bc
|
*.bc
|
||||||
janet.js
|
janet.js
|
||||||
|
@ -134,15 +134,15 @@ Most captures specials will match the same text as their first argument pattern.
|
|||||||
| ------- | ---------------- |
|
| ------- | ---------------- |
|
||||||
| `(capture patt)` | Captures all of the text in patt if patt matches. If patt contains any captures, then those captures will be pushed to the capture stack before the total text. |
|
| `(capture patt)` | Captures all of the text in patt if patt matches. If patt contains any captures, then those captures will be pushed to the capture stack before the total text. |
|
||||||
| `(<- patt)` | Alias for `(capture patt)` |
|
| `(<- patt)` | Alias for `(capture patt)` |
|
||||||
| `(group patt) ` | Pops all of the captures in patt off of the capture stack and pushes them in an array if patt matches.
|
| `(group patt)` | Captures an array of all of the captures in patt. |
|
||||||
| `(replace patt subst)` | Replaces the captures produced by patt by applying subst to them. If subst is a table or struct, will push `(get subst last-capture)` to the capture stack after removing the old captures. If a subst is a function, will call subst with the captures of patt as arguments and push the result to the capture stack. Otherwise, will push subst literally to the capture stack. |
|
| `(replace patt subst)` | Replaces the captures produced by patt by applying subst to them. If subst is a table or struct, will push `(get subst last-capture)` to the capture stack after removing the old captures. If a subst is a function, will call subst with the captures of patt as arguments and push the result to the capture stack. Otherwise, will push subst literally to the capture stack. |
|
||||||
| `(/ patt subst)` | Alias for `(replace patt subst)` |
|
| `(/ patt subst)` | Alias for `(replace patt subst)` |
|
||||||
| `(constant k)` | Captures a constant value and advances no characters. |
|
| `(constant k)` | Captures a constant value and advances no characters. |
|
||||||
| `(argument n)` | Captures the nth extra argument to the match function and does not advance. |
|
| `(argument n)` | Captures the nth extra argument to the match function and does not advance. |
|
||||||
| `(position)` | Captures the current index into the text and advances no input. |
|
| `(position)` | Captures the current index into the text and advances no input. |
|
||||||
| `($)` | Alias for `(position)`. |
|
| `($)` | Alias for `(position)`. |
|
||||||
| `(substitute patt)` | Replace the text matched by all captures in patt with the capture values. Pushes the substituted text matched by patt to the capture stack. |
|
| `(accumulate patt)` | Capture a string that is the concatenation of all captures in patt. This will try to be efficient and not create intermediate strings if possible. |
|
||||||
| `(% patt)` | Alias for `(substitute patt)`
|
| `(% patt)` | Alias for `(accumulate patt)` |
|
||||||
| `(cmt patt fun)` | Invokes fun with all of the captures of patt as arguments (if patt matches). If the result is truthy, then captures the result. The whole expression fails if fun returns false or nil. |
|
| `(cmt patt fun)` | Invokes fun with all of the captures of patt as arguments (if patt matches). If the result is truthy, then captures the result. The whole expression fails if fun returns false or nil. |
|
||||||
| `(backref n)` | Duplicates the nth capture and pushes it to the stack again (0 is the first capture). If n is negative, indexes from the top of the stack (-1 pushes the previously captured value to the stack). If n does not map to a valid stack index then the match fails. |
|
| `(backref n)` | Duplicates the nth capture and pushes it to the stack again (0 is the first capture). If n is negative, indexes from the top of the stack (-1 pushes the previously captured value to the stack). If n does not map to a valid stack index then the match fails. |
|
||||||
| `(error patt)` | Throws a Janet error if patt matches. The error thrown will be the last capture of patt, or a generic error if patt produces no captures. |
|
| `(error patt)` | Throws a Janet error if patt matches. The error thrown will be the last capture of patt, or a generic error if patt produces no captures. |
|
||||||
|
247
src/core/peg.c
247
src/core/peg.c
@ -44,11 +44,10 @@ typedef enum {
|
|||||||
RULE_NOT, /* [rule] */
|
RULE_NOT, /* [rule] */
|
||||||
RULE_BETWEEN, /* [lo, hi, rule] */
|
RULE_BETWEEN, /* [lo, hi, rule] */
|
||||||
RULE_CAPTURE, /* [rule] */
|
RULE_CAPTURE, /* [rule] */
|
||||||
RULE_POSITION, /* [] */
|
RULE_POSITION, /* [tag] */
|
||||||
RULE_ARGUMENT, /* [argument-index] */
|
RULE_ARGUMENT, /* [argument-index] */
|
||||||
RULE_REPINDEX, /* [capture-index] */
|
|
||||||
RULE_CONSTANT, /* [constant] */
|
RULE_CONSTANT, /* [constant] */
|
||||||
RULE_SUBSTITUTE, /* [rule] */
|
RULE_ACCUMULATE, /* [rule] */
|
||||||
RULE_GROUP, /* [rule] */
|
RULE_GROUP, /* [rule] */
|
||||||
RULE_REPLACE, /* [rule, constant] */
|
RULE_REPLACE, /* [rule, constant] */
|
||||||
RULE_MATCHTIME, /* [rule, constant] */
|
RULE_MATCHTIME, /* [rule, constant] */
|
||||||
@ -59,7 +58,6 @@ typedef enum {
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
const uint8_t *text_start;
|
const uint8_t *text_start;
|
||||||
const uint8_t *text_end;
|
const uint8_t *text_end;
|
||||||
const uint8_t *subst_end;
|
|
||||||
const uint32_t *bytecode;
|
const uint32_t *bytecode;
|
||||||
const Janet *constants;
|
const Janet *constants;
|
||||||
JanetArray *captures;
|
JanetArray *captures;
|
||||||
@ -69,7 +67,7 @@ typedef struct {
|
|||||||
int32_t depth;
|
int32_t depth;
|
||||||
enum {
|
enum {
|
||||||
PEG_MODE_NORMAL,
|
PEG_MODE_NORMAL,
|
||||||
PEG_MODE_SUBSTITUTE,
|
PEG_MODE_ACCUMULATE,
|
||||||
PEG_MODE_NOCAPTURE
|
PEG_MODE_NOCAPTURE
|
||||||
} mode;
|
} mode;
|
||||||
} PegState;
|
} PegState;
|
||||||
@ -78,7 +76,6 @@ typedef struct {
|
|||||||
* to save state at branches, and then reload
|
* to save state at branches, and then reload
|
||||||
* if one branch fails and try a new branch. */
|
* if one branch fails and try a new branch. */
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const uint8_t *subst_end;
|
|
||||||
int32_t cap;
|
int32_t cap;
|
||||||
int32_t scratch;
|
int32_t scratch;
|
||||||
} CapState;
|
} CapState;
|
||||||
@ -86,7 +83,6 @@ typedef struct {
|
|||||||
/* Save the current capture state */
|
/* Save the current capture state */
|
||||||
static CapState cap_save(PegState *s) {
|
static CapState cap_save(PegState *s) {
|
||||||
CapState cs;
|
CapState cs;
|
||||||
cs.subst_end = s->subst_end;
|
|
||||||
cs.scratch = s->scratch->count;
|
cs.scratch = s->scratch->count;
|
||||||
cs.cap = s->captures->count;
|
cs.cap = s->captures->count;
|
||||||
return cs;
|
return cs;
|
||||||
@ -94,24 +90,16 @@ static CapState cap_save(PegState *s) {
|
|||||||
|
|
||||||
/* Load a saved capture state in the case of failure */
|
/* Load a saved capture state in the case of failure */
|
||||||
static void cap_load(PegState *s, CapState cs) {
|
static void cap_load(PegState *s, CapState cs) {
|
||||||
s->subst_end = cs.subst_end;
|
|
||||||
s->scratch->count = cs.scratch;
|
s->scratch->count = cs.scratch;
|
||||||
s->captures->count = cs.cap;
|
s->captures->count = cs.cap;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Add a capture */
|
/* Add a capture */
|
||||||
static void pushcap(PegState *s,
|
static void pushcap(PegState *s, Janet capture) {
|
||||||
Janet capture,
|
if (s->mode == PEG_MODE_ACCUMULATE)
|
||||||
const uint8_t *text,
|
|
||||||
const uint8_t *result) {
|
|
||||||
if (s->mode == PEG_MODE_SUBSTITUTE) {
|
|
||||||
janet_buffer_push_bytes(s->scratch, s->subst_end,
|
|
||||||
(int32_t)(text - s->subst_end));
|
|
||||||
janet_to_string_b(s->scratch, capture);
|
janet_to_string_b(s->scratch, capture);
|
||||||
s->subst_end = result;
|
if (s->mode == PEG_MODE_NORMAL)
|
||||||
} else if (s->mode == PEG_MODE_NORMAL) {
|
|
||||||
janet_array_push(s->captures, capture);
|
janet_array_push(s->captures, capture);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Prevent stack overflow */
|
/* Prevent stack overflow */
|
||||||
@ -120,7 +108,14 @@ static void pushcap(PegState *s,
|
|||||||
} while (0)
|
} while (0)
|
||||||
#define up1(s) ((s)->depth++)
|
#define up1(s) ((s)->depth++)
|
||||||
|
|
||||||
/* Evaluate a peg rule */
|
/* Evaluate a peg rule
|
||||||
|
* Pre-conditions: s is in a valid state
|
||||||
|
* Post-conditions: If there is a match, returns a pointer to the next text.
|
||||||
|
* All captures on the capture stack are valid. If there is no match,
|
||||||
|
* returns NULL. Extra captures from successful child expressions can be
|
||||||
|
* left on the capture stack. If s->mode was PEG_MODE_NOCAPTURE, captures MUST
|
||||||
|
* not be changed, though.
|
||||||
|
*/
|
||||||
static const uint8_t *peg_rule(
|
static const uint8_t *peg_rule(
|
||||||
PegState *s,
|
PegState *s,
|
||||||
const uint32_t *rule,
|
const uint32_t *rule,
|
||||||
@ -130,22 +125,26 @@ tail:
|
|||||||
default:
|
default:
|
||||||
janet_panic("unexpected opcode");
|
janet_panic("unexpected opcode");
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
case RULE_LITERAL:
|
case RULE_LITERAL:
|
||||||
{
|
{
|
||||||
uint32_t len = rule[1];
|
uint32_t len = rule[1];
|
||||||
if (text + len > s->text_end) return NULL;
|
if (text + len > s->text_end) return NULL;
|
||||||
return memcmp(text, rule + 2, len) ? NULL : text + len;
|
return memcmp(text, rule + 2, len) ? NULL : text + len;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_NCHAR:
|
case RULE_NCHAR:
|
||||||
{
|
{
|
||||||
uint32_t n = rule[1];
|
uint32_t n = rule[1];
|
||||||
return (text + n > s->text_end) ? NULL : text + n;
|
return (text + n > s->text_end) ? NULL : text + n;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_NOTNCHAR:
|
case RULE_NOTNCHAR:
|
||||||
{
|
{
|
||||||
uint32_t n = rule[1];
|
uint32_t n = rule[1];
|
||||||
return (text + n > s->text_end) ? text : NULL;
|
return (text + n > s->text_end) ? text : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_RANGE:
|
case RULE_RANGE:
|
||||||
{
|
{
|
||||||
uint8_t lo = rule[1] & 0xFF;
|
uint8_t lo = rule[1] & 0xFF;
|
||||||
@ -156,6 +155,7 @@ tail:
|
|||||||
? text + 1
|
? text + 1
|
||||||
: NULL;
|
: NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_SET:
|
case RULE_SET:
|
||||||
{
|
{
|
||||||
uint32_t word = rule[1 + (text[0] >> 5)];
|
uint32_t word = rule[1 + (text[0] >> 5)];
|
||||||
@ -164,6 +164,7 @@ tail:
|
|||||||
? text + 1
|
? text + 1
|
||||||
: NULL;
|
: NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_LOOK:
|
case RULE_LOOK:
|
||||||
{
|
{
|
||||||
text += ((int32_t *)rule)[1];
|
text += ((int32_t *)rule)[1];
|
||||||
@ -176,6 +177,7 @@ tail:
|
|||||||
s->mode = oldmode;
|
s->mode = oldmode;
|
||||||
return result ? text : NULL;
|
return result ? text : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_CHOICE:
|
case RULE_CHOICE:
|
||||||
{
|
{
|
||||||
uint32_t len = rule[1];
|
uint32_t len = rule[1];
|
||||||
@ -195,6 +197,7 @@ tail:
|
|||||||
rule = s->bytecode + args[len - 1];
|
rule = s->bytecode + args[len - 1];
|
||||||
goto tail;
|
goto tail;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_SEQUENCE:
|
case RULE_SEQUENCE:
|
||||||
{
|
{
|
||||||
uint32_t len = rule[1];
|
uint32_t len = rule[1];
|
||||||
@ -208,6 +211,7 @@ tail:
|
|||||||
rule = s->bytecode + args[len - 1];
|
rule = s->bytecode + args[len - 1];
|
||||||
goto tail;
|
goto tail;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_IF:
|
case RULE_IF:
|
||||||
case RULE_IFNOT:
|
case RULE_IFNOT:
|
||||||
{
|
{
|
||||||
@ -223,6 +227,7 @@ tail:
|
|||||||
rule = rule_b;
|
rule = rule_b;
|
||||||
goto tail;
|
goto tail;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_NOT:
|
case RULE_NOT:
|
||||||
{
|
{
|
||||||
const uint32_t *rule_a = s->bytecode + rule[1];
|
const uint32_t *rule_a = s->bytecode + rule[1];
|
||||||
@ -234,6 +239,7 @@ tail:
|
|||||||
s->mode = oldmode;
|
s->mode = oldmode;
|
||||||
return (result) ? NULL : text;
|
return (result) ? NULL : text;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_BETWEEN:
|
case RULE_BETWEEN:
|
||||||
{
|
{
|
||||||
uint32_t lo = rule[1];
|
uint32_t lo = rule[1];
|
||||||
@ -243,7 +249,13 @@ tail:
|
|||||||
const uint8_t *next_text;
|
const uint8_t *next_text;
|
||||||
CapState cs = cap_save(s);
|
CapState cs = cap_save(s);
|
||||||
down1(s);
|
down1(s);
|
||||||
while (captured < hi && (next_text = peg_rule(s, rule_a, text))) {
|
while (captured < hi) {
|
||||||
|
CapState cs2 = cap_save(s);
|
||||||
|
next_text = peg_rule(s, rule_a, text);
|
||||||
|
if (!next_text || next_text == text) {
|
||||||
|
cap_load(s, cs2);
|
||||||
|
break;
|
||||||
|
}
|
||||||
captured++;
|
captured++;
|
||||||
text = next_text;
|
text = next_text;
|
||||||
}
|
}
|
||||||
@ -254,133 +266,100 @@ tail:
|
|||||||
}
|
}
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_POSITION:
|
case RULE_POSITION:
|
||||||
{
|
{
|
||||||
pushcap(s, janet_wrap_number((double)(text - s->text_start)), text, text);
|
pushcap(s, janet_wrap_number((double)(text - s->text_start)));
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_ARGUMENT:
|
case RULE_ARGUMENT:
|
||||||
{
|
{
|
||||||
int32_t index = ((int32_t *)rule)[1];
|
int32_t index = ((int32_t *)rule)[1];
|
||||||
Janet capture = (index >= s->extrac) ? janet_wrap_nil() : s->extrav[index];
|
Janet capture = (index >= s->extrac) ? janet_wrap_nil() : s->extrav[index];
|
||||||
pushcap(s, capture, text, text);
|
pushcap(s, capture);
|
||||||
return text;
|
|
||||||
}
|
|
||||||
case RULE_REPINDEX:
|
|
||||||
{
|
|
||||||
int32_t index = ((int32_t *)rule)[1];
|
|
||||||
if (index < 0) index += s->captures->count;
|
|
||||||
if (index >= s->captures->count || index < 0) return NULL;
|
|
||||||
Janet capture = s->captures->data[index];
|
|
||||||
pushcap(s, capture, text, text);
|
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_CONSTANT:
|
case RULE_CONSTANT:
|
||||||
{
|
{
|
||||||
pushcap(s, s->constants[rule[1]], text, text);
|
pushcap(s, s->constants[rule[1]]);
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_CAPTURE:
|
case RULE_CAPTURE:
|
||||||
{
|
{
|
||||||
int oldmode = s->mode;
|
if (s->mode == PEG_MODE_NOCAPTURE) {
|
||||||
if (oldmode == PEG_MODE_NOCAPTURE) {
|
|
||||||
rule = s->bytecode + rule[1];
|
rule = s->bytecode + rule[1];
|
||||||
goto tail;
|
goto tail;
|
||||||
}
|
}
|
||||||
if (oldmode == PEG_MODE_SUBSTITUTE) s->mode = PEG_MODE_NOCAPTURE;
|
|
||||||
down1(s);
|
down1(s);
|
||||||
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
||||||
up1(s);
|
up1(s);
|
||||||
s->mode = oldmode;
|
|
||||||
if (!result) return NULL;
|
if (!result) return NULL;
|
||||||
if (oldmode != PEG_MODE_SUBSTITUTE)
|
/* Specialized pushcap - avoid intermediate string creation */
|
||||||
pushcap(s, janet_stringv(text, (int32_t)(result - text)), text, result);
|
if (s->mode == PEG_MODE_ACCUMULATE) {
|
||||||
|
janet_buffer_push_bytes(s->scratch, text, (int32_t)(result - text));
|
||||||
|
} else {
|
||||||
|
janet_array_push(s->captures, janet_stringv(text, (int32_t)(result - text)));
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
case RULE_SUBSTITUTE:
|
|
||||||
case RULE_GROUP:
|
case RULE_ACCUMULATE:
|
||||||
case RULE_REPLACE:
|
|
||||||
{
|
{
|
||||||
/* In no-capture mode, all captures simply become their matching pattern */
|
|
||||||
int oldmode = s->mode;
|
int oldmode = s->mode;
|
||||||
if (oldmode == PEG_MODE_NOCAPTURE) {
|
/* No capture mode, skip captures. Accumulate inside accumulate also does nothing. */
|
||||||
|
if (oldmode != PEG_MODE_NORMAL) {
|
||||||
rule = s->bytecode + rule[1];
|
rule = s->bytecode + rule[1];
|
||||||
goto tail;
|
goto tail;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Save previous state. Will use this to reload state before
|
|
||||||
* pushing grammar. Each of these rules pushes exactly 1 new
|
|
||||||
* capture, regardless of the sub rule. */
|
|
||||||
CapState cs = cap_save(s);
|
CapState cs = cap_save(s);
|
||||||
|
s->mode = PEG_MODE_ACCUMULATE;
|
||||||
/* Set sub mode as needed. Modes affect how captures are recorded (pushed to stack,
|
|
||||||
* pushed to byte buffer, or ignored) */
|
|
||||||
if (rule[0] == RULE_GROUP) s->mode = PEG_MODE_NORMAL;
|
|
||||||
if (rule[0] == RULE_REPLACE) s->mode = PEG_MODE_NORMAL;
|
|
||||||
if (rule[0] == RULE_SUBSTITUTE) {
|
|
||||||
s->mode = PEG_MODE_SUBSTITUTE;
|
|
||||||
s->subst_end = text;
|
|
||||||
}
|
|
||||||
|
|
||||||
down1(s);
|
down1(s);
|
||||||
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
||||||
up1(s);
|
up1(s);
|
||||||
s->mode = oldmode;
|
s->mode = oldmode;
|
||||||
if (!result) return NULL;
|
if (!result) return NULL;
|
||||||
|
Janet cap = janet_stringv(s->scratch->data + cs.scratch, s->scratch->count - cs.scratch);
|
||||||
/* The replacement capture */
|
|
||||||
Janet cap;
|
|
||||||
|
|
||||||
/* Figure out what to push based on opcode */
|
|
||||||
if (rule[0] == RULE_GROUP) {
|
|
||||||
int32_t num_sub_captures = s->captures->count - cs.cap;
|
|
||||||
JanetArray *sub_captures = janet_array(num_sub_captures);
|
|
||||||
memcpy(sub_captures->data,
|
|
||||||
s->captures->data + cs.cap,
|
|
||||||
sizeof(Janet) * num_sub_captures);
|
|
||||||
sub_captures->count = num_sub_captures;
|
|
||||||
cap = janet_wrap_array(sub_captures);
|
|
||||||
|
|
||||||
} else if (rule[0] == RULE_SUBSTITUTE) {
|
|
||||||
janet_buffer_push_bytes(s->scratch, s->subst_end,
|
|
||||||
(int32_t)(result - s->subst_end));
|
|
||||||
cap = janet_stringv(s->scratch->data + cs.scratch,
|
|
||||||
s->scratch->count - cs.scratch);
|
|
||||||
|
|
||||||
} else { /* RULE_REPLACE */
|
|
||||||
Janet constant = s->constants[rule[2]];
|
|
||||||
switch (janet_type(constant)) {
|
|
||||||
default:
|
|
||||||
cap = constant;
|
|
||||||
break;
|
|
||||||
case JANET_STRUCT:
|
|
||||||
cap = janet_struct_get(janet_unwrap_struct(constant),
|
|
||||||
s->captures->data[s->captures->count - 1]);
|
|
||||||
break;
|
|
||||||
case JANET_TABLE:
|
|
||||||
cap = janet_table_get(janet_unwrap_table(constant),
|
|
||||||
s->captures->data[s->captures->count - 1]);
|
|
||||||
break;
|
|
||||||
case JANET_CFUNCTION:
|
|
||||||
cap = janet_unwrap_cfunction(constant)(s->captures->count - cs.cap,
|
|
||||||
s->captures->data + cs.cap);
|
|
||||||
break;
|
|
||||||
case JANET_FUNCTION:
|
|
||||||
cap = janet_call(janet_unwrap_function(constant),
|
|
||||||
s->captures->count - cs.cap,
|
|
||||||
s->captures->data + cs.cap);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Reset old state and then push capture */
|
|
||||||
cap_load(s, cs);
|
cap_load(s, cs);
|
||||||
pushcap(s, cap, text, result);
|
pushcap(s, cap);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case RULE_GROUP:
|
||||||
|
{
|
||||||
|
int oldmode = s->mode;
|
||||||
|
if (oldmode == PEG_MODE_NOCAPTURE) {
|
||||||
|
rule = s->bytecode + rule[1];
|
||||||
|
goto tail;
|
||||||
|
}
|
||||||
|
CapState cs = cap_save(s);
|
||||||
|
s->mode = PEG_MODE_NORMAL;
|
||||||
|
down1(s);
|
||||||
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
||||||
|
up1(s);
|
||||||
|
s->mode = oldmode;
|
||||||
|
if (!result) return NULL;
|
||||||
|
int32_t num_sub_captures = s->captures->count - cs.cap;
|
||||||
|
JanetArray *sub_captures = janet_array(num_sub_captures);
|
||||||
|
memcpy(sub_captures->data,
|
||||||
|
s->captures->data + cs.cap,
|
||||||
|
sizeof(Janet) * num_sub_captures);
|
||||||
|
sub_captures->count = num_sub_captures;
|
||||||
|
cap_load(s, cs);
|
||||||
|
pushcap(s, janet_wrap_array(sub_captures));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
case RULE_REPLACE:
|
||||||
case RULE_MATCHTIME:
|
case RULE_MATCHTIME:
|
||||||
{
|
{
|
||||||
int oldmode = s->mode;
|
int oldmode = s->mode;
|
||||||
|
if (rule[0] == RULE_REPLACE && oldmode == PEG_MODE_NOCAPTURE) {
|
||||||
|
rule = s->bytecode + rule[1];
|
||||||
|
goto tail;
|
||||||
|
}
|
||||||
CapState cs = cap_save(s);
|
CapState cs = cap_save(s);
|
||||||
s->mode = PEG_MODE_NORMAL;
|
s->mode = PEG_MODE_NORMAL;
|
||||||
down1(s);
|
down1(s);
|
||||||
@ -389,28 +368,36 @@ tail:
|
|||||||
s->mode = oldmode;
|
s->mode = oldmode;
|
||||||
if (!result) return NULL;
|
if (!result) return NULL;
|
||||||
|
|
||||||
/* Now check captures with provided function */
|
|
||||||
int32_t argc = s->captures->count - cs.cap;
|
|
||||||
Janet *argv = s->captures->data + cs.cap;
|
|
||||||
Janet fval = s->constants[rule[2]];
|
|
||||||
Janet cap;
|
Janet cap;
|
||||||
if (janet_checktype(fval, JANET_FUNCTION)) {
|
Janet constant = s->constants[rule[2]];
|
||||||
cap = janet_call(janet_unwrap_function(fval), argc, argv);
|
switch (janet_type(constant)) {
|
||||||
} else {
|
default:
|
||||||
JanetCFunction cfun = janet_unwrap_cfunction(fval);
|
cap = constant;
|
||||||
cap = cfun(argc, argv);
|
break;
|
||||||
|
case JANET_STRUCT:
|
||||||
|
cap = janet_struct_get(janet_unwrap_struct(constant),
|
||||||
|
s->captures->data[s->captures->count - 1]);
|
||||||
|
break;
|
||||||
|
case JANET_TABLE:
|
||||||
|
cap = janet_table_get(janet_unwrap_table(constant),
|
||||||
|
s->captures->data[s->captures->count - 1]);
|
||||||
|
break;
|
||||||
|
case JANET_CFUNCTION:
|
||||||
|
cap = janet_unwrap_cfunction(constant)(s->captures->count - cs.cap,
|
||||||
|
s->captures->data + cs.cap);
|
||||||
|
break;
|
||||||
|
case JANET_FUNCTION:
|
||||||
|
cap = janet_call(janet_unwrap_function(constant),
|
||||||
|
s->captures->count - cs.cap,
|
||||||
|
s->captures->data + cs.cap);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
cap_load(s, cs);
|
cap_load(s, cs);
|
||||||
|
if (rule[0] == RULE_MATCHTIME && !janet_truthy(cap)) return NULL;
|
||||||
/* Capture failed */
|
pushcap(s, cap);
|
||||||
if (!janet_truthy(cap)) return NULL;
|
|
||||||
|
|
||||||
/* Capture worked, so use new capture */
|
|
||||||
pushcap(s, cap, text, result);
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
case RULE_ERROR:
|
case RULE_ERROR:
|
||||||
{
|
{
|
||||||
int oldmode = s->mode;
|
int oldmode = s->mode;
|
||||||
@ -672,8 +659,8 @@ static void spec_not(Builder *b, int32_t argc, const Janet *argv) {
|
|||||||
static void spec_capture(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_capture(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
spec_onerule(b, argc, argv, RULE_CAPTURE);
|
spec_onerule(b, argc, argv, RULE_CAPTURE);
|
||||||
}
|
}
|
||||||
static void spec_substitute(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_accumulate(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
spec_onerule(b, argc, argv, RULE_SUBSTITUTE);
|
spec_onerule(b, argc, argv, RULE_ACCUMULATE);
|
||||||
}
|
}
|
||||||
static void spec_group(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_group(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
spec_onerule(b, argc, argv, RULE_GROUP);
|
spec_onerule(b, argc, argv, RULE_GROUP);
|
||||||
@ -698,13 +685,6 @@ static void spec_position(Builder *b, int32_t argc, const Janet *argv) {
|
|||||||
emit_rule(r, RULE_POSITION, 0, NULL);
|
emit_rule(r, RULE_POSITION, 0, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void spec_reference(Builder *b, int32_t argc, const Janet *argv) {
|
|
||||||
peg_fixarity(b, argc, 1);
|
|
||||||
Reserve r = reserve(b, 2);
|
|
||||||
int32_t index = peg_getinteger(b, argv[0]);
|
|
||||||
emit_1(r, RULE_REPINDEX, index);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void spec_argument(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_argument(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 1);
|
peg_fixarity(b, argc, 1);
|
||||||
Reserve r = reserve(b, 2);
|
Reserve r = reserve(b, 2);
|
||||||
@ -787,18 +767,18 @@ typedef struct {
|
|||||||
static const SpecialPair specials[] = {
|
static const SpecialPair specials[] = {
|
||||||
{"!", spec_not},
|
{"!", spec_not},
|
||||||
{"$", spec_position},
|
{"$", spec_position},
|
||||||
{"%", spec_substitute},
|
{"%", spec_accumulate},
|
||||||
{"*", spec_sequence},
|
{"*", spec_sequence},
|
||||||
{"+", spec_choice},
|
{"+", spec_choice},
|
||||||
{"/", spec_replace},
|
{"/", spec_replace},
|
||||||
{"<-", spec_capture},
|
{"<-", spec_capture},
|
||||||
{">", spec_look},
|
{">", spec_look},
|
||||||
{"?", spec_opt},
|
{"?", spec_opt},
|
||||||
|
{"accumulate", spec_accumulate},
|
||||||
{"any", spec_any},
|
{"any", spec_any},
|
||||||
{"argument", spec_argument},
|
{"argument", spec_argument},
|
||||||
{"at-least", spec_atleast},
|
{"at-least", spec_atleast},
|
||||||
{"at-most", spec_atmost},
|
{"at-most", spec_atmost},
|
||||||
{"backref", spec_reference},
|
|
||||||
{"between", spec_between},
|
{"between", spec_between},
|
||||||
{"capture", spec_capture},
|
{"capture", spec_capture},
|
||||||
{"choice", spec_choice},
|
{"choice", spec_choice},
|
||||||
@ -817,7 +797,6 @@ static const SpecialPair specials[] = {
|
|||||||
{"sequence", spec_sequence},
|
{"sequence", spec_sequence},
|
||||||
{"set", spec_set},
|
{"set", spec_set},
|
||||||
{"some", spec_some},
|
{"some", spec_some},
|
||||||
{"substitute", spec_substitute},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Compile a janet value into a rule and return the rule index. */
|
/* Compile a janet value into a rule and return the rule index. */
|
||||||
|
@ -761,9 +761,11 @@ Janet janet_call(JanetFunction *fun, int32_t argc, const Janet *argv) {
|
|||||||
int handle = janet_gclock();
|
int handle = janet_gclock();
|
||||||
|
|
||||||
JanetFiber *old_fiber = janet_vm_fiber;
|
JanetFiber *old_fiber = janet_vm_fiber;
|
||||||
|
old_fiber->child = fiber;
|
||||||
janet_vm_fiber = fiber;
|
janet_vm_fiber = fiber;
|
||||||
memcpy(fiber->buf, janet_vm_fiber->buf, sizeof(jmp_buf));
|
memcpy(fiber->buf, janet_vm_fiber->buf, sizeof(jmp_buf));
|
||||||
run_vm(fiber, janet_wrap_nil(), JANET_STATUS_NEW);
|
run_vm(fiber, janet_wrap_nil(), JANET_STATUS_NEW);
|
||||||
|
old_fiber->child = NULL;
|
||||||
janet_vm_fiber = old_fiber;
|
janet_vm_fiber = old_fiber;
|
||||||
|
|
||||||
janet_gcunlock(handle);
|
janet_gcunlock(handle);
|
||||||
|
@ -222,7 +222,7 @@
|
|||||||
(file/flush stderr)
|
(file/flush stderr)
|
||||||
(file/flush stdout)
|
(file/flush stdout)
|
||||||
|
|
||||||
(def grammar '(% (any (+ (/ "dog" "purple panda") 1))))
|
(def grammar '(accumulate (any (+ (/ "dog" "purple panda") (<- 1)))))
|
||||||
(defn try-grammar [text]
|
(defn try-grammar [text]
|
||||||
(assert (= (string/replace-all "dog" "purple panda" text) (0 (peg/match grammar text))) text))
|
(assert (= (string/replace-all "dog" "purple panda" text) (0 (peg/match grammar text))) text))
|
||||||
|
|
||||||
@ -238,8 +238,8 @@
|
|||||||
|
|
||||||
(def csv
|
(def csv
|
||||||
'{:field (+
|
'{:field (+
|
||||||
(* `"` (% (any (+ (if-not `"` 1) (/ `""` `"`)))) `"`)
|
(* `"` (% (any (+ (<- (if-not `"` 1)) (* (constant `"`) `""`)))) `"`)
|
||||||
(% (any (if-not (set ",\n") 1))))
|
(<- (any (if-not (set ",\n") 1))))
|
||||||
:main (* :field (any (* "," :field)) (+ "\n" -1))})
|
:main (* :field (any (* "," :field)) (+ "\n" -1))})
|
||||||
|
|
||||||
(defn check-csv
|
(defn check-csv
|
||||||
@ -292,9 +292,9 @@
|
|||||||
|
|
||||||
(def wrapped-string
|
(def wrapped-string
|
||||||
~{:pad (any "=")
|
~{:pad (any "=")
|
||||||
:open (* "[" (capture :pad) "[")
|
:open (* "[" (<- :pad :n) "[")
|
||||||
:close (* "]" (cmt (* (backref 0) (capture :pad)) ,=) "]")
|
:close (* "]" (cmt (* (-> :n) (<- :pad)) ,=) "]")
|
||||||
:main (* :open (any (if-not :close 1)) :close -1)})
|
:main (cmt (* :open (any (if-not :close 1)) :close -1) ,=)})
|
||||||
|
|
||||||
(check-match wrapped-string "[[]]" true)
|
(check-match wrapped-string "[[]]" true)
|
||||||
(check-match wrapped-string "[==[a]==]" true)
|
(check-match wrapped-string "[==[a]==]" true)
|
||||||
@ -305,11 +305,11 @@
|
|||||||
(check-match wrapped-string "[[bl]rk]] " false)
|
(check-match wrapped-string "[[bl]rk]] " false)
|
||||||
(check-match wrapped-string "[=[bl]]rk]=] " false)
|
(check-match wrapped-string "[=[bl]]rk]=] " false)
|
||||||
(check-match wrapped-string "[=[bl]==]rk]=] " false)
|
(check-match wrapped-string "[=[bl]==]rk]=] " false)
|
||||||
|
(check-match wrapped-string "[===[]==]===]" true)
|
||||||
|
|
||||||
(def janet-longstring
|
(def janet-longstring
|
||||||
~{:open (capture (some "`"))
|
~{:delim (capture (some "`"))
|
||||||
:close (cmt (* (backref 0) :open) ,=)
|
:main (cmt (* :delim (any (if-not (* (not (> -1 "`")) :delim) 1)) (not (> -1 "`")) :delim -1) ,=)})
|
||||||
:main (* :open (any (if-not :close 1)) (not (> -1 "`")) :close -1)})
|
|
||||||
|
|
||||||
(check-match janet-longstring "`john" false)
|
(check-match janet-longstring "`john" false)
|
||||||
(check-match janet-longstring "abc" false)
|
(check-match janet-longstring "abc" false)
|
||||||
|
Loading…
Reference in New Issue
Block a user