mirror of
https://github.com/janet-lang/janet
synced 2024-12-01 12:29:54 +00:00
Remove no-capture mode in pegs.
Some peg grammars could not capture values based on their position in a larger grammar. This was a design limitation inherited from LPeg, but it is no longer needed, as the replace mode is superseded by the accumulator mode, which is more general if slightly harder to use.
This commit is contained in:
parent
59c69e6896
commit
fa1c5c85b5
@ -1571,7 +1571,8 @@
|
|||||||
or so. Each element is a two element tuple, containing the path
|
or so. Each element is a two element tuple, containing the path
|
||||||
template and a keyword :source, :native, or :image indicating how
|
template and a keyword :source, :native, or :image indicating how
|
||||||
require should load files found at these paths."
|
require should load files found at these paths."
|
||||||
@[["./:all:.janet" :source]
|
@[[":all:" :source]
|
||||||
|
["./:all:.janet" :source]
|
||||||
["./:all:/init.janet" :source]
|
["./:all:/init.janet" :source]
|
||||||
[":sys:/:all:.janet" :source]
|
[":sys:/:all:.janet" :source]
|
||||||
[":sys:/:all:/init.janet" :source]
|
[":sys:/:all:/init.janet" :source]
|
||||||
@ -1579,8 +1580,7 @@
|
|||||||
["./:all:/:name:.:native:" :native]
|
["./:all:/:name:.:native:" :native]
|
||||||
[":sys:/:all:.:native:" :native]
|
[":sys:/:all:.:native:" :native]
|
||||||
["./:all:.jimage" :image]
|
["./:all:.jimage" :image]
|
||||||
[":sys:/:all:.jimage" :image]
|
[":sys:/:all:.jimage" :image]])
|
||||||
[":all:" :source]])
|
|
||||||
|
|
||||||
(var module/*syspath*
|
(var module/*syspath*
|
||||||
"The path where globally installed libraries are located.
|
"The path where globally installed libraries are located.
|
||||||
|
@ -75,8 +75,7 @@ typedef struct {
|
|||||||
int32_t depth;
|
int32_t depth;
|
||||||
enum {
|
enum {
|
||||||
PEG_MODE_NORMAL,
|
PEG_MODE_NORMAL,
|
||||||
PEG_MODE_ACCUMULATE,
|
PEG_MODE_ACCUMULATE
|
||||||
PEG_MODE_NOCAPTURE
|
|
||||||
} mode;
|
} mode;
|
||||||
} PegState;
|
} PegState;
|
||||||
|
|
||||||
@ -105,10 +104,10 @@ static void cap_load(PegState *s, CapState cs) {
|
|||||||
|
|
||||||
/* Add a capture */
|
/* Add a capture */
|
||||||
static void pushcap(PegState *s, Janet capture, uint32_t tag) {
|
static void pushcap(PegState *s, Janet capture, uint32_t tag) {
|
||||||
if (s->mode == PEG_MODE_ACCUMULATE)
|
if (s->mode == PEG_MODE_ACCUMULATE) {
|
||||||
janet_to_string_b(s->scratch, capture);
|
janet_to_string_b(s->scratch, capture);
|
||||||
if (s->mode == PEG_MODE_NORMAL ||
|
}
|
||||||
(tag && s->mode == PEG_MODE_ACCUMULATE)) {
|
if (tag || s->mode == PEG_MODE_NORMAL) {
|
||||||
janet_array_push(s->captures, capture);
|
janet_array_push(s->captures, capture);
|
||||||
janet_buffer_push_u8(s->tags, tag);
|
janet_buffer_push_u8(s->tags, tag);
|
||||||
}
|
}
|
||||||
@ -125,8 +124,7 @@ static void pushcap(PegState *s, Janet capture, uint32_t tag) {
|
|||||||
* Post-conditions: If there is a match, returns a pointer to the next text.
|
* Post-conditions: If there is a match, returns a pointer to the next text.
|
||||||
* All captures on the capture stack are valid. If there is no match,
|
* All captures on the capture stack are valid. If there is no match,
|
||||||
* returns NULL. Extra captures from successful child expressions can be
|
* returns NULL. Extra captures from successful child expressions can be
|
||||||
* left on the capture stack. If s->mode was PEG_MODE_NOCAPTURE, captures MUST
|
* left on the capture stack.
|
||||||
* not be changed, though.
|
|
||||||
*/
|
*/
|
||||||
static const uint8_t *peg_rule(
|
static const uint8_t *peg_rule(
|
||||||
PegState *s,
|
PegState *s,
|
||||||
@ -175,12 +173,9 @@ tail:
|
|||||||
case RULE_LOOK: {
|
case RULE_LOOK: {
|
||||||
text += ((int32_t *)rule)[1];
|
text += ((int32_t *)rule)[1];
|
||||||
if (text < s->text_start || text > s->text_end) return NULL;
|
if (text < s->text_start || text > s->text_end) return NULL;
|
||||||
int oldmode = s->mode;
|
|
||||||
s->mode = PEG_MODE_NOCAPTURE;
|
|
||||||
down1(s);
|
down1(s);
|
||||||
const uint8_t *result = peg_rule(s, s->bytecode + rule[2], text);
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[2], text);
|
||||||
up1(s);
|
up1(s);
|
||||||
s->mode = oldmode;
|
|
||||||
return result ? text : NULL;
|
return result ? text : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -220,12 +215,9 @@ tail:
|
|||||||
case RULE_IFNOT: {
|
case RULE_IFNOT: {
|
||||||
const uint32_t *rule_a = s->bytecode + rule[1];
|
const uint32_t *rule_a = s->bytecode + rule[1];
|
||||||
const uint32_t *rule_b = s->bytecode + rule[2];
|
const uint32_t *rule_b = s->bytecode + rule[2];
|
||||||
int oldmode = s->mode;
|
|
||||||
s->mode = PEG_MODE_NOCAPTURE;
|
|
||||||
down1(s);
|
down1(s);
|
||||||
const uint8_t *result = peg_rule(s, rule_a, text);
|
const uint8_t *result = peg_rule(s, rule_a, text);
|
||||||
up1(s);
|
up1(s);
|
||||||
s->mode = oldmode;
|
|
||||||
if (rule[0] == RULE_IF ? !result : !!result) return NULL;
|
if (rule[0] == RULE_IF ? !result : !!result) return NULL;
|
||||||
rule = rule_b;
|
rule = rule_b;
|
||||||
goto tail;
|
goto tail;
|
||||||
@ -233,12 +225,9 @@ tail:
|
|||||||
|
|
||||||
case RULE_NOT: {
|
case RULE_NOT: {
|
||||||
const uint32_t *rule_a = s->bytecode + rule[1];
|
const uint32_t *rule_a = s->bytecode + rule[1];
|
||||||
int oldmode = s->mode;
|
|
||||||
s->mode = PEG_MODE_NOCAPTURE;
|
|
||||||
down1(s);
|
down1(s);
|
||||||
const uint8_t *result = peg_rule(s, rule_a, text);
|
const uint8_t *result = peg_rule(s, rule_a, text);
|
||||||
up1(s);
|
up1(s);
|
||||||
s->mode = oldmode;
|
|
||||||
return (result) ? NULL : text;
|
return (result) ? NULL : text;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -301,10 +290,6 @@ tail:
|
|||||||
|
|
||||||
case RULE_CAPTURE: {
|
case RULE_CAPTURE: {
|
||||||
uint32_t tag = rule[2];
|
uint32_t tag = rule[2];
|
||||||
if (!tag && s->mode == PEG_MODE_NOCAPTURE) {
|
|
||||||
rule = s->bytecode + rule[1];
|
|
||||||
goto tail;
|
|
||||||
}
|
|
||||||
down1(s);
|
down1(s);
|
||||||
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
||||||
up1(s);
|
up1(s);
|
||||||
@ -321,8 +306,7 @@ tail:
|
|||||||
case RULE_ACCUMULATE: {
|
case RULE_ACCUMULATE: {
|
||||||
uint32_t tag = rule[2];
|
uint32_t tag = rule[2];
|
||||||
int oldmode = s->mode;
|
int oldmode = s->mode;
|
||||||
/* No capture mode, skip captures. Accumulate inside accumulate also does nothing. */
|
if (!tag && oldmode == PEG_MODE_ACCUMULATE) {
|
||||||
if (!tag && oldmode != PEG_MODE_NORMAL) {
|
|
||||||
rule = s->bytecode + rule[1];
|
rule = s->bytecode + rule[1];
|
||||||
goto tail;
|
goto tail;
|
||||||
}
|
}
|
||||||
@ -333,7 +317,8 @@ tail:
|
|||||||
up1(s);
|
up1(s);
|
||||||
s->mode = oldmode;
|
s->mode = oldmode;
|
||||||
if (!result) return NULL;
|
if (!result) return NULL;
|
||||||
Janet cap = janet_stringv(s->scratch->data + cs.scratch, s->scratch->count - cs.scratch);
|
Janet cap = janet_stringv(s->scratch->data + cs.scratch,
|
||||||
|
s->scratch->count - cs.scratch);
|
||||||
cap_load(s, cs);
|
cap_load(s, cs);
|
||||||
pushcap(s, cap, tag);
|
pushcap(s, cap, tag);
|
||||||
return result;
|
return result;
|
||||||
@ -352,10 +337,6 @@ tail:
|
|||||||
case RULE_GROUP: {
|
case RULE_GROUP: {
|
||||||
uint32_t tag = rule[2];
|
uint32_t tag = rule[2];
|
||||||
int oldmode = s->mode;
|
int oldmode = s->mode;
|
||||||
if (!tag && oldmode == PEG_MODE_NOCAPTURE) {
|
|
||||||
rule = s->bytecode + rule[1];
|
|
||||||
goto tail;
|
|
||||||
}
|
|
||||||
CapState cs = cap_save(s);
|
CapState cs = cap_save(s);
|
||||||
s->mode = PEG_MODE_NORMAL;
|
s->mode = PEG_MODE_NORMAL;
|
||||||
down1(s);
|
down1(s);
|
||||||
@ -378,10 +359,6 @@ tail:
|
|||||||
case RULE_MATCHTIME: {
|
case RULE_MATCHTIME: {
|
||||||
uint32_t tag = rule[3];
|
uint32_t tag = rule[3];
|
||||||
int oldmode = s->mode;
|
int oldmode = s->mode;
|
||||||
if (!tag && rule[0] == RULE_REPLACE && oldmode == PEG_MODE_NOCAPTURE) {
|
|
||||||
rule = s->bytecode + rule[1];
|
|
||||||
goto tail;
|
|
||||||
}
|
|
||||||
CapState cs = cap_save(s);
|
CapState cs = cap_save(s);
|
||||||
s->mode = PEG_MODE_NORMAL;
|
s->mode = PEG_MODE_NORMAL;
|
||||||
down1(s);
|
down1(s);
|
||||||
@ -495,14 +472,14 @@ static void peg_arity(Builder *b, int32_t arity, int32_t min, int32_t max) {
|
|||||||
|
|
||||||
static const uint8_t *peg_getset(Builder *b, Janet x) {
|
static const uint8_t *peg_getset(Builder *b, Janet x) {
|
||||||
if (!janet_checktype(x, JANET_STRING))
|
if (!janet_checktype(x, JANET_STRING))
|
||||||
peg_panicf(b, "expected string for character set");
|
peg_panic(b, "expected string for character set");
|
||||||
const uint8_t *str = janet_unwrap_string(x);
|
const uint8_t *str = janet_unwrap_string(x);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const uint8_t *peg_getrange(Builder *b, Janet x) {
|
static const uint8_t *peg_getrange(Builder *b, Janet x) {
|
||||||
if (!janet_checktype(x, JANET_STRING))
|
if (!janet_checktype(x, JANET_STRING))
|
||||||
peg_panicf(b, "expected string for character range");
|
peg_panic(b, "expected string for character range");
|
||||||
const uint8_t *str = janet_unwrap_string(x);
|
const uint8_t *str = janet_unwrap_string(x);
|
||||||
if (janet_string_length(str) != 2)
|
if (janet_string_length(str) != 2)
|
||||||
peg_panicf(b, "expected string to have length 2, got %v", x);
|
peg_panicf(b, "expected string to have length 2, got %v", x);
|
||||||
@ -541,7 +518,7 @@ static uint32_t emit_tag(Builder *b, Janet t) {
|
|||||||
if (janet_checktype(check, JANET_NIL)) {
|
if (janet_checktype(check, JANET_NIL)) {
|
||||||
uint32_t tag = b->nexttag++;
|
uint32_t tag = b->nexttag++;
|
||||||
if (tag > 255) {
|
if (tag > 255) {
|
||||||
peg_panicf(b, "too many tags - up to 255 tags are supported per peg");
|
peg_panic(b, "too many tags - up to 255 tags are supported per peg");
|
||||||
}
|
}
|
||||||
Janet val = janet_wrap_number(tag);
|
Janet val = janet_wrap_number(tag);
|
||||||
janet_table_put(b->tags, t, val);
|
janet_table_put(b->tags, t, val);
|
||||||
@ -898,7 +875,7 @@ static uint32_t peg_compile1(Builder *b, Janet peg) {
|
|||||||
|
|
||||||
switch (janet_type(peg)) {
|
switch (janet_type(peg)) {
|
||||||
default:
|
default:
|
||||||
peg_panicf(b, "unexpected peg source");
|
peg_panic(b, "unexpected peg source");
|
||||||
return 0;
|
return 0;
|
||||||
case JANET_NUMBER: {
|
case JANET_NUMBER: {
|
||||||
int32_t n = peg_getinteger(b, peg);
|
int32_t n = peg_getinteger(b, peg);
|
||||||
@ -919,7 +896,7 @@ static uint32_t peg_compile1(Builder *b, Janet peg) {
|
|||||||
case JANET_KEYWORD: {
|
case JANET_KEYWORD: {
|
||||||
Janet check = janet_table_get(b->grammar, peg);
|
Janet check = janet_table_get(b->grammar, peg);
|
||||||
if (janet_checktype(check, JANET_NIL))
|
if (janet_checktype(check, JANET_NIL))
|
||||||
peg_panicf(b, "unknown rule");
|
peg_panic(b, "unknown rule");
|
||||||
rule = peg_compile1(b, check);
|
rule = peg_compile1(b, check);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -929,7 +906,7 @@ static uint32_t peg_compile1(Builder *b, Janet peg) {
|
|||||||
b->grammar = grammar;
|
b->grammar = grammar;
|
||||||
Janet main_rule = janet_table_get(grammar, janet_ckeywordv("main"));
|
Janet main_rule = janet_table_get(grammar, janet_ckeywordv("main"));
|
||||||
if (janet_checktype(main_rule, JANET_NIL))
|
if (janet_checktype(main_rule, JANET_NIL))
|
||||||
peg_panicf(b, "grammar requires :main rule");
|
peg_panic(b, "grammar requires :main rule");
|
||||||
rule = peg_compile1(b, main_rule);
|
rule = peg_compile1(b, main_rule);
|
||||||
b->grammar = grammar->proto;
|
b->grammar = grammar->proto;
|
||||||
break;
|
break;
|
||||||
|
Loading…
Reference in New Issue
Block a user