mirror of
https://github.com/janet-lang/janet
synced 2025-01-22 21:26:51 +00:00
Fix recursion in grammars.
This commit is contained in:
parent
e53778d5d8
commit
170e785b72
2
Makefile
2
Makefile
@ -29,7 +29,7 @@ LIBDIR=$(PREFIX)/lib
|
|||||||
BINDIR=$(PREFIX)/bin
|
BINDIR=$(PREFIX)/bin
|
||||||
JANET_BUILD?="\"$(shell git log --pretty=format:'%h' -n 1)\""
|
JANET_BUILD?="\"$(shell git log --pretty=format:'%h' -n 1)\""
|
||||||
|
|
||||||
CFLAGS=-std=c99 -Wall -Wextra -Isrc/include -fpic -O2 -fvisibility=hidden \
|
CFLAGS=-std=c99 -Wall -Wextra -Isrc/include -fpic -g -O2 -fvisibility=hidden \
|
||||||
-DJANET_BUILD=$(JANET_BUILD)
|
-DJANET_BUILD=$(JANET_BUILD)
|
||||||
CLIBS=-lm -ldl
|
CLIBS=-lm -ldl
|
||||||
JANET_TARGET=build/janet
|
JANET_TARGET=build/janet
|
||||||
|
62
doc/Peg.md
Normal file
62
doc/Peg.md
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
# Peg (Parsing Expression Grammars)
|
||||||
|
|
||||||
|
A common task for developers is to recognize patterns in text, be it
|
||||||
|
filtering emails from a list or extracting data from a CSV file. Programming
|
||||||
|
languages and libraries usually offer a number of tools for this, including prebuilt
|
||||||
|
parsers, simple operations on strings (splitting a string on commas), and regular expressions.
|
||||||
|
The pre-built or custom-built parser is usually the most robust solution, but can
|
||||||
|
be very complex to maintain and may not exist for many languages. String functions are not
|
||||||
|
powerful enough for a large class of languages, and regular expressions can be hard to read
|
||||||
|
(which characters are escaped?) and underpowered (don't parse HTML with regex!).
|
||||||
|
|
||||||
|
PEGs, or Parsing Expression Grammars, are another formalism for recognizing languages that
|
||||||
|
are easier to write as a custom parser and more powerful than regular expressions. They also
|
||||||
|
can produce grammars that are easily unerstandable and moderatly fast. PEGs can also be compiled
|
||||||
|
to a bytecode format that can be reused.
|
||||||
|
|
||||||
|
Below is a siimple example for checking if a string is a valid IP address. Notice how
|
||||||
|
the grammar is descriptive enough that you can read it even if you don't know the peg
|
||||||
|
syntax (example is translated from a (RED language blog post)[https://www.red-lang.org/2013/11/041-introducing-parse.html]).
|
||||||
|
```
|
||||||
|
(def ip-address
|
||||||
|
'{:dig (range "09")
|
||||||
|
:0-4 (range "04")
|
||||||
|
:0-5 (range "05")
|
||||||
|
:byte (choice
|
||||||
|
(sequence "25" :0-5)
|
||||||
|
(sequence "2" :0-4 :dig)
|
||||||
|
(sequence "1" :dig :dig)
|
||||||
|
(between 1 2 :dig))
|
||||||
|
:main (sequence :byte "." :byte "." :byte "." :byte)})
|
||||||
|
|
||||||
|
(peg/match ip-address "0.0.0.0") # -> @[]
|
||||||
|
(peg/match ip-address "elephant") # -> nil
|
||||||
|
(peg/match ip-address "256.0.0.0") # -> nil
|
||||||
|
```
|
||||||
|
|
||||||
|
## Primitive Patterns
|
||||||
|
|
||||||
|
Larger patterns are built up with primitive patterns, which recognize individual
|
||||||
|
characters, string literals, or a given number of characters. A character in Janet
|
||||||
|
is considered a byte, so PEGs will work on any string of bytes. No special meaning is
|
||||||
|
given to the 0 byte, or the string terminator in many languages.
|
||||||
|
|
||||||
|
| Pattern | Alias | What it Matches |
|
||||||
|
| string ("cat") | | The literal string. |
|
||||||
|
| integer (3) | | Matches a number of characters, and advances that many characters. If negative, matches if not that many characters and does not advance. For example, -1 will match the end of a string |
|
||||||
|
| `(range "az" "AZ")` | | Matches characters in a range and advances 1 character. Multiple ranges can be combined together. |
|
||||||
|
| `(set "abcd")` | | Match any character in the argument string. Advances 1 character. |
|
||||||
|
|
||||||
|
## Combining Patterns
|
||||||
|
|
||||||
|
These primitve patterns are combined with a few specials to match a wide number of languages.
|
||||||
|
|
||||||
|
|
||||||
|
## Grammars and Recursion
|
||||||
|
|
||||||
|
Parsing Expression Grammars try to match an input text with a pattern in a greedy manner.
|
||||||
|
This means that if a rule fails to match, that rule will fail and not try again. The only
|
||||||
|
backtracking provided in a peg is provided by the `(choice x y z ...)` special, which will
|
||||||
|
try rules in order until one succeeds, and the whole pattern succeeds. If no sub pattern
|
||||||
|
succeeds, then the whole pattern fails. Note that this means that the order of `x y z` in choice
|
||||||
|
DOES matter. If y matches everything that z matches, z will never succeed.
|
198
src/core/peg.c
198
src/core/peg.c
@ -455,17 +455,35 @@ static uint32_t emit_constant(Builder *b, Janet c) {
|
|||||||
return cindex;
|
return cindex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Reserve space in bytecode for a rule. When a special emits a rule,
|
||||||
|
* it must place that rule immediately on the bytecode stack. This lets
|
||||||
|
* the compiler know where the rule is going to be before it is complete,
|
||||||
|
* allowing recursive rules. */
|
||||||
|
typedef struct {
|
||||||
|
Builder *builder;
|
||||||
|
uint32_t index;
|
||||||
|
int32_t size;
|
||||||
|
} Reserve;
|
||||||
|
|
||||||
|
static Reserve reserve(Builder *b, int32_t size) {
|
||||||
|
Reserve r;
|
||||||
|
r.index = janet_v_count(b->bytecode);
|
||||||
|
r.builder = b;
|
||||||
|
r.size = size;
|
||||||
|
for (int32_t i = 0; i < size; i++)
|
||||||
|
janet_v_push(b->bytecode, 0);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
/* Emit a rule in the builder. Returns the index of the new rule */
|
/* Emit a rule in the builder. Returns the index of the new rule */
|
||||||
static uint32_t emit_rule(Builder *b, uint32_t op, int32_t n, const uint32_t *body) {
|
static void emit_rule(Reserve r, int32_t op, int32_t n, const uint32_t *body) {
|
||||||
uint32_t next_rule = janet_v_count(b->bytecode);
|
janet_assert(r.size == n + 1, "bad reserve");
|
||||||
janet_v_push(b->bytecode, op);
|
r.builder->bytecode[r.index] = op;
|
||||||
for (int32_t i = 0; i < n; i++)
|
memcpy(r.builder->bytecode + r.index + 1, body, n * sizeof(uint32_t));
|
||||||
janet_v_push(b->bytecode, body[i]);
|
|
||||||
return next_rule;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For RULE_LITERAL */
|
/* For RULE_LITERAL */
|
||||||
static uint32_t emit_bytes(Builder *b, uint32_t op, int32_t len, const uint8_t *bytes) {
|
static void emit_bytes(Builder *b, uint32_t op, int32_t len, const uint8_t *bytes) {
|
||||||
uint32_t next_rule = janet_v_count(b->bytecode);
|
uint32_t next_rule = janet_v_count(b->bytecode);
|
||||||
janet_v_push(b->bytecode, op);
|
janet_v_push(b->bytecode, op);
|
||||||
janet_v_push(b->bytecode, len);
|
janet_v_push(b->bytecode, len);
|
||||||
@ -473,20 +491,19 @@ static uint32_t emit_bytes(Builder *b, uint32_t op, int32_t len, const uint8_t *
|
|||||||
for (int32_t i = 0; i < words; i++)
|
for (int32_t i = 0; i < words; i++)
|
||||||
janet_v_push(b->bytecode, 0);
|
janet_v_push(b->bytecode, 0);
|
||||||
memcpy(b->bytecode + next_rule + 2, bytes, len);
|
memcpy(b->bytecode + next_rule + 2, bytes, len);
|
||||||
return next_rule;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For fixed arity rules of arities 1, 2, and 3 */
|
/* For fixed arity rules of arities 1, 2, and 3 */
|
||||||
static uint32_t emit_1(Builder *b, uint32_t op, uint32_t arg) {
|
static void emit_1(Reserve r, uint32_t op, uint32_t arg) {
|
||||||
return emit_rule(b, op, 1, &arg);
|
return emit_rule(r, op, 1, &arg);
|
||||||
}
|
}
|
||||||
static uint32_t emit_2(Builder *b, uint32_t op, uint32_t arg1, uint32_t arg2) {
|
static void emit_2(Reserve r, uint32_t op, uint32_t arg1, uint32_t arg2) {
|
||||||
uint32_t arr[2] = {arg1, arg2};
|
uint32_t arr[2] = {arg1, arg2};
|
||||||
return emit_rule(b, op, 2, arr);
|
return emit_rule(r, op, 2, arr);
|
||||||
}
|
}
|
||||||
static uint32_t emit_3(Builder *b, uint32_t op, uint32_t arg1, uint32_t arg2, uint32_t arg3) {
|
static void emit_3(Reserve r, uint32_t op, uint32_t arg1, uint32_t arg2, uint32_t arg3) {
|
||||||
uint32_t arr[3] = {arg1, arg2, arg3};
|
uint32_t arr[3] = {arg1, arg2, arg3};
|
||||||
return emit_rule(b, op, 3, arr);
|
return emit_rule(r, op, 3, arr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -560,43 +577,47 @@ static void bitmap_set(uint32_t *bitmap, uint8_t c) {
|
|||||||
bitmap[c >> 5] |= ((uint32_t)1) << (c & 0x1F);
|
bitmap[c >> 5] |= ((uint32_t)1) << (c & 0x1F);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_range(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_range(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_arity(b, argc, 1, -1);
|
peg_arity(b, argc, 1, -1);
|
||||||
if (argc == 1) {
|
if (argc == 1) {
|
||||||
|
Reserve r = reserve(b, 2);
|
||||||
const uint8_t *str = peg_getrange(b, argv[0]);
|
const uint8_t *str = peg_getrange(b, argv[0]);
|
||||||
uint32_t arg = str[0] | (str[1] << 16);
|
uint32_t arg = str[0] | (str[1] << 16);
|
||||||
return emit_1(b, RULE_RANGE, arg);
|
emit_1(r, RULE_RANGE, arg);
|
||||||
} else {
|
} else {
|
||||||
/* Compile as a set */
|
/* Compile as a set */
|
||||||
|
Reserve r = reserve(b, 9);
|
||||||
uint32_t bitmap[8] = {0};
|
uint32_t bitmap[8] = {0};
|
||||||
for (int32_t i = 0; i < argc; i++) {
|
for (int32_t i = 0; i < argc; i++) {
|
||||||
const uint8_t *str = peg_getrange(b, argv[i]);
|
const uint8_t *str = peg_getrange(b, argv[i]);
|
||||||
for (uint32_t c = str[0]; c <= str[1]; c++)
|
for (uint32_t c = str[0]; c <= str[1]; c++)
|
||||||
bitmap_set(bitmap, c);
|
bitmap_set(bitmap, c);
|
||||||
}
|
}
|
||||||
return emit_rule(b, RULE_SET, 8, bitmap);
|
emit_rule(r, RULE_SET, 8, bitmap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_set(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_set(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 1);
|
peg_fixarity(b, argc, 1);
|
||||||
|
Reserve r = reserve(b, 9);
|
||||||
const uint8_t *str = peg_getset(b, argv[0]);
|
const uint8_t *str = peg_getset(b, argv[0]);
|
||||||
uint32_t bitmap[8] = {0};
|
uint32_t bitmap[8] = {0};
|
||||||
for (int32_t i = 0; i < janet_string_length(str); i++)
|
for (int32_t i = 0; i < janet_string_length(str); i++)
|
||||||
bitmap_set(bitmap, str[i]);
|
bitmap_set(bitmap, str[i]);
|
||||||
return emit_rule(b, RULE_SET, 8, bitmap);
|
emit_rule(r, RULE_SET, 8, bitmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_look(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_look(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_arity(b, argc, 1, 2);
|
peg_arity(b, argc, 1, 2);
|
||||||
|
Reserve r = reserve(b, 3);
|
||||||
int32_t rulearg = argc == 2 ? 1 : 0;
|
int32_t rulearg = argc == 2 ? 1 : 0;
|
||||||
int32_t offset = argc == 2 ? peg_getinteger(b, argv[0]) : 0;
|
int32_t offset = argc == 2 ? peg_getinteger(b, argv[0]) : 0;
|
||||||
uint32_t subrule = compile1(b, argv[rulearg]);
|
uint32_t subrule = compile1(b, argv[rulearg]);
|
||||||
return emit_2(b, RULE_LOOK, (uint32_t) offset, subrule);
|
emit_2(r, RULE_LOOK, (uint32_t) offset, subrule);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Rule of the form [len, rules...] */
|
/* Rule of the form [len, rules...] */
|
||||||
static uint32_t spec_variadic(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
|
static void spec_variadic(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
|
||||||
uint32_t rule = janet_v_count(b->bytecode);
|
uint32_t rule = janet_v_count(b->bytecode);
|
||||||
janet_v_push(b->bytecode, op);
|
janet_v_push(b->bytecode, op);
|
||||||
janet_v_push(b->bytecode, argc);
|
janet_v_push(b->bytecode, argc);
|
||||||
@ -606,128 +627,138 @@ static uint32_t spec_variadic(Builder *b, int32_t argc, const Janet *argv, uint3
|
|||||||
uint32_t rulei = compile1(b, argv[i]);
|
uint32_t rulei = compile1(b, argv[i]);
|
||||||
b->bytecode[rule + 2 + i] = rulei;
|
b->bytecode[rule + 2 + i] = rulei;
|
||||||
}
|
}
|
||||||
return rule;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_choice(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_choice(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
return spec_variadic(b, argc, argv, RULE_CHOICE);
|
spec_variadic(b, argc, argv, RULE_CHOICE);
|
||||||
}
|
}
|
||||||
static uint32_t spec_sequence(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_sequence(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
return spec_variadic(b, argc, argv, RULE_SEQUENCE);
|
spec_variadic(b, argc, argv, RULE_SEQUENCE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_ifnot(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_ifnot(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 2);
|
peg_fixarity(b, argc, 2);
|
||||||
|
Reserve r = reserve(b, 3);
|
||||||
uint32_t rule_a = compile1(b, argv[0]);
|
uint32_t rule_a = compile1(b, argv[0]);
|
||||||
uint32_t rule_b = compile1(b, argv[1]);
|
uint32_t rule_b = compile1(b, argv[1]);
|
||||||
return emit_2(b, RULE_IFNOT, rule_a, rule_b);
|
emit_2(r, RULE_IFNOT, rule_a, rule_b);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Rule of the form [rule] */
|
/* Rule of the form [rule] */
|
||||||
static uint32_t spec_onerule(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
|
static void spec_onerule(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
|
||||||
peg_fixarity(b, argc, 1);
|
peg_fixarity(b, argc, 1);
|
||||||
|
Reserve r = reserve(b, 2);
|
||||||
uint32_t rule = compile1(b, argv[0]);
|
uint32_t rule = compile1(b, argv[0]);
|
||||||
return emit_1(b, op, rule);
|
emit_1(r, op, rule);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_not(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_not(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
return spec_onerule(b, argc, argv, RULE_NOT);
|
spec_onerule(b, argc, argv, RULE_NOT);
|
||||||
}
|
}
|
||||||
static uint32_t spec_capture(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_capture(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
return spec_onerule(b, argc, argv, RULE_CAPTURE);
|
spec_onerule(b, argc, argv, RULE_CAPTURE);
|
||||||
}
|
}
|
||||||
static uint32_t spec_substitute(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_substitute(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
return spec_onerule(b, argc, argv, RULE_SUBSTITUTE);
|
spec_onerule(b, argc, argv, RULE_SUBSTITUTE);
|
||||||
}
|
}
|
||||||
static uint32_t spec_group(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_group(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
return spec_onerule(b, argc, argv, RULE_GROUP);
|
spec_onerule(b, argc, argv, RULE_GROUP);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_exponent(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_exponent(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 2);
|
peg_fixarity(b, argc, 2);
|
||||||
|
Reserve r = reserve(b, 4);
|
||||||
int32_t n = peg_getinteger(b, argv[1]);
|
int32_t n = peg_getinteger(b, argv[1]);
|
||||||
uint32_t subrule = compile1(b, argv[0]);
|
uint32_t subrule = compile1(b, argv[0]);
|
||||||
if (n < 0) {
|
if (n < 0) {
|
||||||
return emit_3(b, RULE_BETWEEN, 0, -n, subrule);
|
emit_3(r, RULE_BETWEEN, 0, -n, subrule);
|
||||||
} else {
|
} else {
|
||||||
return emit_3(b, RULE_BETWEEN, n, UINT32_MAX, subrule);
|
emit_3(r, RULE_BETWEEN, n, UINT32_MAX, subrule);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_between(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_between(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 3);
|
peg_fixarity(b, argc, 3);
|
||||||
|
Reserve r = reserve(b, 4);
|
||||||
int32_t lo = peg_getnat(b, argv[0]);
|
int32_t lo = peg_getnat(b, argv[0]);
|
||||||
int32_t hi = peg_getnat(b, argv[1]);
|
int32_t hi = peg_getnat(b, argv[1]);
|
||||||
uint32_t subrule = compile1(b, argv[2]);
|
uint32_t subrule = compile1(b, argv[2]);
|
||||||
return emit_3(b, RULE_BETWEEN, lo, hi, subrule);
|
emit_3(r, RULE_BETWEEN, lo, hi, subrule);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_position(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_position(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 0);
|
peg_fixarity(b, argc, 0);
|
||||||
|
Reserve r = reserve(b, 1);
|
||||||
(void) argv;
|
(void) argv;
|
||||||
return emit_rule(b, RULE_POSITION, 0, NULL);
|
emit_rule(r, RULE_POSITION, 0, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_reference(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_reference(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 1);
|
peg_fixarity(b, argc, 1);
|
||||||
|
Reserve r = reserve(b, 2);
|
||||||
int32_t index = peg_getinteger(b, argv[0]);
|
int32_t index = peg_getinteger(b, argv[0]);
|
||||||
if (index < 0) {
|
if (index < 0) {
|
||||||
return emit_1(b, RULE_BACKINDEX, -index);
|
emit_1(r, RULE_BACKINDEX, -index);
|
||||||
} else {
|
} else {
|
||||||
return emit_1(b, RULE_REPINDEX, index);
|
emit_1(r, RULE_REPINDEX, index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_argument(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_argument(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 1);
|
peg_fixarity(b, argc, 1);
|
||||||
int32_t index = peg_getinteger(b, argv[0]);
|
Reserve r = reserve(b, 2);
|
||||||
if (index < 0)
|
int32_t index = peg_getnat(b, argv[0]);
|
||||||
peg_panicf(b, "argument index must be natural number, got %v", argv[0]);
|
emit_1(r, RULE_ARGUMENT, index);
|
||||||
return emit_1(b, RULE_ARGUMENT, index);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_constant(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_constant(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
janet_fixarity(argc, 1);
|
janet_fixarity(argc, 1);
|
||||||
return emit_1(b, RULE_CONSTANT, emit_constant(b, argv[0]));
|
Reserve r = reserve(b, 2);
|
||||||
|
emit_1(r, RULE_CONSTANT, emit_constant(b, argv[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_replace(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_replace(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 2);
|
peg_fixarity(b, argc, 2);
|
||||||
|
Reserve r = reserve(b, 3);
|
||||||
uint32_t subrule = compile1(b, argv[0]);
|
uint32_t subrule = compile1(b, argv[0]);
|
||||||
uint32_t constant = emit_constant(b, argv[1]);
|
uint32_t constant = emit_constant(b, argv[1]);
|
||||||
return emit_2(b, RULE_REPLACE, subrule, constant);
|
emit_2(r, RULE_REPLACE, subrule, constant);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For some and any, really just short-hand for (^ rule n) */
|
/* For some and any, really just short-hand for (^ rule n) */
|
||||||
static uint32_t spec_repeater(Builder *b, int32_t argc, const Janet *argv, int32_t min) {
|
static void spec_repeater(Builder *b, int32_t argc, const Janet *argv, int32_t min) {
|
||||||
peg_fixarity(b, argc, 1);
|
peg_fixarity(b, argc, 1);
|
||||||
|
Reserve r = reserve(b, 4);
|
||||||
uint32_t subrule = compile1(b, argv[0]);
|
uint32_t subrule = compile1(b, argv[0]);
|
||||||
return emit_3(b, RULE_BETWEEN, min, UINT32_MAX, subrule);
|
emit_3(r, RULE_BETWEEN, min, UINT32_MAX, subrule);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_some(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_some(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
return spec_repeater(b, argc, argv, 1);
|
spec_repeater(b, argc, argv, 1);
|
||||||
}
|
}
|
||||||
static uint32_t spec_any(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_any(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
return spec_repeater(b, argc, argv, 0);
|
spec_repeater(b, argc, argv, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_atleast(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_atleast(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 2);
|
peg_fixarity(b, argc, 2);
|
||||||
|
Reserve r = reserve(b, 4);
|
||||||
int32_t n = peg_getnat(b, argv[0]);
|
int32_t n = peg_getnat(b, argv[0]);
|
||||||
uint32_t subrule = compile1(b, argv[1]);
|
uint32_t subrule = compile1(b, argv[1]);
|
||||||
return emit_3(b, RULE_BETWEEN, n, UINT32_MAX, subrule);
|
emit_3(r, RULE_BETWEEN, n, UINT32_MAX, subrule);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_atmost(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_atmost(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 2);
|
peg_fixarity(b, argc, 2);
|
||||||
|
Reserve r = reserve(b, 4);
|
||||||
int32_t n = peg_getnat(b, argv[0]);
|
int32_t n = peg_getnat(b, argv[0]);
|
||||||
uint32_t subrule = compile1(b, argv[1]);
|
uint32_t subrule = compile1(b, argv[1]);
|
||||||
return emit_3(b, RULE_BETWEEN, 0, n, subrule);
|
emit_3(r, RULE_BETWEEN, 0, n, subrule);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t spec_matchtime(Builder *b, int32_t argc, const Janet *argv) {
|
static void spec_matchtime(Builder *b, int32_t argc, const Janet *argv) {
|
||||||
peg_fixarity(b, argc, 2);
|
peg_fixarity(b, argc, 2);
|
||||||
|
Reserve r = reserve(b, 3);
|
||||||
uint32_t subrule = compile1(b, argv[0]);
|
uint32_t subrule = compile1(b, argv[0]);
|
||||||
Janet fun = argv[1];
|
Janet fun = argv[1];
|
||||||
if (!janet_checktype(fun, JANET_FUNCTION) &&
|
if (!janet_checktype(fun, JANET_FUNCTION) &&
|
||||||
@ -735,11 +766,11 @@ static uint32_t spec_matchtime(Builder *b, int32_t argc, const Janet *argv) {
|
|||||||
peg_panicf(b, "expected function|cfunction, got %v", fun);
|
peg_panicf(b, "expected function|cfunction, got %v", fun);
|
||||||
}
|
}
|
||||||
uint32_t cindex = emit_constant(b, fun);
|
uint32_t cindex = emit_constant(b, fun);
|
||||||
return emit_2(b, RULE_MATCHTIME, subrule, cindex);
|
emit_2(r, RULE_MATCHTIME, subrule, cindex);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Special compiler form */
|
/* Special compiler form */
|
||||||
typedef uint32_t (*Special)(Builder *b, int32_t argc, const Janet *argv);
|
typedef void (*Special)(Builder *b, int32_t argc, const Janet *argv);
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char *name;
|
const char *name;
|
||||||
Special special;
|
Special special;
|
||||||
@ -797,7 +828,11 @@ static uint32_t compile1(Builder *b, Janet peg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* The final rule to return */
|
/* The final rule to return */
|
||||||
uint32_t rule;
|
uint32_t rule = janet_v_count(b->bytecode);
|
||||||
|
if (!janet_checktype(peg, JANET_KEYWORD) &&
|
||||||
|
!janet_checktype(peg, JANET_STRUCT)) {
|
||||||
|
janet_table_put(b->memoized, peg, janet_wrap_number(rule));
|
||||||
|
}
|
||||||
|
|
||||||
switch (janet_type(peg)) {
|
switch (janet_type(peg)) {
|
||||||
default:
|
default:
|
||||||
@ -806,10 +841,11 @@ static uint32_t compile1(Builder *b, Janet peg) {
|
|||||||
case JANET_NUMBER:
|
case JANET_NUMBER:
|
||||||
{
|
{
|
||||||
int32_t n = peg_getinteger(b, peg);
|
int32_t n = peg_getinteger(b, peg);
|
||||||
|
Reserve r = reserve(b, 2);
|
||||||
if (n < 0) {
|
if (n < 0) {
|
||||||
rule = emit_1(b, RULE_NOTNCHAR, -n);
|
emit_1(r, RULE_NOTNCHAR, -n);
|
||||||
} else {
|
} else {
|
||||||
rule = emit_1(b, RULE_NCHAR, n);
|
emit_1(r, RULE_NCHAR, n);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -817,7 +853,7 @@ static uint32_t compile1(Builder *b, Janet peg) {
|
|||||||
{
|
{
|
||||||
const uint8_t *str = janet_unwrap_string(peg);
|
const uint8_t *str = janet_unwrap_string(peg);
|
||||||
int32_t len = janet_string_length(str);
|
int32_t len = janet_string_length(str);
|
||||||
rule = emit_bytes(b, RULE_LITERAL, len, str);
|
emit_bytes(b, RULE_LITERAL, len, str);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case JANET_KEYWORD:
|
case JANET_KEYWORD:
|
||||||
@ -826,11 +862,7 @@ static uint32_t compile1(Builder *b, Janet peg) {
|
|||||||
if (janet_checktype(check, JANET_NIL))
|
if (janet_checktype(check, JANET_NIL))
|
||||||
peg_panicf(b, "unknown rule");
|
peg_panicf(b, "unknown rule");
|
||||||
rule = compile1(b, check);
|
rule = compile1(b, check);
|
||||||
/* We don't want to memoize references, as they will become invalid
|
break;
|
||||||
* if we go out of scope */
|
|
||||||
b->depth++;
|
|
||||||
b->form = old_form;
|
|
||||||
return rule;
|
|
||||||
}
|
}
|
||||||
case JANET_STRUCT:
|
case JANET_STRUCT:
|
||||||
{
|
{
|
||||||
@ -859,18 +891,14 @@ static uint32_t compile1(Builder *b, Janet peg) {
|
|||||||
sym);
|
sym);
|
||||||
if (!sp)
|
if (!sp)
|
||||||
peg_panicf(b, "unknown special %S", sym);
|
peg_panicf(b, "unknown special %S", sym);
|
||||||
rule = sp->special(b, len - 1, tup + 1);
|
sp->special(b, len - 1, tup + 1);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Add rule to memoized table */
|
|
||||||
janet_table_put(b->memoized, peg, janet_wrap_number(rule));
|
|
||||||
|
|
||||||
/* Increase depth again */
|
/* Increase depth again */
|
||||||
b->depth++;
|
b->depth++;
|
||||||
b->form = old_form;
|
b->form = old_form;
|
||||||
|
|
||||||
return rule;
|
return rule;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -278,4 +278,13 @@
|
|||||||
(check-deep scanner "-1.3e-7" @[-1.3e-7])
|
(check-deep scanner "-1.3e-7" @[-1.3e-7])
|
||||||
(check-deep scanner "123A" nil)
|
(check-deep scanner "123A" nil)
|
||||||
|
|
||||||
|
# Recursive grammars
|
||||||
|
|
||||||
|
(def g '{:main (+ (* "a" :main "b") "c")})
|
||||||
|
|
||||||
|
(check-match g "c" true)
|
||||||
|
(check-match g "acb" true)
|
||||||
|
(check-match g "aacbb" true)
|
||||||
|
(check-match g "aadbb" false)
|
||||||
|
|
||||||
(end-suite)
|
(end-suite)
|
||||||
|
Loading…
Reference in New Issue
Block a user