2019-01-12 00:22:24 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2019 Calvin Rose
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to
|
|
|
|
* deal in the Software without restriction, including without limitation the
|
|
|
|
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
|
|
* sell copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <janet/janet.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include "util.h"
|
|
|
|
|
|
|
|
/* TODO
|
2019-01-12 15:16:25 +00:00
|
|
|
* - Capture substitution - allow substitution in captures, perhaps with a buffer in state
|
|
|
|
* that can accumulate custom data.
|
2019-01-12 00:22:24 +00:00
|
|
|
* - Compilation - compile peg to binary form - one grammar, patterns reference each other by index
|
2019-01-12 15:16:25 +00:00
|
|
|
* and bytecode "opcodes" identify primitive patterns and pattern "constructors". Main pattern is
|
|
|
|
* pattern index 0. The logic of patterns would not change much, but we could elide arity checking,
|
|
|
|
* expensive keyword lookups, and unused patterns. We could also combine certain pattern types into
|
|
|
|
* more efficient types. */
|
2019-01-12 00:22:24 +00:00
|
|
|
|
|
|
|
/* Hold captured patterns and match state */
|
|
|
|
typedef struct {
|
|
|
|
int32_t depth;
|
|
|
|
const uint8_t *text_start;
|
|
|
|
const uint8_t *text_end;
|
2019-01-12 15:16:25 +00:00
|
|
|
const uint8_t *subst_end;
|
2019-01-12 00:22:24 +00:00
|
|
|
JanetTable *grammar;
|
2019-01-12 02:09:49 +00:00
|
|
|
JanetArray *captures;
|
2019-01-12 15:16:25 +00:00
|
|
|
JanetBuffer *scratch;
|
2019-01-12 16:04:47 +00:00
|
|
|
const Janet *extrav;
|
|
|
|
int32_t extrac;
|
2019-01-12 00:22:24 +00:00
|
|
|
int flags;
|
|
|
|
} State;
|
|
|
|
|
2019-01-12 15:16:25 +00:00
|
|
|
/* Check if we are inside a substitution capture */
|
|
|
|
#define PEG_SUBSTITUTE 1
|
|
|
|
|
2019-01-12 00:22:24 +00:00
|
|
|
/* Forward declaration */
|
|
|
|
static int32_t match(State *s, Janet peg, const uint8_t *text);
|
|
|
|
|
|
|
|
/* Special matcher form */
|
|
|
|
typedef int32_t (*Matcher)(State *s, int32_t argc, const Janet *argv, const uint8_t *text);
|
2019-01-12 15:16:25 +00:00
|
|
|
|
|
|
|
/* A "Matcher" is a function that is used to match a pattern at an anchored position. It takes some
|
|
|
|
* optional arguments, and returns either the number of bytes matched, or -1 for no match. It can also
|
|
|
|
* append values to the capture array of State *s, panic on bad arguments, and call match recursively. */
|
2019-01-12 00:22:24 +00:00
|
|
|
|
2019-01-12 02:09:49 +00:00
|
|
|
/*
|
|
|
|
* Primitive Pattern Types
|
|
|
|
*/
|
|
|
|
|
2019-01-12 00:22:24 +00:00
|
|
|
/* Match a character range */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_range(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 00:22:24 +00:00
|
|
|
if (s->text_end <= text)
|
|
|
|
return -1;
|
|
|
|
for (int32_t i = 0; i < argc; i++) {
|
|
|
|
const uint8_t *range = janet_getstring(argv, i);
|
|
|
|
int32_t length = janet_string_length(range);
|
|
|
|
if (length != 2) janet_panicf("arguments to range must have length 2");
|
|
|
|
uint8_t lo = range[0];
|
|
|
|
uint8_t hi = range[1];
|
|
|
|
if (text[0] >= lo && text[0] <= hi) return 1;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Match 1 of any character in argv[0] */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_set(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 00:22:24 +00:00
|
|
|
janet_fixarity(argc, 1);
|
|
|
|
const uint8_t *set = janet_getstring(argv, 0);
|
|
|
|
int32_t len = janet_string_length(set);
|
|
|
|
if (s->text_end <= text) return -1;
|
|
|
|
for (int32_t i = 0; i < len; i++)
|
|
|
|
if (set[i] == text[0]) return 1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2019-01-12 15:16:25 +00:00
|
|
|
/*
|
|
|
|
* Look ahead/behind
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Match 0 length if match argv[0] */
|
|
|
|
static int32_t match_lookahead(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
|
|
|
janet_fixarity(argc, 1);
|
|
|
|
return match(s, argv[0], text) >= 0 ? 0 : -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Match 0 length if match argv[0] at text + offset */
|
|
|
|
static int32_t match_lookoffset(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
|
|
|
janet_fixarity(argc, 2);
|
|
|
|
text += janet_getinteger(argv, 0);
|
|
|
|
if (text < s->text_start || text > s->text_end)
|
|
|
|
return -1;
|
|
|
|
return match(s, argv[1], text) >= 0 ? 0 : -1;
|
|
|
|
}
|
|
|
|
|
2019-01-12 02:09:49 +00:00
|
|
|
/*
|
|
|
|
* Combining Pattern Types
|
|
|
|
*/
|
|
|
|
|
2019-01-12 00:22:24 +00:00
|
|
|
/* Match the first of argv[0], argv[1], argv[2], ... */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_choice(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 00:22:24 +00:00
|
|
|
for (int32_t i = 0; i < argc; i++) {
|
|
|
|
int32_t result = match(s, argv[i], text);
|
|
|
|
if (result >= 0) return result;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Match argv[0] then argv[1] then argv[2] ... Fail if any match fails. */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_sequence(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 00:22:24 +00:00
|
|
|
int32_t traversed = 0;
|
|
|
|
for (int32_t i = 0; i < argc; i++) {
|
|
|
|
int32_t result = match(s, argv[i], text + traversed);
|
|
|
|
if (result < 0) return -1;
|
|
|
|
traversed += result;
|
|
|
|
}
|
|
|
|
return traversed;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Match argv[0] if not argv[1] */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_minus(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 00:22:24 +00:00
|
|
|
janet_fixarity(argc, 2);
|
|
|
|
if (match(s, argv[1], text) < 0)
|
|
|
|
return match(s, argv[0], text);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Match zero length if not match argv[0] */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_not(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 00:22:24 +00:00
|
|
|
janet_fixarity(argc, 1);
|
|
|
|
if (match(s, argv[0], text) < 0)
|
|
|
|
return 0;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Match at least argv[0] repetitions of argv[1]. Will match as many repetitions as possible. */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_atleast(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 00:22:24 +00:00
|
|
|
janet_fixarity(argc, 2);
|
|
|
|
int32_t n = janet_getinteger(argv, 0);
|
|
|
|
int32_t captured = 0;
|
|
|
|
int32_t total_length = 0;
|
|
|
|
int32_t result;
|
|
|
|
/* Greedy match until match fails */
|
|
|
|
while ((result = match(s, argv[1], text + total_length)) > 0) {
|
|
|
|
captured++;
|
|
|
|
total_length += result;
|
|
|
|
}
|
|
|
|
return captured >= n ? total_length : -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Match at most argv[0] repetitions of argv[1]. Will match as many repetitions as possible. */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_atmost(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 00:22:24 +00:00
|
|
|
janet_fixarity(argc, 2);
|
|
|
|
int32_t n = janet_getinteger(argv, 0);
|
|
|
|
int32_t captured = 0;
|
|
|
|
int32_t total_length = 0;
|
|
|
|
int32_t result;
|
|
|
|
/* Greedy match until match fails or n captured */
|
|
|
|
while (captured < n && (result = match(s, argv[1], text + total_length)) > 0) {
|
|
|
|
captured++;
|
|
|
|
total_length += result;
|
|
|
|
}
|
|
|
|
/* always matches */
|
|
|
|
return total_length;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Match between argv[0] and argv[1] repetitions of argv[2]. Will match as many repetitions as possible. */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_between(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 00:22:24 +00:00
|
|
|
janet_fixarity(argc, 3);
|
|
|
|
int32_t lo = janet_getinteger(argv, 0);
|
|
|
|
int32_t hi = janet_getinteger(argv, 1);
|
|
|
|
int32_t captured = 0;
|
|
|
|
int32_t total_length = 0;
|
|
|
|
int32_t result;
|
|
|
|
/* Greedy match until match fails or n captured */
|
|
|
|
while (captured < hi && (result = match(s, argv[2], text + total_length)) > 0) {
|
|
|
|
captured++;
|
|
|
|
total_length += result;
|
|
|
|
}
|
|
|
|
/* always matches */
|
|
|
|
return captured >= lo ? total_length : -1;
|
|
|
|
}
|
|
|
|
|
2019-01-12 15:16:25 +00:00
|
|
|
/*
|
|
|
|
* Captures
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Record a capture that covers nbytes bytes of input starting at text.
 *
 * In normal mode the value is pushed onto the capture array. In substitution
 * mode the value is instead rendered as text into the scratch buffer,
 * preceded by the input that lies between the previous substitution and this
 * capture, and subst_end is advanced past the captured input. */
static void push_capture(State *s, Janet capture, const uint8_t *text, int32_t nbytes) {
    const int substituting = s->flags & PEG_SUBSTITUTE;
    if (!substituting) {
        janet_array_push(s->captures, capture);
        return;
    }
    /* Copy the untouched input between the last substitution and here */
    janet_buffer_push_bytes(s->scratch, s->subst_end, text - s->subst_end);
    /* Then the capture itself, converted to a string */
    janet_to_string_b(s->scratch, capture);
    s->subst_end = text + nbytes;
}
|
|
|
|
|
2019-01-12 02:09:49 +00:00
|
|
|
/* Capture a value */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_capture(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 02:09:49 +00:00
|
|
|
janet_fixarity(argc, 1);
|
|
|
|
int32_t result = match(s, argv[0], text);
|
|
|
|
if (result < 0) return -1;
|
2019-01-12 15:16:25 +00:00
|
|
|
push_capture(s, janet_stringv(text, result), text, result);
|
2019-01-12 02:09:49 +00:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Capture position */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_position(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 02:09:49 +00:00
|
|
|
janet_fixarity(argc, 0);
|
|
|
|
(void) argv;
|
2019-01-12 15:16:25 +00:00
|
|
|
push_capture(s, janet_wrap_number(text - s->text_start), text, 0);
|
2019-01-12 02:09:49 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Capture group */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_group(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 02:09:49 +00:00
|
|
|
janet_fixarity(argc, 1);
|
|
|
|
int32_t old_count = s->captures->count;
|
2019-01-12 15:16:25 +00:00
|
|
|
int32_t old_flags = s->flags;
|
|
|
|
s->flags &= ~PEG_SUBSTITUTE; /* Turn off substitution mode */
|
2019-01-12 02:09:49 +00:00
|
|
|
int32_t result = match(s, argv[0], text);
|
2019-01-12 15:16:25 +00:00
|
|
|
s->flags = old_flags;
|
2019-01-12 02:09:49 +00:00
|
|
|
if (result < 0) return -1;
|
2019-01-12 15:16:25 +00:00
|
|
|
|
2019-01-12 02:09:49 +00:00
|
|
|
/* Collect sub-captures into an array by popping new values off of the capture stack,
|
2019-01-12 15:16:25 +00:00
|
|
|
* and then putting them in a new array. Then, push the new array back onto the capture stack. */
|
2019-01-12 02:09:49 +00:00
|
|
|
int32_t num_sub_captures = s->captures->count - old_count;
|
|
|
|
JanetArray *sub_captures = janet_array(num_sub_captures);
|
|
|
|
memcpy(sub_captures->data, s->captures->data + old_count, sizeof(Janet) * num_sub_captures);
|
|
|
|
sub_captures->count = num_sub_captures;
|
|
|
|
s->captures->count = old_count;
|
2019-01-12 15:16:25 +00:00
|
|
|
|
|
|
|
push_capture(s, janet_wrap_array(sub_captures), text, result);
|
2019-01-12 02:09:49 +00:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Capture a constant */
|
2019-01-12 15:16:25 +00:00
|
|
|
static int32_t match_capture_constant(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
2019-01-12 02:09:49 +00:00
|
|
|
janet_fixarity(argc, 1);
|
2019-01-12 15:16:25 +00:00
|
|
|
push_capture(s, argv[0], text, 0);
|
2019-01-12 02:09:49 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-01-12 16:04:47 +00:00
|
|
|
/* Capture nth extra argument to peg/match */
|
|
|
|
static int32_t match_capture_arg(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
|
|
|
janet_fixarity(argc, 1);
|
|
|
|
int32_t n = janet_gethalfrange(argv, 0, s->extrac, "n");
|
|
|
|
push_capture(s, s->extrav[n], text, 0);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-01-12 15:16:25 +00:00
|
|
|
/* Capture replace */
|
|
|
|
static int32_t match_replace(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
|
|
|
janet_fixarity(argc, 2);
|
|
|
|
int32_t result = match(s, argv[0], text);
|
|
|
|
if (result < 0) return -1;
|
|
|
|
Janet capture;
|
|
|
|
switch (janet_type(argv[1])) {
|
|
|
|
default:
|
|
|
|
capture = argv[1];
|
|
|
|
break;
|
|
|
|
case JANET_STRUCT:
|
|
|
|
capture = janet_struct_get(janet_unwrap_struct(argv[1]), janet_stringv(text, result));
|
|
|
|
break;
|
|
|
|
case JANET_TABLE:
|
|
|
|
capture = janet_table_get(janet_unwrap_table(argv[1]), janet_stringv(text, result));
|
|
|
|
break;
|
|
|
|
/* TODO - add functions, c functions, numbers */
|
|
|
|
}
|
|
|
|
push_capture(s, capture, text, result);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Substitution capture */
|
|
|
|
static int32_t match_substitute(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
|
|
|
janet_fixarity(argc, 1);
|
|
|
|
|
|
|
|
/* If we were originally in substitution mode, simply pass
|
|
|
|
* argv[0] right through */
|
|
|
|
if (s->flags & PEG_SUBSTITUTE)
|
|
|
|
return match(s, argv[0], text);
|
|
|
|
|
|
|
|
/* Set up scratch */
|
|
|
|
s->scratch->count = 0;
|
|
|
|
s->subst_end = text;
|
|
|
|
|
|
|
|
s->flags |= PEG_SUBSTITUTE;
|
|
|
|
int32_t result = match(s, argv[0], text);
|
|
|
|
s->flags &= ~PEG_SUBSTITUTE;
|
|
|
|
|
|
|
|
if (result < 0) return -1;
|
|
|
|
|
|
|
|
/* Pop scratch to captures */
|
|
|
|
janet_buffer_push_bytes(s->scratch, s->subst_end, text - s->subst_end + result);
|
|
|
|
janet_array_push(s->captures, janet_stringv(s->scratch->data, s->scratch->count));
|
|
|
|
s->scratch->count = 0;
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2019-01-12 00:22:24 +00:00
|
|
|
/* Lookup for special forms */
|
2019-01-12 15:16:25 +00:00
|
|
|
typedef struct {
|
|
|
|
const char *name;
|
|
|
|
Matcher matcher;
|
|
|
|
} MatcherPair;
|
2019-01-12 00:22:24 +00:00
|
|
|
static const MatcherPair specials[] = {
|
|
|
|
{"*", match_sequence},
|
|
|
|
{"+", match_choice},
|
|
|
|
{"-", match_minus},
|
2019-01-12 15:16:25 +00:00
|
|
|
{"/", match_replace},
|
2019-01-12 02:09:49 +00:00
|
|
|
{"<-", match_capture},
|
2019-01-12 16:04:47 +00:00
|
|
|
{"<-arg", match_capture_arg},
|
2019-01-12 02:09:49 +00:00
|
|
|
{"<-c", match_capture_constant},
|
|
|
|
{"<-g", match_group},
|
2019-01-12 15:16:25 +00:00
|
|
|
{"<-p", match_position},
|
|
|
|
{"<-s", match_substitute},
|
2019-01-12 00:22:24 +00:00
|
|
|
{">", match_lookahead},
|
2019-01-12 15:16:25 +00:00
|
|
|
{">>", match_lookoffset},
|
2019-01-12 00:22:24 +00:00
|
|
|
{"at-least", match_atleast},
|
|
|
|
{"at-most", match_atmost},
|
|
|
|
{"between", match_between},
|
|
|
|
{"not", match_not},
|
|
|
|
{"range", match_range},
|
|
|
|
{"set", match_set}
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Check if the string matches the pattern at the given point. Returns a negative number
|
2019-01-12 15:16:25 +00:00
|
|
|
* if no match, else the number of characters matched against. */
|
2019-01-12 00:22:24 +00:00
|
|
|
static int32_t match(State *s, Janet peg, const uint8_t *text) {
|
|
|
|
switch(janet_type(peg)) {
|
|
|
|
default:
|
|
|
|
janet_panicf("unexpected element in peg: %v", peg);
|
|
|
|
return -1;
|
|
|
|
case JANET_NUMBER:
|
|
|
|
/* Match n characters */
|
|
|
|
{
|
|
|
|
if (!janet_checkint(peg))
|
|
|
|
janet_panicf("numbers in peg must be integers, got %v", peg);
|
|
|
|
int32_t n = janet_unwrap_integer(peg);
|
2019-01-12 16:04:47 +00:00
|
|
|
if (n < 0) /* Invert pattern */
|
|
|
|
return (text - n > s->text_end) ? 0 : -1;
|
|
|
|
return (text + n > s->text_end) ? -1 : n;
|
2019-01-12 00:22:24 +00:00
|
|
|
}
|
|
|
|
case JANET_STRING:
|
|
|
|
/* Match a sequence of bytes */
|
|
|
|
{
|
|
|
|
const uint8_t *str = janet_unwrap_string(peg);
|
|
|
|
int32_t len = janet_string_length(str);
|
2019-01-12 15:16:25 +00:00
|
|
|
if (text + len > s->text_end) return -1;
|
2019-01-12 00:22:24 +00:00
|
|
|
return memcmp(text, str, len) ? -1 : len;
|
|
|
|
}
|
|
|
|
case JANET_TUPLE:
|
|
|
|
/* Match a special command */
|
|
|
|
{
|
|
|
|
const Janet *items;
|
|
|
|
int32_t len;
|
|
|
|
janet_indexed_view(peg, &items, &len);
|
|
|
|
janet_arity(len, 1, -1);
|
|
|
|
if (!janet_checktype(items[0], JANET_SYMBOL))
|
|
|
|
janet_panicf("expected symbol for name of command");
|
|
|
|
const uint8_t *sym = janet_unwrap_symbol(items[0]);
|
|
|
|
const MatcherPair *mp = janet_strbinsearch(
|
|
|
|
&specials,
|
|
|
|
sizeof(specials)/sizeof(MatcherPair),
|
|
|
|
sizeof(MatcherPair),
|
|
|
|
sym);
|
2019-01-12 16:04:47 +00:00
|
|
|
if (!mp) janet_panicf("unknown special form %p", peg);
|
2019-01-12 00:22:24 +00:00
|
|
|
if (s->depth-- == 0)
|
|
|
|
janet_panic("recursed too deeply");
|
2019-01-12 15:16:25 +00:00
|
|
|
|
|
|
|
/* Save old state in case of failure */
|
2019-01-12 02:09:49 +00:00
|
|
|
int32_t old_capture_count = s->captures->count;
|
2019-01-12 15:16:25 +00:00
|
|
|
int32_t old_scratch_count = s->scratch->count;
|
|
|
|
const uint8_t *old_subst_end = s->subst_end;
|
|
|
|
|
2019-01-12 00:22:24 +00:00
|
|
|
int32_t result = mp->matcher(s, len - 1, items + 1, text);
|
2019-01-12 15:16:25 +00:00
|
|
|
|
|
|
|
/* Reset old state on failure */
|
|
|
|
if (result < 0) {
|
2019-01-12 02:09:49 +00:00
|
|
|
s->captures->count = old_capture_count;
|
2019-01-12 15:16:25 +00:00
|
|
|
s->scratch->count = old_scratch_count;
|
|
|
|
s->subst_end = old_subst_end;
|
|
|
|
}
|
|
|
|
s->depth++;
|
2019-01-12 00:22:24 +00:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
case JANET_KEYWORD:
|
|
|
|
/* Look up a rule */
|
|
|
|
return match(s, janet_table_get(s->grammar, peg), text);
|
|
|
|
case JANET_STRUCT:
|
|
|
|
/* Specify a grammar */
|
|
|
|
{
|
|
|
|
JanetTable *grammar = janet_struct_to_table(janet_unwrap_struct(peg));
|
|
|
|
grammar->proto = s->grammar;
|
|
|
|
|
|
|
|
/* Run main rule with grammar set */
|
|
|
|
s->grammar = grammar;
|
|
|
|
int32_t result = match(s, janet_table_get(grammar, janet_ckeywordv("main")), text);
|
|
|
|
s->grammar = grammar->proto;
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* C Functions */
|
|
|
|
|
|
|
|
/* (peg/match peg text [,start=0] & args)
 *
 * argv[0] is the pattern and argv[1] the bytes to match against. The
 * optional argv[2] is the byte offset at which matching starts; any further
 * arguments are made available to the pattern as extra arguments. Returns
 * the array of captures if the pattern matches at the start offset, or nil
 * if it does not. */
static Janet cfun_match(int32_t argc, Janet *argv) {
    janet_arity(argc, 2, -1);
    JanetByteView bytes = janet_getbytes(argv, 1);
    int32_t start = 0;
    State s;
    s.extrac = 0;
    s.extrav = NULL;
    if (argc > 2) {
        start = janet_gethalfrange(argv, 2, bytes.len, "offset");
        s.extrac = argc - 3;
        s.extrav = argv + 3;
    }
    s.flags = 0;
    s.text_start = bytes.bytes;
    s.text_end = bytes.bytes + bytes.len;
    /* match() saves and restores subst_end around every special form, so it
     * must hold a defined value even before any substitution has begun. */
    s.subst_end = bytes.bytes + start;
    s.depth = JANET_RECURSION_GUARD;
    s.grammar = NULL;
    s.captures = janet_array(0);
    s.scratch = janet_buffer(0);
    int32_t result = match(&s, argv[0], bytes.bytes + start);
    return result >= 0 ? janet_wrap_array(s.captures) : janet_wrap_nil();
}
|
|
|
|
|
|
|
|
static const JanetReg cfuns[] = {
|
2019-01-12 15:16:25 +00:00
|
|
|
{"peg/match", cfun_match,
|
|
|
|
"(peg/match peg text [,start=0])\n\n"
|
|
|
|
"Match a Parsing Expression Grammar to a byte string and return an array of captured values. "
|
|
|
|
"Returns nil if text does not match the language defined by peg. The syntax of PEGs are very "
|
|
|
|
"similar to those defined by LPeg, and have similar capabilities. Still WIP."
|
|
|
|
},
|
2019-01-12 00:22:24 +00:00
|
|
|
{NULL, NULL, NULL}
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Load the peg module */
|
|
|
|
void janet_lib_peg(JanetTable *env) {
|
|
|
|
janet_cfuns(env, NULL, cfuns);
|
|
|
|
}
|