mirror of
https://github.com/janet-lang/janet
synced 2024-11-18 14:44:48 +00:00
Initial peg implementation. Tree-walk interpreted with
no captures, so not yet ready.
This commit is contained in:
parent
84fb07dd5a
commit
40845b5c1b
@ -761,6 +761,7 @@ JanetTable *janet_core_env(void) {
|
||||
janet_lib_debug(env);
|
||||
janet_lib_string(env);
|
||||
janet_lib_marsh(env);
|
||||
janet_lib_peg(env);
|
||||
#ifdef JANET_ASSEMBLER
|
||||
janet_lib_asm(env);
|
||||
#endif
|
||||
|
276
src/core/peg.c
Normal file
276
src/core/peg.c
Normal file
@ -0,0 +1,276 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Calvin Rose
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <janet/janet.h>
|
||||
#include <string.h>
|
||||
#include "util.h"
|
||||
|
||||
/* TODO
|
||||
* - tail recursion in patterns (allow for self referential patterns that don't decrease depth)
|
||||
* - Captures - need to account for possible (likely) recursion and not overwrite previous captures
|
||||
* - Compilation - compile peg to binary form - one grammar, patterns reference each other by index
|
||||
* and bytecode "opcodes" identify primitive patterns and pattern "constructors". Main pattern is
|
||||
* pattern index 0.
|
||||
* - Investigate more primitive pattern types - Kleene star and variations.
|
||||
* - Possibly allow referencing captures from grammars or arbitrary code execution in
|
||||
* patterns for flexible usage. */
|
||||
|
||||
/* Bit flags for the matcher state (PEG_CAPTURE is not consumed anywhere in this file yet) */
|
||||
#define PEG_CAPTURE 0x1
|
||||
|
||||
/* Hold captured patterns and match state */
|
||||
typedef struct {
|
||||
int32_t depth;
|
||||
const uint8_t *text_start;
|
||||
const uint8_t *text_end;
|
||||
JanetTable *grammar;
|
||||
int flags;
|
||||
} State;
|
||||
|
||||
/* Forward declaration */
|
||||
static int32_t match(State *s, Janet peg, const uint8_t *text);
|
||||
|
||||
/* Special matcher form */
|
||||
typedef int32_t (*Matcher)(State *s, int32_t argc, const Janet *argv, const uint8_t *text);
|
||||
typedef struct {
|
||||
const char *name;
|
||||
Matcher matcher;
|
||||
} MatcherPair;
|
||||
|
||||
/* Match a character range */
|
||||
int32_t match_range(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
if (s->text_end <= text)
|
||||
return -1;
|
||||
for (int32_t i = 0; i < argc; i++) {
|
||||
const uint8_t *range = janet_getstring(argv, i);
|
||||
int32_t length = janet_string_length(range);
|
||||
if (length != 2) janet_panicf("arguments to range must have length 2");
|
||||
uint8_t lo = range[0];
|
||||
uint8_t hi = range[1];
|
||||
if (text[0] >= lo && text[0] <= hi) return 1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Match 1 of any character in argv[0] */
|
||||
int32_t match_set(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
janet_fixarity(argc, 1);
|
||||
const uint8_t *set = janet_getstring(argv, 0);
|
||||
int32_t len = janet_string_length(set);
|
||||
if (s->text_end <= text) return -1;
|
||||
for (int32_t i = 0; i < len; i++)
|
||||
if (set[i] == text[0]) return 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Match the first of argv[0], argv[1], argv[2], ... */
|
||||
int32_t match_choice(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
for (int32_t i = 0; i < argc; i++) {
|
||||
int32_t result = match(s, argv[i], text);
|
||||
if (result >= 0) return result;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Match argv[0] then argv[1] then argv[2] ... Fail if any match fails. */
|
||||
int32_t match_sequence(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
int32_t traversed = 0;
|
||||
for (int32_t i = 0; i < argc; i++) {
|
||||
if (text + traversed >= s->text_end) return -1;
|
||||
int32_t result = match(s, argv[i], text + traversed);
|
||||
if (result < 0) return -1;
|
||||
traversed += result;
|
||||
}
|
||||
return traversed;
|
||||
}
|
||||
|
||||
/* Match argv[0] if not argv[1] */
|
||||
int32_t match_minus(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
janet_fixarity(argc, 2);
|
||||
if (match(s, argv[1], text) < 0)
|
||||
return match(s, argv[0], text);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Match zero length if not match argv[0] */
|
||||
int32_t match_not(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
janet_fixarity(argc, 1);
|
||||
if (match(s, argv[0], text) < 0)
|
||||
return 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Match 0 length if match argv[0] */
|
||||
int32_t match_lookahead(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
janet_fixarity(argc, 1);
|
||||
return match(s, argv[0], text) >= 0 ? 0 : -1;
|
||||
}
|
||||
|
||||
/* Match at least argv[0] repetitions of argv[1]. Will match as many repetitions as possible. */
|
||||
int32_t match_atleast(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
janet_fixarity(argc, 2);
|
||||
int32_t n = janet_getinteger(argv, 0);
|
||||
int32_t captured = 0;
|
||||
int32_t total_length = 0;
|
||||
int32_t result;
|
||||
/* Greedy match until match fails */
|
||||
while ((result = match(s, argv[1], text + total_length)) > 0) {
|
||||
captured++;
|
||||
total_length += result;
|
||||
}
|
||||
return captured >= n ? total_length : -1;
|
||||
}
|
||||
|
||||
/* Match at most argv[0] repetitions of argv[1]. Will match as many repetitions as possible. */
|
||||
int32_t match_atmost(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
janet_fixarity(argc, 2);
|
||||
int32_t n = janet_getinteger(argv, 0);
|
||||
int32_t captured = 0;
|
||||
int32_t total_length = 0;
|
||||
int32_t result;
|
||||
/* Greedy match until match fails or n captured */
|
||||
while (captured < n && (result = match(s, argv[1], text + total_length)) > 0) {
|
||||
captured++;
|
||||
total_length += result;
|
||||
}
|
||||
/* always matches */
|
||||
return total_length;
|
||||
}
|
||||
|
||||
/* Match between argv[0] and argv[1] repetitions of argv[2]. Will match as many repetitions as possible. */
|
||||
int32_t match_between(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
janet_fixarity(argc, 3);
|
||||
int32_t lo = janet_getinteger(argv, 0);
|
||||
int32_t hi = janet_getinteger(argv, 1);
|
||||
int32_t captured = 0;
|
||||
int32_t total_length = 0;
|
||||
int32_t result;
|
||||
/* Greedy match until match fails or n captured */
|
||||
while (captured < hi && (result = match(s, argv[2], text + total_length)) > 0) {
|
||||
captured++;
|
||||
total_length += result;
|
||||
}
|
||||
/* always matches */
|
||||
return captured >= lo ? total_length : -1;
|
||||
}
|
||||
|
||||
/* Lookup for special forms */
|
||||
static const MatcherPair specials[] = {
|
||||
{"*", match_sequence},
|
||||
{"+", match_choice},
|
||||
{"-", match_minus},
|
||||
{">", match_lookahead},
|
||||
{"at-least", match_atleast},
|
||||
{"at-most", match_atmost},
|
||||
{"between", match_between},
|
||||
{"not", match_not},
|
||||
{"range", match_range},
|
||||
{"set", match_set}
|
||||
};
|
||||
|
||||
/* Check if the string matches the pattern at the given point. Returns a negative number
|
||||
* if no match, else the number of charcters matched against. */
|
||||
static int32_t match(State *s, Janet peg, const uint8_t *text) {
|
||||
switch(janet_type(peg)) {
|
||||
default:
|
||||
janet_panicf("unexpected element in peg: %v", peg);
|
||||
return -1;
|
||||
case JANET_NUMBER:
|
||||
/* Match n characters */
|
||||
{
|
||||
if (!janet_checkint(peg))
|
||||
janet_panicf("numbers in peg must be integers, got %v", peg);
|
||||
int32_t n = janet_unwrap_integer(peg);
|
||||
return (s->text_end >= text + n) ? n : -1;
|
||||
}
|
||||
case JANET_STRING:
|
||||
/* Match a sequence of bytes */
|
||||
{
|
||||
const uint8_t *str = janet_unwrap_string(peg);
|
||||
int32_t len = janet_string_length(str);
|
||||
if (text + len > s->text_end) return 0;
|
||||
return memcmp(text, str, len) ? -1 : len;
|
||||
}
|
||||
case JANET_TUPLE:
|
||||
/* Match a special command */
|
||||
{
|
||||
const Janet *items;
|
||||
int32_t len;
|
||||
janet_indexed_view(peg, &items, &len);
|
||||
janet_arity(len, 1, -1);
|
||||
if (!janet_checktype(items[0], JANET_SYMBOL))
|
||||
janet_panicf("expected symbol for name of command");
|
||||
const uint8_t *sym = janet_unwrap_symbol(items[0]);
|
||||
const MatcherPair *mp = janet_strbinsearch(
|
||||
&specials,
|
||||
sizeof(specials)/sizeof(MatcherPair),
|
||||
sizeof(MatcherPair),
|
||||
sym);
|
||||
if (!mp) janet_panicf("unknown special form %v", peg);
|
||||
if (s->depth-- == 0)
|
||||
janet_panic("recursed too deeply");
|
||||
int32_t result = mp->matcher(s, len - 1, items + 1, text);
|
||||
s->depth++;
|
||||
return result;
|
||||
}
|
||||
case JANET_KEYWORD:
|
||||
/* Look up a rule */
|
||||
return match(s, janet_table_get(s->grammar, peg), text);
|
||||
case JANET_STRUCT:
|
||||
/* Specify a grammar */
|
||||
{
|
||||
JanetTable *grammar = janet_struct_to_table(janet_unwrap_struct(peg));
|
||||
grammar->proto = s->grammar;
|
||||
|
||||
/* Run main rule with grammar set */
|
||||
s->grammar = grammar;
|
||||
int32_t result = match(s, janet_table_get(grammar, janet_ckeywordv("main")), text);
|
||||
s->grammar = grammar->proto;
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* C Functions */
|
||||
|
||||
/* (peg/match peg text)
 * argv[0] is the peg and argv[1] the bytes to match it against, starting at
 * the first byte. Returns a Janet boolean: true when match() reports a
 * non-negative length, false otherwise. */
static Janet cfun_match(int32_t argc, Janet *argv) {
    janet_fixarity(argc, 2);
    JanetByteView bytes = janet_getbytes(argv, 1);
    /* Initialise every field so that no matcher can observe garbage */
    State s;
    s.depth = JANET_RECURSION_GUARD;
    s.text_start = bytes.bytes;
    s.text_end = bytes.bytes + bytes.len;
    s.grammar = NULL;
    s.flags = 0;
    int32_t result = match(&s, argv[0], bytes.bytes);
    return janet_wrap_boolean(result >= 0);
}
|
||||
|
||||
static const JanetReg cfuns[] = {
|
||||
{"peg/match", cfun_match, NULL},
|
||||
{NULL, NULL, NULL}
|
||||
};
|
||||
|
||||
/* Load the peg module */
|
||||
void janet_lib_peg(JanetTable *env) {
|
||||
janet_cfuns(env, NULL, cfuns);
|
||||
}
|
@ -67,5 +67,6 @@ void janet_lib_asm(JanetTable *env);
|
||||
#endif
|
||||
void janet_lib_compile(JanetTable *env);
|
||||
void janet_lib_debug(JanetTable *env);
|
||||
void janet_lib_peg(JanetTable *env);
|
||||
|
||||
#endif
|
||||
|
@ -153,4 +153,17 @@
|
||||
(buffer/blit b2 "abcdefg" 5 6)
|
||||
(assert (= (string b2) "joytogjoyto") "buffer/blit 3")
|
||||
|
||||
# Peg

# Grammar for a dotted quad: one block followed by exactly three
# "." block pairs.
(def ip-address
  '{:d (range "09")
    :0-4 (range "04")
    :0-5 (range "05")
    # Ordered alternatives: 250-255, 200-249, 100-199, then one or two digits.
    :block (+ (* "25" :0-5)
              (* "2" :0-4 :d)
              (* "1" :d :d)
              (* :d (at-most 1 :d)))
    :main (* :block (between 3 3 (* "." :block)))})

(assert (peg/match ip-address "0.0.0.0") "peg/match 1")
(assert (peg/match ip-address "1.2.3.4") "peg/match 2")
# The first block can only take "25" here, after which "." does not match "6".
(assert (not (peg/match ip-address "256.2.3.4")) "peg/match 3")
|
||||
|
||||
(end-suite)
|
||||
|
Loading…
Reference in New Issue
Block a user