From 83f4a11bf36969ea3fe9b4b969e027d3a5bec474 Mon Sep 17 00:00:00 2001 From: Calvin Rose Date: Sat, 12 Jan 2019 11:04:47 -0500 Subject: [PATCH] Add some more tests, add parameterized captures to patterns, and fix some bugs. --- README.md | 1 + src/core/peg.c | 33 ++++++++++++++++++++++++++------- test/suite3.janet | 28 ++++++++++++++++++++++++---- 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index cd2d94d6..98a93809 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ Janet makes a good system scripting language, or a language to embed in other pr * Lexical scoping * Imperative programming as well as functional * REPL +* Parsing Expression Grammars built in to the core library * 300+ functions and macros in the core library * Interactive environment with detailed stack traces diff --git a/src/core/peg.c b/src/core/peg.c index a99777df..2e4fa34a 100644 --- a/src/core/peg.c +++ b/src/core/peg.c @@ -42,6 +42,8 @@ typedef struct { JanetTable *grammar; JanetArray *captures; JanetBuffer *scratch; + const Janet *extrav; + int32_t extrac; int flags; } State; @@ -124,7 +126,6 @@ static int32_t match_choice(State *s, int32_t argc, const Janet *argv, const uin static int32_t match_sequence(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { int32_t traversed = 0; for (int32_t i = 0; i < argc; i++) { - if (text + traversed >= s->text_end) return -1; int32_t result = match(s, argv[i], text + traversed); if (result < 0) return -1; traversed += result; @@ -259,6 +260,14 @@ static int32_t match_capture_constant(State *s, int32_t argc, const Janet *argv, return 0; } +/* Capture nth extra argument to peg/match */ +static int32_t match_capture_arg(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { + janet_fixarity(argc, 1); + int32_t n = janet_gethalfrange(argv, 0, s->extrac, "n"); + push_capture(s, s->extrav[n], text, 0); + return 0; +} + /* Capture replace */ static int32_t match_replace(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { janet_fixarity(argc, 2); @@ -319,6 +328,7 @@ static const MatcherPair specials[] = { {"-", match_minus}, {"/", match_replace}, {"<-", match_capture}, + {"<-arg", match_capture_arg}, {"<-c", match_capture_constant}, {"<-g", match_group}, {"<-p", match_position}, @@ -346,7 +356,9 @@ static int32_t match(State *s, Janet peg, const uint8_t *text) { if (!janet_checkint(peg)) janet_panicf("numbers in peg must be integers, got %v", peg); int32_t n = janet_unwrap_integer(peg); - return (s->text_end >= text + n) ? n : -1; + if (n < 0) /* Invert pattern */ + return (text - n > s->text_end) ? 0 : -1; + return (text + n > s->text_end) ? -1 : n; } case JANET_STRING: /* Match a sequence of bytes */ @@ -371,7 +383,7 @@ static int32_t match(State *s, Janet peg, const uint8_t *text) { sizeof(specials)/sizeof(MatcherPair), sizeof(MatcherPair), sym); - if (!mp) janet_panicf("unknown special form %v", peg); + if (!mp) janet_panicf("unknown special form %p", peg); if (s->depth-- == 0) janet_panic("recursed too deeply"); @@ -413,12 +425,19 @@ static int32_t match(State *s, Janet peg, const uint8_t *text) { /* C Functions */ static Janet cfun_match(int32_t argc, Janet *argv) { - janet_arity(argc, 2, 3); + janet_arity(argc, 2, -1); JanetByteView bytes = janet_getbytes(argv, 1); - int32_t start = (argc == 3) ? - start = janet_gethalfrange(argv, 2, bytes.len, "offset") - : 0; + int32_t start; State s; + if (argc > 2) { + start = janet_gethalfrange(argv, 2, bytes.len, "offset"); + s.extrac = argc - 3; + s.extrav = argv + 3; + } else { + start = 0; + s.extrac = 0; + s.extrav = NULL; + } s.flags = 0; s.text_start = bytes.bytes; s.text_end = bytes.bytes + bytes.len; diff --git a/test/suite3.janet b/test/suite3.janet index 03a6cf57..44bc159a 100644 --- a/test/suite3.janet +++ b/test/suite3.janet @@ -155,16 +155,36 @@ # Peg +(defn check-match + [pat text should-match] + (def result (peg/match pat text)) + (assert (= (not should-match) (not result)) text)) + +# Just numbers + +(check-match '(* 4 -1) "abcd" true) +(check-match '(* 4 -1) "abc" false) +(check-match '(* 4 -1) "abcde" false) + +# Simple pattern + +(check-match '(* (at-least 1 (range "az" "AZ")) (not 1)) "hello" true) +(check-match '(* (at-least 1 (range "az" "AZ")) (not 1)) "hello world" false) +(check-match '(* (at-least 1 (range "az" "AZ")) (not 1)) "1he11o" false) + +# IP address + (def ip-address '{:d (range "09") :0-4 (range "04") :0-5 (range "05") :block (+ (* "25" :0-5) (* "2" :0-4 :d) (* "1" :d :d) (between 1 2 :d)) - :main (* :block (between 3 3 (* "." :block)))}) + :main (* :block (between 3 3 (* "." :block)) -1)}) -(assert (peg/match ip-address "0.0.0.0") "peg/match 1") -(assert (peg/match ip-address "1.2.3.4") "peg/match 2") -(assert (not (peg/match ip-address "256.2.3.4")) "peg/match 3") +(check-match ip-address "0.0.0.0" true) +(check-match ip-address "1.2.3.4" true) +(check-match ip-address "256.2.3.4" false) +(check-match ip-address "256.2.3.2514" false) # Substitution test with peg