mirror of
https://github.com/janet-lang/janet
synced 2025-03-29 11:36:55 +00:00
Update peg doc and rename some peg specials.
This commit is contained in:
parent
1304f9263b
commit
3a9b50ea4a
70
doc/Peg.md
70
doc/Peg.md
@ -15,6 +15,8 @@ can produce grammars that are easily understandable and fast. PEGs can also be c
|
|||||||
to a bytecode format that can be reused. Janet offers the `peg` module for writing and
|
to a bytecode format that can be reused. Janet offers the `peg` module for writing and
|
||||||
evaluating PEGs.
|
evaluating PEGs.
|
||||||
|
|
||||||
|
Janet's `peg` module borrows syntax and ideas from both LPeg and the REBOL/Red parse module.
|
||||||
|
|
||||||
Below is a simple example for checking if a string is a valid IP address. Notice how
|
Below is a simple example for checking if a string is a valid IP address. Notice how
|
||||||
the grammar is descriptive enough that you can read it even if you don't know the peg
|
the grammar is descriptive enough that you can read it even if you don't know the peg
|
||||||
syntax (example is translated from a [RED language blog post](https://www.red-lang.org/2013/11/041-introducing-parse.html)).
|
syntax (example is translated from a [RED language blog post](https://www.red-lang.org/2013/11/041-introducing-parse.html)).
|
||||||
@ -62,12 +64,12 @@ characters, string literals, or a given number of characters. A character in Jan
|
|||||||
is considered a byte, so PEGs will work on any string of bytes. No special meaning is
|
is considered a byte, so PEGs will work on any string of bytes. No special meaning is
|
||||||
given to the 0 byte, or the string terminator in many languages.
|
given to the 0 byte, or the string terminator in many languages.
|
||||||
|
|
||||||
| Pattern | Alias | What it Matches |
|
| Pattern | What it Matches |
|
||||||
| -----------------| ----- | ----------------|
|
| ----------------- | ----------------|
|
||||||
| string ("cat") | | The literal string. |
|
| string ("cat") | The literal string. |
|
||||||
| integer (3) | | Matches a number of characters, and advances that many characters. If negative, matches if not that many characters and does not advance. For example, -1 will match the end of a string |
|
| integer (3) | Matches that number of characters and advances that many characters. If negative, matches only if fewer than that many characters remain, and does not advance. For example, -1 will match the end of a string. |
|
||||||
| `(range "az" "AZ")` | | Matches characters in a range and advances 1 character. Multiple ranges can be combined together. |
|
| `(range "az" "AZ")` | Matches characters in a range and advances 1 character. Multiple ranges can be combined together. |
|
||||||
| `(set "abcd")` | | Match any character in the argument string. Advances 1 character. |
|
| `(set "abcd")` | Match any character in the argument string. Advances 1 character. |
|
||||||
|
|
||||||
|
|
||||||
## Combining Patterns
|
## Combining Patterns
|
||||||
@ -84,19 +86,23 @@ succeeds, then the whole pattern fails. Note that this means that the order of `
|
|||||||
DOES matter. If y matches everything that z matches, z will never succeed.
|
DOES matter. If y matches everything that z matches, z will never succeed.
|
||||||
|
|
||||||
|
|
||||||
| Pattern | Alias | What it matches |
|
| Pattern | What it matches |
|
||||||
| ------- | ----- | --------------- |
|
| ------- | --------------- |
|
||||||
| `(choice a b c ...)` | `(+ a b c ...)` | Tries to match a, then b, and so on. Will succeed on the first successful match, and fails if none of the arguments match the text. |
|
| `(choice a b c ...)` | Tries to match a, then b, and so on. Will succeed on the first successful match, and fails if none of the arguments match the text. |
|
||||||
| `(sequence a b c)` | `(* a b c ...)` | Tries to match a, b, c and so on in sequence. If any of these arguments fail to match the text, the whole pattern fails. |
|
| `(+ a b c ...)` | Alias for `(choice a b c ...)` |
|
||||||
| `(any x)` | | Matches 0 or more repetitions of x. |
|
| `(sequence a b c ...)` | Tries to match a, b, c and so on in sequence. If any of these arguments fail to match the text, the whole pattern fails. |
|
||||||
| `(some x)` | | Matches 1 or more repetitions of x. |
|
| `(* a b c ...)` | Alias for `(sequence a b c ...)` |
|
||||||
| `(between min max x)` | | Matches between min and max (inclusive) or more occurrences of x. |
|
| `(any x)` | Matches 0 or more repetitions of x. |
|
||||||
| `(at-least n x)` | | Matches at least n occurrences of x. |
|
| `(some x)` | Matches 1 or more repetitions of x. |
|
||||||
| `(at-most n x)` | | Matches at most n occurrences of x. |
|
| `(between min max x)` | Matches between min and max (inclusive) repetitions of x. |
|
||||||
| `(if cond patt)` | | | Tries to match patt only if cond matches as well. cond will not produce any captures. |
|
| `(at-least n x)` | Matches at least n repetitions of x. |
|
||||||
| `(if-not cond patt)` | | Tries to match only if cond does not match. cond will not produce any captures. |
|
| `(at-most n x)` | Matches at most n repetitions of x. |
|
||||||
| `(not patt)` | `(! patt)` | Matches only if patt does not match. Will not produce captures or advance any characters. |
|
| `(if cond patt)` | Tries to match patt only if cond matches as well. cond will not produce any captures. |
|
||||||
| `(look offset patt)` | `(> offset patt)` | Matches only if patt matches at a fixed offset. offset can be any integer. patt will not produce captures and the peg will not advance any characters. |
|
| `(if-not cond patt)` | Tries to match only if cond does not match. cond will not produce any captures. |
|
||||||
|
| `(not patt)` | Matches only if patt does not match. Will not produce captures or advance any characters. |
|
||||||
|
| `(! patt)` | Alias for `(not patt)` |
|
||||||
|
| `(look offset patt)` | Matches only if patt matches at a fixed offset. offset can be any integer. patt will not produce captures and the peg will not advance any characters. |
|
||||||
|
| `(> offset patt)` | Alias for `(look offset patt)` |
|
||||||
|
|
||||||
## Captures
|
## Captures
|
||||||
|
|
||||||
@ -111,15 +117,19 @@ Most captures specials will match the same text as their first argument pattern.
|
|||||||
|
|
||||||
| Pattern | Alias | What it captures |
|
| Pattern | What it captures |
|
||||||
| ------- | ----- | --------------- |
|
| ------- | ---------------- |
|
||||||
| `(capture patt)` | `(<- patt)` | Captures all of the text in patt if patt matches, If patt contains any captures, then those captures will be pushed to the capture stack before the total text. |
|
| `(capture patt)` | Captures all of the text in patt if patt matches. If patt contains any captures, then those captures will be pushed to the capture stack before the total text. |
|
||||||
| `(group patt) ` | | Pops all of the captures in patt off of the capture stack and pushes them in an array if patt matches.
|
| `(<- patt)` | Alias for `(capture patt)` |
|
||||||
| `(replace patt subst)` | `(/ patt subst)` | Replaces the captures produced by patt by applying subst to them. If subst is a table or struct, will push `(get subst last-capture)` to the capture stack after removing the old captures. If a subst is a function, will call subst with the captures of patt as arguments and push the result to the capture stack. Otherwise, will push subst literally to the capture stack. |
|
| `(group patt)` | Pops all of the captures in patt off of the capture stack and pushes them in an array if patt matches. |
|
||||||
| `(constant k)` | | Captures a constant value and advances no characters. |
|
| `(replace patt subst)` | Replaces the captures produced by patt by applying subst to them. If subst is a table or struct, will push `(get subst last-capture)` to the capture stack after removing the old captures. If a subst is a function, will call subst with the captures of patt as arguments and push the result to the capture stack. Otherwise, will push subst literally to the capture stack. |
|
||||||
| `(argument n)` | | Captures the nth extra argument to the match function and does not advance. |
|
| `(/ patt subst)` | Alias for `(replace patt subst)` |
|
||||||
| `(position)` | | Captures the current index into the text and advances no input. |
|
| `(constant k)` | Captures a constant value and advances no characters. |
|
||||||
| `(substitute patt)` | `(| patt)` | Replace the text matched by all captures in patt with the capture values. Pushes the substituted text matched by patt to the capture stack. |
|
| `(argument n)` | Captures the nth extra argument to the match function and does not advance. |
|
||||||
| `(cmt patt fun)` | | Invokes fun with all of the captures of patt as arguments (if patt matches). If the result is truthy, then captures the result. The whole expression fails if fun returns false or nil. |
|
| `(position)` | Captures the current index into the text and advances no input. |
|
||||||
| `(backref n)` | | Duplicates the nth last capture and pushes it to the stack again (0 is the previous capture). If n is negative, pushes the nth capture value to the stack (-1 pushes the first captured value to the stack). If n is out of range for the stack, say if the stack is empty, then the match fails. |
|
| `($)` | Alias for `(position)`. |
|
||||||
|
| `(substitute patt)` | Replace the text matched by all captures in patt with the capture values. Pushes the substituted text matched by patt to the capture stack. |
|
||||||
|
| `(% patt)` | Alias for `(substitute patt)` |
|
||||||
|
| `(cmt patt fun)` | Invokes fun with all of the captures of patt as arguments (if patt matches). If the result is truthy, then captures the result. The whole expression fails if fun returns false or nil. |
|
||||||
|
| `(backref n)` | Duplicates the nth last capture and pushes it to the stack again (0 is the previous capture). If n is negative, pushes the nth capture value to the stack (-1 pushes the first captured value to the stack). If n is out of range for the stack, say if the stack is empty, then the match fails. |
|
||||||
|
|
||||||
## Grammars and Recursion
|
## Grammars and Recursion
|
||||||
|
|
||||||
@ -160,11 +170,9 @@ can be used to embed values in patterns.
|
|||||||
(defn finder
|
(defn finder
|
||||||
"Creates a peg that finds all locations of str in the text."
|
"Creates a peg that finds all locations of str in the text."
|
||||||
[str]
|
[str]
|
||||||
(peg/compile ~(any (+ (* (position) ,str) 1))))
|
(peg/compile ~(any (+ (* ($) ,str) 1))))
|
||||||
|
|
||||||
(def where-are-the-dogs? (finder "dog"))
|
(def where-are-the-dogs? (finder "dog"))
|
||||||
|
|
||||||
(peg/match where-are-the-dogs? "dog dog cat dog") # -> @[0 4 12]
|
(peg/match where-are-the-dogs? "dog dog cat dog") # -> @[0 4 12]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
@ -766,6 +766,8 @@ typedef struct {
|
|||||||
|
|
||||||
static const SpecialPair specials[] = {
|
static const SpecialPair specials[] = {
|
||||||
{"!", spec_not},
|
{"!", spec_not},
|
||||||
|
{"$", spec_position},
|
||||||
|
{"%", spec_substitute},
|
||||||
{"*", spec_sequence},
|
{"*", spec_sequence},
|
||||||
{"+", spec_choice},
|
{"+", spec_choice},
|
||||||
{"/", spec_replace},
|
{"/", spec_replace},
|
||||||
@ -793,7 +795,6 @@ static const SpecialPair specials[] = {
|
|||||||
{"set", spec_set},
|
{"set", spec_set},
|
||||||
{"some", spec_some},
|
{"some", spec_some},
|
||||||
{"substitute", spec_substitute},
|
{"substitute", spec_substitute},
|
||||||
{"|", spec_substitute},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Compile a janet value into a rule and return the rule index. */
|
/* Compile a janet value into a rule and return the rule index. */
|
||||||
|
@ -222,7 +222,7 @@
|
|||||||
(file/flush stderr)
|
(file/flush stderr)
|
||||||
(file/flush stdout)
|
(file/flush stdout)
|
||||||
|
|
||||||
(def grammar '(| (any (+ (/ "dog" "purple panda") 1))))
|
(def grammar '(% (any (+ (/ "dog" "purple panda") 1))))
|
||||||
(defn try-grammar [text]
|
(defn try-grammar [text]
|
||||||
(assert (= (string/replace-all "dog" "purple panda" text) (0 (peg/match grammar text))) text))
|
(assert (= (string/replace-all "dog" "purple panda" text) (0 (peg/match grammar text))) text))
|
||||||
|
|
||||||
@ -238,8 +238,8 @@
|
|||||||
|
|
||||||
(def csv
|
(def csv
|
||||||
'{:field (+
|
'{:field (+
|
||||||
(* `"` (| (any (+ (if-not `"` 1) (/ `""` `"`)))) `"`)
|
(* `"` (% (any (+ (if-not `"` 1) (/ `""` `"`)))) `"`)
|
||||||
(| (any (if-not (set ",\n") 1))))
|
(% (any (if-not (set ",\n") 1))))
|
||||||
:main (* :field (any (* "," :field)) (+ "\n" -1))})
|
:main (* :field (any (* "," :field)) (+ "\n" -1))})
|
||||||
|
|
||||||
(defn check-csv
|
(defn check-csv
|
||||||
@ -258,7 +258,7 @@
|
|||||||
|
|
||||||
# Functions in grammar
|
# Functions in grammar
|
||||||
|
|
||||||
(def grmr-triple ~(| (any (/ (<- 1) ,(fn [x] (string x x x))))))
|
(def grmr-triple ~(% (any (/ (<- 1) ,(fn [x] (string x x x))))))
|
||||||
(check-deep grmr-triple "abc" @["aaabbbccc"])
|
(check-deep grmr-triple "abc" @["aaabbbccc"])
|
||||||
(check-deep grmr-triple "" @[""])
|
(check-deep grmr-triple "" @[""])
|
||||||
(check-deep grmr-triple " " @[" "])
|
(check-deep grmr-triple " " @[" "])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user