1
0
mirror of https://github.com/janet-lang/janet synced 2024-11-24 09:17:17 +00:00
janet/test/suite-peg.janet
Ian Henry 61f38fab37
add a new (split) PEG special
This works similarly to string/split, but the separator is a PEG.
2024-01-05 22:02:52 -08:00

761 lines
24 KiB
Plaintext

# Copyright (c) 2023 Calvin Rose
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
(import ./helper :prefix "" :exit true)
(start-suite)
# Peg
# 83f4a11bf
(defn check-match
  "Assert that `pat` matches `text` when `should-match` is truthy, and that
  it fails to match otherwise."
  [pat text should-match]
  (let [matched (truthy? (peg/match pat text))
        expected (truthy? should-match)]
    (assert (= expected matched)
            (string "check-match " text))))
# 798c88b4c
(defn check-deep
  "Assert that matching `pat` against `text` yields a result that is deep=
  to `what`. Pass nil as `what` to expect a failed match."
  [pat text what]
  (let [captures (peg/match pat text)]
    (assert (deep= captures what)
            (string "check-deep " text))))
# Just numbers
# 83f4a11bf
# An integer pattern n consumes exactly n bytes and -1 succeeds only at the
# end of input, so (* 4 -1) accepts inputs of exactly four bytes.
(check-match '(* 4 -1) "abcd" true)
(check-match '(* 4 -1) "abc" false)
(check-match '(* 4 -1) "abcde" false)
# Simple pattern
# 83f4a11bf
# One or more ASCII letters spanning the whole input. The empty string is
# rejected because `some` needs at least one repetition.
(check-match '(* (some (range "az" "AZ")) -1) "hello" true)
(check-match '(* (some (range "az" "AZ")) -1) "hello world" false)
(check-match '(* (some (range "az" "AZ")) -1) "1he11o" false)
(check-match '(* (some (range "az" "AZ")) -1) "" false)
# Pre compile
# ff0d3a008
# A grammar compiled ahead of time with peg/compile can be handed to
# peg/match in place of the source pattern.
(def pegleg (peg/compile '{:item "abc" :main (* :item "," :item -1)}))
# Assert on the result so that a regression in matching against a
# precompiled grammar is actually reported.
(assert (peg/match pegleg "abc,abc") "precompiled peg")
# Bad Grammars
# 192705113
# Inputs that are not valid PEG sources: nil, an empty table, a grammar
# struct without a :main rule, an unknown special form, and a number-headed
# tuple that is presumably rejected for having too many elements.
# NOTE(review): assert-error comes from ./helper; presumably it passes only
# when the wrapped expression raises - confirm.
(assert-error "peg/compile error 1" (peg/compile nil))
(assert-error "peg/compile error 2" (peg/compile @{}))
(assert-error "peg/compile error 3" (peg/compile '{:a "abc" :b "def"}))
(assert-error "peg/compile error 4" (peg/compile '(blarg "abc")))
(assert-error "peg/compile error 5" (peg/compile '(1 2 3)))
# IP address
# 40845b5c1
# Dotted quad. :byte tries its alternatives in order: 250-255, 200-249,
# 100-199, then any one- or two-digit number, so an octet above 255 can never
# be consumed in full. Note that :main is not anchored with -1.
(def ip-address
'{:d (range "09")
:0-4 (range "04")
:0-5 (range "05")
:byte (+
(* "25" :0-5)
(* "2" :0-4 :d)
(* "1" :d :d)
(between 1 2 :d))
:main (* :byte "." :byte "." :byte "." :byte)})
(check-match ip-address "10.240.250.250" true)
(check-match ip-address "0.0.0.0" true)
(check-match ip-address "1.2.3.4" true)
# "256" is only consumed as "25", after which the "." cannot match.
(check-match ip-address "256.2.3.4" false)
(check-match ip-address "256.2.3.2514" false)
# Substitution test with peg
# d7626f8c5
# Rewrites every "dog" to "purple panda" and copies every other byte through
# unchanged; accumulate joins the pieces into a single string capture.
(def grammar '(accumulate (any (+ (/ "dog" "purple panda") (<- 1)))))

(defn try-grammar
  "Assert that the PEG-based substitution of `text` agrees with
  string/replace-all. `text` doubles as the assertion message."
  [text]
  (let [via-string (string/replace-all "dog" "purple panda" text)
        via-peg (0 (peg/match grammar text))]
    (assert (= via-string via-peg) text)))

# The inputs end before, at, and just past a trailing "dog".
(each sentence ["i have a dog called doug the dog. he is good."
                "i have a dog called doug the dog. he is a good boy."
                "i have a dog called doug the do"
                "i have a dog called doug the dog"
                "i have a dog called doug the dogg"
                "i have a dog called doug the doggg"
                "i have a dog called doug the dogggg"]
  (try-grammar sentence))
# Peg CSV test
# 798c88b4c
# :field is either a double-quoted field, in which a doubled quote stands for
# one literal quote character, or a bare run of bytes up to the next comma or
# newline. :main accepts one record ended by a newline or the end of input.
(def csv
'{:field (+
(* `"` (% (any (+ (<- (if-not `"` 1))
(* (constant `"`) `""`)))) `"`)
(<- (any (if-not (set ",\n") 1))))
:main (* :field (any (* "," :field)) (+ "\n" -1))})
# Match `str` against the csv grammar and compare the captured fields to `res`.
(defn check-csv
[str res]
(check-deep csv str res))
(check-csv "1,2,3" @["1" "2" "3"])
(check-csv "1,\"2\",3" @["1" "2" "3"])
(check-csv ``1,"1""",3`` @["1" "1\"" "3"])
# Nested Captures
# 798c88b4c
# The inner captures are pushed first; the enclosing capture of the whole
# match comes last in the result.
(def grmr '(capture (* (capture "a") (capture 1) (capture "c"))))
(check-deep grmr "abc" @["a" "b" "c" "abc"])
(check-deep grmr "acc" @["a" "c" "c" "acc"])
# Functions in grammar
# 798c88b4c
# Each captured byte is replaced by three copies of itself and the pieces are
# accumulated (%) into one string.
(def grmr-triple ~(% (any (/ (<- 1) ,(fn [x] (string x x x))))))
(check-deep grmr-triple "abc" @["aaabbbccc"])
(check-deep grmr-triple "" @[""])
# A single space must come back as three spaces.
(check-deep grmr-triple " " @["   "])
# group gathers the one-byte captures into an array, which `length` then
# replaces with its element count.
(def counter ~(/ (group (any (<- 1))) ,length))
(check-deep counter "abcdefg" @[7])
# Capture Backtracking
# ff0d3a008
# The capture made inside the failed first alternative must be discarded when
# the choice falls through to "ce".
(check-deep '(+ (* (capture "c") "d") "ce") "ce" @[])
# Matchtime capture
# 192705113
# cmt hands the captured text to scan-number at match time; a falsey return
# (as for "123A") makes the whole match fail.
(def scanner (peg/compile ~(cmt (capture (some 1)) ,scan-number)))
(check-deep scanner "123" @[123])
(check-deep scanner "0x86" @[0x86])
(check-deep scanner "-1.3e-7" @[-1.3e-7])
(check-deep scanner "123A" nil)
# Recursive grammars
# 170e785b7
# :main refers to itself: a "c" wrapped in any number of matching a...b pairs.
(def g '{:main (+ (* "a" :main "b") "c")})
(check-match g "c" true)
(check-match g "acb" true)
(check-match g "aacbb" true)
(check-match g "aadbb" false)
# Back reference
# d0ec89c7c
# Bracket-delimited strings padded with "=": :open tags its padding as :n and
# :close succeeds only when its own padding equals the tagged one (cmt with =).
(def wrapped-string
~{:pad (any "=")
:open (* "[" (<- :pad :n) "[")
:close (* "]" (cmt (* (-> :n) (<- :pad)) ,=) "]")
:main (* :open (any (if-not :close 1)) :close -1)})
(check-match wrapped-string "[[]]" true)
(check-match wrapped-string "[==[a]==]" true)
(check-match wrapped-string "[==[]===]" false)
(check-match wrapped-string "[[blark]]" true)
(check-match wrapped-string "[[bl[ark]]" true)
(check-match wrapped-string "[[bl]rk]]" true)
(check-match wrapped-string "[[bl]rk]] " false)
(check-match wrapped-string "[=[bl]]rk]=] " false)
(check-match wrapped-string "[=[bl]==]rk]=] " false)
(check-match wrapped-string "[===[]==]===]" true)
# Backtick-delimited long strings: the opening run of backticks is tagged :n.
# :close looks back one byte to require that it is not preceded by a
# backtick, then compares the tagged delimiter with a backmatch of it.
(def janet-longstring
~{:delim (some "`")
:open (capture :delim :n)
:close (cmt (* (not (> -1 "`")) (-> :n) (<- (backmatch :n))) ,=)
:main (* :open (any (if-not :close 1)) :close -1)})
(check-match janet-longstring "`john" false)
(check-match janet-longstring "abc" false)
(check-match janet-longstring "` `" true)
(check-match janet-longstring "` `" true)
(check-match janet-longstring "`` ``" true)
(check-match janet-longstring "``` `` ```" true)
(check-match janet-longstring "`` ```" false)
(check-match janet-longstring "`a``b`" false)
# Line and column capture
# 776ce586b
# Emits a (line, column) pair before every byte. Both are 1-based; the
# newline byte itself still belongs to line 1 and the next byte starts line 2.
(def line-col (peg/compile '(any (* (line) (column) 1))))
(check-deep line-col "abcd" @[1 1 1 2 1 3 1 4])
(check-deep line-col "" @[])
(check-deep line-col "abcd\n" @[1 1 1 2 1 3 1 4 1 5])
(check-deep line-col "abcd\nz" @[1 1 1 2 1 3 1 4 1 5 2 1])
# Backmatch
# 711fe64a5
# The run of "x" before "y" is captured under tag :1 and must be repeated
# exactly after "y". The 10000-byte inputs exercise long captures.
(def backmatcher-1 '(* (capture (any "x") :1) "y" (backmatch :1) -1))
(check-match backmatcher-1 "y" true)
(check-match backmatcher-1 "xyx" true)
(check-match backmatcher-1 "xxxxxxxyxxxxxxx" true)
(check-match backmatcher-1 "xyxx" false)
(check-match backmatcher-1 (string (string/repeat "x" 73) "y") false)
(check-match backmatcher-1 (string (string/repeat "x" 10000) "y") false)
(check-match backmatcher-1 (string (string/repeat "x" 10000) "y"
(string/repeat "x" 10000)) true)
# Same checks with an untagged capture and an argument-less (backmatch).
(def backmatcher-2 '(* '(any "x") "y" (backmatch) -1))
(check-match backmatcher-2 "y" true)
(check-match backmatcher-2 "xyx" true)
(check-match backmatcher-2 "xxxxxxxyxxxxxxx" true)
(check-match backmatcher-2 "xyxx" false)
(check-match backmatcher-2 (string (string/repeat "x" 73) "y") false)
(check-match backmatcher-2 (string (string/repeat "x" 10000) "y") false)
(check-match backmatcher-2 (string (string/repeat "x" 10000) "y"
(string/repeat "x" 10000)) true)
# Long-string matcher built only from backmatch: a non-empty body that
# contains no copy of the opening delimiter, followed by the delimiter again.
(def longstring-2 '(* '(some "`")
(some (if-not (backmatch) 1))
(backmatch) -1))
(check-match longstring-2 "`john" false)
(check-match longstring-2 "abc" false)
(check-match longstring-2 "` `" true)
(check-match longstring-2 "` `" true)
(check-match longstring-2 "`` ``" true)
(check-match longstring-2 "``` `` ```" true)
(check-match longstring-2 "`` ```" false)
# Optional
# 4eeadd746
# The same three checks are run through both spellings, `opt` and `?`.
(check-match '(* (opt "hi") -1) "" true)
(check-match '(* (opt "hi") -1) "hi" true)
(check-match '(* (opt "hi") -1) "no" false)
(check-match '(* (? "hi") -1) "" true)
(check-match '(* (? "hi") -1) "hi" true)
(check-match '(* (? "hi") -1) "no" false)
# Drop
# b4934cedd
# drop still matches but leaves no captures, whether or not the inner
# pattern captured anything.
(check-deep '(drop '"hello") "hello" @[])
(check-deep '(drop "hello") "hello" @[])
# Add bytecode verification for peg unmarshaling
# e88a9af2f
# This should be valgrind clean.
(var pegi 3)
(defn marshpeg
  "Compile `p`, round-trip the compiled peg through marshal and unmarshal,
  and assert that a value comes back. `pegi` numbers the assertion messages."
  [p]
  (assert (unmarshal (marshal (peg/compile p)))
          (string "peg marshal " (++ pegi))))
# One round-trip per kind of pattern: sequences and sets, accumulate,
# lookahead, recursion, ranges, if-not, error, constant, replace, cmt, group,
# sub and split. (The if-not case appears twice.)
(marshpeg '(* 1 2 (set "abcd") "asdasd" (+ "." 3)))
(marshpeg '(% (* (+ 1 2 3) (* "drop" "bear") '"hi")))
(marshpeg '(> 123 "abcd"))
(marshpeg '{:main (* 1 "hello" :main)})
(marshpeg '(range "AZ"))
(marshpeg '(if-not "abcdf" 123))
(marshpeg '(error ($)))
(marshpeg '(* "abcd" (constant :hi)))
(marshpeg ~(/ "abc" ,identity))
(marshpeg '(if-not "abcdf" 123))
(marshpeg ~(cmt "abcdf" ,identity))
(marshpeg '(group "abc"))
(marshpeg '(sub "abcdf" "abc"))
(marshpeg '(* (sub 1 1)))
(marshpeg '(split "," (+ "a" "b" "c")))
# Peg swallowing errors
# 159651117
# Calling nil raises. The error must propagate out of peg/match so that `try`
# returns the (truthy) error value; the second assert is the same call made
# without a peg, as a baseline.
(assert (try (peg/match ~(/ '1 ,(fn [x] (nil x))) "x") ([err] err))
"errors should not be swallowed")
(assert (try ((fn [x] (nil x))) ([err] err))
"errors should not be swallowed 2")
# Check for bad memoization (+ :a) should mean different things in
# different contexts
# 8bc8709d0
# The outer :a is "abc" and the nested grammar rebinds :a to "def", so only
# "abc" followed by "def" may match.
(def redef-a
~{:a "abc"
:c (+ :a)
:main (* :c {:a "def" :main (+ :a)} -1)})
(check-match redef-a "abcdef" true)
(check-match redef-a "abcabc" false)
(check-match redef-a "defdef" false)
# 54a04b589
# Inside the nested grammar :pork is "beef" rather than the enclosing rule,
# so the rule does not recurse: it accepts the empty string or one byte
# followed by "beef".
(def redef-b
~{:pork {:pork "beef" :main (+ -1 (* 1 :pork))}
:main :pork})
(check-match redef-b "abeef" true)
(check-match redef-b "aabeef" false)
(check-match redef-b "aaaaaa" false)
# Integer parsing
# 45feb5548
# int/uint read n bytes least-significant byte first; the -be variants read
# most-significant byte first. int sign-extends (0xFF reads as -1) while uint
# does not (0xFF reads as 255).
(check-deep '(int 1) "a" @[(chr "a")])
(check-deep '(uint 1) "a" @[(chr "a")])
(check-deep '(int-be 1) "a" @[(chr "a")])
(check-deep '(uint-be 1) "a" @[(chr "a")])
(check-deep '(int 1) "\xFF" @[-1])
(check-deep '(uint 1) "\xFF" @[255])
(check-deep '(int-be 1) "\xFF" @[-1])
(check-deep '(uint-be 1) "\xFF" @[255])
(check-deep '(int 2) "\xFF\x7f" @[0x7fff])
(check-deep '(int-be 2) "\x7f\xff" @[0x7fff])
(check-deep '(uint 2) "\xff\x7f" @[0x7fff])
(check-deep '(uint-be 2) "\x7f\xff" @[0x7fff])
(check-deep '(uint-be 2) "\x7f\xff" @[0x7fff])
# The 7- and 8-byte widths are compared against int/u64 and int/s64 values;
# the when-let skips them if either constructor is falsey.
(when-let [u64 int/u64
i64 int/s64]
(check-deep '(uint 8) "\xff\x7f\x00\x00\x00\x00\x00\x00" @[(u64 0x7fff)])
(check-deep '(int 8) "\xff\x7f\x00\x00\x00\x00\x00\x00" @[(i64 0x7fff)])
(check-deep '(uint 7) "\xff\x7f\x00\x00\x00\x00\x00" @[(u64 0x7fff)])
(check-deep '(int 7) "\xff\x7f\x00\x00\x00\x00\x00" @[(i64 0x7fff)]))
# (int 2) consumes two of the three bytes, so -1 cannot match.
(check-deep '(* (int 2) -1) "123" nil)
# to/thru bug
# issue #640 - 742469a8b
# to/thru must be able to reach the end-of-input pattern -1, and must fail
# rather than succeed when the target never occurs.
(check-deep '(to -1) "aaaa" @[])
(check-deep '(thru -1) "aaaa" @[])
(check-deep ''(to -1) "aaaa" @["aaaa"])
(check-deep ''(thru -1) "aaaa" @["aaaa"])
(check-deep '(to "b") "aaaa" nil)
(check-deep '(thru "b") "aaaa" nil)
# unref
# 96513665d
# Tiny tag parser. Each :tagged element is turned into a struct with :tag and
# :value keys, and the closing tag must backmatch the :tag-name captured by
# the opening tag. unref wraps :tagged, presumably so that a nested element's
# :tag-name does not leak into the enclosing one - see the PEG docs.
(def grammar
(peg/compile
~{:main (* :tagged -1)
:tagged (unref (replace (* :open-tag :value :close-tag) ,struct))
:open-tag (* (constant :tag) "<" (capture :w+ :tag-name) ">")
:value (* (constant :value) (group (any (+ :tagged :untagged))))
:close-tag (* "</" (backmatch :tag-name) ">")
:untagged (capture (any (if-not "<" 1)))}))
(check-deep grammar "<p><em>foobar</em></p>"
@[{:tag "p" :value @[{:tag "em" :value @["foobar"]}]}])
(check-deep grammar "<p>foobar</p>" @[{:tag "p" :value @["foobar"]}])
# Using a large test grammar
# cf05ff610
# Special-form names. capture-sym treats membership in this struct the same
# way as a binding found in root-env.
(def- specials {'fn true
'var true
'do true
'while true
'def true
'splice true
'set true
'unquote true
'quasiquote true
'quote true
'if true})
(defn- check-number
  "Return `text` unchanged when scan-number can parse it, otherwise nil."
  [text]
  (when (scan-number text)
    text))
(defn capture-sym
  "Classify `text` as a symbol name. Returns [:coresym text] when the symbol
  is bound in root-env or listed in specials, and [:symbol text] otherwise."
  [text]
  (let [sym (symbol text)
        kind (if (or (root-env sym) (specials sym)) :coresym :symbol)]
    [kind text]))
# Grammar that walks Janet source text. Each token rule replaces its matched
# text with a kind keyword (:comment, :keyword, :constant, :string, :number),
# except :symbol, which goes through capture-sym. The container rules recurse
# through :root and :root2, and raise a peg error on a missing closer.
# Note that :buffer and :long-buffer also produce :string.
(def grammar
~{:ws (set " \v\t\r\f\n\0")
:readermac (set "';~,")
:symchars (+ (range "09" "AZ" "az" "\x80\xFF")
(set "!$%&*+-./:<?=>@^_|"))
:token (some :symchars)
:hex (range "09" "af" "AF")
:escape (* "\\" (+ (set `"'0?\abefnrtvz`)
(* "x" :hex :hex)
(error (constant "bad hex escape"))))
:comment (/ '(* "#" (any (if-not (+ "\n" -1) 1))) (constant :comment))
:symbol (/ ':token ,capture-sym)
:keyword (/ '(* ":" (any :symchars)) (constant :keyword))
:constant (/ '(+ "true" "false" "nil") (constant :constant))
:bytes (* "\"" (any (+ :escape (if-not "\"" 1))) "\"")
:string (/ ':bytes (constant :string))
:buffer (/ '(* "@" :bytes) (constant :string))
:long-bytes {:delim (some "`")
:open (capture :delim :n)
:close (cmt (* (not (> -1 "`")) (-> :n) '(backmatch :n))
,=)
:main (drop (* :open (any (if-not :close 1)) :close))}
:long-string (/ ':long-bytes (constant :string))
:long-buffer (/ '(* "@" :long-bytes) (constant :string))
:number (/ (cmt ':token ,check-number) (constant :number))
:raw-value (+ :comment :constant :number :keyword
:string :buffer :long-string :long-buffer
:parray :barray :ptuple :btuple :struct :dict :symbol)
:value (* (? '(some (+ :ws :readermac))) :raw-value '(any :ws))
:root (any :value)
:root2 (any (* :value :value))
:ptuple (* '"(" :root (+ '")" (error "")))
:btuple (* '"[" :root (+ '"]" (error "")))
:struct (* '"{" :root2 (+ '"}" (error "")))
:parray (* '"@" :ptuple)
:barray (* '"@" :btuple)
:dict (* '"@" :struct)
:main (+ :root (error ""))})
(def p (peg/compile grammar))
# Just make sure this is valgrind clean.
# p is rebound to the compiled peg after a make-image/load-image round trip.
(def p (-> p make-image load-image))
(assert (peg/match p "abc") "complex peg grammar 1")
(assert (peg/match p "[1 2 3 4]") "complex peg grammar 2")
###
### Compiling brainfuck to Janet.
###
# 20d5d560f
# Each run of identical +, -, > or < commands becomes one Janet form whose
# operand is the run length. "." becomes a prinf of the current cell, and a
# [...] loop becomes a `while` over the forms captured from its body.
# DATA and POS are left as free symbols for the caller to bind, and :s in
# :main lets whitespace between commands be consumed.
(def- bf-peg
"Peg for compiling brainfuck into a Janet source ast."
(peg/compile
~{:+ (/ '(some "+") ,(fn [x] ~(+= (DATA POS) ,(length x))))
:- (/ '(some "-") ,(fn [x] ~(-= (DATA POS) ,(length x))))
:> (/ '(some ">") ,(fn [x] ~(+= POS ,(length x))))
:< (/ '(some "<") ,(fn [x] ~(-= POS ,(length x))))
:. (* "." (constant (prinf "%c" (get DATA POS))))
:loop (/ (* "[" :main "]") ,(fn [& captures]
~(while (not= (get DATA POS) 0)
,;captures)))
:main (any (+ :s :loop :+ :- :> :< :.))}))
(defn bf
  "Run brainfuck."
  [text]
  # Compile the program to Janet forms first, then evaluate them with a tape
  # of 100 zeroed cells and the pointer starting at cell 50.
  (def program (peg/match bf-peg text))
  (eval
    ~(let [DATA (array/new-filled 100 0)]
       (var POS 50)
       ,;program)))
(defn test-bf
  "Test some bf for expected output."
  [input output]
  # Redirect :out into a buffer so that the program's output can be compared.
  (def sink @"")
  (with-dyns [:out sink]
    (bf input))
  (def actual (string sink))
  (assert (= (string output) actual)
          (string "bf input '" input "' failed, expected "
                  (describe output) ", got " (describe actual) ".")))
# Three different programs whose output is a "Hello World" greeting; the
# last one prints "Hello, World!" with no trailing newline.
(test-bf (string "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]"
">>.>---.+++++++..+++.>>.<-.<.+++.------.--------"
".>>+.>++.") "Hello World!\n")
(test-bf (string ">++++++++"
"[-<+++++++++>]<.>>+>-[+]++>++>+++[>[->+++<<+++>]<<]"
">-----.>->+++..+++.>-.<<+[>[+>+]>>]<--------------"
".>>.+++.------.--------.>+.>+.")
"Hello World!\n")
(test-bf (string "+[+[<<<+>>>>]+<-<-<<<+<++]<<.<++.<++..+++.<<++.<---"
".>>.>.+++.------.>-.>>--.")
"Hello, World!")
# Regression test
# issue #300 - 714bd61d5
# Just don't segfault
# The replacement value here is a struct rather than a function or constant.
(assert (peg/match '{:main (replace "S" {"S" :spade})} "S7")
"regression #300")
# Lenprefix rule
# 8b5bcaee3
# The text before ":" is parsed with scan-number and used as the repeat count
# for the single-byte pattern; -1 then requires exactly that many bytes.
(def peg (peg/compile ~(* (lenprefix (/ (* '(any (if-not ":" 1)) ":")
,scan-number) 1) -1)))
(assert (peg/match peg "5:abcde") "lenprefix 1")
(assert (not (peg/match peg "5:abcdef")) "lenprefix 2")
(assert (not (peg/match peg "5:abcd")) "lenprefix 3")
# Packet capture
# 8b5bcaee3
(def peg2
(peg/compile
~{# capture packet length in tag :header-len
:packet-header (* (/ ':d+ ,scan-number :header-len) ":")
# capture n bytes from a backref :header-len
:packet-body '(lenprefix (-> :header-len) 1)
# header, followed by body, and drop the :header-len capture
:packet (/ (* :packet-header :packet-body) ,|$1)
# any exact sequence of packets (no extra characters)
:main (* (any :packet) -1)}))
(assert (deep= @["a" "bb" "ccc"] (peg/match peg2 "1:a2:bb3:ccc"))
"lenprefix 4")
(assert (deep= @["a" "bb" "cccccc"] (peg/match peg2 "1:a2:bb6:cccccc"))
"lenprefix 5")
# A stray ":" after the second packet makes the whole input invalid.
(assert (= nil (peg/match peg2 "1:a2:bb:5:cccccc")) "lenprefix 6")
(assert (= nil (peg/match peg2 "1:a2:bb:7:cccccc")) "lenprefix 7")
# Issue #412
# 677737d34
# (> -1 "a") looks back one byte without moving the cursor, so "b" is still
# matched right after the "a".
(assert (peg/match '(* "a" (> -1 "a") "b") "abc")
"lookhead does not move cursor")
# 6d096551f
# peg3 must start with "(" and then reach a ")". peg4 skips through the
# first "(" and captures everything up to, but not including, the next ")".
(def peg3
~{:main (* "(" (thru ")"))})
(def peg4 (peg/compile ~(* (thru "(") '(to ")"))))
(assert (peg/match peg3 "(12345)") "peg thru 1")
(assert (not (peg/match peg3 " (12345)")) "peg thru 2")
(assert (not (peg/match peg3 "(12345")) "peg thru 3")
(assert (= "abc" (0 (peg/match peg4 "123(abc)"))) "peg thru/to 1")
(assert (= "abc" (0 (peg/match peg4 "(abc)"))) "peg thru/to 2")
(assert (not (peg/match peg4 "123(abc")) "peg thru/to 3")
# 86e12369b
# A tuple headed by an integer is shorthand for repeating the pattern that
# many times. peg5 is unanchored, so trailing input is allowed.
(def peg5 (peg/compile [3 "abc"]))
(assert (:match peg5 "abcabcabc") "repeat alias 1")
(assert (:match peg5 "abcabcabcac") "repeat alias 2")
(assert (not (:match peg5 "abcabc")) "repeat alias 3")
# Peg find and find-all
# c26f57362
# peg/find returns the index of the first match, or a falsey value when there
# is none; peg/find-all returns every matching index.
(def p "/usr/local/bin/janet")
(assert (= (peg/find '"n/" p) 13) "peg find 1")
(assert (not (peg/find '"t/" p)) "peg find 2")
(assert (deep= (peg/find-all '"/" p) @[0 4 10 14]) "peg find-all")
# Peg replace and replace-all
# e548e1f6e
(defn check-replacer
  "Assert that peg/replace and peg/replace-all give the same text as
  string/replace and string/replace-all for pattern `x`, substitution `y`
  and input `z`."
  [x y z]
  (def once-string (string/replace x y z))
  (def once-peg (string (peg/replace x y z)))
  (assert (= once-string once-peg)
          "replacer test replace")
  (def all-string (string/replace-all x y z))
  (def all-peg (string (peg/replace-all x y z)))
  (assert (= all-string all-peg)
          "replacer test replace-all"))
# String substitutions, including empty input and an empty replacement.
(check-replacer "abc" "Z" "abcabcabcabasciabsabc")
(check-replacer "abc" "Z" "")
(check-replacer "aba" "ZZZZZZ" "ababababababa")
(check-replacer "aba" "" "ababababababa")
# 485099fd6
# The substitution may also be a function or a non-string value.
(check-replacer "aba" string/ascii-upper "ababababababa")
(check-replacer "aba" 123 "ababababababa")
(assert (= (string (peg/replace-all ~(set "ab") string/ascii-upper "abcaa"))
"ABcAA")
"peg/replace-all cfunction")
(assert (= (string (peg/replace-all ~(set "ab") |$ "abcaa"))
"abcaa")
"peg/replace-all function")
# 9dc7e8ed3
(defn peg-test
  "Call `f` (peg/replace or peg/replace-all) with `peg`, `subst` and `text`,
  and assert that the stringified result equals `expected`. `name` is the
  assertion message."
  [name f peg subst text expected]
  (def actual (-> (f peg subst text) string))
  (assert (= actual expected) name))
# The substitution function receives the matched text followed by the
# captures made by the pattern.
(peg-test "peg/replace has access to captures"
peg/replace
~(sequence "." (capture (set "ab")))
(fn [str char] (string/format "%s -> %s, " str (string/ascii-upper char)))
".a.b.c"
".a -> A, .b.c")
(peg-test "peg/replace-all has access to captures"
peg/replace-all
~(sequence "." (capture (set "ab")))
(fn [str char] (string/format "%s -> %s, " str (string/ascii-upper char)))
".a.b.c"
".a -> A, .b -> B, .c")
# Peg bug
# eab5f67c5
# A pattern that can match nothing must still succeed, with no captures, on
# empty strings and buffers and at the very end of the input.
(assert (deep= @[] (peg/match '(any 1) @"")) "peg empty pattern 1")
(assert (deep= @[] (peg/match '(any 1) (buffer))) "peg empty pattern 2")
(assert (deep= @[] (peg/match '(any 1) "")) "peg empty pattern 3")
(assert (deep= @[] (peg/match '(any 1) (string))) "peg empty pattern 4")
(assert (deep= @[] (peg/match '(* "test" (any 1)) @"test"))
"peg empty pattern 5")
(assert (deep= @[] (peg/match '(* "test" (any 1)) (buffer "test")))
"peg empty pattern 6")
# number pattern
# cccbdc164
# (number pat) captures the matched text as a parsed number; the second case
# shows that hexadecimal text is accepted.
(assert (deep= @[111] (peg/match '(number :d+) "111"))
"simple number capture 1")
(assert (deep= @[255] (peg/match '(number :w+) "0xff"))
"simple number capture 2")
# Marshal and unmarshal pegs
# 446ab037b
# The round-tripped peg must behave like the original: it is unanchored at
# the end but still has to match from the start of the input.
(def p (-> "abcd" peg/compile marshal unmarshal))
(assert (peg/match p "abcd") "peg marshal 1")
(assert (peg/match p "abcdefg") "peg marshal 2")
(assert (not (peg/match p "zabcdefg")) "peg marshal 3")
# to/thru bug
# issue #971 - a895219d2
# `pattern` and `alt-pattern` differ only in :main. (some (thru :entry)) must
# produce the same captures as stepping one byte at a time with
# (some (choice :entry 1)).
(def pattern
(peg/compile
'{:dd (sequence :d :d)
:sep (set "/-")
:date (sequence :dd :sep :dd)
:wsep (some (set " \t"))
:entry (group (sequence (capture :date) :wsep (capture :date)))
:main (some (thru :entry))}))
(def alt-pattern
(peg/compile
'{:dd (sequence :d :d)
:sep (set "/-")
:date (sequence :dd :sep :dd)
:wsep (some (set " \t"))
:entry (group (sequence (capture :date) :wsep (capture :date)))
:main (some (choice :entry 1))}))
(def text "1800-10-818-9-818 16/12\n17/12 19/12\n20/12 11/01")
(assert (deep= (peg/match pattern text) (peg/match alt-pattern text))
"to/thru bug #971")
# 14657a7
# Starting from the offset passed to peg/match, :recur steps back one byte at
# a time until :cap matches, i.e. until the preceding byte is not a symbol
# character or the position is 0 (:anchor). It then captures that position
# and the run of symbol characters after it.
(def- sym-prefix-peg
(peg/compile
~{:symchar (+ (range "\x80\xff" "AZ" "az" "09")
(set "!$%&*+-./:<?=>@^_"))
:anchor (drop (cmt ($) ,|(= $ 0)))
:cap (* (+ (> -1 (not :symchar)) :anchor) (* ($) '(some :symchar)))
:recur (+ :cap (> -1 :recur))
:main (> -1 :recur)}))
(assert (deep= (peg/match sym-prefix-peg @"123" 3) @[0 "123"])
"peg lookback")
(assert (deep= (peg/match sym-prefix-peg @"1234" 4) @[0 "1234"])
"peg lookback 2")
# issue #1027 - 356b39c6f
# The pattern matches three-byte runs whose first and last bytes are equal;
# "efa" does not qualify and is left untouched.
(assert (deep= (peg/replace-all '(* (<- 1) 1 (backmatch))
"xxx" "aba cdc efa")
@"xxx xxx efa")
"peg replace-all 1")
# issue #1026 - 9341081a4
# Captures made inside the condition of not / if-not / if must not appear in
# the result: the constant 7 is never expected here.
(assert (deep=
(peg/match '(not (* (constant 7) "a")) "hello")
@[]) "peg not")
(assert (deep=
(peg/match '(if-not (* (constant 7) "a") "hello") "hello")
@[]) "peg if-not")
(assert (deep=
(peg/match '(if-not (drop (* (constant 7) "a")) "hello") "hello")
@[]) "peg if-not drop")
(assert (deep=
(peg/match '(if (not (* (constant 7) "a")) "hello") "hello")
@[]) "peg if not")
(defn test
  "Match `peg` against `input` and assert that the result is deep= to
  `expected` (nil for a failed match). `name` is the assertion message."
  [name peg input expected]
  (def captures (peg/match peg input))
  (assert (deep= captures expected) name))
# (sub window pat): `pat` is matched only against the text that `window`
# matched, and the cursor then advances to the end of `window`'s match.
(test "sub: matches the same input twice"
~(sub "abcd" "abc")
"abcdef"
@[])
(test "sub: second pattern cannot match more than the first pattern"
~(sub "abcd" "abcde")
"abcdef"
nil)
(test "sub: fails if first pattern fails"
~(sub "x" "abc")
"abcdef"
nil)
(test "sub: fails if second pattern fails"
~(sub "abc" "x")
"abcdef"
nil)
(test "sub: keeps captures from both patterns"
~(sub '"abcd" '"abc")
"abcdef"
@["abcd" "abc"])
(test "sub: second pattern can reference captures from first"
~(* (constant 5 :tag) (sub (capture "abc" :tag) (backref :tag)))
"abcdef"
@[5 "abc" "abc"])
(test "sub: second pattern can't see past what the first pattern matches"
~(sub "abc" (* "abc" -1))
"abcdef"
@[])
(test "sub: positions inside second match are still relative to the entire input"
~(* "one\ntw" (sub "o" (* ($) (line) (column))))
"one\ntwo\nthree\n"
@[6 2 3])
(test "sub: advances to the end of the first pattern's match"
~(* (sub "abc" "ab") "d")
"abcdef"
@[])
# (split sep pat): the input is cut at every match of `sep` and `pat` is
# matched against each piece in turn.
(test "split: basic functionality"
~(split "," '1)
"a,b,c"
@["a" "b" "c"])
(test "split: drops captures from separator pattern"
~(split '"," '1)
"a,b,c"
@["a" "b" "c"])
(test "split: can match empty subpatterns"
~(split "," ':w*)
",a,,bar,,,c,,"
@["" "a" "" "bar" "" "" "c" "" ""])
(test "split: subpattern is limited to only text before the separator"
~(split "," '(to -1))
"a,,bar,c"
@["a" "" "bar" "c"])
(test "split: fails if any subpattern fails"
~(split "," '"a")
"a,a,b"
nil)
(test "split: separator does not have to match anything"
~(split "x" '(to -1))
"a,a,b"
@["a,a,b"])
(test "split: always consumes entire input"
~(split 1 '"")
"abc"
@["" "" "" ""])
(test "split: separator can be an arbitrary PEG"
~(split :s+ '(to -1))
"a b c"
@["a" "b" "c"])
(test "split: does not advance past the end of the input"
~(* (split "," ':w+) 0)
"a,b,c"
@["a" "b" "c"])
(end-suite)