1
0
mirror of https://github.com/janet-lang/janet synced 2025-01-26 07:06:51 +00:00

Add peg/replace and peg/replace-all

This commit is contained in:
Calvin Rose 2020-07-01 21:26:11 -05:00
parent e08235b575
commit e548e1f6e0
3 changed files with 86 additions and 17 deletions

View File

@ -502,7 +502,7 @@ void janet_lib_math(JanetTable *env) {
#ifdef NAN #ifdef NAN
janet_def(env, "math/nan", janet_wrap_number(NAN), janet_def(env, "math/nan", janet_wrap_number(NAN),
#else #else
janet_def(env, "math/nan", janet_wrap_number(0.0/0.0), janet_def(env, "math/nan", janet_wrap_number(0.0 / 0.0),
#endif #endif
JDOC("Not a number (IEEE-754 NaN)")); JDOC("Not a number (IEEE-754 NaN)"));
#endif #endif

View File

@ -1313,24 +1313,31 @@ typedef struct {
JanetPeg *peg; JanetPeg *peg;
PegState s; PegState s;
JanetByteView bytes; JanetByteView bytes;
JanetByteView repl;
int32_t start; int32_t start;
} PegCall; } PegCall;
/* Initialize state for peg cfunctions */ /* Initialize state for peg cfunctions */
static PegCall peg_cfun_init(int32_t argc, Janet *argv) { static PegCall peg_cfun_init(int32_t argc, Janet *argv, int get_replace) {
PegCall ret; PegCall ret;
janet_arity(argc, 2, -1); int32_t min = get_replace ? 3 : 2;
janet_arity(argc, get_replace, -1);
if (janet_checktype(argv[0], JANET_ABSTRACT) && if (janet_checktype(argv[0], JANET_ABSTRACT) &&
janet_abstract_type(janet_unwrap_abstract(argv[0])) == &janet_peg_type) { janet_abstract_type(janet_unwrap_abstract(argv[0])) == &janet_peg_type) {
ret.peg = janet_unwrap_abstract(argv[0]); ret.peg = janet_unwrap_abstract(argv[0]);
} else { } else {
ret.peg = compile_peg(argv[0]); ret.peg = compile_peg(argv[0]);
} }
if (get_replace) {
ret.repl = janet_getbytes(argv, 1);
ret.bytes = janet_getbytes(argv, 2);
} else {
ret.bytes = janet_getbytes(argv, 1); ret.bytes = janet_getbytes(argv, 1);
if (argc > 2) { }
ret.start = janet_gethalfrange(argv, 2, ret.bytes.len, "offset"); if (argc > min) {
ret.s.extrac = argc - 3; ret.start = janet_gethalfrange(argv, min, ret.bytes.len, "offset");
ret.s.extrav = janet_tuple_n(argv + 3, argc - 3); ret.s.extrac = argc - min - 1;
ret.s.extrav = janet_tuple_n(argv + min + 1, argc - min - 1);
} else { } else {
ret.start = 0; ret.start = 0;
ret.s.extrac = 0; ret.s.extrac = 0;
@ -1348,18 +1355,22 @@ static PegCall peg_cfun_init(int32_t argc, Janet *argv) {
return ret; return ret;
} }
/* Empty the tag, scratch, and capture containers held by the call's peg
 * state so that the rule can be evaluated again from another position
 * without seeing results left behind by a previous attempt. */
static void peg_call_reset(PegCall *c) {
    c->s.tags->count = 0;
    c->s.scratch->count = 0;
    c->s.captures->count = 0;
}
static Janet cfun_peg_match(int32_t argc, Janet *argv) { static Janet cfun_peg_match(int32_t argc, Janet *argv) {
PegCall c = peg_cfun_init(argc, argv); PegCall c = peg_cfun_init(argc, argv, 0);
const uint8_t *result = peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + c.start); const uint8_t *result = peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + c.start);
return result ? janet_wrap_array(c.s.captures) : janet_wrap_nil(); return result ? janet_wrap_array(c.s.captures) : janet_wrap_nil();
} }
static Janet cfun_peg_find(int32_t argc, Janet *argv) { static Janet cfun_peg_find(int32_t argc, Janet *argv) {
PegCall c = peg_cfun_init(argc, argv); PegCall c = peg_cfun_init(argc, argv, 0);
for (int32_t i = c.start; i < c.bytes.len; i++) { for (int32_t i = c.start; i < c.bytes.len; i++) {
c.s.captures->count = 0; peg_call_reset(&c);
c.s.scratch->count = 0;
c.s.tags->count = 0;
if (peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i)) if (peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i))
return janet_wrap_integer(i); return janet_wrap_integer(i);
} }
@ -1367,22 +1378,58 @@ static Janet cfun_peg_find(int32_t argc, Janet *argv) {
} }
static Janet cfun_peg_find_all(int32_t argc, Janet *argv) { static Janet cfun_peg_find_all(int32_t argc, Janet *argv) {
PegCall c = peg_cfun_init(argc, argv); PegCall c = peg_cfun_init(argc, argv, 0);
JanetArray *ret = janet_array(0); JanetArray *ret = janet_array(0);
for (int32_t i = c.start; i < c.bytes.len; i++) { for (int32_t i = c.start; i < c.bytes.len; i++) {
c.s.captures->count = 0; peg_call_reset(&c);
c.s.scratch->count = 0;
c.s.tags->count = 0;
if (peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i)) if (peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i))
janet_array_push(ret, janet_wrap_integer(i)); janet_array_push(ret, janet_wrap_integer(i));
} }
return janet_wrap_array(ret); return janet_wrap_array(ret);
} }
/* Shared implementation behind peg/replace and peg/replace-all.
 *
 * Arguments are parsed by peg_cfun_init in "replace" mode, which fills in
 * the peg, the replacement bytes (c.repl), the input text (c.bytes) and the
 * starting offset (c.start).
 *
 * The input is scanned from c.start. At each position the peg is tried;
 * on success the unmatched bytes accumulated so far are copied to the output,
 * followed by the replacement, and scanning resumes after the match.
 * Input before c.start and after the last match is copied through unchanged.
 *
 * only_one: when non-zero, stop after the first successful match.
 *
 * Returns the result wrapped as a buffer. */
static Janet cfun_peg_replace_generic(int32_t argc, Janet *argv, int only_one) {
    PegCall c = peg_cfun_init(argc, argv, 1);
    JanetBuffer *out = janet_buffer(0);
    const uint8_t *text = c.bytes.bytes;
    /* Every input byte before `copied` has already been emitted or replaced. */
    int32_t copied = 0;
    int32_t pos = c.start;
    while (pos < c.bytes.len) {
        peg_call_reset(&c);
        const uint8_t *match_end = peg_rule(&c.s, c.s.bytecode, text + pos);
        if (NULL == match_end) {
            /* No match here; try the next byte. */
            pos++;
            continue;
        }
        /* Flush the literal bytes sitting between the last match and this one. */
        if (copied < pos) {
            janet_buffer_push_bytes(out, text + copied, pos - copied);
        }
        janet_buffer_push_bytes(out, c.repl.bytes, c.repl.len);
        copied = match_end - text;
        /* An empty match must still advance the scan by one byte to guarantee
         * progress; `copied` stays put, so that byte is emitted later. */
        pos = (copied == pos) ? pos + 1 : copied;
        if (only_one) {
            break;
        }
    }
    /* Emit whatever remains after the final match (or the whole input if
     * nothing matched). */
    if (copied < c.bytes.len) {
        janet_buffer_push_bytes(out, text + copied, c.bytes.len - copied);
    }
    return janet_wrap_buffer(out);
}
/* C function for peg/replace-all: runs the shared replace routine with the
 * "stop after first match" flag cleared. */
static Janet cfun_peg_replace_all(int32_t argc, Janet *argv) {
    const int only_one = 0;
    return cfun_peg_replace_generic(argc, argv, only_one);
}
/* C function for peg/replace: runs the shared replace routine with the
 * "stop after first match" flag set. */
static Janet cfun_peg_replace(int32_t argc, Janet *argv) {
    const int only_one = 1;
    return cfun_peg_replace_generic(argc, argv, only_one);
}
static JanetMethod peg_methods[] = { static JanetMethod peg_methods[] = {
{"match", cfun_peg_match}, {"match", cfun_peg_match},
{"find", cfun_peg_find}, {"find", cfun_peg_find},
{"find-all", cfun_peg_find_all}, {"find-all", cfun_peg_find_all},
{"replace", cfun_peg_replace},
{"replace-all", cfun_peg_replace_all},
{NULL, NULL} {NULL, NULL}
}; };
@ -1416,6 +1463,17 @@ static const JanetReg peg_cfuns[] = {
JDOC("(peg/find-all peg text &opt start & args)\n\n" JDOC("(peg/find-all peg text &opt start & args)\n\n"
"Find all indexes where the peg matches in text. Returns an array of integers.") "Find all indexes where the peg matches in text. Returns an array of integers.")
}, },
{
"peg/replace", cfun_peg_replace,
JDOC("(peg/replace peg repl text &opt start & args)\n\n"
"Replace first match of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement. "
"If no matches are found, returns the input string in a new buffer.")
},
{
"peg/replace-all", cfun_peg_replace_all,
JDOC("(peg/replace-all peg repl text &opt start & args)\n\n"
"Replace all matches of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement.")
},
{NULL, NULL, NULL} {NULL, NULL, NULL}
}; };

View File

@ -338,4 +338,15 @@ neldb\0\0\0\xD8\x05printG\x01\0\xDE\xDE\xDE'\x03\0marshal_tes/\x02
(assert (not (peg/find '"t/" p)) "peg find 2") (assert (not (peg/find '"t/" p)) "peg find 2")
(assert (deep= (peg/find-all '"/" p) @[0 4 10 14]) "peg find-all") (assert (deep= (peg/find-all '"/" p) @[0 4 10 14]) "peg find-all")
# Peg replace and replace-all
(var ti 0)
# Check that the peg-based replacers produce the same text as the string
# module's replacers for pattern x, replacement y, and input z.
# The peg results are converted with `string` before comparison.
(defn check-replacer
  [x y z]
  (let [expected-one (string/replace x y z)
        actual-one (string (peg/replace x y z))]
    (assert (= expected-one actual-one) "replacer test replace"))
  (let [expected-all (string/replace-all x y z)
        actual-all (string (peg/replace-all x y z))]
    (assert (= expected-all actual-all) "replacer test replace-all")))
(check-replacer "abc" "Z" "abcabcabcabasciabsabc")
(check-replacer "abc" "Z" "")
(check-replacer "aba" "ZZZZZZ" "ababababababa")
(check-replacer "aba" "" "ababababababa")
(end-suite) (end-suite)