mirror of
https://github.com/janet-lang/janet
synced 2025-01-12 16:40:27 +00:00
string and peg replacement functions can now take functions
Functions will be invoked with the matched text, and their result will be coerced to a string and used as the new replacement text. This also allows passing non-function, non-byteviewable values, which will be converted into strings during replacement (only once, and only if at least one match is found).
This commit is contained in:
parent
d9ed7a77f8
commit
485099fd6e
@ -1637,7 +1637,7 @@ typedef struct {
|
|||||||
JanetPeg *peg;
|
JanetPeg *peg;
|
||||||
PegState s;
|
PegState s;
|
||||||
JanetByteView bytes;
|
JanetByteView bytes;
|
||||||
JanetByteView repl;
|
Janet subst;
|
||||||
int32_t start;
|
int32_t start;
|
||||||
} PegCall;
|
} PegCall;
|
||||||
|
|
||||||
@ -1653,7 +1653,7 @@ static PegCall peg_cfun_init(int32_t argc, Janet *argv, int get_replace) {
|
|||||||
ret.peg = compile_peg(argv[0]);
|
ret.peg = compile_peg(argv[0]);
|
||||||
}
|
}
|
||||||
if (get_replace) {
|
if (get_replace) {
|
||||||
ret.repl = janet_getbytes(argv, 1);
|
ret.subst = argv[1];
|
||||||
ret.bytes = janet_getbytes(argv, 2);
|
ret.bytes = janet_getbytes(argv, 2);
|
||||||
} else {
|
} else {
|
||||||
ret.bytes = janet_getbytes(argv, 1);
|
ret.bytes = janet_getbytes(argv, 1);
|
||||||
@ -1738,7 +1738,8 @@ static Janet cfun_peg_replace_generic(int32_t argc, Janet *argv, int only_one) {
|
|||||||
trail = i;
|
trail = i;
|
||||||
}
|
}
|
||||||
int32_t nexti = (int32_t)(result - c.bytes.bytes);
|
int32_t nexti = (int32_t)(result - c.bytes.bytes);
|
||||||
janet_buffer_push_bytes(ret, c.repl.bytes, c.repl.len);
|
JanetByteView subst = janet_text_substitution(&c.subst, c.bytes.bytes + i, nexti - i);
|
||||||
|
janet_buffer_push_bytes(ret, subst.bytes, subst.len);
|
||||||
trail = nexti;
|
trail = nexti;
|
||||||
if (nexti == i) nexti++;
|
if (nexti == i) nexti++;
|
||||||
i = nexti;
|
i = nexti;
|
||||||
@ -1754,14 +1755,20 @@ static Janet cfun_peg_replace_generic(int32_t argc, Janet *argv, int only_one) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
JANET_CORE_FN(cfun_peg_replace_all,
|
JANET_CORE_FN(cfun_peg_replace_all,
|
||||||
"(peg/replace-all peg repl text &opt start & args)",
|
"(peg/replace-all peg subst text &opt start & args)",
|
||||||
"Replace all matches of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement.") {
|
"Replace all matches of `peg` in `text` with `subst`, returning a new buffer. "
|
||||||
|
"The peg does not need to make captures to do replacement. "
|
||||||
|
"If `subst` is a function, it will be called once for each match "
|
||||||
|
"and should return the actual replacement text to use.") {
|
||||||
return cfun_peg_replace_generic(argc, argv, 0);
|
return cfun_peg_replace_generic(argc, argv, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
JANET_CORE_FN(cfun_peg_replace,
|
JANET_CORE_FN(cfun_peg_replace,
|
||||||
"(peg/replace peg repl text &opt start & args)",
|
"(peg/replace peg subst text &opt start & args)",
|
||||||
"Replace first match of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement. "
|
"Replace first match of `peg` in `text` with `subst`, returning a new buffer. "
|
||||||
|
"The peg does not need to make captures to do replacement. "
|
||||||
|
"If `subst` is a function, it will be called with the matching text, "
|
||||||
|
"and should return the actual replacement text to use. "
|
||||||
"If no matches are found, returns the input string in a new buffer.") {
|
"If no matches are found, returns the input string in a new buffer.") {
|
||||||
return cfun_peg_replace_generic(argc, argv, 1);
|
return cfun_peg_replace_generic(argc, argv, 1);
|
||||||
}
|
}
|
||||||
|
@ -364,14 +364,13 @@ JANET_CORE_FN(cfun_string_findall,
|
|||||||
|
|
||||||
struct replace_state {
|
struct replace_state {
|
||||||
struct kmp_state kmp;
|
struct kmp_state kmp;
|
||||||
const uint8_t *subst;
|
Janet subst;
|
||||||
int32_t substlen;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static void replacesetup(int32_t argc, Janet *argv, struct replace_state *s) {
|
static void replacesetup(int32_t argc, Janet *argv, struct replace_state *s) {
|
||||||
janet_arity(argc, 3, 4);
|
janet_arity(argc, 3, 4);
|
||||||
JanetByteView pat = janet_getbytes(argv, 0);
|
JanetByteView pat = janet_getbytes(argv, 0);
|
||||||
JanetByteView subst = janet_getbytes(argv, 1);
|
Janet subst = argv[1];
|
||||||
JanetByteView text = janet_getbytes(argv, 2);
|
JanetByteView text = janet_getbytes(argv, 2);
|
||||||
int32_t start = 0;
|
int32_t start = 0;
|
||||||
if (argc == 4) {
|
if (argc == 4) {
|
||||||
@ -380,13 +379,14 @@ static void replacesetup(int32_t argc, Janet *argv, struct replace_state *s) {
|
|||||||
}
|
}
|
||||||
kmp_init(&s->kmp, text.bytes, text.len, pat.bytes, pat.len);
|
kmp_init(&s->kmp, text.bytes, text.len, pat.bytes, pat.len);
|
||||||
s->kmp.i = start;
|
s->kmp.i = start;
|
||||||
s->subst = subst.bytes;
|
s->subst = subst;
|
||||||
s->substlen = subst.len;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
JANET_CORE_FN(cfun_string_replace,
|
JANET_CORE_FN(cfun_string_replace,
|
||||||
"(string/replace patt subst str)",
|
"(string/replace patt subst str)",
|
||||||
"Replace the first occurrence of `patt` with `subst` in the string `str`. "
|
"Replace the first occurrence of `patt` with `subst` in the string `str`. "
|
||||||
|
"If `subst` is a function, it will be called with `patt` only if a match is found, "
|
||||||
|
"and should return the actual replacement text to use. "
|
||||||
"Will return the new string if `patt` is found, otherwise returns `str`.") {
|
"Will return the new string if `patt` is found, otherwise returns `str`.") {
|
||||||
int32_t result;
|
int32_t result;
|
||||||
struct replace_state s;
|
struct replace_state s;
|
||||||
@ -397,10 +397,11 @@ JANET_CORE_FN(cfun_string_replace,
|
|||||||
kmp_deinit(&s.kmp);
|
kmp_deinit(&s.kmp);
|
||||||
return janet_stringv(s.kmp.text, s.kmp.textlen);
|
return janet_stringv(s.kmp.text, s.kmp.textlen);
|
||||||
}
|
}
|
||||||
buf = janet_string_begin(s.kmp.textlen - s.kmp.patlen + s.substlen);
|
JanetByteView subst = janet_text_substitution(&s.subst, s.kmp.text + result, s.kmp.patlen);
|
||||||
|
buf = janet_string_begin(s.kmp.textlen - s.kmp.patlen + subst.len);
|
||||||
safe_memcpy(buf, s.kmp.text, result);
|
safe_memcpy(buf, s.kmp.text, result);
|
||||||
safe_memcpy(buf + result, s.subst, s.substlen);
|
safe_memcpy(buf + result, subst.bytes, subst.len);
|
||||||
safe_memcpy(buf + result + s.substlen,
|
safe_memcpy(buf + result + subst.len,
|
||||||
s.kmp.text + result + s.kmp.patlen,
|
s.kmp.text + result + s.kmp.patlen,
|
||||||
s.kmp.textlen - result - s.kmp.patlen);
|
s.kmp.textlen - result - s.kmp.patlen);
|
||||||
kmp_deinit(&s.kmp);
|
kmp_deinit(&s.kmp);
|
||||||
@ -411,6 +412,8 @@ JANET_CORE_FN(cfun_string_replaceall,
|
|||||||
"(string/replace-all patt subst str)",
|
"(string/replace-all patt subst str)",
|
||||||
"Replace all instances of `patt` with `subst` in the string `str`. Overlapping "
|
"Replace all instances of `patt` with `subst` in the string `str`. Overlapping "
|
||||||
"matches will not be counted, only the first match in such a span will be replaced. "
|
"matches will not be counted, only the first match in such a span will be replaced. "
|
||||||
|
"If `subst` is a function, it will be called with `patt` once for each match, "
|
||||||
|
"and should return the actual replacement text to use. "
|
||||||
"Will return the new string if `patt` is found, otherwise returns `str`.") {
|
"Will return the new string if `patt` is found, otherwise returns `str`.") {
|
||||||
int32_t result;
|
int32_t result;
|
||||||
struct replace_state s;
|
struct replace_state s;
|
||||||
@ -419,8 +422,9 @@ JANET_CORE_FN(cfun_string_replaceall,
|
|||||||
replacesetup(argc, argv, &s);
|
replacesetup(argc, argv, &s);
|
||||||
janet_buffer_init(&b, s.kmp.textlen);
|
janet_buffer_init(&b, s.kmp.textlen);
|
||||||
while ((result = kmp_next(&s.kmp)) >= 0) {
|
while ((result = kmp_next(&s.kmp)) >= 0) {
|
||||||
|
JanetByteView subst = janet_text_substitution(&s.subst, s.kmp.text + result, s.kmp.patlen);
|
||||||
janet_buffer_push_bytes(&b, s.kmp.text + lastindex, result - lastindex);
|
janet_buffer_push_bytes(&b, s.kmp.text + lastindex, result - lastindex);
|
||||||
janet_buffer_push_bytes(&b, s.subst, s.substlen);
|
janet_buffer_push_bytes(&b, subst.bytes, subst.len);
|
||||||
lastindex = result + s.kmp.patlen;
|
lastindex = result + s.kmp.patlen;
|
||||||
kmp_seti(&s.kmp, lastindex);
|
kmp_seti(&s.kmp, lastindex);
|
||||||
}
|
}
|
||||||
|
@ -663,6 +663,46 @@ JanetBinding janet_binding_from_entry(Janet entry) {
|
|||||||
return binding;
|
return binding;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If the value at the given address can be coerced to a byte view,
|
||||||
|
return that byte view. If it can't, replace the value at the address
|
||||||
|
with the result of janet_to_string, and return a byte view over that
|
||||||
|
string. */
|
||||||
|
static JanetByteView memoize_byte_view(Janet *value) {
|
||||||
|
JanetByteView result;
|
||||||
|
if (!janet_bytes_view(*value, &result.bytes, &result.len)) {
|
||||||
|
JanetString str = janet_to_string(*value);
|
||||||
|
*value = janet_wrap_string(str);
|
||||||
|
result.bytes = str;
|
||||||
|
result.len = janet_string_length(str);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return a byte view for `value`. If janet_bytes_view does not accept the
 * value, the view covers the string produced by janet_to_string instead.
 * The value is passed by copy; nothing is written back to the caller. */
static JanetByteView to_byte_view(Janet value) {
    JanetByteView view;
    int viewable = janet_bytes_view(value, &view.bytes, &view.len);
    if (!viewable) {
        JanetString text = janet_to_string(value);
        view.len = janet_string_length(text);
        view.bytes = text;
    }
    return view;
}
|
||||||
|
|
||||||
|
/* Compute the replacement text for one match.
 *
 * subst: the substitution value. If it is a function or cfunction, it is
 *        invoked with a single argument, a new string built from the `len`
 *        bytes at `bytes`, and its result is viewed as bytes (converted with
 *        janet_to_string when it is not byte-viewable). Any other value is
 *        handed to memoize_byte_view, which may overwrite *subst with its
 *        string form.
 * bytes, len: the matched text.
 *
 * Returns a byte view over the replacement text. */
JanetByteView janet_text_substitution(Janet *subst, const uint8_t *bytes, uint32_t len) {
    Janet replacement;
    switch (janet_type(*subst)) {
        case JANET_FUNCTION: {
            Janet arg = janet_stringv(bytes, len);
            replacement = janet_call(janet_unwrap_function(*subst), 1, &arg);
            break;
        }
        case JANET_CFUNCTION: {
            Janet arg = janet_stringv(bytes, len);
            replacement = janet_unwrap_cfunction(*subst)(1, &arg);
            break;
        }
        default:
            /* Plain value: use it directly, stringifying in place if needed. */
            return memoize_byte_view(subst);
    }
    return to_byte_view(replacement);
}
|
||||||
|
|
||||||
JanetBinding janet_resolve_ext(JanetTable *env, const uint8_t *sym) {
|
JanetBinding janet_resolve_ext(JanetTable *env, const uint8_t *sym) {
|
||||||
Janet entry = janet_table_get(env, janet_wrap_symbol(sym));
|
Janet entry = janet_table_get(env, janet_wrap_symbol(sym));
|
||||||
return janet_binding_from_entry(entry);
|
return janet_binding_from_entry(entry);
|
||||||
|
@ -93,6 +93,7 @@ void janet_buffer_format(
|
|||||||
Janet *argv);
|
Janet *argv);
|
||||||
Janet janet_next_impl(Janet ds, Janet key, int is_interpreter);
|
Janet janet_next_impl(Janet ds, Janet key, int is_interpreter);
|
||||||
JanetBinding janet_binding_from_entry(Janet entry);
|
JanetBinding janet_binding_from_entry(Janet entry);
|
||||||
|
JanetByteView janet_text_substitution(Janet *subst, const uint8_t *bytes, uint32_t len);
|
||||||
|
|
||||||
/* Registry functions */
|
/* Registry functions */
|
||||||
void janet_registry_put(
|
void janet_registry_put(
|
||||||
|
@ -72,6 +72,10 @@
|
|||||||
(assert (= (string/replace "X" "." "XXX...XXX...XXX") ".XX...XXX...XXX") "string/replace 1")
|
(assert (= (string/replace "X" "." "XXX...XXX...XXX") ".XX...XXX...XXX") "string/replace 1")
|
||||||
(assert (= (string/replace-all "X" "." "XXX...XXX...XXX") "...............") "string/replace-all 1")
|
(assert (= (string/replace-all "X" "." "XXX...XXX...XXX") "...............") "string/replace-all 1")
|
||||||
(assert (= (string/replace-all "XX" "." "XXX...XXX...XXX") ".X....X....X") "string/replace-all 2")
|
(assert (= (string/replace-all "XX" "." "XXX...XXX...XXX") ".X....X....X") "string/replace-all 2")
|
||||||
|
(assert (= (string/replace "xx" string/ascii-upper "xxyxyxyxxxy") "XXyxyxyxxxy") "string/replace function")
|
||||||
|
(assert (= (string/replace-all "xx" string/ascii-upper "xxyxyxyxxxy") "XXyxyxyXXxy") "string/replace-all function")
|
||||||
|
(assert (= (string/replace "x" 12 "xyx") "12yx") "string/replace stringable")
|
||||||
|
(assert (= (string/replace-all "x" 12 "xyx") "12y12") "string/replace-all stringable")
|
||||||
(assert (= (string/ascii-lower "ABCabc&^%!@:;.") "abcabc&^%!@:;.") "string/ascii-lower")
|
(assert (= (string/ascii-lower "ABCabc&^%!@:;.") "abcabc&^%!@:;.") "string/ascii-lower")
|
||||||
(assert (= (string/ascii-upper "ABCabc&^%!@:;.") "ABCABC&^%!@:;.") "string/ascii-lower")
|
(assert (= (string/ascii-upper "ABCabc&^%!@:;.") "ABCABC&^%!@:;.") "string/ascii-lower")
|
||||||
(assert (= (string/reverse "") "") "string/reverse 1")
|
(assert (= (string/reverse "") "") "string/reverse 1")
|
||||||
|
@ -330,7 +330,6 @@ neldb\0\0\0\xD8\x05printG\x01\0\xDE\xDE\xDE'\x03\0marshal_tes/\x02
|
|||||||
(assert (deep= (peg/find-all '"/" p) @[0 4 10 14]) "peg find-all")
|
(assert (deep= (peg/find-all '"/" p) @[0 4 10 14]) "peg find-all")
|
||||||
|
|
||||||
# Peg replace and replace-all
|
# Peg replace and replace-all
|
||||||
(var ti 0)
|
|
||||||
(defn check-replacer
|
(defn check-replacer
|
||||||
[x y z]
|
[x y z]
|
||||||
(assert (= (string/replace x y z) (string (peg/replace x y z))) "replacer test replace")
|
(assert (= (string/replace x y z) (string (peg/replace x y z))) "replacer test replace")
|
||||||
@ -339,6 +338,14 @@ neldb\0\0\0\xD8\x05printG\x01\0\xDE\xDE\xDE'\x03\0marshal_tes/\x02
|
|||||||
(check-replacer "abc" "Z" "")
|
(check-replacer "abc" "Z" "")
|
||||||
(check-replacer "aba" "ZZZZZZ" "ababababababa")
|
(check-replacer "aba" "ZZZZZZ" "ababababababa")
|
||||||
(check-replacer "aba" "" "ababababababa")
|
(check-replacer "aba" "" "ababababababa")
|
||||||
|
(check-replacer "aba" string/ascii-upper "ababababababa")
|
||||||
|
(check-replacer "aba" 123 "ababababababa")
|
||||||
|
(assert (= (string (peg/replace-all ~(set "ab") string/ascii-upper "abcaa"))
|
||||||
|
"ABcAA")
|
||||||
|
"peg/replace-all cfunction")
|
||||||
|
(assert (= (string (peg/replace-all ~(set "ab") |$ "abcaa"))
|
||||||
|
"abcaa")
|
||||||
|
"peg/replace-all function")
|
||||||
|
|
||||||
# Peg bug
|
# Peg bug
|
||||||
(assert (deep= @[] (peg/match '(any 1) @"")) "peg empty pattern 1")
|
(assert (deep= @[] (peg/match '(any 1) @"")) "peg empty pattern 1")
|
||||||
|
Loading…
Reference in New Issue
Block a user