mirror of
				https://github.com/janet-lang/janet
				synced 2025-11-04 01:23:04 +00:00 
			
		
		
		
	peg replacement functions have access to captures
When peg/replace or peg/replace-all is given a function to serve as the text replacement, any captures produced by the PEG are passed as additional arguments to that function.
This commit is contained in:
		@@ -1738,7 +1738,7 @@ static Janet cfun_peg_replace_generic(int32_t argc, Janet *argv, int only_one) {
 | 
			
		||||
                trail = i;
 | 
			
		||||
            }
 | 
			
		||||
            int32_t nexti = (int32_t)(result - c.bytes.bytes);
 | 
			
		||||
            JanetByteView subst = janet_text_substitution(&c.subst, c.bytes.bytes + i, nexti - i);
 | 
			
		||||
            JanetByteView subst = janet_text_substitution(&c.subst, c.bytes.bytes + i, nexti - i, c.s.captures);
 | 
			
		||||
            janet_buffer_push_bytes(ret, subst.bytes, subst.len);
 | 
			
		||||
            trail = nexti;
 | 
			
		||||
            if (nexti == i) nexti++;
 | 
			
		||||
@@ -1758,8 +1758,8 @@ JANET_CORE_FN(cfun_peg_replace_all,
 | 
			
		||||
              "(peg/replace-all peg subst text &opt start & args)",
 | 
			
		||||
              "Replace all matches of `peg` in `text` with `subst`, returning a new buffer. "
 | 
			
		||||
              "The peg does not need to make captures to do replacement. "
 | 
			
		||||
              "If `subst` is a function, it will be called once for each match "
 | 
			
		||||
              "and should return the actual replacement text to use.") {
 | 
			
		||||
              "If `subst` is a function, it will be called with the "
 | 
			
		||||
              "matching text followed by any captures.") {
 | 
			
		||||
    return cfun_peg_replace_generic(argc, argv, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -1767,8 +1767,8 @@ JANET_CORE_FN(cfun_peg_replace,
 | 
			
		||||
              "(peg/replace peg repl text &opt start & args)",
 | 
			
		||||
              "Replace first match of `peg` in `text` with `subst`, returning a new buffer. "
 | 
			
		||||
              "The peg does not need to make captures to do replacement. "
 | 
			
		||||
              "If `subst` is a function, it will be called with the matching text, "
 | 
			
		||||
              "and should return the actual replacement text to use. "
 | 
			
		||||
              "If `subst` is a function, it will be called with the "
 | 
			
		||||
              "matching text followed by any captures. "
 | 
			
		||||
              "If no matches are found, returns the input string in a new buffer.") {
 | 
			
		||||
    return cfun_peg_replace_generic(argc, argv, 1);
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -397,7 +397,7 @@ JANET_CORE_FN(cfun_string_replace,
 | 
			
		||||
        kmp_deinit(&s.kmp);
 | 
			
		||||
        return janet_stringv(s.kmp.text, s.kmp.textlen);
 | 
			
		||||
    }
 | 
			
		||||
    JanetByteView subst = janet_text_substitution(&s.subst, s.kmp.text + result, s.kmp.patlen);
 | 
			
		||||
    JanetByteView subst = janet_text_substitution(&s.subst, s.kmp.text + result, s.kmp.patlen, NULL);
 | 
			
		||||
    buf = janet_string_begin(s.kmp.textlen - s.kmp.patlen + subst.len);
 | 
			
		||||
    safe_memcpy(buf, s.kmp.text, result);
 | 
			
		||||
    safe_memcpy(buf + result, subst.bytes, subst.len);
 | 
			
		||||
@@ -422,7 +422,7 @@ JANET_CORE_FN(cfun_string_replaceall,
 | 
			
		||||
    replacesetup(argc, argv, &s);
 | 
			
		||||
    janet_buffer_init(&b, s.kmp.textlen);
 | 
			
		||||
    while ((result = kmp_next(&s.kmp)) >= 0) {
 | 
			
		||||
        JanetByteView subst = janet_text_substitution(&s.subst, s.kmp.text + result, s.kmp.patlen);
 | 
			
		||||
        JanetByteView subst = janet_text_substitution(&s.subst, s.kmp.text + result, s.kmp.patlen, NULL);
 | 
			
		||||
        janet_buffer_push_bytes(&b, s.kmp.text + lastindex, result - lastindex);
 | 
			
		||||
        janet_buffer_push_bytes(&b, subst.bytes, subst.len);
 | 
			
		||||
        lastindex = result + s.kmp.patlen;
 | 
			
		||||
 
 | 
			
		||||
@@ -688,19 +688,32 @@ static JanetByteView to_byte_view(Janet value) {
 | 
			
		||||
    return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
JanetByteView janet_text_substitution(Janet *subst, const uint8_t *bytes, uint32_t len) {
 | 
			
		||||
  switch (janet_type(*subst)) {
 | 
			
		||||
      case JANET_CFUNCTION: {
 | 
			
		||||
          Janet matched = janet_stringv(bytes, len);
 | 
			
		||||
          return to_byte_view(janet_unwrap_cfunction(*subst)(1, &matched));
 | 
			
		||||
      }
 | 
			
		||||
      case JANET_FUNCTION: {
 | 
			
		||||
          Janet matched = janet_stringv(bytes, len);
 | 
			
		||||
          return to_byte_view(janet_call(janet_unwrap_function(*subst), 1, &matched));
 | 
			
		||||
      }
 | 
			
		||||
      default:
 | 
			
		||||
          return memoize_byte_view(subst);
 | 
			
		||||
  }
 | 
			
		||||
JanetByteView janet_text_substitution(
 | 
			
		||||
    Janet *subst,
 | 
			
		||||
    const uint8_t *bytes,
 | 
			
		||||
    uint32_t len,
 | 
			
		||||
    JanetArray *extra_argv) {
 | 
			
		||||
    int32_t extra_argc = extra_argv == NULL ? 0 : extra_argv->count;
 | 
			
		||||
    JanetType type = janet_type(*subst);
 | 
			
		||||
    switch (type) {
 | 
			
		||||
        case JANET_FUNCTION:
 | 
			
		||||
        case JANET_CFUNCTION: {
 | 
			
		||||
            int32_t argc = 1 + extra_argc;
 | 
			
		||||
            Janet *argv = janet_tuple_begin(argc);
 | 
			
		||||
            argv[0] = janet_stringv(bytes, len);
 | 
			
		||||
            for (int32_t i = 0; i < extra_argc; i++) {
 | 
			
		||||
                argv[i + 1] = extra_argv->data[i];
 | 
			
		||||
            }
 | 
			
		||||
            janet_tuple_end(argv);
 | 
			
		||||
            if (type == JANET_FUNCTION) {
 | 
			
		||||
                return to_byte_view(janet_call(janet_unwrap_function(*subst), argc, argv));
 | 
			
		||||
            } else {
 | 
			
		||||
                return to_byte_view(janet_unwrap_cfunction(*subst)(argc, argv));
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        default:
 | 
			
		||||
            return memoize_byte_view(subst);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
JanetBinding janet_resolve_ext(JanetTable *env, const uint8_t *sym) {
 | 
			
		||||
 
 | 
			
		||||
@@ -93,7 +93,11 @@ void janet_buffer_format(
 | 
			
		||||
    Janet *argv);
 | 
			
		||||
Janet janet_next_impl(Janet ds, Janet key, int is_interpreter);
 | 
			
		||||
JanetBinding janet_binding_from_entry(Janet entry);
 | 
			
		||||
JanetByteView janet_text_substitution(Janet *subst, const uint8_t *bytes, uint32_t len);
 | 
			
		||||
JanetByteView janet_text_substitution(
 | 
			
		||||
    Janet *subst,
 | 
			
		||||
    const uint8_t *bytes,
 | 
			
		||||
    uint32_t len,
 | 
			
		||||
    JanetArray *extra_args);
 | 
			
		||||
 | 
			
		||||
/* Registry functions */
 | 
			
		||||
void janet_registry_put(
 | 
			
		||||
 
 | 
			
		||||
@@ -340,6 +340,7 @@ neldb\0\0\0\xD8\x05printG\x01\0\xDE\xDE\xDE'\x03\0marshal_tes/\x02
 | 
			
		||||
(check-replacer "aba" "" "ababababababa")
 | 
			
		||||
(check-replacer "aba" string/ascii-upper "ababababababa")
 | 
			
		||||
(check-replacer "aba" 123 "ababababababa")
 | 
			
		||||
 | 
			
		||||
(assert (= (string (peg/replace-all ~(set "ab") string/ascii-upper "abcaa"))
 | 
			
		||||
           "ABcAA")
 | 
			
		||||
        "peg/replace-all cfunction")
 | 
			
		||||
@@ -347,6 +348,23 @@ neldb\0\0\0\xD8\x05printG\x01\0\xDE\xDE\xDE'\x03\0marshal_tes/\x02
 | 
			
		||||
           "abcaa")
 | 
			
		||||
        "peg/replace-all function")
 | 
			
		||||
 | 
			
		||||
(defn peg-test [name f peg subst text expected]
 | 
			
		||||
  (assert (= (string (f peg subst text)) expected) name))
 | 
			
		||||
 | 
			
		||||
(peg-test "peg/replace has access to captures"
 | 
			
		||||
  peg/replace
 | 
			
		||||
  ~(sequence "." (capture (set "ab")))
 | 
			
		||||
  (fn [str char] (string/format "%s -> %s, " str (string/ascii-upper char)))
 | 
			
		||||
  ".a.b.c"
 | 
			
		||||
  ".a -> A, .b.c")
 | 
			
		||||
 | 
			
		||||
(peg-test "peg/replace-all has access to captures"
 | 
			
		||||
  peg/replace-all
 | 
			
		||||
  ~(sequence "." (capture (set "ab")))
 | 
			
		||||
  (fn [str char] (string/format "%s -> %s, " str (string/ascii-upper char)))
 | 
			
		||||
  ".a.b.c"
 | 
			
		||||
  ".a -> A, .b -> B, .c")
 | 
			
		||||
 | 
			
		||||
# Peg bug
 | 
			
		||||
(assert (deep= @[] (peg/match '(any 1) @"")) "peg empty pattern 1")
 | 
			
		||||
(assert (deep= @[] (peg/match '(any 1) (buffer))) "peg empty pattern 2")
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user