Introduce new levenshtein, makepatches, applypatches operators (#7290)

* Initial Commit

* Fix crash with invalid patches

See https://github.com/Jermolene/TiddlyWiki5/pull/7290#issuecomment-1453155311

Thanks @yaisog

* Add words and lines options to makepatches (#7326)

* Prevent infinite loop for single-word texts (#7327)

* Add docs and examples for the new operators (#7328)

* Create makepatches Operator.tid

* Improve wording

* Doc and examples for the new operators

---------

Co-authored-by: yaisog <m@rcuswinter.de>
This commit is contained in:
Jeremy Ruston 2023-03-04 21:16:26 +00:00 committed by GitHub
parent f343198353
commit 11ffc83493
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 331 additions and 1 deletions

View File

@ -74,6 +74,113 @@ exports.join = makeStringReducingOperator(
},null
);
var dmp = require("$:/core/modules/utils/diff-match-patch/diff_match_patch.js");
exports.levenshtein = makeStringBinaryOperator(
function(a,b) {
var dmpObject = new dmp.diff_match_patch(),
diffs = dmpObject.diff_main(a,b);
return [dmpObject.diff_levenshtein(diffs) + ""];
}
);
// these two functions are adapted from https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs
/*
Compute a diff between two texts at the granularity selected by `mode`.

Both texts are first encoded by diffPartsToChars() so that every distinct part
is a single placeholder character; the encoded strings are diffed, and the
placeholders in the resulting diff are then expanded back to the original parts.

text1, text2: the strings to compare
mode: passed through unchanged to diffPartsToChars()

Returns the array of diffs produced by diff_main after expansion.
*/
function diffLineWordMode(text1,text2,mode) {
	var engine = new dmp.diff_match_patch(),
		encoded = diffPartsToChars(text1,text2,mode),
		diffs = engine.diff_main(encoded.chars1,encoded.chars2,false);
	// The return value is not used: the expansion is applied to `diffs` itself
	engine.diff_charsToLines_(diffs,encoded.lineArray);
	return diffs;
}
/*
Encode two texts so that each distinct part is represented by a single character.

text1, text2: the strings to encode
mode: "words" splits after every run of non-word characters (/\W+/), so a part is
	a word together with the non-word characters that follow it; any other value
	splits after every "\n", so a part is a line including its newline

Returns {chars1, chars2, lineArray}:
	chars1, chars2: the encoded forms of text1 and text2
	lineArray: lineArray[n] is the part represented by character code n. Entry 0 is
		an unused empty string, so character code 0 is never generated

The number of distinct parts is capped (40000 while encoding text1, 65535 while
encoding text2); once the cap is reached the remainder of the text becomes one part.
*/
function diffPartsToChars(text1,text2,mode) {
	var lineArray = [];
	// Parts are arbitrary user text, so the lookup table must not inherit from
	// Object.prototype: with a plain {} a part equal to "hasOwnProperty" replaced the
	// method used for lookups (crashing the next lookup) and a part equal to
	// "__proto__" was never stored (so identical texts were encoded differently)
	var lineHash = Object.create(null);
	var maxLines;
	lineArray[0] = '';
	function diff_linesToPartsMunge_(text,mode) {
		var chars = '';
		var lineStart = 0;
		var lineEnd = -1;
		var lineArrayLength = lineArray.length,
			regexpResult;
		var searchRegexp = /\W+/g;
		while(lineEnd < text.length - 1) {
			if(mode === "words") {
				// The global regexp resumes from its own lastIndex on every call
				regexpResult = searchRegexp.exec(text);
				// Last character of the separator run, or of the text if there is no further run
				lineEnd = (regexpResult === null ? text.length : searchRegexp.lastIndex) - 1;
			} else {
				lineEnd = text.indexOf('\n', lineStart);
				if(lineEnd === -1) {
					lineEnd = text.length - 1;
				}
			}
			var line = text.substring(lineStart, lineEnd + 1);
			if(lineHash[line] !== undefined) {
				chars += String.fromCharCode(lineHash[line]);
			} else {
				if(lineArrayLength === maxLines) {
					// Cap reached: bail out by treating the rest of the text as a single part
					line = text.substring(lineStart);
					lineEnd = text.length;
				}
				chars += String.fromCharCode(lineArrayLength);
				lineHash[line] = lineArrayLength;
				lineArray[lineArrayLength++] = line;
			}
			lineStart = lineEnd + 1;
		}
		return chars;
	}
	// Leave room in the character code space for the parts that only occur in text2
	maxLines = 40000;
	var chars1 = diff_linesToPartsMunge_(text1,mode);
	maxLines = 65535;
	var chars2 = diff_linesToPartsMunge_(text2,mode);
	return {chars1: chars1, chars2: chars2, lineArray: lineArray};
}
exports.makepatches = function(source,operator,options) {
var dmpObject = new dmp.diff_match_patch(),
suffix = operator.suffix || "",
result = [];
source(function(tiddler,title) {
var diffs, patches;
if(suffix === "lines" || suffix === "words") {
diffs = diffLineWordMode(title,operator.operand,suffix);
patches = dmpObject.patch_make(title,diffs);
} else {
patches = dmpObject.patch_make(title,operator.operand);
}
Array.prototype.push.apply(result,[dmpObject.patch_toText(patches)]);
});
return result;
};
exports.applypatches = makeStringBinaryOperator(
function(a,b) {
var dmpObject = new dmp.diff_match_patch(),
patches;
try {
patches = dmpObject.patch_fromText(b);
} catch(e) {
}
if(patches) {
return [dmpObject.patch_apply(patches,a)[0]];
} else {
return [a];
}
}
);
function makeStringBinaryOperator(fnCalc) {
return function(source,operator,options) {
var result = [];
@ -184,4 +291,4 @@ exports.charcode = function(source,operator,options) {
return [chars.join("")];
};
})();
})();

View File

@ -0,0 +1,28 @@
title: Filters/DiffMergePatch1
description: Tests for diff-merge-patch derived operators
type: text/vnd.tiddlywiki-multiple
tags: [[$:/tags/wiki-test-spec]]
title: Output
\whitespace trim
\define text1()
the cat sat on the mat
\end
\define text2()
the hat saw in every category
\end
<$text text={{{ [<text1>makepatches<text2>] }}}/>
+
title: ExpectedResult
<p>@@ -1,22 +1,29 @@
the
-c
+h
at sa
-t on the mat
+w in every category
</p>

View File

@ -0,0 +1,25 @@
title: Filters/DiffMergePatch2
description: Tests for diff-merge-patch derived operators
type: text/vnd.tiddlywiki-multiple
tags: [[$:/tags/wiki-test-spec]]
title: Output
\whitespace trim
\define text1()
the cat sat on the mat
\end
\define text2()
the hat saw in every category
\end
<$let patches={{{ [<text1>makepatches<text2>] }}}>
<$text text={{{ [<text1>applypatches<patches>] }}}/>
</$let>
+
title: ExpectedResult
the hat saw in every category

View File

@ -0,0 +1,22 @@
title: Filters/DiffMergePatch3
description: Tests for diff-merge-patch derived operators
type: text/vnd.tiddlywiki-multiple
tags: [[$:/tags/wiki-test-spec]]
title: Output
\whitespace trim
\define text1()
the cat sat on the mat
\end
\define patches()
**NOT A VALID PATCH**
\end
<$text text={{{ [<text1>applypatches<patches>] }}}/>
+
title: ExpectedResult
the cat sat on the mat

View File

@ -1071,6 +1071,20 @@ Tests the filtering mechanism.
expect(wiki.filterTiddlers("[charcode[9],[10]]").join(" ")).toBe(String.fromCharCode(9) + String.fromCharCode(10));
expect(wiki.filterTiddlers("[charcode[]]").join(" ")).toBe("");
});
it("should handle the levenshtein operator", function() {
	// Each entry is [filter expression, expected output joined with spaces]
	var cases = [
		["[[apple]levenshtein[apple]]","0"],
		["[[apple]levenshtein[banana]]","9"],
		["[[representation]levenshtein[misreprehensionisation]]","10"],
		["[[the cat sat on the mat]levenshtein[the hat saw in every category]]","13"]
	];
	cases.forEach(function(testCase) {
		expect(wiki.filterTiddlers(testCase[0]).join(" ")).toBe(testCase[1]);
	});
});
it("should handle the makepatches operator", function() {
	// Each entry is [filter expression, expected patch text]
	var cases = [
		["[[apple]makepatches[apple]]",""],
		["[[apple]makepatches[banana]]","@@ -1,5 +1,6 @@\n-apple\n+banana\n"],
		["[[representation]makepatches[misreprehensionisation]]","@@ -1,13 +1,21 @@\n+mis\n repre\n-sent\n+hensionis\n atio\n"],
		["[[the cat sat on the mat]makepatches[the hat saw in every category]]","@@ -1,22 +1,29 @@\n the \n-c\n+h\n at sa\n-t on the mat\n+w in every category\n"]
	];
	cases.forEach(function(testCase) {
		expect(wiki.filterTiddlers(testCase[0]).join(" ")).toBe(testCase[1]);
	});
});
it("should parse filter variable parameters", function(){
expect($tw.utils.parseFilterVariable("currentTiddler")).toEqual(

View File

@ -0,0 +1,15 @@
caption: applypatches
created: 20230304154824762
modified: 20230304154826621
op-purpose: applies a set of patches to transform the input
op-input: a [[selection of titles|Title Selection]]
op-parameter: a string containing patches from the [[makepatches Operator]]
op-parameter-name: P
op-output: the transformed input to which the patches <<.place P>> have been applied
tags: [[Filter Operators]] [[String Operators]]
title: applypatches Operator
type: text/vnd.tiddlywiki
<<.from-version "5.2.6">>
<<.operator-examples "makepatches and applypatches">>

View File

@ -0,0 +1,11 @@
created: 20230304161453213
modified: 20230304162156826
tags: [[Operator Examples]]
title: Hamlet
type: application/json
{
"Shakespeare-old": "Hamlet: Do you see yonder cloud that's almost in shape of a camel?\nPolonius: By the mass, and 'tis like a camel, indeed.\nHamlet: Methinks it is like a weasel.\nPolonius: It is backed like a weasel.\nHamlet: Or like a whale?\nPolonius: Very like a whale.\n-- Shakespeare",
"Shakespeare-new": "Hamlet: Do you see the cloud over there that's almost the shape of a camel?\nPolonius: By golly, it is like a camel, indeed.\nHamlet: I think it looks like a weasel.\nPolonius: It is shaped like a weasel.\nHamlet: Or like a whale?\nPolonius: It's totally like a whale.\n-- Shakespeare",
"Trekkie-old": "Kirk: Do you see yonder cloud that's almost in shape of a Klingon?\nSpock: By the mass, and 'tis like a Klingon, indeed.\nKirk: Methinks it is like a Vulcan.\nSpock: It is backed like a Vulcan.\nKirk: Or like a Romulan?\nSpock: Very like a Romulan.\n-- Trekkie"
}

View File

@ -0,0 +1,21 @@
created: 20230304183158728
modified: 20230304183159654
tags: [[levenshtein Operator]] [[Operator Examples]]
title: levenshtein Operator (Examples)
type: text/vnd.tiddlywiki
Determine the Levenshtein distance between two words:
<<.operator-example 1 "[[motel]levenshtein[money]]">>
List the 10 tiddler titles with the smallest Levenshtein distance to "~TiddlyWiki":
<$macrocall $name='wikitext-example-without-html'
src="""<ul>
<$list filter="[all[tiddlers]!is[system]] :sort:number[levenshtein[TiddlyWiki]] :and[first[10]]">
<li>
<$link /> (<$text text={{{ [all[current]levenshtein[TiddlyWiki]] }}} />)
</li>
</$list>
</ul>
"""/>

View File

@ -0,0 +1,43 @@
created: 20230304160331362
modified: 20230304160332927
tags: [[makepatches Operator]] [[applypatches Operator]] [[Operator Examples]]
title: makepatches and applypatches Operator (Examples)
type: text/vnd.tiddlywiki
These examples use the example texts in [[Hamlet]], taken from [[https://neil.fraser.name/software/diff_match_patch/demos/patch.html]]
|^!Shakespeare's original |@@white-space: pre-wrap;{{Hamlet##Shakespeare-old}}@@ |
|^!Modern English |@@white-space: pre-wrap;{{Hamlet##Shakespeare-new}}@@ |
|^!Trekkie's Copy |@@white-space: pre-wrap;{{Hamlet##Trekkie-old}}@@ |
<div class="doc-examples-hard-breaks">
Use `makepatches` to generate the set of patches to transform Shakespeare's original into Modern English:
<<.operator-example 1 "[{Hamlet##Shakespeare-old}makepatches{Hamlet##Shakespeare-new}]">>
Use `applypatches` to apply the patches to Shakespeare's original text:
<<.operator-example 2 "[{Hamlet##Shakespeare-old}makepatches{Hamlet##Shakespeare-new}] :map[{Hamlet##Shakespeare-old}applypatches<currentTiddler>]">>
In the above example, the [[Map Filter Run Prefix]] is used to pass the patches information as a parameter to `applypatches`. Inside `:map`, <<.value currentTiddler>> is set to the input title (i.e. the previously generated patches).
The patch information from the Shakespeare texts can also be used to transform the //Trekkie's Copy// to a Modern English version:
<<.operator-example 3 "[{Hamlet##Shakespeare-old}makepatches{Hamlet##Shakespeare-new}] :map[{Hamlet##Trekkie-old}applypatches<currentTiddler>]">>
The above examples used the character mode of `makepatches`. The `words` mode yields very similar results in this case, even when applied to the //Trekkie's Copy//.
<<.operator-example 4 "[{Hamlet##Shakespeare-old}makepatches:words{Hamlet##Shakespeare-new}]">>
<<.operator-example 5 "[{Hamlet##Shakespeare-old}makepatches:words{Hamlet##Shakespeare-new}] :map[{Hamlet##Trekkie-old}applypatches<currentTiddler>]">>
The `lines` mode doesn't work as well in this application:
<<.operator-example 6 "[{Hamlet##Shakespeare-old}makepatches:lines{Hamlet##Shakespeare-new}]">>
<<.operator-example 7 "[{Hamlet##Shakespeare-old}makepatches:lines{Hamlet##Shakespeare-new}] :map[{Hamlet##Trekkie-old}applypatches<currentTiddler>]">>
It is better suited as a very fast algorithm to detect line-wise incremental changes to texts and store only the changes instead of multiple versions of the whole texts.
</div>

View File

@ -0,0 +1,17 @@
caption: levenshtein
created: 20230304181639768
modified: 20230304181642365
op-purpose: determine the Levenshtein distance of the input title(s) and a given string
op-input: a [[selection of titles|Title Selection]]
op-parameter: a string
op-parameter-name: S
op-output: the Levenshtein distance between the input title(s) and <<.place S>>
tags: [[Filter Operators]] [[String Operators]]
title: levenshtein Operator
type: text/vnd.tiddlywiki
<<.from-version "5.2.6">>
The Levenshtein distance is a metric for measuring the difference between two strings. Informally, the Levenshtein distance between two strings is the //minimum// number of single-character edits required to change one string into the other.
<<.operator-examples "levenshtein">>

View File

@ -0,0 +1,23 @@
caption: makepatches
created: 20230304122354967
modified: 20230304122400128
op-purpose: returns a set of patches that transform the input to a given string
op-input: a [[selection of titles|Title Selection]]
op-parameter: a string of characters
op-parameter-name: S
op-output: a set of patch instructions per input title to be used by the [[applypatches Operator]] to transform the input title(s) into the string <<.place S>>
op-suffix: `lines` to operate in line mode, `words` to operate in word mode. If omitted (default), the algorithm operates in character mode. See notes below.
op-suffix-name: T
tags: [[Filter Operators]] [[String Operators]]
title: makepatches Operator
type: text/vnd.tiddlywiki
<<.from-version "5.2.6">>
The difference algorithm operates in character mode by default. This produces the most detailed diff possible. In `words` mode, each word in the input text is transformed into a meta-character, upon which the algorithm then operates. In the default character mode, the filter would find two patches between "ActionWidget" and "Action-Widgets" (the hyphen and the plural s), while in `words` mode, the whole word is found to be changed. In `lines` mode, the meta-character is formed from the whole line, delimited by newline characters, and is found to be changed independent of the number of changes within the line.
The different modes influence the result when the patches are applied to texts other than the original, as well as the runtime.
<<.tip "The calculation in `words` mode is roughly 10 times faster than the default character mode, while `lines` mode can be more than 100 times faster than the default.">>
<<.operator-examples "makepatches and applypatches">>

View File

@ -133,6 +133,10 @@ td svg {
padding-left: 20px;
}
.doc-examples-hard-breaks .doc-example-result li {
white-space: pre-wrap;
}
.doc-bad-example code, .doc-bad-example pre, table.doc-bad-example {
background-color:#ffff80;
}