mirror of
https://github.com/Jermolene/TiddlyWiki5
synced 2025-01-27 01:14:44 +00:00
Introduce new levenshtein, makepatches, applypatches operators (#7290)
* Initial Commit * Fix crash with invalid patches See https://github.com/Jermolene/TiddlyWiki5/pull/7290#issuecomment-1453155311 Thanks @yaisog * Add words and lines options to makepatches (#7326) * Prevent infinite loop for single-word texts (#7327) * Add docs and examples for the new operators (#7328) * Create makepatches Operator.tid * Improve wording * Doc and examples for the new operators --------- Co-authored-by: yaisog <m@rcuswinter.de>
This commit is contained in:
parent
f343198353
commit
11ffc83493
@ -74,6 +74,113 @@ exports.join = makeStringReducingOperator(
|
||||
},null
|
||||
);
|
||||
|
||||
var dmp = require("$:/core/modules/utils/diff-match-patch/diff_match_patch.js");
|
||||
|
||||
exports.levenshtein = makeStringBinaryOperator(
|
||||
function(a,b) {
|
||||
var dmpObject = new dmp.diff_match_patch(),
|
||||
diffs = dmpObject.diff_main(a,b);
|
||||
return [dmpObject.diff_levenshtein(diffs) + ""];
|
||||
}
|
||||
);
|
||||
|
||||
// these two functions are adapted from https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs
|
||||
/*
Diff two texts at the granularity selected by mode rather than character by character.
Both texts are encoded by diffPartsToChars() so that each part is a single character, the encoded strings are diffed, and the resulting diff is expanded back into the original parts.
Returns the array of diffs produced by diff-match-patch.
*/
function diffLineWordMode(text1,text2,mode) {
	var differ = new dmp.diff_match_patch(),
		encoded = diffPartsToChars(text1,text2,mode),
		diffs = differ.diff_main(encoded.chars1,encoded.chars2,false);
	// Expands the diffs in place, using the lookup array to turn each character back into its part
	differ.diff_charsToLines_(diffs,encoded.lineArray);
	return diffs;
}
|
||||
|
||||
/*
Split two texts into parts and encode each text as a string in which every character code is the index of a part in a shared lookup array.

text1, text2: the texts to encode
mode: "words" makes each part a run of characters up to and including the next run of non-word characters (as matched by \W+); any other value makes each part a line up to and including its newline

Returns {chars1:, chars2:, lineArray:} where lineArray[code] is the part represented by character code `code` in chars1 and chars2. lineArray[0] is a placeholder empty string so that no part is ever encoded as character code 0.
*/
function diffPartsToChars(text1,text2,mode) {
	var lineArray = [""],
		// Created without a prototype so that parts such as "hasOwnProperty" or "__proto__" behave as ordinary keys
		lineHash = Object.create(null);

	// Encode a single text, registering unseen parts until the lookup array holds maxLines entries
	function diff_linesToPartsMunge_(text,maxLines) {
		var chars = "",
			lineStart = 0,
			lineEnd = -1,
			lineArrayLength = lineArray.length,
			searchRegexp = /\W+/g,
			line;
		while(lineEnd < text.length - 1) {
			if(mode === "words") {
				// The global regexp resumes from the end of the previous match, which is where the current part starts
				if(searchRegexp.exec(text) === null) {
					// No further separator: the remainder of the text is the final part
					lineEnd = text.length - 1;
				} else {
					lineEnd = searchRegexp.lastIndex - 1;
				}
			} else {
				lineEnd = text.indexOf("\n",lineStart);
				if(lineEnd === -1) {
					lineEnd = text.length - 1;
				}
			}
			line = text.substring(lineStart,lineEnd + 1);
			if(lineHash[line] !== undefined) {
				chars += String.fromCharCode(lineHash[line]);
			} else {
				if(lineArrayLength === maxLines) {
					// The limit for this text has been reached: treat everything that remains as one final part
					line = text.substring(lineStart);
					lineEnd = text.length;
				}
				chars += String.fromCharCode(lineArrayLength);
				lineHash[line] = lineArrayLength;
				lineArray[lineArrayLength++] = line;
			}
			lineStart = lineEnd + 1;
		}
		return chars;
	}

	// The first text may only grow the lookup array to 40000 entries so that codes remain available for the second text; 65535 is the largest code String.fromCharCode() can represent in a single character
	var chars1 = diff_linesToPartsMunge_(text1,40000);
	var chars2 = diff_linesToPartsMunge_(text2,65535);
	return {chars1: chars1, chars2: chars2, lineArray: lineArray};
}
|
||||
|
||||
exports.makepatches = function(source,operator,options) {
|
||||
var dmpObject = new dmp.diff_match_patch(),
|
||||
suffix = operator.suffix || "",
|
||||
result = [];
|
||||
|
||||
source(function(tiddler,title) {
|
||||
var diffs, patches;
|
||||
if(suffix === "lines" || suffix === "words") {
|
||||
diffs = diffLineWordMode(title,operator.operand,suffix);
|
||||
patches = dmpObject.patch_make(title,diffs);
|
||||
} else {
|
||||
patches = dmpObject.patch_make(title,operator.operand);
|
||||
}
|
||||
Array.prototype.push.apply(result,[dmpObject.patch_toText(patches)]);
|
||||
});
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
exports.applypatches = makeStringBinaryOperator(
|
||||
function(a,b) {
|
||||
var dmpObject = new dmp.diff_match_patch(),
|
||||
patches;
|
||||
try {
|
||||
patches = dmpObject.patch_fromText(b);
|
||||
} catch(e) {
|
||||
}
|
||||
if(patches) {
|
||||
return [dmpObject.patch_apply(patches,a)[0]];
|
||||
} else {
|
||||
return [a];
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
function makeStringBinaryOperator(fnCalc) {
|
||||
return function(source,operator,options) {
|
||||
var result = [];
|
||||
@ -184,4 +291,4 @@ exports.charcode = function(source,operator,options) {
|
||||
return [chars.join("")];
|
||||
};
|
||||
|
||||
})();
|
||||
})();
|
@ -0,0 +1,28 @@
|
||||
title: Filters/DiffMergePatch1
|
||||
description: Tests for diff-merge-patch derived operators
|
||||
type: text/vnd.tiddlywiki-multiple
|
||||
tags: [[$:/tags/wiki-test-spec]]
|
||||
|
||||
title: Output
|
||||
|
||||
\whitespace trim
|
||||
\define text1()
|
||||
the cat sat on the mat
|
||||
\end
|
||||
|
||||
\define text2()
|
||||
the hat saw in every category
|
||||
\end
|
||||
|
||||
<$text text={{{ [<text1>makepatches<text2>] }}}/>
|
||||
+
|
||||
title: ExpectedResult
|
||||
|
||||
<p>@@ -1,22 +1,29 @@
|
||||
the
|
||||
-c
|
||||
+h
|
||||
at sa
|
||||
-t on the mat
|
||||
+w in every category
|
||||
</p>
|
@ -0,0 +1,25 @@
|
||||
title: Filters/DiffMergePatch2
|
||||
description: Tests for diff-merge-patch derived operators
|
||||
type: text/vnd.tiddlywiki-multiple
|
||||
tags: [[$:/tags/wiki-test-spec]]
|
||||
|
||||
title: Output
|
||||
|
||||
\whitespace trim
|
||||
\define text1()
|
||||
the cat sat on the mat
|
||||
\end
|
||||
|
||||
\define text2()
|
||||
the hat saw in every category
|
||||
\end
|
||||
|
||||
<$let patches={{{ [<text1>makepatches<text2>] }}}>
|
||||
|
||||
<$text text={{{ [<text1>applypatches<patches>] }}}/>
|
||||
|
||||
</$let>
|
||||
+
|
||||
title: ExpectedResult
|
||||
|
||||
the hat saw in every category
|
@ -0,0 +1,22 @@
|
||||
title: Filters/DiffMergePatch3
|
||||
description: Tests for diff-merge-patch derived operators
|
||||
type: text/vnd.tiddlywiki-multiple
|
||||
tags: [[$:/tags/wiki-test-spec]]
|
||||
|
||||
title: Output
|
||||
|
||||
\whitespace trim
|
||||
\define text1()
|
||||
the cat sat on the mat
|
||||
\end
|
||||
|
||||
\define patches()
|
||||
**NOT A VALID PATCH**
|
||||
\end
|
||||
|
||||
<$text text={{{ [<text1>applypatches<patches>] }}}/>
|
||||
|
||||
+
|
||||
title: ExpectedResult
|
||||
|
||||
the cat sat on the mat
|
@ -1071,6 +1071,20 @@ Tests the filtering mechanism.
|
||||
expect(wiki.filterTiddlers("[charcode[9],[10]]").join(" ")).toBe(String.fromCharCode(9) + String.fromCharCode(10));
|
||||
expect(wiki.filterTiddlers("[charcode[]]").join(" ")).toBe("");
|
||||
});
|
||||
|
||||
it("should handle the levenshtein operator", function() {
	// Each entry pairs a filter expression with the distance it is expected to yield
	var cases = [
		["[[apple]levenshtein[apple]]","0"],
		["[[apple]levenshtein[banana]]","9"],
		["[[representation]levenshtein[misreprehensionisation]]","10"],
		["[[the cat sat on the mat]levenshtein[the hat saw in every category]]","13"]
	];
	for(var index = 0; index < cases.length; index++) {
		expect(wiki.filterTiddlers(cases[index][0]).join(" ")).toBe(cases[index][1]);
	}
});
|
||||
|
||||
it("should handle the makepatches operator", function() {
	// Each entry pairs a filter expression with the patch text it is expected to yield
	var cases = [
		["[[apple]makepatches[apple]]",""],
		["[[apple]makepatches[banana]]","@@ -1,5 +1,6 @@\n-apple\n+banana\n"],
		["[[representation]makepatches[misreprehensionisation]]","@@ -1,13 +1,21 @@\n+mis\n repre\n-sent\n+hensionis\n atio\n"],
		["[[the cat sat on the mat]makepatches[the hat saw in every category]]","@@ -1,22 +1,29 @@\n the \n-c\n+h\n at sa\n-t on the mat\n+w in every category\n"]
	];
	for(var index = 0; index < cases.length; index++) {
		expect(wiki.filterTiddlers(cases[index][0]).join(" ")).toBe(cases[index][1]);
	}
});
|
||||
|
||||
it("should parse filter variable parameters", function(){
|
||||
expect($tw.utils.parseFilterVariable("currentTiddler")).toEqual(
|
||||
|
15
editions/tw5.com/tiddlers/filters/applypatches Operator.tid
Normal file
15
editions/tw5.com/tiddlers/filters/applypatches Operator.tid
Normal file
@ -0,0 +1,15 @@
|
||||
caption: applypatches
|
||||
created: 20230304154824762
|
||||
modified: 20230304154826621
|
||||
op-purpose: applies a set of patches to transform the input
|
||||
op-input: a [[selection of titles|Title Selection]]
|
||||
op-parameter: a string containing patches from the [[makepatches Operator]]
|
||||
op-parameter-name: P
|
||||
op-output: the transformed input to which the patches <<.place P>> have been applied
|
||||
tags: [[Filter Operators]] [[String Operators]]
|
||||
title: applypatches Operator
|
||||
type: text/vnd.tiddlywiki
|
||||
|
||||
<<.from-version "5.2.6">>
|
||||
|
||||
<<.operator-examples "makepatches and applypatches">>
|
11
editions/tw5.com/tiddlers/filters/examples/Hamlet.tid
Normal file
11
editions/tw5.com/tiddlers/filters/examples/Hamlet.tid
Normal file
@ -0,0 +1,11 @@
|
||||
created: 20230304161453213
|
||||
modified: 20230304162156826
|
||||
tags: [[Operator Examples]]
|
||||
title: Hamlet
|
||||
type: application/json
|
||||
|
||||
{
|
||||
"Shakespeare-old": "Hamlet: Do you see yonder cloud that's almost in shape of a camel?\nPolonius: By the mass, and 'tis like a camel, indeed.\nHamlet: Methinks it is like a weasel.\nPolonius: It is backed like a weasel.\nHamlet: Or like a whale?\nPolonius: Very like a whale.\n-- Shakespeare",
|
||||
"Shakespeare-new": "Hamlet: Do you see the cloud over there that's almost the shape of a camel?\nPolonius: By golly, it is like a camel, indeed.\nHamlet: I think it looks like a weasel.\nPolonius: It is shaped like a weasel.\nHamlet: Or like a whale?\nPolonius: It's totally like a whale.\n-- Shakespeare",
|
||||
"Trekkie-old": "Kirk: Do you see yonder cloud that's almost in shape of a Klingon?\nSpock: By the mass, and 'tis like a Klingon, indeed.\nKirk: Methinks it is like a Vulcan.\nSpock: It is backed like a Vulcan.\nKirk: Or like a Romulan?\nSpock: Very like a Romulan.\n-- Trekkie"
|
||||
}
|
@ -0,0 +1,21 @@
|
||||
created: 20230304183158728
|
||||
modified: 20230304183159654
|
||||
tags: [[levenshtein Operator]] [[Operator Examples]]
|
||||
title: levenshtein Operator (Examples)
|
||||
type: text/vnd.tiddlywiki
|
||||
|
||||
Determine the Levenshtein distance between two words:
|
||||
|
||||
<<.operator-example 1 "[[motel]levenshtein[money]]">>
|
||||
|
||||
List the 10 tiddler titles with the smallest Levenshtein distance to "~TiddlyWiki":
|
||||
|
||||
<$macrocall $name='wikitext-example-without-html'
|
||||
src="""<ul>
|
||||
<$list filter="[all[tiddlers]!is[system]] :sort:number[levenshtein[TiddlyWiki]] :and[first[10]]">
|
||||
<li>
|
||||
<$link /> (<$text text={{{ [all[current]levenshtein[TiddlyWiki]] }}} />)
|
||||
</li>
|
||||
</$list>
|
||||
</ul>
|
||||
"""/>
|
@ -0,0 +1,43 @@
|
||||
created: 20230304160331362
|
||||
modified: 20230304160332927
|
||||
tags: [[makepatches Operator]] [[applypatches Operator]] [[Operator Examples]]
|
||||
title: makepatches and applypatches Operator (Examples)
|
||||
type: text/vnd.tiddlywiki
|
||||
|
||||
These examples use the example texts in [[Hamlet]], taken from [[https://neil.fraser.name/software/diff_match_patch/demos/patch.html]]
|
||||
|
||||
|^!Shakespeare's original |@@white-space: pre-wrap;{{Hamlet##Shakespeare-old}}@@ |
|
||||
|^!Modern English |@@white-space: pre-wrap;{{Hamlet##Shakespeare-new}}@@ |
|
||||
|^!Trekkie's Copy |@@white-space: pre-wrap;{{Hamlet##Trekkie-old}}@@ |
|
||||
|
||||
<div class="doc-examples-hard-breaks">
|
||||
|
||||
Use `makepatches` to generate the set of patches to transform Shakespeare's original into Modern English:
|
||||
|
||||
<<.operator-example 1 "[{Hamlet##Shakespeare-old}makepatches{Hamlet##Shakespeare-new}]">>
|
||||
|
||||
Use `applypatches` to apply the patches to Shakespeare's original text:
|
||||
|
||||
<<.operator-example 2 "[{Hamlet##Shakespeare-old}makepatches{Hamlet##Shakespeare-new}] :map[{Hamlet##Shakespeare-old}applypatches<currentTiddler>]">>
|
||||
|
||||
In the above example, the [[Map Filter Run Prefix]] is used to pass the patches information as a parameter to `applypatches`. Inside `:map`, <<.value currentTiddler>> is set to the input title (i.e. the previously generated patches).
|
||||
|
||||
The patch information from the Shakespeare texts can also be used to transform the //Trekkie's Copy// to a Modern English version:
|
||||
|
||||
<<.operator-example 3 "[{Hamlet##Shakespeare-old}makepatches{Hamlet##Shakespeare-new}] :map[{Hamlet##Trekkie-old}applypatches<currentTiddler>]">>
|
||||
|
||||
The above examples used the character mode of `makepatches`. The `words` mode yields very similar results in this case, even when applied to the //Trekkie's Copy//.
|
||||
|
||||
<<.operator-example 4 "[{Hamlet##Shakespeare-old}makepatches:words{Hamlet##Shakespeare-new}]">>
|
||||
|
||||
<<.operator-example 5 "[{Hamlet##Shakespeare-old}makepatches:words{Hamlet##Shakespeare-new}] :map[{Hamlet##Trekkie-old}applypatches<currentTiddler>]">>
|
||||
|
||||
The `lines` mode doesn't work as well in this application:
|
||||
|
||||
<<.operator-example 6 "[{Hamlet##Shakespeare-old}makepatches:lines{Hamlet##Shakespeare-new}]">>
|
||||
|
||||
<<.operator-example 7 "[{Hamlet##Shakespeare-old}makepatches:lines{Hamlet##Shakespeare-new}] :map[{Hamlet##Trekkie-old}applypatches<currentTiddler>]">>
|
||||
|
||||
It is better suited as a very fast algorithm to detect line-wise incremental changes to texts and store only the changes instead of multiple versions of the whole texts.
|
||||
|
||||
</div>
|
17
editions/tw5.com/tiddlers/filters/levenshtein Operator.tid
Normal file
17
editions/tw5.com/tiddlers/filters/levenshtein Operator.tid
Normal file
@ -0,0 +1,17 @@
|
||||
caption: levenshtein
|
||||
created: 20230304181639768
|
||||
modified: 20230304181642365
|
||||
op-purpose: determine the Levenshtein distance of the input title(s) and a given string
|
||||
op-input: a [[selection of titles|Title Selection]]
|
||||
op-parameter: a string
|
||||
op-parameter-name: S
|
||||
op-output: the Levenshtein distance between the input title(s) and <<.place S>>
|
||||
tags: [[Filter Operators]] [[String Operators]]
|
||||
title: levenshtein Operator
|
||||
type: text/vnd.tiddlywiki
|
||||
|
||||
<<.from-version "5.2.6">>
|
||||
|
||||
The Levenshtein distance is a metric for measuring the difference between two strings. Informally, the Levenshtein distance between two strings is the //minimum// number of single-character edits required to change one string into the other.
|
||||
|
||||
<<.operator-examples "levenshtein">>
|
23
editions/tw5.com/tiddlers/filters/makepatches Operator.tid
Normal file
23
editions/tw5.com/tiddlers/filters/makepatches Operator.tid
Normal file
@ -0,0 +1,23 @@
|
||||
caption: makepatches
|
||||
created: 20230304122354967
|
||||
modified: 20230304122400128
|
||||
op-purpose: returns a set of patches that transform the input to a given string
|
||||
op-input: a [[selection of titles|Title Selection]]
|
||||
op-parameter: a string of characters
|
||||
op-parameter-name: S
|
||||
op-output: a set of patch instructions per input title to be used by the [[applypatches Operator]] to transform the input title(s) into the string <<.place S>>
|
||||
op-suffix: `lines` to operate in line mode, `words` to operate in word mode. If omitted (default), the algorithm operates in character mode. See notes below.
|
||||
op-suffix-name: T
|
||||
tags: [[Filter Operators]] [[String Operators]]
|
||||
title: makepatches Operator
|
||||
type: text/vnd.tiddlywiki
|
||||
|
||||
<<.from-version "5.2.6">>
|
||||
|
||||
The difference algorithm operates in character mode by default. This produces the most detailed diff possible. In `words` mode, each word in the input text is transformed into a meta-character, upon which the algorithm then operates. In the default character mode, the filter would find two patches between "ActionWidget" and "Action-Widgets" (the hyphen and the plural s), while in `words` mode, the whole word is found to be changed. In `lines` mode, the meta-character is formed from the whole line, delimited by newline characters, and is found to be changed independent of the number of changes within the line.
|
||||
|
||||
The different modes influence the result when the patches are applied to texts other than the original, as well as the runtime.
|
||||
|
||||
<<.tip "The calculation in `words` mode is roughly 10 times faster than the default character mode, while `lines` mode can be more than 100 times faster than the default.">>
|
||||
|
||||
<<.operator-examples "makepatches and applypatches">>
|
@ -133,6 +133,10 @@ td svg {
|
||||
padding-left: 20px;
|
||||
}
|
||||
|
||||
.doc-examples-hard-breaks .doc-example-result li {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
.doc-bad-example code, .doc-bad-example pre, table.doc-bad-example {
|
||||
background-color:#ffff80;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user