1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2026-01-20 09:59:48 +00:00

Add the words and lines modes to $diff-text (#9551)

* Initial commit

* Add line ending configuration for consistent tests

* Initial commit

* Correctly consider editcost parameter

* Move diffPartsToChars() to $tw.utils

* Remove superfluous file

* Correct "efficiency" parameter naming in the documentation

The parameter was incorrectly referred to as "efficent" in several places.

* Update diffPartsToChars to ES2017 style

* Consolidate let/const declarations
This commit is contained in:
yaisog
2026-01-17 15:57:52 +01:00
committed by GitHub
parent 99d8afd515
commit 3ba31be2a8
7 changed files with 188 additions and 133 deletions

View File

@@ -37,14 +37,14 @@ exports.trim = function(source,operator,options) {
operand = (operator.operand || ""),
fnCalc;
if(suffix === "prefix") {
fnCalc = function(a,b) {return [$tw.utils.trimPrefix(a,b)];}
fnCalc = function(a,b) {return [$tw.utils.trimPrefix(a,b)];};
} else if(suffix === "suffix") {
fnCalc = function(a,b) {return [$tw.utils.trimSuffix(a,b)];}
fnCalc = function(a,b) {return [$tw.utils.trimSuffix(a,b)];};
} else {
if(operand === "") {
fnCalc = function(a) {return [$tw.utils.trim(a)];}
fnCalc = function(a) {return [$tw.utils.trim(a)];};
} else {
fnCalc = function(a,b) {return [$tw.utils.trimSuffix($tw.utils.trimPrefix(a,b),b)];}
fnCalc = function(a,b) {return [$tw.utils.trimSuffix($tw.utils.trimPrefix(a,b),b)];};
}
}
source(function(tiddler,title) {
@@ -80,9 +80,9 @@ exports.levenshtein = makeStringBinaryOperator(
}
);
// these two functions are adapted from https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs
// this function is adapted from https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs
function diffLineWordMode(text1,text2,mode) {
var a = diffPartsToChars(text1,text2,mode);
var a = $tw.utils.diffPartsToChars(text1,text2,mode);
var lineText1 = a.chars1;
var lineText2 = a.chars2;
var lineArray = a.lineArray;
@@ -91,56 +91,6 @@ function diffLineWordMode(text1,text2,mode) {
return diffs;
}
function diffPartsToChars(text1,text2,mode) {
var lineArray = [];
var lineHash = {};
lineArray[0] = "";
function diff_linesToPartsMunge_(text,mode) {
var chars = "";
var lineStart = 0;
var lineEnd = -1;
var lineArrayLength = lineArray.length,
regexpResult;
var searchRegexp = /\W+/g;
while(lineEnd < text.length - 1) {
if(mode === "words") {
regexpResult = searchRegexp.exec(text);
lineEnd = searchRegexp.lastIndex;
if(regexpResult === null) {
lineEnd = text.length;
}
lineEnd = --lineEnd;
} else {
lineEnd = text.indexOf("\n", lineStart);
if(lineEnd == -1) {
lineEnd = text.length - 1;
}
}
var line = text.substring(lineStart, lineEnd + 1);
if(lineHash.hasOwnProperty ? lineHash.hasOwnProperty(line) : (lineHash[line] !== undefined)) {
chars += String.fromCharCode(lineHash[line]);
} else {
if(lineArrayLength == maxLines) {
line = text.substring(lineStart);
lineEnd = text.length;
}
chars += String.fromCharCode(lineArrayLength);
lineHash[line] = lineArrayLength;
lineArray[lineArrayLength++] = line;
}
lineStart = lineEnd + 1;
}
return chars;
}
var maxLines = 40000;
var chars1 = diff_linesToPartsMunge_(text1,mode);
maxLines = 65535;
var chars2 = diff_linesToPartsMunge_(text2,mode);
return {chars1: chars1, chars2: chars2, lineArray: lineArray};
};
exports.makepatches = function(source,operator,options) {
var suffix = operator.suffix || "",
result = [];
@@ -275,7 +225,7 @@ exports.pad = function(source,operator,options) {
}
});
return results;
}
};
exports.charcode = function(source,operator,options) {
var chars = [];

View File

@@ -55,7 +55,7 @@ Return the dflt (default) parameter if str is not a base-10 number.
exports.getInt = function(str,deflt) {
var i = parseInt(str,10);
return isNaN(i) ? deflt : i;
}
};
/*
Repeatedly replaces a substring within a string. Like String.prototype.replace, but without any of the default special handling of $ sequences in the replace string
@@ -69,12 +69,12 @@ exports.replaceString = function(text,search,replace) {
exports.trimPrefix = function(str,unwanted) {
if(typeof str === "string" && typeof unwanted === "string") {
if(unwanted === "") {
return str.replace(/^\s\s*/, '');
return str.replace(/^\s\s*/, "");
} else {
// Safely regexp-escape the unwanted text
unwanted = unwanted.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&');
var regex = new RegExp('^(' + unwanted + ')+');
return str.replace(regex, '');
unwanted = unwanted.replace(/[\\^$*+?.()|[\]{}]/g, "\\$&");
var regex = new RegExp("^(" + unwanted + ")+");
return str.replace(regex, "");
}
} else {
return str;
@@ -84,12 +84,12 @@ exports.trimPrefix = function(str,unwanted) {
exports.trimSuffix = function(str,unwanted) {
if(typeof str === "string" && typeof unwanted === "string") {
if(unwanted === "") {
return str.replace(/\s\s*$/, '');
return str.replace(/\s\s*$/, "");
} else {
// Safely regexp-escape the unwanted text
unwanted = unwanted.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&');
var regex = new RegExp('(' + unwanted + ')+$');
return str.replace(regex, '');
unwanted = unwanted.replace(/[\\^$*+?.()|[\]{}]/g, "\\$&");
var regex = new RegExp("(" + unwanted + ")+$");
return str.replace(regex, "");
}
} else {
return str;
@@ -101,14 +101,14 @@ Convert a string to sentence case (ie capitalise first letter)
*/
exports.toSentenceCase = function(str) {
return (str || "").replace(/^\S/, function(c) {return c.toUpperCase();});
}
};
/*
Convert a string to title case (ie capitalise each initial letter)
*/
exports.toTitleCase = function(str) {
return (str || "").replace(/(^|\s)\S/g, function(c) {return c.toUpperCase();});
}
};
/*
Find the line break preceding a given position in a string
@@ -358,8 +358,8 @@ exports.formatDateString = function(date,template) {
}],
[/^TZD/, function() {
var tz = date.getTimezoneOffset(),
atz = Math.abs(tz);
return (tz < 0 ? '+' : '-') + $tw.utils.pad(Math.floor(atz / 60)) + ':' + $tw.utils.pad(atz % 60);
atz = Math.abs(tz);
return (tz < 0 ? "+" : "-") + $tw.utils.pad(Math.floor(atz / 60)) + ":" + $tw.utils.pad(atz % 60);
}],
[/^wYY/, function() {
return $tw.utils.pad($tw.utils.getYearForWeekNo(date) - 2000);
@@ -568,9 +568,9 @@ exports.unescapeLineBreaks = function(s) {
exports.escape = function(ch) {
var charCode = ch.charCodeAt(0);
if(charCode <= 0xFF) {
return '\\x' + $tw.utils.pad(charCode.toString(16).toUpperCase());
return "\\x" + $tw.utils.pad(charCode.toString(16).toUpperCase());
} else {
return '\\u' + $tw.utils.pad(charCode.toString(16).toUpperCase(),4);
return "\\u" + $tw.utils.pad(charCode.toString(16).toUpperCase(),4);
}
};
@@ -587,11 +587,11 @@ exports.stringify = function(s, rawUnicode) {
*/
var regex = rawUnicode ? /[\x00-\x1f]/g : /[\x00-\x1f\x80-\uFFFF]/g;
return (s || "")
.replace(/\\/g, '\\\\') // backslash
.replace(/\\/g, "\\\\") // backslash
.replace(/"/g, '\\"') // double quote character
.replace(/'/g, "\\'") // single quote character
.replace(/\r/g, '\\r') // carriage return
.replace(/\n/g, '\\n') // line feed
.replace(/\r/g, "\\r") // carriage return
.replace(/\n/g, "\\n") // line feed
.replace(regex, exports.escape); // non-ASCII characters
};
@@ -601,15 +601,15 @@ exports.jsonStringify = function(s, rawUnicode) {
// See http://www.json.org/
var regex = rawUnicode ? /[\x00-\x1f]/g : /[\x00-\x1f\x80-\uFFFF]/g;
return (s || "")
.replace(/\\/g, '\\\\') // backslash
.replace(/\\/g, "\\\\") // backslash
.replace(/"/g, '\\"') // double quote character
.replace(/\r/g, '\\r') // carriage return
.replace(/\n/g, '\\n') // line feed
.replace(/\x08/g, '\\b') // backspace
.replace(/\x0c/g, '\\f') // formfeed
.replace(/\t/g, '\\t') // tab
.replace(/\r/g, "\\r") // carriage return
.replace(/\n/g, "\\n") // line feed
.replace(/\x08/g, "\\b") // backspace
.replace(/\x0c/g, "\\f") // formfeed
.replace(/\t/g, "\\t") // tab
.replace(regex,function(s) {
return '\\u' + $tw.utils.pad(s.charCodeAt(0).toString(16).toUpperCase(),4);
return "\\u" + $tw.utils.pad(s.charCodeAt(0).toString(16).toUpperCase(),4);
}); // non-ASCII characters
};
@@ -617,7 +617,7 @@ exports.jsonStringify = function(s, rawUnicode) {
Escape the RegExp special characters with a preceding backslash
*/
exports.escapeRegExp = function(s) {
return s.replace(/[\-\/\\\^\$\*\+\?\.\(\)\|\[\]\{\}]/g, '\\$&');
return s.replace(/[\-\/\\\^\$\*\+\?\.\(\)\|\[\]\{\}]/g, "\\$&");
};
/*
@@ -700,7 +700,7 @@ exports.parseTextReference = function(textRef) {
}
} else {
// If we couldn't parse it
result.title = textRef
result.title = textRef;
}
return result;
};
@@ -759,9 +759,9 @@ Cryptographic hash function as used by sha256 filter operator
options.length .. number of characters returned defaults to 64
*/
exports.sha256 = function(str, options) {
options = options || {}
options = options || {};
return $tw.sjcl.codec.hex.fromBits($tw.sjcl.hash.sha256.hash(str)).substr(0,options.length || 64);
}
};
/*
Decode a base64 string
@@ -914,3 +914,56 @@ exports.makeCompareFunction = function(type,options) {
};
return (types[type] || types[options.defaultType] || types.number);
};
/*
Split text into parts (lines or words) for diff operations
Adapted from https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs
*/
exports.diffPartsToChars = function(text1,text2,mode) {
const lineArray = [""],
lineHash = Object.create(null);
function diff_linesToPartsMunge_(text,mode) {
let chars = "",
lineStart = 0,
lineEnd = -1,
lineArrayLength = lineArray.length,
regexpResult;
const searchRegexp = /\W+/g;
while(lineEnd < text.length - 1) {
if(mode === "words") {
regexpResult = searchRegexp.exec(text);
lineEnd = searchRegexp.lastIndex;
if(regexpResult === null) {
lineEnd = text.length;
}
lineEnd = --lineEnd;
} else {
lineEnd = text.indexOf("\n", lineStart);
if(lineEnd === -1) {
lineEnd = text.length - 1;
}
}
let line = text.substring(lineStart, lineEnd + 1);
if(line in lineHash) {
chars += String.fromCharCode(lineHash[line]);
} else {
if(lineArrayLength === maxLines) {
line = text.substring(lineStart);
lineEnd = text.length;
}
chars += String.fromCharCode(lineArrayLength);
lineHash[line] = lineArrayLength;
lineArray[lineArrayLength++] = line;
}
lineStart = lineEnd + 1;
}
return chars;
}
let maxLines = 40000;
const chars1 = diff_linesToPartsMunge_(text1,mode);
maxLines = 65535;
const chars2 = diff_linesToPartsMunge_(text2,mode);
return {chars1, chars2, lineArray};
};

View File

@@ -36,7 +36,13 @@ DiffTextWidget.prototype.render = function(parent,nextSibling) {
this.execute();
// Create the diff object
const editCost = $tw.utils.parseNumber(this.getAttribute("editcost","4"));
const diffs = dmp.diffMain(this.getAttribute("source",""),this.getAttribute("dest",""),{diffEditCost: editCost});
const mode = this.getAttribute("mode") || "chars";
let diffs;
if(mode === "lines" || mode === "words") {
diffs = diffLineWordMode(this.getAttribute("source",""),this.getAttribute("dest",""),mode,editCost);
} else {
diffs = dmp.diffMain(this.getAttribute("source",""),this.getAttribute("dest",""),{diffEditCost: editCost});
}
// Apply required cleanup
switch(this.getAttribute("cleanup","semantic")) {
case "none":
@@ -132,7 +138,7 @@ Selectively refreshes the widget if needed. Returns true if the widget or any of
*/
DiffTextWidget.prototype.refresh = function(changedTiddlers) {
var changedAttributes = this.computeAttributes();
if(changedAttributes.source || changedAttributes.dest || changedAttributes.cleanup || changedAttributes.editcost) {
if(changedAttributes.source || changedAttributes.dest || changedAttributes.cleanup || changedAttributes.mode || changedAttributes.editcost) {
this.refreshSelf();
return true;
} else {
@@ -140,4 +146,15 @@ DiffTextWidget.prototype.refresh = function(changedTiddlers) {
}
};
// This function is adapted from https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs
function diffLineWordMode(text1,text2,mode,editCost) {
var a = $tw.utils.diffPartsToChars(text1,text2,mode);
var lineText1 = a.chars1;
var lineText2 = a.chars2;
var lineArray = a.lineArray;
var diffs = dmp.diffMain(lineText1,lineText2,{diffEditCost: editCost});
dmp.diffCharsToLines(diffs,lineArray);
return diffs;
}
exports["diff-text"] = DiffTextWidget;

View File

@@ -1,7 +1,9 @@
created: 20211117003509226
modified: 20251119135921343
modified: 20260102135713260
tags: sampletag1 sampletag2 [[Widget Examples]]
title: SampleTiddlerFirst
type: text/vnd.tiddlywiki
This is a test tiddler called SampleTidlerFirst.
This is a test tiddler called SampleTidlerFirst.
It is used in [[DiffTextWidget]].
You can modify its content.

View File

@@ -1,6 +1,8 @@
created: 20211117003511221
modified: 20211117003724108
modified: 20260102135739735
tags: sampletag1 sampletag2 [[Widget Examples]]
title: SampleTiddlerSecond
This test tiddler is called SampleTiddlerSecond.
This test tiddler is called SampleTiddlerSecond.
It is used in [[DiffTextWidget]].
You can edit its content.

View File

@@ -0,0 +1,15 @@
title: $:/changenotes/5.4.0/#9551
description: Add words and lines modes to diff-text widget
release: 5.4.0
tags: $:/tags/ChangeNote
change-type: enhancement
change-category: widget
github-links: https://github.com/TiddlyWiki/TiddlyWiki5/pull/9551
github-contributors: yaisog
The DiffTextWidget now supports two additional diff modes via the `mode` attribute:
* `mode="words"` - Performs word-level diff operations, making differences more intelligible when comparing text
* `mode="lines"` - Performs line-level diff operations, highlighting entire lines that have changed
The default `mode="chars"` continues to work as before, performing character-level diff operations.

View File

@@ -1,69 +1,85 @@
caption: diff-text
created: 20180316162725329
modified: 20251117054552220
modified: 20260102132650194
tags: Widgets
title: DiffTextWidget
type: text/vnd.tiddlywiki
! Introduction
<<.from-version "5.1.16">> The diff text widget analyses the differences between a pair of source and destination text strings and displays the results as highlighted insertions and deletions (similar to the "track changes" function of a word processor). For example:
<<.from-version "5.1.16">> The diff text widget analyses the differences between a pair of source and destination text strings and displays the results as highlighted insertions and deletions (similar to the "track changes" function of a word processor).
```
<$diff-text source="This is the original text" dest="This is the text to compare to" mode="words>
These are the <<diff-count>> differences:
</$diff-text>
```
<$diff-text source="Hey Jude, don't make it bad. Take a sad song and make it better. Remember to let her into your heart
Then you can start to make it better." dest="Hey Jude, don't be afraid. You were made to go out and get her. The minute you let her under your skin. Then you begin to make it better."/>
! Content and Attributes
!! Content
The content of the <<.wid diff-text>> widget is displayed immediately before the differences. Within the content, the variable <<.var diff-count>> is available, containing the number of differences found. If the widget has no content then it automatically transcludes the tiddler [[$:/language/Diffs/CountMessage]].
The content of the `<$diff-text>` widget is rendered immediately before the diffs. Within it, the variable `diff-count` is available, containing the number of differences found. If the widget has no content then it automatically transcludes the tiddler [[$:/language/Diffs/CountMessage]].
<<<
In other words, these three invocations are all equivalent:
```
<$diff-text source={{FirstTiddler}} dest={{SecondTiddler}}>
{{$:/language/Diffs/CountMessage}}
</$diff-text>
<$diff-text source={{FirstTiddler}} dest={{SecondTiddler}}>
</$diff-text>
<$diff-text source={{FirstTiddler}} dest={{SecondTiddler}}/>
```
<<<
!! Attributes
<<.note """The algorithm counts changes as both insertion and deletion, and therefore the number of differences can be higher than expected.""">>
|!Attribute |!Description |
|source |The source text |
|dest |The destination text |
|cleanup |Defines a way to allow diffs to be human readable |
|editcost |<<.from-version "5.4.0">> Only active if the cleanup flag is set to "efficient" |
|<<.attr source>> |The source text |
|<<.attr dest>> |The destination text |
|<<.attr cleanup>> |Optional post-processing to improve readability (default is <<.value semantic>>) |
|<<.attr editcost>> |<<.from-version "5.4.0">> Threshold parameter for <<.value efficiency>> cleanup mode (default is <<.value 4>>) |
|<<.attr mode>> |<<.from-version "5.4.0">> Specifies the granularity at which differences are computed and displayed (default is <<.value chars>>) |
!!! Cleanup Flags
!! <<.attr cleanup>> / <<.attr editcost>>
The ''cleanup'' attribute determines which optional post-processing should be applied to the diffs:
The <<.attr cleanup>> attribute determines which optional post-processing should be applied to the diffs:
* ''none'': no cleanup is performed
* ''semantic'' (default): rewrites the diffs for human readability
* ''efficient'': rewrites the diffs to minimise the number of operations for subsequent processing
** If efficient is defined, ''editcost'' defines how the cleanup algorithm for human readability works. See example slider
* <<.value none>>: No cleanup is performed
* <<.value semantic>> (default): Optimizes the differences for readability
* <<.value efficiency>>: Optimizes the differences to minimise the number of operations for subsequent processing
** When using <<.value efficiency>> mode, the <<.attr editcost>> parameter controls the cost threshold for the cleanup algorithm, determining how aggressively the diff algorithm merges nearby edits for better human readability (default value is 4).
<<.note """Note that in many cases the results will be the same regardless of the cleanup option. See the [[docs|https://github.com/google/diff-match-patch/wiki/API]] of the underlying library for more details""">>
!! <<.attr mode>>
The <<.attr mode>> attribute determines how differences are computed and displayed:
* <<.value chars>>: Compares differences at the //character level// for precise change detection
* <<.value words>>: Compares differences at the //word level// for more readable text comparisons
* <<.value lines>>: Compares differences at the //line level// for better visibility of structural changes
! Examples
In this example we compare two texts:
A basic example:
<$macrocall $name='wikitext-example-without-html'
src="""|tc-max-width tc-edit-max-width|k
<<.example n:1 e.g."""<$diff-text source="The quick brown fox jumps" dest="The slick brown fox leaps"/>""">>
In <<.value words>> mode, differences are computed at the words level:
<<.example n:2 e.g."""<$diff-text mode="words" source="The quick brown fox jumps" dest="The slick brown fox leaps"/>""">>
To see the effects of all parameters, use this example:
|tc-max-width tc-edit-max-width|k
|<$edit-text tiddler="SampleTiddlerFirst" rows="5"/>|<$edit-text tiddler="SampleTiddlerSecond" rows="5"/>|
Edit cost: {{$:/temp/SampleTiddlerEditCost}} -- Drag to 7 and then to 33
<$range tiddler="$:/temp/SampleTiddlerEditCost" min="1" max="200" default="4" class="tc-max-width"/>
<$diff-text source={{SampleTiddlerFirst}} dest={{SampleTiddlerSecond}} cleanup={{{ [{!!cleanup}!is[blank]else[efficiency]] }}} editcost={{{ [{!!editcost}!is[blank]else[4]] }}} mode={{{ [{!!mode}!is[blank]else[chars]] }}}/>
<$diff-text source={{SampleTiddlerFirst}} dest={{SampleTiddlerSecond}} cleanup=efficiency editcost={{$:/temp/SampleTiddlerEditCost}}/>
"""/>
!! <<.attr mode>>
<$radio field="mode" value="chars" default="chars" class="tc-small-gap-right"> <<.value chars>></$radio>
<$radio field="mode" value="words" class="tc-small-gap-right"> <<.value words>></$radio>
<$radio field="mode" value="lines"> <<.value lines>></$radio>
!! <<.attr cleanup>>
<$radio field="cleanup" value="none" class="tc-small-gap-right"> <<.value none>></$radio>
<$radio field="cleanup" value="semantic" class="tc-small-gap-right"> <<.value semantic>></$radio>
<$radio field="cleanup" value="efficiency" default="efficiency"> <<.value efficiency>></$radio>
<% if [{!!cleanup}!match[none]!match[semantic]] %>
!! <<.attr editcost>>: <$transclude $variable=".value" _={{{ [{!!editcost}!is[blank]else[4]] }}} />
<$range field="editcost" min="1" max="200" default="4" class="tc-max-width" style.max-width="500px" />
<% endif %>