diff --git a/core/modules/filters/strings.js b/core/modules/filters/strings.js index 1f06c2907..854c43b8c 100644 --- a/core/modules/filters/strings.js +++ b/core/modules/filters/strings.js @@ -37,14 +37,14 @@ exports.trim = function(source,operator,options) { operand = (operator.operand || ""), fnCalc; if(suffix === "prefix") { - fnCalc = function(a,b) {return [$tw.utils.trimPrefix(a,b)];} + fnCalc = function(a,b) {return [$tw.utils.trimPrefix(a,b)];}; } else if(suffix === "suffix") { - fnCalc = function(a,b) {return [$tw.utils.trimSuffix(a,b)];} + fnCalc = function(a,b) {return [$tw.utils.trimSuffix(a,b)];}; } else { if(operand === "") { - fnCalc = function(a) {return [$tw.utils.trim(a)];} + fnCalc = function(a) {return [$tw.utils.trim(a)];}; } else { - fnCalc = function(a,b) {return [$tw.utils.trimSuffix($tw.utils.trimPrefix(a,b),b)];} + fnCalc = function(a,b) {return [$tw.utils.trimSuffix($tw.utils.trimPrefix(a,b),b)];}; } } source(function(tiddler,title) { @@ -80,9 +80,9 @@ exports.levenshtein = makeStringBinaryOperator( } ); -// these two functions are adapted from https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs +// this function is adapted from https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs function diffLineWordMode(text1,text2,mode) { - var a = diffPartsToChars(text1,text2,mode); + var a = $tw.utils.diffPartsToChars(text1,text2,mode); var lineText1 = a.chars1; var lineText2 = a.chars2; var lineArray = a.lineArray; @@ -91,56 +91,6 @@ function diffLineWordMode(text1,text2,mode) { return diffs; } -function diffPartsToChars(text1,text2,mode) { - var lineArray = []; - var lineHash = {}; - lineArray[0] = ""; - - function diff_linesToPartsMunge_(text,mode) { - var chars = ""; - var lineStart = 0; - var lineEnd = -1; - var lineArrayLength = lineArray.length, - regexpResult; - var searchRegexp = /\W+/g; - while(lineEnd < text.length - 1) { - if(mode === "words") { - regexpResult = searchRegexp.exec(text); - lineEnd = 
searchRegexp.lastIndex; - if(regexpResult === null) { - lineEnd = text.length; - } - lineEnd = --lineEnd; - } else { - lineEnd = text.indexOf("\n", lineStart); - if(lineEnd == -1) { - lineEnd = text.length - 1; - } - } - var line = text.substring(lineStart, lineEnd + 1); - - if(lineHash.hasOwnProperty ? lineHash.hasOwnProperty(line) : (lineHash[line] !== undefined)) { - chars += String.fromCharCode(lineHash[line]); - } else { - if(lineArrayLength == maxLines) { - line = text.substring(lineStart); - lineEnd = text.length; - } - chars += String.fromCharCode(lineArrayLength); - lineHash[line] = lineArrayLength; - lineArray[lineArrayLength++] = line; - } - lineStart = lineEnd + 1; - } - return chars; - } - var maxLines = 40000; - var chars1 = diff_linesToPartsMunge_(text1,mode); - maxLines = 65535; - var chars2 = diff_linesToPartsMunge_(text2,mode); - return {chars1: chars1, chars2: chars2, lineArray: lineArray}; -}; - exports.makepatches = function(source,operator,options) { var suffix = operator.suffix || "", result = []; @@ -275,7 +225,7 @@ exports.pad = function(source,operator,options) { } }); return results; -} +}; exports.charcode = function(source,operator,options) { var chars = []; diff --git a/core/modules/utils/utils.js b/core/modules/utils/utils.js index e36bf88c4..d5f2c9df2 100644 --- a/core/modules/utils/utils.js +++ b/core/modules/utils/utils.js @@ -55,7 +55,7 @@ Return the dflt (default) parameter if str is not a base-10 number. exports.getInt = function(str,deflt) { var i = parseInt(str,10); return isNaN(i) ? deflt : i; -} +}; /* Repeatedly replaces a substring within a string. 
Like String.prototype.replace, but without any of the default special handling of $ sequences in the replace string @@ -69,12 +69,12 @@ exports.replaceString = function(text,search,replace) { exports.trimPrefix = function(str,unwanted) { if(typeof str === "string" && typeof unwanted === "string") { if(unwanted === "") { - return str.replace(/^\s\s*/, ''); + return str.replace(/^\s\s*/, ""); } else { // Safely regexp-escape the unwanted text - unwanted = unwanted.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&'); - var regex = new RegExp('^(' + unwanted + ')+'); - return str.replace(regex, ''); + unwanted = unwanted.replace(/[\\^$*+?.()|[\]{}]/g, "\\$&"); + var regex = new RegExp("^(" + unwanted + ")+"); + return str.replace(regex, ""); } } else { return str; @@ -84,12 +84,12 @@ exports.trimPrefix = function(str,unwanted) { exports.trimSuffix = function(str,unwanted) { if(typeof str === "string" && typeof unwanted === "string") { if(unwanted === "") { - return str.replace(/\s\s*$/, ''); + return str.replace(/\s\s*$/, ""); } else { // Safely regexp-escape the unwanted text - unwanted = unwanted.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&'); - var regex = new RegExp('(' + unwanted + ')+$'); - return str.replace(regex, ''); + unwanted = unwanted.replace(/[\\^$*+?.()|[\]{}]/g, "\\$&"); + var regex = new RegExp("(" + unwanted + ")+$"); + return str.replace(regex, ""); } } else { return str; @@ -101,14 +101,14 @@ Convert a string to sentence case (ie capitalise first letter) */ exports.toSentenceCase = function(str) { return (str || "").replace(/^\S/, function(c) {return c.toUpperCase();}); -} +}; /* Convert a string to title case (ie capitalise each initial letter) */ exports.toTitleCase = function(str) { return (str || "").replace(/(^|\s)\S/g, function(c) {return c.toUpperCase();}); -} +}; /* Find the line break preceding a given position in a string @@ -358,8 +358,8 @@ exports.formatDateString = function(date,template) { }], [/^TZD/, function() { var tz = date.getTimezoneOffset(), - atz 
= Math.abs(tz); - return (tz < 0 ? '+' : '-') + $tw.utils.pad(Math.floor(atz / 60)) + ':' + $tw.utils.pad(atz % 60); + atz = Math.abs(tz); + return (tz < 0 ? "+" : "-") + $tw.utils.pad(Math.floor(atz / 60)) + ":" + $tw.utils.pad(atz % 60); }], [/^wYY/, function() { return $tw.utils.pad($tw.utils.getYearForWeekNo(date) - 2000); @@ -568,9 +568,9 @@ exports.unescapeLineBreaks = function(s) { exports.escape = function(ch) { var charCode = ch.charCodeAt(0); if(charCode <= 0xFF) { - return '\\x' + $tw.utils.pad(charCode.toString(16).toUpperCase()); + return "\\x" + $tw.utils.pad(charCode.toString(16).toUpperCase()); } else { - return '\\u' + $tw.utils.pad(charCode.toString(16).toUpperCase(),4); + return "\\u" + $tw.utils.pad(charCode.toString(16).toUpperCase(),4); } }; @@ -587,11 +587,11 @@ exports.stringify = function(s, rawUnicode) { */ var regex = rawUnicode ? /[\x00-\x1f]/g : /[\x00-\x1f\x80-\uFFFF]/g; return (s || "") - .replace(/\\/g, '\\\\') // backslash + .replace(/\\/g, "\\\\") // backslash .replace(/"/g, '\\"') // double quote character .replace(/'/g, "\\'") // single quote character - .replace(/\r/g, '\\r') // carriage return - .replace(/\n/g, '\\n') // line feed + .replace(/\r/g, "\\r") // carriage return + .replace(/\n/g, "\\n") // line feed .replace(regex, exports.escape); // non-ASCII characters }; @@ -601,15 +601,15 @@ exports.jsonStringify = function(s, rawUnicode) { // See http://www.json.org/ var regex = rawUnicode ? 
/[\x00-\x1f]/g : /[\x00-\x1f\x80-\uFFFF]/g; return (s || "") - .replace(/\\/g, '\\\\') // backslash + .replace(/\\/g, "\\\\") // backslash .replace(/"/g, '\\"') // double quote character - .replace(/\r/g, '\\r') // carriage return - .replace(/\n/g, '\\n') // line feed - .replace(/\x08/g, '\\b') // backspace - .replace(/\x0c/g, '\\f') // formfeed - .replace(/\t/g, '\\t') // tab + .replace(/\r/g, "\\r") // carriage return + .replace(/\n/g, "\\n") // line feed + .replace(/\x08/g, "\\b") // backspace + .replace(/\x0c/g, "\\f") // formfeed + .replace(/\t/g, "\\t") // tab .replace(regex,function(s) { - return '\\u' + $tw.utils.pad(s.charCodeAt(0).toString(16).toUpperCase(),4); + return "\\u" + $tw.utils.pad(s.charCodeAt(0).toString(16).toUpperCase(),4); }); // non-ASCII characters }; @@ -617,7 +617,7 @@ exports.jsonStringify = function(s, rawUnicode) { Escape the RegExp special characters with a preceding backslash */ exports.escapeRegExp = function(s) { - return s.replace(/[\-\/\\\^\$\*\+\?\.\(\)\|\[\]\{\}]/g, '\\$&'); + return s.replace(/[\-\/\\\^\$\*\+\?\.\(\)\|\[\]\{\}]/g, "\\$&"); }; /* @@ -700,7 +700,7 @@ exports.parseTextReference = function(textRef) { } } else { // If we couldn't parse it - result.title = textRef + result.title = textRef; } return result; }; @@ -759,9 +759,9 @@ Cryptographic hash function as used by sha256 filter operator options.length .. 
number of characters returned defaults to 64 */ exports.sha256 = function(str, options) { - options = options || {} + options = options || {}; return $tw.sjcl.codec.hex.fromBits($tw.sjcl.hash.sha256.hash(str)).substr(0,options.length || 64); -} +}; /* Decode a base64 string @@ -914,3 +914,56 @@ exports.makeCompareFunction = function(type,options) { }; return (types[type] || types[options.defaultType] || types.number); }; + +/* +Split text into parts (lines or words) for diff operations +Adapted from https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs +*/ +exports.diffPartsToChars = function(text1,text2,mode) { + const lineArray = [""], + lineHash = Object.create(null); + + function diff_linesToPartsMunge_(text,mode) { + let chars = "", + lineStart = 0, + lineEnd = -1, + lineArrayLength = lineArray.length, + regexpResult; + const searchRegexp = /\W+/g; + while(lineEnd < text.length - 1) { + if(mode === "words") { + regexpResult = searchRegexp.exec(text); + lineEnd = searchRegexp.lastIndex; + if(regexpResult === null) { + lineEnd = text.length; + } + lineEnd = --lineEnd; + } else { + lineEnd = text.indexOf("\n", lineStart); + if(lineEnd === -1) { + lineEnd = text.length - 1; + } + } + let line = text.substring(lineStart, lineEnd + 1); + + if(line in lineHash) { + chars += String.fromCharCode(lineHash[line]); + } else { + if(lineArrayLength === maxLines) { + line = text.substring(lineStart); + lineEnd = text.length; + } + chars += String.fromCharCode(lineArrayLength); + lineHash[line] = lineArrayLength; + lineArray[lineArrayLength++] = line; + } + lineStart = lineEnd + 1; + } + return chars; + } + let maxLines = 40000; + const chars1 = diff_linesToPartsMunge_(text1,mode); + maxLines = 65535; + const chars2 = diff_linesToPartsMunge_(text2,mode); + return {chars1, chars2, lineArray}; +}; diff --git a/core/modules/widgets/diff-text.js b/core/modules/widgets/diff-text.js index b9786aa25..5aa1344d6 100644 --- a/core/modules/widgets/diff-text.js +++ 
b/core/modules/widgets/diff-text.js @@ -36,7 +36,13 @@ DiffTextWidget.prototype.render = function(parent,nextSibling) { this.execute(); // Create the diff object const editCost = $tw.utils.parseNumber(this.getAttribute("editcost","4")); - const diffs = dmp.diffMain(this.getAttribute("source",""),this.getAttribute("dest",""),{diffEditCost: editCost}); + const mode = this.getAttribute("mode") || "chars"; + let diffs; + if(mode === "lines" || mode === "words") { + diffs = diffLineWordMode(this.getAttribute("source",""),this.getAttribute("dest",""),mode,editCost); + } else { + diffs = dmp.diffMain(this.getAttribute("source",""),this.getAttribute("dest",""),{diffEditCost: editCost}); + } // Apply required cleanup switch(this.getAttribute("cleanup","semantic")) { case "none": @@ -132,7 +138,7 @@ Selectively refreshes the widget if needed. Returns true if the widget or any of */ DiffTextWidget.prototype.refresh = function(changedTiddlers) { var changedAttributes = this.computeAttributes(); - if(changedAttributes.source || changedAttributes.dest || changedAttributes.cleanup || changedAttributes.editcost) { + if(changedAttributes.source || changedAttributes.dest || changedAttributes.cleanup || changedAttributes.mode || changedAttributes.editcost) { this.refreshSelf(); return true; } else { @@ -140,4 +146,15 @@ DiffTextWidget.prototype.refresh = function(changedTiddlers) { } }; +// This function is adapted from https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs +function diffLineWordMode(text1,text2,mode,editCost) { + var a = $tw.utils.diffPartsToChars(text1,text2,mode); + var lineText1 = a.chars1; + var lineText2 = a.chars2; + var lineArray = a.lineArray; + var diffs = dmp.diffMain(lineText1,lineText2,{diffEditCost: editCost}); + dmp.diffCharsToLines(diffs,lineArray); + return diffs; +} + exports["diff-text"] = DiffTextWidget; diff --git a/editions/tw5.com/tiddlers/demonstrations/SampleTiddlerFirst.tid 
b/editions/tw5.com/tiddlers/demonstrations/SampleTiddlerFirst.tid index ba0245f47..e601f6438 100644 --- a/editions/tw5.com/tiddlers/demonstrations/SampleTiddlerFirst.tid +++ b/editions/tw5.com/tiddlers/demonstrations/SampleTiddlerFirst.tid @@ -1,7 +1,9 @@ created: 20211117003509226 -modified: 20251119135921343 +modified: 20260102135713260 tags: sampletag1 sampletag2 [[Widget Examples]] title: SampleTiddlerFirst type: text/vnd.tiddlywiki -This is a test tiddler called SampleTidlerFirst. \ No newline at end of file +This is a test tiddler called SampleTidlerFirst. +It is used in [[DiffTextWidget]]. +You can modify its content. \ No newline at end of file diff --git a/editions/tw5.com/tiddlers/demonstrations/SampleTiddlerSecond.tid b/editions/tw5.com/tiddlers/demonstrations/SampleTiddlerSecond.tid index e932222b3..55b422687 100644 --- a/editions/tw5.com/tiddlers/demonstrations/SampleTiddlerSecond.tid +++ b/editions/tw5.com/tiddlers/demonstrations/SampleTiddlerSecond.tid @@ -1,6 +1,8 @@ created: 20211117003511221 -modified: 20211117003724108 +modified: 20260102135739735 tags: sampletag1 sampletag2 [[Widget Examples]] title: SampleTiddlerSecond -This test tiddler is called SampleTiddlerSecond. \ No newline at end of file +This test tiddler is called SampleTiddlerSecond. +It is used in [[DiffTextWidget]]. +You can edit its content. 
\ No newline at end of file diff --git a/editions/tw5.com/tiddlers/releasenotes/5.4.0/#9551.tid b/editions/tw5.com/tiddlers/releasenotes/5.4.0/#9551.tid new file mode 100644 index 000000000..98e78c61b --- /dev/null +++ b/editions/tw5.com/tiddlers/releasenotes/5.4.0/#9551.tid @@ -0,0 +1,15 @@ +title: $:/changenotes/5.4.0/#9551 +description: Add words and lines modes to diff-text widget +release: 5.4.0 +tags: $:/tags/ChangeNote +change-type: enhancement +change-category: widget +github-links: https://github.com/TiddlyWiki/TiddlyWiki5/pull/9551 +github-contributors: yaisog + +The DiffTextWidget now supports two additional diff modes via the `mode` attribute: + +* `mode="words"` - Performs word-level diff operations, making differences more intelligible when comparing text +* `mode="lines"` - Performs line-level diff operations, highlighting entire lines that have changed + +The default `mode="chars"` continues to work as before, performing character-level diff operations. diff --git a/editions/tw5.com/tiddlers/widgets/DiffTextWidget.tid b/editions/tw5.com/tiddlers/widgets/DiffTextWidget.tid index af0bd5ad0..a81c539f3 100644 --- a/editions/tw5.com/tiddlers/widgets/DiffTextWidget.tid +++ b/editions/tw5.com/tiddlers/widgets/DiffTextWidget.tid @@ -1,69 +1,85 @@ caption: diff-text created: 20180316162725329 -modified: 20251117054552220 +modified: 20260102132650194 tags: Widgets title: DiffTextWidget type: text/vnd.tiddlywiki ! Introduction -<<.from-version "5.1.16">> The diff text widget analyses the differences between a pair of source and destination text strings and displays the results as highlighted insertions and deletions (similar to the "track changes" function of a word processor). For example: +<<.from-version "5.1.16">> The diff text widget analyses the differences between a pair of source and destination text strings and displays the results as highlighted insertions and deletions (similar to the "track changes" function of a word processor). 
+ +``` +<$diff-text source="This is the original text" dest="This is the text to compare to" mode="words"> + These are the <<diff-count>> differences: +</$diff-text> +``` + -<$diff-text source="Hey Jude, don't make it bad. Take a sad song and make it better. Remember to let her into your heart -Then you can start to make it better." dest="Hey Jude, don't be afraid. You were made to go out and get her. The minute you let her under your skin. Then you begin to make it better."/> ! Content and Attributes -!! Content +The content of the <<.wid diff-text>> widget is displayed immediately before the differences. Within the content, the variable <<.var diff-count>> is available, containing the number of differences found. If the widget has no content then it automatically transcludes the tiddler [[$:/language/Diffs/CountMessage]]. -The content of the `<$diff-text>` widget is rendered immediately before the diffs. Within it, the variable `diff-count` is available, containing the number of differences found. If the widget has no content then it automatically transcludes the tiddler [[$:/language/Diffs/CountMessage]]. - -<<< -In other words, these three invocations are all equivalent: - -``` -<$diff-text source={{FirstTiddler}} dest={{SecondTiddler}}> -{{$:/language/Diffs/CountMessage}} -</$diff-text> - -<$diff-text source={{FirstTiddler}} dest={{SecondTiddler}}> -</$diff-text> - -<$diff-text source={{FirstTiddler}} dest={{SecondTiddler}}/> - -``` -<<< - -!! 
Attributes +<<.note """The algorithm counts changes as both insertion and deletion, and therefore the number of differences can be higher than expected.""">> |!Attribute |!Description | -|source |The source text | -|dest |The destination text | -|cleanup |Defines a way to allow diffs to be human readable | -|editcost |<<.from-version "5.4.0">> Only active if the cleanup flag is set to "efficient" | +|<<.attr source>> |The source text | +|<<.attr dest>> |The destination text | +|<<.attr cleanup>> |Optional post-processing to improve readability (default is <<.value semantic>>) | +|<<.attr editcost>> |<<.from-version "5.4.0">> Threshold parameter for <<.value efficiency>> cleanup mode (default is <<.value 4>>) | +|<<.attr mode>> |<<.from-version "5.4.0">> Specifies the granularity at which differences are computed and displayed (default is <<.value chars>>) | -!!! Cleanup Flags +!! <<.attr cleanup>> / <<.attr editcost>> -The ''cleanup'' attribute determines which optional post-processing should be applied to the diffs: +The <<.attr cleanup>> attribute determines which optional post-processing should be applied to the diffs: -* ''none'': no cleanup is performed -* ''semantic'' (default): rewrites the diffs for human readability -* ''efficient'': rewrites the diffs to minimise the number of operations for subsequent processing -** If efficient is defined, ''editcost'' defines how the cleanup algorithm for human readability works. See example slider +* <<.value none>>: No cleanup is performed +* <<.value semantic>> (default): Optimizes the differences for readability +* <<.value efficiency>>: Optimizes the differences to minimise the number of operations for subsequent processing +** When using <<.value efficiency>> mode, the <<.attr editcost>> parameter controls the cost threshold for the cleanup algorithm, determining how aggressively the diff algorithm merges nearby edits for better human readability (default value is 4). 
<<.note """Note that in many cases the results will be the same regardless of the cleanup option. See the [[docs|https://github.com/google/diff-match-patch/wiki/API]] of the underlying library for more details""">> +!! <<.attr mode>> + +The <<.attr mode>> attribute determines how differences are computed and displayed: + +* <<.value chars>>: Compares differences at the //character level// for precise change detection +* <<.value words>>: Compares differences at the //word level// for more readable text comparisons +* <<.value lines>>: Compares differences at the //line level// for better visibility of structural changes + ! Examples -In this example we compare two texts: +A basic example: -<$macrocall $name='wikitext-example-without-html' -src="""|tc-max-width tc-edit-max-width|k +<<.example n:1 eg:"""<$diff-text source="The quick brown fox jumps" dest="The slick brown fox leaps"/>""">> + +In <<.value words>> mode, differences are computed at the word level: + +<<.example n:2 eg:"""<$diff-text mode="words" source="The quick brown fox jumps" dest="The slick brown fox leaps"/>""">> + + +To see the effects of all parameters, use this example: + +|tc-max-width tc-edit-max-width|k |<$edit-text tiddler="SampleTiddlerFirst" rows="5"/>|<$edit-text tiddler="SampleTiddlerSecond" rows="5"/>| -Edit cost: {{$:/temp/SampleTiddlerEditCost}} -- Drag to 7 and then to 33 -<$range tiddler="$:/temp/SampleTiddlerEditCost" min="1" max="200" default="4" class="tc-max-width"/> +<$diff-text source={{SampleTiddlerFirst}} dest={{SampleTiddlerSecond}} cleanup={{{ [{!!cleanup}!is[blank]else[efficiency]] }}} editcost={{{ [{!!editcost}!is[blank]else[4]] }}} mode={{{ [{!!mode}!is[blank]else[chars]] }}}/> -<$diff-text source={{SampleTiddlerFirst}} dest={{SampleTiddlerSecond}} cleanup=efficiency editcost={{$:/temp/SampleTiddlerEditCost}}/> -"""/> +!! 
<<.attr mode>> +<$radio field="mode" value="chars" default="chars" class="tc-small-gap-right"> <<.value chars>></$radio> +<$radio field="mode" value="words" class="tc-small-gap-right"> <<.value words>></$radio> +<$radio field="mode" value="lines"> <<.value lines>></$radio> + +!! <<.attr cleanup>> +<$radio field="cleanup" value="none" class="tc-small-gap-right"> <<.value none>></$radio> +<$radio field="cleanup" value="semantic" class="tc-small-gap-right"> <<.value semantic>></$radio> +<$radio field="cleanup" value="efficiency" default="efficiency"> <<.value efficiency>></$radio> + +<% if [{!!cleanup}!match[none]!match[semantic]] %> + +!! <<.attr editcost>>: <$transclude $variable=".value" _={{{ [{!!editcost}!is[blank]else[4]] }}} /> +<$range field="editcost" min="1" max="200" default="4" class="tc-max-width" style.max-width="500px" /> +<% endif %> \ No newline at end of file