1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2026-02-27 12:29:51 +00:00

Compare commits

..

5 Commits

Author SHA1 Message Date
Jeremy Ruston
a812313072 Another pesky function wrapper 2026-02-24 11:00:27 +00:00
Jeremy Ruston
f85cbeda70 Remove function wrapper
Thanks @Leilei332
2026-02-24 10:58:02 +00:00
Jeremy Ruston
e9e22598ad Merge branch 'master' into wikify-operator 2026-02-24 10:44:01 +00:00
Jeremy Ruston
1e47b88dc7 Merge branch 'master' into wikify-operator 2025-06-03 16:39:19 +01:00
Jeremy Ruston
55cf0b2965 Add wikify operator 2024-11-07 16:26:07 +00:00
13 changed files with 625 additions and 400 deletions

View File

@@ -0,0 +1,34 @@
/*\
title: $:/core/modules/filters/wikify.js
type: application/javascript
module-type: filteroperator
Filter operator wikifying each string in the input list and returning the result as a list of strings
\*/
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
/*
Export our filter function
*/
exports.wikify = function(source,operator,options) {
var output = operator.operands[0],
mode = operator.operands[1],
type = operator.operands[2],
results = [];
source(function(tiddler,title) {
var wikifier = new $tw.utils.Wikifier({
wiki: options.wiki,
widget: options.widget,
text: title,
type: type,
mode: mode,
output: output
});
results.push(wikifier.getResult());
});
return results;
};

View File

@@ -107,7 +107,7 @@ exports.parseStringLiteral = function(source,pos) {
type: "string",
start: pos
};
var reString = /(?:"""([\s\S]*?)"""|"([^"]*)")|(?:'([^']*)')|\[\[((?:[^\]]|\](?!\]))*)\]\]/y;
var reString = /(?:"""([\s\S]*?)"""|"([^"]*)")|(?:'([^']*)')|\[\[((?:[^\]]|\](?!\]))*)\]\]/g;
reString.lastIndex = pos;
var match = reString.exec(source);
if(match && match.index === pos) {
@@ -221,7 +221,7 @@ exports.parseMacroInvocationAsTransclusion = function(source,pos) {
orderedAttributes: []
};
// Define our regexps
var reVarName = /([^\s>"'=:]+)/y;
var reVarName = /([^\s>"'=:]+)/g;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Look for a double opening angle bracket
@@ -237,11 +237,9 @@ exports.parseMacroInvocationAsTransclusion = function(source,pos) {
}
$tw.utils.addAttributeToParseTreeNode(node,"$variable",token.match[1]);
pos = token.end;
// Check that the tag is terminated by a space or >>, and that there is a closing >> somewhere ahead
if(!(source.charAt(pos) === ">" && source.charAt(pos + 1) === ">") ) {
if(!$tw.utils.parseWhiteSpace(source,pos) || source.indexOf(">>",pos) === -1) {
return null;
}
// Check that the tag is terminated by a space or >>
if(!$tw.utils.parseWhiteSpace(source,pos) && !(source.charAt(pos) === ">" && source.charAt(pos + 1) === ">") ) {
return null;
}
// Process attributes
pos = $tw.utils.parseMacroParametersAsAttributes(node,source,pos);
@@ -269,7 +267,7 @@ exports.parseMVVReferenceAsTransclusion = function(source,pos) {
orderedAttributes: []
};
// Define our regexps
var reVarName = /([^\s>"'=:)]+)/y;
var reVarName = /([^\s>"'=:)]+)/g;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Look for a double opening parenthesis
@@ -325,17 +323,17 @@ exports.parseMacroParameterAsAttribute = function(source,pos) {
start: pos
};
// Define our regexps
var reAttributeName = /([^\/\s>"'`=:]+)/y,
reUnquotedAttribute = /((?:(?:>(?!>))|[^\s>"'])+)/y,
reFilteredValue = /\{\{\{([\S\s]+?)\}\}\}/y,
reIndirectValue = /\{\{([^\}]+)\}\}/y,
reSubstitutedValue = /(?:```([\s\S]*?)```|`([^`]|[\S\s]*?)`)/y;
var reAttributeName = /([^\/\s>"'`=:]+)/g,
reUnquotedAttribute = /((?:(?:>(?!>))|[^\s>"'])+)/g,
reFilteredValue = /\{\{\{([\S\s]+?)\}\}\}/g,
reIndirectValue = /\{\{([^\}]+)\}\}/g,
reSubstitutedValue = /(?:```([\s\S]*?)```|`([^`]|[\S\s]*?)`)/g;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Get the attribute name and the separator token
var nameToken = $tw.utils.parseTokenRegExp(source,pos,reAttributeName),
namePos = nameToken && $tw.utils.skipWhiteSpace(source,nameToken.end),
separatorToken = nameToken && $tw.utils.parseTokenRegExp(source,namePos,/=|:/y),
separatorToken = nameToken && $tw.utils.parseTokenRegExp(source,namePos,/=|:/g),
isNewStyleSeparator = false; // If there is no separator then we don't allow new style values
// If we have a name and a separator then we have a named attribute
if(nameToken && separatorToken) {

View File

@@ -0,0 +1,105 @@
/*\
title: $:/core/modules/utils/wikifier.js
type: application/javascript
module-type: utils
A high level helper class for parsing and wikification
\*/
/*
Wikifier: parses a piece of text, builds the matching widget tree and renders it
into a detached container created from $tw.fakeDocument, so that the outcome can
be read back in several formats via getResult().

Recognised options:
wiki: wiki used for parsing and widget creation (falls back to $tw.wiki)
widget: parent widget for the wikified text (falls back to $tw.rootWidget)
text: the text to be parsed/wikified (falls back to "")
type: content type of the text (falls back to "")
mode: "inline" selects inline parsing, any other value does not (falls back to "block")
output: "text", "formattedtext", "html", "parsetree" or "widgettree" (falls back to "text")
*/
function Wikifier(options) {
	this.wiki = options.wiki || $tw.wiki;
	this.widget = options.widget || $tw.rootWidget;
	this.text = options.text || "";
	this.type = options.type || "";
	this.mode = options.mode || "block";
	this.output = options.output || "text";
	// Step 1: parse the text
	var parseOptions = {
		parseAsInline: this.mode === "inline"
	};
	this.parser = this.wiki.parseText(this.type,this.text,parseOptions);
	// Step 2: build the widget tree underneath the requested parent widget
	var widgetOptions = {
		document: $tw.fakeDocument,
		parentWidget: this.widget
	};
	this.widgetNode = this.wiki.makeWidget(this.parser,widgetOptions);
	// Step 3: render the widget tree into a fresh container
	this.container = $tw.fakeDocument.createElement("div");
	this.widgetNode.render(this.container,null);
}
/*
Refresh the widget tree, passing on whatever the root widget node's refresh() returns
*/
Wikifier.prototype.refresh = function(changedTiddlers) {
	var rootNode = this.widgetNode;
	return rootNode.refresh(changedTiddlers);
};
/*
Return the result in the format selected by the "output" option.
Returns undefined when the output format is not one of the recognised values.
*/
Wikifier.prototype.getResult = function() {
	var format = this.output;
	if(format === "text") {
		return this.container.textContent;
	}
	if(format === "formattedtext") {
		return this.container.formattedTextContent;
	}
	if(format === "html") {
		return this.container.innerHTML;
	}
	if(format === "parsetree" || format === "widgettree") {
		// Both tree formats are serialised as JSON using the configured indentation
		var subject = format === "parsetree" ? this.parser.tree : this.getWidgetTree();
		return JSON.stringify(subject,null,$tw.config.preferences.jsonSpaces);
	}
	return undefined;
};
/*
Return a plain object describing the widget tree (getResult() is what serialises it to a string).
Each node carries its parse tree node type, the tag (elements) or text (text nodes),
the attribute values as reported by getAttribute(), and its children, if any.
*/
Wikifier.prototype.getWidgetTree = function() {
	var describe = function(widgetNode) {
		var parseTreeNode = widgetNode.parseTreeNode,
			summary = {type: parseTreeNode.type};
		if(summary.type === "element") {
			summary.tag = parseTreeNode.tag;
		} else if(summary.type === "text") {
			summary.text = parseTreeNode.text;
		}
		// Attributes and children are only emitted when there is at least one of them
		if(Object.keys(widgetNode.attributes || {}).length > 0) {
			var attributes = {};
			$tw.utils.each(widgetNode.attributes,function(ignored,attrName) {
				attributes[attrName] = widgetNode.getAttribute(attrName);
			});
			summary.attributes = attributes;
		}
		if(Object.keys(widgetNode.children || {}).length > 0) {
			var children = [];
			$tw.utils.each(widgetNode.children,function(childWidget) {
				children.push(describe(childWidget));
			});
			summary.children = children;
		}
		return summary;
	};
	return describe(this.widgetNode);
};
exports.Wikifier = Wikifier;

View File

@@ -36,89 +36,22 @@ Compute the internal state of the widget
WikifyWidget.prototype.execute = function() {
// Get our parameters
this.wikifyName = this.getAttribute("name");
this.wikifyText = this.getAttribute("text");
this.wikifyType = this.getAttribute("type");
this.wikifyMode = this.getAttribute("mode","block");
this.wikifyOutput = this.getAttribute("output","text");
// Create the parse tree
this.wikifyParser = this.wiki.parseText(this.wikifyType,this.wikifyText,{
parseAsInline: this.wikifyMode === "inline"
// Create the wikifier
this.wikifier = new $tw.utils.Wikifier({
wiki: this.wiki,
widget: this,
text: this.getAttribute("text"),
type: this.getAttribute("type"),
mode: this.getAttribute("mode","block"),
output: this.getAttribute("output","text")
});
// Create the widget tree
this.wikifyWidgetNode = this.wiki.makeWidget(this.wikifyParser,{
document: $tw.fakeDocument,
parentWidget: this
});
// Render the widget tree to the container
this.wikifyContainer = $tw.fakeDocument.createElement("div");
this.wikifyWidgetNode.render(this.wikifyContainer,null);
this.wikifyResult = this.getResult();
this.wikifyResult = this.wikifier.getResult();
// Set context variable
this.setVariable(this.wikifyName,this.wikifyResult);
// Construct the child widgets
this.makeChildWidgets();
};
/*
Return the result string
*/
WikifyWidget.prototype.getResult = function() {
var result;
switch(this.wikifyOutput) {
case "text":
result = this.wikifyContainer.textContent;
break;
case "formattedtext":
result = this.wikifyContainer.formattedTextContent;
break;
case "html":
result = this.wikifyContainer.innerHTML;
break;
case "parsetree":
result = JSON.stringify(this.wikifyParser.tree,0,$tw.config.preferences.jsonSpaces);
break;
case "widgettree":
result = JSON.stringify(this.getWidgetTree(),0,$tw.config.preferences.jsonSpaces);
break;
}
return result;
};
/*
Return a string of the widget tree
*/
WikifyWidget.prototype.getWidgetTree = function() {
var copyNode = function(widgetNode,resultNode) {
var type = widgetNode.parseTreeNode.type;
resultNode.type = type;
switch(type) {
case "element":
resultNode.tag = widgetNode.parseTreeNode.tag;
break;
case "text":
resultNode.text = widgetNode.parseTreeNode.text;
break;
}
if(Object.keys(widgetNode.attributes || {}).length > 0) {
resultNode.attributes = {};
$tw.utils.each(widgetNode.attributes,function(attr,attrName) {
resultNode.attributes[attrName] = widgetNode.getAttribute(attrName);
});
}
if(Object.keys(widgetNode.children || {}).length > 0) {
resultNode.children = [];
$tw.utils.each(widgetNode.children,function(widgetChildNode) {
var node = {};
resultNode.children.push(node);
copyNode(widgetChildNode,node);
});
}
},
results = {};
copyNode(this.wikifyWidgetNode,results);
return results;
};
/*
Selectively refreshes the widget if needed. Returns true if the widget or any of its children needed re-rendering
*/
@@ -130,9 +63,9 @@ WikifyWidget.prototype.refresh = function(changedTiddlers) {
return true;
} else {
// Refresh the widget tree
if(this.wikifyWidgetNode.refresh(changedTiddlers)) {
if(this.wikifier.refresh(changedTiddlers)) {
// Check if there was any change
var result = this.getResult();
var result = this.wikifier.getResult();
if(result !== this.wikifyResult) {
// If so, save the change
this.wikifyResult = result;

View File

@@ -0,0 +1,21 @@
title: Operators/Wikify/TextMode
description: Simple wikify operator
type: text/vnd.tiddlywiki-multiple
tags: [[$:/tags/wiki-test-spec]]
title: Output
\parsermode inline
<$text text={{{ [subfilter{Filter}] }}}/>
+
title: Filter
[{Text}wikify[html],[inline],[text/vnd.tiddlywiki]]
+
title: Text
This is ''the text'' that is __wikified__
+
title: ExpectedResult
This is &lt;strong&gt;the text&lt;/strong&gt; that is &lt;u&gt;wikified&lt;/u&gt;

View File

@@ -0,0 +1,64 @@
title: Operators/Wikify/ParseTreeMode
description: Simple wikify operator
type: text/vnd.tiddlywiki-multiple
tags: [[$:/tags/wiki-test-spec]]
title: Output
\parsermode inline
<$text text={{{ [subfilter{Filter}] }}}/>
+
title: Filter
[{Text}wikify[parsetree],[inline],[text/vnd.tiddlywiki]]
+
title: Text
This is ''the text'' that is __wikified__
+
title: ExpectedResult
[
{
"type": "text",
"text": "This is ",
"start": 0,
"end": 8
},
{
"type": "element",
"tag": "strong",
"children": [
{
"type": "text",
"text": "the text",
"start": 10,
"end": 18
}
],
"start": 8,
"end": 20,
"rule": "bold"
},
{
"type": "text",
"text": " that is ",
"start": 20,
"end": 29
},
{
"type": "element",
"tag": "u",
"children": [
{
"type": "text",
"text": "wikified",
"start": 31,
"end": 39
}
],
"start": 29,
"end": 41,
"rule": "underscore"
}
]

View File

@@ -0,0 +1,21 @@
title: Operators/Wikify/TextMode
description: Simple wikify operator
type: text/vnd.tiddlywiki-multiple
tags: [[$:/tags/wiki-test-spec]]
title: Output
\parsermode inline
<$text text={{{ [subfilter{Filter}] }}}/>
+
title: Filter
[{Text}wikify[text],[inline],[text/vnd.tiddlywiki]]
+
title: Text
This is ''the text'' that is __wikified__
+
title: ExpectedResult
This is the text that is wikified

View File

@@ -12,15 +12,3 @@ eg="""<<tabs "[tag[sampletab]]" "SampleTabTwo" "$:/state/tab2" "tc-vertical">>""
<$macrocall $name=".example" n="3"
eg="""<<tabs "[tag[sampletab]nsort[order]]" "SampleTabThree" "$:/state/tab3" "tc-vertical">>"""/>
The following example sets the default tab to be the first tiddler selected in the filter and makes the saved state non-persistent (by using "~$:/temp/"):
<$macrocall $name=".example" n="4"
eg="""<$set name=tl filter="[tag[sampletab]nsort[order]]">
<$transclude $variable=tabs tabsList=<<tl>> default={{{[enlist<tl>]}}} state="$:/temp/state/tab" class="tc-vertical"/>
</$set>"""/>
<<.from-version "5.4.0">> Dynamic parameters can be used to specify the default tab:
<$macrocall $name=".example" n="5"
eg="""<<tabs "[tag[sampletab]nsort[order]]" default={{{[tag[sampletab]nsort[order]]}}} state="$:/temp/state/tab" class="tc-vertical">>"""/>

View File

@@ -0,0 +1,17 @@
title: $:/changenotes/5.4.0/#9397
description: Fix critical freelinks bugs: first character loss and false positive matches in v5.4.0
release: 5.4.0
tags: $:/tags/ChangeNote
change-type: bugfix
change-category: plugin
github-links: https://github.com/TiddlyWiki/TiddlyWiki5/pull/9084 https://github.com/TiddlyWiki/TiddlyWiki5/pull/9397
github-contributors: s793016
This note addresses two major bugs introduced in the Freelinks plugin with the v5.4.0 release:
Fixes:
* First Character Loss: The first character of a matched word would incorrectly disappear (e.g., "The" became "he"). This was fixed by correctly timing the filtering of the current tiddler's title during match validation, ensuring proper substring handling.
* False Positive Matches: Unrelated words (like "it is" or "Choose") would incorrectly link to a tiddler title. This was resolved by fixing wrong output merging in the Aho-Corasick failure-link handling, eliminating spurious matches from intermediate nodes, and adding cycle detection.
Impact:
* Significantly improved correctness and reliability of automatic linking for all users, especially in multilingual and large wikis.

View File

@@ -1,48 +0,0 @@
title: $:/changenotes/5.4.0/#9676
description: Fix critical freelinks bugs: first character loss and false positive matches in v5.4.0
release: 5.4.0
tags: $:/tags/ChangeNote
change-type: bugfix
change-category: plugin
github-links: https://github.com/TiddlyWiki/TiddlyWiki5/pull/9084 https://github.com/TiddlyWiki/TiddlyWiki5/pull/9397 https://github.com/TiddlyWiki/TiddlyWiki5/pull/9676
github-contributors: s793016
Fixes and optimizations to the Freelinks plugin's Aho-Corasick implementation following #9397.
Fixes:
* Failure Links Non-Functional (Critical): The failure link map used a plain object `{}` with trie nodes as keys. Since all JavaScript objects coerce to the same string `[object Object]`, every node resolved to the same map entry. Failure links were silently broken for all overlapping patterns. Fixed by replacing with `WeakMap`.
* Cache Rebuilt on Every UI Interaction (Performance): Any `$:/state/...` update (e.g. clicking tabs) would trigger a full Aho-Corasick rebuild, causing severe lag on large wikis. The `refresh` logic now ignores system tiddlers, with an explicit allowlist for plugin config tiddlers.
* Short Match Blocking Longer Match: A shorter title appearing earlier (e.g. "The New") could prevent a longer overlapping title (e.g. "New York City") from matching. Replaced left-to-right greedy selection with global length-first sorting and interval occupation tracking.
* Unicode Index Desync in ignoreCase Mode: Calling `toLowerCase()` on the full text before searching could change string length (e.g. Turkish "İ" expands), causing `substring()` to split Emoji surrogate pairs and produce garbage output. Case conversion is now done per-character during search.
* Removed Vestigial Regex Escaping: `escapeRegExp()` was called during trie construction but Aho-Corasick operates on literal character transitions, not regex. Removed.
Impact:
* Overlapping titles now match correctly for the first time.
* No cache rebuilds during normal UI interactions on large wikis.
* Correct longest-match behavior for titles sharing substrings.
* Safe Emoji and complex Unicode handling in case-insensitive mode.
#9397
This note addresses two major bugs introduced in the Freelinks plugin with the v5.4.0 release:
Fixes:
* First Character Loss: The first character of a matched word would incorrectly disappear (e.g., "The" became "he"). This was fixed by correctly timing the filtering of the current tiddler's title during match validation, ensuring proper substring handling.
* False Positive Matches: Unrelated words (like "it is" or "Choose") would incorrectly link to a tiddler title. This was resolved by fixing wrong output merging in the Aho-Corasick failure-link handling, eliminating spurious matches from intermediate nodes, and adding cycle detection.
Impact:
* Significantly improved correctness and reliability of automatic linking for all users, especially in multilingual and large wikis.
#9084
This change introduces a fully optimized override of the core text widget, integrating an enhanced Aho-Corasick algorithm for automatic linkification of tiddler titles within text (freelinks). The new implementation prioritizes performance for large wikis and correct support for non-Latin scripts such as Chinese.
Highlights:
- Full switch from regex-based matching to a custom, robust Aho-Corasick engine dedicated to rapid, multi-pattern title detection—drastically decreasing linkification time (tested: 15s reduced to 100–500ms on ~12,000 tiddlers).
- Handles extremely large title sets gracefully, including a chunked insertion process and use of a persistent cache (`$:/config/Freelinks/PersistAhoCorasickCache`) to further accelerate subsequent linking operations in large/active wikis.
- Improvements for CJK and non-Latin text: supports linking using long or full-width symbol titles such as '：' (U+FF1A) with no split or mismatch.
- Smart prioritization: longer titles are automatically matched before shorter, more ambiguous ones, preventing partial/incorrect linking.
- Actively skips self-linking in the current tiddler and prevents overlapping matches for clean, deterministic linkification.
- End users with large or multilingual wikis see massive performance boost and 100% accurate linking for complex, full-width, or multi-language titles.
- New options for persistent match cache and word boundary checking (`$:/config/Freelinks/WordBoundary`), both can be tuned based on wiki size and content language needs.
- Safe for gradual rollout: legacy behavior is preserved if the new freelinks override is not enabled.

View File

@@ -4,10 +4,10 @@
"tiddlywiki/browser-sniff",
"tiddlywiki/confetti",
"tiddlywiki/dynannotate",
"tiddlywiki/tour",
"tiddlywiki/internals",
"tiddlywiki/menubar",
"tiddlywiki/railroad",
"tiddlywiki/tour"
"tiddlywiki/railroad"
],
"themes": [
"tiddlywiki/vanilla",

View File

@@ -1,23 +1,51 @@
/*\
title: $:/core/modules/utils/aho-corasick.js
type: application/javascript
module-type: utils
Optimized Aho-Corasick string matching algorithm implementation with enhanced performance
and error handling for TiddlyWiki freelinking functionality.
Optimized Aho-Corasick string matching algorithm implementation with enhanced performance and error handling for TiddlyWiki freelinking functionality.
- Uses WeakMap for failure links (required; plain object keys would collide).
- search() converts case per character to avoid Unicode index desync.
- Optional word boundary filtering: CJK always allowed; Latin requires non-word chars around.
Usage:
Initialization:
Create an AhoCorasick instance: var ac = new AhoCorasick();
After initialization, the trie and failure structures are automatically created to store patterns and failure links.
Adding Patterns:
Call addPattern(pattern, index) to add a pattern, e.g., ac.addPattern("[[Link]]", 0);.
pattern is the string to match, and index is an identifier for tracking results.
Multiple patterns can be added, stored in the trie structure.
Building Failure Links:
Call buildFailureLinks() to construct failure links for efficient multi-pattern matching.
Includes a maximum node limit (default 100,000 or 15 times the pattern count) to prevent excessive computation.
Performing Search:
Use search(text, useWordBoundary) to find pattern matches in the text.
text is the input string, and useWordBoundary (boolean) controls whether to enforce word boundary checks.
Returns an array of match results, each containing pattern (matched pattern), index (start position), length (pattern length), and titleIndex (pattern identifier).
Word Boundary Check:
If useWordBoundary is true, only matches that are not directly preceded or followed by a word character (a letter, digit, or underscore) are returned.
Cleanup and Statistics:
Use clear() to reset the trie and failure links, freeing memory.
Use getStats() to retrieve statistics, including node count (nodeCount), pattern count (patternCount), and failure link count (failureLinks).
Notes
Performance Considerations: The Aho-Corasick trie may consume significant memory with a large number of patterns. Limit the number of patterns (e.g., <10,000) for optimal performance.
Error Handling: The module includes maximum node and failure depth limits (maxFailureDepth) to prevent infinite loops or memory overflow.
Word Boundary: Enabling useWordBoundary ensures more precise matches, ideal for link detection scenarios.
Compatibility: Ensure compatibility with other TiddlyWiki modules (e.g., wikiparser.js) when processing WikiText.
Debugging: Use getStats() to inspect the trie structure's size and ensure it does not overload browser memory.
\*/
"use strict";
function AhoCorasick() {
this.trie = {};
this.failure = new WeakMap();
this.failure = {};
this.maxFailureDepth = 100;
this.patternCount = 0;
}
@@ -26,164 +54,198 @@ AhoCorasick.prototype.addPattern = function(pattern, index) {
if(!pattern || typeof pattern !== "string" || pattern.length === 0) {
return;
}
var node = this.trie;
for(var i = 0; i < pattern.length; i++) {
var ch = pattern[i];
if(!node[ch]) {
node[ch] = {};
var char = pattern[i];
if(!node[char]) {
node[char] = {};
}
node = node[ch];
node = node[char];
}
if(!node.$) {
node.$ = [];
}
node.$.push({
pattern: pattern,
node.$.push({
pattern: pattern,
index: index,
length: pattern.length
});
this.patternCount++;
};
AhoCorasick.prototype.buildFailureLinks = function() {
var queue = [];
var root = this.trie;
var self = this;
this.failure = new WeakMap();
this.failure.set(root, root);
for(var ch in root) {
if(ch === "$") continue;
if(root[ch] && typeof root[ch] === "object") {
this.failure.set(root[ch], root);
queue.push(root[ch]);
this.failure[root] = root;
for(var char in root) {
if(root[char] && char !== "$") {
this.failure[root[char]] = root;
queue.push(root[char]);
}
}
var processedNodes = 0;
var maxNodes = Math.max(100000, this.patternCount * 15);
while(queue.length > 0) {
if(processedNodes++ >= maxNodes) {
throw new Error("Aho-Corasick: buildFailureLinks exceeded maximum nodes (" + maxNodes + ")");
}
while(queue.length > 0 && processedNodes < maxNodes) {
var node = queue.shift();
processedNodes++;
for(var char in node) {
if(node[char] && char !== "$") {
var child = node[char];
var fail = this.failure[node];
var failureDepth = 0;
while(fail && !fail[char] && failureDepth < this.maxFailureDepth) {
fail = this.failure[fail];
failureDepth++;
}
var failureLink = (fail && fail[char]) ? fail[char] : root;
this.failure[child] = failureLink;
// Do not merge outputs from failure links during build
// Instead, collect matches dynamically by traversing failure links during search
for(var edge in node) {
if(edge === "$") continue;
var child = node[edge];
if(!child || typeof child !== "object") continue;
var fail = self.failure.get(node) || root;
var depth = 0;
while(fail !== root && !fail[edge] && depth < self.maxFailureDepth) {
fail = self.failure.get(fail) || root;
depth++;
queue.push(child);
}
var nextFail = (fail[edge] && fail[edge] !== child) ? fail[edge] : root;
self.failure.set(child, nextFail);
if(nextFail.$) {
if(!child.$) child.$ = [];
child.$ = child.$.concat(nextFail.$);
}
queue.push(child);
}
}
if(processedNodes >= maxNodes) {
throw new Error("Aho-Corasick: buildFailureLinks exceeded maximum nodes (" + maxNodes + ")");
}
};
AhoCorasick.prototype.search = function(text, useWordBoundary, ignoreCase) {
AhoCorasick.prototype.search = function(text, useWordBoundary) {
if(!text || typeof text !== "string" || text.length === 0) {
return [];
}
var matches = [];
var node = this.trie;
var root = this.trie;
var textLength = text.length;
var maxMatches = Math.min(textLength * 2, 10000);
for(var i = 0; i < textLength; i++) {
var ch = ignoreCase ? text[i].toLowerCase() : text[i];
while(node !== root && !node[ch]) {
node = this.failure.get(node) || root;
var char = text[i];
var transitionCount = 0;
// Follow failure links to find a valid transition
while(node && !node[char] && node !== this.trie && transitionCount < this.maxFailureDepth) {
node = this.failure[node] || this.trie;
transitionCount++;
}
if(node[ch]) {
node = node[ch];
}
if(node.$) {
var outputs = node.$;
for(var j = 0; j < outputs.length && matches.length < maxMatches; j++) {
var out = outputs[j];
var matchStart = i - out.length + 1;
var matchEnd = i + 1;
if(matchStart < 0) continue;
if(useWordBoundary && !this.isWordBoundaryMatch(text, matchStart, matchEnd)) {
continue;
}
matches.push({
pattern: out.pattern,
index: matchStart,
length: out.length,
titleIndex: out.index
});
if(node && node[char]) {
node = node[char];
} else {
node = this.trie;
if(this.trie[char]) {
node = this.trie[char];
}
}
// Traverse the current node and its failure link chain to gather all patterns
var currentNode = node;
var collectCount = 0;
var visitedNodes = new Set();
while(currentNode && collectCount < 10) {
// Prevent infinite loops
if(visitedNodes.has(currentNode)) {
break;
}
visitedNodes.add(currentNode);
// Only collect outputs from the current node (not merged ones)
if(currentNode.$) {
var outputs = currentNode.$;
for(var j = 0; j < outputs.length && matches.length < maxMatches; j++) {
var output = outputs[j];
var matchStart = i - output.length + 1;
var matchEnd = i + 1;
var matchedText = text.substring(matchStart, matchEnd);
if(matchedText !== output.pattern) {
continue;
}
if(useWordBoundary && !this.isWordBoundaryMatch(text, matchStart, matchEnd)) {
continue;
}
matches.push({
pattern: output.pattern,
index: matchStart,
length: output.length,
titleIndex: output.index
});
}
}
currentNode = this.failure[currentNode];
if(currentNode === this.trie) break;
collectCount++;
}
}
return matches;
};
AhoCorasick.prototype.isWordBoundaryMatch = function(text, start, end) {
var matchedText = text.substring(start, end);
if(/[\u3400-\u9FFF\uF900-\uFAFF]/.test(matchedText)) {
return true;
}
var beforeChar = start > 0 ? text[start - 1] : "";
var afterChar = end < text.length ? text[end] : "";
var isLatinWordChar = function(char) {
var isWordChar = function(char) {
return /[a-zA-Z0-9_\u00C0-\u00FF]/.test(char);
};
return !isLatinWordChar(beforeChar) && !isLatinWordChar(afterChar);
var beforeIsWord = beforeChar && isWordChar(beforeChar);
var afterIsWord = afterChar && isWordChar(afterChar);
return !beforeIsWord && !afterIsWord;
};
AhoCorasick.prototype.clear = function() {
this.trie = {};
this.failure = new WeakMap();
this.failure = {};
this.patternCount = 0;
};
AhoCorasick.prototype.getStats = function() {
var nodeCount = 0;
// eslint-disable-next-line no-unused-vars
var patternCount = 0;
var failureCount = 0;
function countNodes(node) {
if(!node) return;
nodeCount++;
if(node.$) {
patternCount += node.$.length;
}
for(var key in node) {
if(key === "$") continue;
if(node[key] && typeof node[key] === "object") {
if(node[key] && typeof node[key] === "object" && key !== "$") {
countNodes(node[key]);
}
}
}
countNodes(this.trie);
failureCount += Object.keys(this.failure).length;
return {
nodeCount: nodeCount,
patternCount: this.patternCount,
failureLinks: this.patternCount
failureLinks: failureCount
};
};

View File

@@ -1,14 +1,9 @@
/*\
title: $:/core/modules/widgets/text.js
type: application/javascript
module-type: widget
Optimized override of the core text widget that automatically linkifies text.
- Supports non-Latin languages like Chinese.
- Global longest-match priority, then removes overlaps.
- Excludes current tiddler title from linking.
- Uses Aho-Corasick for performance.
An optimized override of the core text widget that automatically linkifies the text, with support for non-Latin languages like Chinese, prioritizing longer titles, skipping processed matches, excluding the current tiddler title from linking, and handling large title sets with enhanced Aho-Corasick algorithm.
\*/
@@ -23,6 +18,28 @@ var Widget = require("$:/core/modules/widgets/widget.js").widget,
ElementWidget = require("$:/core/modules/widgets/element.js").element,
AhoCorasick = require("$:/core/modules/utils/aho-corasick.js").AhoCorasick;
var ESCAPE_REGEX = /[\\^$*+?.()|[\]{}]/g;
function escapeRegExp(str) {
try {
return str.replace(ESCAPE_REGEX, "\\$&");
} catch(e) {
return null;
}
}
function FastPositionSet() {
this.set = new Set();
}
FastPositionSet.prototype.add = function(pos) {
this.set.add(pos);
};
FastPositionSet.prototype.has = function(pos) {
return this.set.has(pos);
};
var TextNodeWidget = function(parseTreeNode,options) {
this.initialise(parseTreeNode,options);
};
@@ -37,121 +54,138 @@ TextNodeWidget.prototype.render = function(parent,nextSibling) {
};
TextNodeWidget.prototype.execute = function() {
var self = this;
var ignoreCase = self.getVariable("tv-freelinks-ignore-case",{defaultValue:"no"}).trim() === "yes";
var self = this,
ignoreCase = self.getVariable("tv-freelinks-ignore-case",{defaultValue:"no"}).trim() === "yes";
var childParseTree = [{
type: "plain-text",
text: this.getAttribute("text",this.parseTreeNode.text || "")
}];
var text = childParseTree[0].text;
if(!text || text.length < 2) {
this.makeChildWidgets(childParseTree);
return;
}
if(this.getVariable("tv-wikilinks",{defaultValue:"yes"}) !== "no" &&
this.getVariable("tv-freelinks",{defaultValue:"no"}) === "yes" &&
!this.isWithinButtonOrLink()) {
if(this.getVariable("tv-wikilinks",{defaultValue:"yes"}) !== "no" &&
this.getVariable("tv-freelinks",{defaultValue:"no"}) === "yes" &&
!this.isWithinButtonOrLink()) {
var currentTiddlerTitle = this.getVariable("currentTiddler") || "";
var useWordBoundary = self.wiki.getTiddlerText(WORD_BOUNDARY_TIDDLER,"no") === "yes";
var useWordBoundary = self.wiki.getTiddlerText(WORD_BOUNDARY_TIDDLER, "no") === "yes";
var cacheKey = "tiddler-title-info-" + (ignoreCase ? "insensitive" : "sensitive");
this.tiddlerTitleInfo = this.wiki.getGlobalCache(cacheKey,function() {
return computeTiddlerTitleInfo(self,ignoreCase);
this.tiddlerTitleInfo = this.wiki.getGlobalCache(cacheKey, function() {
return computeTiddlerTitleInfo(self, ignoreCase);
});
if(this.tiddlerTitleInfo && this.tiddlerTitleInfo.titles && this.tiddlerTitleInfo.titles.length > 0 && this.tiddlerTitleInfo.ac) {
var newParseTree = this.processTextWithMatches(text,currentTiddlerTitle,ignoreCase,useWordBoundary);
if(newParseTree && newParseTree.length > 0 &&
(newParseTree.length > 1 || newParseTree[0].type !== "plain-text")) {
if(this.tiddlerTitleInfo.titles.length > 0) {
var newParseTree = this.processTextWithMatches(text, currentTiddlerTitle, ignoreCase, useWordBoundary);
if(newParseTree && newParseTree.length > 0 &&
(newParseTree.length > 1 || newParseTree[0].type !== "plain-text")) {
childParseTree = newParseTree;
}
}
}
this.makeChildWidgets(childParseTree);
};
/*
Split a text string into parse tree nodes, turning each usable tiddler title match into a freelink.

text: the string to scan
currentTiddlerTitle: title of the current tiddler; matches resolving to this title are not linked
ignoreCase: truthy to compare titles case-insensitively
useWordBoundary: passed through unchanged to the automaton's search()

Returns an array of parse tree nodes: "plain-text" nodes for unmatched runs and "link" nodes
(class "tc-freelink") for accepted matches. When nothing can be linked the result is a single
"plain-text" node holding the original text.

Reads this.tiddlerTitleInfo, expecting `titles` (array of titles) and `ac` (object with a search() method).
*/
TextNodeWidget.prototype.processTextWithMatches = function(text,currentTiddlerTitle,ignoreCase,useWordBoundary) {
	var info = this.tiddlerTitleInfo,
		plainResult = [{type: "plain-text", text: text}];
	if(!text || text.length === 0 || !info || !info.ac || !info.titles) {
		return plainResult;
	}
	var matches;
	try {
		// NOTE(review): assumes search(text, useWordBoundary, ignoreCase) does its own case folding
		// and reports indices into the original text - confirm against the AhoCorasick module in use
		matches = info.ac.search(text,useWordBoundary,ignoreCase);
	} catch(e) {
		// Best effort: a failing search leaves the text unlinked rather than breaking rendering
		return plainResult;
	}
	if(!matches || matches.length === 0) {
		return plainResult;
	}
	var titleToCompare = ignoreCase ?
		(currentTiddlerTitle ? currentTiddlerTitle.toLowerCase() : "") :
		currentTiddlerTitle;
	// Longest matches claim their characters first; equal lengths are taken left to right
	matches.sort(function(a,b) {
		if(b.length !== a.length) {
			return b.length - a.length;
		}
		return a.index - b.index;
	});
	// occupied[p] becomes 1 once character p belongs to an accepted match
	var occupied = new Uint8Array(text.length),
		validMatches = [];
	for(var i = 0; i < matches.length; i++) {
		var candidate = matches[i],
			start = candidate.index,
			end = start + candidate.length;
		// Skip out-of-range and empty (or malformed) spans
		if(start < 0 || end > text.length || !(end > start)) {
			continue;
		}
		var candidateTitle = info.titles[candidate.titleIndex];
		if(!candidateTitle) {
			continue;
		}
		// Never link a tiddler to itself
		var candidateTitleToCompare = ignoreCase ? candidateTitle.toLowerCase() : candidateTitle;
		if(titleToCompare && candidateTitleToCompare === titleToCompare) {
			continue;
		}
		var overlapping = false;
		for(var j = start; j < end; j++) {
			if(occupied[j]) {
				overlapping = true;
				break;
			}
		}
		if(overlapping) {
			continue;
		}
		validMatches.push(candidate);
		for(var k = start; k < end; k++) {
			occupied[k] = 1;
		}
	}
	if(validMatches.length === 0) {
		return plainResult;
	}
	// Emit nodes in document order
	validMatches.sort(function(a,b) {
		return a.index - b.index;
	});
	var newParseTree = [],
		currentPos = 0;
	for(var x = 0; x < validMatches.length; x++) {
		var match = validMatches[x],
			matchStart = match.index,
			matchEnd = matchStart + match.length;
		if(matchStart > currentPos) {
			newParseTree.push({
				type: "plain-text",
				text: text.substring(currentPos,matchStart)
			});
		}
		newParseTree.push({
			type: "link",
			attributes: {
				to: {type: "string", value: info.titles[match.titleIndex]},
				"class": {type: "string", value: "tc-freelink"}
			},
			children: [{
				// NOTE(review): child node type taken to be "plain-text", matching the other text nodes emitted here - confirm
				type: "plain-text",
				// The link shows the text exactly as written, not the canonical title
				text: text.substring(matchStart,matchEnd)
			}]
		});
		currentPos = matchEnd;
	}
	if(currentPos < text.length) {
		newParseTree.push({
			type: "plain-text",
			text: text.substring(currentPos)
		});
	}
	return newParseTree;
};
/*
Build the list of linkable tiddler titles together with an automaton loaded with them.

self: widget whose `wiki` is queried
ignoreCase: truthy to register each title with the automaton in lower case

Candidate titles come from the filter stored in TITLE_TARGET_FILTER when that tiddler has text,
otherwise from wiki.allTitles(). Empty titles and titles starting with "$:/" are dropped. The
remaining titles are sorted longest first (ties by string comparison), and each is added to the
automaton under its index in that sorted array.

Returns {titles: <sorted array>, ac: <AhoCorasick instance>}. If there are no candidates, or
building the failure links throws, the result is an empty title list with a fresh automaton.
*/
function computeTiddlerTitleInfo(self,ignoreCase) {
	var targetFilterText = self.wiki.getTiddlerText(TITLE_TARGET_FILTER),
		titles = targetFilterText ?
			self.wiki.filterTiddlers(targetFilterText,$tw.rootWidget) :
			self.wiki.allTitles();
	if(!titles || titles.length === 0) {
		return {
			titles: [],
			ac: new AhoCorasick()
		};
	}
	var validTitles = [];
	for(var i = 0; i < titles.length; i++) {
		var candidate = titles[i];
		if(candidate && candidate.length > 0 && candidate.substring(0,3) !== "$:/") {
			validTitles.push(candidate);
		}
	}
	validTitles.sort(function(a,b) {
		var lenDiff = b.length - a.length;
		if(lenDiff !== 0) {
			return lenDiff;
		}
		return a < b ? -1 : a > b ? 1 : 0;
	});
	var ac = new AhoCorasick();
	for(var j = 0; j < validTitles.length; j++) {
		var title = validTitles[j],
			pattern = ignoreCase ? title.toLowerCase() : title;
		// The pattern id doubles as the index into the returned titles array
		ac.addPattern(pattern,j);
	}
	try {
		ac.buildFailureLinks();
	} catch(e) {
		// Best effort: an automaton that cannot be built disables linking instead of failing the caller
		return {
			titles: [],
			ac: new AhoCorasick()
		};
	}
	return {
		titles: validTitles,
		ac: ac
	};
}
/*
Report whether this widget is nested inside something that is already clickable.

Walks up the parentWidget chain and returns true as soon as an ancestor is a ButtonWidget,
a LinkWidget, or an ElementWidget whose parse tree node has the tag "a". Returns false when
the chain ends without finding one.
*/
TextNodeWidget.prototype.isWithinButtonOrLink = function() {
	var widget = this.parentWidget;
	while(widget) {
		if(widget instanceof ButtonWidget ||
			widget instanceof LinkWidget ||
			((widget instanceof ElementWidget) && widget.parseTreeNode.tag === "a")) {
			return true;
		}
		widget = widget.parentWidget;
	}
	return false;
};
/*
Refresh the widget in response to tiddler changes.

changedTiddlers: hashmap of changed tiddler titles to change descriptors (may be undefined)

The set of linkable titles is treated as changed when:
- the WORD_BOUNDARY_TIDDLER or TITLE_TARGET_FILTER tiddler changed, or
- a title present in this.tiddlerTitleInfo.titles was deleted, or
- a changed, non-deleted tiddler that is not a draft (no "draft.of" field) is missing from
  this.tiddlerTitleInfo.titles.
Other titles starting with "$:/" are ignored.

When the text attribute or the title set changed, both title-info cache keys are passed to
wiki.clearCache() (title changes only), the widget re-renders itself and true is returned.
Otherwise the result of refreshChildren() is returned, or false when there is no change list.
*/
TextNodeWidget.prototype.refresh = function(changedTiddlers) {
	var self = this,
		changedAttributes = this.computeAttributes(),
		knownTitles = this.tiddlerTitleInfo && this.tiddlerTitleInfo.titles,
		titlesHaveChanged = false;
	if(changedTiddlers) {
		$tw.utils.each(changedTiddlers,function(change,title) {
			// One relevant change is enough; skip the remaining entries
			if(titlesHaveChanged) {
				return;
			}
			if(title === WORD_BOUNDARY_TIDDLER || title === TITLE_TARGET_FILTER) {
				titlesHaveChanged = true;
				return;
			}
			if(title.substring(0,3) === "$:/") {
				return;
			}
			var isKnown = !!knownTitles && knownTitles.indexOf(title) !== -1;
			if(change && change.isDeleted) {
				// A deletion only matters if the title was one we could link to
				titlesHaveChanged = isKnown;
				return;
			}
			var tiddler = self.wiki.getTiddler(title);
			if(tiddler && tiddler.hasField("draft.of")) {
				return;
			}
			if(!isKnown) {
				titlesHaveChanged = true;
			}
		});
	}
	var wordBoundaryChanged = !!(changedTiddlers && changedTiddlers[WORD_BOUNDARY_TIDDLER]);
	if(changedAttributes.text || titlesHaveChanged || wordBoundaryChanged) {
		if(titlesHaveChanged) {
			// The case setting may differ between widgets, so drop both variants
			self.wiki.clearCache("tiddler-title-info-insensitive");
			self.wiki.clearCache("tiddler-title-info-sensitive");
		}
		this.refreshSelf();
		return true;
	}
	if(changedTiddlers) {
		return this.refreshChildren(changedTiddlers);
	}
	return false;
};
// Expose the TextNodeWidget constructor as this module's `text` export
exports.text = TextNodeWidget;