1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2026-02-27 12:29:51 +00:00

Compare commits

..

7 Commits

Author SHA1 Message Date
saqimtiaz
fe3ac0b28a fix: whitespace 2026-02-26 09:45:09 +01:00
saqimtiaz
d93dcbea6c fix: whitespace 2026-02-26 09:44:52 +01:00
saqimtiaz
66d12c257c fix: remove backwards incompatible change 2026-02-26 09:39:42 +01:00
saqimtiaz
321b5bafda fix: blockquotes trigger parsing as macros 2026-02-26 08:46:23 +01:00
saqimtiaz
ba919fba7a fix: blockquotes trigger parsing as macros 2026-02-26 08:38:26 +01:00
s793016
b0d99f3bd3 Fix Freelinks Aho-Corasick: failure links, cache invalidation, longest-match, and Unicode safety (#9676)
* Update aho-corasick.js

 fix transition logic; ensure complete outputs (via failure-output merge); clean up stats/build scoping; clarify CJK boundary behavior.

* Update text.js

implement global longest-match priority with overlap suppression; fix refresh invalidation to ignore $:/state and drafts; handle deletions precisely to avoid rebuilding on draft deletion; add defensive check for cached automaton presence.

* Update text.js

remove comment

* Update aho-corasick.js

remove comment

* Create #9672.tid

* Create #2026-0222.tid

* Delete editions/tw5.com/tiddlers/releasenotes/5.4.0/#2026-0222.tid

* Update text.js

remove \"

* Update and rename #9672.tid to #9676.tid

change to right number

* Update #9397.tid

update the existing release note with the new PR link instead of creating a new release note.

* Delete editions/tw5.com/tiddlers/releasenotes/5.4.0/#9676.tid

update the existing release note with the new PR link instead of creating a new release note.

* Rename #9397.tid to #9676.tid

update the existing release note with the new PR link instead of creating a new release note.

* Update and rename #9676.tid to #9397.tid

add link

* Rename #9397.tid to #9676.tid

* Update tiddlywiki.info

add plugin for test build

* Update tiddlywiki.info

reverse change, ready to be merge.
2026-02-25 12:07:32 +01:00
Andrew Gregory
91e7a62c13 docs: Tabs examples dynamically setting default tab (#9693)
Where tabs have been selected using a filter it is sometimes useful to set the default tab dynamically from the filter results. In most of these cases it is also desirable that the state not be persistent.
2026-02-25 08:14:38 +01:00
13 changed files with 397 additions and 622 deletions

View File

@@ -1,34 +0,0 @@
/*\
title: $:/core/modules/filters/wikify.js
type: application/javascript
module-type: filteroperator
Filter operator wikifying each string in the input list and returning the result as a list of strings
\*/
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
/*
Export our filter function
*/
exports.wikify = function(source,operator,options) {
var output = operator.operands[0],
mode = operator.operands[1],
type = operator.operands[2],
results = [];
source(function(tiddler,title) {
var wikifier = new $tw.utils.Wikifier({
wiki: options.wiki,
widget: options.widget,
text: title,
type: type,
mode: mode,
output: output
});
results.push(wikifier.getResult());
});
return results;
};

View File

@@ -107,7 +107,7 @@ exports.parseStringLiteral = function(source,pos) {
type: "string",
start: pos
};
var reString = /(?:"""([\s\S]*?)"""|"([^"]*)")|(?:'([^']*)')|\[\[((?:[^\]]|\](?!\]))*)\]\]/g;
var reString = /(?:"""([\s\S]*?)"""|"([^"]*)")|(?:'([^']*)')|\[\[((?:[^\]]|\](?!\]))*)\]\]/y;
reString.lastIndex = pos;
var match = reString.exec(source);
if(match && match.index === pos) {
@@ -221,7 +221,7 @@ exports.parseMacroInvocationAsTransclusion = function(source,pos) {
orderedAttributes: []
};
// Define our regexps
var reVarName = /([^\s>"'=:]+)/g;
var reVarName = /([^\s>"'=:]+)/y;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Look for a double opening angle bracket
@@ -237,9 +237,11 @@ exports.parseMacroInvocationAsTransclusion = function(source,pos) {
}
$tw.utils.addAttributeToParseTreeNode(node,"$variable",token.match[1]);
pos = token.end;
// Check that the tag is terminated by a space or >>
if(!$tw.utils.parseWhiteSpace(source,pos) && !(source.charAt(pos) === ">" && source.charAt(pos + 1) === ">") ) {
return null;
// Check that the tag is terminated by a space or >>, and that there is a closing >> somewhere ahead
if(!(source.charAt(pos) === ">" && source.charAt(pos + 1) === ">") ) {
if(!$tw.utils.parseWhiteSpace(source,pos) || source.indexOf(">>",pos) === -1) {
return null;
}
}
// Process attributes
pos = $tw.utils.parseMacroParametersAsAttributes(node,source,pos);
@@ -267,7 +269,7 @@ exports.parseMVVReferenceAsTransclusion = function(source,pos) {
orderedAttributes: []
};
// Define our regexps
var reVarName = /([^\s>"'=:)]+)/g;
var reVarName = /([^\s>"'=:)]+)/y;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Look for a double opening parenthesis
@@ -323,17 +325,17 @@ exports.parseMacroParameterAsAttribute = function(source,pos) {
start: pos
};
// Define our regexps
var reAttributeName = /([^\/\s>"'`=:]+)/g,
reUnquotedAttribute = /((?:(?:>(?!>))|[^\s>"'])+)/g,
reFilteredValue = /\{\{\{([\S\s]+?)\}\}\}/g,
reIndirectValue = /\{\{([^\}]+)\}\}/g,
reSubstitutedValue = /(?:```([\s\S]*?)```|`([^`]|[\S\s]*?)`)/g;
var reAttributeName = /([^\/\s>"'`=:]+)/y,
reUnquotedAttribute = /((?:(?:>(?!>))|[^\s>"'])+)/y,
reFilteredValue = /\{\{\{([\S\s]+?)\}\}\}/y,
reIndirectValue = /\{\{([^\}]+)\}\}/y,
reSubstitutedValue = /(?:```([\s\S]*?)```|`([^`]|[\S\s]*?)`)/y;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Get the attribute name and the separator token
var nameToken = $tw.utils.parseTokenRegExp(source,pos,reAttributeName),
namePos = nameToken && $tw.utils.skipWhiteSpace(source,nameToken.end),
separatorToken = nameToken && $tw.utils.parseTokenRegExp(source,namePos,/=|:/g),
separatorToken = nameToken && $tw.utils.parseTokenRegExp(source,namePos,/=|:/y),
isNewStyleSeparator = false; // If there is no separator then we don't allow new style values
// If we have a name and a separator then we have a named attribute
if(nameToken && separatorToken) {

View File

@@ -1,105 +0,0 @@
/*\
title: $:/core/modules/utils/wikifier.js
type: application/javascript
module-type: utils
A high level helper class for parsing and wikification
\*/
/*
Wikifier: parses a piece of text, builds a widget tree for it and renders that
tree into a container created from $tw.fakeDocument.
Recognised options:
wiki: wiki used for parsing and widget creation (falls back to $tw.wiki)
widget: parent widget for the wikified text (falls back to $tw.rootWidget)
text: text to be parsed/wikified (falls back to "")
type: content type of the text (falls back to "")
mode: "inline" or "block" (falls back to "block")
output: text, formattedtext, html, parsetree or widgettree (falls back to "text")
*/
function Wikifier(options) {
	// Resolve the options, substituting defaults for anything falsy
	this.wiki = options.wiki || $tw.wiki;
	this.widget = options.widget || $tw.rootWidget;
	this.text = options.text || "";
	this.type = options.type || "";
	this.mode = options.mode || "block";
	this.output = options.output || "text";
	// Parse the text
	var parseOptions = {
		parseAsInline: this.mode === "inline"
	};
	this.parser = this.wiki.parseText(this.type,this.text,parseOptions);
	// Build the widget tree on top of the parse result
	var widgetOptions = {
		document: $tw.fakeDocument,
		parentWidget: this.widget
	};
	this.widgetNode = this.wiki.makeWidget(this.parser,widgetOptions);
	// Render into a fresh container element
	this.container = $tw.fakeDocument.createElement("div");
	this.widgetNode.render(this.container,null);
}
/*
Refresh the rendered widget tree against the changed tiddlers, passing back
whatever the root widget node's own refresh() returns
*/
Wikifier.prototype.refresh = function(changedTiddlers) {
	var widgetTree = this.widgetNode;
	return widgetTree.refresh(changedTiddlers);
};
/*
Return the wikified result in the format selected by this.output.
An unrecognised output value yields undefined.
*/
Wikifier.prototype.getResult = function() {
	var format = this.output;
	if(format === "text") {
		return this.container.textContent;
	}
	if(format === "formattedtext") {
		return this.container.formattedTextContent;
	}
	if(format === "html") {
		return this.container.innerHTML;
	}
	if(format === "parsetree" || format === "widgettree") {
		// Both tree formats are serialised as JSON with the configured indentation
		var tree = format === "parsetree" ? this.parser.tree : this.getWidgetTree();
		return JSON.stringify(tree,0,$tw.config.preferences.jsonSpaces);
	}
	return undefined;
};
/*
Return a plain-object snapshot of the rendered widget tree.
Each node carries its parse tree type, the tag (element nodes) or text (text
nodes), the computed attribute values if there are any, and its children if
there are any.
*/
Wikifier.prototype.getWidgetTree = function() {
	// True when the array/object is present and has at least one entry
	function hasEntries(collection) {
		return Object.keys(collection || {}).length > 0;
	}
	// Property insertion order (type, tag/text, attributes, children) is kept
	// stable because the caller serialises the snapshot with JSON.stringify
	function snapshot(widgetNode) {
		var kind = widgetNode.parseTreeNode.type,
			copy = {type: kind};
		if(kind === "element") {
			copy.tag = widgetNode.parseTreeNode.tag;
		} else if(kind === "text") {
			copy.text = widgetNode.parseTreeNode.text;
		}
		if(hasEntries(widgetNode.attributes)) {
			copy.attributes = {};
			$tw.utils.each(widgetNode.attributes,function(attr,attrName) {
				copy.attributes[attrName] = widgetNode.getAttribute(attrName);
			});
		}
		if(hasEntries(widgetNode.children)) {
			copy.children = [];
			$tw.utils.each(widgetNode.children,function(widgetChildNode) {
				copy.children.push(snapshot(widgetChildNode));
			});
		}
		return copy;
	}
	return snapshot(this.widgetNode);
};
exports.Wikifier = Wikifier;

View File

@@ -36,22 +36,89 @@ Compute the internal state of the widget
WikifyWidget.prototype.execute = function() {
// Get our parameters
this.wikifyName = this.getAttribute("name");
// Create the wikifier
this.wikifier = new $tw.utils.Wikifier({
wiki: this.wiki,
widget: this,
text: this.getAttribute("text"),
type: this.getAttribute("type"),
mode: this.getAttribute("mode","block"),
output: this.getAttribute("output","text")
this.wikifyText = this.getAttribute("text");
this.wikifyType = this.getAttribute("type");
this.wikifyMode = this.getAttribute("mode","block");
this.wikifyOutput = this.getAttribute("output","text");
// Create the parse tree
this.wikifyParser = this.wiki.parseText(this.wikifyType,this.wikifyText,{
parseAsInline: this.wikifyMode === "inline"
});
this.wikifyResult = this.wikifier.getResult();
// Create the widget tree
this.wikifyWidgetNode = this.wiki.makeWidget(this.wikifyParser,{
document: $tw.fakeDocument,
parentWidget: this
});
// Render the widget tree to the container
this.wikifyContainer = $tw.fakeDocument.createElement("div");
this.wikifyWidgetNode.render(this.wikifyContainer,null);
this.wikifyResult = this.getResult();
// Set context variable
this.setVariable(this.wikifyName,this.wikifyResult);
// Construct the child widgets
this.makeChildWidgets();
};
/*
Return the wikified result in the format selected by the "output" attribute.
An unrecognised output value yields undefined.
*/
WikifyWidget.prototype.getResult = function() {
	switch(this.wikifyOutput) {
		case "text":
			return this.wikifyContainer.textContent;
		case "formattedtext":
			return this.wikifyContainer.formattedTextContent;
		case "html":
			return this.wikifyContainer.innerHTML;
		case "parsetree":
			// Serialise the raw parse tree with the configured indentation
			return JSON.stringify(this.wikifyParser.tree,0,$tw.config.preferences.jsonSpaces);
		case "widgettree":
			// Serialise the simplified widget tree snapshot
			return JSON.stringify(this.getWidgetTree(),0,$tw.config.preferences.jsonSpaces);
		default:
			return undefined;
	}
};
/*
Return a plain-object snapshot of the wikified widget tree.
Each node records its parse tree type, the tag (element nodes) or text (text
nodes), the computed attribute values if there are any, and its children if
there are any.
*/
WikifyWidget.prototype.getWidgetTree = function() {
	var snapshot = {};
	// Fill `target` with the description of `widget`, recursing into children.
	// Keys are added in a fixed order (type, tag/text, attributes, children)
	// because the snapshot is later serialised with JSON.stringify.
	function describe(widget,target) {
		var parseNode = widget.parseTreeNode,
			attributeNames = Object.keys(widget.attributes || {}),
			childKeys = Object.keys(widget.children || {});
		target.type = parseNode.type;
		if(target.type === "element") {
			target.tag = parseNode.tag;
		} else if(target.type === "text") {
			target.text = parseNode.text;
		}
		if(attributeNames.length > 0) {
			target.attributes = {};
			$tw.utils.each(widget.attributes,function(attr,attrName) {
				target.attributes[attrName] = widget.getAttribute(attrName);
			});
		}
		if(childKeys.length > 0) {
			target.children = [];
			$tw.utils.each(widget.children,function(childWidget) {
				var childSnapshot = {};
				target.children.push(childSnapshot);
				describe(childWidget,childSnapshot);
			});
		}
	}
	describe(this.wikifyWidgetNode,snapshot);
	return snapshot;
};
/*
Selectively refreshes the widget if needed. Returns true if the widget or any of its children needed re-rendering
*/
@@ -63,9 +130,9 @@ WikifyWidget.prototype.refresh = function(changedTiddlers) {
return true;
} else {
// Refresh the widget tree
if(this.wikifier.refresh(changedTiddlers)) {
if(this.wikifyWidgetNode.refresh(changedTiddlers)) {
// Check if there was any change
var result = this.wikifier.getResult();
var result = this.getResult();
if(result !== this.wikifyResult) {
// If so, save the change
this.wikifyResult = result;

View File

@@ -1,21 +0,0 @@
title: Operators/Wikify/TextMode
description: Simple wikify operator
type: text/vnd.tiddlywiki-multiple
tags: [[$:/tags/wiki-test-spec]]
title: Output
\parsermode inline
<$text text={{{ [subfilter{Filter}] }}}/>
+
title: Filter
[{Text}wikify[html],[inline],[text/vnd.tiddlywiki]]
+
title: Text
This is ''the text'' that is __wikified__
+
title: ExpectedResult
This is &lt;strong&gt;the text&lt;/strong&gt; that is &lt;u&gt;wikified&lt;/u&gt;

View File

@@ -1,64 +0,0 @@
title: Operators/Wikify/ParseTreeMode
description: Simple wikify operator
type: text/vnd.tiddlywiki-multiple
tags: [[$:/tags/wiki-test-spec]]
title: Output
\parsermode inline
<$text text={{{ [subfilter{Filter}] }}}/>
+
title: Filter
[{Text}wikify[parsetree],[inline],[text/vnd.tiddlywiki]]
+
title: Text
This is ''the text'' that is __wikified__
+
title: ExpectedResult
[
{
"type": "text",
"text": "This is ",
"start": 0,
"end": 8
},
{
"type": "element",
"tag": "strong",
"children": [
{
"type": "text",
"text": "the text",
"start": 10,
"end": 18
}
],
"start": 8,
"end": 20,
"rule": "bold"
},
{
"type": "text",
"text": " that is ",
"start": 20,
"end": 29
},
{
"type": "element",
"tag": "u",
"children": [
{
"type": "text",
"text": "wikified",
"start": 31,
"end": 39
}
],
"start": 29,
"end": 41,
"rule": "underscore"
}
]

View File

@@ -1,21 +0,0 @@
title: Operators/Wikify/TextMode
description: Simple wikify operator
type: text/vnd.tiddlywiki-multiple
tags: [[$:/tags/wiki-test-spec]]
title: Output
\parsermode inline
<$text text={{{ [subfilter{Filter}] }}}/>
+
title: Filter
[{Text}wikify[text],[inline],[text/vnd.tiddlywiki]]
+
title: Text
This is ''the text'' that is __wikified__
+
title: ExpectedResult
This is the text that is wikified

View File

@@ -12,3 +12,15 @@ eg="""<<tabs "[tag[sampletab]]" "SampleTabTwo" "$:/state/tab2" "tc-vertical">>""
<$macrocall $name=".example" n="3"
eg="""<<tabs "[tag[sampletab]nsort[order]]" "SampleTabThree" "$:/state/tab3" "tc-vertical">>"""/>
The following example sets the default tab to be the first tiddler selected in the filter and makes the saved state non-persistent (by using "~$:/temp/"):
<$macrocall $name=".example" n="4"
eg="""<$set name=tl filter="[tag[sampletab]nsort[order]]">
<$transclude $variable=tabs tabsList=<<tl>> default={{{[enlist<tl>]}}} state="$:/temp/state/tab" class="tc-vertical"/>
</$set>"""/>
<<.from-version "5.4.0">> Dynamic parameters can be used to specify the default tab:
<$macrocall $name=".example" n="5"
eg="""<<tabs "[tag[sampletab]nsort[order]]" default={{{[tag[sampletab]nsort[order]]}}} state="$:/temp/state/tab" class="tc-vertical">>"""/>

View File

@@ -1,17 +0,0 @@
title: $:/changenotes/5.4.0/#9397
description: Fix critical freelinks bugs: first character loss and false positive matches in v5.4.0
release: 5.4.0
tags: $:/tags/ChangeNote
change-type: bugfix
change-category: plugin
github-links: https://github.com/TiddlyWiki/TiddlyWiki5/pull/9084 https://github.com/TiddlyWiki/TiddlyWiki5/pull/9397
github-contributors: s793016
This note addresses two major bugs introduced in the Freelinks plugin with the v5.4.0 release:
Fixes:
* First Character Loss: The first character of a matched word would incorrectly disappear (e.g., "The" became "he"). This was fixed by correctly timing the filtering of the current tiddler's title during match validation, ensuring proper substring handling.
* False Positive Matches: Unrelated words (like "it is" or "Choose") would incorrectly link to a tiddler title. This was resolved by fixing wrong output merging in the Aho-Corasick failure-link handling, eliminating spurious matches from intermediate nodes, and adding cycle detection.
Impact:
* Significantly improved correctness and reliability of automatic linking for all users, especially in multilingual and large wikis.

View File

@@ -0,0 +1,48 @@
title: $:/changenotes/5.4.0/#9676
description: Fix critical freelinks bugs: first character loss and false positive matches in v5.4.0
release: 5.4.0
tags: $:/tags/ChangeNote
change-type: bugfix
change-category: plugin
github-links: https://github.com/TiddlyWiki/TiddlyWiki5/pull/9084 https://github.com/TiddlyWiki/TiddlyWiki5/pull/9397 https://github.com/TiddlyWiki/TiddlyWiki5/pull/9676
github-contributors: s793016
Fixes and optimizations to the Freelinks plugin's Aho-Corasick implementation following #9397.
Fixes:
* Failure Links Non-Functional (Critical): The failure link map used a plain object `{}` with trie nodes as keys. Since all JavaScript objects coerce to the same string `[object Object]`, every node resolved to the same map entry. Failure links were silently broken for all overlapping patterns. Fixed by replacing with `WeakMap`.
* Cache Rebuilt on Every UI Interaction (Performance): Any `$:/state/...` update (e.g. clicking tabs) would trigger a full Aho-Corasick rebuild, causing severe lag on large wikis. The `refresh` logic now ignores system tiddlers, with an explicit allowlist for plugin config tiddlers.
* Short Match Blocking Longer Match: A shorter title appearing earlier (e.g. "The New") could prevent a longer overlapping title (e.g. "New York City") from matching. Replaced left-to-right greedy selection with global length-first sorting and interval occupation tracking.
* Unicode Index Desync in ignoreCase Mode: Calling `toLowerCase()` on the full text before searching could change string length (e.g. Turkish "İ" expands), causing `substring()` to split Emoji surrogate pairs and produce garbage output. Case conversion is now done per-character during search.
* Removed Vestigial Regex Escaping: `escapeRegExp()` was called during trie construction but Aho-Corasick operates on literal character transitions, not regex. Removed.
Impact:
* Overlapping titles now match correctly for the first time.
* No cache rebuilds during normal UI interactions on large wikis.
* Correct longest-match behavior for titles sharing substrings.
* Safe Emoji and complex Unicode handling in case-insensitive mode.
#9397
This note addresses two major bugs introduced in the Freelinks plugin with the v5.4.0 release:
Fixes:
* First Character Loss: The first character of a matched word would incorrectly disappear (e.g., "The" became "he"). This was fixed by correctly timing the filtering of the current tiddler's title during match validation, ensuring proper substring handling.
* False Positive Matches: Unrelated words (like "it is" or "Choose") would incorrectly link to a tiddler title. This was resolved by fixing wrong output merging in the Aho-Corasick failure-link handling, eliminating spurious matches from intermediate nodes, and adding cycle detection.
Impact:
* Significantly improved correctness and reliability of automatic linking for all users, especially in multilingual and large wikis.
#9084
This change introduces a fully optimized override of the core text widget, integrating an enhanced Aho-Corasick algorithm for automatic linkification of tiddler titles within text (freelinks). The new implementation prioritizes performance for large wikis and correct support for non-Latin scripts such as Chinese.
Highlights:
- Full switch from regex-based matching to a custom, robust Aho-Corasick engine dedicated to rapid, multi-pattern title detection—drastically decreasing linkification time (tested: 15s reduced to 100–500ms on ~12,000 tiddlers).
- Handles extremely large title sets gracefully, including a chunked insertion process and use of a persistent cache (`$:/config/Freelinks/PersistAhoCorasickCache`) to further accelerate subsequent linking operations in large/active wikis.
- Improvements for CJK and non-Latin text: supports linking using long or full-width symbol titles such as '：' (U+FF1A) with no split or mismatch.
- Smart prioritization: longer titles are automatically matched before shorter, more ambiguous ones, preventing partial/incorrect linking.
- Actively skips self-linking in the current tiddler and prevents overlapping matches for clean, deterministic linkification.
- End users with large or multilingual wikis see massive performance boost and 100% accurate linking for complex, full-width, or multi-language titles.
- New options for persistent match cache and word boundary checking (`$:/config/Freelinks/WordBoundary`), both can be tuned based on wiki size and content language needs.
- Safe for gradual rollout: legacy behavior is preserved if the new freelinks override is not enabled.

View File

@@ -4,10 +4,10 @@
"tiddlywiki/browser-sniff",
"tiddlywiki/confetti",
"tiddlywiki/dynannotate",
"tiddlywiki/tour",
"tiddlywiki/internals",
"tiddlywiki/menubar",
"tiddlywiki/railroad"
"tiddlywiki/railroad",
"tiddlywiki/tour"
],
"themes": [
"tiddlywiki/vanilla",

View File

@@ -1,51 +1,23 @@
/*\
title: $:/core/modules/utils/aho-corasick.js
type: application/javascript
module-type: utils
Optimized Aho-Corasick string matching algorithm implementation with enhanced performance and error handling for TiddlyWiki freelinking functionality.
Optimized Aho-Corasick string matching algorithm implementation with enhanced performance
and error handling for TiddlyWiki freelinking functionality.
Usage:
- Uses WeakMap for failure links (required; plain object keys would collide).
- search() converts case per character to avoid Unicode index desync.
- Optional word boundary filtering: CJK always allowed; Latin requires non-word chars around.
Initialization:
Create an AhoCorasick instance: var ac = new AhoCorasick();
After initialization, the trie and failure structures are automatically created to store patterns and failure links.
Adding Patterns:
Call addPattern(pattern, index) to add a pattern, e.g., ac.addPattern("[[Link]]", 0);.
pattern is the string to match, and index is an identifier for tracking results.
Multiple patterns can be added, stored in the trie structure.
Building Failure Links:
Call buildFailureLinks() to construct failure links for efficient multi-pattern matching.
Includes a maximum node limit (default 100,000 or 15 times the pattern count) to prevent excessive computation.
Performing Search:
Use search(text, useWordBoundary) to find pattern matches in the text.
text is the input string, and useWordBoundary (boolean) controls whether to enforce word boundary checks.
Returns an array of match results, each containing pattern (matched pattern), index (start position), length (pattern length), and titleIndex (pattern identifier).
Word Boundary Check:
If useWordBoundary is true, only matches that are not directly preceded or followed by a word character (letter, digit, or underscore) are returned.
Cleanup and Statistics:
Use clear() to reset the trie and failure links, freeing memory.
Use getStats() to retrieve statistics, including node count (nodeCount), pattern count (patternCount), and failure link count (failureLinks).
Notes
Performance Considerations: The Aho-Corasick trie may consume significant memory with a large number of patterns. Limit the number of patterns (e.g., <10,000) for optimal performance.
Error Handling: The module includes maximum node and failure depth limits (maxFailureDepth) to prevent infinite loops or memory overflow.
Word Boundary: Enabling useWordBoundary ensures more precise matches, ideal for link detection scenarios.
Compatibility: Ensure compatibility with other TiddlyWiki modules (e.g., wikiparser.js) when processing WikiText.
Debugging: Use getStats() to inspect the trie structure's size and ensure it does not overload browser memory.
\*/
"use strict";
function AhoCorasick() {
this.trie = {};
this.failure = {};
this.failure = new WeakMap();
this.maxFailureDepth = 100;
this.patternCount = 0;
}
@@ -54,198 +26,164 @@ AhoCorasick.prototype.addPattern = function(pattern, index) {
if(!pattern || typeof pattern !== "string" || pattern.length === 0) {
return;
}
var node = this.trie;
for(var i = 0; i < pattern.length; i++) {
var char = pattern[i];
if(!node[char]) {
node[char] = {};
var ch = pattern[i];
if(!node[ch]) {
node[ch] = {};
}
node = node[char];
node = node[ch];
}
if(!node.$) {
node.$ = [];
}
node.$.push({
pattern: pattern,
node.$.push({
pattern: pattern,
index: index,
length: pattern.length
});
this.patternCount++;
};
AhoCorasick.prototype.buildFailureLinks = function() {
var queue = [];
var root = this.trie;
this.failure[root] = root;
for(var char in root) {
if(root[char] && char !== "$") {
this.failure[root[char]] = root;
queue.push(root[char]);
var self = this;
this.failure = new WeakMap();
this.failure.set(root, root);
for(var ch in root) {
if(ch === "$") continue;
if(root[ch] && typeof root[ch] === "object") {
this.failure.set(root[ch], root);
queue.push(root[ch]);
}
}
var processedNodes = 0;
var maxNodes = Math.max(100000, this.patternCount * 15);
while(queue.length > 0 && processedNodes < maxNodes) {
var node = queue.shift();
processedNodes++;
for(var char in node) {
if(node[char] && char !== "$") {
var child = node[char];
var fail = this.failure[node];
var failureDepth = 0;
while(fail && !fail[char] && failureDepth < this.maxFailureDepth) {
fail = this.failure[fail];
failureDepth++;
}
var failureLink = (fail && fail[char]) ? fail[char] : root;
this.failure[child] = failureLink;
// Do not merge outputs from failure links during build
// Instead, collect matches dynamically by traversing failure links during search
queue.push(child);
}
while(queue.length > 0) {
if(processedNodes++ >= maxNodes) {
throw new Error("Aho-Corasick: buildFailureLinks exceeded maximum nodes (" + maxNodes + ")");
}
var node = queue.shift();
for(var edge in node) {
if(edge === "$") continue;
var child = node[edge];
if(!child || typeof child !== "object") continue;
var fail = self.failure.get(node) || root;
var depth = 0;
while(fail !== root && !fail[edge] && depth < self.maxFailureDepth) {
fail = self.failure.get(fail) || root;
depth++;
}
var nextFail = (fail[edge] && fail[edge] !== child) ? fail[edge] : root;
self.failure.set(child, nextFail);
if(nextFail.$) {
if(!child.$) child.$ = [];
child.$ = child.$.concat(nextFail.$);
}
queue.push(child);
}
}
if(processedNodes >= maxNodes) {
throw new Error("Aho-Corasick: buildFailureLinks exceeded maximum nodes (" + maxNodes + ")");
}
};
AhoCorasick.prototype.search = function(text, useWordBoundary) {
AhoCorasick.prototype.search = function(text, useWordBoundary, ignoreCase) {
if(!text || typeof text !== "string" || text.length === 0) {
return [];
}
var matches = [];
var node = this.trie;
var root = this.trie;
var textLength = text.length;
var maxMatches = Math.min(textLength * 2, 10000);
for(var i = 0; i < textLength; i++) {
var char = text[i];
var transitionCount = 0;
// Follow failure links to find a valid transition
while(node && !node[char] && node !== this.trie && transitionCount < this.maxFailureDepth) {
node = this.failure[node] || this.trie;
transitionCount++;
var ch = ignoreCase ? text[i].toLowerCase() : text[i];
while(node !== root && !node[ch]) {
node = this.failure.get(node) || root;
}
if(node && node[char]) {
node = node[char];
} else {
node = this.trie;
if(this.trie[char]) {
node = this.trie[char];
}
if(node[ch]) {
node = node[ch];
}
// Traverse the current node and its failure link chain to gather all patterns
var currentNode = node;
var collectCount = 0;
var visitedNodes = new Set();
while(currentNode && collectCount < 10) {
// Prevent infinite loops
if(visitedNodes.has(currentNode)) {
break;
}
visitedNodes.add(currentNode);
// Only collect outputs from the current node (not merged ones)
if(currentNode.$) {
var outputs = currentNode.$;
for(var j = 0; j < outputs.length && matches.length < maxMatches; j++) {
var output = outputs[j];
var matchStart = i - output.length + 1;
var matchEnd = i + 1;
var matchedText = text.substring(matchStart, matchEnd);
if(matchedText !== output.pattern) {
continue;
}
if(useWordBoundary && !this.isWordBoundaryMatch(text, matchStart, matchEnd)) {
continue;
}
matches.push({
pattern: output.pattern,
index: matchStart,
length: output.length,
titleIndex: output.index
});
if(node.$) {
var outputs = node.$;
for(var j = 0; j < outputs.length && matches.length < maxMatches; j++) {
var out = outputs[j];
var matchStart = i - out.length + 1;
var matchEnd = i + 1;
if(matchStart < 0) continue;
if(useWordBoundary && !this.isWordBoundaryMatch(text, matchStart, matchEnd)) {
continue;
}
matches.push({
pattern: out.pattern,
index: matchStart,
length: out.length,
titleIndex: out.index
});
}
currentNode = this.failure[currentNode];
if(currentNode === this.trie) break;
collectCount++;
}
}
return matches;
};
AhoCorasick.prototype.isWordBoundaryMatch = function(text, start, end) {
var matchedText = text.substring(start, end);
if(/[\u3400-\u9FFF\uF900-\uFAFF]/.test(matchedText)) {
return true;
}
var beforeChar = start > 0 ? text[start - 1] : "";
var afterChar = end < text.length ? text[end] : "";
var isWordChar = function(char) {
var isLatinWordChar = function(char) {
return /[a-zA-Z0-9_\u00C0-\u00FF]/.test(char);
};
var beforeIsWord = beforeChar && isWordChar(beforeChar);
var afterIsWord = afterChar && isWordChar(afterChar);
return !beforeIsWord && !afterIsWord;
return !isLatinWordChar(beforeChar) && !isLatinWordChar(afterChar);
};
AhoCorasick.prototype.clear = function() {
this.trie = {};
this.failure = {};
this.failure = new WeakMap();
this.patternCount = 0;
};
AhoCorasick.prototype.getStats = function() {
var nodeCount = 0;
// eslint-disable-next-line no-unused-vars
var patternCount = 0;
var failureCount = 0;
function countNodes(node) {
if(!node) return;
nodeCount++;
if(node.$) {
patternCount += node.$.length;
}
for(var key in node) {
if(node[key] && typeof node[key] === "object" && key !== "$") {
if(key === "$") continue;
if(node[key] && typeof node[key] === "object") {
countNodes(node[key]);
}
}
}
countNodes(this.trie);
failureCount += Object.keys(this.failure).length;
return {
nodeCount: nodeCount,
patternCount: this.patternCount,
failureLinks: failureCount
failureLinks: this.patternCount
};
};

View File

@@ -1,9 +1,14 @@
/*\
title: $:/core/modules/widgets/text.js
type: application/javascript
module-type: widget
An optimized override of the core text widget that automatically linkifies the text, with support for non-Latin languages like Chinese, prioritizing longer titles, skipping processed matches, excluding the current tiddler title from linking, and handling large title sets with enhanced Aho-Corasick algorithm.
Optimized override of the core text widget that automatically linkifies text.
- Supports non-Latin languages like Chinese.
- Global longest-match priority, then removes overlaps.
- Excludes current tiddler title from linking.
- Uses Aho-Corasick for performance.
\*/
@@ -18,28 +23,6 @@ var Widget = require("$:/core/modules/widgets/widget.js").widget,
ElementWidget = require("$:/core/modules/widgets/element.js").element,
AhoCorasick = require("$:/core/modules/utils/aho-corasick.js").AhoCorasick;
var ESCAPE_REGEX = /[\\^$*+?.()|[\]{}]/g;
function escapeRegExp(str) {
try {
return str.replace(ESCAPE_REGEX, "\\$&");
} catch(e) {
return null;
}
}
function FastPositionSet() {
this.set = new Set();
}
FastPositionSet.prototype.add = function(pos) {
this.set.add(pos);
};
FastPositionSet.prototype.has = function(pos) {
return this.set.has(pos);
};
var TextNodeWidget = function(parseTreeNode,options) {
this.initialise(parseTreeNode,options);
};
@@ -54,138 +37,121 @@ TextNodeWidget.prototype.render = function(parent,nextSibling) {
};
/*
Compute the internal state of the widget.

Builds the child parse tree for the text. The text is turned into a mix of
plain-text and link nodes only when all of the following hold:
- the text is at least two characters long
- the variable tv-wikilinks is not "no"
- the variable tv-freelinks is "yes"
- the widget is not nested inside a button or a link
Otherwise a single plain-text node is used.
*/
TextNodeWidget.prototype.execute = function() {
	var self = this;
	var ignoreCase = self.getVariable("tv-freelinks-ignore-case",{defaultValue:"no"}).trim() === "yes";
	var childParseTree = [{
		type: "plain-text",
		text: this.getAttribute("text",this.parseTreeNode.text || "")
	}];
	var text = childParseTree[0].text;
	// Nothing worth scanning: render the text unchanged
	if(!text || text.length < 2) {
		this.makeChildWidgets(childParseTree);
		return;
	}
	if(this.getVariable("tv-wikilinks",{defaultValue:"yes"}) !== "no" &&
		this.getVariable("tv-freelinks",{defaultValue:"no"}) === "yes" &&
		!this.isWithinButtonOrLink()) {
		var currentTiddlerTitle = this.getVariable("currentTiddler") || "";
		var useWordBoundary = self.wiki.getTiddlerText(WORD_BOUNDARY_TIDDLER,"no") === "yes";
		// Case-sensitive and case-insensitive automatons are cached separately
		var cacheKey = "tiddler-title-info-" + (ignoreCase ? "insensitive" : "sensitive");
		this.tiddlerTitleInfo = this.wiki.getGlobalCache(cacheKey,function() {
			return computeTiddlerTitleInfo(self,ignoreCase);
		});
		// Defensive check: only search when the cached entry holds both titles and an automaton
		if(this.tiddlerTitleInfo && this.tiddlerTitleInfo.titles && this.tiddlerTitleInfo.titles.length > 0 && this.tiddlerTitleInfo.ac) {
			var newParseTree = this.processTextWithMatches(text,currentTiddlerTitle,ignoreCase,useWordBoundary);
			// Keep the original single plain-text node unless at least one link was produced
			if(newParseTree && newParseTree.length > 0 &&
				(newParseTree.length > 1 || newParseTree[0].type !== "plain-text")) {
				childParseTree = newParseTree;
			}
		}
	}
	this.makeChildWidgets(childParseTree);
};
/*
Split `text` into a parse tree of plain-text and link nodes.

text: the text to scan
currentTiddlerTitle: title of the current tiddler; matches of this title are never linked
ignoreCase: true to compare titles case-insensitively
useWordBoundary: passed through to the automaton search

Reads this.tiddlerTitleInfo ({titles, ac}). The search is expected to yield
objects carrying `index`, `length` and `titleIndex` (an index into `titles`).

Matches are ranked longest first (ties: leftmost first) and accepted greedily;
a match overlapping an already accepted one is discarded.

Returns an array of parse tree nodes. When nothing can be linked the array
holds a single plain-text node with the original text.
*/
TextNodeWidget.prototype.processTextWithMatches = function(text,currentTiddlerTitle,ignoreCase,useWordBoundary) {
	if(!text || text.length === 0) {
		return [{type: "plain-text", text: text}];
	}
	var info = this.tiddlerTitleInfo;
	if(!info || !info.ac || !info.titles) {
		return [{type: "plain-text", text: text}];
	}
	var matches;
	try {
		matches = info.ac.search(text,useWordBoundary,ignoreCase);
	} catch(e) {
		// Best effort: a failing search must not break rendering of the text
		return [{type: "plain-text", text: text}];
	}
	if(!matches || matches.length === 0) {
		return [{type: "plain-text", text: text}];
	}
	var titleToCompare = ignoreCase ?
		(currentTiddlerTitle ? currentTiddlerTitle.toLowerCase() : "") :
		currentTiddlerTitle;
	// Global longest-match priority; equally long matches are taken left to right
	matches.sort(function(a,b) {
		if(b.length !== a.length) {
			return b.length - a.length;
		}
		return a.index - b.index;
	});
	// One flag per character of the text, set once the character belongs to an accepted match
	var occupied = new Uint8Array(text.length);
	var validMatches = [];
	for(var i = 0; i < matches.length; i++) {
		var m = matches[i];
		var start = m.index;
		var end = start + m.length;
		// Skip malformed matches: empty or non-numeric length, or outside the text
		if(!(m.length > 0) || start < 0 || end > text.length) {
			continue;
		}
		var matchedTitle = info.titles[m.titleIndex];
		if(!matchedTitle) {
			continue;
		}
		// Never link a tiddler to itself
		var matchedTitleToCompare = ignoreCase ? matchedTitle.toLowerCase() : matchedTitle;
		if(titleToCompare && matchedTitleToCompare === titleToCompare) {
			continue;
		}
		var overlapping = false;
		for(var j = start; j < end; j++) {
			if(occupied[j]) {
				overlapping = true;
				break;
			}
		}
		if(overlapping) {
			continue;
		}
		validMatches.push(m);
		for(var k = start; k < end; k++) {
			occupied[k] = 1;
		}
	}
	if(validMatches.length === 0) {
		return [{type: "plain-text", text: text}];
	}
	// Emit nodes in document order
	validMatches.sort(function(a,b) {
		return a.index - b.index;
	});
	var newParseTree = [];
	var curPos = 0;
	for(var x = 0; x < validMatches.length; x++) {
		var mm = validMatches[x];
		var s = mm.index;
		var e = s + mm.length;
		if(s > curPos) {
			newParseTree.push({type: "plain-text", text: text.substring(curPos,s)});
		}
		// Link to the canonical title but display the text exactly as written
		var toTitle = info.titles[mm.titleIndex];
		var matchedText = text.substring(s,e);
		newParseTree.push({
			type: "link",
			attributes: {
				to: {type: "string", value: toTitle},
				"class": {type: "string", value: "tc-freelink"}
			},
			children: [{
				type: "plain-text",
				text: matchedText
			}]
		});
		curPos = e;
	}
	if(curPos < text.length) {
		newParseTree.push({type: "plain-text", text: text.substring(curPos)});
	}
	return newParseTree;
};
/*
Build the list of linkable titles and the matching Aho-Corasick automaton.

self: the widget; only self.wiki is used
ignoreCase: true to register the lower-cased form of every title as its pattern

Candidate titles come from the filter stored in TITLE_TARGET_FILTER when that
tiddler has text, otherwise from all titles in the wiki. Empty titles and
titles starting with "$:/" are dropped.

Returns {titles, ac}. `titles` is sorted longest first (ties in code unit
order) and each pattern is registered under its position in `titles`. If there
are no candidates, or building the automaton fails, an empty title list with a
fresh automaton is returned.
*/
function computeTiddlerTitleInfo(self,ignoreCase) {
	var targetFilterText = self.wiki.getTiddlerText(TITLE_TARGET_FILTER),
		titles = targetFilterText ?
			self.wiki.filterTiddlers(targetFilterText,$tw.rootWidget) :
			self.wiki.allTitles();
	if(!titles || titles.length === 0) {
		return {titles: [], ac: new AhoCorasick()};
	}
	var validTitles = [];
	for(var i = 0; i < titles.length; i++) {
		var t = titles[i];
		if(t && t.length > 0 && t.substring(0,3) !== "$:/") {
			validTitles.push(t);
		}
	}
	// Sort before registering so that each pattern index is a valid index into the returned titles
	validTitles.sort(function(a,b) {
		var d = b.length - a.length;
		if(d !== 0) {
			return d;
		}
		return a < b ? -1 : a > b ? 1 : 0;
	});
	var ac = new AhoCorasick();
	for(var j = 0; j < validTitles.length; j++) {
		var title = validTitles[j];
		var pattern = ignoreCase ? title.toLowerCase() : title;
		ac.addPattern(pattern,j);
	}
	try {
		ac.buildFailureLinks();
	} catch(e) {
		// Best effort: without a usable automaton, fall back to linking nothing
		return {titles: [], ac: new AhoCorasick()};
	}
	return {titles: validTitles, ac: ac};
}
/*
Returns true if any ancestor widget is a button widget, a link widget, or an
element widget whose tag is "a"; otherwise returns false
*/
TextNodeWidget.prototype.isWithinButtonOrLink = function() {
	var widget = this.parentWidget;
	// Walk up the widget tree until the root is passed
	while(widget) {
		if(widget instanceof ButtonWidget ||
			widget instanceof LinkWidget ||
			((widget instanceof ElementWidget) && widget.parseTreeNode.tag === "a")) {
			return true;
		}
		widget = widget.parentWidget;
	}
	return false;
};
/*
Selectively refreshes the widget if needed. Returns true if the widget or any of its children needed re-rendering.

changedTiddlers: hashmap of changed tiddler titles to change descriptors (may be undefined)

The title set is considered changed when:
- the word boundary or target filter configuration tiddler changed
- a non-system title that is in the cached title list was deleted
- a non-system, non-draft title that is not in the cached title list changed
Other changes to system tiddlers (titles starting with "$:/") and changes to
draft tiddlers are ignored.
*/
TextNodeWidget.prototype.refresh = function(changedTiddlers) {
	var self = this;
	var changedAttributes = this.computeAttributes();
	var titlesHaveChanged = false;
	if(changedTiddlers) {
		$tw.utils.each(changedTiddlers,function(change,title) {
			// One relevant change is enough
			if(titlesHaveChanged) {
				return;
			}
			if(title === WORD_BOUNDARY_TIDDLER || title === TITLE_TARGET_FILTER) {
				titlesHaveChanged = true;
				return;
			}
			if(title.substring(0,3) === "$:/") {
				return;
			}
			if(change && change.isDeleted) {
				// A deletion only matters if the title was one of the link targets
				if(self.tiddlerTitleInfo && self.tiddlerTitleInfo.titles && self.tiddlerTitleInfo.titles.indexOf(title) !== -1) {
					titlesHaveChanged = true;
				}
				return;
			}
			var tiddler = self.wiki.getTiddler(title);
			if(tiddler && tiddler.hasField("draft.of")) {
				return;
			}
			// A title we have not seen before (or no title list yet) requires a rebuild
			if(!self.tiddlerTitleInfo || !self.tiddlerTitleInfo.titles || self.tiddlerTitleInfo.titles.indexOf(title) === -1) {
				titlesHaveChanged = true;
			}
		});
	}
	var wordBoundaryChanged = !!(changedTiddlers && changedTiddlers[WORD_BOUNDARY_TIDDLER]);
	if(changedAttributes.text || titlesHaveChanged || wordBoundaryChanged) {
		if(titlesHaveChanged) {
			// Clear both variants: other widgets may be using the other case mode
			self.wiki.clearCache("tiddler-title-info-insensitive");
			self.wiki.clearCache("tiddler-title-info-sensitive");
		}
		this.refreshSelf();
		return true;
	}
	if(changedTiddlers) {
		return this.refreshChildren(changedTiddlers);
	}
	return false;
};
exports.text = TextNodeWidget;