1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2026-04-20 05:41:29 +00:00

optimized

This commit is contained in:
s793016
2025-06-12 00:11:13 +08:00
committed by GitHub
parent 89e9fff58b
commit 5f0b98d1fd

View File

@@ -3,33 +3,27 @@ title: $:/core/modules/utils/aho-corasick.js
type: application/javascript
module-type: utils
Aho-Corasick string matching algorithm implementation with enhanced error handling
for TiddlyWiki freelinking functionality.
Optimized Aho-Corasick string matching algorithm implementation with enhanced performance
and error handling for TiddlyWiki freelinking functionality.
\*/
"use strict";
/**
* Aho-Corasick implementation with enhanced error handling
* @constructor
*/
/* Optimized Aho-Corasick implementation with performance enhancements */
function AhoCorasick() {
this.trie = {};
this.failure = {};
this.output = {};
this.maxFailureDepth = 100;
}
/**
* Add a pattern to the Aho-Corasick automaton
* @param {string} pattern - The pattern to add
* @param {number} index - The index of the pattern for identification
*/
AhoCorasick.prototype.addPattern = function(pattern, index) {
if(!pattern || typeof pattern !== "string") {
if(!pattern || typeof pattern !== "string" || pattern.length === 0) {
return;
}
var node = this.trie;
for(var i = 0; i < pattern.length; i++) {
var char = pattern[i];
if(!node[char]) {
@@ -37,21 +31,23 @@ AhoCorasick.prototype.addPattern = function(pattern, index) {
}
node = node[char];
}
if(!node.$) {
node.$ = [];
}
node.$.push({ pattern: pattern, index: index });
node.$.push({
pattern: pattern,
index: index,
length: pattern.length
});
};
/**
* Build failure links for the Aho-Corasick automaton
*/
/* Build failure links with depth and node count limits */
AhoCorasick.prototype.buildFailureLinks = function() {
var queue = [];
var root = this.trie;
this.failure[root] = root;
// Initialize first level failure links
for(var char in root) {
if(root[char] && char !== '$') {
this.failure[root[char]] = root;
@@ -59,91 +55,117 @@ AhoCorasick.prototype.buildFailureLinks = function() {
}
}
var maxIterations = 100000; // Prevent infinite loops
var iteration = 0;
var processedNodes = 0;
var maxNodes = 100000;
while(queue.length && iteration < maxIterations) {
while(queue.length > 0 && processedNodes < maxNodes) {
var node = queue.shift();
processedNodes++;
for(var char in node) {
if(node[char] && char !== '$') {
var child = node[char];
var fail = this.failure[node];
var failCount = 0;
var maxFailCount = 1000; // Prevent deep failure chains
var failureDepth = 0;
while(fail && !fail[char] && failCount < maxFailCount) {
while(fail && !fail[char] && failureDepth < this.maxFailureDepth) {
fail = this.failure[fail];
failCount++;
failureDepth++;
}
this.failure[child] = fail[char] || this.trie;
var failureLink = (fail && fail[char]) ? fail[char] : root;
this.failure[child] = failureLink;
// Copy output from failure link
if(this.failure[child].$) {
var failureOutput = this.failure[child];
if(failureOutput && failureOutput.$) {
if(!child.$) {
child.$ = [];
}
child.$.push.apply(child.$, this.failure[child].$);
child.$.push.apply(child.$, failureOutput.$);
}
queue.push(child);
}
}
iteration++;
}
if(iteration >= maxIterations) {
throw new Error("Aho-Corasick: buildFailureLinks exceeded max iterations");
if(processedNodes >= maxNodes) {
throw new Error("Aho-Corasick: buildFailureLinks exceeded maximum nodes");
}
};
/**
* Search for all patterns in the given text
* @param {string} text - The text to search in
* @returns {Array} Array of match objects with pattern, index, length, and titleIndex properties
*/
AhoCorasick.prototype.search = function(text) {
if(!text || typeof text !== "string") {
if(!text || typeof text !== "string" || text.length === 0) {
return [];
}
var matches = [];
var node = this.trie;
var maxIterations = text.length * 10; // Prevent infinite loops
var iteration = 0;
var textLength = text.length;
var maxMatches = Math.min(textLength * 2, 10000);
for(var i = 0; i < text.length && iteration < maxIterations; i++) {
for(var i = 0; i < textLength; i++) {
var char = text[i];
var transitionCount = 0;
var maxTransitionCount = 1000; // Prevent deep transitions
while(node && !node[char] && transitionCount < maxTransitionCount) {
while(node && !node[char] && transitionCount < this.maxFailureDepth) {
node = this.failure[node];
transitionCount++;
}
node = node[char] || this.trie;
node = (node && node[char]) ? node[char] : this.trie;
if(node.$) {
for(var j = 0; j < node.$.length; j++) {
var match = node.$[j];
if(node && node.$) {
var outputs = node.$;
for(var j = 0; j < outputs.length && matches.length < maxMatches; j++) {
var output = outputs[j];
matches.push({
pattern: match.pattern,
index: i - match.pattern.length + 1,
length: match.pattern.length,
titleIndex: match.index
pattern: output.pattern,
index: i - output.length + 1,
length: output.length,
titleIndex: output.index
});
}
}
iteration++;
}
if(iteration >= maxIterations) {
throw new Error("Aho-Corasick: search exceeded max iterations");
}
return matches;
};
// Export the AhoCorasick constructor
/*
Reset the automaton: replaces both the trie and the failure-link table with
fresh empty objects. Other instance properties are left untouched.
Returns nothing.
*/
AhoCorasick.prototype.clear = function() {
	var automaton = this;
	// Drop the failure-link table
	automaton.failure = {};
	// Drop every stored pattern by starting over with an empty root node
	automaton.trie = {};
};
/*
Report size statistics for the automaton.

Returns an object with:
	nodeCount    - number of trie nodes reachable from the root (root included)
	patternCount - sum of the lengths of every node's "$" output list
	failureLinks - number of enumerable keys on this.failure

NOTE(review): this.failure is a plain object, so its keys are strings. If it is
keyed by node objects, failureLinks counts distinct string keys rather than
distinct nodes - confirm against buildFailureLinks.
*/
AhoCorasick.prototype.getStats = function() {
	var stats = {
		nodeCount: 0,
		patternCount: 0,
		failureLinks: 0
	};
	// Walk the trie with an explicit stack instead of recursion
	var pending = this.trie ? [this.trie] : [];
	while(pending.length > 0) {
		var current = pending.pop();
		stats.nodeCount += 1;
		if(current.$) {
			stats.patternCount += current.$.length;
		}
		for(var edge in current) {
			var target = current[edge];
			// "$" holds the output list, not a child node
			if(edge !== '$' && target && typeof target === 'object') {
				pending.push(target);
			}
		}
	}
	for(var link in this.failure) {
		stats.failureLinks += 1;
	}
	return stats;
};
exports.AhoCorasick = AhoCorasick;