mirror of
https://github.com/Jermolene/TiddlyWiki5
synced 2026-04-20 05:41:29 +00:00
optimized
This commit is contained in:
@@ -3,33 +3,27 @@ title: $:/core/modules/utils/aho-corasick.js
|
||||
type: application/javascript
|
||||
module-type: utils
|
||||
|
||||
Aho-Corasick string matching algorithm implementation with enhanced error handling
|
||||
for TiddlyWiki freelinking functionality.
|
||||
Optimized Aho-Corasick string matching algorithm implementation with enhanced performance
|
||||
and error handling for TiddlyWiki freelinking functionality.
|
||||
|
||||
\*/
|
||||
|
||||
"use strict";
|
||||
|
||||
/*
Return the key under which a trie node stores its child for `char`.
"$" is reserved on every node for the output list, so a literal "$" character
is stored under "$$". Every other character is a single-character key and
therefore cannot collide with "$" or "$$".
*/
function childKey(char) {
	return char === "$" ? "$$" : char;
}

/*
Create an empty trie node tagged with a numeric id.
The id is defined as non-enumerable so that for-in loops over a node only see
child keys and "$". Plain objects cannot be used as property keys (they all
coerce to "[object Object]"), so the id is what the failure table is keyed by.
*/
function createNode(id) {
	var node = {};
	Object.defineProperty(node, "id", {value: id});
	return node;
}

/**
 * Aho-Corasick multi-pattern string matcher.
 *
 * Usage: call addPattern() for each pattern, then buildFailureLinks() once,
 * then search() as often as needed.
 *
 * Instance state:
 *   trie            - root trie node; children live under character keys, outputs under "$"
 *   failure         - map from node id to that node's failure node
 *   maxFailureDepth - upper bound on failure-link hops per transition
 *
 * @constructor
 */
function AhoCorasick() {
	this.nextNodeId = 0;
	this.trie = createNode(this.nextNodeId++);
	this.failure = {};
	this.patternTotal = 0;
	this.maxFailureDepth = 100;
}

/**
 * Add a pattern to the automaton. Non-string and empty patterns are ignored.
 * @param {string} pattern - The pattern to add
 * @param {number} index - Caller-defined identifier, reported as `titleIndex` by search()
 */
AhoCorasick.prototype.addPattern = function(pattern, index) {
	if(typeof pattern !== "string" || pattern.length === 0) {
		return;
	}
	var node = this.trie;
	for(var i = 0; i < pattern.length; i++) {
		var key = childKey(pattern[i]);
		if(!node[key]) {
			node[key] = createNode(this.nextNodeId++);
		}
		node = node[key];
	}
	if(!node.$) {
		node.$ = [];
	}
	node.$.push({
		pattern: pattern,
		index: index,
		length: pattern.length
	});
	this.patternTotal++;
};

/**
 * Build the failure links with a breadth-first pass over the trie and merge
 * each node's failure-node outputs into its own output list.
 *
 * Call this once after all patterns have been added: every call appends the
 * inherited outputs again, so a second call would duplicate them.
 *
 * @throws {Error} If the trie has more than 100000 nodes below the root
 */
AhoCorasick.prototype.buildFailureLinks = function() {
	var root = this.trie,
		maxNodes = 100000,
		queue = [],
		head = 0,
		key;
	this.failure = {};
	this.failure[root.id] = root;
	// Depth-1 nodes always fail back to the root
	for(key in root) {
		if(key !== "$" && root[key]) {
			this.failure[root[key].id] = root;
			queue.push(root[key]);
		}
	}
	// A head index is used instead of queue.shift() to keep the pass linear
	while(head < queue.length) {
		if(head >= maxNodes) {
			throw new Error("Aho-Corasick: buildFailureLinks exceeded maximum nodes");
		}
		var node = queue[head++];
		for(key in node) {
			if(key === "$" || !node[key]) {
				continue;
			}
			var child = node[key],
				fail = this.failure[node.id],
				failureDepth = 0;
			// Walk up the failure chain until a node with a `key` transition is
			// found, the root is reached, or the hop limit is hit
			while(fail !== root && !fail[key] && failureDepth < this.maxFailureDepth) {
				fail = this.failure[fail.id];
				failureDepth++;
			}
			var failureLink = fail[key] || root;
			this.failure[child.id] = failureLink;
			// The failure node was dequeued earlier, so its list is already complete
			if(failureLink.$) {
				if(!child.$) {
					child.$ = [];
				}
				child.$.push.apply(child.$, failureLink.$);
			}
			queue.push(child);
		}
	}
};

/**
 * Search for all added patterns in the given text.
 * The result is capped at min(text.length * 2, 10000) matches; further matches
 * are dropped silently.
 * @param {string} text - The text to search in
 * @returns {Array} Match objects {pattern, index, length, titleIndex}, where
 *   `index` is the start offset in `text` and `titleIndex` is the index given to addPattern()
 */
AhoCorasick.prototype.search = function(text) {
	if(typeof text !== "string" || text.length === 0) {
		return [];
	}
	var root = this.trie,
		matches = [],
		textLength = text.length,
		maxMatches = Math.min(textLength * 2, 10000),
		node = root;
	for(var i = 0; i < textLength; i++) {
		var key = childKey(text[i]),
			transitionCount = 0;
		// Follow failure links until a transition exists; stop at the root
		// instead of spinning on the root's self-link
		while(node !== root && !node[key] && transitionCount < this.maxFailureDepth) {
			node = this.failure[node.id] || root;
			transitionCount++;
		}
		node = node[key] || root;
		var outputs = node.$;
		if(outputs) {
			for(var j = 0; j < outputs.length && matches.length < maxMatches; j++) {
				var output = outputs[j];
				matches.push({
					pattern: output.pattern,
					index: i - output.length + 1,
					length: output.length,
					titleIndex: output.index
				});
			}
		}
	}
	return matches;
};

/**
 * Discard all patterns and failure links, leaving an empty automaton.
 * maxFailureDepth is left unchanged.
 */
AhoCorasick.prototype.clear = function() {
	this.nextNodeId = 0;
	this.trie = createNode(this.nextNodeId++);
	this.failure = {};
	this.patternTotal = 0;
};

/**
 * Report the size of the automaton.
 * @returns {Object} {nodeCount, patternCount, failureLinks}: trie nodes including
 *   the root, patterns added since construction or the last clear(), and entries
 *   in the failure table
 */
AhoCorasick.prototype.getStats = function() {
	var nodeCount = 0,
		stack = [this.trie];
	// Explicit stack rather than recursion, so very long patterns cannot overflow the call stack
	while(stack.length > 0) {
		var node = stack.pop();
		nodeCount++;
		for(var key in node) {
			if(key !== "$" && node[key] && typeof node[key] === "object") {
				stack.push(node[key]);
			}
		}
	}
	return {
		nodeCount: nodeCount,
		patternCount: this.patternTotal,
		failureLinks: Object.keys(this.failure).length
	};
};
|
||||
|
||||
// Export the AhoCorasick constructor
exports.AhoCorasick = AhoCorasick;
|
||||
|
||||
Reference in New Issue
Block a user