1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2024-09-28 07:08:20 +00:00

Revert attempt at optimising filter execution

At the moment the optimiser returns a list of chainable functions, it would be simpler to just return a single function
This commit is contained in:
Jeremy Ruston 2023-11-07 10:34:56 +00:00
parent 9493084f95
commit 02f3065e4f

View File

@ -8,237 +8,230 @@ Adds tiddler filtering methods to the $tw.Wiki object.
\*/ \*/
(function(){ (function(){
/*jslint node: true, browser: true */ /*jslint node: true, browser: true */
/*global $tw: false */ /*global $tw: false */
"use strict"; "use strict";
var widgetClass = require("$:/core/modules/widgets/widget.js").widget; var widgetClass = require("$:/core/modules/widgets/widget.js").widget;
/* Maximum permitted filter recursion depth */ /* Maximum permitted filter recursion depth */
var MAX_FILTER_DEPTH = 300; var MAX_FILTER_DEPTH = 300;
/* /*
Parses an operation (i.e. a run) within a filter string Parses an operation (i.e. a run) within a filter string
operators: Array of array of operator nodes into which results should be inserted operators: Array of array of operator nodes into which results should be inserted
filterString: filter string filterString: filter string
p: start position within the string p: start position within the string
Returns the new start position, after the parsed operation Returns the new start position, after the parsed operation
*/ */
function parseFilterOperation(operators,filterString,p) { function parseFilterOperation(operators,filterString,p) {
var nextBracketPos, operator; var nextBracketPos, operator;
// Skip the starting square bracket // Skip the starting square bracket
if(filterString.charAt(p++) !== "[") { if(filterString.charAt(p++) !== "[") {
throw "Missing [ in filter expression";
}
// Process each operator in turn
do {
operator = {};
// Check for an operator prefix
if(filterString.charAt(p) === "!") {
operator.prefix = filterString.charAt(p++);
}
// Get the operator name
nextBracketPos = filterString.substring(p).search(/[\[\{<\/]/);
if(nextBracketPos === -1) {
throw "Missing [ in filter expression"; throw "Missing [ in filter expression";
} }
nextBracketPos += p; // Process each operator in turn
var bracket = filterString.charAt(nextBracketPos); do {
operator.operator = filterString.substring(p,nextBracketPos); operator = {};
// Any suffix? // Check for an operator prefix
var colon = operator.operator.indexOf(':'); if(filterString.charAt(p) === "!") {
if(colon > -1) { operator.prefix = filterString.charAt(p++);
// The raw suffix for older filters
operator.suffix = operator.operator.substring(colon + 1);
operator.operator = operator.operator.substring(0,colon) || "field";
// The processed suffix for newer filters
operator.suffixes = [];
$tw.utils.each(operator.suffix.split(":"),function(subsuffix) {
operator.suffixes.push([]);
$tw.utils.each(subsuffix.split(","),function(entry) {
entry = $tw.utils.trim(entry);
if(entry) {
operator.suffixes[operator.suffixes.length - 1].push(entry);
}
});
});
}
// Empty operator means: title
else if(operator.operator === "") {
operator.operator = "title";
}
operator.operands = [];
var parseOperand = function(bracketType) {
var operand = {};
switch (bracketType) {
case "{": // Curly brackets
operand.indirect = true;
nextBracketPos = filterString.indexOf("}",p);
break;
case "[": // Square brackets
nextBracketPos = filterString.indexOf("]",p);
break;
case "<": // Angle brackets
operand.variable = true;
nextBracketPos = filterString.indexOf(">",p);
break;
case "/": // regexp brackets
var rex = /^((?:[^\\\/]|\\.)*)\/(?:\(([mygi]+)\))?/g,
rexMatch = rex.exec(filterString.substring(p));
if(rexMatch) {
operator.regexp = new RegExp(rexMatch[1], rexMatch[2]);
// DEPRECATION WARNING
console.log("WARNING: Filter",operator.operator,"has a deprecated regexp operand",operator.regexp);
nextBracketPos = p + rex.lastIndex - 1;
}
else {
throw "Unterminated regular expression in filter expression";
}
break;
} }
// Get the operator name
nextBracketPos = filterString.substring(p).search(/[\[\{<\/]/);
if(nextBracketPos === -1) { if(nextBracketPos === -1) {
throw "Missing closing bracket in filter expression";
}
if(operator.regexp) {
operand.text = "";
} else {
operand.text = filterString.substring(p,nextBracketPos);
}
operator.operands.push(operand);
p = nextBracketPos + 1;
}
p = nextBracketPos + 1;
parseOperand(bracket);
// Check for multiple operands
while(filterString.charAt(p) === ",") {
p++;
if(/^[\[\{<\/]/.test(filterString.substring(p))) {
nextBracketPos = p;
p++;
parseOperand(filterString.charAt(nextBracketPos));
} else {
throw "Missing [ in filter expression"; throw "Missing [ in filter expression";
} }
} nextBracketPos += p;
var bracket = filterString.charAt(nextBracketPos);
// Push this operator operator.operator = filterString.substring(p,nextBracketPos);
operators.push(operator); // Any suffix?
} while(filterString.charAt(p) !== "]"); var colon = operator.operator.indexOf(':');
// Skip the ending square bracket if(colon > -1) {
if(filterString.charAt(p++) !== "]") { // The raw suffix for older filters
throw "Missing ] in filter expression"; operator.suffix = operator.operator.substring(colon + 1);
} operator.operator = operator.operator.substring(0,colon) || "field";
// Return the parsing position // The processed suffix for newer filters
return p; operator.suffixes = [];
} $tw.utils.each(operator.suffix.split(":"),function(subsuffix) {
operator.suffixes.push([]);
/* $tw.utils.each(subsuffix.split(","),function(entry) {
Parse a filter string entry = $tw.utils.trim(entry);
*/ if(entry) {
exports.parseFilter = function(filterString) { operator.suffixes[operator.suffixes.length - 1].push(entry);
filterString = filterString || ""; }
var results = [], // Array of arrays of operator nodes {operator:,operand:} });
p = 0, // Current position in the filter string });
match;
var whitespaceRegExp = /(\s+)/mg,
operandRegExp = /((?:\+|\-|~|=|\:(\w+)(?:\:([\w\:, ]*))?)?)(?:(\[)|(?:"([^"]*)")|(?:'([^']*)')|([^\s\[\]]+))/mg;
while(p < filterString.length) {
// Skip any whitespace
whitespaceRegExp.lastIndex = p;
match = whitespaceRegExp.exec(filterString);
if(match && match.index === p) {
p = p + match[0].length;
}
// Match the start of the operation
if(p < filterString.length) {
operandRegExp.lastIndex = p;
match = operandRegExp.exec(filterString);
if(!match || match.index !== p) {
throw $tw.language.getString("Error/FilterSyntax");
} }
var operation = { // Empty operator means: title
prefix: "", else if(operator.operator === "") {
operators: [] operator.operator = "title";
}; }
if(match[1]) { operator.operands = [];
operation.prefix = match[1]; var parseOperand = function(bracketType) {
p = p + operation.prefix.length; var operand = {};
if(match[2]) { switch (bracketType) {
operation.namedPrefix = match[2]; case "{": // Curly brackets
operand.indirect = true;
nextBracketPos = filterString.indexOf("}",p);
break;
case "[": // Square brackets
nextBracketPos = filterString.indexOf("]",p);
break;
case "<": // Angle brackets
operand.variable = true;
nextBracketPos = filterString.indexOf(">",p);
break;
case "/": // regexp brackets
var rex = /^((?:[^\\\/]|\\.)*)\/(?:\(([mygi]+)\))?/g,
rexMatch = rex.exec(filterString.substring(p));
if(rexMatch) {
operator.regexp = new RegExp(rexMatch[1], rexMatch[2]);
// DEPRECATION WARNING
console.log("WARNING: Filter",operator.operator,"has a deprecated regexp operand",operator.regexp);
nextBracketPos = p + rex.lastIndex - 1;
}
else {
throw "Unterminated regular expression in filter expression";
}
break;
} }
if(match[3]) {
operation.suffixes = []; if(nextBracketPos === -1) {
$tw.utils.each(match[3].split(":"),function(subsuffix) { throw "Missing closing bracket in filter expression";
operation.suffixes.push([]); }
$tw.utils.each(subsuffix.split(","),function(entry) { if(operator.regexp) {
entry = $tw.utils.trim(entry); operand.text = "";
if(entry) { } else {
operation.suffixes[operation.suffixes.length -1].push(entry); operand.text = filterString.substring(p,nextBracketPos);
} }
}); operator.operands.push(operand);
}); p = nextBracketPos + 1;
}
p = nextBracketPos + 1;
parseOperand(bracket);
// Check for multiple operands
while(filterString.charAt(p) === ",") {
p++;
if(/^[\[\{<\/]/.test(filterString.substring(p))) {
nextBracketPos = p;
p++;
parseOperand(filterString.charAt(nextBracketPos));
} else {
throw "Missing [ in filter expression";
} }
} }
if(match[4]) { // Opening square bracket
p = parseFilterOperation(operation.operators,filterString,p); // Push this operator
} else { operators.push(operator);
p = match.index + match[0].length; } while(filterString.charAt(p) !== "]");
} // Skip the ending square bracket
if(match[5] || match[6] || match[7]) { // Double quoted string, single quoted string or unquoted title if(filterString.charAt(p++) !== "]") {
operation.operators.push( throw "Missing ] in filter expression";
{operator: "title", operands: [{text: match[5] || match[6] || match[7]}]}
);
}
results.push(operation);
} }
// Return the parsing position
return p;
} }
return results;
}; /*
Parse a filter string
exports.getFilterOperators = function() { */
if(!this.filterOperators) { exports.parseFilter = function(filterString) {
$tw.Wiki.prototype.filterOperators = {}; filterString = filterString || "";
$tw.modules.applyMethods("filteroperator",this.filterOperators); var results = [], // Array of arrays of operator nodes {operator:,operand:}
p = 0, // Current position in the filter string
match;
var whitespaceRegExp = /(\s+)/mg,
operandRegExp = /((?:\+|\-|~|=|\:(\w+)(?:\:([\w\:, ]*))?)?)(?:(\[)|(?:"([^"]*)")|(?:'([^']*)')|([^\s\[\]]+))/mg;
while(p < filterString.length) {
// Skip any whitespace
whitespaceRegExp.lastIndex = p;
match = whitespaceRegExp.exec(filterString);
if(match && match.index === p) {
p = p + match[0].length;
}
// Match the start of the operation
if(p < filterString.length) {
operandRegExp.lastIndex = p;
match = operandRegExp.exec(filterString);
if(!match || match.index !== p) {
throw $tw.language.getString("Error/FilterSyntax");
}
var operation = {
prefix: "",
operators: []
};
if(match[1]) {
operation.prefix = match[1];
p = p + operation.prefix.length;
if(match[2]) {
operation.namedPrefix = match[2];
}
if(match[3]) {
operation.suffixes = [];
$tw.utils.each(match[3].split(":"),function(subsuffix) {
operation.suffixes.push([]);
$tw.utils.each(subsuffix.split(","),function(entry) {
entry = $tw.utils.trim(entry);
if(entry) {
operation.suffixes[operation.suffixes.length -1].push(entry);
}
});
});
}
}
if(match[4]) { // Opening square bracket
p = parseFilterOperation(operation.operators,filterString,p);
} else {
p = match.index + match[0].length;
}
if(match[5] || match[6] || match[7]) { // Double quoted string, single quoted string or unquoted title
operation.operators.push(
{operator: "title", operands: [{text: match[5] || match[6] || match[7]}]}
);
}
results.push(operation);
}
}
return results;
};
exports.getFilterOperators = function() {
if(!this.filterOperators) {
$tw.Wiki.prototype.filterOperators = {};
$tw.modules.applyMethods("filteroperator",this.filterOperators);
}
return this.filterOperators;
};
exports.getFilterRunPrefixes = function() {
if(!this.filterRunPrefixes) {
$tw.Wiki.prototype.filterRunPrefixes = {};
$tw.modules.applyMethods("filterrunprefix",this.filterRunPrefixes);
}
return this.filterRunPrefixes;
} }
return this.filterOperators;
}; exports.filterTiddlers = function(filterString,widget,source) {
var fn = this.compileFilter(filterString);
exports.getFilterRunPrefixes = function() { return fn.call(this,source,widget);
if(!this.filterRunPrefixes) { };
$tw.Wiki.prototype.filterRunPrefixes = {};
$tw.modules.applyMethods("filterrunprefix",this.filterRunPrefixes); /*
} Compile a filter into a function with the signature fn(source,widget) where:
return this.filterRunPrefixes; source: an iterator function for the source tiddlers, called source(iterator), where iterator is called as iterator(tiddler,title)
} widget: an optional widget node for retrieving the current tiddler etc.
*/
exports.filterTiddlers = function(filterString,widget,source) { exports.compileFilter = function(filterString) {
var fn = this.compileFilter(filterString); if(!this.filterCache) {
return fn.call(this,source,widget); this.filterCache = Object.create(null);
}; this.filterCacheCount = 0;
}
/* if(this.filterCache[filterString] !== undefined) {
Compile a filter into a function with the signature fn(source,widget) where: return this.filterCache[filterString];
source: an iterator function for the source tiddlers, called source(iterator), where iterator is called as iterator(tiddler,title) }
widget: an optional widget node for retrieving the current tiddler etc.
*/
exports.compileFilter = function(filterString) {
var self = this;
// Use cached filter function if already present
if(!this.filterCache) {
this.filterCache = Object.create(null);
this.filterCacheCount = 0;
}
if(this.filterCache[filterString] !== undefined) {
return this.filterCache[filterString];
}
// Attempt to optimise the filter into a single query
var operationFunctions = this.optimiseFilter && this.optimiseFilter(filterString);
// Otherwise compile the filter step by step
if(!operationFunctions) {
// Parse filter
var filterParseTree; var filterParseTree;
try { try {
filterParseTree = this.parseFilter(filterString); filterParseTree = this.parseFilter(filterString);
@ -248,138 +241,132 @@ exports.compileFilter = function(filterString) {
return [$tw.language.getString("Error/Filter") + ": " + e]; return [$tw.language.getString("Error/Filter") + ": " + e];
}; };
} }
// Compile the filter operators into functions // Get the hashmap of filter operator functions
operationFunctions = this.compileFilterOperations(filterParseTree); var filterOperators = this.getFilterOperators();
} // Assemble array of functions, one for each operation
// Return a function that applies the operations to a source iterator of tiddler titles var operationFunctions = [];
var fnMeasured = $tw.perf.measure("filter: " + filterString,function filterFunction(source,widget) { // Step through the operations
if(!source) { var self = this;
source = self.each; $tw.utils.each(filterParseTree,function(operation) {
} else if(typeof source === "object") { // Array or hashmap // Create a function for the chain of operators in the operation
source = self.makeTiddlerIterator(source); var operationSubFunction = function(source,widget) {
} var accumulator = source,
if(!widget) { results = [],
widget = $tw.rootWidget; currTiddlerTitle = widget && widget.getVariable("currentTiddler");
} $tw.utils.each(operation.operators,function(operator) {
var results = new $tw.utils.LinkedList(); var operands = [],
self.filterRecursionCount = (self.filterRecursionCount || 0) + 1; operatorFunction;
if(self.filterRecursionCount < MAX_FILTER_DEPTH) { if(!operator.operator) {
$tw.utils.each(operationFunctions,function(operationFunction) { // Use the "title" operator if no operator is specified
operationFunction(results,source,widget); operatorFunction = filterOperators.title;
}); } else if(!filterOperators[operator.operator]) {
} else { // Unknown operators treated as "[unknown]" - at run time we can distinguish between a custom operator and falling back to the default "field" operator
results.push("/**-- Excessive filter recursion --**/"); operatorFunction = filterOperators["[unknown]"];
}
self.filterRecursionCount = self.filterRecursionCount - 1;
return results.toArray();
});
if(this.filterCacheCount >= 2000) {
// To prevent memory leak, we maintain an upper limit for cache size.
// Reset if exceeded. This should give us 95% of the benefit
// that no cache limit would give us.
this.filterCache = Object.create(null);
this.filterCacheCount = 0;
}
this.filterCache[filterString] = fnMeasured;
this.filterCacheCount++;
return fnMeasured;
};
exports.compileFilterOperations = function(filterParseTree) {
var self = this;
// Get the hashmap of filter operator functions
var filterOperators = this.getFilterOperators();
// Assemble array of functions, one for each operation
var operationFunctions = [];
// Step through the operations
$tw.utils.each(filterParseTree,function(operation) {
// Create a function for the chain of operators in the operation
var operationSubFunction = function(source,widget) {
var accumulator = source,
results = [],
currTiddlerTitle = widget && widget.getVariable("currentTiddler");
$tw.utils.each(operation.operators,function(operator) {
var operands = [],
operatorFunction;
if(!operator.operator) {
// Use the "title" operator if no operator is specified
operatorFunction = filterOperators.title;
} else if(!filterOperators[operator.operator]) {
// Unknown operators treated as "[unknown]" - at run time we can distinguish between a custom operator and falling back to the default "field" operator
operatorFunction = filterOperators["[unknown]"];
} else {
// Use the operator function
operatorFunction = filterOperators[operator.operator];
}
$tw.utils.each(operator.operands,function(operand) {
if(operand.indirect) {
operand.value = self.getTextReference(operand.text,"",currTiddlerTitle);
} else if(operand.variable) {
var varTree = $tw.utils.parseFilterVariable(operand.text);
operand.value = widgetClass.evaluateVariable(widget,varTree.name,{params: varTree.params, source: source})[0] || "";
} else { } else {
operand.value = operand.text; // Use the operator function
operatorFunction = filterOperators[operator.operator];
}
$tw.utils.each(operator.operands,function(operand) {
if(operand.indirect) {
operand.value = self.getTextReference(operand.text,"",currTiddlerTitle);
} else if(operand.variable) {
var varTree = $tw.utils.parseFilterVariable(operand.text);
operand.value = widgetClass.evaluateVariable(widget,varTree.name,{params: varTree.params, source: source})[0] || "";
} else {
operand.value = operand.text;
}
operands.push(operand.value);
});
// Invoke the appropriate filteroperator module
results = operatorFunction(accumulator,{
operator: operator.operator,
operand: operands.length > 0 ? operands[0] : undefined,
operands: operands,
prefix: operator.prefix,
suffix: operator.suffix,
suffixes: operator.suffixes,
regexp: operator.regexp
},{
wiki: self,
widget: widget
});
if($tw.utils.isArray(results)) {
accumulator = self.makeTiddlerIterator(results);
} else {
accumulator = results;
} }
operands.push(operand.value);
}); });
// Invoke the appropriate filteroperator module
results = operatorFunction(accumulator,{
operator: operator.operator,
operand: operands.length > 0 ? operands[0] : undefined,
operands: operands,
prefix: operator.prefix,
suffix: operator.suffix,
suffixes: operator.suffixes,
regexp: operator.regexp
},{
wiki: self,
widget: widget
});
if($tw.utils.isArray(results)) { if($tw.utils.isArray(results)) {
accumulator = self.makeTiddlerIterator(results); return results;
} else { } else {
accumulator = results; var resultArray = [];
results(function(tiddler,title) {
resultArray.push(title);
});
return resultArray;
} }
}); };
if($tw.utils.isArray(results)) { var filterRunPrefixes = self.getFilterRunPrefixes();
return results; // Wrap the operator functions in a wrapper function that depends on the prefix
} else { operationFunctions.push((function() {
var resultArray = []; var options = {wiki: self, suffixes: operation.suffixes || []};
results(function(tiddler,title) { switch(operation.prefix || "") {
resultArray.push(title); case "": // No prefix means that the operation is unioned into the result
return filterRunPrefixes["or"](operationSubFunction, options);
case "=": // The results of the operation are pushed into the result without deduplication
return filterRunPrefixes["all"](operationSubFunction, options);
case "-": // The results of this operation are removed from the main result
return filterRunPrefixes["except"](operationSubFunction, options);
case "+": // This operation is applied to the main results so far
return filterRunPrefixes["and"](operationSubFunction, options);
case "~": // This operation is unioned into the result only if the main result so far is empty
return filterRunPrefixes["else"](operationSubFunction, options);
default:
if(operation.namedPrefix && filterRunPrefixes[operation.namedPrefix]) {
return filterRunPrefixes[operation.namedPrefix](operationSubFunction, options);
} else {
return function(results,source,widget) {
results.clear();
results.push($tw.language.getString("Error/FilterRunPrefix"));
};
}
}
})());
});
// Return a function that applies the operations to a source iterator of tiddler titles
var fnMeasured = $tw.perf.measure("filter: " + filterString,function filterFunction(source,widget) {
if(!source) {
source = self.each;
} else if(typeof source === "object") { // Array or hashmap
source = self.makeTiddlerIterator(source);
}
if(!widget) {
widget = $tw.rootWidget;
}
var results = new $tw.utils.LinkedList();
self.filterRecursionCount = (self.filterRecursionCount || 0) + 1;
if(self.filterRecursionCount < MAX_FILTER_DEPTH) {
$tw.utils.each(operationFunctions,function(operationFunction) {
operationFunction(results,source,widget);
}); });
return resultArray; } else {
results.push("/**-- Excessive filter recursion --**/");
} }
}; self.filterRecursionCount = self.filterRecursionCount - 1;
var filterRunPrefixes = self.getFilterRunPrefixes(); return results.toArray();
// Wrap the operator functions in a wrapper function that depends on the prefix });
operationFunctions.push((function() { if(this.filterCacheCount >= 2000) {
var options = {wiki: self, suffixes: operation.suffixes || []}; // To prevent memory leak, we maintain an upper limit for cache size.
switch(operation.prefix || "") { // Reset if exceeded. This should give us 95% of the benefit
case "": // No prefix means that the operation is unioned into the result // that no cache limit would give us.
return filterRunPrefixes["or"](operationSubFunction, options); this.filterCache = Object.create(null);
case "=": // The results of the operation are pushed into the result without deduplication this.filterCacheCount = 0;
return filterRunPrefixes["all"](operationSubFunction, options); }
case "-": // The results of this operation are removed from the main result this.filterCache[filterString] = fnMeasured;
return filterRunPrefixes["except"](operationSubFunction, options); this.filterCacheCount++;
case "+": // This operation is applied to the main results so far return fnMeasured;
return filterRunPrefixes["and"](operationSubFunction, options); };
case "~": // This operation is unioned into the result only if the main result so far is empty
return filterRunPrefixes["else"](operationSubFunction, options); })();
default:
if(operation.namedPrefix && filterRunPrefixes[operation.namedPrefix]) {
return filterRunPrefixes[operation.namedPrefix](operationSubFunction, options);
} else {
return function(results,source,widget) {
results.clear();
results.push($tw.language.getString("Error/FilterRunPrefix"));
};
}
}
})());
});
return operationFunctions;
};
})();