From 6dcdc2049a27f0acd0c996101504f732ed31e4ff Mon Sep 17 00:00:00 2001 From: Jeremy Ruston Date: Tue, 30 Oct 2018 17:39:18 +0000 Subject: [PATCH] Enhance search operator (#3502) * Enhance search operator * Add support for searching all fields and also searching all fields except nominated fields. * Docs tweaks Thanks @pmario * Error message improvements * Improve error message formatting --- core/modules/filters.js | 14 ++- core/modules/filters/search.js | 25 +++- core/modules/wiki.js | 112 +++++++++++++++--- editions/test/tiddlers/tests/test-filters.js | 27 +++++ .../tiddlers/filters/examples/search.tid | 9 +- editions/tw5.com/tiddlers/filters/search.tid | 37 ++++-- 6 files changed, 194 insertions(+), 30 deletions(-) diff --git a/core/modules/filters.js b/core/modules/filters.js index 76046b828..e9117bec1 100644 --- a/core/modules/filters.js +++ b/core/modules/filters.js @@ -40,12 +40,23 @@ function parseFilterOperation(operators,filterString,p) { nextBracketPos += p; var bracket = filterString.charAt(nextBracketPos); operator.operator = filterString.substring(p,nextBracketPos); - // Any suffix? 
var colon = operator.operator.indexOf(':'); if(colon > -1) { + // The raw suffix for older filters operator.suffix = operator.operator.substring(colon + 1); operator.operator = operator.operator.substring(0,colon) || "field"; + // The processed suffix for newer filters + operator.suffixes = []; + $tw.utils.each(operator.suffix.split(":"),function(subsuffix) { + operator.suffixes.push([]); + $tw.utils.each(subsuffix.split(","),function(entry) { + entry = $tw.utils.trim(entry); + if(entry) { + operator.suffixes[operator.suffixes.length - 1].push(entry); + } + }); + }); } // Empty operator means: title else if(operator.operator === "") { @@ -208,6 +219,7 @@ exports.compileFilter = function(filterString) { operand: operand, prefix: operator.prefix, suffix: operator.suffix, + suffixes: operator.suffixes, regexp: operator.regexp },{ wiki: self, diff --git a/core/modules/filters/search.js b/core/modules/filters/search.js index 933cf7419..a94ccf0b4 100644 --- a/core/modules/filters/search.js +++ b/core/modules/filters/search.js @@ -17,11 +17,32 @@ Export our filter function */ exports.search = function(source,operator,options) { var invert = operator.prefix === "!"; - if(operator.suffix) { + if(operator.suffixes) { + var hasFlag = function(flag) { + return (operator.suffixes[1] || []).indexOf(flag) !== -1; + }, + excludeFields = false, + firstChar = operator.suffixes[0][0].charAt(0), + fields; + if(operator.suffixes[0][0].charAt(0) === "-") { + fields = [operator.suffixes[0][0].slice(1)].concat(operator.suffixes[0].slice(1)); + excludeFields = true; + } else if(operator.suffixes[0][0] === "*"){ + fields = []; + excludeFields = true; + } else { + fields = operator.suffixes[0].slice(0); + } return options.wiki.search(operator.operand,{ source: source, invert: invert, - field: operator.suffix + field: fields, + excludeField: excludeFields, + caseSensitive: hasFlag("casesensitive"), + literal: hasFlag("literal"), + whitespace: hasFlag("whitespace"), + regexp: 
hasFlag("regexp"), + words: hasFlag("words") }); } else { return options.wiki.search(operator.operand,{ diff --git a/core/modules/wiki.js b/core/modules/wiki.js index 81f5cafc4..6dda83a97 100755 --- a/core/modules/wiki.js +++ b/core/modules/wiki.js @@ -1047,8 +1047,13 @@ Options available: exclude: An array of tiddler titles to exclude from the search invert: If true returns tiddlers that do not contain the specified string caseSensitive: If true forces a case sensitive search - literal: If true, searches for literal string, rather than separate search terms - field: If specified, restricts the search to the specified field + field: If specified, restricts the search to the specified field, or an array of field names + excludeField: If true, the field options are inverted to specify the fields that are not to be searched + The search mode is determined by the first of these boolean flags to be true + literal: searches for literal string + whitespace: same as literal except runs of whitespace are treated as a single space + regexp: treats the search term as a regular expression + words: (default) treats search string as a list of tokens, and matches if all tokens are found, regardless of adjacency or ordering */ exports.search = function(text,options) { options = options || {}; @@ -1064,6 +1069,21 @@ exports.search = function(text,options) { } else { searchTermsRegExps = [new RegExp("(" + $tw.utils.escapeRegExp(text) + ")",flags)]; } + } else if(options.whitespace) { + terms = []; + $tw.utils.each(text.split(/\s+/g),function(term) { + if(term) { + terms.push($tw.utils.escapeRegExp(term)); + } + }); + searchTermsRegExps = [new RegExp("(" + terms.join("\\s+") + ")",flags)]; + } else if(options.regexp) { + try { + searchTermsRegExps = [new RegExp("(" + text + ")",flags)]; + } catch(e) { + searchTermsRegExps = null; + console.log("Regexp error parsing /(" + text + ")/" + flags + ": ",e); + } } else { terms = text.split(/ +/); if(terms.length === 1 && terms[0] === "") { 
@@ -1075,6 +1095,23 @@ exports.search = function(text,options) { } } } + // Accumulate the array of fields to be searched or excluded from the search + var fields = []; + if(options.field) { + if($tw.utils.isArray(options.field)) { + $tw.utils.each(options.field,function(fieldName) { + fields.push(fieldName); + }); + } else { + fields.push(options.field); + } + } + // Use default fields if none specified and we're not excluding fields (excluding fields with an empty field array is the same as searching all fields) + if(fields.length === 0 && !options.excludeField) { + fields.push("title"); + fields.push("tags"); + fields.push("text"); + } // Function to check a given tiddler for the search term var searchTiddler = function(title) { if(!searchTermsRegExps) { @@ -1085,24 +1122,63 @@ exports.search = function(text,options) { tiddler = new $tw.Tiddler({title: title, text: "", type: "text/vnd.tiddlywiki"}); } var contentTypeInfo = $tw.config.contentTypeInfo[tiddler.fields.type] || $tw.config.contentTypeInfo["text/vnd.tiddlywiki"], - match; - for(var t=0; t <$macrocall $name=".operator-example" n="2" eg="[all[shadows]search[table]]" ie="shadow tiddlers containing the word <<.word table>>"/> <$macrocall $name=".operator-example" n="3" eg="[search:caption[arch]]" ie="tiddlers containing `arch` in their <<.field caption>> field"/> - +<$macrocall $name=".operator-example" n="4" eg="[search:*[arch]]" ie="tiddlers containing `arch` in any field"/> +<$macrocall $name=".operator-example" n="5" eg="[search:-title,caption[arch]]" ie="tiddlers containing `arch` in any field except <<.field title>> and <<.field caption>>"/> +<$macrocall $name=".operator-example" n="6" eg="[!is[system]search[the first]]" ie="non-system tiddlers containing a case-insensitive match for both the <<.word 'the'>> and <<.word 'first'>>"/> +<$macrocall $name=".operator-example" n="7" eg="[!is[system]search::literal[the first]]" ie="non-system tiddlers containing a case-insensitive match for the literal 
phrase <<.word 'the first'>>"/> +<$macrocall $name=".operator-example" n="8" eg="[!is[system]search::literal,casesensitive[The first]]" ie="non-system tiddlers containing a case-sensitive match for the literal phrase <<.word 'The first'>>"/> +<$macrocall $name=".operator-example" n="9" eg="[search:caption,description:casesensitive,words[arch]]" ie="any tiddlers containing a case-sensitive match for the word `arch` in their <<.field caption>> or <<.field description>> fields"/> diff --git a/editions/tw5.com/tiddlers/filters/search.tid b/editions/tw5.com/tiddlers/filters/search.tid index 481ac77d0..9da7f8189 100644 --- a/editions/tw5.com/tiddlers/filters/search.tid +++ b/editions/tw5.com/tiddlers/filters/search.tid @@ -1,21 +1,44 @@ created: 20140410103123179 -modified: 20150203191048000 +modified: 20181025082022690 tags: [[Filter Operators]] [[Common Operators]] [[Field Operators]] [[Negatable Operators]] title: search Operator type: text/vnd.tiddlywiki caption: search op-purpose: filter the input by searching tiddler content op-input: a [[selection of titles|Title Selection]] -op-suffix: optionally, the name of a [[field|TiddlerFields]] -op-suffix-name: F -op-parameter: one or more search terms, separated by spaces +op-suffix: the <<.op search>> operator uses a rich suffix, see below for details +op-parameter: one or more search terms, separated by spaces, or a literal search string op-output: those input tiddlers in which <<.em all>> of the search terms can be found in the value of field <<.place F>> op-neg-output: those input tiddlers in which <<.em not>> all of the search terms can be found -When used with a suffix, the <<.op search>> operator is similar to <<.olink regexp>> but less powerful. +<<.from-version "5.1.18">> The search filter operator was significantly enhanced in 5.1.18. Earlier versions do not support the extended syntax and therefore do not permit searching multiple fields, or the ''literal'' or ''casesensitive'' options. 
-If the suffix is omitted, a tiddler is deemed to match if all the search terms appear in the combination of its <<.field tags>>, <<.field text>> and <<.field title>> fields. +The <<.op search>> operator uses an extended syntax that permits multiple fields and flags to be passed: -The search ignores the difference between capital and lowercase letters. +``` +[search:<field list>:<flag list>[<operand>]] +``` + +* ''field list'': a comma delimited list of field names to restrict the search +** defaults to <<.field tags>>, <<.field text>> and <<.field title>> if blank +** an asterisk `*` instead of the field list causes the search to be performed across all fields available on each tiddler +** preceding the list with a minus sign `-` inverts the selection so that the search is performed on all fields except the listed fields +* ''flag list'': a comma delimited list of flags (defaults to `words` if blank) +* ''operand'': filter operand + +This example searches the fields <<.field title>> and <<.field caption>> for a case-sensitive match for the literal string <<.op-word "The first">>: + +``` +[search:title,caption:literal,casesensitive[The first]] +``` + +The available flags are: + +* Search mode - the first to be set of the following flags determines the type of search that is performed: +** ''literal'': considers the search string to be a literal string, and requires an exact match +** ''whitespace'': considers the search string to be a literal string, but will consider all runs of whitespace to be equivalent to a single space. Thus `A B` matches `A    B` +** ''regexp'': treats the search string as a regular expression. Note that the ''regexp'' option obviates the need for the older <<.olink regexp>> operator. 
+** ''words'': (the default) treats the search string as a list of tokens separated by whitespace, and matches if all of the tokens appear in the string (regardless of ordering and whether there is other text in between) +* ''casesensitive'': if present, this flag forces a case-sensitive match, where upper and lower case letters are considered different. By default, upper and lower case letters are considered identical for matching purposes. <<.operator-examples "search">>