From 45355a7fcfa79cebcccf9fb53b46e39f620b34f6 Mon Sep 17 00:00:00 2001 From: Cameron Fischer Date: Fri, 29 Jan 2021 08:26:31 -0500 Subject: [PATCH] Wikirules now use better macrocall parser (#5451) * wikirules now use better macrocall parser Before, wikirules would use a deficient macrocall parser which couldn't handle certain types of arguments. Now it uses the same one that the widget parser uses. Less code! * style changes and removing weird switch statement That switch statement made more sense in an earlier iteration. * comment improvements * oops, wikirule macrocalls could do ONE thing better * '=' wasn't allowed for widget macros, but why? Now they're allowed for both widget macros and macrocall macros. --- core/modules/parsers/parseutils.js | 2 +- .../wikiparser/rules/macrocallblock.js | 52 +++++----- .../wikiparser/rules/macrocallinline.js | 44 ++++----- .../test/tiddlers/tests/test-html-parser.js | 2 +- .../tiddlers/tests/test-wikitext-parser.js | 98 ++++++++++++++++++- 5 files changed, 139 insertions(+), 59 deletions(-) diff --git a/core/modules/parsers/parseutils.js b/core/modules/parsers/parseutils.js index 0d74355f7..8d83efd1b 100644 --- a/core/modules/parsers/parseutils.js +++ b/core/modules/parsers/parseutils.js @@ -132,7 +132,7 @@ exports.parseMacroParameter = function(source,pos) { start: pos }; // Define our regexp - var reMacroParameter = /(?:([A-Za-z0-9\-_]+)\s*:)?(?:\s*(?:"""([\s\S]*?)"""|"([^"]*)"|'([^']*)'|\[\[([^\]]*)\]\]|([^\s>"'=]+)))/g; + var reMacroParameter = /(?:([A-Za-z0-9\-_]+)\s*:)?(?:\s*(?:"""([\s\S]*?)"""|"([^"]*)"|'([^']*)'|\[\[([^\]]*)\]\]|((?:(?:>(?!>))|[^\s>"'])+)))/g; // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Look for the parameter diff --git a/core/modules/parsers/wikiparser/rules/macrocallblock.js b/core/modules/parsers/wikiparser/rules/macrocallblock.js index d99c15a6c..6f50fdbb0 100644 --- a/core/modules/parsers/wikiparser/rules/macrocallblock.js +++ b/core/modules/parsers/wikiparser/rules/macrocallblock.js @@ -21,40 +21,36 @@ exports.types = {block: true}; exports.init = function(parser) { this.parser = parser; - // Regexp to match - this.matchRegExp = /<<([^>\s]+)(?:\s*)((?:[^>]|(?:>(?!>)))*?)>>(?:\r?\n|$)/mg; +}; + +exports.findNextMatch = function(startPos) { + var nextStart = startPos; + // Try parsing at all possible macrocall openers until we match + while((nextStart = this.parser.source.indexOf("<<",nextStart)) >= 0) { + var nextCall = $tw.utils.parseMacroInvocation(this.parser.source,nextStart); + if(nextCall) { + var c = this.parser.source.charAt(nextCall.end); + // Ensure EOL after parsed macro + // If we didn't need to support IE, we'd just use /(?:\r?\n|$)/ym + if ((c === "") || (c === "\n") || ((c === "\r") && this.parser.source.charAt(nextCall.end+1) === "\n")) { + this.nextCall = nextCall; + return nextStart; + } + } + nextStart += 2; + } + return undefined; }; /* Parse the most recent match */ exports.parse = function() { - // Get all the details of the match - var macroName = this.match[1], - paramString = this.match[2]; - // Move past the macro call - this.parser.pos = this.matchRegExp.lastIndex; - var params = [], - reParam = /\s*(?:([A-Za-z0-9\-_]+)\s*:)?(?:\s*(?:"""([\s\S]*?)"""|"([^"]*)"|'([^']*)'|\[\[([^\]]*)\]\]|([^"'\s]+)))/mg, - paramMatch = reParam.exec(paramString); - while(paramMatch) { - // Process this parameter - var paramInfo = { - value: paramMatch[2] || paramMatch[3] || paramMatch[4] || paramMatch[5] || paramMatch[6] - }; - if(paramMatch[1]) { - paramInfo.name = paramMatch[1]; - } - params.push(paramInfo); - // Find the next match - paramMatch = reParam.exec(paramString); - } - return [{ - type: "macrocall", - name: macroName, - params: params, - isBlock: true - }]; + var call = this.nextCall; + call.isBlock = true; + this.nextCall = null; + this.parser.pos = call.end; + return [call]; }; })(); diff --git a/core/modules/parsers/wikiparser/rules/macrocallinline.js b/core/modules/parsers/wikiparser/rules/macrocallinline.js index 6e96d45b7..165a70dce 100644 --- a/core/modules/parsers/wikiparser/rules/macrocallinline.js +++ b/core/modules/parsers/wikiparser/rules/macrocallinline.js @@ -21,39 +21,29 @@ exports.types = {inline: true}; exports.init = function(parser) { this.parser = parser; - // Regexp to match - this.matchRegExp = /<<([^\s>]+)\s*([\s\S]*?)>>/mg; +}; + +exports.findNextMatch = function(startPos) { + var nextStart = startPos; + // Try parsing at all possible macrocall openers until we match + while((nextStart = this.parser.source.indexOf("<<",nextStart)) >= 0) { + this.nextCall = $tw.utils.parseMacroInvocation(this.parser.source,nextStart); + if(this.nextCall) { + return nextStart; + } + nextStart += 2; + } + return undefined; }; /* Parse the most recent match */ exports.parse = function() { - // Get all the details of the match - var macroName = this.match[1], - paramString = this.match[2]; - // Move past the macro call - this.parser.pos = this.matchRegExp.lastIndex; - var params = [], - reParam = /\s*(?:([A-Za-z0-9\-_]+)\s*:)?(?:\s*(?:"""([\s\S]*?)"""|"([^"]*)"|'([^']*)'|\[\[([^\]]*)\]\]|([^"'\s]+)))/mg, - paramMatch = reParam.exec(paramString); - while(paramMatch) { - // Process this parameter - var paramInfo = { - value: paramMatch[2] || paramMatch[3] || paramMatch[4] || paramMatch[5]|| paramMatch[6] - }; - if(paramMatch[1]) { - paramInfo.name = paramMatch[1]; - } - params.push(paramInfo); - // Find the next match - paramMatch = reParam.exec(paramString); - } - return [{ - type: "macrocall", - name: macroName, - params: params - }]; + var call = this.nextCall; + this.nextCall = null; + this.parser.pos = call.end; + return [call]; }; })(); diff --git a/editions/test/tiddlers/tests/test-html-parser.js b/editions/test/tiddlers/tests/test-html-parser.js index bbe4f1573..3e0214059 100644 --- a/editions/test/tiddlers/tests/test-html-parser.js +++ b/editions/test/tiddlers/tests/test-html-parser.js @@ -90,7 +90,7 @@ describe("HTML tag new parser tests", function() { { type : 'macro-parameter', start : 0, value : 'one two three', end : 17 } ); expect($tw.utils.parseMacroParameter("myparam>",0)).toEqual( - { type : 'macro-parameter', start : 0, value : 'myparam', end : 7 } + { type : 'macro-parameter', start : 0, value : 'myparam>', end : 8 } ); }); diff --git a/editions/test/tiddlers/tests/test-wikitext-parser.js b/editions/test/tiddlers/tests/test-wikitext-parser.js index 697c5400a..12607a29f 100644 --- a/editions/test/tiddlers/tests/test-wikitext-parser.js +++ b/editions/test/tiddlers/tests/test-wikitext-parser.js @@ -114,10 +114,104 @@ describe("WikiText parser tests", function() { }); - it("should parse macro calls", function() { + it("should parse inline macro calls", function() { expect(parse("<><><><>")).toEqual( - [ { type : 'element', tag : 'p', children : [ { type : 'macrocall', name : 'john', params : [ ] }, { type : 'macrocall', name : 'paul', params : [ ] }, { type : 'macrocall', name : 'george', params : [ ] }, { type : 'macrocall', name : 'ringo', params : [ ] } ] } ] + [ { type: 'element', tag: 'p', children: [ { type: 'macrocall', start: 0, params: [ ], name: 'john', end: 8 }, { type: 'macrocall', start: 8, params: [ ], name: 'paul', end: 16 }, { type: 'macrocall', start: 16, params: [ ], name: 'george', end: 26 }, { type: 'macrocall', start: 26, params: [ ], name: 'ringo', end: 35 } ] } ] + + ); + expect(parse("text <>")).toEqual( + + [{ type: 'element', tag: 'p', children: [ { type: 'text', text: 'text ' }, { type: 'macrocall', name: 'john', start: 5, params: [ { type: 'macro-parameter', start: 11, value: 'val1', name: 'one', end: 20 }, { type: 'macro-parameter', start: 20, value: 'val "2"', name: 'two', end: 35 }, { type: 'macro-parameter', start: 35, value: 'val \'3\'', name: 'three', end: 52 }, { type: 'macro-parameter', start: 52, value: 'val 4"5\'', name: 'four', end: 73 }, { type: 'macro-parameter', start: 73, value: 'val 5', name: 'five', end: 89 } ], end: 92 } ] } ] + + ); + expect(parse("ignored << carrots <>")).toEqual( + + [ { type: 'element', tag: 'p', children: [ { type: 'text', text: 'ignored << carrots ' }, { type: 'macrocall', name: 'john', start: 19, params: [ ], end: 27 } ] } ] + + ); + expect(parse("text <<>")).toEqual( + + [ { type: 'element', tag: 'p', children: [ { type: 'text', text: 'text ' }, { type: 'macrocall', name: '>")).toEqual( + + [ { type: 'element', tag: 'p', children: [ { type: 'text', text: 'before\n' }, { type: 'macrocall', start: 7, params: [ ], name: 'john', end: 15 } ] } ] + + ); + // A single space will cause it to be inline + expect(parse("<> ")).toEqual( + + [ { type: 'element', tag: 'p', children: [ { type: 'macrocall', start: 0, params: [ ], name: 'john', end: 8 }, { type: 'text', text: ' ' } ] } ] + + ); + expect(parse("text <>' >>")).toEqual( + + [ { type: 'element', tag: 'p', children: [ { type: 'text', text: 'text ' }, { type: 'macrocall', start: 5, params: [ { type: 'macro-parameter', start: 12, value: 'my <>', name: 'one', end: 31 } ], name: 'outie', end: 34 } ] } ] + + ); + + }); + + it("should parse block macro calls", function() { + expect(parse("<>\n<>\r\n<>\n<>")).toEqual( + + [ { type: 'macrocall', start: 0, name: 'john', params: [ ], end: 8, isBlock: true }, { type: 'macrocall', start: 9, name: 'paul', params: [ ], end: 17, isBlock: true }, { type: 'macrocall', start: 19, name: 'george', params: [ ], end: 29, isBlock: true }, { type: 'macrocall', start: 30, name: 'ringo', params: [ ], end: 39, isBlock: true } ] + + ); + expect(parse("<>")).toEqual( + + [ { type: 'macrocall', start: 0, name: 'john', params: [ { type: 'macro-parameter', start: 6, value: 'val1', name: 'one', end: 15 }, { type: 'macro-parameter', start: 15, value: 'val "2"', name: 'two', end: 30 }, { type: 'macro-parameter', start: 30, value: 'val \'3\'', name: 'three', end: 47 }, { type: 'macro-parameter', start: 47, value: 'val 4"5\'', name: 'four', end: 68 }, { type: 'macro-parameter', start: 68, value: 'val 5', name: 'five', end: 84 }], end: 87, isBlock: true } ] + + ); + expect(parse("<< carrots\n\n<>")).toEqual( + + [ { type: 'element', tag: 'p', children: [ { type: 'text', text: '<< carrots' } ] }, { type: 'macrocall', start: 12, params: [ ], name: 'john', end: 20, isBlock: true } ] + + ); + expect(parse("before\n\n<>")).toEqual( + + [ { type: 'element', tag: 'p', children: [ { type: 'text', text: 'before' } ] }, { type: 'macrocall', start: 8, name: 'john', params: [ ], end: 16, isBlock: true } ] + + ); + expect(parse("<>\nafter")).toEqual( + + [ { type: 'macrocall', start: 0, name: 'john', params: [ ], end: 8, isBlock: true }, { type: 'element', tag: 'p', children: [ { type: 'text', text: 'after' } ] } ] + + ); + expect(parse("<>")).toEqual( + + [ { type: 'macrocall', start: 0, params: [ { type: 'macro-parameter', start: 11, value: '\n\nwikitext\n', name: 'arg', end: 33 } ], name: 'multiline', end: 36, isBlock: true }] + + ); + expect(parse("<>' >>")).toEqual( + + [ { type: 'macrocall', start: 0, params: [ { type: 'macro-parameter', start: 7, value: 'my <>', name: 'one', end: 26 } ], name: 'outie', end: 29, isBlock: true } ] + + ); + }); + + it("should parse tricky macrocall parameters", function() { + expect(parse("<am>>")).toEqual( + + [ { type: 'macrocall', start: 0, params: [ { type: 'macro-parameter', start: 6, value: 'pa>am', end: 12 } ], name: 'john', end: 14, isBlock: true } ] + + ); + expect(parse("< >>")).toEqual( + + [ { type: 'macrocall', start: 0, params: [ { type: 'macro-parameter', start: 6, value: 'param>', end: 13 } ], name: 'john', end: 16, isBlock: true } ] + + ); + expect(parse("<>>")).toEqual( + + [ { type: 'element', tag: 'p', children: [ { type: 'macrocall', start: 0, params: [ { type: 'macro-parameter', start: 6, value: 'param', end: 12 } ], name: 'john', end: 14 }, { type: 'text', text: '>' } ] } ] + + ); + // equals signs should be allowed + expect(parse("<=4 >>")).toEqual( + + [ { type: 'macrocall', start: 0, params: [ { type: 'macro-parameter', start: 6, value: 'var>=4', end: 13 } ], name: 'john', end: 16, isBlock: true } ] );