diff --git a/core/modules/parsers/wikiparser/rules/block/heading.js b/core/modules/parsers/wikiparser/rules/block/heading.js index e05da0338..c2277a265 100644 --- a/core/modules/parsers/wikiparser/rules/block/heading.js +++ b/core/modules/parsers/wikiparser/rules/block/heading.js @@ -12,33 +12,21 @@ Wiki text block rule for headings /*global $tw: false */ "use strict"; -var HeadingRule = function(parser,startPos) { - // Save state - this.parser = parser; - // Regexp to match - this.reMatch = /(!{1,6})/mg; - // Get the first match - this.matchIndex = startPos-1; - this.findNextMatch(startPos); -}; +exports.name = "heading"; -HeadingRule.prototype.findNextMatch = function(startPos) { - if(this.matchIndex !== undefined && startPos > this.matchIndex) { - this.reMatch.lastIndex = startPos; - this.match = this.reMatch.exec(this.parser.source); - this.matchIndex = this.match ? this.match.index : undefined; - } - return this.matchIndex; +exports.init = function() { + // Regexp to match + this.matchRegExp = /(!{1,6})/mg; }; /* Parse the most recent match */ -HeadingRule.prototype.parse = function() { +exports.parse = function() { // Get all the details of the match var headingLevel = this.match[1].length; // Move past the !s - this.parser.pos = this.reMatch.lastIndex; + this.parser.pos = this.matchRegExp.lastIndex; // Parse the heading var classedRun = this.parser.parseClassedRun(/(\r?\n)/mg); // Return the heading @@ -51,7 +39,4 @@ HeadingRule.prototype.parse = function() { children: classedRun.tree }]; }; - -exports.HeadingRule = HeadingRule; - })(); diff --git a/core/modules/parsers/wikiparser/rules/block/list.js b/core/modules/parsers/wikiparser/rules/block/list.js index 27834f154..2e4ce6952 100644 --- a/core/modules/parsers/wikiparser/rules/block/list.js +++ b/core/modules/parsers/wikiparser/rules/block/list.js @@ -46,23 +46,11 @@ A CSS class can be applied to a list item as follows: /*global $tw: false */ "use strict"; -var ListRule = function(parser,startPos) { - // 
Save state - this.parser = parser; - // Regexp to match - this.reMatch = /([\\*#;:]+)/mg; - // Get the first match - this.matchIndex = startPos-1; - this.findNextMatch(startPos); -}; +exports.name = "list"; -ListRule.prototype.findNextMatch = function(startPos) { - if(this.matchIndex !== undefined && startPos > this.matchIndex) { - this.reMatch.lastIndex = startPos; - this.match = this.reMatch.exec(this.parser.source); - this.matchIndex = this.match ? this.match.index : undefined; - } - return this.matchIndex; +exports.init = function() { + // Regexp to match + this.matchRegExp = /([\\*#;:]+)/mg; }; var listTypes = { @@ -75,7 +63,7 @@ var listTypes = { /* Parse the most recent match */ -ListRule.prototype.parse = function() { +exports.parse = function() { // Array of parse tree nodes for the previous row of the list var listStack = []; // Cycle through the items in the list @@ -136,6 +124,4 @@ ListRule.prototype.parse = function() { return [listStack[0]]; }; -exports.ListRule = ListRule; - })(); diff --git a/core/modules/parsers/wikiparser/rules/pragma/macrodef.js b/core/modules/parsers/wikiparser/rules/pragma/macrodef.js index 11ca07dae..c90d351a9 100644 --- a/core/modules/parsers/wikiparser/rules/pragma/macrodef.js +++ b/core/modules/parsers/wikiparser/rules/pragma/macrodef.js @@ -18,34 +18,22 @@ definition text, including $param$ markers /*global $tw: false */ "use strict"; +exports.name = "macrodef"; + /* Instantiate parse rule */ -var MacroDefRule = function(parser,startPos) { - // Save state - this.parser = parser; +exports.init = function() { // Regexp to match - this.reMatch = /^\\define\s*([^(\s]+)\(\s*([^)]*)\)(\r?\n)?/mg; - // Get the first match - this.matchIndex = startPos-1; - this.findNextMatch(startPos); -}; - -MacroDefRule.prototype.findNextMatch = function(startPos) { - if(this.matchIndex !== undefined && startPos > this.matchIndex) { - this.reMatch.lastIndex = startPos; - this.match = this.reMatch.exec(this.parser.source); - this.matchIndex = 
this.match ? this.match.index : undefined; - } - return this.matchIndex; + this.matchRegExp = /^\\define\s*([^(\s]+)\(\s*([^)]*)\)(\r?\n)?/mg; }; /* Parse the most recent match */ -MacroDefRule.prototype.parse = function() { +exports.parse = function() { // Move past the macro name and parameters - this.parser.pos = this.reMatch.lastIndex; + this.parser.pos = this.matchRegExp.lastIndex; // Parse the parameters var paramString = this.match[2], params = []; @@ -93,6 +81,4 @@ MacroDefRule.prototype.parse = function() { }; }; -exports.MacroDefRule = MacroDefRule; - })(); diff --git a/core/modules/parsers/wikiparser/rules/run/entity.js b/core/modules/parsers/wikiparser/rules/run/entity.js index 6d02c65a7..6d7a6acf8 100644 --- a/core/modules/parsers/wikiparser/rules/run/entity.js +++ b/core/modules/parsers/wikiparser/rules/run/entity.js @@ -16,37 +16,23 @@ Wiki text run rule for HTML entities. For example: /*global $tw: false */ "use strict"; -var EntityRule = function(parser,startPos) { - // Save state - this.parser = parser; - // Regexp to match - this.reMatch = /(&#?[a-zA-Z0-9]{2,8};)/mg; - // Get the first match - this.matchIndex = startPos-1; - this.findNextMatch(startPos); -}; +exports.name = "entity"; -EntityRule.prototype.findNextMatch = function(startPos) { - if(this.matchIndex !== undefined && startPos > this.matchIndex) { - this.reMatch.lastIndex = startPos; - this.match = this.reMatch.exec(this.parser.source); - this.matchIndex = this.match ? 
this.match.index : undefined; - } - return this.matchIndex; +exports.init = function() { + // Regexp to match + this.matchRegExp = /(&#?[a-zA-Z0-9]{2,8};)/mg; }; /* Parse the most recent match */ -EntityRule.prototype.parse = function() { +exports.parse = function() { // Get all the details of the match var entityString = this.match[1]; // Move past the macro call - this.parser.pos = this.reMatch.lastIndex; + this.parser.pos = this.matchRegExp.lastIndex; // Return the entity return [{type: "entity", entity: this.match[0]}]; }; -exports.EntityRule = EntityRule; - })(); diff --git a/core/modules/parsers/wikiparser/rules/run/html.js b/core/modules/parsers/wikiparser/rules/run/html.js index 8270867da..afc4e6690 100644 --- a/core/modules/parsers/wikiparser/rules/run/html.js +++ b/core/modules/parsers/wikiparser/rules/run/html.js @@ -23,38 +23,26 @@ This is a widget invocation /*global $tw: false */ "use strict"; +exports.name = "html"; + var voidElements = "area,base,br,col,command,embed,hr,img,input,keygen,link,meta,param,source,track,wbr".split(","); -var HtmlRule = function(parser,startPos) { - // Save state - this.parser = parser; +exports.init = function() { // Regexp to match - this.reMatch = /<(_)?([A-Za-z]+)(\s*[^>]*?)(\/)?>/mg; - // Get the first match - this.matchIndex = startPos-1; - this.findNextMatch(startPos); -}; - -HtmlRule.prototype.findNextMatch = function(startPos) { - if(this.matchIndex !== undefined && startPos > this.matchIndex) { - this.reMatch.lastIndex = startPos; - this.match = this.reMatch.exec(this.parser.source); - this.matchIndex = this.match ? 
this.match.index : undefined; - } - return this.matchIndex; + this.matchRegExp = /<(_)?([A-Za-z]+)(\s*[^>]*?)(\/)?>/mg; }; /* Parse the most recent match */ -HtmlRule.prototype.parse = function() { +exports.parse = function() { // Get all the details of the match in case this parser is called recursively var isWidget = !!this.match[1], tagName = this.match[2], attributeString = this.match[3], isSelfClosing = !!this.match[4]; // Move past the tag name and parameters - this.parser.pos = this.reMatch.lastIndex; + this.parser.pos = this.matchRegExp.lastIndex; var reLineBreak = /(\r?\n)/mg, reAttr = /\s*([A-Za-z\-_]+)(?:\s*=\s*(?:("[^"]*")|('[^']*')|(\{\{[^\}]*\}\})|([^"'\s]+)))?/mg, isBlock; @@ -108,6 +96,4 @@ HtmlRule.prototype.parse = function() { return [element]; }; -exports.HtmlRule = HtmlRule; - })(); diff --git a/core/modules/parsers/wikiparser/rules/run/macrocall.js b/core/modules/parsers/wikiparser/rules/run/macrocall.js index f9c45d46c..21b4ae39b 100644 --- a/core/modules/parsers/wikiparser/rules/run/macrocall.js +++ b/core/modules/parsers/wikiparser/rules/run/macrocall.js @@ -16,34 +16,22 @@ Wiki rule for macro calls /*global $tw: false */ "use strict"; -var MacroCallRule = function(parser,startPos) { - // Save state - this.parser = parser; - // Regexp to match - this.reMatch = /<<([^\s>]+)\s*([\s\S]*?)>>/mg; - // Get the first match - this.matchIndex = startPos-1; - this.findNextMatch(startPos); -}; +exports.name = "macrocall"; -MacroCallRule.prototype.findNextMatch = function(startPos) { - if(this.matchIndex !== undefined && startPos > this.matchIndex) { - this.reMatch.lastIndex = startPos; - this.match = this.reMatch.exec(this.parser.source); - this.matchIndex = this.match ? 
this.match.index : undefined; - } - return this.matchIndex; +exports.init = function() { + // Regexp to match + this.matchRegExp = /<<([^\s>]+)\s*([\s\S]*?)>>/mg; }; /* Parse the most recent match */ -MacroCallRule.prototype.parse = function() { +exports.parse = function() { // Get all the details of the match var macroName = this.match[1], paramString = this.match[2]; // Move past the macro call - this.parser.pos = this.reMatch.lastIndex; + this.parser.pos = this.matchRegExp.lastIndex; var params = [], reParam = /\s*(?:([A-Za-z0-9\-_]+)\s*:)?(?:\s*(?:"([^"]*)"|'([^']*)'|\[\[([^\]]*)\]\]|([^"'\s]+)))/mg, paramMatch = reParam.exec(paramString); @@ -66,6 +54,4 @@ MacroCallRule.prototype.parse = function() { }]; }; -exports.MacroCallRule = MacroCallRule; - })(); diff --git a/core/modules/parsers/wikiparser/rules/run/wikilink.js b/core/modules/parsers/wikiparser/rules/run/wikilink.js new file mode 100644 index 000000000..b719a5315 --- /dev/null +++ b/core/modules/parsers/wikiparser/rules/run/wikilink.js @@ -0,0 +1,77 @@ +/*\ +title: $:/core/modules/parsers/wikiparser/rules/run/wikilink.js +type: application/javascript +module-type: wikirunrule + +Wiki text run rule for wiki links. For example: + +{{{ +AWikiLink +AnotherLink +~SuppressedLink +}}} + +Precede a camel case word with `~` to prevent it from being recognised as a link. 
+ +\*/ +(function(){ + +/*jslint node: true, browser: true */ +/*global $tw: false */ +"use strict"; + +exports.name = "wikilink"; + +var textPrimitives = { + upperLetter: "[A-Z\u00c0-\u00de\u0150\u0170]", + lowerLetter: "[a-z0-9_\\-\u00df-\u00ff\u0151\u0171]", + anyLetter: "[A-Za-z0-9_\\-\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]", + anyLetterStrict: "[A-Za-z0-9\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]" +}; + +textPrimitives.unWikiLink = "~"; +textPrimitives.wikiLink = textPrimitives.upperLetter + "+" + + textPrimitives.lowerLetter + "+" + + textPrimitives.upperLetter + + textPrimitives.anyLetter + "*"; + +exports.init = function() { + // Regexp to match + this.matchRegExp = new RegExp(textPrimitives.unWikiLink + "?" + textPrimitives.wikiLink,"mg"); +}; + +/* +Parse the most recent match +*/ +exports.parse = function() { + // Get the details of the match + var linkText = this.match[0]; + // Move past the wiki link + this.parser.pos = this.matchRegExp.lastIndex; + // If the link starts with the unwikilink character then just output it as plain text + if(linkText.substr(0,1) === textPrimitives.unWikiLink) { + return [{type: "text", text: linkText.substr(1)}]; + } + // If the link has been preceded with a letter then don't treat it as a link + if(this.match.index > 0) { + var preRegExp = new RegExp(textPrimitives.anyLetterStrict,"mg"); + preRegExp.lastIndex = this.match.index-1; + var preMatch = preRegExp.exec(this.parser.source); + if(preMatch && preMatch.index === this.match.index-1) { + return [{type: "text", text: linkText}]; + } + } + return [{ + type: "widget", + tag: "link", + attributes: { + to: {type: "string", value: linkText} + }, + children: [{ + type: "text", + text: linkText + }] + }]; +}; + +})(); diff --git a/core/modules/parsers/wikiparser/rules/wikirule.js b/core/modules/parsers/wikiparser/rules/wikirule.js new file mode 100644 index 000000000..f8427881d --- /dev/null +++ b/core/modules/parsers/wikiparser/rules/wikirule.js @@
-0,0 +1,35 @@ +/*\ +title: $:/core/modules/parsers/wikiparser/rules/wikirule.js +type: application/javascript +module-type: global + +Base class for wiki parser rules + +\*/ +(function(){ + +/*jslint node: true, browser: true */ +/*global $tw: false */ +"use strict"; + +var WikiRuleDefaultProperties = {}; + +/* +To be overridden by individual rules +*/ +WikiRuleDefaultProperties.init = function() { + +}; + +/* +Default implementation of findNextMatch uses RegExp matching +*/ +WikiRuleDefaultProperties.findNextMatch = function(startPos) { + this.matchRegExp.lastIndex = startPos; + this.match = this.matchRegExp.exec(this.parser.source); + return this.match ? this.match.index : undefined; +}; + +exports.WikiRuleDefaultProperties = WikiRuleDefaultProperties; + +})(); diff --git a/core/modules/parsers/wikiparser/wikiparser.js b/core/modules/parsers/wikiparser/wikiparser.js index af63ed004..e133b475c 100644 --- a/core/modules/parsers/wikiparser/wikiparser.js +++ b/core/modules/parsers/wikiparser/wikiparser.js @@ -38,12 +38,12 @@ var WikiParser = function(vocabulary,type,text,options) { // Initialise the things that pragma rules can change this.macroDefinitions = {}; // Hash map of macro definitions // Instantiate the pragma parse rules - this.pragmaRules = this.instantiateRules(this.vocabulary.pragmaRuleClasses,0); + this.pragmaRules = this.instantiateRules(this.vocabulary.pragmaRules,0); // Parse any pragmas this.parsePragmas(); // Instantiate the parser block and run rules - this.blockRules = this.instantiateRules(this.vocabulary.blockRuleClasses,this.pos); - this.runRules = this.instantiateRules(this.vocabulary.runRuleClasses,this.pos); + this.blockRules = this.instantiateRules(this.vocabulary.blockRules,this.pos); + this.runRules = this.instantiateRules(this.vocabulary.runRules,this.pos); // Parse the text into runs or blocks if(this.type === "text/vnd.tiddlywiki-run") { this.tree = this.parseRun(); @@ -56,17 +56,21 @@ var WikiParser =
function(vocabulary,type,text,options) { Instantiate an array of parse rules */ WikiParser.prototype.instantiateRules = function(classes,startPos) { - var rules = [], + var rulesInfo = [], self = this; $tw.utils.each(classes,function(RuleClass) { // Instantiate the rule - var rule = new RuleClass(self,startPos); - // Only save the rule if there is at least one match - if(rule.matchIndex !== undefined) { - rules.push(rule); + var rule = new RuleClass(self); + rule.init(); + var matchIndex = rule.findNextMatch(startPos); + if(matchIndex !== undefined) { + rulesInfo.push({ + rule: rule, + matchIndex: matchIndex + }); } }); - return rules; + return rulesInfo; }; /* @@ -87,16 +91,23 @@ WikiParser.prototype.skipWhitespace = function(options) { Get the next match out of an array of parse rule instances */ WikiParser.prototype.findNextMatch = function(rules,startPos) { - var nextMatch = undefined, - nextMatchPos = this.sourceLength; + // Find the best matching rule by finding the closest match position + var matchingRule = undefined, + matchingRulePos = this.sourceLength; + // Step through each rule for(var t=0; t;`. + +Pragma rules are applied at the start of a block of text, and cover definitions and declarations that affect the parsing of the rest of the text. Block rules are only applied at the beginning of a block of wikitext, while run rules can appear anywhere. The only current example of a pragma rule is for macro definitions. + +Examples of block rules: + +* Headings +* Tables +* Lists + +Examples of run rules: + +* Entities +* HTML tags +* Wiki links + +Parser rule modules extend the `$tw.WikiParserRule` class. This is done by instantiating the class and then copying the exports of the rule module onto the instance. In this way, the parser rule can override the base behaviour of the `$tw.WikiParserRule` class. 
In particular, the base class incorporates logic for using regular expressions to match parse rules but this logic could be overridden by a parse rule that wanted to, say, use `indexOf()` instead of regular expressions. + +The standard methods and properties of parser rules are as follows: + +* `parser`: automatically generated property pointing back to the parser containing this rule +* `init()`: initialisation function called immediately after the constructor +* `findNextMatch(pos)`: returns the position of the next match at or after the specified position, or `undefined` if there are no further matches +* `parse()`: parses the most recent match, returning an array of the generated parse tree nodes. Pragma rules don't return parse tree nodes but instead modify the parser object directly (for example, to add local macro definitions) + +The built-in parser rules use regular expression matching. Such rules can take advantage of the implementation of `findNextMatch()` in the base `$tw.WikiParserRule` class by ensuring that their `init()` method creates a `matchRegExp` property containing the regular expression to match. The `match` property contains the details of the match for use in the `parse()` method.