Refactoring implementation of wiki parse rules

And some documentation.
2024-12-24 17:10:29 +00:00 · 2012-12-14 13:31:47 +00:00 · 2012-12-14 13:31:47 +00:00 · 31b283ef36
commit 31b283ef36
parent 28f96de225
12 changed files with 223 additions and 143 deletions
--- a/core/modules/parsers/wikiparser/rules/block/heading.js
+++ b/core/modules/parsers/wikiparser/rules/block/heading.js
@ -12,33 +12,21 @@ Wiki text block rule for headings
 /*global $tw: false */
 "use strict";

-var HeadingRule = function(parser,startPos) {
-	// Save state
-	this.parser = parser;
-	// Regexp to match
-	this.reMatch = /(!{1,6})/mg;
-	// Get the first match
-	this.matchIndex = startPos-1;
-	this.findNextMatch(startPos);
-};
+exports.name = "heading";

-HeadingRule.prototype.findNextMatch = function(startPos) {
-	if(this.matchIndex !== undefined && startPos > this.matchIndex) {
-		this.reMatch.lastIndex = startPos;
-		this.match = this.reMatch.exec(this.parser.source);
-		this.matchIndex = this.match ? this.match.index : undefined;
-	}
-	return this.matchIndex;
+exports.init = function() {
+	// Regexp to match
+	this.matchRegExp = /(!{1,6})/mg;
 };

 /*
 Parse the most recent match
 */
-HeadingRule.prototype.parse = function() {
+exports.parse = function() {
 	// Get all the details of the match
 	var headingLevel = this.match[1].length;
 	// Move past the !s
-	this.parser.pos = this.reMatch.lastIndex;
+	this.parser.pos = this.matchRegExp.lastIndex;
 	// Parse the heading
 	var classedRun = this.parser.parseClassedRun(/(\r?\n)/mg);
 	// Return the heading
@ -51,7 +39,4 @@ HeadingRule.prototype.parse = function() {
 		children: classedRun.tree
 	}];
 };
-
-exports.HeadingRule = HeadingRule;
-
 })();
--- a/core/modules/parsers/wikiparser/rules/block/list.js
+++ b/core/modules/parsers/wikiparser/rules/block/list.js
@ -46,23 +46,11 @@ A CSS class can be applied to a list item as follows:
 /*global $tw: false */
 "use strict";

-var ListRule = function(parser,startPos) {
-	// Save state
-	this.parser = parser;
-	// Regexp to match
-	this.reMatch = /([\\*#;:]+)/mg;
-	// Get the first match
-	this.matchIndex = startPos-1;
-	this.findNextMatch(startPos);
-};
+exports.name = "list";

-ListRule.prototype.findNextMatch = function(startPos) {
-	if(this.matchIndex !== undefined && startPos > this.matchIndex) {
-		this.reMatch.lastIndex = startPos;
-		this.match = this.reMatch.exec(this.parser.source);
-		this.matchIndex = this.match ? this.match.index : undefined;
-	}
-	return this.matchIndex;
+exports.init = function() {
+	// Regexp to match
+	this.matchRegExp = /([\\*#;:]+)/mg;
 };

 var listTypes = {
@ -75,7 +63,7 @@ var listTypes = {
 /*
 Parse the most recent match
 */
-ListRule.prototype.parse = function() {
+exports.parse = function() {
 	// Array of parse tree nodes for the previous row of the list
 	var listStack = [];
 	// Cycle through the items in the list
@ -136,6 +124,4 @@ ListRule.prototype.parse = function() {
 	return [listStack[0]];
 };

-exports.ListRule = ListRule;
-
 })();
--- a/core/modules/parsers/wikiparser/rules/pragma/macrodef.js
+++ b/core/modules/parsers/wikiparser/rules/pragma/macrodef.js
@ -18,34 +18,22 @@ definition text, including $param$ markers
 /*global $tw: false */
 "use strict";

+exports.name = "macrodef";
+
 /*
 Instantiate parse rule
 */
-var MacroDefRule = function(parser,startPos) {
-	// Save state
-	this.parser = parser;
+exports.init = function() {
 	// Regexp to match
-	this.reMatch = /^\\define\s*([^(\s]+)\(\s*([^)]*)\)(\r?\n)?/mg;
-	// Get the first match
-	this.matchIndex = startPos-1;
-	this.findNextMatch(startPos);
-};
-
-MacroDefRule.prototype.findNextMatch = function(startPos) {
-	if(this.matchIndex !== undefined && startPos > this.matchIndex) {
-		this.reMatch.lastIndex = startPos;
-		this.match = this.reMatch.exec(this.parser.source);
-		this.matchIndex = this.match ? this.match.index : undefined;
-	}
-	return this.matchIndex;
+	this.matchRegExp = /^\\define\s*([^(\s]+)\(\s*([^)]*)\)(\r?\n)?/mg;
 };

 /*
 Parse the most recent match
 */
-MacroDefRule.prototype.parse = function() {
+exports.parse = function() {
 	// Move past the macro name and parameters
-	this.parser.pos = this.reMatch.lastIndex;
+	this.parser.pos = this.matchRegExp.lastIndex;
 	// Parse the parameters
 	var paramString = this.match[2],
 		params = [];
@ -93,6 +81,4 @@ MacroDefRule.prototype.parse = function() {
 	};
 };

-exports.MacroDefRule = MacroDefRule;
-
 })();
--- a/core/modules/parsers/wikiparser/rules/run/entity.js
+++ b/core/modules/parsers/wikiparser/rules/run/entity.js
@ -16,37 +16,23 @@ Wiki text run rule for HTML entities. For example:
 /*global $tw: false */
 "use strict";

-var EntityRule = function(parser,startPos) {
-	// Save state
-	this.parser = parser;
-	// Regexp to match
-	this.reMatch = /(&#?[a-zA-Z0-9]{2,8};)/mg;
-	// Get the first match
-	this.matchIndex = startPos-1;
-	this.findNextMatch(startPos);
-};
+exports.name = "entity";

-EntityRule.prototype.findNextMatch = function(startPos) {
-	if(this.matchIndex !== undefined && startPos > this.matchIndex) {
-		this.reMatch.lastIndex = startPos;
-		this.match = this.reMatch.exec(this.parser.source);
-		this.matchIndex = this.match ? this.match.index : undefined;
-	}
-	return this.matchIndex;
+exports.init = function() {
+	// Regexp to match
+	this.matchRegExp = /(&#?[a-zA-Z0-9]{2,8};)/mg;
 };

 /*
 Parse the most recent match
 */
-EntityRule.prototype.parse = function() {
+exports.parse = function() {
 	// Get all the details of the match
 	var entityString = this.match[1];
 	// Move past the macro call
-	this.parser.pos = this.reMatch.lastIndex;
+	this.parser.pos = this.matchRegExp.lastIndex;
 	// Return the entity
 	return [{type: "entity", entity: this.match[0]}];
 };

-exports.EntityRule = EntityRule;
-
 })();
--- a/core/modules/parsers/wikiparser/rules/run/html.js
+++ b/core/modules/parsers/wikiparser/rules/run/html.js
@ -23,38 +23,26 @@ This is a widget invocation
 /*global $tw: false */
 "use strict";

+exports.name = "html";
+
 var voidElements = "area,base,br,col,command,embed,hr,img,input,keygen,link,meta,param,source,track,wbr".split(",");

-var HtmlRule = function(parser,startPos) {
-	// Save state
-	this.parser = parser;
+exports.init = function() {
 	// Regexp to match
-	this.reMatch = /<(_)?([A-Za-z]+)(\s*[^>]*?)(\/)?>/mg;
-	// Get the first match
-	this.matchIndex = startPos-1;
-	this.findNextMatch(startPos);
-};
-
-HtmlRule.prototype.findNextMatch = function(startPos) {
-	if(this.matchIndex !== undefined && startPos > this.matchIndex) {
-		this.reMatch.lastIndex = startPos;
-		this.match = this.reMatch.exec(this.parser.source);
-		this.matchIndex = this.match ? this.match.index : undefined;
-	}
-	return this.matchIndex;
+	this.matchRegExp = /<(_)?([A-Za-z]+)(\s*[^>]*?)(\/)?>/mg;
 };

 /*
 Parse the most recent match
 */
-HtmlRule.prototype.parse = function() {
+exports.parse = function() {
 	// Get all the details of the match in case this parser is called recursively
 	var isWidget = !!this.match[1],
 		tagName = this.match[2],
 		attributeString = this.match[3],
 		isSelfClosing = !!this.match[4];
 	// Move past the tag name and parameters
-	this.parser.pos = this.reMatch.lastIndex;
+	this.parser.pos = this.matchRegExp.lastIndex;
 	var reLineBreak = /(\r?\n)/mg,
 		reAttr = /\s*([A-Za-z\-_]+)(?:\s*=\s*(?:("[^"]*")|('[^']*')|(\{\{[^\}]*\}\})|([^"'\s]+)))?/mg,
 		isBlock;
@ -108,6 +96,4 @@ HtmlRule.prototype.parse = function() {
 	return [element];
 };

-exports.HtmlRule = HtmlRule;
-
 })();
--- a/core/modules/parsers/wikiparser/rules/run/macrocall.js
+++ b/core/modules/parsers/wikiparser/rules/run/macrocall.js
@ -16,34 +16,22 @@ Wiki rule for macro calls
 /*global $tw: false */
 "use strict";

-var MacroCallRule = function(parser,startPos) {
-	// Save state
-	this.parser = parser;
-	// Regexp to match
-	this.reMatch = /<<([^\s>]+)\s*([\s\S]*?)>>/mg;
-	// Get the first match
-	this.matchIndex = startPos-1;
-	this.findNextMatch(startPos);
-};
+exports.name = "macrocall";

-MacroCallRule.prototype.findNextMatch = function(startPos) {
-	if(this.matchIndex !== undefined && startPos > this.matchIndex) {
-		this.reMatch.lastIndex = startPos;
-		this.match = this.reMatch.exec(this.parser.source);
-		this.matchIndex = this.match ? this.match.index : undefined;
-	}
-	return this.matchIndex;
+exports.init = function() {
+	// Regexp to match
+	this.matchRegExp = /<<([^\s>]+)\s*([\s\S]*?)>>/mg;
 };

 /*
 Parse the most recent match
 */
-MacroCallRule.prototype.parse = function() {
+exports.parse = function() {
 	// Get all the details of the match
 	var macroName = this.match[1],
 		paramString = this.match[2];
 	// Move past the macro call
-	this.parser.pos = this.reMatch.lastIndex;
+	this.parser.pos = this.matchRegExp.lastIndex;
 	var params = [],
 		reParam = /\s*(?:([A-Za-z0-9\-_]+)\s*:)?(?:\s*(?:"([^"]*)"|'([^']*)'|\[\[([^\]]*)\]\]|([^"'\s]+)))/mg,
 		paramMatch = reParam.exec(paramString);
@ -66,6 +54,4 @@ MacroCallRule.prototype.parse = function() {
 	}];
 };

-exports.MacroCallRule = MacroCallRule;
-
 })();
--- a/core/modules/parsers/wikiparser/rules/run/wikilink.js
+++ b/core/modules/parsers/wikiparser/rules/run/wikilink.js
@ -0,0 +1,77 @@
+/*\
+title: $:/core/modules/parsers/wikiparser/rules/run/wikilink.js
+type: application/javascript
+module-type: wikirunrule
+
+Wiki text run rule for wiki links. For example:
+
+{{{
+AWikiLink
+AnotherLink
+~SuppressedLink
+}}}
+
+Precede a camel case word with `~` to prevent it from being recognised as a link.
+
+\*/
+(function(){
+
+/*jslint node: true, browser: true */
+/*global $tw: false */
+"use strict";
+
+exports.name = "wikilink";
+
+var textPrimitives = {
+	upperLetter: "[A-Z\u00c0-\u00de\u0150\u0170]",
+	lowerLetter: "[a-z0-9_\\-\u00df-\u00ff\u0151\u0171]",
+	anyLetter:   "[A-Za-z0-9_\\-\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]",
+	anyLetterStrict: "[A-Za-z0-9\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]"
+};
+
+textPrimitives.unWikiLink = "~";
+textPrimitives.wikiLink = textPrimitives.upperLetter + "+" +
+	textPrimitives.lowerLetter + "+" +
+	textPrimitives.upperLetter +
+	textPrimitives.anyLetter + "*";
+
+exports.init = function() {
+	// Regexp to match an optional unWikiLink prefix (`~`) followed by a camel case word as defined by textPrimitives.wikiLink
+	this.matchRegExp = new RegExp(textPrimitives.unWikiLink + "?" + textPrimitives.wikiLink,"mg");
+};
+
+/*
+Parse the most recent match, advancing the parser position past it.
+Returns an array containing a single parse tree node:
+* a text node without the leading `~` if the match starts with the unWikiLink character
+* a text node with the matched text if the character immediately before the match is a letter or digit (anyLetterStrict)
+* otherwise a "link" widget node whose `to` attribute and text child are both the matched text
+*/
+exports.parse = function() {
+	// Get the details of the match
+	var linkText = this.match[0];
+	// Move past the matched link text
+	this.parser.pos = this.matchRegExp.lastIndex;
+	// If the link starts with the unwikilink character then just output it as plain text
+	if(linkText.substr(0,1) === textPrimitives.unWikiLink) {
+		return [{type: "text", text: linkText.substr(1)}];
+	}
+	// If the link has been preceded with a letter then don't treat it as a link
+	if(this.match.index > 0) {
+		var preRegExp = new RegExp(textPrimitives.anyLetterStrict,"mg");
+		preRegExp.lastIndex = this.match.index-1; // Start searching at the character just before the match
+		var preMatch = preRegExp.exec(this.parser.source);
+		if(preMatch && preMatch.index === this.match.index-1) { // Only counts if the hit is exactly that preceding character
+			return [{type: "text", text: linkText}];
+		}
+	}
+	return [{
+		type: "widget",
+		tag: "link",
+		attributes: {
+			to: {type: "string", value: linkText}
+		},
+		children: [{
+			type: "text",
+			text: linkText
+		}]
+	}];
+};
+
+})();
--- a/core/modules/parsers/wikiparser/rules/wikirule.js
+++ b/core/modules/parsers/wikiparser/rules/wikirule.js
@ -0,0 +1,35 @@
+/*\
+title: $:/core/modules/parsers/wikiparser/rules/wikirule.js
+type: application/javascript
+module-type: global
+
+Base class for wiki parser rules
+
+\*/
+(function(){
+
+/*jslint node: true, browser: true */
+/*global $tw: false */
+"use strict";
+
+var WikiRuleDefaultProperties = {};
+
+/*
+To be overridden by individual rules
+*/
+WikiRuleDefaultProperties.init = function() {
+
+};
+
+/*
+Default implementation of findNextMatch uses RegExp matching.
+Searches this.parser.source with this.matchRegExp starting at startPos, stores the
+result in this.match, and returns the index of the match or undefined if there is none
+*/
+WikiRuleDefaultProperties.findNextMatch = function(startPos) {
+	this.matchRegExp.lastIndex = startPos;
+	this.match = this.matchRegExp.exec(this.parser.source);
+	return this.match ? this.match.index : undefined;
+};
+
+exports.WikiRuleDefaultProperties = WikiRuleDefaultProperties;
+
+})();
--- a/core/modules/parsers/wikiparser/wikiparser.js
+++ b/core/modules/parsers/wikiparser/wikiparser.js
@ -38,12 +38,12 @@ var WikiParser = function(vocabulary,type,text,options) {
 	// Initialise the things that pragma rules can change
 	this.macroDefinitions = {}; // Hash map of macro definitions
 	// Instantiate the pragma parse rules
-	this.pragmaRules = this.instantiateRules(this.vocabulary.pragmaRuleClasses,0);
+	this.pragmaRules = this.instantiateRules(this.vocabulary.pragmaRules,0);
 	// Parse any pragmas
 	this.parsePragmas();
 	// Instantiate the parser block and run rules
-	this.blockRules = this.instantiateRules(this.vocabulary.blockRuleClasses,this.pos);
-	this.runRules = this.instantiateRules(this.vocabulary.runRuleClasses,this.pos);
+	this.blockRules = this.instantiateRules(this.vocabulary.blockRules,this.pos);
+	this.runRules = this.instantiateRules(this.vocabulary.runRules,this.pos);
 	// Parse the text into runs or blocks
 	if(this.type === "text/vnd.tiddlywiki-run") {
 		this.tree = this.parseRun();
@ -56,17 +56,21 @@ var WikiParser = function(vocabulary,type,text,options) {
 Instantiate an array of parse rules
 */
 WikiParser.prototype.instantiateRules = function(classes,startPos) {
-	var rules = [],
+	var rulesInfo = [],
 		self = this;
 	$tw.utils.each(classes,function(RuleClass) {
 		// Instantiate the rule
-		var rule = new RuleClass(self,startPos);
-		// Only save the rule if there is at least one match
-		if(rule.matchIndex !== undefined) {
-			rules.push(rule);
+		var rule = new RuleClass(self);
+		rule.init();
+		var matchIndex = rule.findNextMatch(startPos);
+		if(matchIndex !== undefined) {
+			rulesInfo.push({
+				rule: rule,
+				matchIndex: matchIndex
+			});
 		}
 	});
-	return rules;
+	return rulesInfo;
 };

 /*
@ -87,16 +91,23 @@ WikiParser.prototype.skipWhitespace = function(options) {
 Get the next match out of an array of parse rule instances
 */
 WikiParser.prototype.findNextMatch = function(rules,startPos) {
-	var nextMatch = undefined,
-		nextMatchPos = this.sourceLength;
+	// Find the best matching rule by finding the closest match position
+	var matchingRule = undefined,
+		matchingRulePos = this.sourceLength;
+	// Step through each rule
 	for(var t=0; t<rules.length; t++) {
-		var matchPos = rules[t].findNextMatch(startPos);
-		if(matchPos !== undefined && matchPos <= nextMatchPos) {
-			nextMatch = rules[t];
-			nextMatchPos = matchPos;
+		var ruleInfo = rules[t];
+		// Ask the rule to get the next match if we've moved past the current one
+		if(ruleInfo.matchIndex !== undefined  && ruleInfo.matchIndex < startPos) {
+			ruleInfo.matchIndex = ruleInfo.rule.findNextMatch(startPos);
+		}
+		// Adopt this match if it's closer than the current best match
+		if(ruleInfo.matchIndex !== undefined && ruleInfo.matchIndex <= matchingRulePos) {
+			matchingRule = ruleInfo;
+			matchingRulePos = ruleInfo.matchIndex;
 		}
 	}
-	return nextMatch;
+	return matchingRule;
 };

 /*
@ -117,7 +128,7 @@ WikiParser.prototype.parsePragmas = function() {
 			return;
 		}
 		// Process the pragma rule
-		nextMatch.parse();
+		nextMatch.rule.parse();
 	}
 };

@ -134,7 +145,7 @@ WikiParser.prototype.parseBlock = function(terminatorRegExpString) {
 	// Look for a block rule that applies at the current position
 	var nextMatch = this.findNextMatch(this.blockRules,this.pos);
 	if(nextMatch && nextMatch.matchIndex === this.pos) {
-		return nextMatch.parse();
+		return nextMatch.rule.parse();
 	}
 	// Treat it as a paragraph if we didn't find a block rule
 	return [{type: "element", tag: "p", children: this.parseRun(terminatorRegExp)}];
@ -214,7 +225,7 @@ WikiParser.prototype.parseRunUnterminated = function() {
 			this.pos = nextMatch.matchIndex;
 		}
 		// Process the run rule
-		tree.push.apply(tree,nextMatch.parse());
+		tree.push.apply(tree,nextMatch.rule.parse());
 		// Look for the next run rule
 		nextMatch = this.findNextMatch(this.runRules,this.pos);
 	}
@ -253,7 +264,7 @@ WikiParser.prototype.parseRunTerminated = function(terminatorRegExp) {
 				this.pos = runRuleMatch.matchIndex;
 			}
 			// Process the run rule
-			tree.push.apply(tree,runRuleMatch.parse());
+			tree.push.apply(tree,runRuleMatch.rule.parse());
 			// Look for the next run rule
 			runRuleMatch = this.findNextMatch(this.runRules,this.pos);
 			// Look for the next terminator match
--- a/core/modules/wikivocabulary.js
+++ b/core/modules/wikivocabulary.js
@ -13,15 +13,27 @@ module-type: global
 var WikiVocabulary = function(options) {
 	this.wiki = options.wiki;
 	// Hashmaps of the various parse rule classes
-	this.pragmaRuleClasses = $tw.modules.applyMethods("wikipragmarule");
-	this.blockRuleClasses = $tw.modules.applyMethods("wikiblockrule");
-	this.runRuleClasses = $tw.modules.applyMethods("wikirunrule");
+	this.pragmaRules = this.createRuleClasses("wikipragmarule");
+	this.blockRules = this.createRuleClasses("wikiblockrule");
+	this.runRules = this.createRuleClasses("wikirunrule");
 	// Hashmap of the various renderer classes
 	this.rendererClasses = $tw.modules.applyMethods("wikirenderer");
 	// Hashmap of the available widgets
 	this.widgetClasses = $tw.modules.applyMethods("widget");
 };

+WikiVocabulary.prototype.createRuleClasses = function(moduleType) { // Build a hashmap of rule classes, keyed by rule name, from the modules of the given type
+	var ruleClasses = {};
+	$tw.modules.forEachModuleOfType(moduleType,function(title,moduleExports) {
+		var ruleClass = function(parser) { // Constructor only records the owning parser
+			this.parser = parser;
+		}
+		$tw.utils.extend(ruleClass.prototype,$tw.WikiRuleDefaultProperties,moduleExports); // Defaults first, then the module's exports (presumably so the exports take precedence; confirm against $tw.utils.extend)
+		ruleClasses[moduleExports.name] = ruleClass; // Keyed by the `name` exported by the rule module
+	});
+	return ruleClasses;
+};
+
 WikiVocabulary.prototype.parseText = function(type,text) {
 	return new $tw.WikiParser(this,type,text,{wiki: this.wiki});
 };
--- a/editions/tw5.com/tiddlers/moduletypes/WidgetModules.tid
+++ b/editions/tw5.com/tiddlers/moduletypes/WidgetModules.tid
@ -0,0 +1,2 @@
+title: WidgetModules
+
--- a/editions/tw5.com/tiddlers/moduletypes/WikiRuleModules.tid
+++ b/editions/tw5.com/tiddlers/moduletypes/WikiRuleModules.tid
@ -0,0 +1,28 @@
+title: WikiRuleModules
+
+WikiRuleModules cover the module types `wikirunrule`, `wikiblockrule` and `wikipragmarule`. Modules of these types encapsulate the logic of individual parsing rules used by the WikiParser engine. For example, there is a `wikirunrule` module that identifies references to HTML entities by matching the pattern `&<chars>;`.
+
+Pragma rules are applied at the start of a block of text, and cover definitions and declarations that affect the parsing of the rest of the text. Block rules are only applied at the beginning of a block of wikitext, while run rules can appear anywhere. The only current example of a pragma rule is for macro definitions.
+
+Examples of block rules:
+
+* Headings
+* Tables
+* Lists
+
+Examples of run rules:
+
+* Entities
+* HTML tags
+* Wiki links
+
+Parser rule modules extend the default behaviour defined in `$tw.WikiRuleDefaultProperties`. This is done by creating a class for each rule module and then copying the default properties, followed by the exports of the rule module, onto the prototype of that class. In this way, the parser rule can override the base behaviour provided by `$tw.WikiRuleDefaultProperties`. In particular, the default properties incorporate logic for using regular expressions to match parse rules, but this logic could be overridden by a parse rule that wanted to, say, use `indexOf()` instead of regular expressions.
+
+The standard methods and properties of parser rules are as follows:
+
+* `parser`: automatically generated property pointing back to the parser containing this rule
+* `init()`: initialisation function called immediately after the constructor
+* `findNextMatch(pos)`: returns the position of the next match at or after the specified position, or `undefined` if there are no further matches
+* `parse()`: parses the most recent match, returning an array of the generated parse tree nodes. Pragma rules don't return parse tree nodes but instead modify the parser object directly (for example, to add local macro definitions)
+
+The built-in parser rules use regular expression matching. Such rules can take advantage of the default implementation of `findNextMatch()` in `$tw.WikiRuleDefaultProperties` by ensuring that their `init()` method creates a `matchRegExp` property containing the regular expression to match. The `match` property contains the details of the match for use in the `parse()` method.