Refactoring implementation of wiki parse rules

And some documentation.
This commit is contained in:
Jeremy Ruston 2012-12-14 13:31:47 +00:00
parent 28f96de225
commit 31b283ef36
12 changed files with 223 additions and 143 deletions

View File

@ -12,33 +12,21 @@ Wiki text block rule for headings
/*global $tw: false */
"use strict";
var HeadingRule = function(parser,startPos) {
// Save state
this.parser = parser;
// Regexp to match
this.reMatch = /(!{1,6})/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
exports.name = "heading";
HeadingRule.prototype.findNextMatch = function(startPos) {
if(this.matchIndex !== undefined && startPos > this.matchIndex) {
this.reMatch.lastIndex = startPos;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
exports.init = function() {
// Regexp to match
this.matchRegExp = /(!{1,6})/mg;
};
/*
Parse the most recent match
*/
HeadingRule.prototype.parse = function() {
exports.parse = function() {
// Get all the details of the match
var headingLevel = this.match[1].length;
// Move past the !s
this.parser.pos = this.reMatch.lastIndex;
this.parser.pos = this.matchRegExp.lastIndex;
// Parse the heading
var classedRun = this.parser.parseClassedRun(/(\r?\n)/mg);
// Return the heading
@ -51,7 +39,4 @@ HeadingRule.prototype.parse = function() {
children: classedRun.tree
}];
};
exports.HeadingRule = HeadingRule;
})();

View File

@ -46,23 +46,11 @@ A CSS class can be applied to a list item as follows:
/*global $tw: false */
"use strict";
var ListRule = function(parser,startPos) {
// Save state
this.parser = parser;
// Regexp to match
this.reMatch = /([\\*#;:]+)/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
exports.name = "list";
ListRule.prototype.findNextMatch = function(startPos) {
if(this.matchIndex !== undefined && startPos > this.matchIndex) {
this.reMatch.lastIndex = startPos;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
exports.init = function() {
// Regexp to match
this.matchRegExp = /([\\*#;:]+)/mg;
};
var listTypes = {
@ -75,7 +63,7 @@ var listTypes = {
/*
Parse the most recent match
*/
ListRule.prototype.parse = function() {
exports.parse = function() {
// Array of parse tree nodes for the previous row of the list
var listStack = [];
// Cycle through the items in the list
@ -136,6 +124,4 @@ ListRule.prototype.parse = function() {
return [listStack[0]];
};
exports.ListRule = ListRule;
})();

View File

@ -18,34 +18,22 @@ definition text, including $param$ markers
/*global $tw: false */
"use strict";
exports.name = "macrodef";
/*
Instantiate parse rule
*/
var MacroDefRule = function(parser,startPos) {
// Save state
this.parser = parser;
exports.init = function() {
// Regexp to match
this.reMatch = /^\\define\s*([^(\s]+)\(\s*([^)]*)\)(\r?\n)?/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
MacroDefRule.prototype.findNextMatch = function(startPos) {
if(this.matchIndex !== undefined && startPos > this.matchIndex) {
this.reMatch.lastIndex = startPos;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
this.matchRegExp = /^\\define\s*([^(\s]+)\(\s*([^)]*)\)(\r?\n)?/mg;
};
/*
Parse the most recent match
*/
MacroDefRule.prototype.parse = function() {
exports.parse = function() {
// Move past the macro name and parameters
this.parser.pos = this.reMatch.lastIndex;
this.parser.pos = this.matchRegExp.lastIndex;
// Parse the parameters
var paramString = this.match[2],
params = [];
@ -93,6 +81,4 @@ MacroDefRule.prototype.parse = function() {
};
};
exports.MacroDefRule = MacroDefRule;
})();

View File

@ -16,37 +16,23 @@ Wiki text run rule for HTML entities. For example:
/*global $tw: false */
"use strict";
var EntityRule = function(parser,startPos) {
// Save state
this.parser = parser;
// Regexp to match
this.reMatch = /(&#?[a-zA-Z0-9]{2,8};)/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
exports.name = "entity";
EntityRule.prototype.findNextMatch = function(startPos) {
if(this.matchIndex !== undefined && startPos > this.matchIndex) {
this.reMatch.lastIndex = startPos;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
exports.init = function() {
// Regexp to match
this.matchRegExp = /(&#?[a-zA-Z0-9]{2,8};)/mg;
};
/*
Parse the most recent match
*/
EntityRule.prototype.parse = function() {
exports.parse = function() {
// Get all the details of the match
var entityString = this.match[1];
// Move past the entity
this.parser.pos = this.reMatch.lastIndex;
this.parser.pos = this.matchRegExp.lastIndex;
// Return the entity
return [{type: "entity", entity: this.match[0]}];
};
exports.EntityRule = EntityRule;
})();

View File

@ -23,38 +23,26 @@ This is a widget invocation
/*global $tw: false */
"use strict";
exports.name = "html";
var voidElements = "area,base,br,col,command,embed,hr,img,input,keygen,link,meta,param,source,track,wbr".split(",");
var HtmlRule = function(parser,startPos) {
// Save state
this.parser = parser;
exports.init = function() {
// Regexp to match
this.reMatch = /<(_)?([A-Za-z]+)(\s*[^>]*?)(\/)?>/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
HtmlRule.prototype.findNextMatch = function(startPos) {
if(this.matchIndex !== undefined && startPos > this.matchIndex) {
this.reMatch.lastIndex = startPos;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
this.matchRegExp = /<(_)?([A-Za-z]+)(\s*[^>]*?)(\/)?>/mg;
};
/*
Parse the most recent match
*/
HtmlRule.prototype.parse = function() {
exports.parse = function() {
// Get all the details of the match in case this parser is called recursively
var isWidget = !!this.match[1],
tagName = this.match[2],
attributeString = this.match[3],
isSelfClosing = !!this.match[4];
// Move past the tag name and parameters
this.parser.pos = this.reMatch.lastIndex;
this.parser.pos = this.matchRegExp.lastIndex;
var reLineBreak = /(\r?\n)/mg,
reAttr = /\s*([A-Za-z\-_]+)(?:\s*=\s*(?:("[^"]*")|('[^']*')|(\{\{[^\}]*\}\})|([^"'\s]+)))?/mg,
isBlock;
@ -108,6 +96,4 @@ HtmlRule.prototype.parse = function() {
return [element];
};
exports.HtmlRule = HtmlRule;
})();

View File

@ -16,34 +16,22 @@ Wiki rule for macro calls
/*global $tw: false */
"use strict";
var MacroCallRule = function(parser,startPos) {
// Save state
this.parser = parser;
// Regexp to match
this.reMatch = /<<([^\s>]+)\s*([\s\S]*?)>>/mg;
// Get the first match
this.matchIndex = startPos-1;
this.findNextMatch(startPos);
};
exports.name = "macrocall";
MacroCallRule.prototype.findNextMatch = function(startPos) {
if(this.matchIndex !== undefined && startPos > this.matchIndex) {
this.reMatch.lastIndex = startPos;
this.match = this.reMatch.exec(this.parser.source);
this.matchIndex = this.match ? this.match.index : undefined;
}
return this.matchIndex;
exports.init = function() {
// Regexp to match
this.matchRegExp = /<<([^\s>]+)\s*([\s\S]*?)>>/mg;
};
/*
Parse the most recent match
*/
MacroCallRule.prototype.parse = function() {
exports.parse = function() {
// Get all the details of the match
var macroName = this.match[1],
paramString = this.match[2];
// Move past the macro call
this.parser.pos = this.reMatch.lastIndex;
this.parser.pos = this.matchRegExp.lastIndex;
var params = [],
reParam = /\s*(?:([A-Za-z0-9\-_]+)\s*:)?(?:\s*(?:"([^"]*)"|'([^']*)'|\[\[([^\]]*)\]\]|([^"'\s]+)))/mg,
paramMatch = reParam.exec(paramString);
@ -66,6 +54,4 @@ MacroCallRule.prototype.parse = function() {
}];
};
exports.MacroCallRule = MacroCallRule;
})();

View File

@ -0,0 +1,77 @@
/*\
title: $:/core/modules/parsers/wikiparser/rules/run/wikilink.js
type: application/javascript
module-type: wikirunrule
Wiki text run rule for wiki links. For example:
{{{
AWikiLink
AnotherLink
~SuppressedLink
}}}
Precede a camel case word with `~` to prevent it from being recognised as a link.
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
exports.name = "wikilink";
// Character-class fragments (as RegExp source strings) from which the wiki link pattern is assembled
var textPrimitives = {
	upperLetter: "[A-Z\u00c0-\u00de\u0150\u0170]",
	lowerLetter: "[a-z0-9_\\-\u00df-\u00ff\u0151\u0171]",
	anyLetter: "[A-Za-z0-9_\\-\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]",
	anyLetterStrict: "[A-Za-z0-9\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]",
	// Prefix character that suppresses recognition of a wiki link
	unWikiLink: "~"
};
// A wiki link is: one or more `upperLetter` characters, one or more `lowerLetter` characters,
// a further `upperLetter` character, then any number of `anyLetter` characters
textPrimitives.wikiLink = [
	textPrimitives.upperLetter, "+",
	textPrimitives.lowerLetter, "+",
	textPrimitives.upperLetter,
	textPrimitives.anyLetter, "*"
].join("");
exports.init = function() {
// Regexp to match
this.matchRegExp = new RegExp(textPrimitives.unWikiLink + "?" + textPrimitives.wikiLink,"mg");
};
/*
Parse the most recent match
*/
exports.parse = function() {
// Get the details of the match
var linkText = this.match[0];
// Move past the macro call
this.parser.pos = this.matchRegExp.lastIndex;
// If the link starts with the unwikilink character then just output it as plain text
if(linkText.substr(0,1) === textPrimitives.unWikiLink) {
return [{type: "text", text: linkText.substr(1)}];
}
// If the link has been preceded with a letter then don't treat it as a link
if(this.match.index > 0) {
var preRegExp = new RegExp(textPrimitives.anyLetterStrict,"mg");
preRegExp.lastIndex = this.match.index-1;
var preMatch = preRegExp.exec(this.parser.source);
if(preMatch && preMatch.index === this.match.index-1) {
return [{type: "text", text: linkText}];
}
}
return [{
type: "widget",
tag: "link",
attributes: {
to: {type: "string", value: linkText}
},
children: [{
type: "text",
text: linkText
}]
}];
};
})();

View File

@ -0,0 +1,35 @@
/*\
title: $:/core/modules/parsers/wikiparser/rules/wikirule.js
type: application/javascript
module-type: global
Base class for wiki parser rules
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
var WikiRuleDefaultProperties = {
	/*
	Initialisation hook; does nothing by default and is intended to be overridden by individual rules
	*/
	init: function() {},
	/*
	Default implementation of findNextMatch, which uses RegExp matching.
	Runs `this.matchRegExp` against `this.parser.source` starting at `startPos`, stores the
	result in `this.match`, and returns the index of the match (or undefined if there is none)
	*/
	findNextMatch: function(startPos) {
		var regExp = this.matchRegExp;
		regExp.lastIndex = startPos;
		var found = regExp.exec(this.parser.source);
		this.match = found;
		if(!found) {
			return undefined;
		}
		return found.index;
	}
};
exports.WikiRuleDefaultProperties = WikiRuleDefaultProperties;
})();

View File

@ -38,12 +38,12 @@ var WikiParser = function(vocabulary,type,text,options) {
// Initialise the things that pragma rules can change
this.macroDefinitions = {}; // Hash map of macro definitions
// Instantiate the pragma parse rules
this.pragmaRules = this.instantiateRules(this.vocabulary.pragmaRuleClasses,0);
this.pragmaRules = this.instantiateRules(this.vocabulary.pragmaRules,0);
// Parse any pragmas
this.parsePragmas();
// Instantiate the parser block and run rules
this.blockRules = this.instantiateRules(this.vocabulary.blockRuleClasses,this.pos);
this.runRules = this.instantiateRules(this.vocabulary.runRuleClasses,this.pos);
this.blockRules = this.instantiateRules(this.vocabulary.blockRules,this.pos);
this.runRules = this.instantiateRules(this.vocabulary.runRules,this.pos);
// Parse the text into runs or blocks
if(this.type === "text/vnd.tiddlywiki-run") {
this.tree = this.parseRun();
@ -56,17 +56,21 @@ var WikiParser = function(vocabulary,type,text,options) {
Instantiate an array of parse rules
*/
WikiParser.prototype.instantiateRules = function(classes,startPos) {
var rules = [],
var rulesInfo = [],
self = this;
$tw.utils.each(classes,function(RuleClass) {
// Instantiate the rule
var rule = new RuleClass(self,startPos);
// Only save the rule if there is at least one match
if(rule.matchIndex !== undefined) {
rules.push(rule);
var rule = new RuleClass(self);
rule.init();
var matchIndex = rule.findNextMatch(startPos);
if(matchIndex !== undefined) {
rulesInfo.push({
rule: rule,
matchIndex: matchIndex
});
}
});
return rules;
return rulesInfo;
};
/*
@ -87,16 +91,23 @@ WikiParser.prototype.skipWhitespace = function(options) {
Get the next match out of an array of parse rule instances
*/
WikiParser.prototype.findNextMatch = function(rules,startPos) {
var nextMatch = undefined,
nextMatchPos = this.sourceLength;
// Find the best matching rule by finding the closest match position
var matchingRule = undefined,
matchingRulePos = this.sourceLength;
// Step through each rule
for(var t=0; t<rules.length; t++) {
var matchPos = rules[t].findNextMatch(startPos);
if(matchPos !== undefined && matchPos <= nextMatchPos) {
nextMatch = rules[t];
nextMatchPos = matchPos;
var ruleInfo = rules[t];
// Ask the rule to get the next match if we've moved past the current one
if(ruleInfo.matchIndex !== undefined && ruleInfo.matchIndex < startPos) {
ruleInfo.matchIndex = ruleInfo.rule.findNextMatch(startPos);
}
// Adopt this match if it's closer than the current best match
if(ruleInfo.matchIndex !== undefined && ruleInfo.matchIndex <= matchingRulePos) {
matchingRule = ruleInfo;
matchingRulePos = ruleInfo.matchIndex;
}
}
return nextMatch;
return matchingRule;
};
/*
@ -117,7 +128,7 @@ WikiParser.prototype.parsePragmas = function() {
return;
}
// Process the pragma rule
nextMatch.parse();
nextMatch.rule.parse();
}
};
@ -134,7 +145,7 @@ WikiParser.prototype.parseBlock = function(terminatorRegExpString) {
// Look for a block rule that applies at the current position
var nextMatch = this.findNextMatch(this.blockRules,this.pos);
if(nextMatch && nextMatch.matchIndex === this.pos) {
return nextMatch.parse();
return nextMatch.rule.parse();
}
// Treat it as a paragraph if we didn't find a block rule
return [{type: "element", tag: "p", children: this.parseRun(terminatorRegExp)}];
@ -214,7 +225,7 @@ WikiParser.prototype.parseRunUnterminated = function() {
this.pos = nextMatch.matchIndex;
}
// Process the run rule
tree.push.apply(tree,nextMatch.parse());
tree.push.apply(tree,nextMatch.rule.parse());
// Look for the next run rule
nextMatch = this.findNextMatch(this.runRules,this.pos);
}
@ -253,7 +264,7 @@ WikiParser.prototype.parseRunTerminated = function(terminatorRegExp) {
this.pos = runRuleMatch.matchIndex;
}
// Process the run rule
tree.push.apply(tree,runRuleMatch.parse());
tree.push.apply(tree,runRuleMatch.rule.parse());
// Look for the next run rule
runRuleMatch = this.findNextMatch(this.runRules,this.pos);
// Look for the next terminator match

View File

@ -13,15 +13,27 @@ module-type: global
var WikiVocabulary = function(options) {
this.wiki = options.wiki;
// Hashmaps of the various parse rule classes
this.pragmaRuleClasses = $tw.modules.applyMethods("wikipragmarule");
this.blockRuleClasses = $tw.modules.applyMethods("wikiblockrule");
this.runRuleClasses = $tw.modules.applyMethods("wikirunrule");
this.pragmaRules = this.createRuleClasses("wikipragmarule");
this.blockRules = this.createRuleClasses("wikiblockrule");
this.runRules = this.createRuleClasses("wikirunrule");
// Hashmap of the various renderer classes
this.rendererClasses = $tw.modules.applyMethods("wikirenderer");
// Hashmap of the available widgets
this.widgetClasses = $tw.modules.applyMethods("widget");
};
/*
Create a hashmap of rule classes, one for each module of the specified type.
	moduleType: module type passed to $tw.modules.forEachModuleOfType
Returns a hashmap from the `name` exported by each module to a constructor for that rule.
Each constructor stores the parser it is given, and its prototype is extended with
$tw.WikiRuleDefaultProperties and then with the exports of the module
*/
WikiVocabulary.prototype.createRuleClasses = function(moduleType) {
	var ruleClasses = {};
	$tw.modules.forEachModuleOfType(moduleType,function(title,moduleExports) {
		// Each module gets its own constructor so that the prototypes stay independent
		var ruleClass = function(parser) {
			this.parser = parser;
		};
		$tw.utils.extend(ruleClass.prototype,$tw.WikiRuleDefaultProperties,moduleExports);
		ruleClasses[moduleExports.name] = ruleClass;
	});
	return ruleClasses;
};
WikiVocabulary.prototype.parseText = function(type,text) {
return new $tw.WikiParser(this,type,text,{wiki: this.wiki});
};

View File

@ -0,0 +1,2 @@
title: WidgetModules

View File

@ -0,0 +1,28 @@
title: WikiRuleModules
WikiRuleModules cover the module types `wikirunrule`, `wikiblockrule` and `wikipragmarule`. Modules of these types encapsulate the logic of individual parsing rules used by the WikiParser engine. For example, there is a `wikirunrule` module that identifies references to HTML entities by matching the pattern `&<chars>;`.
Pragma rules are applied at the start of a block of text, and cover definitions and declarations that affect the parsing of the rest of the text. Block rules are only applied at the beginning of a block of wikitext, while run rules can appear anywhere. The only current example of a pragma rule is for macro definitions.
Examples of block rules:
* Headings
* Tables
* Lists
Examples of run rules:
* Entities
* HTML tags
* Wiki links
Parser rule modules extend the default rule properties defined in `$tw.WikiRuleDefaultProperties`. This is done by creating a rule class for each rule module and extending its prototype with the default properties and then with the exports of the rule module. In this way, the parser rule can override the base behaviour provided by the default properties. In particular, the default properties incorporate logic for using regular expressions to match parse rules but this logic could be overridden by a parse rule that wanted to, say, use `indexOf()` instead of regular expressions.
The standard methods and properties of parser rules are as follows:
* `parser`: automatically generated property pointing back to the parser containing this rule
* `init()`: initialisation function called immediately after the constructor
* `findNextMatch(pos)`: returns the position of the next match at or after the specified position, or `undefined` if there are no further matches
* `parse()`: parses the most recent match, returning an array of the generated parse tree nodes. Pragma rules don't return parse tree nodes but instead modify the parser object directly (for example, to add local macro definitions)
The built-in parser rules use regular expression matching. Such rules can take advantage of the default implementation of `findNextMatch()` in `$tw.WikiRuleDefaultProperties` by ensuring that their `init()` method creates a `matchRegExp` property containing the regular expression to match. After each call to `findNextMatch()`, the `match` property contains the details of the most recent match for use in the `parse()` method.