mirror of
https://github.com/Jermolene/TiddlyWiki5
synced 2025-01-10 01:10:28 +00:00
1a57d08feb
Also make tests pass
500 lines
17 KiB
JavaScript
500 lines
17 KiB
JavaScript
/*\
|
|
title: $:/core/modules/parsers/wikiparser/wikiparser.js
|
|
type: application/javascript
|
|
module-type: parser
|
|
|
|
The wiki text parser processes blocks of source text into a parse tree.
|
|
|
|
The parse tree is made up of nested arrays of these JavaScript objects:
|
|
|
|
{type: "element", tag: <string>, attributes: {}, children: []} - an HTML element
|
|
{type: "text", text: <string>} - a text node
|
|
{type: "entity", value: <string>} - an entity
|
|
{type: "raw", html: <string>} - raw HTML
|
|
|
|
Attributes are stored as hashmaps of the following objects:
|
|
|
|
{type: "string", value: <string>} - literal string
|
|
{type: "indirect", textReference: <textReference>} - indirect through a text reference
|
|
{type: "macro", macro: <TBD>} - indirect through a macro invocation
|
|
|
|
\*/
|
|
(function(){
|
|
|
|
/*jslint node: true, browser: true */
|
|
/*global $tw: false */
|
|
"use strict";
|
|
|
|
/*
|
|
type: content type of text
|
|
text: text to be parsed
|
|
options: see below:
|
|
parseAsInline: true to parse text as inline instead of block
|
|
wiki: reference to wiki to use
|
|
_canonical_uri: optional URI of content if text is missing or empty
|
|
configTrimWhiteSpace: true to trim whitespace
|
|
*/
|
|
var WikiParser = function(type,text,options) {
|
|
this.wiki = options.wiki;
|
|
var self = this;
|
|
// Check for an externally linked tiddler
|
|
if($tw.browser && (text || "") === "" && options._canonical_uri) {
|
|
this.loadRemoteTiddler(options._canonical_uri);
|
|
text = $tw.language.getRawString("LazyLoadingWarning");
|
|
}
|
|
// Save the parse text
|
|
this.type = type || "text/vnd.tiddlywiki";
|
|
this.source = text || "";
|
|
this.sourceLength = this.source.length;
|
|
// Flag for ignoring whitespace
|
|
this.configTrimWhiteSpace = options.configTrimWhiteSpace !== undefined ? options.configTrimWhiteSpace : false;
|
|
// Parser mode
|
|
this.parseAsInline = options.parseAsInline;
|
|
// Set current parse position
|
|
this.pos = 0;
|
|
// Start with empty output
|
|
this.tree = [];
|
|
// Assemble the rule classes we're going to use
|
|
var pragmaRuleClasses, blockRuleClasses, inlineRuleClasses;
|
|
if(options.rules) {
|
|
pragmaRuleClasses = options.rules.pragma;
|
|
blockRuleClasses = options.rules.block;
|
|
inlineRuleClasses = options.rules.inline;
|
|
} else {
|
|
// Setup the rule classes if we don't have them already
|
|
if(!this.pragmaRuleClasses) {
|
|
WikiParser.prototype.pragmaRuleClasses = $tw.modules.createClassesFromModules("wikirule","pragma",$tw.WikiRuleBase);
|
|
this.setupRules(WikiParser.prototype.pragmaRuleClasses,"$:/config/WikiParserRules/Pragmas/");
|
|
}
|
|
pragmaRuleClasses = this.pragmaRuleClasses;
|
|
if(!this.blockRuleClasses) {
|
|
WikiParser.prototype.blockRuleClasses = $tw.modules.createClassesFromModules("wikirule","block",$tw.WikiRuleBase);
|
|
this.setupRules(WikiParser.prototype.blockRuleClasses,"$:/config/WikiParserRules/Block/");
|
|
}
|
|
blockRuleClasses = this.blockRuleClasses;
|
|
if(!this.inlineRuleClasses) {
|
|
WikiParser.prototype.inlineRuleClasses = $tw.modules.createClassesFromModules("wikirule","inline",$tw.WikiRuleBase);
|
|
this.setupRules(WikiParser.prototype.inlineRuleClasses,"$:/config/WikiParserRules/Inline/");
|
|
}
|
|
inlineRuleClasses = this.inlineRuleClasses;
|
|
}
|
|
// Instantiate the pragma parse rules
|
|
this.pragmaRules = this.instantiateRules(pragmaRuleClasses,"pragma",0);
|
|
// Instantiate the parser block and inline rules
|
|
this.blockRules = this.instantiateRules(blockRuleClasses,"block",0);
|
|
this.inlineRules = this.instantiateRules(inlineRuleClasses,"inline",0);
|
|
// Parse any pragmas
|
|
var topBranch = this.parsePragmas();
|
|
// Parse the text into inline runs or blocks
|
|
if(this.parseAsInline) {
|
|
topBranch.push.apply(topBranch,this.parseInlineRun());
|
|
} else {
|
|
topBranch.push.apply(topBranch,this.parseBlocks());
|
|
}
|
|
// Build rules' name map
|
|
this.usingRuleMap = {};
|
|
$tw.utils.each(this.pragmaRules, function (ruleInfo) { self.usingRuleMap[ruleInfo.rule.name] = Object.getPrototypeOf(ruleInfo.rule); });
|
|
$tw.utils.each(this.blockRules, function (ruleInfo) { self.usingRuleMap[ruleInfo.rule.name] = Object.getPrototypeOf(ruleInfo.rule); });
|
|
$tw.utils.each(this.inlineRules, function (ruleInfo) { self.usingRuleMap[ruleInfo.rule.name] = Object.getPrototypeOf(ruleInfo.rule); });
|
|
// Return the parse tree
|
|
};
|
|
|
|
/*
|
|
*/
|
|
WikiParser.prototype.loadRemoteTiddler = function(url) {
|
|
var self = this;
|
|
$tw.utils.httpRequest({
|
|
url: url,
|
|
type: "GET",
|
|
callback: function(err,data) {
|
|
if(!err) {
|
|
var tiddlers = self.wiki.deserializeTiddlers(".tid",data,self.wiki.getCreationFields());
|
|
$tw.utils.each(tiddlers,function(tiddler) {
|
|
tiddler["_canonical_uri"] = url;
|
|
});
|
|
if(tiddlers) {
|
|
self.wiki.addTiddlers(tiddlers);
|
|
}
|
|
}
|
|
}
|
|
});
|
|
};
|
|
|
|
/*
|
|
*/
|
|
WikiParser.prototype.setupRules = function(proto,configPrefix) {
|
|
var self = this;
|
|
if(!$tw.safeMode) {
|
|
$tw.utils.each(proto,function(object,name) {
|
|
if(self.wiki.getTiddlerText(configPrefix + name,"enable") !== "enable") {
|
|
delete proto[name];
|
|
}
|
|
});
|
|
}
|
|
};
|
|
|
|
/*
|
|
Instantiate an array of parse rules
|
|
*/
|
|
WikiParser.prototype.instantiateRules = function(classes,type,startPos) {
|
|
var rulesInfo = [],
|
|
self = this;
|
|
$tw.utils.each(classes,function(RuleClass) {
|
|
// Instantiate the rule
|
|
var rule = new RuleClass(self);
|
|
rule.is = {};
|
|
rule.is[type] = true;
|
|
rule.init(self);
|
|
var matchIndex = rule.findNextMatch(startPos);
|
|
if(matchIndex !== undefined) {
|
|
rulesInfo.push({
|
|
rule: rule,
|
|
matchIndex: matchIndex
|
|
});
|
|
}
|
|
});
|
|
return rulesInfo;
|
|
};
|
|
|
|
/*
|
|
Skip any whitespace at the current position. Options are:
|
|
treatNewlinesAsNonWhitespace: true if newlines are NOT to be treated as whitespace
|
|
*/
|
|
WikiParser.prototype.skipWhitespace = function(options) {
|
|
options = options || {};
|
|
var whitespaceRegExp = options.treatNewlinesAsNonWhitespace ? /([^\S\n]+)/mg : /(\s+)/mg;
|
|
whitespaceRegExp.lastIndex = this.pos;
|
|
var whitespaceMatch = whitespaceRegExp.exec(this.source);
|
|
if(whitespaceMatch && whitespaceMatch.index === this.pos) {
|
|
this.pos = whitespaceRegExp.lastIndex;
|
|
}
|
|
};
|
|
|
|
/*
|
|
Get the next match out of an array of parse rule instances
|
|
*/
|
|
WikiParser.prototype.findNextMatch = function(rules,startPos) {
|
|
// Find the best matching rule by finding the closest match position
|
|
var matchingRule,
|
|
matchingRulePos = this.sourceLength;
|
|
// Step through each rule
|
|
for(var t=0; t<rules.length; t++) {
|
|
var ruleInfo = rules[t];
|
|
// Ask the rule to get the next match if we've moved past the current one
|
|
if(ruleInfo.matchIndex !== undefined && ruleInfo.matchIndex < startPos) {
|
|
ruleInfo.matchIndex = ruleInfo.rule.findNextMatch(startPos);
|
|
}
|
|
// Adopt this match if it's closer than the current best match
|
|
if(ruleInfo.matchIndex !== undefined && ruleInfo.matchIndex <= matchingRulePos) {
|
|
matchingRule = ruleInfo;
|
|
matchingRulePos = ruleInfo.matchIndex;
|
|
}
|
|
}
|
|
return matchingRule;
|
|
};
|
|
|
|
/*
|
|
Parse any pragmas at the beginning of a block of parse text
|
|
*/
|
|
WikiParser.prototype.parsePragmas = function() {
|
|
var currentTreeBranch = this.tree;
|
|
while(true) {
|
|
var savedPos = this.pos;
|
|
// Skip whitespace
|
|
this.skipWhitespace();
|
|
// Check for the end of the text
|
|
if(this.pos >= this.sourceLength) {
|
|
break;
|
|
}
|
|
// Check if we've arrived at a pragma rule match
|
|
var nextMatch = this.findNextMatch(this.pragmaRules,this.pos);
|
|
// If not, just exit
|
|
if(!nextMatch || nextMatch.matchIndex !== this.pos) {
|
|
this.pos = savedPos;
|
|
break;
|
|
}
|
|
// Process the pragma rule
|
|
var start = this.pos;
|
|
var subTree = nextMatch.rule.parse();
|
|
if(subTree.length > 0) {
|
|
// Set the start and end positions of the pragma rule if
|
|
if (subTree[0].start === undefined) subTree[0].start = start;
|
|
if (subTree[subTree.length - 1].end === undefined) subTree[subTree.length - 1].end = this.pos;
|
|
$tw.utils.each(subTree, function (node) { node.rule = nextMatch.rule.name; });
|
|
// Quick hack; we only cope with a single parse tree node being returned, which is true at the moment
|
|
currentTreeBranch.push.apply(currentTreeBranch,subTree);
|
|
subTree[0].children = [];
|
|
currentTreeBranch = subTree[0].children;
|
|
}
|
|
// Skip whitespace after the pragma
|
|
this.skipWhitespace();
|
|
}
|
|
return currentTreeBranch;
|
|
};
|
|
|
|
/*
|
|
Parse a block from the current position
|
|
terminatorRegExpString: optional regular expression string that identifies the end of plain paragraphs. Must not include capturing parenthesis
|
|
*/
|
|
WikiParser.prototype.parseBlock = function(terminatorRegExpString) {
|
|
var terminatorRegExp = terminatorRegExpString ? new RegExp(terminatorRegExpString + "|\\r?\\n\\r?\\n","mg") : /(\r?\n\r?\n)/mg;
|
|
this.skipWhitespace();
|
|
if(this.pos >= this.sourceLength) {
|
|
return [];
|
|
}
|
|
// Look for a block rule that applies at the current position
|
|
var nextMatch = this.findNextMatch(this.blockRules,this.pos);
|
|
if(nextMatch && nextMatch.matchIndex === this.pos) {
|
|
var start = this.pos;
|
|
var subTree = nextMatch.rule.parse();
|
|
// Set the start and end positions of the first and last blocks if they're not already set
|
|
if (subTree.length > 0) {
|
|
if (subTree[0].start === undefined) subTree[0].start = start;
|
|
if (subTree[subTree.length - 1].end === undefined) subTree[subTree.length - 1].end = this.pos;
|
|
}
|
|
$tw.utils.each(subTree, function (node) { node.rule = nextMatch.rule.name; });
|
|
return subTree;
|
|
}
|
|
// Treat it as a paragraph if we didn't find a block rule
|
|
var start = this.pos;
|
|
var children = this.parseInlineRun(terminatorRegExp);
|
|
var end = this.pos;
|
|
return [{type: "element", tag: "p", children: children, start: start, end: end }];
|
|
};
|
|
|
|
/*
|
|
Parse a series of blocks of text until a terminating regexp is encountered or the end of the text
|
|
terminatorRegExpString: terminating regular expression
|
|
*/
|
|
WikiParser.prototype.parseBlocks = function(terminatorRegExpString) {
|
|
if(terminatorRegExpString) {
|
|
return this.parseBlocksTerminated(terminatorRegExpString);
|
|
} else {
|
|
return this.parseBlocksUnterminated();
|
|
}
|
|
};
|
|
|
|
/*
|
|
Parse a block from the current position to the end of the text
|
|
*/
|
|
WikiParser.prototype.parseBlocksUnterminated = function() {
|
|
var tree = [];
|
|
while(this.pos < this.sourceLength) {
|
|
tree.push.apply(tree,this.parseBlock());
|
|
}
|
|
return tree;
|
|
};
|
|
|
|
/*
|
|
Parse blocks of text until a terminating regexp is encountered. Wrapper for parseBlocksTerminatedExtended that just returns the parse tree
|
|
*/
|
|
WikiParser.prototype.parseBlocksTerminated = function(terminatorRegExpString) {
|
|
var ex = this.parseBlocksTerminatedExtended(terminatorRegExpString);
|
|
return ex.tree;
|
|
};
|
|
|
|
/*
|
|
Parse blocks of text until a terminating regexp is encountered
|
|
*/
|
|
WikiParser.prototype.parseBlocksTerminatedExtended = function(terminatorRegExpString) {
|
|
var terminatorRegExp = new RegExp(terminatorRegExpString,"mg"),
|
|
result = {
|
|
tree: []
|
|
};
|
|
// Skip any whitespace
|
|
this.skipWhitespace();
|
|
// Check if we've got the end marker
|
|
terminatorRegExp.lastIndex = this.pos;
|
|
var match = terminatorRegExp.exec(this.source);
|
|
// Parse the text into blocks
|
|
while(this.pos < this.sourceLength && !(match && match.index === this.pos)) {
|
|
var blocks = this.parseBlock(terminatorRegExpString);
|
|
result.tree.push.apply(result.tree,blocks);
|
|
// Skip any whitespace
|
|
this.skipWhitespace();
|
|
// Check if we've got the end marker
|
|
terminatorRegExp.lastIndex = this.pos;
|
|
match = terminatorRegExp.exec(this.source);
|
|
}
|
|
if(match && match.index === this.pos) {
|
|
this.pos = match.index + match[0].length;
|
|
result.match = match;
|
|
}
|
|
return result;
|
|
};
|
|
|
|
/*
|
|
Parse a run of text at the current position
|
|
terminatorRegExp: a regexp at which to stop the run
|
|
options: see below
|
|
Options available:
|
|
eatTerminator: move the parse position past any encountered terminator (default false)
|
|
*/
|
|
WikiParser.prototype.parseInlineRun = function(terminatorRegExp,options) {
|
|
if(terminatorRegExp) {
|
|
return this.parseInlineRunTerminated(terminatorRegExp,options);
|
|
} else {
|
|
return this.parseInlineRunUnterminated(options);
|
|
}
|
|
};
|
|
|
|
WikiParser.prototype.parseInlineRunUnterminated = function(options) {
|
|
var tree = [];
|
|
// Find the next occurrence of an inline rule
|
|
var nextMatch = this.findNextMatch(this.inlineRules,this.pos);
|
|
// Loop around the matches until we've reached the end of the text
|
|
while(this.pos < this.sourceLength && nextMatch) {
|
|
// Process the text preceding the run rule
|
|
if(nextMatch.matchIndex > this.pos) {
|
|
this.pushTextWidget(tree,this.source.substring(this.pos,nextMatch.matchIndex),this.pos,nextMatch.matchIndex);
|
|
this.pos = nextMatch.matchIndex;
|
|
}
|
|
// Process the run rule
|
|
var start = this.pos;
|
|
var subTree = nextMatch.rule.parse();
|
|
// Set the start and end positions of the first and last child if they're not already set
|
|
if (subTree.length > 0) {
|
|
// Set the start and end positions of the first and last child if they're not already set
|
|
if (subTree[0].start === undefined) subTree[0].start = start;
|
|
if (subTree[subTree.length - 1].end === undefined) subTree[subTree.length - 1].end = this.pos;
|
|
}
|
|
$tw.utils.each(subTree, function (node) { node.rule = nextMatch.rule.name; });
|
|
tree.push.apply(tree,subTree);
|
|
// Look for the next run rule
|
|
nextMatch = this.findNextMatch(this.inlineRules,this.pos);
|
|
}
|
|
// Process the remaining text
|
|
if(this.pos < this.sourceLength) {
|
|
this.pushTextWidget(tree,this.source.substr(this.pos),this.pos,this.sourceLength);
|
|
}
|
|
this.pos = this.sourceLength;
|
|
return tree;
|
|
};
|
|
|
|
WikiParser.prototype.parseInlineRunTerminated = function(terminatorRegExp,options) {
|
|
var ex = this.parseInlineRunTerminatedExtended(terminatorRegExp,options);
|
|
return ex.tree;
|
|
};
|
|
|
|
WikiParser.prototype.parseInlineRunTerminatedExtended = function(terminatorRegExp,options) {
|
|
options = options || {};
|
|
var tree = [];
|
|
// Find the next occurrence of the terminator
|
|
terminatorRegExp.lastIndex = this.pos;
|
|
var terminatorMatch = terminatorRegExp.exec(this.source);
|
|
// Find the next occurrence of a inlinerule
|
|
var inlineRuleMatch = this.findNextMatch(this.inlineRules,this.pos);
|
|
// Loop around until we've reached the end of the text
|
|
while(this.pos < this.sourceLength && (terminatorMatch || inlineRuleMatch)) {
|
|
// Return if we've found the terminator, and it precedes any inline rule match
|
|
if(terminatorMatch) {
|
|
if(!inlineRuleMatch || inlineRuleMatch.matchIndex >= terminatorMatch.index) {
|
|
if(terminatorMatch.index > this.pos) {
|
|
this.pushTextWidget(tree,this.source.substring(this.pos,terminatorMatch.index),this.pos,terminatorMatch.index);
|
|
}
|
|
this.pos = terminatorMatch.index;
|
|
if(options.eatTerminator) {
|
|
this.pos += terminatorMatch[0].length;
|
|
}
|
|
return {
|
|
match: terminatorMatch,
|
|
tree: tree
|
|
};
|
|
}
|
|
}
|
|
// Process any inline rule, along with the text preceding it
|
|
if(inlineRuleMatch) {
|
|
// Preceding text
|
|
if(inlineRuleMatch.matchIndex > this.pos) {
|
|
this.pushTextWidget(tree,this.source.substring(this.pos,inlineRuleMatch.matchIndex),this.pos,inlineRuleMatch.matchIndex);
|
|
this.pos = inlineRuleMatch.matchIndex;
|
|
}
|
|
// Process the inline rule
|
|
var start = this.pos;
|
|
var subTree = inlineRuleMatch.rule.parse();
|
|
// Set the start and end positions of the first and last child if they're not already set
|
|
if (subTree.length > 0) {
|
|
if (subTree[0].start === undefined) subTree[0].start = start;
|
|
if (subTree[subTree.length - 1].end === undefined) subTree[subTree.length - 1].end = this.pos;
|
|
}
|
|
$tw.utils.each(subTree, function (node) { node.rule = inlineRuleMatch.rule.name; });
|
|
tree.push.apply(tree,subTree);
|
|
// Look for the next inline rule
|
|
inlineRuleMatch = this.findNextMatch(this.inlineRules,this.pos);
|
|
// Look for the next terminator match
|
|
terminatorRegExp.lastIndex = this.pos;
|
|
terminatorMatch = terminatorRegExp.exec(this.source);
|
|
}
|
|
}
|
|
// Process the remaining text
|
|
if(this.pos < this.sourceLength) {
|
|
this.pushTextWidget(tree,this.source.substr(this.pos),this.pos,this.sourceLength);
|
|
}
|
|
this.pos = this.sourceLength;
|
|
return {
|
|
tree: tree
|
|
};
|
|
};
|
|
|
|
/*
|
|
Push a text widget onto an array, respecting the configTrimWhiteSpace setting
|
|
*/
|
|
WikiParser.prototype.pushTextWidget = function(array,text,start,end) {
|
|
if(this.configTrimWhiteSpace) {
|
|
text = $tw.utils.trim(text);
|
|
}
|
|
if(text) {
|
|
array.push({type: "text", text: text, start: start, end: end});
|
|
}
|
|
};
|
|
|
|
/*
|
|
Parse zero or more class specifiers `.classname`
|
|
*/
|
|
WikiParser.prototype.parseClasses = function() {
|
|
var classRegExp = /\.([^\s\.]+)/mg,
|
|
classNames = [];
|
|
classRegExp.lastIndex = this.pos;
|
|
var match = classRegExp.exec(this.source);
|
|
while(match && match.index === this.pos) {
|
|
this.pos = match.index + match[0].length;
|
|
classNames.push(match[1]);
|
|
match = classRegExp.exec(this.source);
|
|
}
|
|
return classNames;
|
|
};
|
|
|
|
/*
|
|
Amend the rules used by this instance of the parser
|
|
type: `only` keeps just the named rules, `except` keeps all but the named rules
|
|
names: array of rule names
|
|
*/
|
|
WikiParser.prototype.amendRules = function(type,names) {
|
|
names = names || [];
|
|
// Define the filter function
|
|
var target;
|
|
if(type === "only") {
|
|
target = true;
|
|
} else if(type === "except") {
|
|
target = false;
|
|
} else {
|
|
return;
|
|
}
|
|
// Define a function to process each of our rule arrays
|
|
var processRuleArray = function(ruleArray) {
|
|
for(var t=ruleArray.length-1; t>=0; t--) {
|
|
if((names.indexOf(ruleArray[t].rule.name) === -1) === target) {
|
|
ruleArray.splice(t,1);
|
|
}
|
|
}
|
|
};
|
|
// Process each rule array
|
|
processRuleArray(this.pragmaRules);
|
|
processRuleArray(this.blockRules);
|
|
processRuleArray(this.inlineRules);
|
|
};
|
|
|
|
exports["text/vnd.tiddlywiki"] = WikiParser;
|
|
|
|
})();
|