1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2025-11-04 09:33:00 +00:00

First pass at a new wiki text parser

This one respects HTML paragraphs properly
This commit is contained in:
Jeremy Ruston
2012-05-26 18:30:32 +01:00
parent 85f1b33ef2
commit 92353d37b2
12 changed files with 570 additions and 5 deletions

View File

@@ -0,0 +1,55 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/blockrules/class.js
type: application/javascript
module-type: wikitextblockrule
Wiki text block rule for assigning classes to paragraphs and other blocks
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Name of this rule
exports.name = "class";
// Regexp source for the opening marker: `{{`, then any characters other than `{`, CR or LF, then a `{` that must be followed by `$`
exports.regExpString = "\\{\\{(?:[^\\{\\r\\n]*)\\{$";
exports.parse = function(match) {
var tree = [],
reStart = /\{\{([^\{\r\n]*){(?:\r?\n)?/mg,
reEnd = /(\}\}\}$(?:\r?\n)?)/mg,
endMatch;
reStart.lastIndex = this.pos;
match = reStart.exec(this.source);
if(match) {
this.pos = match.index + match[0].length;
// Skip any whitespace
this.skipWhitespace();
// Check if we've got the end marker
reEnd.lastIndex = this.pos;
endMatch = reEnd.exec(this.source);
// Parse the text into blocks
while(this.pos < this.sourceLength && !(endMatch && endMatch.index === this.pos)) {
var blocks = this.parseBlock();
for(var t=0; t<blocks.length; t++) {
blocks[t].addClass(match[1]);
tree.push(blocks[t]);
}
// Skip any whitespace
this.skipWhitespace();
// Check if we've got the end marker
reEnd.lastIndex = this.pos;
endMatch = reEnd.exec(this.source);
}
reEnd.lastIndex = this.pos;
endMatch = reEnd.exec(this.source);
if(endMatch) {
this.pos = endMatch.index + endMatch[0].length;
}
return tree;
}
};
})();

View File

@@ -0,0 +1,25 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/blockrules/heading.js
type: application/javascript
module-type: wikitextblockrule
Wiki text block rule for headings
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Name of this rule
exports.name = "heading";
// Regexp source matching a run of one to six `!` characters
exports.regExpString = "!{1,6}";
exports.parse = function(match) {
this.pos = match.index + match[0].length;
var classedRun = this.parseClassedRun(/(\r?\n)/mg);
return [$tw.Tree.Element("h1",{"class": classedRun["class"]},classedRun.tree)];
};
})();

View File

@@ -0,0 +1,54 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/blockrules/html.js
type: application/javascript
module-type: wikitextblockrule
Wiki text block rule for block level HTML elements
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Name of this rule
exports.name = "html";
// Regexp source matching an opening tag: `<`, one or more ASCII letters, optional whitespace, then any characters other than `>`, then `>`
exports.regExpString = "<[A-Za-z]+\\s*[^>]*>";
exports.parse = function(match) {
var reStart = /<([A-Za-z]+)(\s*[^>]*)>/mg,
reAttr = /\s*([A-Za-z\-_]+)(?:\s*=\s*(?:("[^"]*")|('[^']*')|([^"'\s]+)))?/mg;
reStart.lastIndex = this.pos;
var startMatch = reStart.exec(this.source);
if(startMatch && startMatch.index === this.pos) {
var attrMatch = reAttr.exec(startMatch[2]),
attributes = {};
while(attrMatch) {
var name = attrMatch[1],
value;
if(attrMatch[2]) { // Double quoted
value = attrMatch[2].substring(1,attrMatch[2].length-1);
} else if(attrMatch[3]) { // Single quoted
value = attrMatch[3].substring(1,attrMatch[3].length-1);
} else if(attrMatch[4]) { // Unquoted
value = attrMatch[4];
} else { // Valueless
value = true; // TODO: We should have a way of indicating we want an attribute without a value
}
attributes[name] = value;
attrMatch = reAttr.exec(startMatch[2]);
}
this.pos = startMatch.index + startMatch[0].length;
var reEnd = new RegExp("(</" + startMatch[1] + ">)","mg"),
element = $tw.Tree.Element(startMatch[1],attributes,this.parseRun(reEnd));
reEnd.lastIndex = this.pos;
match = reEnd.exec(this.source);
if(match && match.index === this.pos) {
this.pos = match.index + match[0].length;
}
return [element];
}
};
})();

View File

@@ -0,0 +1,87 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/blockrules/list.js
type: application/javascript
module-type: wikitextblockrule
Wiki text block rule for lists.
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Name of this rule
exports.name = "list";
// Regexp source matching a run of one or more list marker characters (`*`, `#`, `;` or `:`)
exports.regExpString = "[\\*#;:]+";
// Maps each list marker character to the tag of the list element it belongs to and the tag used for its items
var listTypes = {
"*": {listTag: "ul", itemTag: "li"},
"#": {listTag: "ol", itemTag: "li"},
";": {listTag: "dl", itemTag: "dt"},
":": {listTag: "dl", itemTag: "dd"}
};
/*
*/
exports.parse = function(match) {
var listStack = [], // Array containing list elements for the previous row in the list
t, listInfo, listElement, itemElement, previousRootListTag;
// Cycle through the rows in the list
do {
// Walk through the list markers for the current row
for(t=0; t<match[0].length; t++) {
listInfo = listTypes[match[0].charAt(t)];
// Remove any stacked up element if we can't re-use it because the list type doesn't match
if(listStack.length > t && listStack[t].type !== listInfo.listTag) {
listStack.splice(t,listStack.length - t);
}
// Construct the list element or reuse the previous one at this level
if(listStack.length <= t) {
listElement = $tw.Tree.Element(listInfo.listTag,{},[$tw.Tree.Element(listInfo.itemTag,{},[])]);
// Link this list element into the last child item of the parent list item
if(t) {
var prevListItem = listStack[t-1].children[listStack[t-1].children.length-1];
prevListItem.children.push(listElement);
}
// Save this element in the stack
listStack[t] = listElement;
} else if(t === (match[0].length - 1)) {
listStack[t].children.push($tw.Tree.Element(listInfo.itemTag,{},[]));
}
}
if(listStack.length > match[0].length) {
listStack.splice(match[0].length,listStack.length - match[0].length);
}
// Skip the list markers
this.pos = match.index + match[0].length;
// Process the body of the list item into the last list item
var lastListInfo = listTypes[match[0].charAt(match[0].length-1)],
lastListChildren = listStack[listStack.length-1].children,
lastListItem = lastListChildren[lastListChildren.length-1],
classedRun = this.parseClassedRun(/(\r?\n)/mg);
for(t=0; t<classedRun.tree.length; t++) {
lastListItem.children.push(classedRun.tree[t]);
}
if(classedRun["class"]) {
lastListItem.addClass(classedRun["class"]);
}
// Remember the root list tag of this list item
previousRootListTag = listStack[0].type;
// Consume any whitespace following the list item
this.skipWhitespace();
// Lookahead to see if the next line is part of the same list
var nextListItemRegExp = /(^[\*#;:]+)/mg;
nextListItemRegExp.lastIndex = this.pos;
match = nextListItemRegExp.exec(this.source);
listInfo = match ? listTypes[match[0].charAt(0)] : null;
} while(match && match.index === this.pos && listInfo && previousRootListTag === listInfo.listTag);
// Return the root element of the list
return [listStack[0]];
};
})();

View File

@@ -0,0 +1,24 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/blockrules/rule.js
type: application/javascript
module-type: wikitextblockrule
Wiki text block rule for rules
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Name of this rule
exports.name = "rule";
// Regexp source matching three or more `-` characters followed by a line break (an optional CR, then LF)
exports.regExpString = "-{3,}\r?\n";
exports.parse = function(match) {
this.pos = match.index + match[0].length;
return [$tw.Tree.Element("hr",{},[])];
};
})();

View File

@@ -0,0 +1,182 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/newwikitextparser.js
type: application/javascript
module-type: parser
A new-school wikitext parser
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
/*
Constructor for the wikitext renderer. The source text is parsed into a tree of blocks as
part of construction.
text: the wikitext to parse (anything falsy is treated as an empty string)
options: object supplying `wiki` and `parser`, both stored on the renderer
*/
var WikiTextRenderer = function(text,options) {
	var blocks;
	// Source text and the current parse position within it
	this.source = text || "";
	this.sourceLength = this.source.length;
	this.pos = 0;
	this.wiki = options.wiki;
	this.parser = options.parser;
	// Output tree and the dependencies gathered while parsing
	this.tree = [];
	this.dependencies = new $tw.Dependencies();
	// Keep parsing blocks until the whole source has been consumed
	while(this.pos < this.sourceLength) {
		blocks = this.parseBlock();
		this.tree.push.apply(this.tree,blocks);
	}
};
/*
Now make WikiTextRenderer inherit from the default Renderer class
*/
var Renderer = require("$:/core/modules/renderer.js").Renderer;
WikiTextRenderer.prototype = new Renderer();
WikiTextRenderer.constructor = WikiTextRenderer;
/*
Parse a single block at the current position, after skipping any leading whitespace.
If a block rule matches exactly at the current position the rule does the parsing,
otherwise the text is treated as a paragraph.
Returns whatever the rule's parse function returns, an empty array if no rule could be
identified from the match, or an array holding a single `p` element
*/
WikiTextRenderer.prototype.parseBlock = function() {
	var blockRules, match, rule, i;
	this.skipWhitespace();
	blockRules = this.parser.blockRules;
	// Look for a block rule
	blockRules.regExp.lastIndex = this.pos;
	match = blockRules.regExp.exec(this.source);
	// Without a rule matching right here the text is an ordinary paragraph
	if(!(blockRules.rules.length && match && match.index === this.pos)) {
		return [$tw.Tree.Element("p",{},this.parseRun())];
	}
	// Capture group i belongs to rule i-1; the last group that captured something wins
	for(i=blockRules.rules.length; i>0; i--) {
		if(match[i]) {
			rule = blockRules.rules[i-1];
			break;
		}
	}
	return rule ? rule.parse.call(this,match) : [];
};
/*
Advance the current position past any whitespace that starts exactly at it
*/
WikiTextRenderer.prototype.skipWhitespace = function() {
	var cursor = this.pos;
	// Step over whitespace characters one at a time
	while(cursor < this.source.length && /\s/.test(this.source.charAt(cursor))) {
		cursor++;
	}
	if(cursor !== this.pos) {
		this.pos = cursor;
	}
};
/*
Parse a run of text at the current position
terminatorRegExp: a regexp at which to stop the run (defaults to a blank line)
Returns an array of tree nodes

The run stops just in front of the terminator, which is left for the caller to consume. If
the terminator is never found the run extends to the end of the source.
*/
WikiTextRenderer.prototype.parseRun = function(terminatorRegExp) {
	var tree = [],
		self = this,
		runRules = this.parser.runRules,
		rule, t;
	// Find the next run rule match at or after the current position. With no rules registered the
	// combined regexp is empty and would match the empty string forever, so report no match instead
	var findRunRuleMatch = function() {
		if(!runRules.rules.length) {
			return null;
		}
		runRules.regExp.lastIndex = self.pos;
		return runRules.regExp.exec(self.source);
	};
	// Find the next occurrence of the terminator
	terminatorRegExp = terminatorRegExp || /(\r?\n\r?\n)/mg;
	terminatorRegExp.lastIndex = this.pos;
	var terminatorMatch = terminatorRegExp.exec(this.source);
	// Find the next occurrence of a runrule
	var runRuleMatch = findRunRuleMatch();
	// Loop around until we've reached the end of the text
	while(this.pos < this.sourceLength && (terminatorMatch || runRuleMatch)) {
		// Return if we've found the terminator, and it precedes any run rule match
		if(terminatorMatch) {
			if(!runRuleMatch || runRuleMatch.index > terminatorMatch.index) {
				if(terminatorMatch.index > this.pos) {
					tree.push($tw.Tree.Text(this.source.substring(this.pos,terminatorMatch.index)));
				}
				this.pos = terminatorMatch.index;
				return tree;
			}
		}
		// Process any run rule, along with the text preceding it
		if(runRuleMatch) {
			// Preceding text
			if(runRuleMatch.index > this.pos) {
				tree.push($tw.Tree.Text(this.source.substring(this.pos,runRuleMatch.index)));
				this.pos = runRuleMatch.index;
			}
			// Capture group t+1 belongs to rule t; the last group that captured something wins
			for(t=0; t<runRules.rules.length; t++) {
				if(runRuleMatch[t+1]) {
					rule = runRules.rules[t];
				}
			}
			if(rule) {
				tree.push.apply(tree,rule.parse.call(this,runRuleMatch));
			}
			// Look for the next run rule
			runRuleMatch = findRunRuleMatch();
		}
	}
	// Process the remaining text
	if(this.pos < this.sourceLength) {
		tree.push($tw.Tree.Text(this.source.substr(this.pos)));
	}
	this.pos = this.sourceLength;
	return tree;
};
/*
Parse a run of text that may begin with a class specifier `{{class}}`
terminatorRegExp: passed through to parseRun
Returns an object with `class` (the specified class name, or undefined if there is no
specifier at the current position) and `tree` (the nodes returned by parseRun)
*/
WikiTextRenderer.prototype.parseClassedRun = function(terminatorRegExp) {
	var specifierRegExp = /\{\{([^\}]*)\}\}/mg,
		specifier, hasSpecifier;
	specifierRegExp.lastIndex = this.pos;
	specifier = specifierRegExp.exec(this.source);
	// Only a specifier that starts exactly at the current position counts
	hasSpecifier = !!specifier && specifier.index === this.pos;
	if(hasSpecifier) {
		// After a successful match lastIndex sits just past the specifier
		this.pos = specifierRegExp.lastIndex;
	}
	return {
		"class": hasSpecifier ? specifier[1] : undefined,
		tree: this.parseRun(terminatorRegExp)
	};
};
/*
Constructor for the wikitext parser. It assembles the block and run rules; the parsing itself
is done by the wikitext renderer.
options: object supplying `wiki`, which is stored on the parser
*/
var WikiTextParser = function(options) {
	// Property on the parser paired with the module type that its rules are collected from
	var ruleSets = [
			["blockRules","wikitextblockrule"],
			["runRules","wikitextrunrule"]
		],
		i;
	this.wiki = options.wiki;
	// Assemble the rule regexps
	for(i=0; i<ruleSets.length; i++) {
		this[ruleSets[i][0]] = this.getRules(ruleSets[i][1]);
	}
};
/*
Parse a piece of text by constructing a wikitext renderer for it
type: not used by this function
text: the text to parse
Returns the new WikiTextRenderer
*/
WikiTextParser.prototype.parse = function(type,text) {
	var rendererOptions = {
		wiki: this.wiki,
		parser: this
	};
	return new WikiTextRenderer(text,rendererOptions);
};
/*
Collect the rule modules of a given type and merge their regexp strings into a single regexp
moduleType: key into $tw.plugins.moduleTypes; a missing entry is treated as an empty list
Returns an object with `regExp` (each rule's regExpString wrapped in its own capture group,
joined with `|` and compiled with the "mg" flags) and `rules` (a copy of the module list)
*/
WikiTextParser.prototype.getRules = function(moduleType) {
	var registered = $tw.plugins.moduleTypes[moduleType] || [],
		rules = registered.slice(0),
		pattern = "",
		i;
	// Each rule gets its own capture group so that the matching rule can be identified later
	for(i=0; i<rules.length; i++) {
		pattern += (i ? "|" : "") + "(" + rules[i].regExpString + ")";
	}
	return {
		regExp: new RegExp(pattern,"mg"),
		rules: rules
	};
};
// Export the parser constructor under the "text/x-tiddlywiki-new" key
exports["text/x-tiddlywiki-new"] = WikiTextParser;
})();

View File

@@ -0,0 +1,54 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/runrules/wikilink.js
type: application/javascript
module-type: wikitextrunrule
Wiki text run rule for wiki links
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Name of this rule
exports.name = "wikilink";
var textPrimitives = {
upperLetter: "[A-Z\u00c0-\u00de\u0150\u0170]",
lowerLetter: "[a-z0-9_\\-\u00df-\u00ff\u0151\u0171]",
anyLetter: "[A-Za-z0-9_\\-\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]",
anyLetterStrict: "[A-Za-z0-9\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]"
};
textPrimitives.unWikiLink = "~";
textPrimitives.wikiLink = "(?:(?:" + textPrimitives.upperLetter + "+" +
textPrimitives.lowerLetter + "+" +
textPrimitives.upperLetter +
textPrimitives.anyLetter + "*)|(?:" +
textPrimitives.upperLetter + "{2,}" +
textPrimitives.lowerLetter + "+))";
exports.regExpString = textPrimitives.unWikiLink+"?"+textPrimitives.wikiLink;
exports.parse = function(match) {
this.pos = match.index + match[0].length;
// If the link starts with the unwikilink character then just output it as plain text
if(match[0].substr(0,1) === textPrimitives.unWikiLink) {
return [$tw.Tree.Text(match[0].substr(1))];
}
// If the link has been preceded with a letter then don't treat it as a link
if(match.index > 0) {
var preRegExp = new RegExp(textPrimitives.anyLetterStrict,"mg");
preRegExp.lastIndex = match.index-1;
var preMatch = preRegExp.exec(this.source);
if(preMatch && preMatch.index === match.index-1) {
return [$tw.Tree.Text(match[0])];
}
}
var macroNode = $tw.Tree.Macro("link",{to: match[0]},[$tw.Tree.Text(match[0])],this.wiki);
this.dependencies.mergeDependencies(macroNode.dependencies);
return [macroNode];
};
})();