First pass at a new wiki text parser

This one respects HTML paragraphs properly
2025-11-04 09:33:00 +00:00 · 2012-05-26 18:30:32 +01:00
parent 85f1b33ef2
commit 92353d37b2
12 changed files with 570 additions and 5 deletions
--- a/core/modules/parsers/newwikitextparser/blockrules/class.js
+++ b/core/modules/parsers/newwikitextparser/blockrules/class.js
@@ -0,0 +1,55 @@
+/*\
+title: $:/core/modules/parsers/newwikitextparser/blockrules/class.js
+type: application/javascript
+module-type: wikitextblockrule
+
+Wiki text block rule for assigning classes to paragraphs and other blocks
+
+\*/
+(function(){
+
+/*jslint node: true, browser: true */
+/*global $tw: false */
+"use strict";
+
+// Name of this block rule
+exports.name = "class";
+
+// Matches the opening marker of a classed block: `{{`, then a run of characters
+// that excludes `{` and line breaks (the class name), then `{` at the end of a
+// line (the merged rule regexp is compiled with the `m` flag)
+exports.regExpString = "\\{\\{(?:[^\\{\\r\\n]*)\\{$";
+
+exports.parse = function(match) {
+	var tree = [],
+		reStart = /\{\{([^\{\r\n]*){(?:\r?\n)?/mg,
+		reEnd = /(\}\}\}$(?:\r?\n)?)/mg,
+		endMatch;
+	reStart.lastIndex = this.pos;
+	match = reStart.exec(this.source);
+	if(match) {
+		this.pos = match.index + match[0].length;
+		// Skip any whitespace
+		this.skipWhitespace();
+		//  Check if we've got the end marker
+		reEnd.lastIndex = this.pos;
+		endMatch = reEnd.exec(this.source);
+		// Parse the text into blocks
+		while(this.pos < this.sourceLength && !(endMatch && endMatch.index === this.pos)) {
+			var blocks = this.parseBlock();
+			for(var t=0; t<blocks.length; t++) {
+				blocks[t].addClass(match[1]);
+				tree.push(blocks[t]);
+			}
+			// Skip any whitespace
+			this.skipWhitespace();
+			//  Check if we've got the end marker
+			reEnd.lastIndex = this.pos;
+			endMatch = reEnd.exec(this.source);
+		}
+		reEnd.lastIndex = this.pos;
+		endMatch = reEnd.exec(this.source);
+		if(endMatch) {
+			this.pos = endMatch.index + endMatch[0].length;
+		}
+		return tree;
+	}
+};
+
+})();
--- a/core/modules/parsers/newwikitextparser/blockrules/heading.js
+++ b/core/modules/parsers/newwikitextparser/blockrules/heading.js
@@ -0,0 +1,25 @@
+/*\
+title: $:/core/modules/parsers/newwikitextparser/blockrules/heading.js
+type: application/javascript
+module-type: wikitextblockrule
+
+Wiki text block rule for headings
+
+\*/
+(function(){
+
+/*jslint node: true, browser: true */
+/*global $tw: false */
+"use strict";
+
+// Name of this block rule
+exports.name = "heading";
+
+// Matches a run of one to six `!` characters
+exports.regExpString = "!{1,6}";
+
+exports.parse = function(match) {
+	this.pos = match.index + match[0].length;
+	var classedRun = this.parseClassedRun(/(\r?\n)/mg);
+	return [$tw.Tree.Element("h1",{"class": classedRun["class"]},classedRun.tree)];
+};
+
+})();
--- a/core/modules/parsers/newwikitextparser/blockrules/html.js
+++ b/core/modules/parsers/newwikitextparser/blockrules/html.js
@@ -0,0 +1,54 @@
+/*\
+title: $:/core/modules/parsers/newwikitextparser/blockrules/html.js
+type: application/javascript
+module-type: wikitextblockrule
+
+Wiki text block rule for block level HTML elements
+
+\*/
+(function(){
+
+/*jslint node: true, browser: true */
+/*global $tw: false */
+"use strict";
+
+// Name of this block rule
+exports.name = "html";
+
+// Matches an opening tag: `<`, one or more ASCII letters (the tag name), then
+// anything up to the next `>`
+exports.regExpString = "<[A-Za-z]+\\s*[^>]*>";
+
+exports.parse = function(match) {
+	var reStart = /<([A-Za-z]+)(\s*[^>]*)>/mg,
+		reAttr = /\s*([A-Za-z\-_]+)(?:\s*=\s*(?:("[^"]*")|('[^']*')|([^"'\s]+)))?/mg;
+	reStart.lastIndex = this.pos;
+	var startMatch = reStart.exec(this.source);
+	if(startMatch && startMatch.index === this.pos) {
+		var attrMatch = reAttr.exec(startMatch[2]),
+			attributes = {};
+		while(attrMatch) {
+			var name = attrMatch[1],
+				value;
+			if(attrMatch[2]) { // Double quoted
+				value = attrMatch[2].substring(1,attrMatch[2].length-1);
+			} else if(attrMatch[3]) { // Single quoted
+				value = attrMatch[3].substring(1,attrMatch[3].length-1);
+			} else if(attrMatch[4]) { // Unquoted
+				value = attrMatch[4];
+			} else { // Valueless
+				value = true; // TODO: We should have a way of indicating we want an attribute without a value
+			}
+			attributes[name] = value;
+			attrMatch = reAttr.exec(startMatch[2]);
+		}
+		this.pos = startMatch.index + startMatch[0].length;
+		var reEnd = new RegExp("(</" + startMatch[1] + ">)","mg"),
+			element = $tw.Tree.Element(startMatch[1],attributes,this.parseRun(reEnd));
+		reEnd.lastIndex = this.pos;
+		match = reEnd.exec(this.source);
+		if(match && match.index === this.pos) {
+			this.pos = match.index + match[0].length;
+		}
+		return [element];
+	}
+};
+
+})();
--- a/core/modules/parsers/newwikitextparser/blockrules/list.js
+++ b/core/modules/parsers/newwikitextparser/blockrules/list.js
@@ -0,0 +1,87 @@
+/*\
+title: $:/core/modules/parsers/newwikitextparser/blockrules/list.js
+type: application/javascript
+module-type: wikitextblockrule
+
+Wiki text block rule for lists.
+
+
+
+\*/
+(function(){
+
+/*jslint node: true, browser: true */
+/*global $tw: false */
+"use strict";
+
+// Name of this block rule
+exports.name = "list";
+
+// Matches a run of one or more list marker characters: `*`, `#`, `;` or `:`
+exports.regExpString = "[\\*#;:]+";
+
+// Maps each list marker character to the tag used for the list element and the
+// tag used for its items. Both `;` and `:` produce a `dl` list.
+var listTypes = {
+	"*": {listTag: "ul", itemTag: "li"},
+	"#": {listTag: "ol", itemTag: "li"},
+	";": {listTag: "dl", itemTag: "dt"},
+	":": {listTag: "dl", itemTag: "dd"}
+};
+
+/*
+Parse a list. Each row starts with a run of list markers; the number of markers
+gives the nesting depth and each marker selects the list type at its level.
+Rows keep being consumed while the next line starts with list markers whose
+first marker maps to the same list tag as the root of the list built so far.
+	match: the block rule match; match[0] is the run of list markers for the first row
+Returns an array containing the root list element
+*/
+exports.parse = function(match) {
+	var listStack = [], // Array containing list elements for the previous row in the list
+		t, listInfo, listElement, itemElement, previousRootListTag;
+	// Cycle through the rows in the list
+	do {
+		// Walk through the list markers for the current row
+		for(t=0; t<match[0].length; t++) {
+			listInfo = listTypes[match[0].charAt(t)];
+			// Remove any stacked up element if we can't re-use it because the list type doesn't match
+			if(listStack.length > t && listStack[t].type !== listInfo.listTag) {
+				listStack.splice(t,listStack.length - t);
+			}
+			// Construct the list element or reuse the previous one at this level
+			if(listStack.length <= t) {
+				// A new list element is created together with its first (empty) item
+				listElement = $tw.Tree.Element(listInfo.listTag,{},[$tw.Tree.Element(listInfo.itemTag,{},[])]);
+				// Link this list element into the last child item of the parent list item
+				if(t) {
+					var prevListItem = listStack[t-1].children[listStack[t-1].children.length-1];
+					prevListItem.children.push(listElement);
+				}
+				// Save this element in the stack
+				listStack[t] = listElement;
+			} else if(t === (match[0].length - 1)) {
+				// Reusing the list at the deepest level of this row: add a new item for the row
+				listStack[t].children.push($tw.Tree.Element(listInfo.itemTag,{},[]));
+			}
+		}
+		// Drop any lists that are nested deeper than the current row
+		if(listStack.length > match[0].length) {
+			listStack.splice(match[0].length,listStack.length - match[0].length);
+		}
+		// Skip the list markers
+		this.pos = match.index + match[0].length;
+		// Process the body of the list item into the last list item
+		// NOTE: lastListInfo is assigned here but not read anywhere below
+		var lastListInfo = listTypes[match[0].charAt(match[0].length-1)],
+			lastListChildren = listStack[listStack.length-1].children,
+			lastListItem = lastListChildren[lastListChildren.length-1],
+			classedRun = this.parseClassedRun(/(\r?\n)/mg);
+		for(t=0; t<classedRun.tree.length; t++) {
+			lastListItem.children.push(classedRun.tree[t]);
+		}
+		if(classedRun["class"]) {
+			lastListItem.addClass(classedRun["class"]);
+		}
+		// Remember the root list tag of this list item
+		previousRootListTag = listStack[0].type;
+		// Consume any whitespace following the list item
+		this.skipWhitespace();
+		// Lookahead to see if the next line is part of the same list
+		var nextListItemRegExp = /(^[\*#;:]+)/mg;
+		nextListItemRegExp.lastIndex = this.pos;
+		match = nextListItemRegExp.exec(this.source);
+		listInfo = match ? listTypes[match[0].charAt(0)] : null;
+	} while(match && match.index === this.pos && listInfo && previousRootListTag === listInfo.listTag);
+	// Return the root element of the list
+	return [listStack[0]];
+};
+
+})();
--- a/core/modules/parsers/newwikitextparser/blockrules/rule.js
+++ b/core/modules/parsers/newwikitextparser/blockrules/rule.js
@@ -0,0 +1,24 @@
+/*\
+title: $:/core/modules/parsers/newwikitextparser/blockrules/rule.js
+type: application/javascript
+module-type: wikitextblockrule
+
+Wiki text block rule for horizontal rules
+
+\*/
+(function(){
+
+/*jslint node: true, browser: true */
+/*global $tw: false */
+"use strict";
+
+exports.name = "rule";
+
+exports.regExpString = "-{3,}\r?\n";
+
+exports.parse = function(match) {
+	this.pos = match.index + match[0].length;
+	return [$tw.Tree.Element("hr",{},[])];
+};
+
+})();
--- a/core/modules/parsers/newwikitextparser/newwikitextparser.js
+++ b/core/modules/parsers/newwikitextparser/newwikitextparser.js
@@ -0,0 +1,182 @@
+/*\
+title: $:/core/modules/parsers/newwikitextparser/newwikitextparser.js
+type: application/javascript
+module-type: parser
+
+A new-school wikitext parser
+
+\*/
+(function(){
+
+/*jslint node: true, browser: true */
+/*global $tw: false */
+"use strict";
+
+/*
+Define the wikitext renderer constructor
+*/
+var WikiTextRenderer = function(text,options) {
+	this.source = text || "";
+	this.sourceLength = this.source.length;
+	this.pos = 0;
+	this.wiki = options.wiki;
+	this.parser = options.parser;
+	this.tree = [];
+	this.dependencies = new $tw.Dependencies();
+	// Parse the text into blocks
+	while(this.pos < this.sourceLength) {
+		this.tree.push.apply(this.tree,this.parseBlock());
+	}
+};
+
+/*
+Now make WikiTextRenderer inherit from the default Renderer class
+*/
+var Renderer = require("$:/core/modules/renderer.js").Renderer;
+WikiTextRenderer.prototype = new Renderer();
+WikiTextRenderer.constructor = WikiTextRenderer;
+
+/*
+Parse a block of text at the current position
+*/
+WikiTextRenderer.prototype.parseBlock = function() {
+	this.skipWhitespace();
+	// Look for a block rule
+	this.parser.blockRules.regExp.lastIndex = this.pos;
+	var match = this.parser.blockRules.regExp.exec(this.source);
+	if(this.parser.blockRules.rules.length && match && match.index === this.pos) {
+		var rule;
+		for(var t=0; t<this.parser.blockRules.rules.length; t++) {
+			if(match[t+1]) {
+				rule = this.parser.blockRules.rules[t];
+			}
+		}
+		return rule ? rule.parse.call(this,match) : [];
+	} else {
+		// Treat it as a paragraph if we didn't find a block rule
+		return [$tw.Tree.Element("p",{},this.parseRun())];
+	}
+};
+
+WikiTextRenderer.prototype.skipWhitespace = function() {
+	var whitespaceRegExp = /(\s+)/mg;
+	whitespaceRegExp.lastIndex = this.pos;
+	var whitespaceMatch = whitespaceRegExp.exec(this.source);
+	if(whitespaceMatch && whitespaceMatch.index === this.pos) {
+		this.pos = whitespaceRegExp.lastIndex;
+	}
+};
+
+/*
+Parse a run of text at the current position
+	terminatorRegExp: a regexp at which to stop the run
+Returns an array of tree nodes
+*/
+WikiTextRenderer.prototype.parseRun = function(terminatorRegExp) {
+	var tree = [];
+	// Find the next occurrence of the terminator
+	terminatorRegExp = terminatorRegExp || /(\r?\n\r?\n)/mg;
+	terminatorRegExp.lastIndex = this.pos;
+	var terminatorMatch = terminatorRegExp.exec(this.source);
+	// Find the next occurrence of a runrule
+	this.parser.runRules.regExp.lastIndex = this.pos;
+	var runRuleMatch = this.parser.runRules.regExp.exec(this.source);
+	// Loop around until we've reached the end of the text
+	while(this.pos < this.sourceLength && (terminatorMatch || runRuleMatch)) {
+		// Return if we've found the terminator, and it precedes any run rule match
+		if(terminatorMatch) {
+			if(!runRuleMatch || runRuleMatch.index > terminatorMatch.index) {
+				if(terminatorMatch.index > this.pos) {
+					tree.push($tw.Tree.Text(this.source.substring(this.pos,terminatorMatch.index)));
+				}
+				this.pos = terminatorMatch.index;
+				return tree;
+			}
+		}
+		// Process any run rule, along with the text preceding it
+		if(runRuleMatch) {
+			// Preceding text
+			if(runRuleMatch.index > this.pos) {
+				tree.push($tw.Tree.Text(this.source.substring(this.pos,runRuleMatch.index)));
+				this.pos = runRuleMatch.index;
+			}
+			// Process the run rule
+			var rule;
+			for(var t=0; t<this.parser.runRules.rules.length; t++) {
+				if(runRuleMatch[t+1]) {
+					rule = this.parser.runRules.rules[t];
+				}
+			}
+			if(rule) {
+				tree.push.apply(tree,rule.parse.call(this,runRuleMatch));
+			}
+			// Look for the next run rule
+			this.parser.runRules.regExp.lastIndex = this.pos;
+			runRuleMatch = this.parser.runRules.regExp.exec(this.source);
+		}
+	}
+	// Process the remaining text
+	if(this.pos < this.sourceLength) {
+		tree.push($tw.tree.Text(this.source.substr(this.pos)));
+	}
+	this.pos = this.sourceLength;
+	return tree;
+};
+
+/*
+Parse a run of text preceded by an optional class specifier `{{class}}`
+*/
+WikiTextRenderer.prototype.parseClassedRun = function(terminatorRegExp) {
+	var classRegExp = /\{\{([^\}]*)\}\}/mg,
+		className;
+	classRegExp.lastIndex = this.pos;
+	var match = classRegExp.exec(this.source);
+	if(match && match.index === this.pos) {
+		className = match[1];
+		this.pos = match.index + match[0].length;
+	}
+	var tree = this.parseRun(terminatorRegExp);
+	return {
+		"class": className,
+		tree: tree
+	};
+};
+
+/*
+The wikitext parser assembles the rules and uses the wikitext renderer to do the parsing
+*/
+var WikiTextParser = function(options) {
+    this.wiki = options.wiki;
+    // Assemble the rule regexps
+    this.blockRules = this.getRules("wikitextblockrule");
+    this.runRules = this.getRules("wikitextrunrule");
+};
+
+/*
+The wikitext parser constructs a wikitext renderer to do the work
+*/
+WikiTextParser.prototype.parse = function(type,text) {
+	return new WikiTextRenderer(text,{
+		wiki: this.wiki,
+		parser: this
+	});
+};
+
+/*
+Merge all the rule regexp strings into a single regexp
+*/
+WikiTextParser.prototype.getRules = function(moduleType) {
+	var rules = ($tw.plugins.moduleTypes[moduleType] || []).slice(0),
+		regExpStrings = [];
+	for(var t=0; t<rules.length; t++) {
+		regExpStrings.push("(" + rules[t].regExpString + ")");
+	}
+	return {
+		regExp: new RegExp(regExpStrings.join("|"),"mg"),
+		rules: rules
+	};
+};
+
+// Export the parser constructor under the type string "text/x-tiddlywiki-new"
+exports["text/x-tiddlywiki-new"] = WikiTextParser;
+
+})();
--- a/core/modules/parsers/newwikitextparser/runrules/wikilink.js
+++ b/core/modules/parsers/newwikitextparser/runrules/wikilink.js
@@ -0,0 +1,54 @@
+/*\
+title: $:/core/modules/parsers/newwikitextparser/runrules/wikilink.js
+type: application/javascript
+module-type: wikitextrunrule
+
+Wiki text run rule for wiki links
+
+\*/
+(function(){
+
+/*jslint node: true, browser: true */
+/*global $tw: false */
+"use strict";
+
+// Name of this run rule
+exports.name = "wikilink";
+
+// Character classes, as regexp source strings, used to build the wiki link pattern.
+// Note that lowerLetter also admits digits, `_` and `-`, and that anyLetterStrict
+// is anyLetter without `_` and `-`.
+var textPrimitives = {
+	upperLetter: "[A-Z\u00c0-\u00de\u0150\u0170]",
+	lowerLetter: "[a-z0-9_\\-\u00df-\u00ff\u0151\u0171]",
+	anyLetter:   "[A-Za-z0-9_\\-\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]",
+	anyLetterStrict: "[A-Za-z0-9\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]"
+};
+
+// Prefix character that suppresses linking of the wiki link that follows it
+textPrimitives.unWikiLink = "~";
+// A wiki link is either:
+//   one or more upper letters, one or more lower letters, an upper letter, then any letters; or
+//   two or more upper letters followed by one or more lower letters
+textPrimitives.wikiLink = "(?:(?:" + textPrimitives.upperLetter + "+" +
+	textPrimitives.lowerLetter + "+" +
+	textPrimitives.upperLetter +
+	textPrimitives.anyLetter + "*)|(?:" +
+	textPrimitives.upperLetter + "{2,}" +
+	textPrimitives.lowerLetter + "+))";
+
+// Matches a wiki link with an optional leading unWikiLink character
+exports.regExpString = textPrimitives.unWikiLink+"?"+textPrimitives.wikiLink;
+
+exports.parse = function(match) {
+	this.pos = match.index + match[0].length;
+	// If the link starts with the unwikilink character then just output it as plain text
+	if(match[0].substr(0,1) === textPrimitives.unWikiLink) {
+		return [$tw.Tree.Text(match[0].substr(1))];
+	}
+	// If the link has been preceded with a letter then don't treat it as a link
+	if(match.index > 0) {
+		var preRegExp = new RegExp(textPrimitives.anyLetterStrict,"mg");
+		preRegExp.lastIndex = match.index-1;
+		var preMatch = preRegExp.exec(this.source);
+		if(preMatch && preMatch.index === match.index-1) {
+			return [$tw.Tree.Text(match[0])];
+		}
+	}
+	var macroNode = $tw.Tree.Macro("link",{to: match[0]},[$tw.Tree.Text(match[0])],this.wiki);
+	this.dependencies.mergeDependencies(macroNode.dependencies);
+	return [macroNode];
+};
+
+})();