1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2024-12-24 00:50:28 +00:00

First pass at a new wiki text parser

This one respects HTML paragraphs properly
This commit is contained in:
Jeremy Ruston 2012-05-26 18:30:32 +01:00
parent 85f1b33ef2
commit 92353d37b2
12 changed files with 570 additions and 5 deletions

View File

@ -0,0 +1,55 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/blockrules/class.js
type: application/javascript
module-type: wikitextblockrule
Wiki text block rule for assigning classes to paragraphs and other blocks
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Identifier for this block rule
exports.name = "class";
// Matches the opening line of a classed block: "{{", then the class names (any characters
// other than "{" or a line break), then "{" at the end of the line. The inner group is
// non-capturing.
exports.regExpString = "\\{\\{(?:[^\\{\\r\\n]*)\\{$";
exports.parse = function(match) {
var tree = [],
reStart = /\{\{([^\{\r\n]*){(?:\r?\n)?/mg,
reEnd = /(\}\}\}$(?:\r?\n)?)/mg,
endMatch;
reStart.lastIndex = this.pos;
match = reStart.exec(this.source);
if(match) {
this.pos = match.index + match[0].length;
// Skip any whitespace
this.skipWhitespace();
// Check if we've got the end marker
reEnd.lastIndex = this.pos;
endMatch = reEnd.exec(this.source);
// Parse the text into blocks
while(this.pos < this.sourceLength && !(endMatch && endMatch.index === this.pos)) {
var blocks = this.parseBlock();
for(var t=0; t<blocks.length; t++) {
blocks[t].addClass(match[1]);
tree.push(blocks[t]);
}
// Skip any whitespace
this.skipWhitespace();
// Check if we've got the end marker
reEnd.lastIndex = this.pos;
endMatch = reEnd.exec(this.source);
}
reEnd.lastIndex = this.pos;
endMatch = reEnd.exec(this.source);
if(endMatch) {
this.pos = endMatch.index + endMatch[0].length;
}
return tree;
}
};
})();

View File

@ -0,0 +1,25 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/blockrules/heading.js
type: application/javascript
module-type: wikitextblockrule
Wiki text block rule for headings
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Identifier for this block rule
exports.name = "heading";
// Matches a run of one to six "!" characters
exports.regExpString = "!{1,6}";
exports.parse = function(match) {
this.pos = match.index + match[0].length;
var classedRun = this.parseClassedRun(/(\r?\n)/mg);
return [$tw.Tree.Element("h1",{"class": classedRun["class"]},classedRun.tree)];
};
})();

View File

@ -0,0 +1,54 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/blockrules/html.js
type: application/javascript
module-type: wikitextblockrule
Wiki text block rule for block level HTML elements
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Identifier for this block rule
exports.name = "html";
exports.regExpString = "<[A-Za-z]+\\s*[^>]*>";
exports.parse = function(match) {
var reStart = /<([A-Za-z]+)(\s*[^>]*)>/mg,
reAttr = /\s*([A-Za-z\-_]+)(?:\s*=\s*(?:("[^"]*")|('[^']*')|([^"'\s]+)))?/mg;
reStart.lastIndex = this.pos;
var startMatch = reStart.exec(this.source);
if(startMatch && startMatch.index === this.pos) {
var attrMatch = reAttr.exec(startMatch[2]),
attributes = {};
while(attrMatch) {
var name = attrMatch[1],
value;
if(attrMatch[2]) { // Double quoted
value = attrMatch[2].substring(1,attrMatch[2].length-1);
} else if(attrMatch[3]) { // Single quoted
value = attrMatch[3].substring(1,attrMatch[3].length-1);
} else if(attrMatch[4]) { // Unquoted
value = attrMatch[4];
} else { // Valueless
value = true; // TODO: We should have a way of indicating we want an attribute without a value
}
attributes[name] = value;
attrMatch = reAttr.exec(startMatch[2]);
}
this.pos = startMatch.index + startMatch[0].length;
var reEnd = new RegExp("(</" + startMatch[1] + ">)","mg"),
element = $tw.Tree.Element(startMatch[1],attributes,this.parseRun(reEnd));
reEnd.lastIndex = this.pos;
match = reEnd.exec(this.source);
if(match && match.index === this.pos) {
this.pos = match.index + match[0].length;
}
return [element];
}
};
})();

View File

@ -0,0 +1,87 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/blockrules/list.js
type: application/javascript
module-type: wikitextblockrule
Wiki text block rule for lists.
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Identifier for this block rule
exports.name = "list";
exports.regExpString = "[\\*#;:]+";
var listTypes = {
"*": {listTag: "ul", itemTag: "li"},
"#": {listTag: "ol", itemTag: "li"},
";": {listTag: "dl", itemTag: "dt"},
":": {listTag: "dl", itemTag: "dd"}
};
/*
*/
exports.parse = function(match) {
var listStack = [], // Array containing list elements for the previous row in the list
t, listInfo, listElement, itemElement, previousRootListTag;
// Cycle through the rows in the list
do {
// Walk through the list markers for the current row
for(t=0; t<match[0].length; t++) {
listInfo = listTypes[match[0].charAt(t)];
// Remove any stacked up element if we can't re-use it because the list type doesn't match
if(listStack.length > t && listStack[t].type !== listInfo.listTag) {
listStack.splice(t,listStack.length - t);
}
// Construct the list element or reuse the previous one at this level
if(listStack.length <= t) {
listElement = $tw.Tree.Element(listInfo.listTag,{},[$tw.Tree.Element(listInfo.itemTag,{},[])]);
// Link this list element into the last child item of the parent list item
if(t) {
var prevListItem = listStack[t-1].children[listStack[t-1].children.length-1];
prevListItem.children.push(listElement);
}
// Save this element in the stack
listStack[t] = listElement;
} else if(t === (match[0].length - 1)) {
listStack[t].children.push($tw.Tree.Element(listInfo.itemTag,{},[]));
}
}
if(listStack.length > match[0].length) {
listStack.splice(match[0].length,listStack.length - match[0].length);
}
// Skip the list markers
this.pos = match.index + match[0].length;
// Process the body of the list item into the last list item
var lastListInfo = listTypes[match[0].charAt(match[0].length-1)],
lastListChildren = listStack[listStack.length-1].children,
lastListItem = lastListChildren[lastListChildren.length-1],
classedRun = this.parseClassedRun(/(\r?\n)/mg);
for(t=0; t<classedRun.tree.length; t++) {
lastListItem.children.push(classedRun.tree[t]);
}
if(classedRun["class"]) {
lastListItem.addClass(classedRun["class"]);
}
// Remember the root list tag of this list item
previousRootListTag = listStack[0].type;
// Consume any whitespace following the list item
this.skipWhitespace();
// Lookahead to see if the next line is part of the same list
var nextListItemRegExp = /(^[\*#;:]+)/mg;
nextListItemRegExp.lastIndex = this.pos;
match = nextListItemRegExp.exec(this.source);
listInfo = match ? listTypes[match[0].charAt(0)] : null;
} while(match && match.index === this.pos && listInfo && previousRootListTag === listInfo.listTag);
// Return the root element of the list
return [listStack[0]];
};
})();

View File

@ -0,0 +1,24 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/blockrules/rule.js
type: application/javascript
module-type: wikitextblockrule
Wiki text block rule for horizontal rules
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Identifier for this block rule
exports.name = "rule";
exports.regExpString = "-{3,}\r?\n";
exports.parse = function(match) {
this.pos = match.index + match[0].length;
return [$tw.Tree.Element("hr",{},[])];
};
})();

View File

@ -0,0 +1,182 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/newwikitextparser.js
type: application/javascript
module-type: parser
A new-school wikitext parser
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
/*
Constructor for the wikitext renderer. Parsing happens immediately: by the time the
constructor returns, `this.tree` holds the nodes of every block in the text.
text: the wikitext source (a falsy value is treated as an empty string)
options: object supplying `wiki` and `parser` (the parser provides the rule sets)
*/
var WikiTextRenderer = function(text,options) {
	// References handed over by the parser
	this.wiki = options.wiki;
	this.parser = options.parser;
	// Source text and the current parse position within it
	this.source = text || "";
	this.sourceLength = this.source.length;
	this.pos = 0;
	// Output of the parse
	this.tree = [];
	this.dependencies = new $tw.Dependencies();
	// Parse block after block, appending the nodes of each one, until the source is used up
	while(this.pos < this.sourceLength) {
		Array.prototype.push.apply(this.tree,this.parseBlock());
	}
};
/*
Now make WikiTextRenderer inherit from the default Renderer class
*/
var Renderer = require("$:/core/modules/renderer.js").Renderer;
WikiTextRenderer.prototype = new Renderer();
WikiTextRenderer.constructor = WikiTextRenderer;
/*
Parse a block of text at the current position.
Leading whitespace is skipped first. If a block rule matches exactly at the resulting
position, that rule's `parse` function is called with the renderer as `this`; otherwise
the text is parsed as a run and wrapped in a paragraph.
Returns an array of tree nodes. The array is empty when only whitespace was left in the
source, so that trailing whitespace does not produce an empty paragraph.
*/
WikiTextRenderer.prototype.parseBlock = function() {
	var blockRules = this.parser.blockRules,
		match, rule, t;
	this.skipWhitespace();
	// Nothing but whitespace remained
	if(this.pos >= this.sourceLength) {
		return [];
	}
	// Look for a block rule
	blockRules.regExp.lastIndex = this.pos;
	match = blockRules.regExp.exec(this.source);
	if(blockRules.rules.length && match && match.index === this.pos) {
		// Capture group t+1 of the combined regexp belongs to rule t
		for(t=0; t<blockRules.rules.length; t++) {
			if(match[t+1]) {
				rule = blockRules.rules[t];
			}
		}
		return rule ? rule.parse.call(this,match) : [];
	}
	// Treat it as a paragraph if we didn't find a block rule
	return [$tw.Tree.Element("p",{},this.parseRun())];
};
/*
Advance the current position past any whitespace that starts exactly at it.
The position is left untouched when the character at it is not whitespace.
*/
WikiTextRenderer.prototype.skipWhitespace = function() {
	var gapRegExp = /(\s+)/mg,
		gap;
	gapRegExp.lastIndex = this.pos;
	gap = gapRegExp.exec(this.source);
	// Whitespace found further along does not count
	if(!gap || gap.index !== this.pos) {
		return;
	}
	this.pos = gap.index + gap[0].length;
};
/*
Parse a run of text at the current position
terminatorRegExp: a global regexp at which to stop the run (defaults to a blank line)
Returns an array of tree nodes

The run ends at the first terminator that is not preceded by a run rule match; the
position is left at the start of the terminator, which is not consumed. If no terminator
is found the run extends to the end of the source. Plain text between run rule matches
is emitted as text nodes.
*/
WikiTextRenderer.prototype.parseRun = function(terminatorRegExp) {
	var tree = [],
		runRules = this.parser.runRules,
		// With no run rules the combined regexp is empty and would match at every position
		hasRunRules = runRules.rules.length > 0,
		terminatorMatch, runRuleMatch, rule, t;
	// Find the next occurrence of the terminator
	terminatorRegExp = terminatorRegExp || /(\r?\n\r?\n)/mg;
	terminatorRegExp.lastIndex = this.pos;
	terminatorMatch = terminatorRegExp.exec(this.source);
	// Find the next occurrence of a runrule
	runRules.regExp.lastIndex = this.pos;
	runRuleMatch = hasRunRules ? runRules.regExp.exec(this.source) : null;
	// Loop around until we've reached the end of the text
	while(this.pos < this.sourceLength && (terminatorMatch || runRuleMatch)) {
		// Return if we've found the terminator, and it precedes any run rule match
		if(terminatorMatch) {
			if(!runRuleMatch || runRuleMatch.index > terminatorMatch.index) {
				if(terminatorMatch.index > this.pos) {
					tree.push($tw.Tree.Text(this.source.substring(this.pos,terminatorMatch.index)));
				}
				this.pos = terminatorMatch.index;
				return tree;
			}
		}
		// Process any run rule, along with the text preceding it
		if(runRuleMatch) {
			// Preceding text
			if(runRuleMatch.index > this.pos) {
				tree.push($tw.Tree.Text(this.source.substring(this.pos,runRuleMatch.index)));
				this.pos = runRuleMatch.index;
			}
			// Process the run rule; capture group t+1 of the combined regexp belongs to rule t
			rule = undefined;
			for(t=0; t<runRules.rules.length; t++) {
				if(runRuleMatch[t+1]) {
					rule = runRules.rules[t];
				}
			}
			if(rule) {
				tree.push.apply(tree,rule.parse.call(this,runRuleMatch));
			}
			// If the rule consumed the terminator we had found, look for the next one so that
			// the position is never moved backwards
			if(terminatorMatch && terminatorMatch.index < this.pos) {
				terminatorRegExp.lastIndex = this.pos;
				terminatorMatch = terminatorRegExp.exec(this.source);
			}
			// Look for the next run rule
			runRules.regExp.lastIndex = this.pos;
			runRuleMatch = runRules.regExp.exec(this.source);
		}
	}
	// Process the remaining text
	if(this.pos < this.sourceLength) {
		tree.push($tw.Tree.Text(this.source.substr(this.pos)));
	}
	this.pos = this.sourceLength;
	return tree;
};
/*
Parse a run of text that may start with a class specifier of the form `{{class}}`.
The specifier only counts when it begins exactly at the current position.
terminatorRegExp: passed straight through to parseRun
Returns an object with `class` (the specifier text, or undefined when absent) and `tree`
(the nodes of the run)
*/
WikiTextRenderer.prototype.parseClassedRun = function(terminatorRegExp) {
	var specifierRegExp = /\{\{([^\}]*)\}\}/mg,
		result = {"class": undefined, tree: null},
		specifier;
	specifierRegExp.lastIndex = this.pos;
	specifier = specifierRegExp.exec(this.source);
	if(specifier && specifier.index === this.pos) {
		result["class"] = specifier[1];
		// Step over the specifier
		this.pos = specifier.index + specifier[0].length;
	}
	result.tree = this.parseRun(terminatorRegExp);
	return result;
};
/*
Constructor for the wikitext parser. It collects the block rules and the run rules once,
up front; the renderer then uses them to do the actual parsing.
options: object supplying `wiki`
*/
var WikiTextParser = function(options) {
	// Property name on the parser paired with the module type that supplies its rules
	var ruleSets = [
			["blockRules","wikitextblockrule"],
			["runRules","wikitextrunrule"]
		];
	this.wiki = options.wiki;
	// Assemble the rule regexps
	for(var i=0; i<ruleSets.length; i++) {
		this[ruleSets[i][0]] = this.getRules(ruleSets[i][1]);
	}
};
/*
Parse a piece of text by constructing a wikitext renderer for it.
type: accepted but not used here
text: the wikitext source to parse
Returns the new WikiTextRenderer, which is given this parser and its wiki
*/
WikiTextParser.prototype.parse = function(type,text) {
	var rendererOptions = {
			wiki: this.wiki,
			parser: this
		};
	return new WikiTextRenderer(text,rendererOptions);
};
/*
Collect the rule modules of a given module type and merge their regexp strings into a
single regexp. Each rule's string is wrapped in its own capturing group and the groups
are joined as alternatives, so group number t+1 corresponds to rule t.
moduleType: key into $tw.plugins.moduleTypes (a missing entry is treated as no rules)
Returns an object with `regExp` (the combined multiline, global regexp) and `rules` (a
copy of the array of rule modules)
*/
WikiTextParser.prototype.getRules = function(moduleType) {
	var registered = $tw.plugins.moduleTypes[moduleType] || [],
		ruleList = registered.slice(0),
		alternatives = ruleList.map(function(rule) {
			return "(" + rule.regExpString + ")";
		});
	return {
		regExp: new RegExp(alternatives.join("|"),"mg"),
		rules: ruleList
	};
};
// Export the parser constructor under the content type "text/x-tiddlywiki-new"
exports["text/x-tiddlywiki-new"] = WikiTextParser;
})();

View File

@ -0,0 +1,54 @@
/*\
title: $:/core/modules/parsers/newwikitextparser/runrules/wikilink.js
type: application/javascript
module-type: wikitextrunrule
Wiki text run rule for wiki links
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Identifier for this run rule
exports.name = "wikilink";
var textPrimitives = {
upperLetter: "[A-Z\u00c0-\u00de\u0150\u0170]",
lowerLetter: "[a-z0-9_\\-\u00df-\u00ff\u0151\u0171]",
anyLetter: "[A-Za-z0-9_\\-\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]",
anyLetterStrict: "[A-Za-z0-9\u00c0-\u00de\u00df-\u00ff\u0150\u0170\u0151\u0171]"
};
textPrimitives.unWikiLink = "~";
textPrimitives.wikiLink = "(?:(?:" + textPrimitives.upperLetter + "+" +
textPrimitives.lowerLetter + "+" +
textPrimitives.upperLetter +
textPrimitives.anyLetter + "*)|(?:" +
textPrimitives.upperLetter + "{2,}" +
textPrimitives.lowerLetter + "+))";
exports.regExpString = textPrimitives.unWikiLink+"?"+textPrimitives.wikiLink;
exports.parse = function(match) {
this.pos = match.index + match[0].length;
// If the link starts with the unwikilink character then just output it as plain text
if(match[0].substr(0,1) === textPrimitives.unWikiLink) {
return [$tw.Tree.Text(match[0].substr(1))];
}
// If the link has been preceded with a letter then don't treat it as a link
if(match.index > 0) {
var preRegExp = new RegExp(textPrimitives.anyLetterStrict,"mg");
preRegExp.lastIndex = match.index-1;
var preMatch = preRegExp.exec(this.source);
if(preMatch && preMatch.index === match.index-1) {
return [$tw.Tree.Text(match[0])];
}
}
var macroNode = $tw.Tree.Macro("link",{to: match[0]},[$tw.Tree.Text(match[0])],this.wiki);
this.dependencies.mergeDependencies(macroNode.dependencies);
return [macroNode];
};
})();

View File

@ -144,6 +144,14 @@ Element.prototype.broadcastEvent = function(event) {
return true;
};
/*
Add a class to the element's `class` attribute. The attribute is kept as an array: a
string value is first split on spaces and a missing value starts as an empty array.
className: the class to append
*/
Element.prototype.addClass = function(className) {
	var classes = this.attributes["class"];
	if(typeof classes === "string") {
		classes = classes.split(" ");
	}
	if(!classes) {
		classes = [];
	}
	classes.push(className);
	this.attributes["class"] = classes;
};
exports.Element = Element;
})();

View File

@ -1,6 +1,4 @@
<h1>Welcome to <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-missing' href='TiddlyWiki5'>TiddlyWiki5</a></h1><div class='tw-tiddler-frame' data-tiddler-target='HelloThere' data-tiddler-template='HelloThere'>Welcome to TiddlyWiki5, a reboot of TiddlyWiki, the venerable, reusable non-linear personal web notebook first released in 2004. It is a complete interactive wiki that can run from a single HTML file in the browser or as a powerful [[node.js application|What is node.js?]].
TiddlyWiki5 is currently at version &lt;&lt;version&gt;&gt; and is under active development, which is to say that it is useful but incomplete. You can try out the online prototype at http://tiddlywiki.com/tiddlywiki5, [[try out the command line incarnation|TryingOutTiddlyWiki]], get involved in the [[development on GitHub|https://github.com/Jermolene/TiddlyWiki5]] or join the discussions on [[the TiddlyWikiDev Google Group|http://groups.google.com/group/TiddlyWikiDev]].</div><br><h1>Usage</h1><div class='tw-tiddler-frame' data-tiddler-target='CommandLineInterface' data-tiddler-template='CommandLineInterface'><a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-missing' href='TiddlyWiki5'>TiddlyWiki5</a> can be used on the command line to perform an extensive set of operations based on tiddlers, <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-resolves' href='TiddlerFiles'>TiddlerFiles</a> and <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-missing' href='TiddlyWikiFiles'>TiddlyWikiFiles</a>. For example, this loads the tiddlers from a <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-resolves' href='TiddlyWiki'>TiddlyWiki</a> HTML file and then saves one of them in HTML:<br><pre>node core/boot.js --verbose --load mywiki.html --savetiddler ReadMe ./readme.html
<h1>Welcome to <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-missing' href='TiddlyWiki5'>TiddlyWiki5</a></h1><div class='tw-tiddler-frame' data-tiddler-target='HelloThere' data-tiddler-template='HelloThere'>Welcome to <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-missing' href='TiddlyWiki5'>TiddlyWiki5</a>, a reboot of <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-resolves' href='TiddlyWiki'>TiddlyWiki</a>, the venerable, reusable non-linear personal web notebook first released in 2004. It is a complete interactive wiki that can run from a single HTML file in the browser or as a powerful <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-missing' href='What%20is%20node.js%3F'>node.js application</a>.<br><br><a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-missing' href='TiddlyWiki5'>TiddlyWiki5</a> is currently at version 5.0.0.a2 and is under active development, which is to say that it is useful but incomplete. You can try out the online prototype at <a class='tw-tiddlylink tw-tiddlylink-external' href='http://tiddlywiki.com/tiddlywiki5'>http://tiddlywiki.com/tiddlywiki5</a>, <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-resolves' href='TryingOutTiddlyWiki'>try out the command line incarnation</a>, get involved in the <a class='tw-tiddlylink tw-tiddlylink-external' href='https://github.com/Jermolene/TiddlyWiki5'>development on GitHub</a> or join the discussions on <a class='tw-tiddlylink tw-tiddlylink-external' href='http://groups.google.com/group/TiddlyWikiDev'>the TiddlyWikiDev Google Group</a>.</div><br><h1>Usage</h1><div class='tw-tiddler-frame' data-tiddler-target='CommandLineInterface' data-tiddler-template='CommandLineInterface'><a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-missing' href='TiddlyWiki5'>TiddlyWiki5</a> can be used on the command line to perform an extensive set of operations based on tiddlers, <a class='tw-tiddlylink tw-tiddlylink-internal 
tw-tiddlylink-resolves' href='TiddlerFiles'>TiddlerFiles</a> and <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-missing' href='TiddlyWikiFiles'>TiddlyWikiFiles</a>. For example, this loads the tiddlers from a <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-resolves' href='TiddlyWiki'>TiddlyWiki</a> HTML file and then saves one of them in HTML:<br><pre>node core/boot.js --verbose --load mywiki.html --savetiddler ReadMe ./readme.html
</pre><h2>Usage</h2>Running <code>boot.js</code> from the command line boots the <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-resolves' href='TiddlyWiki'>TiddlyWiki</a> kernel, loads the core plugins and establishes an empty wiki store. It then sequentially processes the command line arguments from left to right. The arguments are separated with spaces. The commands are identified by the prefix <code>--</code>.<br><pre>node core/boot.js [--&lt;option&gt; [&lt;arg&gt;[,&lt;arg&gt;]]]
</pre><h2>Commands</h2>The following commands are available.<br><br><h3> load</h3>Load tiddlers from 2.x.x <a class='tw-tiddlylink tw-tiddlylink-internal tw-tiddlylink-resolves' href='TiddlyWiki'>TiddlyWiki</a> files (<code>.html</code>), <code>.tiddler</code>, <code>.tid</code>, <code>.json</code> or other files <br><pre>--load &lt;filepath&gt;
</pre><h3> savetiddler</h3>Save an individual tiddler as a specified MIME type, defaults to <code>text/html</code> <br><pre>--savetiddler &lt;title&gt; &lt;filename&gt; [&lt;type&gt;]

View File

@ -1,7 +1,7 @@
title: HelloThere
modifier: JeremyRuston
tags: introduction
type: text/x-tiddlywiki-new
type: text/x-tiddlywiki
Welcome to TiddlyWiki5, a reboot of TiddlyWiki, the venerable, reusable non-linear personal web notebook first released in 2004. It is a complete interactive wiki that can run from a single HTML file in the browser or as a powerful [[node.js application|What is node.js?]].

View File

@ -19,6 +19,7 @@ Learning more about TiddlyWiki5:
Some useful tiddlers for feature testing:
* HelloThere
* TestingNewWikiText shows off the embryonic new wiki text engine
* ImageTests showing different ways of embedding images
* SampleData showing how JSON tiddlers are handled
* SampleJavaScript and SampleJavaScriptWithError showing how JavaScript code is displayed
@ -33,7 +34,7 @@ Technical documentation includes:
* Overview of TiddlyWikiArchitecture
** MacroInternals
* Information about TiddlerFiles and RecipeFiles
* NewWikiTextFeatures
* A discussion of potential NewWikiTextFeatures
All tiddlers:
<<list all>>

View File

@ -0,0 +1,77 @@
title: TestingNewWikiText
type: text/x-tiddlywiki-new
! This is a heading
HelloThere
One two three four. With a link to HelloThere. And a link to TiddlyWiki and TiddlyWiki5. And a suppressed link to ~HelloThere.
! This is a new heading
This is a paragraph
immediately after
that heading
----
* This is a list
* Of lots of items
** And subitems
*# And nested numbered lists inside ordinary lists
*# Yes
*## Definitely
*##* And then
*##** back
*##*** to items
*## And back to numbers once more
*# More numbering
* And back to items
----
; definitionile
: definitionate
; definitionilisation
: definitionatisative
----
This is a list with a class wrapped around it:
{{myclass andanotherone{
* One and one
** Two and three
* Four and five
** Six and Seven
}}}
And here's another one:
{{class1 class2{
* Un et deux
**{{class}} Two and three
* Four and five
** Trois et cinq
}}}
And here's a class wrapped around a heading:
{{class1{
! My heading
}}}
And here's a class assigned directly to a heading:
!!!!{{class1}} My very beautiful heading
---
Here are some HTML paragraph blocks:
<article class="hello">
This is my nice and simple block of text
</article>
And another:
<article class="hello" mysignal data-thing='Nothing'>This time the text is all squashed up, without line breaks</article>