From 620add5579384ddf6c655b6157feda84afb83423 Mon Sep 17 00:00:00 2001 From: Jeremy Ruston Date: Mon, 5 Dec 2011 16:50:25 +0000 Subject: [PATCH] Integrated the TiddlyWiki wikifier A large refactoring to tidy up the interface of the TiddlyWiki wikifier code, and package it as a wiki text parser. --- js/Tiddler.js | 56 ++++++-- js/TiddlerOutput.js | 6 +- js/TiddlyWiki.js | 5 +- js/WikiTextParser.js | 153 +++++++++++++++++++++ js/{Formatter.js => WikiTextRules.js} | 63 ++++----- js/Wikifier.js | 185 -------------------------- wikitest.js | 8 +- 7 files changed, 236 insertions(+), 240 deletions(-) create mode 100644 js/WikiTextParser.js rename js/{Formatter.js => WikiTextRules.js} (92%) delete mode 100755 js/Wikifier.js diff --git a/js/Tiddler.js b/js/Tiddler.js index 6969e4180..d0863864c 100755 --- a/js/Tiddler.js +++ b/js/Tiddler.js @@ -1,17 +1,31 @@ /* Tiddlers are an immutable dictionary of name:value pairs called fields. Values can be a string, an array -of strings, or a date. +of strings, or a date. The only field that is required is the `title` field, but useful tiddlers also +have a `text` field, and some of the standard fields `modified`, `modifier`, `created`, `creator`, +`tags` and `type`. Hardcoded in the system is the knowledge that the 'tags' field is a string array, and that the 'modified' and 'created' fields are dates. All other fields are strings. +Tiddler text is parsed into a tree representation. The parsing performed depends on the type of the +tiddler: wiki text tiddlers are parsed by the wikifier, JSON tiddlers are parsed by JSON.parse(), and so on. + +The parse tree representation of the tiddler is then used for general computations involving the tiddler. For +example, outbound links can be quickly extracted from a parsed tiddler. Parsing doesn't depend on external +context such as the content of other tiddlers, and so the resulting parse tree can be safely cached. + +Rendering a tiddler is the process of producing a representation of the parse tree in the required +format (typically HTML) - this is done within the context of a TiddlyWiki store object, not at the level of +individual tiddlers. + The Tiddler object exposes the following API new Tiddler(src) - create a Tiddler given a hashmap of field values or a tiddler to clone new Tiddler(src1,src2) - create a Tiddler with the union of the fields from the sources, with the rightmost taking priority -Tiddler.fields - hashmap of tiddler fields +Tiddler.fields - hashmap of tiddler fields, OK for read-only access +tiddler.getParseTree() - returns the parse tree for the tiddler The hashmap(s) can specify the "modified" and "created" fields as strings in YYYYMMDDHHMMSSMMM format or as JavaScript date objects. The "tags" field can be given as a JavaScript array of strings or @@ -23,7 +37,8 @@ as a TiddlyWiki quoted string (eg, "one [[two three]]"). "use strict"; var utils = require("./Utils.js"), - ArgParser = require("./ArgParser.js").ArgParser; + ArgParser = require("./ArgParser.js").ArgParser, + WikiTextParser = require("./WikiTextParser.js").WikiTextParser; var Tiddler = function(/* tiddler,fields */) { this.fields = {}; @@ -36,7 +51,7 @@ var Tiddler = function(/* tiddler,fields */) { src = arg; } for(var t in src) { - var f = this.parseField(t,src[t]); + var f = this.parseTiddlerField(t,src[t]); if(f !== null) { this.fields[t] = f; } @@ -44,10 +59,10 @@ var Tiddler = function(/* tiddler,fields */) { } }; -Tiddler.prototype.parseField = function(name,value) { - var type = Tiddler.specialFields[name]; +Tiddler.prototype.parseTiddlerField = function(name,value) { + var type = Tiddler.specialTiddlerFields[name]; if(type) { - return Tiddler.specialParsers[type](value); + return Tiddler.specialTiddlerFieldParsers[type](value); } else if (typeof value === "string") { return value; } else { @@ -56,13 +71,13 @@ Tiddler.prototype.parseField = function(name,value) { }; // These are the non-string fields -Tiddler.specialFields = { +Tiddler.specialTiddlerFields = { "created": "date", "modified": "date", "tags": "array" }; -Tiddler.specialParsers = { +Tiddler.specialTiddlerFieldParsers = { date: function(value) { if(typeof value === "string") { return utils.convertFromYYYYMMDDHHMMSSMMM(value); @@ -90,4 +105,27 @@ Tiddler.specialParsers = { } }; +Tiddler.prototype.getParseTree = function() { + if(!this.parseTree) { + var type = this.fields.type || "application/x-tiddlywikitext", + parser = Tiddler.tiddlerTextParsers[type]; + if(parser) { + this.parseTree = Tiddler.tiddlerTextParsers[type].call(this); + } + } + return this.parseTree; +}; + +Tiddler.tiddlerTextParsers = { + "application/x-tiddlywikitext": function() { + return new WikiTextParser(this.fields.text); + }, + "application/javascript": function() { + // Would be useful to parse so that we can do syntax highlighting and debugging + }, + "application/json": function() { + return JSON.parse(this.fields.text); + } +}; + exports.Tiddler = Tiddler; diff --git a/js/TiddlerOutput.js b/js/TiddlerOutput.js index 73230360a..c8918e9c1 100755 --- a/js/TiddlerOutput.js +++ b/js/TiddlerOutput.js @@ -73,9 +73,9 @@ tiddlerOutput.outputTiddlerDiv = function(tid) { outputAttribute("title"); outputAttribute("creator"); outputAttribute("modifier"); - outputAttribute("created", function(v) {return utils.convertToYYYYMMDDHHMM(v)}); - outputAttribute("modified", function(v) {return utils.convertToYYYYMMDDHHMM(v)}); - outputAttribute("tags", function(v) {return tiddlerOutput.stringifyTags(v)}); + outputAttribute("created", function(v) {return utils.convertToYYYYMMDDHHMM(v);}); + outputAttribute("modified", function(v) {return utils.convertToYYYYMMDDHHMM(v);}); + outputAttribute("tags", function(v) {return tiddlerOutput.stringifyTags(v);}); // Output any other attributes for(t in attributes) { outputAttribute(t,null,true); diff --git a/js/TiddlyWiki.js b/js/TiddlyWiki.js index 677bff938..d2f013bbc 100755 --- a/js/TiddlyWiki.js +++ b/js/TiddlyWiki.js @@ -1,7 +1,8 @@ -/*global require: false, exports: false */ +/*global require: false, exports: false, console: false */ "use strict"; -var Tiddler = require("./Tiddler.js").Tiddler; +var Tiddler = require("./Tiddler.js").Tiddler, + util = require("util"); var TiddlyWiki = function TiddlyWiki(shadowStore) { this.tiddlers = {}; diff --git a/js/WikiTextParser.js b/js/WikiTextParser.js new file mode 100644 index 000000000..0b8753045 --- /dev/null +++ b/js/WikiTextParser.js @@ -0,0 +1,153 @@ +/* + +WikiTextParser.js + +Parses a block of tiddlywiki-format wiki text into a parse tree object. + +HTML elements are stored in the tree like this: + + {type: "div", attributes: { + attr1: value, + style: { + name: value, + name2: value2 + } + }, children: [ + {child}, + {child}, + ]} + +Text nodes are: + + {type: "text", value: "string of text node"} + +*/ + +/*global require: false, exports: false */ +"use strict"; + +var Tiddler = require("./Tiddler.js").Tiddler, + wikiTextRules = require("./WikiTextRules.js").wikiTextRules, + utils = require("./Utils.js"), + util = require("util"); + +var WikiTextParser = function(text) { + this.autoLinkWikiWords = true; + this.source = text; + this.nextMatch = 0; + this.tree = []; + this.output = null; + this.subWikify(this.tree); +}; + +WikiTextParser.prototype.outputText = function(place,startPos,endPos) +{ + if(startPos < endPos) { + place.push({type: "text", value: this.source.substring(startPos,endPos)}); + } +}; + +WikiTextParser.prototype.subWikify = function(output,terminator) +{ + // Handle the terminated and unterminated cases separately, this speeds up wikifikation by about 30% + if(terminator) + this.subWikifyTerm(output,new RegExp("(" + terminator + ")","mg")); + else + this.subWikifyUnterm(output); +}; + +WikiTextParser.prototype.subWikifyUnterm = function(output) +{ + // subWikify can be indirectly recursive, so we need to save the old output pointer + var oldOutput = this.output; + this.output = output; + // Get the first match + wikiTextRules.rulesRegExp.lastIndex = this.nextMatch; + var ruleMatch = wikiTextRules.rulesRegExp.exec(this.source); + while(ruleMatch) { + // Output any text before the match + if(ruleMatch.index > this.nextMatch) + this.outputText(this.output,this.nextMatch,ruleMatch.index); + // Set the match parameters for the handler + this.matchStart = ruleMatch.index; + this.matchLength = ruleMatch[0].length; + this.matchText = ruleMatch[0]; + this.nextMatch = wikiTextRules.rulesRegExp.lastIndex; + // Figure out which rule matched and call its handler + var t; + for(t=1; t this.nextMatch) + this.outputText(this.output,this.nextMatch,terminatorMatch.index); + // Set the match parameters + this.matchText = terminatorMatch[1]; + this.matchLength = terminatorMatch[1].length; + this.matchStart = terminatorMatch.index; + this.nextMatch = this.matchStart + this.matchLength; + // Restore the output pointer + this.output = oldOutput; + return; + } + // It must be a rule match; output any text before the match + if(ruleMatch.index > this.nextMatch) + this.outputText(this.output,this.nextMatch,ruleMatch.index); + // Set the match parameters + this.matchStart = ruleMatch.index; + this.matchLength = ruleMatch[0].length; + this.matchText = ruleMatch[0]; + this.nextMatch = wikiTextRules.rulesRegExp.lastIndex; + // Figure out which rule matched and call its handler + var t; + for(t=1; t this.nextMatch) - this.outputText(this.output,this.nextMatch,formatterMatch.index); - // Set the match parameters for the handler - this.matchStart = formatterMatch.index; - this.matchLength = formatterMatch[0].length; - this.matchText = formatterMatch[0]; - this.nextMatch = this.formatter.formatterRegExp.lastIndex; - // Figure out which formatter matched and call its handler - var t; - for(t=1; t this.nextMatch) - this.outputText(this.output,this.nextMatch,terminatorMatch.index); - // Set the match parameters - this.matchText = terminatorMatch[1]; - this.matchLength = terminatorMatch[1].length; - this.matchStart = terminatorMatch.index; - this.nextMatch = this.matchStart + this.matchLength; - // Restore the output pointer - this.output = oldOutput; - return; - } - // It must be a formatter match; output any text before the match - if(formatterMatch.index > this.nextMatch) - this.outputText(this.output,this.nextMatch,formatterMatch.index); - // Set the match parameters - this.matchStart = formatterMatch.index; - this.matchLength = formatterMatch[0].length; - this.matchText = formatterMatch[0]; - this.nextMatch = this.formatter.formatterRegExp.lastIndex; - // Figure out which formatter matched and call its handler - var t; - for(t=1; t