From 6c210df0bd3757923a88fd4949f3477b3788d17b Mon Sep 17 00:00:00 2001 From: Jeremy Ruston Date: Wed, 3 Apr 2013 12:57:17 +0100 Subject: [PATCH] Update HTML deserializer to cope with TiddlyWiki classic and TW5 files --- core/modules/commands/load.js | 3 +- core/modules/deserializers.js | 108 ++++++++++++++++++++------------- core/modules/widgets/import.js | 4 -- 3 files changed, 66 insertions(+), 49 deletions(-) diff --git a/core/modules/commands/load.js b/core/modules/commands/load.js index 4fea99e28..619417a9c 100644 --- a/core/modules/commands/load.js +++ b/core/modules/commands/load.js @@ -35,8 +35,7 @@ Command.prototype.execute = function() { self.callback(err); } else { var fields = {title: self.params[0]}, - extname = path.extname(self.params[0]), - type = extname === ".html" ? "application/vnd.tiddlywiki2" : extname; + type = path.extname(self.params[0]); var tiddlers = self.commander.wiki.deserializeTiddlers(type,data,fields); if(!tiddlers) { self.callback("No tiddlers found in file \"" + self.params[0] + "\""); diff --git a/core/modules/deserializers.js b/core/modules/deserializers.js index 48da39605..1bdb2cbf4 100644 --- a/core/modules/deserializers.js +++ b/core/modules/deserializers.js @@ -13,22 +13,29 @@ Functions to deserialise tiddlers from a block of text "use strict"; /* -Utility function to parse an old-style tiddler DIV. It looks like this: +Utility function to parse an old-style tiddler DIV in a *.tid file. It looks like this:
The text of the tiddler (without the expected HTML encoding).
 
-Note that the field attributes are HTML encoded, but that the body of the <PRE> tag is not.
+Note that the field attributes are HTML encoded, but that the body of the <PRE> tag is not encoded.
+
+When these tiddler DIVs are encountered within a TiddlyWiki HTML file then the body is encoded in the usual way.
 */
-var parseTiddlerDiv = function(text,fields) {
+var parseTiddlerDiv = function(text /* [,fields] */) {
+	// Slot together the default results
 	var result = {};
-	if(fields) {
-		for(var t in fields) {
-			result[t] = fields[t];		
+	if(arguments.length > 1) {
+		for(var f=1; f]*)>((?:.|\n)*)<\/div>\s*$/gi,
 		subDivRegExp = /^\s*
((?:.|\n)*)<\/pre>\s*$/gi,
 		attrRegExp = /\s*([^=\s]+)\s*=\s*"([^"]*)"/gi,
@@ -49,8 +56,10 @@ var parseTiddlerDiv = function(text,fields) {
 				result[name] = value;
 			}
 		} while(attrMatch);
+		return result;
+	} else {
+		return undefined;
 	}
-	return result;	
 };
 
 exports["application/x-tiddler-html-div"] = function(text,fields) {
@@ -75,43 +84,56 @@ exports["application/json"] = function(text,fields) {
 	return result;
 };
 
-exports["application/vnd.tiddlywiki2"] = function(text,fields) {
-	var locateStoreArea = function(tiddlywikidoc) {
-			var startSaveArea = '
', - startSaveAreaRegExp = /
/gi, - endSaveArea = '', - endSaveAreaCaps = '', - posOpeningDiv = tiddlywikidoc.search(startSaveAreaRegExp), - limitClosingDiv = tiddlywikidoc.indexOf("<"+"!--POST-STOREAREA--"+">"); - if(limitClosingDiv == -1) { - limitClosingDiv = tiddlywikidoc.indexOf("<"+"!--POST-BODY-START--"+">"); - } - var start = limitClosingDiv == -1 ? tiddlywikidoc.length : limitClosingDiv, - posClosingDiv = tiddlywikidoc.lastIndexOf(endSaveArea,start); - if(posClosingDiv == -1) { - posClosingDiv = tiddlywikidoc.lastIndexOf(endSaveAreaCaps,start); - } - return (posOpeningDiv != -1 && posClosingDiv != -1) ? [posOpeningDiv + startSaveArea.length,posClosingDiv] : null; - }, - results = [], - storeAreaPos = locateStoreArea(text); - if(storeAreaPos) { - var endOfDivRegExp = /(<\/div>\s*)/gi, - startPos = storeAreaPos[0]; - endOfDivRegExp.lastIndex = startPos; - var match = endOfDivRegExp.exec(text); - while(match && startPos < storeAreaPos[1]) { - var endPos = endOfDivRegExp.lastIndex, - tiddlerFields = parseTiddlerDiv(text.substring(startPos,endPos),fields); - if(tiddlerFields.text !== null) { - tiddlerFields.text = $tw.utils.htmlDecode(tiddlerFields.text); - results.push(tiddlerFields); - } - startPos = endPos; - match = endOfDivRegExp.exec(text); - } +/* +Parse an HTML file into tiddlers. There are three possibilities: +# A TiddlyWiki classic HTML file containing `application/vnd.tiddlywiki2` tiddlers +# A TiddlyWiki5 HTML file containing `application/vnd.tiddlywiki` tiddlers +# An ordinary HTML file +*/ +exports["text/html"] = function(text,fields) { + // Check if we've got a store area + var storeAreaMarkerRegExp = /
/gi, + match = storeAreaMarkerRegExp.exec(text); + if(match) { + // If so, it's either a classic TiddlyWiki file or a TW5 file + return deserializeTiddlyWikiFile(text,storeAreaMarkerRegExp.lastIndex,!!match[1],fields); + } else { + // It's not a TiddlyWiki so we'll return the entire HTML file as a tiddler + return deserializeHtmlFile(text,fields); } - return results; }; +function deserializeHtmlFile(text,fields) { + var result = {}; + $tw.utils.each(fields,function(value,name) { + result[name] = value; + }); + result.text = text; + result.type = "text/html"; + return [result]; +} + +function deserializeTiddlyWikiFile(text,storeAreaEnd,isTiddlyWiki5,fields) { + var results = [], + endOfDivRegExp = /(<\/div>\s*)/gi, + startPos = storeAreaEnd, + defaultType = isTiddlyWiki5 ? "application/vnd.tiddlywiki": "application/vnd.tiddlywiki2"; + endOfDivRegExp.lastIndex = startPos; + var match = endOfDivRegExp.exec(text); + while(match) { + var endPos = endOfDivRegExp.lastIndex, + tiddlerFields = parseTiddlerDiv(text.substring(startPos,endPos),fields,{type: defaultType}); + if(!tiddlerFields) { + break; + } + if(tiddlerFields.text !== null) { + tiddlerFields.text = $tw.utils.htmlDecode(tiddlerFields.text); + results.push(tiddlerFields); + } + startPos = endPos; + match = endOfDivRegExp.exec(text); + } + return results; +} + })(); diff --git a/core/modules/widgets/import.js b/core/modules/widgets/import.js index 077fda023..4817bb320 100644 --- a/core/modules/widgets/import.js +++ b/core/modules/widgets/import.js @@ -136,10 +136,6 @@ ImportWidget.prototype.importFiles = function(files) { } } } - // Override HTML files so that they're recognised as TiddlyWiki files - if(type === "text/html") { - type = "application/vnd.tiddlywiki2"; - } // Figure out if we're reading a binary file var contentTypeInfo = $tw.config.contentTypeInfo[type], isBinary = contentTypeInfo ? contentTypeInfo.encoding === "base64" : false;