From 3cfad8b044730e6e88419d50d784259b7759113c Mon Sep 17 00:00:00 2001 From: Jermolene Date: Mon, 10 Aug 2015 11:41:23 +0100 Subject: [PATCH] Refactoring of text-slicer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we process the rendered HTML of tiddlers, which allows us to process HTML generated by MS Word. In fact, the HTML that MS Word generates is so awful that I’ve instead been using Mammoth to do the conversion: https://github.com/mwilliamson/mammoth.js Also includes some necessary improvements to the fake DOM implementation. --- core/modules/utils/fakedom.js | 18 +++ plugins/tiddlywiki/text-slicer/slicer.js | 185 +++++++++++++++-------- 2 files changed, 141 insertions(+), 62 deletions(-) diff --git a/core/modules/utils/fakedom.js b/core/modules/utils/fakedom.js index 0d6ea6d42..1dc8cd45f 100755 --- a/core/modules/utils/fakedom.js +++ b/core/modules/utils/fakedom.js @@ -43,6 +43,12 @@ var TW_Element = function(tag,namespace) { this.namespaceURI = namespace || "http://www.w3.org/1999/xhtml"; }; +Object.defineProperty(TW_Element.prototype, "nodeType", { + get: function() { + return 1; + } +}); + TW_Element.prototype.setAttribute = function(name,value) { if(this.isRaw) { throw "Cannot setAttribute on a raw TW_Element"; @@ -93,6 +99,12 @@ TW_Element.prototype.hasChildNodes = function() { return !!this.children.length; }; +Object.defineProperty(TW_Element.prototype, "childNodes", { + get: function() { + return this.children; + } +}); + Object.defineProperty(TW_Element.prototype, "firstChild", { get: function() { return this.children[0]; @@ -103,6 +115,12 @@ TW_Element.prototype.addEventListener = function(type,listener,useCapture) { // Do nothing }; +Object.defineProperty(TW_Element.prototype, "tagName", { + get: function() { + return this.tag || ""; + } +}); + Object.defineProperty(TW_Element.prototype, "className", { get: function() { return this.attributes["class"] || ""; diff --git a/plugins/tiddlywiki/text-slicer/slicer.js
b/plugins/tiddlywiki/text-slicer/slicer.js index b0e96846b..77574da63 100644 --- a/plugins/tiddlywiki/text-slicer/slicer.js +++ b/plugins/tiddlywiki/text-slicer/slicer.js @@ -23,21 +23,63 @@ var SLICER_OUTPUT_TITLE = "$:/TextSlicer"; // Install the root widget event handlers exports.startup = function() { $tw.rootWidget.addEventListener("tm-slice-tiddler",function(event) { + var slicer = new Slicer($tw.wiki,event.param); + // slicer.sliceTiddler(); + // slicer.outputTiddlers(); // Slice up and output the tiddler - outputTiddlers(sliceTiddler(event.param),event.param,event.param); + slicer.outputTiddlers(slicer.sliceTiddler(event.param),event.param,event.param); + slicer.destroy(); }); }; -var currentId = 0; - -function nextId() { - return ++currentId; +function Slicer(wiki,sourceTitle) { + this.wiki = wiki; + this.sourceTitle = sourceTitle; + this.currentId = 0; + this.iframe = null; // Reference to iframe used for HTML parsing } +Slicer.prototype.destroy = function() { + // Remove the iframe from the DOM + if(this.iframe && this.iframe.parentNode) { + this.iframe.parentNode.removeChild(this.iframe); + } +}; + +Slicer.prototype.nextId = function() { + return ++this.currentId; +}; + +Slicer.prototype.getSourceHtmlDocument = function(tiddler) { + this.iframe = document.createElement("iframe"); + document.body.appendChild(this.iframe); + this.iframe.contentWindow.document.open(); + this.iframe.contentWindow.document.write(tiddler.fields.text); + this.iframe.contentWindow.document.close(); + return this.iframe.contentWindow.document; +}; + +Slicer.prototype.getSourceWikiDocument = function(tiddler) { + var widgetNode = this.wiki.makeTranscludeWidget(this.sourceTitle,{document: $tw.fakeDocument, parseAsInline: false}), + container = $tw.fakeDocument.createElement("div"); + widgetNode.render(container,null); + return container; +}; + +Slicer.prototype.getSourceDocument = function() { + var tiddler = $tw.wiki.getTiddler(this.sourceTitle); + if(tiddler.fields.type === 
"text/html") { + return this.getSourceHtmlDocument(tiddler); + } else { + return this.getSourceWikiDocument(tiddler); + } +}; + // Slice a tiddler into individual tiddlers -function sliceTiddler(title) { - var tiddlers = {}, - parser = $tw.wiki.parseTiddler(title), +Slicer.prototype.sliceTiddler = function(title) { + var self = this, + tiddlers = {}, + domNode = this.getSourceDocument(), parentStack = [], addTiddler = function(fields) { if(fields.title) { @@ -73,64 +115,83 @@ function sliceTiddler(title) { } while(true); return parentStack[parentStack.length - 1].title; }, - processNodeList = function(nodeList) { - $tw.utils.each(nodeList,function(parseTreeNode) { + isBlank = function(s) { + return (/^[\s\xA0]*$/mg).test(s); + }, + processNodeList = function(domNodeList) { + $tw.utils.each(domNodeList,function(domNode) { var parentTitle, - text = $tw.utils.getParseTreeText(parseTreeNode); - if(parseTreeNode.type === "element" && (parseTreeNode.tag === "h1" || parseTreeNode.tag === "h2" || parseTreeNode.tag === "h3" || parseTreeNode.tag === "h4")) { - parentTitle = popParentStackUntil(parseTreeNode.tag); - addToList(parentTitle,text); - parentStack.push({type: parseTreeNode.tag, title: addTiddler({ - title: text, - text: "<>", - list: [], - tags: [parentTitle] - })}); - } else if(parseTreeNode.type === "element" && (parseTreeNode.tag === "ul" || parseTreeNode.tag === "ol")) { - var listTitle = title + "-list-" + nextId(); - parentTitle = parentStack[parentStack.length - 1].title; - addToList(parentTitle,listTitle); - parentStack.push({type: parseTreeNode.tag, title: addTiddler({ - title: listTitle, - text: "<>", - list: [], - tags: [parentTitle] - })}); - processNodeList(parseTreeNode.children); - parentStack.pop(); - } else if(parseTreeNode.type === "element" && parseTreeNode.tag === "li") { - var listItemTitle = title + "-listitem-" + nextId(); - parentTitle = parentStack[parentStack.length - 1].title; - addToList(parentTitle,listItemTitle); - addTiddler({ - 
title: listItemTitle, - text: text, - list: [], - tags: [parentTitle] - }); - } else if(parseTreeNode.type === "element" && parseTreeNode.tag === "p") { - parentTitle = parentStack[parentStack.length - 1].title; - addToList(parentTitle,addTiddler({ - title: title + "-para-" + nextId(), - text: text, - tags: [parentTitle] - })); + text = domNode.textContent, + nodeType = domNode.nodeType; + if(nodeType === 1) { + var tagName = domNode.tagName.toLowerCase(); + + if(tagName === "p" && (domNode.getAttribute("style") || "").indexOf("mso-list:") !== -1) { + tagName = "li"; + } + + if(tagName === "h1" || tagName === "h2" || tagName === "h3" || tagName === "h4") { + if(!isBlank(text)) { + parentTitle = popParentStackUntil(tagName); + addToList(parentTitle,text); + parentStack.push({type: tagName, title: addTiddler({ + title: text, + text: "<>", + list: [], + tags: [parentTitle] + })}); + } + } else if(tagName === "ul" || tagName === "ol") { + var listTitle = title + "-list-" + self.nextId(); + parentTitle = parentStack[parentStack.length - 1].title; + addToList(parentTitle,listTitle); + parentStack.push({type: tagName, title: addTiddler({ + title: listTitle, + text: "<>", + list: [], + tags: [parentTitle] + })}); + processNodeList(domNode.childNodes); + parentStack.pop(); + } else if(tagName === "li") { + if(!isBlank(text)) { + var listItemTitle = title + "-listitem-" + self.nextId(); + parentTitle = parentStack[parentStack.length - 1].title; + addToList(parentTitle,listItemTitle); + addTiddler({ + title: listItemTitle, + text: text, + list: [], + tags: [parentTitle] + }); + } + } else if(tagName === "p") { + if(!isBlank(text)) { + parentTitle = parentStack[parentStack.length - 1].title; + addToList(parentTitle,addTiddler({ + title: title + "-para-" + self.nextId(), + text: text, + tags: [parentTitle] + })); + } + } else if(domNode.hasChildNodes()) { + processNodeList(domNode.childNodes); + } } }); }; - if(parser) { - parentStack.push({type: "h0", title: addTiddler({ - 
title: "Sliced up " + title, - text: "{{||$:/plugins/tiddlywiki/text-slicer/templates/display-document}}", - list: [] - })}); - processNodeList(parser.tree); - } + parentStack.push({type: "h0", title: addTiddler({ + title: "Sliced up " + title, + text: "{{||$:/plugins/tiddlywiki/text-slicer/templates/display-document}}", + list: [] + })}); +console.log(domNode); + processNodeList(domNode.childNodes); return tiddlers; -} +}; // Output directly to the output tiddlers -function outputTiddlers(tiddlers,title,navigateFromTitle) { +Slicer.prototype.outputTiddlers = function(tiddlers,title,navigateFromTitle) { $tw.utils.each(tiddlers,function(tiddlerFields) { var title = tiddlerFields.title; if(title) { @@ -140,10 +201,10 @@ function outputTiddlers(tiddlers,title,navigateFromTitle) { // Navigate to output var story = new $tw.Story({wiki: $tw.wiki}); story.navigateTiddler("Sliced up " + title,navigateFromTitle); -} +}; // Output via an import tiddler -function outputTiddlers_viaImportTiddler(tiddlers,navigateFromTitle) { +Slicer.prototype.outputTiddlers_viaImportTiddler = function(tiddlers,navigateFromTitle) { // Get the current slicer output tiddler var slicerOutputTiddler = $tw.wiki.getTiddler(SLICER_OUTPUT_TITLE), slicerOutputData = $tw.wiki.getTiddlerData(SLICER_OUTPUT_TITLE,{}), @@ -167,6 +228,6 @@ function outputTiddlers_viaImportTiddler(tiddlers,navigateFromTitle) { // Navigate to output var story = new $tw.Story({wiki: $tw.wiki}); story.navigateTiddler(SLICER_OUTPUT_TITLE,navigateFromTitle); -} +}; })();