1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2025-01-26 00:46:52 +00:00

Refactoring of text-slicer

Now we process the rendered HTML of tiddlers, which allows us to
process HTML generated by MS Word. In fact, the HTML that MS Word
generates is so awful that I’ve instead been using Mammoth to do the
conversion: https://github.com/mwilliamson/mammoth.js

Also some necessary improvements to the fake dom implementation.
This commit is contained in:
Jermolene 2015-08-10 11:41:23 +01:00
parent 010b01f205
commit 3cfad8b044
2 changed files with 141 additions and 62 deletions

View File

@ -43,6 +43,12 @@ var TW_Element = function(tag,namespace) {
this.namespaceURI = namespace || "http://www.w3.org/1999/xhtml"; this.namespaceURI = namespace || "http://www.w3.org/1999/xhtml";
}; };
// Read-only nodeType property: every TW_Element reports the value 1
Object.defineProperty(TW_Element.prototype, "nodeType", {
	get: function() {
		var elementNodeType = 1;
		return elementNodeType;
	}
});
TW_Element.prototype.setAttribute = function(name,value) { TW_Element.prototype.setAttribute = function(name,value) {
if(this.isRaw) { if(this.isRaw) {
throw "Cannot setAttribute on a raw TW_Element"; throw "Cannot setAttribute on a raw TW_Element";
@ -93,6 +99,12 @@ TW_Element.prototype.hasChildNodes = function() {
return !!this.children.length; return !!this.children.length;
}; };
// Read-only childNodes property: exposes the element's own children array (not a copy)
Object.defineProperty(TW_Element.prototype, "childNodes", {
	get: function() {
		var kids = this.children;
		return kids;
	}
});
Object.defineProperty(TW_Element.prototype, "firstChild", { Object.defineProperty(TW_Element.prototype, "firstChild", {
get: function() { get: function() {
return this.children[0]; return this.children[0];
@ -103,6 +115,12 @@ TW_Element.prototype.addEventListener = function(type,listener,useCapture) {
// Do nothing // Do nothing
}; };
// Read-only tagName property: the element's tag, or an empty string when no tag is set
Object.defineProperty(TW_Element.prototype, "tagName", {
	get: function() {
		var tag = this.tag;
		return tag ? tag : "";
	}
});
Object.defineProperty(TW_Element.prototype, "className", { Object.defineProperty(TW_Element.prototype, "className", {
get: function() { get: function() {
return this.attributes["class"] || ""; return this.attributes["class"] || "";

View File

@ -23,21 +23,63 @@ var SLICER_OUTPUT_TITLE = "$:/TextSlicer";
// Install the root widget event handlers // Install the root widget event handlers
exports.startup = function() { exports.startup = function() {
$tw.rootWidget.addEventListener("tm-slice-tiddler",function(event) { $tw.rootWidget.addEventListener("tm-slice-tiddler",function(event) {
var slicer = new Slicer($tw.wiki,event.param);
// slicer.sliceTiddler();
// slicer.outputTiddlers();
// Slice up and output the tiddler // Slice up and output the tiddler
outputTiddlers(sliceTiddler(event.param),event.param,event.param); slicer.outputTiddlers(slicer.sliceTiddler(event.param),event.param,event.param);
slicer.destroy();
}); });
}; };
var currentId = 0; function Slicer(wiki,sourceTitle) {
this.wiki = wiki;
function nextId() { this.sourceTitle = sourceTitle;
return ++currentId; this.currentId = 0;
this.iframe = null; // Reference to iframe used for HTML parsing
} }
// Release the resources held by the slicer: detach the parsing iframe if it is still attached to a parent
Slicer.prototype.destroy = function() {
	var frame = this.iframe,
		host = frame && frame.parentNode;
	if(host) {
		host.removeChild(frame);
	}
};
// Advance this slicer's counter and return the new value
Slicer.prototype.nextId = function() {
	this.currentId++;
	return this.currentId;
};
// Parse the text of an HTML tiddler into a DOM document by writing it into a temporary iframe
// tiddler: the tiddler whose "text" field holds the HTML source
// Returns the iframe's document. The iframe stays attached to document.body (and referenced
// by this.iframe) until destroy() removes it
Slicer.prototype.getSourceHtmlDocument = function(tiddler) {
	// Detach any iframe left over from an earlier call so that it is not orphaned in the page
	if(this.iframe && this.iframe.parentNode) {
		this.iframe.parentNode.removeChild(this.iframe);
	}
	this.iframe = document.createElement("iframe");
	// Sandbox without "allow-scripts" so that scripts embedded in the imported HTML do not run;
	// "allow-same-origin" keeps the iframe's document reachable from this page
	this.iframe.setAttribute("sandbox","allow-same-origin");
	document.body.appendChild(this.iframe);
	this.iframe.contentWindow.document.open();
	// A tiddler without a text field must not be written out as the literal string "undefined"
	this.iframe.contentWindow.document.write(tiddler.fields.text || "");
	this.iframe.contentWindow.document.close();
	return this.iframe.contentWindow.document;
};
// Render the source tiddler through a transclude widget into a fake DOM <div> and return that container
// (the tiddler parameter is not used; the title is taken from this.sourceTitle)
Slicer.prototype.getSourceWikiDocument = function(tiddler) {
	var fakeDoc = $tw.fakeDocument,
		options = {document: fakeDoc, parseAsInline: false},
		transcluder = this.wiki.makeTranscludeWidget(this.sourceTitle,options),
		wrapper = fakeDoc.createElement("div");
	transcluder.render(wrapper,null);
	return wrapper;
};
// Build the DOM that is to be sliced from the source tiddler (this.sourceTitle)
// Tiddlers of type text/html are parsed as HTML; everything else is rendered as wikitext
// Returns the node whose childNodes should be processed
Slicer.prototype.getSourceDocument = function() {
	// Look the tiddler up in the wiki this slicer was constructed with, as getSourceWikiDocument() does
	var tiddler = this.wiki.getTiddler(this.sourceTitle);
	// Guard against the title not resolving to a tiddler (presumably getTiddler() returns
	// undefined then - TODO confirm); in that case fall through to the wikitext path, which
	// only needs the title
	if(tiddler && tiddler.fields.type === "text/html") {
		return this.getSourceHtmlDocument(tiddler);
	} else {
		return this.getSourceWikiDocument(tiddler);
	}
};
// Slice a tiddler into individual tiddlers // Slice a tiddler into individual tiddlers
function sliceTiddler(title) { Slicer.prototype.sliceTiddler = function(title) {
var tiddlers = {}, var self = this,
parser = $tw.wiki.parseTiddler(title), tiddlers = {},
domNode = this.getSourceDocument(),
parentStack = [], parentStack = [],
addTiddler = function(fields) { addTiddler = function(fields) {
if(fields.title) { if(fields.title) {
@ -73,64 +115,83 @@ function sliceTiddler(title) {
} while(true); } while(true);
return parentStack[parentStack.length - 1].title; return parentStack[parentStack.length - 1].title;
}, },
processNodeList = function(nodeList) { isBlank = function(s) {
$tw.utils.each(nodeList,function(parseTreeNode) { return (/^[\s\xA0]*$/mg).test(s);
},
processNodeList = function(domNodeList) {
$tw.utils.each(domNodeList,function(domNode) {
var parentTitle, var parentTitle,
text = $tw.utils.getParseTreeText(parseTreeNode); text = domNode.textContent,
if(parseTreeNode.type === "element" && (parseTreeNode.tag === "h1" || parseTreeNode.tag === "h2" || parseTreeNode.tag === "h3" || parseTreeNode.tag === "h4")) { nodeType = domNode.nodeType;
parentTitle = popParentStackUntil(parseTreeNode.tag); if(nodeType === 1) {
addToList(parentTitle,text); var tagName = domNode.tagName.toLowerCase();
parentStack.push({type: parseTreeNode.tag, title: addTiddler({
title: text, if(tagName === "p" && (domNode.getAttribute("style") || "").indexOf("mso-list:") !== -1) {
text: "<<display-heading-tiddler level:'" + parseTreeNode.tag + "'>>", tagName = "li";
list: [], }
tags: [parentTitle]
})}); if(tagName === "h1" || tagName === "h2" || tagName === "h3" || tagName === "h4") {
} else if(parseTreeNode.type === "element" && (parseTreeNode.tag === "ul" || parseTreeNode.tag === "ol")) { if(!isBlank(text)) {
var listTitle = title + "-list-" + nextId(); parentTitle = popParentStackUntil(tagName);
parentTitle = parentStack[parentStack.length - 1].title; addToList(parentTitle,text);
addToList(parentTitle,listTitle); parentStack.push({type: tagName, title: addTiddler({
parentStack.push({type: parseTreeNode.tag, title: addTiddler({ title: text,
title: listTitle, text: "<<display-heading-tiddler level:'" + tagName + "'>>",
text: "<<display-list-tiddler type:'" + parseTreeNode.tag + "'>>", list: [],
list: [], tags: [parentTitle]
tags: [parentTitle] })});
})}); }
processNodeList(parseTreeNode.children); } else if(tagName === "ul" || tagName === "ol") {
parentStack.pop(); var listTitle = title + "-list-" + self.nextId();
} else if(parseTreeNode.type === "element" && parseTreeNode.tag === "li") { parentTitle = parentStack[parentStack.length - 1].title;
var listItemTitle = title + "-listitem-" + nextId(); addToList(parentTitle,listTitle);
parentTitle = parentStack[parentStack.length - 1].title; parentStack.push({type: tagName, title: addTiddler({
addToList(parentTitle,listItemTitle); title: listTitle,
addTiddler({ text: "<<display-list-tiddler type:'" + tagName + "'>>",
title: listItemTitle, list: [],
text: text, tags: [parentTitle]
list: [], })});
tags: [parentTitle] processNodeList(domNode.childNodes);
}); parentStack.pop();
} else if(parseTreeNode.type === "element" && parseTreeNode.tag === "p") { } else if(tagName === "li") {
parentTitle = parentStack[parentStack.length - 1].title; if(!isBlank(text)) {
addToList(parentTitle,addTiddler({ var listItemTitle = title + "-listitem-" + self.nextId();
title: title + "-para-" + nextId(), parentTitle = parentStack[parentStack.length - 1].title;
text: text, addToList(parentTitle,listItemTitle);
tags: [parentTitle] addTiddler({
})); title: listItemTitle,
text: text,
list: [],
tags: [parentTitle]
});
}
} else if(tagName === "p") {
if(!isBlank(text)) {
parentTitle = parentStack[parentStack.length - 1].title;
addToList(parentTitle,addTiddler({
title: title + "-para-" + self.nextId(),
text: text,
tags: [parentTitle]
}));
}
} else if(domNode.hasChildNodes()) {
processNodeList(domNode.childNodes);
}
} }
}); });
}; };
if(parser) { parentStack.push({type: "h0", title: addTiddler({
parentStack.push({type: "h0", title: addTiddler({ title: "Sliced up " + title,
title: "Sliced up " + title, text: "{{||$:/plugins/tiddlywiki/text-slicer/templates/display-document}}",
text: "{{||$:/plugins/tiddlywiki/text-slicer/templates/display-document}}", list: []
list: [] })});
})}); console.log(domNode);
processNodeList(parser.tree); processNodeList(domNode.childNodes);
}
return tiddlers; return tiddlers;
} };
// Output directly to the output tiddlers // Output directly to the output tiddlers
function outputTiddlers(tiddlers,title,navigateFromTitle) { Slicer.prototype.outputTiddlers = function(tiddlers,title,navigateFromTitle) {
$tw.utils.each(tiddlers,function(tiddlerFields) { $tw.utils.each(tiddlers,function(tiddlerFields) {
var title = tiddlerFields.title; var title = tiddlerFields.title;
if(title) { if(title) {
@ -140,10 +201,10 @@ function outputTiddlers(tiddlers,title,navigateFromTitle) {
// Navigate to output // Navigate to output
var story = new $tw.Story({wiki: $tw.wiki}); var story = new $tw.Story({wiki: $tw.wiki});
story.navigateTiddler("Sliced up " + title,navigateFromTitle); story.navigateTiddler("Sliced up " + title,navigateFromTitle);
} };
// Output via an import tiddler // Output via an import tiddler
function outputTiddlers_viaImportTiddler(tiddlers,navigateFromTitle) { Slicer.prototype.outputTiddlers_viaImportTiddler = function(tiddlers,navigateFromTitle) {
// Get the current slicer output tiddler // Get the current slicer output tiddler
var slicerOutputTiddler = $tw.wiki.getTiddler(SLICER_OUTPUT_TITLE), var slicerOutputTiddler = $tw.wiki.getTiddler(SLICER_OUTPUT_TITLE),
slicerOutputData = $tw.wiki.getTiddlerData(SLICER_OUTPUT_TITLE,{}), slicerOutputData = $tw.wiki.getTiddlerData(SLICER_OUTPUT_TITLE,{}),
@ -167,6 +228,6 @@ function outputTiddlers_viaImportTiddler(tiddlers,navigateFromTitle) {
// Navigate to output // Navigate to output
var story = new $tw.Story({wiki: $tw.wiki}); var story = new $tw.Story({wiki: $tw.wiki});
story.navigateTiddler(SLICER_OUTPUT_TITLE,navigateFromTitle); story.navigateTiddler(SLICER_OUTPUT_TITLE,navigateFromTitle);
} };
})(); })();