Refactoring the slicer engine for easier reuse

This commit is contained in:
Jermolene 2016-12-22 17:46:42 +00:00
parent ba9d6187af
commit a1a4bf0f9d
11 changed files with 199 additions and 168 deletions

View File

@ -12,7 +12,8 @@ Command to slice a specified tiddler
/*global $tw: false */
"use strict";
var widget = require("$:/core/modules/widgets/widget.js");
var widget = require("$:/core/modules/widgets/widget.js"),
textSlicer = require("$:/plugins/tiddlywiki/text-slicer/modules/slicer.js");
exports.info = {
name: "slice",
@ -33,12 +34,12 @@ Command.prototype.execute = function() {
wiki = this.commander.wiki,
sourceTitle = this.params[0],
destTitle = this.params[1],
slicer = new $tw.Slicer(wiki,sourceTitle,{
destTitle: destTitle
slicer = new textSlicer.Slicer({
sourceTiddlerTitle: sourceTitle,
baseTiddlerTitle: destTitle,
wiki: wiki
});
slicer.sliceTiddler()
slicer.outputTiddlers();
slicer.destroy();
wiki.addTiddlers(slicer.getTiddlers());
$tw.utils.nextTick(this.callback);
return null;
};

View File

@ -1,9 +1,17 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicer.js
type: application/javascript
module-type: global
module-type: library
Main text-slicing logic
Slice a tiddler or DOM document into individual tiddlers
var slicer = new textSlicer.Slicer({
sourceTiddlerTitle: title of the tiddler to slice -or-
sourceText: HTML text to slice -or-
sourceDoc: DOM document to slice
baseTiddlerTitle: "MySlicedTiddlers-",
role: "sliced-content",
wiki: wiki used to look up sourceTiddlerTitle
});
\*/
(function(){
@ -12,51 +20,101 @@ Main text-slicing logic
/*global $tw: false */
"use strict";
var DOMParser = require("$:/plugins/tiddlywiki/xmldom/dom-parser").DOMParser;
var DOMParser = $tw.browser ? window.DOMParser : require("$:/plugins/tiddlywiki/xmldom/dom-parser").DOMParser;
var SLICER_OUTPUT_TITLE = "$:/TextSlicer";
function Slicer(wiki,sourceTitle,options) {
options = options || {};
this.wiki = wiki;
this.sourceTitle = sourceTitle;
this.sourceTiddler = wiki.getTiddler(this.sourceTitle);
this.destTitle = options.destTitle || this.sourceTiddler.fields["doc-split-to"] || ("Sliced up " + this.sourceTitle);
this.iframe = null; // Reference to iframe used for HTML parsing
this.stopWordList = "the and a of on i".split(" ");
this.tiddlers = {};
/*
Slice a source document into tiddlers. All of the slicing happens inside the constructor; the
results accumulate in this.extractedTiddlers.
options.sourceDoc: DOM document to slice -or-
options.sourceTiddlerTitle: title of the tiddler to slice (looked up in options.wiki) -or-
options.sourceText: HTML text to slice
options.baseTiddlerTitle: title of the parent tiddler, also used as the prefix for generated titles
	(defaults to "Sliced up <sourceTiddlerTitle>", or "SlicedTiddler" when there is no source title)
options.role: value of the "role" field of the parent tiddler (defaults to "sliced-html")
options.wiki: wiki used to resolve sourceTiddlerTitle
*/
function Slicer(options) {
	// Tolerate being called without an options object
	options = options || {};
	// Marshal parameters
	this.sourceDoc = options.sourceDoc;
	this.sourceTiddlerTitle = options.sourceTiddlerTitle;
	this.sourceText = options.sourceText;
	this.wiki = options.wiki;
	if(options.baseTiddlerTitle) {
		this.baseTiddlerTitle = options.baseTiddlerTitle;
	} else if(this.sourceTiddlerTitle) {
		this.baseTiddlerTitle = "Sliced up " + this.sourceTiddlerTitle;
	} else {
		this.baseTiddlerTitle = "SlicedTiddler";
	}
	this.role = options.role || "sliced-html";
	// Initialise state
	this.extractedTiddlers = {}; // Hashmap of created tiddlers
	this.parentStack = []; // Stack of parent heading or list
	this.containerStack = []; // Stack of elements containing other elements
	this.sliceTitle = null;
	this.slicers = $tw.modules.applyMethods("slicer");
	this.anchors = Object.create(null); // Hashmap of HTML anchor ID to tiddler title
	// Get the DOM document for the source text; an explicitly supplied document wins
	if(!this.sourceDoc) {
		if(this.sourceTiddlerTitle) {
			this.sourceDoc = this.parseTiddlerText(this.sourceTiddlerTitle);
		} else {
			this.sourceDoc = this.parseHtmlText(this.sourceText);
		}
	}
	// Create parent tiddler
	console.log("Slicing to",this.baseTiddlerTitle);
	var sliceTiddler = {
		title: this.baseTiddlerTitle,
		text: "Sliced at " + (new Date()),
		"toc-type": "document",
		tags: [],
		list: [],
		role: this.role
	};
	this.addTiddler(sliceTiddler);
	// Slice the text into subordinate tiddlers, with the parent tiddler as the root heading and container
	this.parentStack.push({type: "h0", title: sliceTiddler.title});
	this.currentTiddler = sliceTiddler.title;
	this.containerStack.push(sliceTiddler.title);
	this.processNodeList(this.sourceDoc.childNodes);
	this.containerStack.pop();
}
Slicer.prototype.destroy = function() {
// Remove the iframe from the DOM
if(this.iframe && this.iframe.parentNode) {
this.iframe.parentNode.removeChild(this.iframe);
/*
Build the DOM to slice from a tiddler held in this.wiki. Tiddlers of type "text/html" go through
parseHtmlText(); every other type goes through parseWikiText(). Returns undefined when the
tiddler does not exist.
*/
Slicer.prototype.parseTiddlerText = function(title) {
	var sourceTiddler = this.wiki.getTiddler(title);
	if(!sourceTiddler) {
		return undefined;
	}
	var isHtml = sourceTiddler.fields.type === "text/html";
	return isHtml ? this.parseHtmlText(sourceTiddler.fields.text) : this.parseWikiText(sourceTiddler);
};
Slicer.prototype.addTiddler = function(fields) {
if(fields.title) {
this.tiddlers[fields.title] = $tw.utils.extend({},this.tiddlers[fields.title],fields);
return fields.title;
/*
Render a tiddler through a transclude widget into a detached <div> created from $tw.fakeDocument,
and return that <div> as the DOM to slice.
*/
Slicer.prototype.parseWikiText = function(tiddler) {
	var transcludeWidget = this.wiki.makeTranscludeWidget(tiddler.fields.title,{
		document: $tw.fakeDocument,
		parseAsInline: false,
		importPageMacros: true
	});
	var outputElement = $tw.fakeDocument.createElement("div");
	// No next sibling: the rendering is appended to the empty element
	transcludeWidget.render(outputElement,null);
	return outputElement;
};
/*
Parse HTML text into a DOM document. A missing text is treated as the empty string.
In the browser the text is written into a new iframe appended to document.body, and the iframe's
document is returned; the iframe is kept in this.iframe and is not removed by this method.
Elsewhere the text is handed to DOMParser.
*/
Slicer.prototype.parseHtmlText = function(text) {
	text = text || "";
	if($tw.browser) {
		this.iframe = document.createElement("iframe");
		document.body.appendChild(this.iframe);
		this.iframe.contentWindow.document.open();
		this.iframe.contentWindow.document.write(text);
		this.iframe.contentWindow.document.close();
		return this.iframe.contentWindow.document;
	} else {
		// Previously a stale "return null" preceded this line and made it unreachable
		return new DOMParser().parseFromString(text);
	}
};
/*
Append a child title to the "list" field of the parent tiddler, creating the parent entry if it
has not been extracted yet.
parent: title of the parent tiddler
child: title to append
*/
Slicer.prototype.addToList = function(parent,child) {
	// Look the parent up through getTiddler(); the stale this.tiddlers lookup is gone
	var parentTiddler = this.getTiddler(parent) || {},
		parentList = parentTiddler.list || [];
	parentList.push(child);
	this.addTiddler($tw.utils.extend({title: parent},parentTiddler,{list: parentList}));
};
Slicer.prototype.insertBeforeListItem = function(parent,child,beforeSibling) {
var parentTiddler = this.tiddlers[parent] || {},
var parentTiddler = this.getTiddler(parent) || {},
parentList = parentTiddler.list || [],
parentListSiblingPosition = parentList.indexOf(beforeSibling);
if(parentListSiblingPosition !== -1) {
@ -88,11 +146,10 @@ Slicer.prototype.getTopContainer = function() {
/*
Append text to the tiddler on top of the container stack. Does nothing when the stack is empty.
newText: text to add to the end of the container's "text" field
*/
Slicer.prototype.appendToCurrentContainer = function(newText) {
	var title = this.containerStack[this.containerStack.length-1];
	if(title) {
		// Look the container up through getTiddler(); the stale this.tiddlers lookup is gone
		var tiddler = this.getTiddler(title) || {},
			text = tiddler.text || "";
		this.addTiddler($tw.utils.extend({title: title},tiddler,{text: text + newText}));
	}
};
@ -108,74 +165,6 @@ Slicer.prototype.isBlank = function(s) {
return (/^[\s\xA0]*$/g).test(s);
};
/*
Parse the text field of an HTML tiddler into a DOM document. Outside the browser the text goes to
DOMParser; in the browser it is written into a new iframe appended to document.body (kept in
this.iframe) and the iframe's document is returned.
*/
Slicer.prototype.getSourceHtmlDocument = function(tiddler) {
	if(!$tw.browser) {
		return new DOMParser().parseFromString(tiddler.fields.text);
	}
	var frame = document.createElement("iframe");
	this.iframe = frame;
	document.body.appendChild(frame);
	var frameWindow = frame.contentWindow;
	frameWindow.document.open();
	frameWindow.document.write(tiddler.fields.text);
	frameWindow.document.close();
	return frameWindow.document;
};
/*
Render the tiddler named by this.sourceTitle through a transclude widget into a detached <div>
created from $tw.fakeDocument, and return that <div>. The tiddler argument is not used.
*/
Slicer.prototype.getSourceWikiDocument = function(tiddler) {
	var transcludeWidget = this.wiki.makeTranscludeWidget(this.sourceTitle,{
		document: $tw.fakeDocument,
		parseAsInline: false,
		importPageMacros: true
	});
	var outputElement = $tw.fakeDocument.createElement("div");
	transcludeWidget.render(outputElement,null);
	return outputElement;
};
/*
Return the DOM for this.sourceTiddler: "text/html" tiddlers are parsed as HTML, anything else is
rendered as wikitext.
*/
Slicer.prototype.getSourceDocument = function() {
	var source = this.sourceTiddler,
		isHtml = source.fields.type === "text/html";
	return isHtml ? this.getSourceHtmlDocument(source) : this.getSourceWikiDocument(source);
};
/*
Build a title that is not yet in use.
prefix: start of the title
rawText: optional text from which a readable suffix is derived
The text is lowercased, every non-whitespace character other than a-z and 0-9 becomes a space, and
words in this.stopWordList are dropped. Words are then appended as "-word" for as long as the suffix
stays under 50 characters. A "-<n>" counter is added while the title is already taken in
this.tiddlers or in the wiki (existing tiddler, shadow tiddler or draft).
*/
Slicer.prototype.makeUniqueTitle = function(prefix,rawText) {
	var self = this;
	// Keep lowercase letters and digits, blank out everything else
	function keepAlphanumeric(ch) {
		var isLetter = ch >= "a" && ch <= "z",
			isDigit = ch >= "0" && ch <= "9";
		return (isLetter || isDigit) ? ch : " ";
	}
	function isTaken(candidate) {
		return self.tiddlers[candidate] || self.wiki.tiddlerExists(candidate) || self.wiki.isShadowTiddler(candidate) || self.wiki.findDraft(candidate);
	}
	if(rawText) {
		var words = rawText.toLowerCase()
			.replace(/[^\s\xA0]/mg,keepAlphanumeric)
			.split(/[\s\xA0]+/mg)
			.filter(function(word) {
				return word && (self.stopWordList.indexOf(word) === -1);
			});
		// Take leading words until the next one would push the suffix to 50 characters
		var suffix = "";
		for(var index = 0; index < words.length; index++) {
			if(!((suffix.length + words[index].length + 1) < 50)) {
				break;
			}
			suffix += "-" + words[index];
		}
		prefix = prefix + suffix;
	}
	// Number the title until it no longer clashes
	var candidate = prefix,
		counter = 0;
	while(isTaken(candidate)) {
		counter += 1;
		candidate = prefix + "-" + counter;
	}
	return candidate;
};
/*
Record that the anchor with the given ID belongs to the tiddler currently being built
*/
Slicer.prototype.registerAnchor = function(id) {
	var owner = this.currentTiddler;
	this.anchors[id] = owner;
};
@ -202,37 +191,69 @@ Slicer.prototype.processNode = function(domNode) {
}
};
// Slice a tiddler into individual tiddlers
Slicer.prototype.sliceTiddler = function() {
var sliceTitle,sliceTiddler = {};
if(this.sourceTiddler) {
sliceTiddler = $tw.utils.extend({},this.sourceTiddler.fields);
/*
Build a title, prefixed with this.baseTiddlerTitle, that has not yet been extracted.
rawText: optional text from which a readable suffix is derived
The text is lowercased, every non-whitespace character other than a-z and 0-9 becomes a space, and
a fixed list of common words is dropped. Words are then appended as "-word" for as long as the
suffix stays under 50 characters. A "-<n>" counter is added while getTiddler() reports the title
as already taken.
*/
Slicer.prototype.makeUniqueTitle = function(rawText) {
	var prefix = this.baseTiddlerTitle,
		// Split once rather than once per word
		stopWords = "the and a of on i".split(" "),
		cleanText;
	if(rawText) {
		// Replace non alpha characters with spaces
		cleanText = rawText.toLowerCase().replace(/[^\s\xA0]/mg,function($0) {
			if(($0 >= "a" && $0 <= "z") || ($0 >= "0" && $0 <= "9")) {
				return $0;
			} else {
				return " ";
			}
		});
		// Split on word boundaries
		var words = cleanText.split(/[\s\xA0]+/mg);
		// Remove empty strings and common words
		words = words.filter(function(word) {
			return word && (stopWords.indexOf(word) === -1);
		});
		// Accumulate the number of words that will fit
		var c = 0,
			s = "";
		while(c < words.length && (s.length + words[c].length + 1) < 50) {
			s += "-" + words[c++];
		}
		prefix = prefix + s;
	}
	// Check for duplicates among the tiddlers extracted so far
	var baseTitle = prefix,
		count = 0,
		title = baseTitle;
	while(this.getTiddler(title)) {
		title = baseTitle + "-" + (++count);
	}
	return title;
};
// Output directly to the output tiddlers
Slicer.prototype.outputTiddlers = function() {
/*
Store a shallow copy of the given fields under fields.title, replacing any tiddler previously
extracted with that title. Fields without a title are not stored. Returns fields.title.
*/
Slicer.prototype.addTiddler = function(fields) {
	var title = fields.title;
	if(!title) {
		return title;
	}
	this.extractedTiddlers[title] = Object.assign({},fields);
	return title;
};
/*
Add each entry of an array of tiddler field objects through addTiddler(). A missing array is
treated as empty.
*/
Slicer.prototype.addTiddlers = function(fieldsArray) {
	var self = this;
	(fieldsArray || []).forEach(function(fields) {
		self.addTiddler(fields);
	});
};
/*
Return the fields of the extracted tiddler with the given title, or undefined if there is none.
Only own entries of this.extractedTiddlers count, so titles such as "constructor" or "toString"
are not mistaken for tiddlers through members inherited from Object.prototype.
*/
Slicer.prototype.getTiddler = function(title) {
	if(Object.prototype.hasOwnProperty.call(this.extractedTiddlers,title)) {
		return this.extractedTiddlers[title];
	}
	return undefined;
};
/*
Return the field objects of all extracted tiddlers as an array, in the key order of
this.extractedTiddlers
*/
Slicer.prototype.getTiddlers = function() {
	var titles = Object.keys(this.extractedTiddlers),
		results = [];
	for(var t = 0; t < titles.length; t++) {
		results.push(this.extractedTiddlers[titles[t]]);
	}
	return results;
};
exports.Slicer = Slicer;
})();

View File

@ -16,7 +16,7 @@ exports.processDefinitionNode = function(domNode,tagName) {
var text = $tw.utils.htmlEncode(domNode.textContent);
if(domNode.nodeType === 1 && tagName === "dd") {
// if(!this.isBlank(text)) {
var title = this.makeUniqueTitle("definition",text),
var title = this.makeUniqueTitle("definition " + text),
parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [];
if(domNode.className && domNode.className.trim() !== "") {

View File

@ -15,7 +15,7 @@ Handle slicing heading nodes
exports.processHeadingNode = function(domNode,tagName) {
if(domNode.nodeType === 1 && (tagName === "h1" || tagName === "h2" || tagName === "h3" || tagName === "h4")) {
var text = $tw.utils.htmlEncode(domNode.textContent);
var title = this.makeUniqueTitle("heading",text),
var title = this.makeUniqueTitle("heading " + text),
parentTitle = this.popParentStackUntil(tagName),
tags = [];
if(domNode.className && domNode.className.trim() !== "") {

View File

@ -21,8 +21,8 @@ exports.processImageNode = function(domNode,tagName) {
text = parts[1],
contentTypeInfo = $tw.config.contentTypeInfo[type],
containerTitle = this.getTopContainer(),
containerTiddler = this.tiddlers[containerTitle],
title = this.makeUniqueTitle("image",containerTitle) + contentTypeInfo.extension,
containerTiddler = this.getTiddler(containerTitle),
title = this.makeUniqueTitle("image " + containerTitle) + contentTypeInfo.extension,
tiddler = {
title: title,
type: parts[0],
@ -47,7 +47,7 @@ exports.processImageNode = function(domNode,tagName) {
case "item":
// Create a new older sibling item to contain the image
var parentTitle = this.parentStack[this.parentStack.length - 1].title,
itemTitle = this.makeUniqueTitle("image-item-wrapper",containerTitle),
itemTitle = this.makeUniqueTitle("image-item-wrapper " + containerTitle),
itemTiddler = {
title: itemTitle,
"toc-type": "item",

View File

@ -16,7 +16,7 @@ exports.processListItemNode = function(domNode,tagName) {
var text = $tw.utils.htmlEncode(domNode.textContent);
if(domNode.nodeType === 1 && tagName === "li") {
// if(!this.isBlank(text)) {
var title = this.makeUniqueTitle("list-item",text),
var title = this.makeUniqueTitle("list-item " + text),
parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [];
if(domNode.className && domNode.className.trim() !== "") {

View File

@ -14,7 +14,7 @@ Handle slicing list nodes
exports.processListNode = function(domNode,tagName) {
if(domNode.nodeType === 1 && (tagName === "ul" || tagName === "ol")) {
var title = this.makeUniqueTitle("list-" + tagName),
var title = this.makeUniqueTitle("list " + tagName),
parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [];
if(domNode.className && domNode.className.trim() !== "") {

View File

@ -18,7 +18,7 @@ exports.processParagraphNode = function(domNode,tagName) {
if(!this.isBlank(text)) {
var parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [],
title = this.makeUniqueTitle("paragraph",text);
title = this.makeUniqueTitle("paragraph " + text);
if(domNode.className && domNode.className && domNode.className.trim() !== "") {
tags = tags.concat(domNode.className.split(" "));
}

View File

@ -16,7 +16,7 @@ exports.processTermNode = function(domNode,tagName) {
var text = $tw.utils.htmlEncode(domNode.textContent);
if(domNode.nodeType === 1 && tagName === "dt") {
// if(!this.isBlank(text)) {
var title = this.makeUniqueTitle("term",text),
var title = this.makeUniqueTitle("term " + text),
parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [];
if(domNode.className && domNode.className.trim() !== "") {

View File

@ -1,33 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/startup/slicer-message.js
type: application/javascript
module-type: startup
Setup the root widget event handlers
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Export name and synchronous status
exports.name = "slicer";
exports.platforms = ["browser"];
exports.after = ["startup"];
exports.synchronous = true;
// Install the root widget event handlers
exports.startup = function() {
$tw.rootWidget.addEventListener("tm-slice-tiddler",function(event) {
var slicer = new $tw.Slicer($tw.wiki,event.param,{
destTitle: event.paramObject && event.paramObject.destTitle
});
slicer.sliceTiddler(event.param)
slicer.outputTiddlers();
slicer.destroy();
});
};
})();

View File

@ -0,0 +1,42 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/startup/slicer-startup.js
type: application/javascript
module-type: startup
Setup the root widget event handlers
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
var textSlicer = require("$:/plugins/tiddlywiki/text-slicer/modules/slicer.js");
// Export name and synchronous status
exports.name = "slicer";
exports.platforms = ["browser"];
exports.after = ["startup"];
exports.synchronous = true;
// Install the root widget event handlers
exports.startup = function() {
// Check xmldom is installed
if(!$tw.utils.hop($tw.modules.titles,"$:/plugins/tiddlywiki/xmldom/dom-parser")) {
// Make a logger
var logger = new $tw.utils.Logger("text-slicer");
logger.alert("The plugin 'text-slicer' requires the 'xmldom' plugin to be installed");
}
// Add tm-slice-tiddler event handler
$tw.rootWidget.addEventListener("tm-slice-tiddler",function(event) {
var slicer = new textSlicer.Slicer({
sourceTiddlerTitle: event.param,
baseTiddlerTitle: event.paramObject && event.paramObject.destTitle,
wiki: $tw.wiki
});
$tw.wiki.addTiddlers(slicer.getTiddlers());
});
};
})();