1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2025-10-25 20:57:38 +00:00

Refactoring the slicer engine for easier reuse

This commit is contained in:
Jermolene
2016-12-22 17:46:42 +00:00
parent ba9d6187af
commit a1a4bf0f9d
11 changed files with 199 additions and 168 deletions

View File

@@ -12,7 +12,8 @@ Command to slice a specified tiddler
/*global $tw: false */ /*global $tw: false */
"use strict"; "use strict";
var widget = require("$:/core/modules/widgets/widget.js"); var widget = require("$:/core/modules/widgets/widget.js"),
textSlicer = require("$:/plugins/tiddlywiki/text-slicer/modules/slicer.js");
exports.info = { exports.info = {
name: "slice", name: "slice",
@@ -33,12 +34,12 @@ Command.prototype.execute = function() {
wiki = this.commander.wiki, wiki = this.commander.wiki,
sourceTitle = this.params[0], sourceTitle = this.params[0],
destTitle = this.params[1], destTitle = this.params[1],
slicer = new $tw.Slicer(wiki,sourceTitle,{ slicer = new textSlicer.Slicer({
destTitle: destTitle sourceTiddlerTitle: sourceTitle,
baseTiddlerTitle: destTitle,
wiki: wiki
}); });
slicer.sliceTiddler() wiki.addTiddlers(slicer.getTiddlers());
slicer.outputTiddlers();
slicer.destroy();
$tw.utils.nextTick(this.callback); $tw.utils.nextTick(this.callback);
return null; return null;
}; };

View File

@@ -1,9 +1,17 @@
/*\ /*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicer.js title: $:/plugins/tiddlywiki/text-slicer/modules/slicer.js
type: application/javascript type: application/javascript
module-type: global module-type: library
Main text-slicing logic Slice a tiddler or DOM document into individual tiddlers
var slicer = new textSlicer.Slicer({
sourceTiddlerTitle: tiddler to slice -or-
sourceText: text to slice -or-
sourceDoc: DOM document to slice
baseTiddlerTitle: "MySlicedTiddlers-",
role: "sliced-content",
wiki: wiki used to look up and render the source tiddler (needed with sourceTiddlerTitle)
});
\*/ \*/
(function(){ (function(){
@@ -12,51 +20,101 @@ Main text-slicing logic
/*global $tw: false */ /*global $tw: false */
"use strict"; "use strict";
var DOMParser = require("$:/plugins/tiddlywiki/xmldom/dom-parser").DOMParser; var DOMParser = $tw.browser ? window.DOMParser : require("$:/plugins/tiddlywiki/xmldom/dom-parser").DOMParser;
var SLICER_OUTPUT_TITLE = "$:/TextSlicer"; function Slicer(options) {
// Marshal parameters
function Slicer(wiki,sourceTitle,options) { this.sourceDoc = options.sourceDoc;
options = options || {}; this.sourceTiddlerTitle = options.sourceTiddlerTitle;
this.wiki = wiki; this.sourceText = options.sourceText;
this.sourceTitle = sourceTitle; this.wiki = options.wiki;
this.sourceTiddler = wiki.getTiddler(this.sourceTitle); if(options.baseTiddlerTitle) {
this.destTitle = options.destTitle || this.sourceTiddler.fields["doc-split-to"] || ("Sliced up " + this.sourceTitle); this.baseTiddlerTitle = options.baseTiddlerTitle
this.iframe = null; // Reference to iframe used for HTML parsing } else {
this.stopWordList = "the and a of on i".split(" "); if(this.sourceTiddlerTitle) {
this.tiddlers = {}; this.baseTiddlerTitle = "Sliced up " + this.sourceTiddlerTitle;
} else {
this.baseTiddlerTitle = "SlicedTiddler";
}
}
this.role = options.role || "sliced-html";
// Initialise state
this.extractedTiddlers = {}; // Hashmap of created tiddlers
this.parentStack = []; // Stack of parent heading or list this.parentStack = []; // Stack of parent heading or list
this.containerStack = []; // Stack of elements containing other elements this.containerStack = []; // Stack of elements containing other elements
this.sliceTitle = null;
this.slicers = $tw.modules.applyMethods("slicer"); this.slicers = $tw.modules.applyMethods("slicer");
this.anchors = Object.create(null); // Hashmap of HTML anchor ID to tiddler title this.anchors = Object.create(null); // Hashmap of HTML anchor ID to tiddler title
// Get the DOM document for the source text
if(!this.sourceDoc) {
if(this.sourceTiddlerTitle) {
this.sourceDoc = this.parseTiddlerText(this.sourceTiddlerTitle);
} else {
this.sourceDoc = this.parseHtmlText(this.sourceText);
}
}
// Create parent tiddler
console.log("Slicing to",this.baseTiddlerTitle)
var sliceTiddler = {
title: this.baseTiddlerTitle,
text: "Sliced at " + (new Date()),
"toc-type": "document",
tags: [],
list: [],
role: this.role
};
this.addTiddler(sliceTiddler);
// Slice the text into subordinate tiddlers
this.parentStack.push({type: "h0", title: sliceTiddler.title});
this.currentTiddler = sliceTiddler.title;
this.containerStack.push(sliceTiddler.title);
this.processNodeList(this.sourceDoc.childNodes);
this.containerStack.pop();
} }
Slicer.prototype.destroy = function() { Slicer.prototype.parseTiddlerText = function(title) {
// Remove the iframe from the DOM var tiddler = this.wiki.getTiddler(title);
if(this.iframe && this.iframe.parentNode) { if(tiddler) {
this.iframe.parentNode.removeChild(this.iframe); if(tiddler.fields.type === "text/html") {
return this.parseHtmlText(tiddler.fields.text);
} else {
return this.parseWikiText(tiddler);
}
} }
}; };
Slicer.prototype.addTiddler = function(fields) { Slicer.prototype.parseWikiText = function(tiddler) {
if(fields.title) { var widgetNode = this.wiki.makeTranscludeWidget(tiddler.fields.title,{
this.tiddlers[fields.title] = $tw.utils.extend({},this.tiddlers[fields.title],fields); document: $tw.fakeDocument,
return fields.title; parseAsInline: false,
importPageMacros: true}),
container = $tw.fakeDocument.createElement("div");
widgetNode.render(container,null);
return container;
};
Slicer.prototype.parseHtmlText = function(text) {
text = text || "";
if($tw.browser) {
this.iframe = document.createElement("iframe");
document.body.appendChild(this.iframe);
this.iframe.contentWindow.document.open();
this.iframe.contentWindow.document.write(text);
this.iframe.contentWindow.document.close();
return this.iframe.contentWindow.document;
} else { } else {
return null; return new DOMParser().parseFromString(text);
} }
}; };
Slicer.prototype.addToList = function(parent,child) { Slicer.prototype.addToList = function(parent,child) {
var parentTiddler = this.tiddlers[parent] || {}, var parentTiddler = this.getTiddler(parent) || {},
parentList = parentTiddler.list || []; parentList = parentTiddler.list || [];
parentList.push(child); parentList.push(child);
this.addTiddler($tw.utils.extend({title: parent},parentTiddler,{list: parentList})); this.addTiddler($tw.utils.extend({title: parent},parentTiddler,{list: parentList}));
}; };
Slicer.prototype.insertBeforeListItem = function(parent,child,beforeSibling) { Slicer.prototype.insertBeforeListItem = function(parent,child,beforeSibling) {
var parentTiddler = this.tiddlers[parent] || {}, var parentTiddler = this.getTiddler(parent) || {},
parentList = parentTiddler.list || [], parentList = parentTiddler.list || [],
parentListSiblingPosition = parentList.indexOf(beforeSibling); parentListSiblingPosition = parentList.indexOf(beforeSibling);
if(parentListSiblingPosition !== -1) { if(parentListSiblingPosition !== -1) {
@@ -88,11 +146,10 @@ Slicer.prototype.getTopContainer = function() {
Slicer.prototype.appendToCurrentContainer = function(newText) { Slicer.prototype.appendToCurrentContainer = function(newText) {
var title = this.containerStack[this.containerStack.length-1]; var title = this.containerStack[this.containerStack.length-1];
if(title) { if(title) {
var tiddler = this.tiddlers[title] || {}, var tiddler = this.getTiddler(title) || {},
text = tiddler.text || ""; text = tiddler.text || "";
this.addTiddler($tw.utils.extend({title: title},tiddler,{text: text + newText})); this.addTiddler($tw.utils.extend({title: title},tiddler,{text: text + newText}));
} }
else {debugger;} else {debugger;}
}; };
@@ -108,74 +165,6 @@ Slicer.prototype.isBlank = function(s) {
return (/^[\s\xA0]*$/g).test(s); return (/^[\s\xA0]*$/g).test(s);
}; };
/*
Parse the HTML text of a tiddler into a DOM document.

tiddler: tiddler whose `fields.text` holds the HTML source

In the browser the text is written into a newly created iframe (kept in `this.iframe`)
and the iframe's document is returned. Elsewhere the text is passed to the module's
DOMParser. A tiddler without a text field is treated as empty, rather than being parsed
as the literal string "undefined".
*/
Slicer.prototype.getSourceHtmlDocument = function(tiddler) {
	var text = tiddler.fields.text || "";
	if($tw.browser) {
		this.iframe = document.createElement("iframe");
		document.body.appendChild(this.iframe);
		this.iframe.contentWindow.document.open();
		this.iframe.contentWindow.document.write(text);
		this.iframe.contentWindow.document.close();
		return this.iframe.contentWindow.document;
	} else {
		return new DOMParser().parseFromString(text);
	}
};
/*
Render a tiddler through a transclude widget into a fake-DOM container.

tiddler: tiddler to render; when it is missing or has no title, `this.sourceTitle` is used

Returns the `div` created from `$tw.fakeDocument` that the widget was rendered into.
*/
Slicer.prototype.getSourceWikiDocument = function(tiddler) {
	// Use the tiddler that was passed in, falling back to the configured source title
	var title = (tiddler && tiddler.fields && tiddler.fields.title) || this.sourceTitle,
		widgetNode = this.wiki.makeTranscludeWidget(title,{
			document: $tw.fakeDocument,
			parseAsInline: false,
			importPageMacros: true}),
		container = $tw.fakeDocument.createElement("div");
	widgetNode.render(container,null);
	return container;
};
/*
Get the DOM for the source tiddler.

Tiddlers of type "text/html" go through getSourceHtmlDocument(); everything else,
including a source tiddler that does not exist, goes through getSourceWikiDocument().
*/
Slicer.prototype.getSourceDocument = function() {
	var tiddler = this.sourceTiddler;
	// sourceTiddler is undefined when the source title does not name an existing tiddler
	if(tiddler && tiddler.fields.type === "text/html") {
		return this.getSourceHtmlDocument(tiddler);
	}
	return this.getSourceWikiDocument(tiddler);
};
/*
Build a title from a prefix plus a slug derived from some descriptive text, then add a
numeric suffix until the title is unused.

prefix: leading part of the title
rawText: optional text; its lowercase alphanumeric words (minus the stop words) are
	appended to the prefix, separated by dashes, while the slug stays under 50 characters

A title counts as used when it is already in `this.tiddlers`, or when the wiki reports
it as an existing tiddler, a shadow tiddler or a tiddler that has a draft.
*/
Slicer.prototype.makeUniqueTitle = function(prefix,rawText) {
	var self = this,
		base = prefix,
		counter = 0;
	if(rawText) {
		// Letters a-z and digits survive; any other non-whitespace character becomes a space
		var keepAlphanumeric = function(ch) {
				var isLetter = ch >= "a" && ch <= "z",
					isDigit = ch >= "0" && ch <= "9";
				return (isLetter || isDigit) ? ch : " ";
			},
			spaced = rawText.toLowerCase().replace(/[^\s\xA0]/mg,keepAlphanumeric),
			// Split into words, dropping empty strings and stop words
			candidates = spaced.split(/[\s\xA0]+/mg).filter(function(word) {
				return !!word && self.stopWordList.indexOf(word) === -1;
			}),
			slug = "";
		// Take words in order until the next one would reach the length limit
		for(var index = 0; index < candidates.length; index++) {
			if(slug.length + candidates[index].length + 1 >= 50) {
				break;
			}
			slug += "-" + candidates[index];
		}
		base = base + slug;
	}
	// Append -1, -2, ... until the title is free
	var isTaken = function(candidate) {
			return self.tiddlers[candidate] || self.wiki.tiddlerExists(candidate) || self.wiki.isShadowTiddler(candidate) || self.wiki.findDraft(candidate);
		},
		title = base;
	while(isTaken(title)) {
		counter += 1;
		title = base + "-" + counter;
	}
	return title;
};
Slicer.prototype.registerAnchor = function(id) { Slicer.prototype.registerAnchor = function(id) {
this.anchors[id] = this.currentTiddler; this.anchors[id] = this.currentTiddler;
} }
@@ -202,37 +191,69 @@ Slicer.prototype.processNode = function(domNode) {
} }
}; };
// Slice a tiddler into individual tiddlers Slicer.prototype.makeUniqueTitle = function(rawText) {
Slicer.prototype.sliceTiddler = function() { // Remove characters other than lowercase alphanumeric and spaces
var sliceTitle,sliceTiddler = {}; var prefix = this.baseTiddlerTitle,
if(this.sourceTiddler) { self = this,
sliceTiddler = $tw.utils.extend({},this.sourceTiddler.fields); cleanText;
if(rawText) {
// Replace non alpha characters with spaces
cleanText = rawText.toLowerCase().replace(/[^\s\xA0]/mg,function($0,$1,$2) {
if(($0 >= "a" && $0 <= "z") || ($0 >= "0" && $0 <= "9")) {
return $0;
} else {
return " ";
}
});
// Split on word boundaries
var words = cleanText.split(/[\s\xA0]+/mg);
// Remove common words
words = words.filter(function(word) {
return word && ("the and a of on i".split(" ").indexOf(word) === -1);
});
// Accumulate the number of words that will fit
var c = 0,
s = "";
while(c < words.length && (s.length + words[c].length + 1) < 50) {
s += "-" + words[c++];
}
prefix = prefix + s;
} }
sliceTiddler.title = this.destTitle; // Check for duplicates
sliceTiddler.text = "Document sliced at " + (new Date()); var baseTitle = prefix;
sliceTiddler.type = "text/vnd.tiddlywiki"; c = 0;
sliceTiddler.tags = []; var title = baseTitle;
sliceTiddler.list = []; while(this.getTiddler(title)) {
sliceTiddler["toc-type"] = "document"; title = baseTitle + "-" + (++c);
var domNode = this.getSourceDocument(); }
this.parentStack.push({type: "h0", title: this.addTiddler(sliceTiddler)}); return title;
this.currentTiddler = sliceTiddler.title;
this.containerStack.push(sliceTiddler.title);
this.processNodeList(domNode.childNodes);
this.containerStack.pop();
}; };
// Output directly to the output tiddlers Slicer.prototype.addTiddler = function(fields) {
Slicer.prototype.outputTiddlers = function() { if(fields.title) {
this.extractedTiddlers[fields.title] = Object.assign({},fields);
}
return fields.title;
};
Slicer.prototype.addTiddlers = function(fieldsArray) {
var self = this; var self = this;
$tw.utils.each(this.tiddlers,function(tiddlerFields) { (fieldsArray || []).forEach(function(fields) {
var title = tiddlerFields.title; self.addTiddler(fields);
if(title) {
$tw.wiki.addTiddler(new $tw.Tiddler(self.wiki.getCreationFields(),tiddlerFields,self.wiki.getModificationFields()));
}
}); });
}; };
/*
Look up the fields of a tiddler created by this slicer.

title: title of the tiddler to retrieve

Returns the stored fields object, or undefined if no tiddler with that title has been added.
*/
Slicer.prototype.getTiddler = function(title) {
	// extractedTiddlers is a plain object, so only own properties count; otherwise titles
	// such as "constructor" or "toString" would resolve to inherited Object.prototype members
	if(Object.prototype.hasOwnProperty.call(this.extractedTiddlers,title)) {
		return this.extractedTiddlers[title];
	}
	return undefined;
};
/*
Return the fields objects of every tiddler created by this slicer, as an array in the
key order of `this.extractedTiddlers`.
*/
Slicer.prototype.getTiddlers = function() {
	var store = this.extractedTiddlers,
		titles = Object.keys(store),
		result = [];
	for(var index = 0; index < titles.length; index++) {
		result.push(store[titles[index]]);
	}
	return result;
};
exports.Slicer = Slicer; exports.Slicer = Slicer;
})(); })();

View File

@@ -16,7 +16,7 @@ exports.processDefinitionNode = function(domNode,tagName) {
var text = $tw.utils.htmlEncode(domNode.textContent); var text = $tw.utils.htmlEncode(domNode.textContent);
if(domNode.nodeType === 1 && tagName === "dd") { if(domNode.nodeType === 1 && tagName === "dd") {
// if(!this.isBlank(text)) { // if(!this.isBlank(text)) {
var title = this.makeUniqueTitle("definition",text), var title = this.makeUniqueTitle("definition " + text),
parentTitle = this.parentStack[this.parentStack.length - 1].title, parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = []; tags = [];
if(domNode.className && domNode.className.trim() !== "") { if(domNode.className && domNode.className.trim() !== "") {

View File

@@ -15,7 +15,7 @@ Handle slicing heading nodes
exports.processHeadingNode = function(domNode,tagName) { exports.processHeadingNode = function(domNode,tagName) {
if(domNode.nodeType === 1 && (tagName === "h1" || tagName === "h2" || tagName === "h3" || tagName === "h4")) { if(domNode.nodeType === 1 && (tagName === "h1" || tagName === "h2" || tagName === "h3" || tagName === "h4")) {
var text = $tw.utils.htmlEncode(domNode.textContent); var text = $tw.utils.htmlEncode(domNode.textContent);
var title = this.makeUniqueTitle("heading",text), var title = this.makeUniqueTitle("heading " + text),
parentTitle = this.popParentStackUntil(tagName), parentTitle = this.popParentStackUntil(tagName),
tags = []; tags = [];
if(domNode.className && domNode.className.trim() !== "") { if(domNode.className && domNode.className.trim() !== "") {

View File

@@ -21,8 +21,8 @@ exports.processImageNode = function(domNode,tagName) {
text = parts[1], text = parts[1],
contentTypeInfo = $tw.config.contentTypeInfo[type], contentTypeInfo = $tw.config.contentTypeInfo[type],
containerTitle = this.getTopContainer(), containerTitle = this.getTopContainer(),
containerTiddler = this.tiddlers[containerTitle], containerTiddler = this.getTiddler(containerTitle),
title = this.makeUniqueTitle("image",containerTitle) + contentTypeInfo.extension, title = this.makeUniqueTitle("image " + containerTitle) + contentTypeInfo.extension,
tiddler = { tiddler = {
title: title, title: title,
type: parts[0], type: parts[0],
@@ -47,7 +47,7 @@ exports.processImageNode = function(domNode,tagName) {
case "item": case "item":
// Create a new older sibling item to contain the image // Create a new older sibling item to contain the image
var parentTitle = this.parentStack[this.parentStack.length - 1].title, var parentTitle = this.parentStack[this.parentStack.length - 1].title,
itemTitle = this.makeUniqueTitle("image-item-wrapper",containerTitle), itemTitle = this.makeUniqueTitle("image-item-wrapper " + containerTitle),
itemTiddler = { itemTiddler = {
title: itemTitle, title: itemTitle,
"toc-type": "item", "toc-type": "item",

View File

@@ -16,7 +16,7 @@ exports.processListItemNode = function(domNode,tagName) {
var text = $tw.utils.htmlEncode(domNode.textContent); var text = $tw.utils.htmlEncode(domNode.textContent);
if(domNode.nodeType === 1 && tagName === "li") { if(domNode.nodeType === 1 && tagName === "li") {
// if(!this.isBlank(text)) { // if(!this.isBlank(text)) {
var title = this.makeUniqueTitle("list-item",text), var title = this.makeUniqueTitle("list-item " + text),
parentTitle = this.parentStack[this.parentStack.length - 1].title, parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = []; tags = [];
if(domNode.className && domNode.className.trim() !== "") { if(domNode.className && domNode.className.trim() !== "") {

View File

@@ -14,7 +14,7 @@ Handle slicing list nodes
exports.processListNode = function(domNode,tagName) { exports.processListNode = function(domNode,tagName) {
if(domNode.nodeType === 1 && (tagName === "ul" || tagName === "ol")) { if(domNode.nodeType === 1 && (tagName === "ul" || tagName === "ol")) {
var title = this.makeUniqueTitle("list-" + tagName), var title = this.makeUniqueTitle("list " + tagName),
parentTitle = this.parentStack[this.parentStack.length - 1].title, parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = []; tags = [];
if(domNode.className && domNode.className.trim() !== "") { if(domNode.className && domNode.className.trim() !== "") {

View File

@@ -18,7 +18,7 @@ exports.processParagraphNode = function(domNode,tagName) {
if(!this.isBlank(text)) { if(!this.isBlank(text)) {
var parentTitle = this.parentStack[this.parentStack.length - 1].title, var parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [], tags = [],
title = this.makeUniqueTitle("paragraph",text); title = this.makeUniqueTitle("paragraph " + text);
if(domNode.className && domNode.className && domNode.className.trim() !== "") { if(domNode.className && domNode.className && domNode.className.trim() !== "") {
tags = tags.concat(domNode.className.split(" ")); tags = tags.concat(domNode.className.split(" "));
} }

View File

@@ -16,7 +16,7 @@ exports.processTermNode = function(domNode,tagName) {
var text = $tw.utils.htmlEncode(domNode.textContent); var text = $tw.utils.htmlEncode(domNode.textContent);
if(domNode.nodeType === 1 && tagName === "dt") { if(domNode.nodeType === 1 && tagName === "dt") {
// if(!this.isBlank(text)) { // if(!this.isBlank(text)) {
var title = this.makeUniqueTitle("term",text), var title = this.makeUniqueTitle("term " + text),
parentTitle = this.parentStack[this.parentStack.length - 1].title, parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = []; tags = [];
if(domNode.className && domNode.className.trim() !== "") { if(domNode.className && domNode.className.trim() !== "") {

View File

@@ -1,33 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/startup/slicer-message.js
type: application/javascript
module-type: startup
Setup the root widget event handlers
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// Export name and synchronous status
exports.name = "slicer";
exports.platforms = ["browser"];
exports.after = ["startup"];
exports.synchronous = true;
// Install the root widget event handlers
exports.startup = function() {
$tw.rootWidget.addEventListener("tm-slice-tiddler",function(event) {
var slicer = new $tw.Slicer($tw.wiki,event.param,{
destTitle: event.paramObject && event.paramObject.destTitle
});
slicer.sliceTiddler(event.param)
slicer.outputTiddlers();
slicer.destroy();
});
};
})();

View File

@@ -0,0 +1,42 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/startup/slicer-startup.js
type: application/javascript
module-type: startup
Setup the root widget event handlers
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
var textSlicer = require("$:/plugins/tiddlywiki/text-slicer/modules/slicer.js");
// Export name and synchronous status
exports.name = "slicer";
exports.platforms = ["browser"];
exports.after = ["startup"];
exports.synchronous = true;
// Install the root widget event handlers
exports.startup = function() {
// Check xmldom is installed
if(!$tw.utils.hop($tw.modules.titles,"$:/plugins/tiddlywiki/xmldom/dom-parser")) {
// Make a logger
var logger = new $tw.utils.Logger("text-slicer");
logger.alert("The plugin 'text-slicer' requires the 'xmldom' plugin to be installed");
}
// Add tm-slice-tiddler event handler
$tw.rootWidget.addEventListener("tm-slice-tiddler",function(event) {
var slicer = new textSlicer.Slicer({
sourceTiddlerTitle: event.param,
baseTiddlerTitle: event.paramObject && event.paramObject.destTitle,
wiki: $tw.wiki
});
$tw.wiki.addTiddlers(slicer.getTiddlers());
});
};
})();