1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2025-12-05 00:08:06 +00:00

Major updates to text-slicer plugin

* In the interests of performance and expressiveness, switched to using a Sax parser instead of a DOM implementation.
* Use extensible declarative rules to control the slicing process
* Added new optional set of rules for slicing by heading, where the paragraphs underneath a heading are packed into the same tiddler as the heading
* Added a modal dialogue for specifying parameters when slicing in the browser
This commit is contained in:
Jermolene
2017-12-14 14:16:54 +00:00
parent f128650c6e
commit e344c38349
39 changed files with 2943 additions and 713 deletions

View File

@@ -4,5 +4,18 @@ description: Slice a hierarchical document into individual tiddlers
Slices the specified tiddler
```
--slice <title>
--slice <source-title> [<dest-title>] [<slicer-rules>] [<output-mode>]
```
* ''source-title'': Title of the tiddler to be sliced
* ''dest-title'': Base title for the generated output tiddlers
* ''slicer-rules'': Name of the slicer rules to use for the operation (see below)
* ''output-mode'': Format of the generated tiddler text: "html" (the default) or "wiki"
The plugin comes with several built-in sets of slicer rules:
* //html-by-paragraph//: Slice every paragraph into a separate tiddler, threaded by heading
* //html-by-heading//: Slice every heading into separate threaded tiddlers
* //html-plain-paragraphs//: Slice every paragraph into a separate tiddler, without formatting or headings
Advanced users can create or edit their own slicer rules for precise control over the conversion process

View File

@@ -34,13 +34,22 @@ Command.prototype.execute = function() {
wiki = this.commander.wiki,
sourceTitle = this.params[0],
destTitle = this.params[1],
slicerRules = this.params[2],
outputMode = this.params[3],
slicer = new textSlicer.Slicer({
sourceTiddlerTitle: sourceTitle,
baseTiddlerTitle: destTitle,
wiki: wiki
slicerRules: slicerRules,
outputMode: outputMode,
wiki: wiki,
callback: function(err,tiddlers) {
if(err) {
return self.callback(err);
}
wiki.addTiddlers(tiddlers);
self.callback();
}
});
wiki.addTiddlers(slicer.getTiddlers());
$tw.utils.nextTick(this.callback);
return null;
};

View File

@@ -0,0 +1,189 @@
title: $:/plugins/tiddlywiki/text-slicer/slicer-rules/html-by-heading.json
name: html-by-heading
caption: By Heading (HTML)
description: One tiddler per heading, threaded (HTML)
inherits-from: html-by-paragraph
type: application/json
tags: $:/tags/text-slicer/slicer-rules
[
{
"selector": "address,center,fieldset,form,hr,iframe,isindex,noframes,noscript,ol,ul,li,pre,table",
"actions": {}
},
{
"selector": "blockquote",
"actions": {
"markup": {
"wiki": {
"prefix": "<<<\n",
"suffix": "<<<\n"
}
}
}
},
{
"selector": "dd",
"actions": {
"markup": {
"wiki": {
"prefix": "\n: ",
"suffix": "\n"
}
}
}
},
{
"selector": "dl",
"actions": {
"markup": {
"wiki": {
"prefix": "\n",
"suffix": "\n"
}
}
}
},
{
"selector": "dt",
"actions": {
"markup": {
"wiki": {
"prefix": "\n; ",
"suffix": "\n"
}
}
}
},
{
"selector": "h1",
"actions": {
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h1"
},
"mergeNext": true,
"setCaption": true,
"isParent": true,
"headingLevel": 1,
"markup": {
"wiki": {
"prefix": "! ",
"suffix": "\n"
}
}
}
},
{
"selector": "h2",
"actions": {
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h2"
},
"mergeNext": true,
"setCaption": true,
"isParent": true,
"headingLevel": 2,
"markup": {
"wiki": {
"prefix": "!! ",
"suffix": "\n"
}
}
}
},
{
"selector": "h3",
"actions": {
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h3"
},
"mergeNext": true,
"setCaption": true,
"isParent": true,
"headingLevel": 3,
"markup": {
"wiki": {
"prefix": "!!! ",
"suffix": "\n"
}
}
}
},
{
"selector": "h4",
"actions": {
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h4"
},
"mergeNext": true,
"setCaption": true,
"isParent": true,
"headingLevel": 4,
"markup": {
"wiki": {
"prefix": "!!!! ",
"suffix": "\n"
}
}
}
},
{
"selector": "h5",
"actions": {
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h5"
},
"mergeNext": true,
"setCaption": true,
"isParent": true,
"headingLevel": 5,
"markup": {
"wiki": {
"prefix": "!!!!! ",
"suffix": "\n"
}
}
}
},
{
"selector": "h6",
"actions": {
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h6"
},
"mergeNext": true,
"setCaption": true,
"isParent": true,
"headingLevel": 6,
"markup": {
"wiki": {
"prefix": "!!!!!! ",
"suffix": "\n"
}
}
}
},
{
"selector": "p",
"actions": {
"markup": {
"wiki": {
"prefix": "",
"suffix": "\n"
}
}
}
},
{
"selector": "*",
"actions": {
"dontRenderTag": true
}
}
]

View File

@@ -0,0 +1,265 @@
title: $:/plugins/tiddlywiki/text-slicer/slicer-rules/html-by-paragraph.json
name: html-by-paragraph
caption: By Paragraph (HTML)
description: One tiddler per paragraph, threaded by heading (HTML)
type: application/json
tags: $:/tags/text-slicer/slicer-rules
[
{
"selector": "address,center,fieldset,form,hr,iframe,isindex,noframes,noscript,pre,table",
"actions": {
"startNewChunk": {
"toc-type": "paragraph"
}
}
},
{
"selector": "blockquote",
"actions": {
"startNewChunk": {
"toc-type": "paragraph"
},
"markup": {
"wiki": {
"prefix": "<<<\n",
"suffix": "<<<\n"
}
}
}
},
{
"selector": "body,div,head,html,span",
"actions": {
"dontRenderTag": true
}
},
{
"selector": "dd",
"actions": {
"dontRenderTag": true,
"startNewChunk": {
"toc-type": "definition"
}
}
},
{
"selector": "dl",
"actions": {
"dontRenderTag": true,
"isParent": true,
"startNewChunk": {
"toc-type": "def-list",
"toc-list-filter": "[list<currentTiddler>!has[draft.of]]"
}
}
},
{
"selector": "dt",
"actions": {
"dontRenderTag": true,
"startNewChunk": {
"toc-type": "term"
}
}
},
{
"selector": "em,i",
"actions": {
"markup": {
"wiki": {
"prefix": "//",
"suffix": "//"
}
}
}
},
{
"selector": "h1",
"actions": {
"dontRenderTag": true,
"isParent": true,
"headingLevel": 1,
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h1"
}
}
},
{
"selector": "h2",
"actions": {
"dontRenderTag": true,
"isParent": true,
"headingLevel": 2,
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h2"
}
}
},
{
"selector": "h3",
"actions": {
"dontRenderTag": true,
"isParent": true,
"headingLevel": 3,
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h3"
}
}
},
{
"selector": "h4",
"actions": {
"dontRenderTag": true,
"isParent": true,
"headingLevel": 4,
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h4"
}
}
},
{
"selector": "h5",
"actions": {
"dontRenderTag": true,
"isParent": true,
"headingLevel": 5,
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h5"
}
}
},
{
"selector": "h6",
"actions": {
"dontRenderTag": true,
"isParent": true,
"headingLevel": 6,
"startNewChunk": {
"toc-type": "heading",
"toc-heading-level": "h6"
}
}
},
{
"selector": "img",
"actions": {
"isImage": true
}
},
{
"selector": "li",
"actions": {
"dontRenderTag": true,
"startNewChunk": {
"toc-type": "item"
}
}
},
{
"selector": "ol",
"actions": {
"dontRenderTag": true,
"isParent": true,
"startNewChunk": {
"toc-type": "list",
"toc-list-type": "ol",
"toc-list-filter": "[list<currentTiddler>!has[draft.of]]"
}
}
},
{
"selector": "p",
"actions": {
"dontRenderTag": true,
"startNewChunk": {
"toc-type": "paragraph"
}
}
},
{
"selector": "strike",
"actions": {
"markup": {
"wiki": {
"prefix": "~~",
"suffix": "~~"
}
}
}
},
{
"selector": "strong,b",
"actions": {
"markup": {
"wiki": {
"prefix": "''",
"suffix": "''"
}
}
}
},
{
"selector": "sub",
"actions": {
"markup": {
"wiki": {
"prefix": ",,",
"suffix": ",,"
}
}
}
},
{
"selector": "sup",
"actions": {
"markup": {
"wiki": {
"prefix": "^^",
"suffix": "^^"
}
}
}
},
{
"selector": "head > title",
"actions": {
"dontRenderTag": true,
"startNewChunk": {
"toc-type": "paragraph"
}
}
},
{
"selector": "u",
"actions": {
"markup": {
"wiki": {
"prefix": "__",
"suffix": "__"
}
}
}
},
{
"selector": "ul",
"actions": {
"dontRenderTag": true,
"isParent": true,
"startNewChunk": {
"toc-type": "list",
"toc-list-type": "ul",
"toc-list-filter": "[list<currentTiddler>!has[draft.of]]"
}
}
},
{
"selector": "*",
"actions": {}
}
]

View File

@@ -0,0 +1,24 @@
title: $:/plugins/tiddlywiki/text-slicer/slicer-rules/html-plain-paragraphs.json
name: html-plain-paragraphs
caption: Plain Paragraphs (HTML)
description: One tiddler per paragraph, without formatting (HTML)
type: application/json
tags: $:/tags/text-slicer/slicer-rules
[
{
"selector": "address,blockquote,center,dd,dt,h1,h2,h3,h4,h5,h6,li,p",
"actions": {
"startNewChunk": {
"toc-type": "paragraph"
},
"dontRenderTag": true
}
},
{
"selector": "*",
"actions": {
"dontRenderTag": true
}
}
]

View File

@@ -6,11 +6,13 @@ module-type: library
Slice a tiddler or DOM document into individual tiddlers
var slicer = new textSlicer.Slicer({
slicerRules: JSON data defining slicer rules -or- title of rules taken from tiddlers tagged $:/tags/text-slicer/slicer-rules
sourceTiddlerTitle: tiddler to slice -or-
sourceText: text to slice -or-
sourceDoc: DOM document to
baseTiddlerTitle: "MySlicedTiddlers-",
sourceText: text to slice
outputMode: "html" (default) -or- "wiki"
baseTiddlerTitle: "MySlicedTiddlers-"
role: "sliced-content"
callback: function(err,tiddlers)
});
\*/
@@ -20,178 +22,383 @@ var slicer = new textSlicer.Slicer(doc,{
/*global $tw: false */
"use strict";
var DOMParser = $tw.browser ? window.DOMParser : require("$:/plugins/tiddlywiki/xmldom/dom-parser").DOMParser;
function Slicer(options) {
// Quick tests
this.testSlicerRuleMatching();
// Marshal parameters
this.sourceDoc = options.sourceDoc;
this.sourceTiddlerTitle = options.sourceTiddlerTitle;
this.sourceText = options.sourceText;
this.wiki = options.wiki;
if(options.baseTiddlerTitle) {
this.baseTiddlerTitle = options.baseTiddlerTitle
} else {
if(this.sourceTiddlerTitle) {
this.baseTiddlerTitle = "Sliced up " + this.sourceTiddlerTitle;
} else {
this.baseTiddlerTitle = "SlicedTiddler";
}
}
this.role = options.role || "sliced-html";
// Initialise state
this.extractedTiddlers = {}; // Hashmap of created tiddlers
this.parentStack = []; // Stack of parent heading or list
this.containerStack = []; // Stack of elements containing other elements
this.slicers = $tw.modules.applyMethods("slicer");
this.anchors = Object.create(null); // Hashmap of HTML anchor ID to tiddler title
// Get the DOM document for the source text
if(!this.sourceDoc) {
if(this.sourceTiddlerTitle) {
this.sourceDoc = this.parseTiddlerText(this.sourceTiddlerTitle);
} else {
this.sourceDoc = this.parseHtmlText(this.sourceText);
}
this.outputMode = options.outputMode || "html";
this.callbackFn = options.callback;
// Get the slicer rules
var nameSlicerRules = null;
if(!options.slicerRules) {
nameSlicerRules = "html-by-paragraph";
this.slicerRules = this.loadSlicerRules(nameSlicerRules);
} else if(typeof options.slicerRules === "string") {
nameSlicerRules = options.slicerRules;
this.slicerRules = this.loadSlicerRules(nameSlicerRules);
} else {
this.slicerRules = options.slicerRules;
}
// Create parent tiddler
console.log("Slicing to",this.baseTiddlerTitle)
var sliceTiddler = {
title: this.baseTiddlerTitle,
text: "Sliced at " + (new Date()),
// Set up the base tiddler title
this.baseTiddlerTitle = this.getBaseTiddlerTitle(options.baseTiddlerTitle);
// Initialise state
this.namespaces = {}; // Hashmap of URLs
this.chunks = []; // Array of tiddlers without titles, addressed by their index. We use the title field to hold the plain text content
this.currentChunk = null; // Index of the chunk currently being written to
this.parentStack = []; // Stack of parent chunks {chunk: chunk index,actions:}
this.elementStack = []; // Stack of {tag:,isSelfClosing:,actions:}
// Set up the document tiddler as top level heading
this.chunks.push({
"toc-type": "document",
tags: [],
title: "", // makeUniqueTitle will later initialise it to baseTiddlerTitle
text: "<div class='tc-table-of-contents'><<toc-selective-expandable '" + this.baseTiddlerTitle + "document'>></div>",
list: [],
role: this.role
};
this.addTiddler(sliceTiddler);
// Slice the text into subordinate tiddlers
this.parentStack.push({type: "h0", title: sliceTiddler.title});
this.currentTiddler = sliceTiddler.title;
this.containerStack.push(sliceTiddler.title);
this.processNodeList(this.sourceDoc.childNodes);
this.containerStack.pop();
tags: [],
role: this.role,
"slicer-rules": nameSlicerRules,
"slicer-output-mode": this.outputMode
});
this.parentStack.push({chunk: 0, actions: this.getMatchingSlicerRuleActions("(document)")});
// Set up the parser
var sax = require("$:/plugins/tiddlywiki/sax/sax.js");
this.sax = sax.parser(true,{
xmlns: true
});
this.sax.onerror = this.onError.bind(this);
this.sax.onopennamespace = this.onOpenNamespace.bind(this);
this.sax.onclosenamespace = this.onCloseNamespace.bind(this);
this.sax.onopentag = this.onOpenTag.bind(this);
this.sax.onclosetag = this.onCloseTag.bind(this);
this.sax.ontext = this.onText.bind(this);
this.sax.onend = this.onEnd.bind(this);
// Start streaming the data
this.sax.write(this.getSourceText());
this.sax.close();
}
Slicer.prototype.parseTiddlerText = function(title) {
var tiddler = this.wiki.getTiddler(title);
if(tiddler) {
if(tiddler.fields.type === "text/html") {
return this.parseHtmlText(tiddler.fields.text);
/*
Report the outcome of slicing to the callback passed in the constructor options.
err: error value, or null on success
tiddlers: array of generated tiddler field objects
The invocation is deferred through $tw.utils.nextTick.
*/
Slicer.prototype.callback = function(err,tiddlers) {
	var self = this;
	// The callback option may have been omitted, in which case there is nobody to notify
	if(typeof this.callbackFn !== "function") {
		return;
	}
	$tw.utils.nextTick(function() {
		self.callbackFn(err,tiddlers);
	});
};
/*
Build the list of slicer rules for a named rule set.
name: value of the "name" field of a tiddler tagged $:/tags/text-slicer/slicer-rules
Returns an array of rule objects: the rules of the named set first, followed by the rules
of each set reached through the "inherits-from" field. Rule set names that do not exist
contribute no rules.
*/
Slicer.prototype.loadSlicerRules = function(name) {
	// Collect the available slicer rule tiddlers, indexed by their "name" field.
	// Prototype-less maps are used so that a name like "constructor" cannot hit Object.prototype
	var self = this,
		titles = this.wiki.getTiddlersWithTag("$:/tags/text-slicer/slicer-rules"),
		tiddlersByName = Object.create(null),
		rulesByName = Object.create(null),
		ruleNames = [];
	titles.forEach(function(title) {
		var tiddler = self.wiki.getTiddler(title);
		if(tiddler) {
			tiddlersByName[tiddler.fields.name] = tiddler;
			rulesByName[tiddler.fields.name] = self.wiki.getTiddlerData(title,[]);
		}
	});
	// Follow the inheritance trail to get a stack of slicer rule names, stopping at the first repeat
	var n = name;
	do {
		ruleNames.push(n);
		n = tiddlersByName[n] && tiddlersByName[n].fields["inherits-from"];
	} while(n && ruleNames.indexOf(n) === -1);
	// Concatenate the slicer rules, skipping names for which no rule tiddler was found
	return ruleNames.reduce(function(accumulator,ruleName) {
		return accumulator.concat(rulesByName[ruleName] || []);
	},[]);
};
/*
Look up the actions of the first slicer rule matching an element name, evaluated against
the current element stack. Returns an empty object when no rule matches.
*/
Slicer.prototype.getMatchingSlicerRuleActions = function(name) {
	var matchedRule = this.searchSlicerRules(name,this.slicerRules,this.elementStack);
	return matchedRule ? matchedRule.actions : {};
};
/*
Self-test for searchSlicerRules. Runs a fixed set of selector matching cases and throws an
Error naming the first case whose matched selector differs from the expected one (including
the case where no rule matched at all).
*/
Slicer.prototype.testSlicerRuleMatching = function() {
	var self = this,
		tests = [
			{
				name: "title",
				rules: [
					{selector: "title,head,body", rules: true},
					{selector: "body", rules: true}
				],
				stack: [
					{tag:"head"}
				],
				result: "title,head,body"
			},
			{
				name: "body",
				rules: [
					{selector: "title,head,body", rules: true},
					{selector: "body", rules: true}
				],
				stack: [
					{tag:"head"}
				],
				result: "title,head,body"
			},
			{
				name: "title",
				rules: [
					{selector: "head > title", rules: true},
					{selector: "title", rules: true}
				],
				stack: [
					{tag:"head"}
				],
				result: "head > title"
			}
		];
	tests.forEach(function(test,index) {
		var rule = self.searchSlicerRules(test.name,test.rules,test.stack),
			// A missing match is reported as a failing test rather than crashing on rule.selector
			selector = rule ? rule.selector : undefined;
		if(selector !== test.result) {
			throw new Error("Failing test " + index + ", returns " + selector + " instead of " + test.result);
		}
	});
};
/*
Find the first rule whose selector matches an element.
name: tag name of the element being tested
rules: array of rule objects, each with a "selector" string
elementStack: array of {tag:} entries for the ancestors of the element, outermost first
Returns the matching rule object, or undefined if none matches.

A selector is a comma separated list of alternatives. Each alternative is a whitespace
separated sequence of tag names, optionally joined by ">" to demand a direct parent; tag names
that are merely adjacent match any ancestor. A lone "*" matches every element.
*/
Slicer.prototype.searchSlicerRules = function(name,rules,elementStack) {
	// Test a single alternative (no commas) against the element and its ancestors
	function matchesSelector(selector) {
		// Split on runs of whitespace so that "head  >  title" behaves like "head > title"
		var parts = selector.split(/\s+/).filter(function(part) {
			return part !== "";
		});
		// An empty alternative never matches
		if(parts.length === 0) {
			return false;
		}
		// * matches any element
		if(parts.length === 1 && parts[0] === "*") {
			return true;
		}
		// Work on a copy of the element stack, with the element itself on top, so that we can be destructive
		var elements = elementStack.concat({tag: name}),
			nextElementMustBeAtTopOfStack = true,
			currentPart,
			part;
		// Walk the selector from right to left, consuming the stack from the top
		for(currentPart = parts.length - 1; currentPart >= 0; currentPart--) {
			part = parts[currentPart];
			if(part === ">") {
				nextElementMustBeAtTopOfStack = true;
				continue;
			}
			if(!nextElementMustBeAtTopOfStack) {
				// Descendant combinator: skip ancestors until one with the right tag is found
				while(elements.length > 0 && elements[elements.length - 1].tag !== part) {
					elements.pop();
				}
			}
			if(elements.length === 0 || elements[elements.length - 1].tag !== part) {
				return false;
			}
			elements.pop();
			nextElementMustBeAtTopOfStack = false;
		}
		return true;
	}
	return rules.find(function(rule) {
		return rule.selector.split(",").some(matchesSelector);
	});
};
Slicer.prototype.getBaseTiddlerTitle = function(baseTiddlerTitle) {
if(baseTiddlerTitle) {
return baseTiddlerTitle
} else {
if(this.sourceTiddlerTitle) {
return "Sliced up " + this.sourceTiddlerTitle + ":";
} else {
return this.parseWikiText(tiddler);
return "SlicedTiddler";
}
}
};
Slicer.prototype.parseWikiText = function(tiddler) {
/*
Get the markup that is to be fed to the parser. Always returns a string.
- With a source tiddler title: the raw text of the tiddler if its type is text/html, text/xml
  or ends in "+xml", otherwise the result of getTiddlerAsHtml(). A missing tiddler is logged
  and yields an empty string.
- Without a source tiddler title: the sourceText option, or an empty string if it was omitted.
*/
Slicer.prototype.getSourceText = function() {
	if(!this.sourceTiddlerTitle) {
		return this.sourceText || "";
	}
	var tiddler = this.wiki.getTiddler(this.sourceTiddlerTitle);
	if(!tiddler) {
		console.log("Tiddler '" + this.sourceTiddlerTitle + "' does not exist");
		return "";
	}
	var type = tiddler.fields.type || "";
	if(type === "text/html" || type === "text/xml" || type.slice(-4) === "+xml") {
		// A tiddler may have no text field at all
		return tiddler.fields.text || "";
	}
	return this.getTiddlerAsHtml(tiddler);
};
Slicer.prototype.getTiddlerAsHtml = function(tiddler) {
var widgetNode = this.wiki.makeTranscludeWidget(tiddler.fields.title,{
document: $tw.fakeDocument,
parseAsInline: false,
importPageMacros: true}),
container = $tw.fakeDocument.createElement("div");
widgetNode.render(container,null);
return container;
return ["<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">","<html xmlns=\"http://www.w3.org/1999/xhtml\">","<head>","</head>","<body>",container.innerHTML,"</body>","</html>"].join("\n");
};
Slicer.prototype.parseHtmlText = function(text) {
text = text || "";
if($tw.browser) {
this.iframe = document.createElement("iframe");
document.body.appendChild(this.iframe);
this.iframe.contentWindow.document.open();
this.iframe.contentWindow.document.write(text);
this.iframe.contentWindow.document.close();
return this.iframe.contentWindow.document;
} else {
return new DOMParser().parseFromString(text);
}
/*
Return the entry on top of the parent stack (undefined if the stack is empty)
*/
Slicer.prototype.getImmediateParent = function() {
	var stack = this.parentStack;
	return stack[stack.length - 1];
};
Slicer.prototype.addToList = function(parent,child) {
var parentTiddler = this.getTiddler(parent) || {},
parentList = parentTiddler.list || [];
parentList.push(child);
this.addTiddler($tw.utils.extend({title: parent},parentTiddler,{list: parentList}));
/*
Parser error handler: log the error, then clear the parser's error state and ask it to resume
*/
Slicer.prototype.onError = function(e) {
	var parser = this.sax;
	console.error("Sax error: ", e);
	// Try to carry on parsing after the error
	parser.error = null;
	parser.resume();
};
Slicer.prototype.insertBeforeListItem = function(parent,child,beforeSibling) {
var parentTiddler = this.getTiddler(parent) || {},
parentList = parentTiddler.list || [],
parentListSiblingPosition = parentList.indexOf(beforeSibling);
if(parentListSiblingPosition !== -1) {
parentList.splice(parentListSiblingPosition,0,child)
this.addTiddler($tw.utils.extend({title: parent},parentTiddler,{list: parentList}));
}
else {debugger;}
/*
Parser handler for an opened namespace: record the URI against its prefix
*/
Slicer.prototype.onOpenNamespace = function(info) {
	var prefix = info.prefix,
		uri = info.uri;
	this.namespaces[prefix] = uri;
};
Slicer.prototype.popParentStackUntil = function(type) {
// Pop the stack to remove any entries at the same or lower level
var newLevel = this.convertTypeToLevel(type),
topIndex = this.parentStack.length - 1;
do {
var topLevel = this.convertTypeToLevel(this.parentStack[this.parentStack.length - 1].type);
if(topLevel !== null && topLevel < newLevel ) {
break;
/*
Parser handler for a closed namespace. Intentionally does nothing: entries recorded in
this.namespaces by onOpenNamespace are not removed here.
*/
Slicer.prototype.onCloseNamespace = function(info) {
};
Slicer.prototype.onOpenTag = function(node) {
var actions = this.getMatchingSlicerRuleActions(node.name);
// Check for an element that should start a new chunk
if(actions.startNewChunk) {
// If this is a heading, pop off any higher or equal level headings first
if(actions.isParent && actions.headingLevel) {
var parentActions = this.getImmediateParent().actions;
while(parentActions.isParent && parentActions.headingLevel && parentActions.headingLevel >= actions.headingLevel) {
this.parentStack.pop();
parentActions = this.getImmediateParent().actions;
}
}
// Start the new chunk
this.startNewChunk(actions.startNewChunk);
// If this is a parent then also add it to the parent stack
if(actions.isParent) {
this.parentStack.push({chunk: this.currentChunk, actions: actions});
}
this.parentStack.length--;
} while(true);
return this.parentStack[this.parentStack.length - 1].title;
};
Slicer.prototype.getTopContainer = function() {
return this.containerStack[this.containerStack.length-1];
};
Slicer.prototype.appendToCurrentContainer = function(newText) {
var title = this.containerStack[this.containerStack.length-1];
if(title) {
var tiddler = this.getTiddler(title) || {},
text = tiddler.text || "";
this.addTiddler($tw.utils.extend({title: title},tiddler,{text: text + newText}));
}
else {debugger;}
// Render the tag inline in the current chunk unless we should ignore it
if(!actions.dontRenderTag) {
if(actions.isImage) {
this.onImage(node);
} else {
var markupInfo = actions.markup && actions.markup[this.outputMode];
if(markupInfo) {
this.addTextToCurrentChunk(markupInfo.prefix);
} else {
this.addTextToCurrentChunk("<" + node.name + (node.isSelfClosing ? "/" : "") + ">");
}
}
}
// Remember whether this tag is self closing
this.elementStack.push({tag: node.name,isSelfClosing: node.isSelfClosing, actions: actions});
};
Slicer.prototype.convertTypeToLevel = function(type) {
if(type.charAt(0) === "h") {
return parseInt(type.charAt(1),10);
} else {
return null;
/*
Render an image element into the current chunk as a wikitext image link.
node: parser node for the element; the URL is read from node.attributes.src.value
An element without a usable src attribute is ignored instead of raising a TypeError.
*/
Slicer.prototype.onImage = function(node) {
	var src = node.attributes && node.attributes.src,
		url = src && src.value;
	if(!url) {
		return;
	}
	// TODO: "data:" URLs are currently linked verbatim; extracting them into separate image tiddlers is not implemented
	this.addTextToCurrentChunk("[img[" + url + "]]");
};
/*
Parser handler for a closing tag.
name: name of the element being closed
Pops the matching entry from the element stack and applies the actions that were resolved
when the element was opened: optionally copy the chunk title to its caption, emit the closing
markup, end the current chunk and pop non-heading parents from the parent stack.
*/
Slicer.prototype.onCloseTag = function(name) {
	var element = this.elementStack.pop(),
		actions = element.actions,
		markupInfo;
	// Set the caption if required; there may be no open chunk if a nested element already closed it
	if(actions.setCaption && this.currentChunk !== null) {
		var chunk = this.chunks[this.currentChunk];
		chunk.caption = chunk.title;
	}
	// Render the closing markup, unless the tag is suppressed or was self closing
	if(!actions.dontRenderTag && !element.isSelfClosing) {
		markupInfo = actions.markup && actions.markup[this.outputMode];
		this.addTextToCurrentChunk(markupInfo ? markupInfo.suffix : "</" + name + ">");
	}
	// Check for an element that started a new chunk
	if(actions.startNewChunk) {
		// With mergeNext the chunk stays open so that following content is added to it
		if(!actions.mergeNext) {
			this.currentChunk = null;
		}
		// Headings stay on the parent stack until displaced by a later heading; other parents end here
		if(actions.isParent && !actions.headingLevel) {
			this.parentStack.pop();
		}
	}
};
/*
Parser handler for text: the HTML-encoded text is appended to the "text" field of the
current chunk and the raw text is appended to its "title" field
*/
Slicer.prototype.onText = function(text) {
	var encoded = $tw.utils.htmlEncode(text);
	this.addTextToCurrentChunk(encoded);
	this.addTextToCurrentChunk(text,"title");
};
/*
Parser handler for the end of the input: give every chunk its title and then hand the
chunks to the callback with a null error
*/
Slicer.prototype.onEnd = function() {
	this.assignTitlesToChunks();
	var finishedChunks = this.chunks;
	this.callback(null,finishedChunks);
};
/*
Append a string to a field of the current chunk.
str: text to append
field: name of the field to append to (defaults to "text")
Does nothing when there is no current chunk (currentChunk is null).
*/
Slicer.prototype.addTextToCurrentChunk = function(str,field) {
	var index = this.currentChunk;
	if(index === null) {
		return;
	}
	var target = this.chunks[index];
	target[field ? field : "text"] += str;
};
/*
Create a new chunk underneath the immediate parent and make it the current chunk.
fields: extra fields for the chunk, overriding the defaults
The chunk is tagged with the index of its parent chunk, and its own index is appended to the
list of the parent chunk.
*/
Slicer.prototype.startNewChunk = function(fields) {
	var parentIndex = this.getImmediateParent().chunk,
		defaults = {
			title: "",
			text: "",
			tags: [parentIndex],
			list: [],
			role: this.role
		},
		chunk = $tw.utils.extend({},defaults,fields),
		chunkIndex = this.chunks.push(chunk) - 1;
	this.currentChunk = chunkIndex;
	this.chunks[parentIndex].list.push(chunkIndex);
};
/*
Return true if the string is empty or holds nothing but whitespace and non-breaking spaces
*/
Slicer.prototype.isBlank = function(s) {
	// Blank means that no character outside the whitespace set can be found
	return !(/[^\s\xA0]/).test(s);
};
/*
Record the current tiddler title against an HTML anchor ID
*/
Slicer.prototype.registerAnchor = function(id) {
	var anchors = this.anchors;
	anchors[id] = this.currentTiddler;
};
/*
Run processNode over each entry of a DOM node list, using $tw.utils.each for the iteration
*/
Slicer.prototype.processNodeList = function(domNodeList) {
	var self = this,
		handler = this.processNode;
	$tw.utils.each(domNodeList,function() {
		// Forward every argument and the return value, exactly as a bound function would
		return handler.apply(self,arguments);
	});
};
Slicer.prototype.processNode = function(domNode) {
var nodeType = domNode.nodeType,
tagName = (domNode.tagName || "").toLowerCase(),
hasProcessed = false;
for(var slicerTitle in this.slicers) {
var slicer = this.slicers[slicerTitle];
if(slicer.bind(this)(domNode,tagName)) {
hasProcessed = true;
break;
Slicer.prototype.assignTitlesToChunks = function() {
var self = this;
// Create a title for each tiddler
var titles = {};
this.chunks.forEach(function(chunk) {
var title = self.makeUniqueTitle(titles,chunk["toc-type"] + "-" + chunk.title)
titles[title] = true;
chunk.title = title;
});
// Link up any indices in the tags and list fields
this.chunks.forEach(function(chunk) {
if(chunk.tags) {
chunk.tags.map(function(tag,index) {
if(typeof tag === "number") {
chunk.tags[index] = self.chunks[tag].title;
}
});
}
}
if(!hasProcessed) {
if(nodeType === 1 && domNode.hasChildNodes()) {
this.processNodeList(domNode.childNodes);
if(chunk.list) {
chunk.list.map(function(listItem,index) {
if(typeof listItem === "number") {
chunk.list[index] = self.chunks[listItem].title;
}
});
}
}
});
};
Slicer.prototype.makeUniqueTitle = function(rawText) {
Slicer.prototype.makeUniqueTitle = function(tiddlers,rawText) {
// Remove characters other than lowercase alphanumeric and spaces
var prefix = this.baseTiddlerTitle,
self = this,
@@ -215,45 +422,19 @@ Slicer.prototype.makeUniqueTitle = function(rawText) {
var c = 0,
s = "";
while(c < words.length && (s.length + words[c].length + 1) < 50) {
s += "-" + words[c++];
s += (s === "" ? "" : "-") + words[c++];
}
prefix = prefix + s;
}
// Check for duplicates
var baseTitle = prefix;
c = 0;
var title = baseTitle;
while(this.getTiddler(title)) {
title = baseTitle + "-" + (++c);
var title = prefix;
while(title in tiddlers) {
title = prefix + "-" + (++c);
}
return title;
};
/*
Store a shallow copy of the given fields in the extracted tiddlers hashmap, keyed by title.
Fields without a title are not stored. Returns the title.
*/
Slicer.prototype.addTiddler = function(fields) {
	var title = fields.title;
	if(title) {
		this.extractedTiddlers[title] = Object.assign({},fields);
	}
	return title;
};
/*
Add each entry of an array of tiddler fields via addTiddler. A missing array is treated as empty.
*/
Slicer.prototype.addTiddlers = function(fieldsArray) {
	var list = fieldsArray || [];
	list.forEach(function(fields) {
		this.addTiddler(fields);
	},this);
};
/*
Look up the stored fields of an extracted tiddler by title (undefined if not present)
*/
Slicer.prototype.getTiddler = function(title) {
	var store = this.extractedTiddlers;
	return store[title];
};
/*
Return the stored fields of all extracted tiddlers as an array, in the key order of the hashmap
*/
Slicer.prototype.getTiddlers = function() {
	var store = this.extractedTiddlers,
		titles = Object.keys(store),
		result = [],
		i;
	for(i = 0; i < titles.length; i++) {
		result.push(store[titles[i]]);
	}
	return result;
};
exports.Slicer = Slicer;
})();

View File

@@ -1,26 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicers/anchor.js
type: application/javascript
module-type: slicer
Handle slicing anchor nodes
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
exports.processAnchorNode = function(domNode,tagName) {
if(domNode.nodeType === 1 && tagName === "a") {
var id = domNode.getAttribute("id");
if(id) {
this.registerAnchor(id);
return true;
}
}
return false;
};
})();

View File

@@ -1,40 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicers/def-list.js
type: application/javascript
module-type: slicer
Handle slicing definition list nodes
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
exports.processDefListNode = function(domNode,tagName) {
if(domNode.nodeType === 1 && tagName === "dl") {
var title = this.makeUniqueTitle("def-list-" + tagName),
parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [];
if(domNode.className && domNode.className.trim() !== "") {
tags = tags.concat(domNode.className.split(" "));
}
this.addToList(parentTitle,title);
this.parentStack.push({type: tagName, title: this.addTiddler({
"toc-type": "def-list",
"toc-list-filter": "[list<currentTiddler>!has[draft.of]]",
text: "",
title: title,
list: [],
tags: tags
})});
this.currentTiddler = title;
this.processNodeList(domNode.childNodes);
this.parentStack.pop();
return true;
}
return false;
};
})();

View File

@@ -1,44 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicers/definition.js
type: application/javascript
module-type: slicer
Handle slicing definition nodes in definition lists
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
exports.processDefinitionNode = function(domNode,tagName) {
var text = $tw.utils.htmlEncode(domNode.textContent);
if(domNode.nodeType === 1 && tagName === "dd") {
// if(!this.isBlank(text)) {
var title = this.makeUniqueTitle("definition " + text),
parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [];
if(domNode.className && domNode.className.trim() !== "") {
tags = tags.concat(domNode.className.split(" "));
}
this.addToList(parentTitle,title);
this.addTiddler({
"toc-type": "definition",
title: title,
text: "",
list: [],
tags: tags
});
this.currentTiddler = title;
this.containerStack.push(title);
// this.containerStack.push("Just testing" + new Date());
this.processNodeList(domNode.childNodes);
this.containerStack.pop();
return true;
// }
}
return false;
};
})();

View File

@@ -1,42 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicers/heading.js
type: application/javascript
module-type: slicer
Handle slicing heading nodes
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
exports.processHeadingNode = function(domNode,tagName) {
if(domNode.nodeType === 1 && (tagName === "h1" || tagName === "h2" || tagName === "h3" || tagName === "h4")) {
var text = $tw.utils.htmlEncode(domNode.textContent);
var title = this.makeUniqueTitle("heading " + text),
parentTitle = this.popParentStackUntil(tagName),
tags = [];
if(domNode.className && domNode.className.trim() !== "") {
tags = tags.concat(domNode.className.split(" "));
}
this.addToList(parentTitle,title);
this.parentStack.push({type: tagName, title: this.addTiddler({
"toc-type": "heading",
"toc-heading-level": tagName,
title: title,
text: "",
list: [],
tags: tags
})});
this.currentTiddler = title;
this.containerStack.push(title);
this.processNodeList(domNode.childNodes);
this.containerStack.pop();
return true;
}
return false;
};
})();

View File

@@ -1,71 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicers/image.js
type: application/javascript
module-type: slicer
Handle slicing img nodes
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
/*
Slicer for <img> elements that have a src attribute.
domNode: DOM node being examined
tagName: tag name of the node as supplied by the caller
Returns true if the node was an <img> with a src attribute, false otherwise.

A "data:" URI is stored as a new tiddler of toc-type "image"; any other src is resolved with
$tw.utils.resolvePath against the base tiddler title and only referenced by title. The image
title is then linked into a list according to the toc-type of the top container tiddler.
*/
exports.processImageNode = function(domNode,tagName) {
if(domNode.nodeType === 1 && tagName === "img") {
var src = domNode.getAttribute("src");
if(src) {
var containerTitle = this.getTopContainer(),
containerTiddler = this.getTiddler(containerTitle),
title, tiddler = {
"toc-type": "image"
};
if(src.substr(0,5) === "data:") {
// Split "data:<type>;base64,<payload>" into the content type and the payload
// NOTE(review): a data URI without ";base64," leaves tiddler.text undefined - confirm this cannot occur
var parts = src.toString().substr(5).split(";base64,");
tiddler.type = parts[0];
tiddler.text = parts[1];
// The file extension registered for the content type (if any) is appended to the generated title
var contentTypeInfo = $tw.config.contentTypeInfo[tiddler.type] || {extension: ""};
title = this.makeUniqueTitle("image " + containerTitle) + contentTypeInfo.extension;
tiddler.title = title;
this.addTiddler(tiddler);
} else {
// External image: no tiddler is created, the resolved path serves as the title
title = $tw.utils.resolvePath(src,this.baseTiddlerTitle);
}
// NOTE(review): containerTiddler is dereferenced without a check - presumably the top container always exists; confirm
// Container types other than the four below leave the image unlinked
switch(containerTiddler["toc-type"]) {
case "document":
// Make the image be the next child of the document
this.addToList(containerTitle,title);
break;
case "heading":
// Make the image be the older sibling of the heading
// The heading itself is the top of the parent stack, so its own parent is one entry below
var parentTitle = this.parentStack[this.parentStack.length - 2].title;
this.insertBeforeListItem(parentTitle,title,containerTitle);
break;
case "paragraph":
// Make the image be the older sibling of the paragraph
var parentTitle = this.parentStack[this.parentStack.length - 1].title;
this.insertBeforeListItem(parentTitle,title,containerTitle);
break;
case "item":
// Create a new older sibling item to contain the image
var parentTitle = this.parentStack[this.parentStack.length - 1].title,
itemTitle = this.makeUniqueTitle("image-item-wrapper " + containerTitle),
itemTiddler = {
title: itemTitle,
"toc-type": "item",
list: [title],
text: "[img[" + title + "]]"
};
this.addTiddler(itemTiddler);
this.insertBeforeListItem(parentTitle,itemTitle,containerTitle);
break;
}
// this.appendToCurrentContainer("[img[" + title + "]]");
return true;
}
}
return false;
};
})();

View File

@@ -1,44 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicers/item.js
type: application/javascript
module-type: slicer
Handle slicing list item nodes
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
exports.processListItemNode = function(domNode,tagName) {
var text = $tw.utils.htmlEncode(domNode.textContent);
if(domNode.nodeType === 1 && tagName === "li") {
// if(!this.isBlank(text)) {
var title = this.makeUniqueTitle("list-item " + text),
parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [];
if(domNode.className && domNode.className.trim() !== "") {
tags = tags.concat(domNode.className.split(" "));
}
this.addToList(parentTitle,title);
this.addTiddler({
"toc-type": "item",
title: title,
text: "",
list: [],
tags: tags
});
this.currentTiddler = title;
this.containerStack.push(title);
// this.containerStack.push("Just testing" + new Date());
this.processNodeList(domNode.childNodes);
this.containerStack.pop();
return true;
// }
}
return false;
};
})();

View File

@@ -1,41 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicers/list.js
type: application/javascript
module-type: slicer
Handle slicing list nodes
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
exports.processListNode = function(domNode,tagName) {
if(domNode.nodeType === 1 && (tagName === "ul" || tagName === "ol")) {
var title = this.makeUniqueTitle("list " + tagName),
parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [];
if(domNode.className && domNode.className.trim() !== "") {
tags = tags.concat(domNode.className.split(" "));
}
this.addToList(parentTitle,title);
this.parentStack.push({type: tagName, title: this.addTiddler({
"toc-type": "list",
"toc-list-type": tagName,
"toc-list-filter": "[list<currentTiddler>!has[draft.of]]",
text: "",
title: title,
list: [],
tags: tags
})});
this.currentTiddler = title;
this.processNodeList(domNode.childNodes);
this.parentStack.pop();
return true;
}
return false;
};
})();

View File

@@ -1,41 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicers/paragraph.js
type: application/javascript
module-type: slicer
Handle slicing paragraph nodes
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
exports.processParagraphNode = function(domNode,tagName) {
var text = $tw.utils.htmlEncode(domNode.textContent);
if(domNode.nodeType === 1 && tagName === "p") {
if(!this.isBlank(text)) {
var parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [],
title = this.makeUniqueTitle("paragraph " + text);
if(domNode.className && domNode.className && domNode.className.trim() !== "") {
tags = tags.concat(domNode.className.split(" "));
}
this.addToList(parentTitle,this.addTiddler({
"toc-type": "paragraph",
title: title,
text: "",
tags: tags
}));
this.currentTiddler = title;
this.containerStack.push(title);
this.processNodeList(domNode.childNodes);
this.containerStack.pop();
return true;
}
}
return false;
};
})();

View File

@@ -1,44 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicers/term.js
type: application/javascript
module-type: slicer
Handle slicing term nodes in definition lists
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
exports.processTermNode = function(domNode,tagName) {
var text = $tw.utils.htmlEncode(domNode.textContent);
if(domNode.nodeType === 1 && tagName === "dt") {
// if(!this.isBlank(text)) {
var title = this.makeUniqueTitle("term " + text),
parentTitle = this.parentStack[this.parentStack.length - 1].title,
tags = [];
if(domNode.className && domNode.className.trim() !== "") {
tags = tags.concat(domNode.className.split(" "));
}
this.addToList(parentTitle,title);
this.addTiddler({
"toc-type": "term",
title: title,
text: "",
list: [],
tags: tags
});
this.currentTiddler = title;
this.containerStack.push(title);
// this.containerStack.push("Just testing" + new Date());
this.processNodeList(domNode.childNodes);
this.containerStack.pop();
return true;
// }
}
return false;
};
})();

View File

@@ -1,23 +0,0 @@
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicers/text.js
type: application/javascript
module-type: slicer
Handle slicing text nodes
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
exports.processTextNode = function(domNode,tagName) {
if(domNode.nodeType === 3) {
this.appendToCurrentContainer($tw.utils.htmlEncode(domNode.textContent));
return true;
}
return false;
};
})();

View File

@@ -22,20 +22,29 @@ exports.synchronous = true;
// Install the root widget event handlers
/*
Startup hook for the text-slicer plugin. Alerts if a required parser module is not registered in
$tw.modules.titles, and registers a handler for the "tm-slice-tiddler" message on the root widget
that constructs a textSlicer.Slicer for the tiddler named in the message.
*/
exports.startup = function() {
// Check xmldom is installed
if(!$tw.utils.hop($tw.modules.titles,"$:/plugins/tiddlywiki/xmldom/dom-parser")) {
// Check sax is installed
if(!$tw.utils.hop($tw.modules.titles,"$:/plugins/tiddlywiki/sax/sax.js")) {
// Make a logger
var logger = new $tw.utils.Logger("text-slicer");
logger.alert("The plugin 'text-slicer' requires the 'xmldom' plugin to be installed");
logger.alert("The plugin 'text-slicer' requires the 'sax' plugin to be installed");
}
// Add tm-slice-tiddler event handler
// event.param names the source tiddler; the remaining options are read from event.paramObject when it is present
$tw.rootWidget.addEventListener("tm-slice-tiddler",function(event) {
var slicer = new textSlicer.Slicer({
sourceTiddlerTitle: event.param,
slicerRules: event.paramObject && event.paramObject.slicerRules,
outputMode: event.paramObject && event.paramObject.outputMode,
baseTiddlerTitle: event.paramObject && event.paramObject.destTitle,
wiki: $tw.wiki
role: event.paramObject && event.paramObject.role,
wiki: $tw.wiki,
// Receives either an error or the generated tiddlers, which are added to the wiki
callback: function(err,tiddlers) {
if(err) {
// NOTE(review): `logger` is only assigned inside the missing-module branch above, so it looks like it would be undefined here whenever that check passes — confirm and create the logger unconditionally if so
logger.alert("Slicer error: " + err);
} else {
$tw.wiki.addTiddlers(tiddlers);
}
}
});
$tw.wiki.addTiddlers(slicer.getTiddlers());
});
};