/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicer.js
type: application/javascript
module-type: library
Slice a tiddler or DOM document into individual tiddlers
var slicer = new textSlicer.Slicer(doc,{
slicerRules: JSON data defining slicer rules -or- title of rules taken from tiddlers tagged $:/tags/text-slicer/slicer-rules
sourceTiddlerTitle: tiddler to slice -or-
sourceText: text to slice
outputMode: "html" (default) -or- "wiki"
baseTiddlerTitle: "MySlicedTiddlers-"
role: "sliced-content"
callback: function(err,tiddlers)
});
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
function Slicer(options) {
// Quick tests
this.testSlicerRuleMatching();
// Marshal parameters
this.sourceTiddlerTitle = options.sourceTiddlerTitle;
this.sourceText = options.sourceText;
this.wiki = options.wiki;
this.role = options.role || "sliced-html";
this.outputMode = options.outputMode || "html";
this.escapeWikiText = options.escapeWikiText || false;
this.callbackFn = options.callback;
// Get the slicer rules
var nameSlicerRules = null;
if(!options.slicerRules) {
nameSlicerRules = "html-by-paragraph";
this.slicerRules = this.loadSlicerRules(nameSlicerRules);
} else if(typeof options.slicerRules === "string") {
nameSlicerRules = options.slicerRules;
this.slicerRules = this.loadSlicerRules(nameSlicerRules);
} else {
this.slicerRules = options.slicerRules;
}
// Set up the base tiddler title
this.baseTiddlerTitle = this.getBaseTiddlerTitle(options.baseTiddlerTitle);
// Initialise state
this.namespaces = {}; // Hashmap of URLs
this.chunks = []; // Array of tiddlers without titles, addressed by their index. We use the title field to hold the plain text content
this.currentChunk = null; // Index of the chunk currently being written to
this.parentStack = []; // Stack of parent chunks {chunk: chunk index,actions:}
this.elementStack = []; // Stack of {tag:,isSelfClosing:,actions:}
this.titleCounts = {}; // Hashmap of counts of prefixed titles that have been issued
// Set up the document tiddler as top level heading
this.chunks.push({
"toc-type": "document",
title: this.baseTiddlerTitle,
text: "
<>
",
list: [],
tags: [],
role: this.role,
"slicer-rules": nameSlicerRules,
"slicer-output-mode": this.outputMode
});
this.parentStack.push({chunk: 0, actions: this.getMatchingSlicerRuleActions("(document)")});
this.insertPrecedingChunk({
"toc-type": "anchor",
"title": this.baseTiddlerTitle + "-anchor-"
});
// Set up the parser
var sax = require("$:/plugins/tiddlywiki/sax/sax.js");
this.sax = sax.parser(false,{
xmlns: true,
lowercase: true
});
this.sax.onerror = this.onError.bind(this);
this.sax.onopennamespace = this.onOpenNamespace.bind(this);
this.sax.onclosenamespace = this.onCloseNamespace.bind(this);
this.sax.onopentag = this.onOpenTag.bind(this);
this.sax.onclosetag = this.onCloseTag.bind(this);
this.sax.ontext = this.onText.bind(this);
this.sax.onend = this.onEnd.bind(this);
// Start streaming the data
this.sax.write(this.getSourceText());
this.sax.close();
}
Slicer.prototype.callback = function(err,tiddlers) {
var self = this;
$tw.utils.nextTick(function() {
self.callbackFn(err,tiddlers);
});
};
Slicer.prototype.loadSlicerRules = function(name) {
// Collect the available slicer rule tiddlers
var self = this,
titles = this.wiki.getTiddlersWithTag("$:/tags/text-slicer/slicer-rules"),
tiddlers = {},
rules = {},
ruleNames = [];
titles.forEach(function(title) {
var tiddler = self.wiki.getTiddler(title);
tiddlers[tiddler.fields.name] = tiddler;
rules[tiddler.fields.name] = self.wiki.getTiddlerData(title,[]);
});
// Follow the inheritance trail to get a stack of slicer rule names
var n = name;
do {
ruleNames.push(n);
n = tiddlers[n] && tiddlers[n].fields["inherits-from"];
} while(n && ruleNames.indexOf(n) === -1);
// Concatenate the slicer rules
rules = ruleNames.reduce(function(accumulator,name) {
return accumulator.concat(rules[name]);
},[]);
return rules;
};
Slicer.prototype.getMatchingSlicerRuleActions = function(name) {
var rule = this.searchSlicerRules(name,this.slicerRules,this.elementStack);
if(!rule) {
return {};
} else {
return rule.actions;
}
};
Slicer.prototype.testSlicerRuleMatching = function() {
var tests = [
{
test: this.searchSlicerRules("title",[
{selector: "title,head,body", rules: true},
{selector: "body", rules: true}
],[
{tag:"head"}
]),
result: "title,head,body"
},
{
test: this.searchSlicerRules("body",[
{selector: "title,head,body", rules: true},
{selector: "body", rules: true}
],[
{tag:"head"}
]),
result: "title,head,body"
},
{
test: this.searchSlicerRules("title",[
{selector: "head > title", rules: true},
{selector: "title", rules: true}
],[
{tag:"head"}
]),
result: "head > title"
}
],
results = tests.forEach(function(test,index) {
if(test.test.selector !== test.result) {
throw "Failing test " + index + ", returns " + test.test.selector + " instead of " + test.result;
}
});
};
Slicer.prototype.searchSlicerRules = function(name,rules,elementStack) {
return rules.find(function(rule) {
// Split and trim the selectors for this rule
return !!rule.selector.split(",").map(function(selector) {
return selector.trim();
// Find the first selector that matches, if any
}).find(function(selector) {
// Split and trim the parts of the selector
var parts = selector.split(" ").map(function(part) {
return part.trim();
});
// * matches any element
if(parts.length === 1 && parts[0] === "*") {
return true;
}
// Make a copy of the element stack so that we can be destructive
var elements = elementStack.slice(0).concat({tag: name}),
nextElementMustBeAtTopOfStack = true,
currentPart = parts.length - 1;
while(currentPart >= 0) {
if(parts[currentPart] === ">") {
nextElementMustBeAtTopOfStack = true;
} else {
if(!nextElementMustBeAtTopOfStack) {
while(elements.length > 0 && elements[elements.length - 1].tag !== parts[currentPart]) {
elements.pop();
}
}
if(elements.length === 0 || elements[elements.length - 1].tag !== parts[currentPart]) {
return false;
}
elements.pop();
nextElementMustBeAtTopOfStack = false;
}
currentPart--;
}
return true;
});
});
};
Slicer.prototype.getBaseTiddlerTitle = function(baseTiddlerTitle) {
if(baseTiddlerTitle) {
return baseTiddlerTitle
} else {
if(this.sourceTiddlerTitle) {
return "Sliced up " + this.sourceTiddlerTitle + ":";
} else {
return "SlicedTiddler";
}
}
};
Slicer.prototype.getSourceText = function() {
if(this.sourceTiddlerTitle) {
var tiddler = this.wiki.getTiddler(this.sourceTiddlerTitle);
if(!tiddler) {
console.log("Tiddler '" + this.sourceTiddlerTitle + "' does not exist");
return "";
}
if(tiddler.fields.type === "text/html" || tiddler.fields.type === "text/xml" || (tiddler.fields.type || "").slice(-4) === "+xml") {
return tiddler.fields.text;
} else {
return this.getTiddlerAsHtml(tiddler);
}
} else {
return this.sourceText;
}
};
Slicer.prototype.getTiddlerAsHtml = function(tiddler) {
var widgetNode = this.wiki.makeTranscludeWidget(tiddler.fields.title,{
document: $tw.fakeDocument,
parseAsInline: false,
importPageMacros: true}),
container = $tw.fakeDocument.createElement("div");
widgetNode.render(container,null);
return ["","","","","",container.innerHTML,"",""].join("\n");
};
Slicer.prototype.getImmediateParent = function() {
return this.parentStack.slice(-1)[0];
};
Slicer.prototype.onError = function(e) {
console.error("Sax error: ", e)
// Try to resume after errors
this.sax.error = null;
this.sax.resume();
};
Slicer.prototype.onOpenNamespace = function(info) {
this.namespaces[info.prefix] = info.uri;
};
Slicer.prototype.onCloseNamespace = function(info) {
};
Slicer.prototype.onOpenTag = function(node) {
var actions = this.getMatchingSlicerRuleActions(node.name);
// Create an anchor if we encounter an ID
if(node.attributes.id) {
this.insertPrecedingChunk({
"toc-type": "anchor",
"title": this.baseTiddlerTitle + "-anchor-" + node.attributes.id.value
});
}
// Check for an element that should start a new chunk
if(actions.startNewChunk) {
// If this is a heading, pop off any higher or equal level headings first
if(actions.isParent && actions.headingLevel) {
var parentActions = this.getImmediateParent().actions;
while(parentActions.isParent && parentActions.headingLevel && parentActions.headingLevel >= actions.headingLevel) {
this.parentStack.pop();
parentActions = this.getImmediateParent().actions;
}
}
// Start the new chunk
this.startNewChunk(actions.startNewChunk);
// If this is a parent then also add it to the parent stack
if(actions.isParent) {
this.parentStack.push({chunk: this.currentChunk, actions: actions});
}
}
// Render the tag inline in the current chunk unless we should ignore it
if(!actions.dontRenderTag) {
if(actions.isImage) {
this.onOpenImage(node);
} else if(actions.isAnchor) {
this.onOpenAnchor(node);
} else {
var markupInfo = actions.markup && actions.markup[this.outputMode];
if(markupInfo) {
this.addTextToCurrentChunk(markupInfo.prefix);
} else {
this.addTextToCurrentChunk("<" + node.name + (node.isSelfClosing ? "/" : "") + ">");
}
}
}
// Remember whether this tag is self closing
this.elementStack.push({tag: node.name,isSelfClosing: node.isSelfClosing, actions: actions, node: node});
};
Slicer.prototype.onOpenAnchor = function(node) {
if(node.attributes.href) {
var value = node.attributes.href.value;
if(value.indexOf("https://") === 0 || value.indexOf("http://") === 0) {
// External link
this.addTextToCurrentChunk("");
} else {
// Internal link
var parts = value.split("#"),
base = parts[0],
hash = parts[1] || "",
title = $tw.utils.resolvePath(base,this.baseTiddlerTitle) + "-anchor-" + hash;
this.addTextToCurrentChunk("<$link to=\"" + title + "\">");
}
}
};
Slicer.prototype.onCloseAnchor = function(elementInfo) {
if(elementInfo.node.attributes.href) {
var value = elementInfo.node.attributes.href.value;
if(value.indexOf("https://") === 0 || value.indexOf("http://") === 0) {
// External link
this.addTextToCurrentChunk("");
} else {
// Internal link
this.addTextToCurrentChunk("$link>");
}
}
};
Slicer.prototype.onOpenImage = function(node) {
var url = node.attributes.src.value;
if(url.slice(0,5) === "data:") {
// var parts = url.slice(5).split(",");
// this.chunks.push({
// title: ,
// text: parts[1],
// type: parts[0].split[";"][0],
// role: this.role
// });
}
this.addTextToCurrentChunk("[img[" + $tw.utils.resolvePath(url,this.baseTiddlerTitle) + "]]");
};
Slicer.prototype.onCloseTag = function(name) {
var e = this.elementStack.pop(),
actions = e.actions,
selfClosing = e.isSelfClosing;
// Set the caption if required
// TODO
// if(actions.setCaption) {
// this.chunks[this.currentChunk].caption = this.chunks[this.currentChunk].title;
// }
// Render the tag
if(actions.isAnchor) {
this.onCloseAnchor(e);
} else if(!actions.dontRenderTag && !selfClosing) {
var markupInfo = actions.markup && actions.markup[this.outputMode];
if(markupInfo) {
this.addTextToCurrentChunk(markupInfo.suffix);
} else {
this.addTextToCurrentChunk("" + name + ">");
}
}
// Check for an element that started a new chunk
if(actions.startNewChunk) {
if(!actions.mergeNext) {
this.currentChunk = null;
}
// If this is a parent and not a heading then also pop it from the parent stack
if(actions.isParent && !actions.headingLevel) {
this.parentStack.pop();
}
}
};
Slicer.prototype.onText = function(text) {
var self = this;
// Discard the text if we're inside an element with actions.discard set true
if(this.elementStack.some(function(e) {return e.actions.discard;})) {
return;
}
// Optionally escape common character sequences that might be parsed as wikitext
text = $tw.utils.htmlEncode(text);
if(this.escapeWikiText) {
$tw.utils.each(["[[","{{","__","''","//",",,","^^","~~","`","--","\"\"","@@"],function(str) {
var replace = str.split("").map(function(c) {
return "" + c.charCodeAt(0) + ";";
}).join("");
text = text.replace(new RegExp($tw.utils.escapeRegExp(str),"mg"),replace);
});
}
this.addTextToCurrentChunk(text);
this.addTextToCurrentChunk(text,"caption");
};
Slicer.prototype.onEnd = function() {
this.callback(null,this.chunks);
};
Slicer.prototype.addTextToCurrentChunk = function(str,field) {
field = field || "text";
if(this.currentChunk === null && str.trim() !== "") {
this.startNewChunk({
title: this.makeTitle("paragraph"),
"toc-type": "paragraph"
});
}
if(this.currentChunk !== null) {
this.chunks[this.currentChunk][field] += str;
}
};
Slicer.prototype.startNewChunk = function(fields) {
var title = fields.title || this.makeTitle(fields["toc-type"]);
var parentChunk = this.chunks[this.getImmediateParent().chunk];
this.chunks.push($tw.utils.extend({},{
title: title,
text: "",
caption: "",
tags: [parentChunk.title],
list: [],
role: this.role
},fields));
this.currentChunk = this.chunks.length - 1;
parentChunk.list.push(title);
};
Slicer.prototype.insertPrecedingChunk = function(fields) {
if(!fields.title) {
throw "Chunks need a title"
}
if(!this.currentChunk) {
this.startNewChunk(fields);
this.currentChunk = null;
} else {
var parentChunk = this.chunks[this.getImmediateParent().chunk],
index = this.chunks.length - 1;
// Insert the new chunk
this.chunks.splice(index,0,$tw.utils.extend({},{
text: "",
caption: "",
tags: [parentChunk.title],
list: [],
role: this.role
},fields));
// Adjust the current chunk pointer
this.currentChunk += 1;
// Insert a pointer to the new chunk in the parent
parentChunk.list.splice(parentChunk.list.length - 1,0,fields.title);
}
};
Slicer.prototype.isBlank = function(s) {
return (/^[\s\xA0]*$/g).test(s);
};
Slicer.prototype.makeTitle = function(prefix) {
prefix = prefix || "";
var count = (this.titleCounts[prefix] || 0) + 1;
this.titleCounts[prefix] = count;
return this.baseTiddlerTitle + "-" + prefix + "-" + count;
};
exports.Slicer = Slicer;
})();