mirror of
https://github.com/janeczku/calibre-web
synced 2024-11-13 21:34:54 +00:00
99c05650a1
-cfi resolving is now done by that module, therefore removed validateChildNodes and adjusted cfiToXmlNode
1063 lines
27 KiB
JavaScript
Vendored
1063 lines
27 KiB
JavaScript
Vendored
'use strict';
|
|
// https://github.com/fread-ink/epub-cfi-resolver
|
|
// Node-type constants used by the sibling-counting and resolving code below.
// In the browser they are read from the global `Node` interface; when `Node`
// is not defined (e.g. under node.js) the numeric values are set directly.
var ELEMENT_NODE;
var TEXT_NODE;
var CDATA_SECTION_NODE;

if(typeof Node !== 'undefined') { // In the browser
  ELEMENT_NODE = Node.ELEMENT_NODE;
  TEXT_NODE = Node.TEXT_NODE;
  CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE;
} else { // If using node.js
  ELEMENT_NODE = 1;
  TEXT_NODE = 3;
  CDATA_SECTION_NODE = 4;
}
|
|
|
|
// Escape a string (e.g. an element id) for embedding inside a CFI by
// prefixing every CFI special character with a circumflex.
// The escaped set is: [ ] ^ , ( ) ; and =
// ('=' is included because the CFI grammar lists it among the special
// characters; the parser in this file un-escapes any `^x` pair.)
function cfiEscape(str) {
  return str.replace(/[\[\]\^,();=]/g, "^$&");
}
|
|
|
|
// Get the indices of all (non-overlapping) matches of regExp in str.
// The returned indices are relative to the start of the original `str`.
// If `add` is supplied it is added to every returned index
// (defaults to 0).
//
// `regExp` is applied to the remaining tail of the string on every
// iteration, so anchors such as `^` are evaluated against that tail.
function matchAll(str, regExp, add) {
  add = add || 0;

  // String.prototype.match with a global regex returns no `index`,
  // so always work with a non-global copy.
  if(regExp && regExp.global) {
    regExp = new RegExp(regExp.source, regExp.flags.replace('g', ''));
  }

  var matches = [];
  var offset = 0; // position in the original string where the next search starts
  var m;
  while(offset <= str.length) {
    m = str.slice(offset).match(regExp);
    if(!m) break;

    matches.push(offset + m.index + add);

    // Skip past the matched text; always advance by at least one character
    // so that a zero-length match cannot loop forever.
    offset += m.index + (m[0].length || 1);
  }

  return matches;
}
|
|
|
|
// Get the number in array `a` that has the smallest absolute difference to `n`.
// On a tie the earliest element wins. Returns undefined for an empty array.
function closest(a, n) {
  var minDiff;
  var nearest;
  var i, diff;
  for(i = 0; i < a.length; i++) {
    diff = Math.abs(a[i] - n);
    // The first element is always taken as the initial candidate
    if(i === 0 || diff < minDiff) {
      minDiff = diff;
      nearest = a[i];
    }
  }
  return nearest;
}
|
|
|
|
// Compute the CFI child index of node `n` among its siblings `nodes`.
//
// Counting rules implemented here:
//   - an element that is the first counted node, or that follows another
//     element, advances the index by 2; an element following text advances by 1
//   - a text/CDATA node that is the first counted node, or that follows an
//     element, advances the index by 1; consecutive text nodes share one index
//   - any other node type is skipped entirely
//
// Returns {count} for elements ({count, offset} for <img>, passing `offset`
// through unchanged) and {count, offset} for text nodes, where the offset has
// the lengths of the directly preceding text siblings added to it.
// Throws if `n` is not found among `nodes`.
function calcSiblingCount(nodes, n, offset) {
  let cfiIndex = 0;
  let precedingTextLength = 0;
  let prevWasElement;
  let nothingCounted = true;

  for(let idx = 0; idx < nodes.length; idx++) {
    const current = nodes[idx];
    const type = current.nodeType;
    const startsNewSlot = prevWasElement || nothingCounted;

    if(type === ELEMENT_NODE) {
      cfiIndex += startsNewSlot ? 2 : 1;
      nothingCounted = false;

      if(current === n) {
        if(current.tagName.toLowerCase() === 'img') {
          return {count: cfiIndex, offset: offset};
        }
        return {count: cfiIndex};
      }

      // An element ends the current run of text nodes
      precedingTextLength = 0;
      prevWasElement = true;
      continue;
    }

    if(type === TEXT_NODE || type === CDATA_SECTION_NODE) {
      if(startsNewSlot) {
        cfiIndex += 1;
        nothingCounted = false;
      }

      if(current === n) {
        return {count: cfiIndex, offset: offset + precedingTextLength};
      }

      precedingTextLength += current.textContent.length;
      prevWasElement = false;
    }
  }

  throw new Error("The specified node was not found in the array of siblings");
}
|
|
|
|
// Comparator for the temporal component of two CFI subparts.
// A value only takes part in the comparison when it is a number;
// a missing (non-number) value sorts before a present one.
// Returns a negative number, zero or a positive number.
function compareTemporal(a, b) {
  const aIsNumber = (typeof a === 'number');
  const bIsNumber = (typeof b === 'number');

  if(aIsNumber && bIsNumber) {
    return (a || 0.0) - (b || 0.0);
  }
  if(aIsNumber) return 1;
  if(bIsNumber) return -1;
  return 0;
}
|
|
|
|
// Comparator for the spatial component ({x, y}) of two CFI subparts.
// A missing value sorts before a present one. Present values are ordered
// by `y` first and by `x` when the `y` values do not differ; a missing
// coordinate is treated as 0.
function compareSpatial(a, b) {
  if(!a) return b ? -1 : 0;
  if(!b) return 1;

  const verticalDiff = (a.y || 0) - (b.y || 0);
  const horizontalDiff = (a.x || 0) - (b.x || 0);

  return verticalDiff || horizontalDiff;
}
|
|
|
|
// Parser, comparator and resolver for EPUB CFI strings of the form
// `epubcfi(...)`.
//
// After construction an instance exposes:
//   this.cfi     - the original string passed to the constructor
//   this.parts   - array of "parts" (one per document, split on '!'),
//                  each part being an array of parsed subpart objects
//   this.from    - (ranges only) subparts of the range start
//   this.to      - (ranges only) subparts of the range end
//   this.isRange - true when the CFI is a range that was not flattened
//
// A parsed subpart object may carry: nodeIndex, nodeID, offset, temporal,
// spatial ({x, y}), textLocationAssertion and sideBias.
class CFI {

  // Parse the CFI string `str`.
  // Throws if `str` is not wrapped in `epubcfi(...)`, if a step has no
  // child node index, or if a range spans multiple documents.
  constructor(str, opts) {
    this.opts = Object.assign({
      // If CFI is a Simple Range, pretend it isn't
      // by parsing only the start of the range
      flattenRange: false,
      // Strip temporal, spatial, offset and textLocationAssertion
      // from places where they don't make sense
      stricter: true
    }, opts || {});

    this.cfi = str;
    const isCFI = /^epubcfi\((.*)\)$/;

    str = str.trim();
    var m = str.match(isCFI);
    if(!m) throw new Error("Not a valid CFI");

    str = m[1];
    this.parts = [];

    var parsed, offset, newDoc;
    var subParts = [];
    var sawComma = 0; // number of range-separating commas consumed so far
    while(str.length) {
      ({parsed, offset, newDoc} = this.parse(str));
      if(!parsed || offset === null) throw new Error("Parsing failed");
      if(sawComma && newDoc) throw new Error("CFI is a range that spans multiple documents. This is not allowed");

      subParts.push(parsed);

      // Handle end of string
      if(newDoc || str.length - offset <= 0) {
        // Handle end if this was a range
        if(sawComma === 2) {
          this.to = subParts;
        } else { // not a range
          this.parts.push(subParts);
        }
        subParts = [];
      }

      str = str.slice(offset);

      // Handle Simple Ranges
      if(str[0] === ',') {
        if(sawComma === 0) {
          // Everything before the first comma is the shared parent path
          if(subParts.length) {
            this.parts.push(subParts);
          }
          subParts = [];
        } else if(sawComma === 1) {
          // Everything between the first and second comma is the range start
          if(subParts.length) {
            this.from = subParts;
          }
          subParts = [];
        }
        str = str.slice(1);
        sawComma++;
      }
    }

    if(this.from && this.from.length) {
      if(this.opts.flattenRange || !this.to || !this.to.length) {
        // Collapse the range to its start: the start subparts continue the
        // path of the last part (same layout as getFrom() produces).
        if(this.parts.length) {
          const last = this.parts.length - 1;
          this.parts[last] = this.parts[last].concat(this.from);
        } else {
          this.parts.push(this.from);
        }
        delete this.from;
        delete this.to;
      } else {
        this.isRange = true;
      }
    }

    if(this.opts.stricter) {
      this.removeIllegalOpts();
    }
  }

  // Delete temporal, spatial, offset and textLocationAssertion from every
  // subpart that is not the last subpart of its list.
  // `parts` is an array of parts (arrays of subparts). When omitted, this
  // instance's own `parts` are processed, plus `from` and `to` if present.
  removeIllegalOpts(parts) {
    const stripAllButLast = (subparts) => {
      for(let j = 0; j < subparts.length - 1; j++) {
        const subpart = subparts[j];
        delete subpart.temporal;
        delete subpart.spatial;
        delete subpart.offset;
        delete subpart.textLocationAssertion;
      }
    };

    if(!parts) {
      // `from` and `to` are flat lists of subparts, not lists of parts
      if(this.from) stripAllButLast(this.from);
      if(this.to) stripAllButLast(this.to);
      parts = this.parts;
    }

    for(let i = 0; i < parts.length; i++) {
      stripAllButLast(parts[i]);
    }
  }

  // Build the CFI path (without the `epubcfi(...)` wrapper) for `node`
  // by walking up through its ancestors. `offset` is appended as `:offset`
  // when calcSiblingCount reports a non-zero offset for `node` itself.
  // `extra` is accepted but not used here.
  static generatePart(node, offset, extra) {

    var cfi = '';
    var o;

    // The leading path of CFI corresponding to the 'spine' element must be relative
    // to the ancestor 'package' element. If this is a spine child element, we need
    // to stop traversing when we reach the 'package' node.
    var isSpineElement = (node.parentNode.nodeName === 'spine');

    while(node.parentNode) {
      o = calcSiblingCount(node.parentNode.childNodes, node, offset);
      // Only the innermost step (cfi still empty) may carry the offset
      if(!cfi && o.offset) cfi = ':'+o.offset;

      cfi = '/'+o.count+((node.id) ? '['+cfiEscape(node.id)+']' : '') + cfi;

      node = node.parentNode;

      if(isSpineElement && node.nodeName === 'package'){
        break;
      }
    }

    return cfi;
  }

  // Generate a complete `epubcfi(...)` string.
  // `node` is either a single node (used together with `offset`) or an
  // array of {node, offset} objects, one per document, whose paths are
  // joined with '!'. `extra`, if given, is appended verbatim to the path.
  static generate(node, offset, extra) {
    var cfi;

    if(node instanceof Array) {
      var strs = [];
      for(let o of node) {
        strs.push(this.generatePart(o.node, o.offset));
      }
      cfi = strs.join('!');
    } else {
      cfi = this.generatePart(node, offset, extra);
    }

    if(extra) cfi += extra;

    return 'epubcfi('+cfi+')';
  }

  // Return the parsed path of `cfi` (a CFI string or CFI instance).
  // For a range the path of the range start is returned.
  static toParsed(cfi) {
    if(typeof cfi === 'string') cfi = new this(cfi);
    if(cfi.isRange) {
      return cfi.getFrom();
    } else {
      return cfi.get();
    }
  }


  // Takes two CFI paths and compares them
  // A path that runs out of parts first sorts before the longer one.
  static comparePath(a, b) {
    const max = Math.max(a.length, b.length);

    var i, cA, cB, diff;
    for(i=0; i < max; i++) {
      cA = a[i];
      cB = b[i];
      if(!cA) return -1;
      if(!cB) return 1;

      diff = this.compareParts(cA, cB);
      if(diff) return diff;
    }
    return 0;
  }

  // Sort an array of CFI objects (in place)
  static sort(a) {
    a.sort((a, b) => {
      return this.compare(a, b)
    });
  }

  // Takes two CFI objects and compares them.
  // Two ranges are ordered by their start and then by their end;
  // a range compared against a non-range is represented by its start.
  static compare(a, b) {
    var oA = a.get();
    var oB = b.get();
    if(a.isRange || b.isRange) {
      if(a.isRange && b.isRange) {
        var diff = this.comparePath(oA.from, oB.from);
        if(diff) return diff;
        return this.comparePath(oA.to, oB.to);
      }
      if(a.isRange) oA = oA.from;
      if(b.isRange) oB = oB.from;

      return this.comparePath(oA, oB);

    } else { // neither a nor b is a range

      return this.comparePath(oA, oB);
    }
  }

  // Takes two parsed path parts (assuming path is split on '!') and compares them.
  static compareParts(a, b) {
    const max = Math.max(a.length, b.length);

    var i, cA, cB, diff;
    for(i=0; i < max; i++) {
      cA = a[i];
      cB = b[i];
      if(!cA) return -1;
      if(!cB) return 1;

      diff = cA.nodeIndex - cB.nodeIndex;
      if(diff) return diff;

      // The paths must be equal if the "before the first node" syntax is used
      // and this must be the last subpart (assuming a valid CFI)
      if(cA.nodeIndex === 0) {
        return 0;
      }

      // Don't bother comparing offsets, temporals or spatials
      // unless we're on the last element, since they're not
      // supposed to be on elements other than the last
      if(i < max - 1) continue;

      // Only compare spatials or temporals for element nodes
      if(cA.nodeIndex % 2 === 0) {

        diff = compareTemporal(cA.temporal, cB.temporal);
        if(diff) return diff;

        diff = compareSpatial(cA.spatial, cB.spatial);
        if(diff) return diff;

      }

      diff = (cA.offset || 0) - (cB.offset || 0);
      if(diff) return diff;
    }
    return 0;
  }

  // Decode HTML/XML entities in `str` by letting `dom` parse it inside a
  // scratch <textarea> element. Always returns a string:
  //   - '' when `str` is null or undefined
  //   - the decoded text when decoding works
  //   - `str` unchanged when the DOM throws or yields no text
  decodeEntities(dom, str) {
    if(str === undefined || str === null) return '';
    try {
      const el = dom.createElement('textarea');
      el.innerHTML = str;
      // `value` exists on HTML textareas; fall back to textContent for
      // documents where the created element has no `value` property.
      const decoded = (typeof el.value === 'string') ? el.value : el.textContent;
      return (typeof decoded === 'string') ? decoded : str;
    } catch(err) {
      // TODO fall back to simpler decode?
      // e.g. regex match for stuff like &#160; and &nbsp;
      return str;
    }
  }

  // decode HTML/XML entities and compute length
  trueLength(dom, str) {
    let x=this.decodeEntities(dom, str);
    return x.length;
  }

  // Return a copy of the full path to the start of the range
  // (shared parent path with the `from` subparts appended to its last part).
  // Throws if this CFI is not a range.
  getFrom() {
    if(!this.isRange) throw new Error("Trying to get beginning of non-range CFI");
    if(!this.from) {
      return this.deepClone(this.parts);
    }
    const parts = this.deepClone(this.parts);
    parts[parts.length-1] = parts[parts.length-1].concat(this.from);
    return parts;
  }

  // Return a copy of the full path to the end of the range
  // (shared parent path with the `to` subparts appended to its last part).
  // Throws if this CFI is not a range.
  getTo() {
    if(!this.isRange) throw new Error("Trying to get end of non-range CFI");
    const parts = this.deepClone(this.parts);
    parts[parts.length-1] = parts[parts.length-1].concat(this.to);
    return parts
  }

  // Return a copy of the parsed CFI: the array of parts for a non-range,
  // or {from, to, isRange: true} for a range.
  get() {
    if(this.isRange) {
      return {
        from: this.getFrom(),
        to: this.getTo(),
        isRange: true
      };
    }
    return this.deepClone(this.parts);
  }

  // Interpret the content `loc` of a `[...]` text location assertion
  // (the portion after any comma) and store the result on subpart `o`.
  // A trailing `;s=b` / `;s=a` sets o.sideBias to 'before' / 'after';
  // any remaining text becomes the assertion, stored as `.post` when the
  // pre-part was already recorded as an object.
  parseSideBias(o, loc) {
    if(!loc) return;
    const m = loc.trim().match(/^(.*);s=([ba])$/);
    if(!m || m.length < 3) {
      if(typeof o.textLocationAssertion === 'object') {
        o.textLocationAssertion.post = loc;
      } else {
        o.textLocationAssertion = loc;
      }
      return;
    }
    if(m[1]) {
      if(typeof o.textLocationAssertion === 'object') {
        o.textLocationAssertion.post = m[1];
      } else {
        o.textLocationAssertion = m[1];
      }
    }

    if(m[2] === 'a') {
      o.sideBias = 'after';
    } else {
      o.sideBias = 'before';
    }
  }

  // Parse a spatial offset of the form `x:y` (digits and dots only) into
  // {x, y}. Fractional values are kept. Returns undefined when `range` is
  // empty, malformed or contains something that is not a number.
  parseSpatialRange(range) {
    if(!range) return undefined;
    const m = range.trim().match(/^([\d\.]+):([\d\.]+)$/);
    if(!m || m.length < 3) return undefined;
    const o = {
      x: parseFloat(m[1]),
      y: parseFloat(m[2]),
    };
    // e.g. "." passes the regex but is not a number
    if(Number.isNaN(o.x) || Number.isNaN(o.y)) {
      return undefined;
    }
    return o;
  }

  // Parse a single step (subpart) from the beginning of `cfi`.
  //
  // This is a character-by-character state machine. `state` is one of:
  //   '/'      reading the child node index
  //   ':'      reading a character offset
  //   '@'      reading a spatial offset `x:y`
  //   '~'      reading a temporal offset
  //   '['      reading a text location assertion (only entered after ':')
  //   'nodeID' reading an id assertion (only entered after '/')
  //   '!'      an indirection was hit; the step ends and a new document starts
  //   null     between tokens
  // `f` accumulates the characters of the token currently being read and
  // `escape` is true when the previous character was an unescaped '^'.
  // The loop deliberately runs one iteration past the end of the string
  // (with cur === '') so that a token still being read is flushed.
  //
  // Returns {parsed, offset, newDoc} where `offset` is the number of
  // characters of `cfi` consumed and `newDoc` is true when the step was
  // terminated by '!'. Throws if no child node index was found.
  parse(cfi) {
    var o = {};
    const isNumber = /[\d]/;
    var f;
    var state;
    var prevState;
    var cur, escape;
    var seenColon = false;
    var seenSlash = false;
    var i;
    for(i=0; i <= cfi.length; i++) {
      if(i < cfi.length) {
        cur = cfi[i];
      } else {
        cur = '';
      }
      if(cur === '^' && !escape) {
        escape = true;
        continue;
      }

      if(state === '/') {
        if(cur.match(isNumber)) {
          if(!f) {
            f = cur;
          } else {
            f += cur;
          }
          escape = false;
          continue;
        } else {
          if(f) {
            o.nodeIndex = parseInt(f, 10);
            f = null;
          }
          prevState = state;
          state = null;
        }
      }

      if(state === ':') {
        if(cur.match(isNumber)) {
          if(!f) {
            f = cur;
          } else {
            f += cur;
          }
          escape = false;
          continue;
        } else {
          if(f) {
            o.offset = parseInt(f, 10);
            f = null;
          }
          prevState = state;
          state = null;
        }
      }

      if(state === '@') {
        let done = false;
        if(cur.match(isNumber) || cur === '.' || cur === ':') {
          if(cur === ':') {
            // Only one ':' belongs to the spatial offset;
            // a second one ends it
            if(!seenColon) {
              seenColon = true;
            } else {
              done = true;
            }
          }
        } else {
          done = true;
        }
        if(!done) {
          if(!f) {
            f = cur;
          } else {
            f += cur;
          }
          escape = false;
          continue;
        } else {
          prevState = state;
          state = null;
          if(f && seenColon) o.spatial = this.parseSpatialRange(f);
          f = null;
        }
      }

      if(state === '~' ) {
        if(cur.match(isNumber) || cur === '.') {
          if(!f) {
            f = cur;
          } else {
            f += cur;
          }
          escape = false;
          continue;
        } else {
          if(f) {
            o.temporal = parseFloat(f);
          }
          prevState = state;
          state = null;
          f = null;
        }
      }

      if(!state) {
        if(cur === '!') {
          // Consume the '!' itself so the caller continues after it
          i++;
          state = cur;
          break;
        }

        if(cur === ',') {
          // Range separator: left in place for the caller to handle
          break;
        }

        if(cur === '/') {
          if(seenSlash) {
            // Start of the next step
            break;
          } else {
            seenSlash = true;
            prevState = state;
            state = cur;
            escape = false;
            continue;
          }
        }

        if(cur === ':' || cur === '~' || cur === '@') {
          if(this.opts.stricter) {
            // We've already had a temporal or spatial indicator
            // and offset does not make sense and the same time
            if(cur === ':' && (typeof o.temporal !== 'undefined' || typeof o.spatial !== 'undefined')) {
              break;
            }
            // We've already had an offset
            // and temporal or spatial do not make sense at the same time
            if((cur === '~' || cur === '@') && (typeof o.offset !== 'undefined')) {
              break;
            }
          }
          prevState = state;
          state = cur;
          escape = false;
          seenColon = false; // only relevant for '@'
          continue;
        }

        if(cur === '[' && !escape && prevState === ':') {
          prevState = state;
          state = '[';
          escape = false;
          continue;
        }

        if(cur === '[' && !escape && prevState === '/') {
          prevState = state;
          state = 'nodeID';
          escape = false;
          continue;
        }
      }


      if(state === '[') {
        if(cur === ']' && !escape) {
          prevState = state;
          state = null;
          this.parseSideBias(o, f);
          f = null;
        } else if(cur === ',' && !escape) {
          // Text before the comma is the "pre" part of the assertion
          o.textLocationAssertion = {};
          if(f) {
            o.textLocationAssertion.pre = f;
          }
          f = null;
        } else {
          if(!f) {
            f = cur;
          } else {
            f += cur;
          }
        }
        escape = false;
        continue;
      }

      if(state === 'nodeID') {
        if(cur === ']' && !escape) {
          prevState = state;
          state = null;
          o.nodeID = f;
          f = null;
        } else {
          if(!f) {
            f = cur;
          } else {
            f += cur;
          }
        }
        escape = false;
        continue;
      }

      escape = false;
    }

    if(!o.nodeIndex && o.nodeIndex !== 0) throw new Error("Missing child node index in CFI");

    return {parsed: o, offset: i, newDoc: (state === '!')};
  }

  // The CFI counts child nodes differently from the DOM
  // Retrive the child of parentNode at the specified index
  // according to the CFI standard way of counting
  //
  // Returns {node, offset} and, when the index points before the first or
  // after the last child, also `relativeToNode: 'before' | 'after'`.
  // `offset` is the character offset requested for a text node (it is
  // reduced by the length of each preceding text node in the same run);
  // for an <img> element a truthy offset is passed through unchanged.
  getChildNodeByCFIIndex(dom, parentNode, index, offset) {
    const children = parentNode.childNodes;
    if(!children.length) return {node: parentNode, offset: 0};

    // index is pointing to the virtual node before the first node
    // as defined in the CFI spec
    if(index <= 0) {
      return {node: children[0], relativeToNode: 'before', offset: 0}
    }

    var cfiCount = 0;
    var lastChild;
    var i, child;
    for(i=0; i < children.length; i++) {
      child = children[i];
      switch(child.nodeType) {
      case ELEMENT_NODE:

        // If the previous node was also an element node
        // then we have to pretend there was a text node in between
        // the current and previous nodes (according to the CFI spec)
        // so we increment cfiCount by two
        if(cfiCount % 2 === 0) {
          cfiCount += 2;
          if(cfiCount >= index) {
            if(child.tagName.toLowerCase() === 'img' && offset) {
              return {node: child, offset}
            }
            return {node: child, offset: 0}
          }
        } else { // Previous node was a text node
          cfiCount += 1;
          if(cfiCount === index) {
            if(child.tagName.toLowerCase() === 'img' && offset) {
              return {node: child, offset}
            }

            return {node: child, offset: 0}

            // This happens when offset into the previous text node was greater
            // than the number of characters in that text node
            // So we return a position at the end of the previous text node
          } else if(cfiCount > index) {
            if(!lastChild) {
              return {node: parentNode, offset: 0};
            }
            return {node: lastChild, offset: this.trueLength(dom, lastChild.textContent)};
          }
        }
        lastChild = child;
        break;
      case TEXT_NODE:
      case CDATA_SECTION_NODE:
        // If this is the first node or the previous node was an element node
        if(cfiCount === 0 || cfiCount % 2 === 0) {
          cfiCount += 1;
        } else {
          // If previous node was a text node then they should be combined
          // so we count them as one, meaning we don't increment the count
        }

        if(cfiCount === index) {
          // If offset is greater than the length of the current text node
          // then we assume that the next node will also be a text node
          // and that we'll be combining them with the current node
          let trueLength = this.trueLength(dom, child.textContent);

          if(offset >= trueLength) {
            offset -= trueLength;
          } else {
            return {node: child, offset: offset}
          }
        }
        lastChild = child;
        break;
      default:
        continue
      }
    }

    // index is pointing to the virtual node after the last child
    // as defined in the CFI spec
    if(index > cfiCount) {
      var o = {relativeToNode: 'after', offset: 0};
      if(!lastChild) {
        o.node = parentNode;
      } else {
        o.node = lastChild;
      }
      if(this.isTextNode(o.node)) {
        o.offset = this.trueLength(dom, o.node.textContent);
      }
      return o;
    }

    // The index matched the trailing run of text nodes but the offset was
    // at (or beyond) the end of that run: return the end of the last node
    // instead of falling off the end of the function.
    if(!lastChild) {
      return {node: parentNode, offset: 0};
    }
    if(this.isTextNode(lastChild)) {
      return {node: lastChild, offset: this.trueLength(dom, lastChild.textContent)};
    }
    return {node: lastChild, offset: 0};
  }

  // True if `node` is a text or CDATA section node
  isTextNode(node) {
    if(!node) return false;
    return (node.nodeType === TEXT_NODE || node.nodeType === CDATA_SECTION_NODE);
  }

  // Use a Text Location Assertion to correct an offset
  //
  // `assertion` is either a string or an object {pre, post}. The run of
  // adjacent text nodes around `node` is concatenated and searched for the
  // assertion text; the match closest to `offset` is converted back into a
  // {node, offset} pair. If `node` is not a text node {node, offset: 0} is
  // returned, and if nothing matches the input {node, offset} is returned.
  // The `assertion` object passed in is not modified.
  correctOffset(dom, node, offset, assertion) {
    if(!(this.isTextNode(node))) {
      return {node, offset: 0};
    }

    // The assertion is literal text, so neutralise regex metacharacters
    const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    var matchStr;
    var matchOffset = 0;
    if(typeof assertion === 'string') {
      matchStr = escapeRegExp(this.decodeEntities(dom, assertion));
    } else {
      const pre = this.decodeEntities(dom, assertion.pre);
      const post = this.decodeEntities(dom, assertion.post);
      // NOTE(review): pre and post are joined by a one-character wildcard,
      // as in the original code; confirm whether they should be adjacent.
      matchStr = escapeRegExp(pre) + '.' + escapeRegExp(post);
      // The corrected position lies after the "pre" text
      matchOffset = pre.length;
    }

    // Rewind to the first node in this run of adjacent text nodes
    var curNode = node;
    while(this.isTextNode(curNode.previousSibling)) {
      curNode = curNode.previousSibling;
    }

    const startNode = curNode;
    var str;
    const nodeLengths = [];
    var txt = '';
    var i = 0;
    while(this.isTextNode(curNode)) {

      str = this.decodeEntities(dom, curNode.textContent);
      nodeLengths[i] = str.length;
      txt += str;

      if(!curNode.nextSibling) break;
      curNode = curNode.nextSibling;
      i++;
    }

    // Find all matches to the Text Location Assertion
    const m = matchAll(txt, new RegExp(matchStr), matchOffset);
    if(!m.length) return {node, offset};

    // Get the match that has the closest offset to the existing offset
    var newOffset = closest(m, offset);

    if(curNode === node && newOffset === offset) {
      return {node, offset};
    }

    // Convert the offset into the concatenated text back into
    // a node and an offset into that node
    i = 0;
    curNode = startNode;
    while(newOffset >= nodeLengths[i]) {

      newOffset -= nodeLengths[i];
      if(newOffset < 0) return {node, offset}

      if(!curNode.nextSibling || i+1 >= nodeLengths.length) return {node, offset}
      i++;
      curNode = curNode.nextSibling;
    }

    return {node: curNode, offset: newOffset};
  }

  // Resolve the list of `subparts` (one part of the CFI) against the
  // document `dom` and return {node, offset[, relativeToNode]}.
  // `index` is the position of the part within the CFI: for index 0 the
  // walk starts at the <package> element when the document has one,
  // otherwise at the first element child of the document.
  // Unless opts.ignoreIDs is set, the walk starts from the last subpart
  // whose nodeID can be found in the document.
  resolveNode(index, subparts, dom, opts) {
    opts = Object.assign({}, opts || {});
    if(!dom) throw new Error("Missing DOM argument");

    // Traverse backwards until a subpart with a valid ID is found
    // or the first subpart is reached
    var startNode;
    if(index === 0) {
      startNode = dom.querySelector('package');
    }

    if(!startNode) {
      for(let n of dom.childNodes) {
        if(n.nodeType === ELEMENT_NODE) {
          startNode = n;
          break;
        }
      }
    }
    if(!startNode) throw new Error("Document incompatible with CFIs");

    var node = startNode;
    var startFrom = 0;
    var i, subpart;
    for(i=subparts.length-1; i >=0; i--) {
      subpart = subparts[i];
      if(!opts.ignoreIDs && subpart.nodeID && (node = dom.getElementById(subpart.nodeID))) {
        startFrom = i + 1;
        break;
      }
    }

    // A failed ID lookup above leaves `node` null
    if(!node) {
      node = startNode;
    }

    var o = {node, offset: 0};

    for(i=startFrom; i < subparts.length; i++) {
      subpart = subparts[i];

      o = this.getChildNodeByCFIIndex(dom, o.node, subpart.nodeIndex, subpart.offset);

      if(subpart.textLocationAssertion) {
        o = this.correctOffset(dom, o.node, subpart.offset, subpart.textLocationAssertion);
      }
    }

    return o;
  }

  // Each part of a CFI (as separated by '!')
  // references a separate HTML/XHTML/XML document.
  // This function takes an index specifying the part
  // of the CFI and the appropriate Document or XMLDocument
  // that is referenced by the specified part of the CFI
  // and returns the URI for the document referenced by
  // the next part of the CFI
  // If the opt `ignoreIDs` is true then IDs
  // will not be used while resolving
  //
  // Throws if `index` does not refer to a part that is followed by another
  // part, or if the resolved node carries no usable URI attribute.
  resolveURI(index, dom, opts) {
    opts = opts || {};
    if(index < 0 || index > this.parts.length - 2) {
      throw new Error("index is out of bounds");
    }

    const subparts = this.parts[index];
    if(!subparts) throw new Error("Missing CFI part for index: " + index);

    var o = this.resolveNode(index, subparts, dom, opts);
    var node = o.node;

    const tagName = node.tagName.toLowerCase();
    if(tagName === 'itemref'
       && node.parentNode.tagName.toLowerCase() === 'spine') {

      // Follow the spine itemref to the manifest item with that id
      const idref = node.getAttribute('idref');
      if(!idref) throw new Error("Referenced node had not 'idref' attribute");
      node = dom.getElementById(idref);
      if(!node) throw new Error("Specified node is missing from manifest");
      const href = node.getAttribute('href');
      if(!href) throw new Error("Manifest item is missing href attribute");

      return href;
    }

    if(tagName === 'iframe' || tagName === 'embed') {
      const src = node.getAttribute('src');
      if(!src) throw new Error(tagName + " element is missing 'src' attribute");
      return src;
    }

    if(tagName === 'object') {
      const data = node.getAttribute('data');
      if(!data) throw new Error(tagName + " element is missing 'data' attribute");
      return data;
    }

    if(tagName === 'image'|| tagName === 'use') {
      const href = node.getAttribute('xlink:href');
      if(!href) throw new Error(tagName + " element is missing 'xlink:href' attribute");
      return href;
    }

    throw new Error("No URI found");
  }

  // Deep-copy plain parsed data via a JSON round trip
  deepClone(o) {
    return JSON.parse(JSON.stringify(o));
  }

  // Resolve the last part of the path `parts` against `dom`.
  // Returns a copy of the last subpart (without nodeIndex) merged with the
  // resolved {node, offset, ...}; the resolved offset is dropped when the
  // subpart itself specified no (or a zero) offset.
  resolveLocation(dom, parts) {
    const index = parts.length - 1;
    const subparts = parts[index];
    if(!subparts) throw new Error("Missing CFI part for index: " + index);
    var o = this.resolveNode(index, subparts, dom);

    var lastpart = this.deepClone(subparts[subparts.length - 1]);

    delete lastpart.nodeIndex;
    if(!lastpart.offset) delete o.offset;

    Object.assign(lastpart, o);

    return lastpart;
  }

  // Takes the Document or XMLDocument for the final
  // document referenced by the CFI
  // and returns the node and offset into that node
  //
  // For a range CFI the result is {from, to, isRange: true}, or, when
  // opts.range is true, a Range created with dom.createRange().
  resolveLast(dom, opts) {
    opts = Object.assign({
      range: false
    }, opts || {});

    if(!this.isRange) {
      return this.resolveLocation(dom, this.parts);
    }

    // getFrom()/getTo() only return parsed paths; they have to be
    // resolved against the document to obtain nodes and offsets.
    const from = this.resolveLocation(dom, this.getFrom());
    const to = this.resolveLocation(dom, this.getTo());

    if(opts.range) {
      const range = dom.createRange();
      if(from.relativeToNode === 'before') {
        range.setStartBefore(from.node)
      } else if(from.relativeToNode === 'after') {
        range.setStartAfter(from.node)
      } else {
        range.setStart(from.node, from.offset || 0);
      }

      if(to.relativeToNode === 'before') {
        range.setEndBefore(to.node)
      } else if(to.relativeToNode === 'after') {
        range.setEndAfter(to.node)
      } else {
        range.setEnd(to.node, to.offset || 0);
      }

      return range;
    }

    return {
      from: from,
      to: to,
      isRange: true
    };
  }

  // Default fetch callback used by resolve(): GET `uri` with
  // XMLHttpRequest and resolve with the parsed response document.
  // Rejects on a network error or a non-2xx status.
  async fetchAndParse(uri) {
    return new Promise((resolv, reject) => {

      const xhr = new XMLHttpRequest;

      xhr.open('GET', uri);
      xhr.responseType = 'document';

      xhr.onload = function() {
        if(xhr.readyState === xhr.DONE) {
          if(xhr.status < 200 || xhr.status >= 300) {
            reject(new Error("Failed to get: " + uri));
            return;
          }
          resolv(xhr.responseXML);
        }
      }
      xhr.onerror = function() {
        reject(new Error("Failed to get: " + uri));
      }

      xhr.send();
    });
  }

  // Fully resolve this CFI.
  // `uriOrDoc` is either the URI of the first document or an already
  // parsed document. `fetchCB(uri)` must return (a promise for) the parsed
  // document at `uri`; it may be omitted (so that `opts` is the second
  // argument), in which case XMLHttpRequest is used.
  // Each part but the last is resolved to the URI of the next document,
  // and the last part is resolved with resolveLast().
  async resolve(uriOrDoc, fetchCB, opts) {
    if(typeof fetchCB !== 'function') {
      opts = fetchCB;
      fetchCB = null
    }
    if(!fetchCB) {
      if(typeof XMLHttpRequest === 'undefined') {
        throw new Error("XMLHttpRequest not available. You must supply a function as the second argument.");
      }
      fetchCB = this.fetchAndParse;
    }

    var uri, doc;
    if(typeof uriOrDoc === 'string') {
      uri = uriOrDoc;
    } else {
      doc = uriOrDoc;
    }

    var i;
    for(i=0; i < this.parts.length - 1; i++) {
      if(uri) doc = await fetchCB(uri);
      uri = this.resolveURI(i, doc, opts);
    }

    if(uri) doc = await fetchCB(uri);
    return this.resolveLast(doc, opts);
  }

}
|
|
|
|
//module.exports = CFI;
|
|
|
|
|