TiddlyWiki5/plugins/tiddlywiki/evernote/modules/enex-deserializer.js

/*\
title: $:/plugins/tiddlywiki/evernote/modules/enex-deserializer.js
type: application/javascript
module-type: tiddlerdeserializer

ENEX file deserializer

For details see: https://blog.evernote.com/tech/2013/08/08/evernote-export-format-enex/

\*/
(function(){

/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";

// NOTE(review): the xmldom-based DOMParser below is disabled, so this module relies on a global DOMParser being available (presumably the browser's) — confirm before using under Node.js
// DOMParser = require("$:/plugins/tiddlywiki/xmldom/dom-parser").DOMParser;
// Matches every character that gets replaced with "_" when note titles and resource file names are turned into tiddler titles:
// square brackets, angle brackets, ; : " / \ | ? * ^ $ ( ) ~ and any whitespace
var illegalFilenameCharacters = /[\[\]<>;\:\"\/\\\|\?\*\^\?\$\(\)\s~]/g;

/*
Parse an ENEX file into tiddlers

text: XML source of the ENEX export
fields: not used by this deserializer

Returns an array of tiddler field objects: first an import report tiddler, then for each note
its resource tiddlers followed by the note tiddler itself

Throws an Error if the parsed document contains no "en-export" element
*/
exports["application/enex+xml"] = function(text,fields) {
	// Collect output tiddlers in an array
	var results = [];
	// Parse the XML document
	var doc = new DOMParser().parseFromString(text,"application/xml");
	// Output a report tiddler with information about the import
	var enex = doc.querySelector("en-export");
	if(!enex) {
		// Firefox's DOMParser has problems in some cases
		throw new Error('Failed to parse ENEX file, no "en-export" node found, try use Chrome/Edge to export again.');
	}
	results.push({
		title: "Evernote Import Report",
		text: "Evernote file imported on " + enex.getAttribute("export-date") + " from " + enex.getAttribute("application") + " (" + enex.getAttribute("version") + ")"
	});
	// Get all the "note" nodes
	var noteNodes = doc.querySelectorAll("note");
	$tw.utils.each(noteNodes,function(noteNode) {
		var noteTitle = getTextContent(noteNode,"title");
		// Get the real note content node; a note without a <content> element is imported with empty text instead of aborting the whole import
		var contentNode = noteNode.querySelector("content");
		var contentText = ((contentNode && contentNode.textContent) || "").replace(/&nbsp;/g, ' ').trim();
		if(contentText) {
			// The final content will be HTML instead of XML. It is saved as wikitext, so that wiki syntax works and the remaining HTML still renders.
			try {
				// Re-parse the embedded document; if no "en-note" element comes out of it, keep the raw <content> node
				contentNode = new DOMParser().parseFromString(contentText,"application/xml").querySelector("en-note") || contentNode;
			} catch(e) {
				// Deliberate best effort: on any parser error keep the raw <content> node
			}
		}
		// Process main content and metadata, and save as wikitext tiddler
		var noteResult = {
			title: noteTitle.replace(illegalFilenameCharacters,"_"),
			tags: [],
			// Prefer the update date, fall back to the creation date
			modified: convertDate(getTextContent(noteNode,"updated") || getTextContent(noteNode,"created")),
			modifier: getTextContent(noteNode,"author"),
			created:  convertDate(getTextContent(noteNode,"created")),
			creator: getTextContent(noteNode,"author")
		};
		// Process resources (images, PDFs, etc.)
		var resourceCount = 0;
		$tw.utils.each(noteNode.querySelectorAll("resource"),function(resourceNode) {
			resourceCount++;
			// hash generated by applying https://github.com/vzhd1701/evernote-backup/pull/54
			// getAttribute returns null when the attribute is absent, and the <data> element itself may be missing, so normalise both cases to ""
			var dataNode = resourceNode.querySelector("data");
			var hash = (dataNode && dataNode.getAttribute("hash")) || "";
			var text = getTextContent(resourceNode,"data");
			var mimeType = getTextContent(resourceNode,"mime");
			var contentTypeInfo = $tw.config.contentTypeInfo[mimeType] || {extension:""};
			var title = getTextContent(resourceNode,"resource-attributes>file-name");
			// A few resources don't have a file name: use the hash as fallback, or a per-note counter when there is no hash either
			title = title || ((hash || ("resource-" + resourceCount)) + contentTypeInfo.extension);
			// Replace all system reserved characters in title
			title = title.replace(illegalFilenameCharacters,"_");
			// Prefix resource title with note title, to avoid name conflicts which are quite common in web-clipped content
			title = noteResult.title + "/" + title;
			results.push({
				title: title,
				type: mimeType,
				width: getTextContent(resourceNode,"width"),
				height: getTextContent(resourceNode,"height"),
				text: text,
				// Give the resource the same modified and modifier as the note, so they can be grouped together in the "Recent" tab
				modified: noteResult.modified,
				modifier: noteResult.modifier,
				created: noteResult.created,
				creator: noteResult.creator
			});
			if(hash) {
				fixAttachmentReference(contentNode, hash, mimeType, title);
			}
		});
		// Export mixed content of wikitext and HTML
		noteResult.text = contentNode ? contentNode.innerHTML : "";
		// Remove all ` xmlns="http://www.w3.org/1999/xhtml"` attributes to save some space
		noteResult.text = noteResult.text.replace(/ xmlns="http:\/\/www.w3.org\/1999\/xhtml"/g, "");
		$tw.utils.each(noteNode.querySelectorAll("tag"),function(tagNode) {
			noteResult.tags.push(tagNode.textContent);
		});
		// Copy every child of "note-attributes" into a field named after its tag
		$tw.utils.each(noteNode.querySelectorAll("note-attributes>*"),function(attrNode) {
			noteResult[attrNode.tagName] = attrNode.textContent;
		});
		results.push(noteResult);
	});
	// Return the output tiddlers
	return results;
};

/*
Return the text content of the first descendant of `node` that matches `selector`.
Yields an empty string when nothing matches or when the match has no text.
*/
function getTextContent(node,selector) {
	var match = node.querySelector(selector);
	if(!match) {
		return "";
	}
	return match.textContent || "";
}

/*
Convert an ENEX timestamp such as "20130730T205204Z" into a plain digit string by dropping
the first "T" and the first "Z" and appending "000" (presumably the milliseconds part of a
TiddlyWiki date string — confirm against the core date format).

Returns an empty string for empty or missing input, rather than the meaningless "000" that
appending the suffix unconditionally would produce.
*/
function convertDate(isoDate) {
	if(!isoDate) {
		return "";
	}
	return isoDate.replace("T","").replace("Z","") + "000";
}

/*
Replace the <en-media> placeholders of a resource with wikitext referencing its tiddler.

contentNode: node holding the note content; nothing happens when it is missing
md5Hash: value of the "hash" attribute identifying the resource's <en-media> elements
mimeType: resource content type; "image/*" types become "[img[name]]", everything else "[[name]]"
name: title of the tiddler created for the resource

Every matching <en-media> element is replaced, because a note may reference the same resource
more than once. Throws an Error when `name` is empty.
*/
function fixAttachmentReference(contentNode, md5Hash, mimeType, name) {
	if(!contentNode) return;
	if(!name) {
		throw new Error("name is empty for resource hash " + md5Hash);
	}
	var mediaNodes = contentNode.querySelectorAll('en-media[hash="' + md5Hash + '"]');
	// Images use the image syntax, any other attachment becomes a link to its tiddler
	var isImage = (mimeType || "").indexOf("image/") === 0;
	var wikitext = isImage ? "[img[" + name + "]]" : "[[" + name + "]]";
	// querySelectorAll returns a static list, so replacing nodes while iterating is safe
	for(var i = 0; i < mediaNodes.length; i++) {
		var mediaNode = mediaNodes[i];
		// Each replacement needs its own element, as a node can only live in one place
		mediaNode.parentNode.replaceChild($tw.utils.domMaker("p", {text: wikitext}), mediaNode);
	}
}


})();