TiddlyWiki5/plugins/tiddlywiki/evernote/modules/enex-deserializer.js

/*\
title: $:/plugins/tiddlywiki/evernote/modules/enex-deserializer.js
type: application/javascript
module-type: tiddlerdeserializer

ENEX file deserializer

For details see: https://blog.evernote.com/tech/2013/08/08/evernote-export-format-enex/

\*/
(function(){

/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";

// DOMParser = require("$:/plugins/tiddlywiki/xmldom/dom-parser").DOMParser;
// Characters that get replaced with "_" when tiddler titles are built from note
// titles and resource file names: square and angle brackets, ";", ":", '"', "/",
// "\", "|", "?", "*", "^", "$", parentheses, "~" and any whitespace character.
// The "g" flag makes String.prototype.replace substitute every occurrence.
var illegalFilenameCharacters = /[\[\]<>;\:\"\/\\\|\?\*\^\?\$\(\)\s~]/g;

/*
Parse an ENEX file into tiddlers

text: XML source of the ENEX export
fields: not used by this deserializer

Returns an array of tiddler field objects: first an import report tiddler, then,
for each <note>, one tiddler per <resource> followed by the note tiddler itself.
Throws an Error when the parsed document contains no "en-export" element.
*/
exports["application/enex+xml"] = function(text,fields) {
	// Collect output tiddlers in an array
	var results = [];
	// Parse the XML document
	var doc = new DOMParser().parseFromString(text,"application/xml");
	// Output a report tiddler with information about the import
	var enex = doc.querySelector("en-export");
	if(!enex) {
		// Firefox's DOMParser has problems in some cases.
		throw new Error('Failed to parse ENEX file, no "en-export" node found, try use Chrome/Edge to export again.');
	}
	results.push({
		title: "Evernote Import Report",
		text: "Evernote file imported on " + enex.getAttribute("export-date") + " from " + enex.getAttribute("application") + " (" + enex.getAttribute("version") + ")"
	});
	// Get all the "note" nodes
	var noteNodes = doc.querySelectorAll("note");
	$tw.utils.each(noteNodes,function(noteNode) {
		var noteTitle = getTextContent(noteNode,"title");
		// Get the real note content node. A note without a <content> element is
		// imported with empty text instead of aborting the whole import.
		var contentNode = noteNode.querySelector("content");
		var contentText = contentNode ? (contentNode.textContent || "").replace(/&nbsp;/g, ' ').trim() : "";
		if(contentText) {
			// The final content will be HTML instead of XML. It is saved as wikitext so that
			// wiki syntax works, and the remaining HTML keeps working too.
			try {
				// May error if the content is not valid XML
				contentNode = new DOMParser().parseFromString(contentText,"application/xml").querySelector("en-note") || contentNode;
			} catch(e) {
				// Deliberate best effort: keep using the raw <content> node
			}
		}
		// Process main content and metadata, and save as wikitext tiddler.
		// "modified" falls back to the creation date when the note has no <updated> element.
		var noteResult = {
			title: noteTitle.replace(illegalFilenameCharacters,"_"),
			tags: [],
			modified: convertDate(getTextContent(noteNode,"updated") || getTextContent(noteNode,"created")),
			modifier: getTextContent(noteNode,"author"),
			created:  convertDate(getTextContent(noteNode,"created")),
			creator: getTextContent(noteNode,"author")
		};
		// Counts the resources of this note that have neither a file name nor a hash,
		// so that each of them still receives a distinct title
		var unnamedResourceCount = 0;
		// Process resources (images, PDFs, etc.)
		$tw.utils.each(noteNode.querySelectorAll("resource"),function(resourceNode) {
			// hash generated by applying https://github.com/vzhd1701/evernote-backup/pull/54
			// (absent in other exports; a resource may also lack <data> altogether)
			var dataNode = resourceNode.querySelector("data");
			var hash = dataNode ? dataNode.getAttribute("hash") : null;
			var text = getTextContent(resourceNode,"data");
			var mimeType = getTextContent(resourceNode,"mime");
			var contentTypeInfo = $tw.config.contentTypeInfo[mimeType] || {extension:""};
			var title = getTextContent(resourceNode,"resource-attributes>file-name");
			if(!title) {
				// A few resources don't have a file name: fall back to the hash, or to a
				// per-note counter when there is no hash either
				unnamedResourceCount += hash ? 0 : 1;
				title = (hash || ("resource-" + unnamedResourceCount)) + contentTypeInfo.extension;
			}
			// Replace all system reserved characters in title
			title = title.replace(illegalFilenameCharacters,"_");
			// Prefix the title with the note title, to avoid name conflicts which are quite common in web-clipped content
			title = noteResult.title + "/" + title;
			results.push({
				title: title,
				type: mimeType,
				width: getTextContent(resourceNode,"width"),
				height: getTextContent(resourceNode,"height"),
				text: text,
				// Give the resource the same dates and authors as the note, so they can be grouped together in the "Recent" tab
				modified: noteResult.modified,
				modifier: noteResult.modifier,
				created: noteResult.created,
				creator: noteResult.creator
			});
			if(hash) {
				fixAttachmentReference(contentNode, hash, mimeType, title);
			}
		});
		// Export mixed content of wikitext and HTML
		var noteHtml = contentNode ? contentNode.innerHTML : "";
		// Remove all ` xmlns="http://www.w3.org/1999/xhtml"` attributes to save some space
		noteResult.text = noteHtml.replace(/ xmlns="http:\/\/www.w3.org\/1999\/xhtml"/g, "");
		$tw.utils.each(noteNode.querySelectorAll("tag"),function(tagNode) {
			noteResult.tags.push(tagNode.textContent);
		});
		// Every child element of <note-attributes> becomes a field named after its tag
		$tw.utils.each(noteNode.querySelectorAll("note-attributes>*"),function(attrNode) {
			noteResult[attrNode.tagName] = attrNode.textContent;
		});
		results.push(noteResult);
	});
	// Return the output tiddlers
	return results;
};

/*
Return the text content of the first element below `node` that matches `selector`.
Yields "" when nothing matches or when the matched element has no text.
*/
function getTextContent(node,selector) {
	var match = node.querySelector(selector);
	if(!match) {
		return "";
	}
	return match.textContent || "";
}

/*
Turn an ENEX timestamp such as "20160204T101308Z" into "20160204101308000":
the first "T" and the first "Z" are dropped and "000" is appended
(presumably the milliseconds of a TiddlyWiki date string - confirm).
A missing or empty input yields "000".
*/
function convertDate(isoDate) {
	var stamp = isoDate || "";
	stamp = stamp.replace("T","");
	stamp = stamp.replace("Z","");
	return stamp + "000";
}

/*
Replace the <en-media> placeholders of an attachment with wikitext that points at
the tiddler created for it.

contentNode: DOM node holding the note body; nothing happens when it is falsy
md5Hash: value of the "hash" attribute identifying the <en-media> element(s)
mimeType: MIME type of the resource; "image/..." becomes [img[name]], anything else a [[name]] link
name: title of the tiddler that holds the resource

Every <en-media> carrying the hash is replaced, so a resource embedded several
times in one note leaves no stray placeholder behind.
Throws an Error when name is empty.
*/
function fixAttachmentReference(contentNode, md5Hash, mimeType, name) {
	if(!contentNode) {
		return;
	}
	if(!name) {
		throw new Error("name is empty for resource hash " + md5Hash);
	}
	// querySelectorAll returns a static list, so replacing nodes while looping is safe
	var mediaNodes = contentNode.querySelectorAll('en-media[hash="' + md5Hash + '"]');
	var isImage = (mimeType || "").indexOf("image/") === 0;
	// Images are embedded; for other attachments we make a link to the tiddler
	var wikitext = (isImage ? "[img[" : "[[") + name + "]]";
	for(var index = 0; index < mediaNodes.length; index++) {
		var mediaNode = mediaNodes[index];
		// A fresh <p> per placeholder: a DOM node can only live in one place
		mediaNode.parentNode.replaceChild($tw.utils.domMaker("p", {text: wikitext}), mediaNode);
	}
}


})();
First commit of Evernote migration plugin Starts to address #2268 2016-02-04 10:13:08 +00:00			`/*\`
			`title: $:/plugins/tiddlywiki/evernote/modules/enex-deserializer.js`
			`type: application/javascript`
			`module-type: tiddlerdeserializer`

			`ENEX file deserializer`

			`For details see: https://blog.evernote.com/tech/2013/08/08/evernote-export-format-enex/`

			`\*/`
			`(function(){`

			`/*jslint node: true, browser: true */`
			`/*global $tw: false */`
			`"use strict";`

			`// DOMParser = require("$:/plugins/tiddlywiki/xmldom/dom-parser").DOMParser;`
Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`var illegalFilenameCharacters = /[\[\]<>;\:\"\/\\\\|\?\*\^\?\$\(\)\s~]/g;`
First commit of Evernote migration plugin Starts to address #2268 2016-02-04 10:13:08 +00:00
			`/*`
			`Parse an ENEX file into tiddlers`
			`*/`
			`exports["application/enex+xml"] = function(text,fields) {`
			`// Collect output tiddlers in an array`
			`var results = [];`
			`// Parse the XML document`
Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`var doc = new DOMParser().parseFromString(text,"application/xml");`
First commit of Evernote migration plugin Starts to address #2268 2016-02-04 10:13:08 +00:00			`// Output a report tiddler with information about the import`
			`var enex = doc.querySelector("en-export");`
Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`if(!enex) {`
			`// Firefox's DOMParser have problem in some cases.`
			`throw new Error('Failed to parse ENEX file, no "en-export" node found, try use Chrome/Edge to export again.');`
			`}`
First commit of Evernote migration plugin Starts to address #2268 2016-02-04 10:13:08 +00:00			`results.push({`
			`title: "Evernote Import Report",`
			`text: "Evernote file imported on " + enex.getAttribute("export-date") + " from " + enex.getAttribute("application") + " (" + enex.getAttribute("version") + ")"`
			`})`
			`// Get all the "note" nodes`
			`var noteNodes = doc.querySelectorAll("note");`
			`$tw.utils.each(noteNodes,function(noteNode) {`
Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`var noteTitle = getTextContent(noteNode,"title");`
			`// get real note content node`
			`var contentNode = noteNode.querySelector("content")`
			`var contentText = (contentNode.textContent \|\| "").replace(/&nbsp;/g, ' ').trim();`
			`if(contentText) {`
			`// The final content will be HTML instead of xml. And we will save it as wikitext, to make wiki syntax work, and remaining HTML will also work.`
			`try {`
			`// may error if content is not valid XML`
			`contentNode = new DOMParser().parseFromString(contentText,"application/xml").querySelector("en-note") \|\| contentNode;`
			`} catch(e) {`
			`// ignore`
			`}`
			`}`
			`// process main content and metadata, and save as wikitext tiddler.`
			`var noteResult = {`
			`title: noteTitle.replace(illegalFilenameCharacters,"_"),`
Further tweaks to #2272 @sukima the main issue with the previous code was that it incorrectly used comma to delimit tags. We actually use spaces, and double square brackets to delimit tags containing spaces. Better is to leave the tags field as an array; the core will serialise it correctly as required. I also made some minor consistency tweaks. 2016-02-05 19:48:37 +00:00			`tags: [],`
Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`modified: convertDate(getTextContent(noteNode,"updated") \|\| getTextContent(noteNode,"created")),`
			`modifier: getTextContent(noteNode,"author"),`
			`created: convertDate(getTextContent(noteNode,"created")),`
			`creator: getTextContent(noteNode,"author")`
Add additional fields support for evernote plugin Relates to Issue #2268 Add the additional field defined in the XML as properties to the result. This also appears to capture the 'author' field. 2016-02-04 23:31:24 +00:00			`};`
Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`// process resources (images, PDFs, etc.)`
			`$tw.utils.each(noteNode.querySelectorAll("resource"),function(resourceNode) {`
			`// hash generated by applying https://github.com/vzhd1701/evernote-backup/pull/54`
			`var hash = resourceNode.querySelector("data").getAttribute("hash");`
			`var text = getTextContent(resourceNode,"data");`
			`var mimeType = getTextContent(resourceNode,"mime");`
			`var contentTypeInfo = $tw.config.contentTypeInfo[mimeType] \|\| {extension:""};`
			`var title = getTextContent(resourceNode,"resource-attributes>file-name")`
			`// a few resources don't have title, use hash as fallback`
			`title = title \|\| (hash + contentTypeInfo.extension);`
			`// replace all system reserved characters in title`
			`title = title.replace(illegalFilenameCharacters,"_");`
			`// prefix image title with note title, to avoid name conflicts which is quite common in web-clipped content`
			`title = noteResult.title + "/" + title;`
			`results.push({`
			`title: title,`
			`type: mimeType,`
			`width: getTextContent(resourceNode,"width"),`
			`height: getTextContent(resourceNode,"height"),`
			`text: text,`
			`// give image same modified and modifier as the note, so they can be grouped together in the "Recent"`
			`modified: noteResult.modified,`
			`modifier: noteResult.modifier,`
			`created: noteResult.created,`
			`creator: noteResult.creator`
			`});`
			`if(hash) {`
			`fixAttachmentReference(contentNode, hash, mimeType, title);`
			`}`
			`});`
			`// export mixed content of wikitext and HTML`
			`noteResult.text = contentNode.innerHTML;`
			// remove all ` xmlns="http://www.w3.org/1999/xhtml"` attributes to save some space
			`noteResult.text = noteResult.text.replace(/ xmlns="http:\/\/www.w3.org\/1999\/xhtml"/g, "");`
Further tweaks to #2272 @sukima the main issue with the previous code was that it incorrectly used comma to delimit tags. We actually use spaces, and double square brackets to delimit tags containing spaces. Better is to leave the tags field as an array; the core will serialise it correctly as required. I also made some minor consistency tweaks. 2016-02-05 19:48:37 +00:00			`$tw.utils.each(noteNode.querySelectorAll("tag"),function(tagNode) {`
Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`noteResult.tags.push(tagNode.textContent);`
Further tweaks to #2272 @sukima the main issue with the previous code was that it incorrectly used comma to delimit tags. We actually use spaces, and double square brackets to delimit tags containing spaces. Better is to leave the tags field as an array; the core will serialise it correctly as required. I also made some minor consistency tweaks. 2016-02-05 19:48:37 +00:00			`});`
Minor tweaks to #2275 * Cleaner handling of “updated” attribute * Fixed handling of note attributes, which now get imported as custom fields * Clearer handling of ISO dates 2016-02-16 16:35:28 +00:00			`// If there's an update date, set modifiy date accordingly`
			`var update = getTextContent(noteNode,"updated");`
			`if(update) {`
Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`noteResult.modified = convertDate(update);`
Minor tweaks to #2275 * Cleaner handling of “updated” attribute * Fixed handling of note attributes, which now get imported as custom fields * Clearer handling of ISO dates 2016-02-16 16:35:28 +00:00			`}`
			`$tw.utils.each(noteNode.querySelectorAll("note-attributes>*"),function(attrNode) {`
Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`noteResult[attrNode.tagName] = attrNode.textContent;`
Add evernote's additional resources as tiddlers Relates to Issue #2268 Based in the [example XML][1] attachments are listed in the <resources> node. Since in TiddlyWiki these would be media tiddlers I add then one by one as separate tiddlers. There are some things that still need to happen. There should be a mime type check so we don't attempt to import media tha TiddlyWiki doesn't support. Also the example suggests the data is base64 encoded so I blindly use that for the text attribute. Should there be a `data:mediatyp;base64,…` prefix? [1]: https://gist.github.com/evernotegists/6116886 2016-02-04 23:37:45 +00:00			`});`
Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`results.push(noteResult);`
First commit of Evernote migration plugin Starts to address #2268 2016-02-04 10:13:08 +00:00			`});`
			`// Return the output tiddlers`
			`return results;`
			`};`

Evernote plugin: fix crash with missing fields 2016-02-05 23:05:24 +00:00			`function getTextContent(node,selector) {`
Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`return (node.querySelector(selector) \|\| {}).textContent \|\| "";`
Evernote plugin: fix crash with missing fields 2016-02-05 23:05:24 +00:00			`}`

Minor tweaks to #2275 * Cleaner handling of “updated” attribute * Fixed handling of note attributes, which now get imported as custom fields * Clearer handling of ISO dates 2016-02-16 16:35:28 +00:00			`function convertDate(isoDate) {`
			`return (isoDate \|\| "").replace("T","").replace("Z","") + "000"`
			`}`

Fix: Evernote .enex image import (#7785) * feat: add modifier info * feat: replace image and attachment with [img[] and [[]] * feat: import as wikitext tid * fix: a few resources don't have title * fix: use hash as random name for images * fix: Firefox's DOMParser have problem in some cases * fix: bad char in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some dont have modified 2023-10-15 11:40:38 +00:00			`function fixAttachmentReference(contentNode, md5Hash, mimeType, name) {`
			`if(!contentNode) return;`
			`var mediaNode = contentNode.querySelector('en-media[hash="' + md5Hash + '"]');`
			`if(!name) {`
			`throw new Error("name is empty for resource hash" + md5Hash);`
			`}`
			`if(!mediaNode) return;`
			`if(mimeType.indexOf("image/") === 0) {`
			`// find en-media node, replace with image syntax`
			`mediaNode.parentNode.replaceChild($tw.utils.domMaker("p", {text: "[img["+ name + "]]"}), mediaNode);`
			`} else {`
			`// For other than image attachments, we make a link to the tiddler`
			`mediaNode.parentNode.replaceChild($tw.utils.domMaker("p", {text: "[["+ name + "]]"}), mediaNode);`
			`}`
			`}`


First commit of Evernote migration plugin Starts to address #2268 2016-02-04 10:13:08 +00:00			`})();`