Fix: Evernote .enex image import (#7785)

* feat: add modifier info * feat: replace image and attachment with [img[]] and [[]] * feat: import as wikitext tid * fix: a few resources don't have a title * fix: use hash as random name for images * fix: Firefox's DOMParser has problems in some cases * fix: bad chars in title, and useless xmlns * Update sample-enex-with-image.xml.enex * Update enex-deserializer.js * Update readme.tid * fix: some notes don't have a modified date
2025-12-09 18:28:07 +00:00 · 2023-10-15 19:40:38 +08:00
parent efa4f34131
commit d3f5695601
3 changed files with 85 additions and 25 deletions
--- a/plugins/tiddlywiki/evernote/modules/enex-deserializer.js
+++ b/plugins/tiddlywiki/evernote/modules/enex-deserializer.js
@@ -15,6 +15,7 @@ For details see: https://blog.evernote.com/tech/2013/08/08/evernote-export-forma
 "use strict";

 // DOMParser = require("$:/plugins/tiddlywiki/xmldom/dom-parser").DOMParser;
+var illegalFilenameCharacters = /[\[\]<>;\:\"\/\\\|\?\*\^\?\$\(\)\s~]/g; // Characters replaced with "_" when note and resource titles are built

 /*
 Parse an ENEX file into tiddlers
@@ -23,10 +24,13 @@ exports["application/enex+xml"] = function(text,fields) {
 	// Collect output tiddlers in an array
 	var results = [];
 	// Parse the XML document
-	var parser = new DOMParser(),
-		doc = parser.parseFromString(text,"application/xml");
+	var doc = new DOMParser().parseFromString(text,"application/xml");
 	// Output a report tiddler with information about the import
 	var enex = doc.querySelector("en-export");
+	if(!enex) {
+		// Firefox's DOMParser has problems in some cases.
+		throw new Error('Failed to parse ENEX file, no "en-export" node found, try use Chrome/Edge to export again.');
+	}
 	results.push({
 		title: "Evernote Import Report",
 		text: "Evernote file imported on " + enex.getAttribute("export-date") + " from " + enex.getAttribute("application") + " (" + enex.getAttribute("version") + ")"
@@ -34,47 +38,102 @@ exports["application/enex+xml"] = function(text,fields) {
 	// Get all the "note" nodes
 	var noteNodes = doc.querySelectorAll("note");
 	$tw.utils.each(noteNodes,function(noteNode) {
-		var result = {
-			title: getTextContent(noteNode,"title"),
-			type: "text/html",
+		var noteTitle = getTextContent(noteNode,"title");
+		// get real note content node
+		var contentNode = noteNode.querySelector("content")
+		var contentText = (contentNode.textContent || "").replace(/&nbsp;/g, ' ').trim();
+		if(contentText) {
+			// The final content is HTML rather than XML. It is saved as wikitext so that wiki syntax works, and the remaining HTML still works too.
+			try {
+				// may error if content is not valid XML
+				contentNode =	new DOMParser().parseFromString(contentText,"application/xml").querySelector("en-note") || contentNode;
+			} catch(e) {
+				// ignore
+			}
+		}
+		// process main content and metadata, and save as wikitext tiddler.
+		var noteResult = {
+			title: noteTitle.replace(illegalFilenameCharacters,"_"),
 			tags: [],
-			text: getTextContent(noteNode,"content"),
-			modified: convertDate(getTextContent(noteNode,"created")),
-			created:  convertDate(getTextContent(noteNode,"created"))
-
+			modified: convertDate(getTextContent(noteNode,"updated") || getTextContent(noteNode,"created")),
+			modifier: getTextContent(noteNode,"author"),
+			created:  convertDate(getTextContent(noteNode,"created")),
+			creator: getTextContent(noteNode,"author")
 		};
+		// process resources (images, PDFs, etc.)
+		$tw.utils.each(noteNode.querySelectorAll("resource"),function(resourceNode) {
+			// hash generated by applying https://github.com/vzhd1701/evernote-backup/pull/54
+			var hash = resourceNode.querySelector("data").getAttribute("hash");
+			var text = getTextContent(resourceNode,"data");
+			var mimeType = getTextContent(resourceNode,"mime");
+			var contentTypeInfo = $tw.config.contentTypeInfo[mimeType] || {extension:""};
+			var title = getTextContent(resourceNode,"resource-attributes>file-name")
+			// a few resources don't have a title, so use the hash as a fallback
+			title = title || (hash + contentTypeInfo.extension);
+			// replace all system reserved characters in title
+			title = title.replace(illegalFilenameCharacters,"_");
+			// prefix image title with note title, to avoid name conflicts which is quite common in web-clipped content
+			title = noteResult.title + "/" + title;
+			results.push({
+				title: title,
+				type: mimeType,
+				width: getTextContent(resourceNode,"width"),
+				height: getTextContent(resourceNode,"height"),
+				text: text,
+				// give image same modified and modifier as the note, so they can be grouped together in the "Recent"
+				modified: noteResult.modified,
+				modifier: noteResult.modifier,
+				created: noteResult.created,
+				creator: noteResult.creator
+			});
+			if(hash) {
+				fixAttachmentReference(contentNode, hash, mimeType, title);
+			}
+		});
+		// export mixed content of wikitext and HTML
+		noteResult.text = contentNode.innerHTML;
+		// remove all ` xmlns="http://www.w3.org/1999/xhtml"` attributes to save some space
+		noteResult.text = noteResult.text.replace(/ xmlns="http:\/\/www.w3.org\/1999\/xhtml"/g, "");
 		$tw.utils.each(noteNode.querySelectorAll("tag"),function(tagNode) {
-			result.tags.push(tagNode.textContent);
+			noteResult.tags.push(tagNode.textContent);
 		});
 		// If there's an update date, set modifiy date accordingly
 		var update = getTextContent(noteNode,"updated");
 		if(update) {
-			result.modified = convertDate(update);
+			noteResult.modified = convertDate(update);
 		}
 		$tw.utils.each(noteNode.querySelectorAll("note-attributes>*"),function(attrNode) {
-			result[attrNode.tagName] = attrNode.textContent;
-		});
-		results.push(result);
-		$tw.utils.each(noteNode.querySelectorAll("resource"),function(resourceNode) {
-			results.push({
-				title: getTextContent(resourceNode,"resource-attributes>file-name"),
-				type: getTextContent(resourceNode,"mime"),
-				width: getTextContent(resourceNode,"width"),
-				height: getTextContent(resourceNode,"height"),
-				text: getTextContent(resourceNode,"data")
-			});
+			noteResult[attrNode.tagName] = attrNode.textContent;
 		});
+		results.push(noteResult);
 	});
 	// Return the output tiddlers
 	return results;
 };

 function getTextContent(node,selector) {
-	return (node.querySelector(selector) || {}).textContent;
+	return (node.querySelector(selector) || {}).textContent || ""; // text of the first match; "" when nothing matches or the text is empty
 }

 function convertDate(isoDate) {
 	return (isoDate || "").replace("T","").replace("Z","") + "000"
 }

+function fixAttachmentReference(contentNode, md5Hash, mimeType, name) { // Swap the en-media element whose hash attribute equals md5Hash for a wikitext reference to `name`
+	if(!contentNode) return; // nothing to rewrite without a content node
+	var mediaNode = contentNode.querySelector('en-media[hash="' + md5Hash + '"]'); // querySelector yields only the first matching element
+	if(!name) {
+		throw new Error("name is empty for resource hash" + md5Hash); // checked before the mediaNode guard, so an empty name throws even when no en-media element matches
+	}
+	if(!mediaNode) return; // the content holds no en-media element with this hash
+	if(mimeType.indexOf("image/") === 0) {
+		// Image resource: replace the en-media node with a "p" node whose text is the [img[name]] image syntax
+		mediaNode.parentNode.replaceChild($tw.utils.domMaker("p", {text: "[img["+ name + "]]"}), mediaNode);
+	} else {
+		// Any other attachment: replace the en-media node with a "p" node whose text is a [[name]] link to the tiddler
+		mediaNode.parentNode.replaceChild($tw.utils.domMaker("p", {text: "[["+ name + "]]"}), mediaNode);
+	}
+}
+
+
 })();
--- a/plugins/tiddlywiki/evernote/readme.tid
+++ b/plugins/tiddlywiki/evernote/readme.tid
@@ -5,6 +5,7 @@ This plugin contains tool to assist migration of content from Evernote ENEX file
 !! Instructions

 # Download or save your ENEX file from Evernote
+## Use [ext[evernote-backup|https://github.com/vzhd1701/evernote-backup]] to export an ENEX file that includes resource hashes, so that images can be linked from their notes
 # Rename the file to have an `.enex` extension
 # Drag the file into the TiddlyWiki browser window
 ## Alternatively, click the "Import" button in the "Tools" sidebar tab
--- a/plugins/tiddlywiki/evernote/samples/sample-enex-with-image.xml.enex
+++ b/plugins/tiddlywiki/evernote/samples/sample-enex-with-image.xml.enex