Fix: Evernote .enex image import (#7785)

* feat: add modifier info

* feat: replace image and attachment with [img[] and [[]]

* feat: import as wikitext tid

* fix: a few resources don't have title

* fix: use hash as random name for images

* fix: Firefox's DOMParser has problems in some cases

* fix: bad char in title, and useless xmlns

* Update sample-enex-with-image.xml.enex

* Update enex-deserializer.js

* Update readme.tid

* fix: some notes don't have a modified date
This commit is contained in:
lin onetwo 2023-10-15 19:40:38 +08:00 committed by GitHub
parent efa4f34131
commit d3f5695601
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 85 additions and 25 deletions

View File

@ -15,6 +15,7 @@ For details see: https://blog.evernote.com/tech/2013/08/08/evernote-export-forma
"use strict";
// DOMParser = require("$:/plugins/tiddlywiki/xmldom/dom-parser").DOMParser;
var illegalFilenameCharacters = /[\[\]<>;\:\"\/\\\|\?\*\^\?\$\(\)\s~]/g;
/*
Parse an ENEX file into tiddlers
@ -23,10 +24,13 @@ exports["application/enex+xml"] = function(text,fields) {
// Collect output tiddlers in an array
var results = [];
// Parse the XML document
var parser = new DOMParser(),
doc = parser.parseFromString(text,"application/xml");
var doc = new DOMParser().parseFromString(text,"application/xml");
// Output a report tiddler with information about the import
var enex = doc.querySelector("en-export");
if(!enex) {
// Firefox's DOMParser have problem in some cases.
throw new Error('Failed to parse ENEX file, no "en-export" node found, try use Chrome/Edge to export again.');
}
results.push({
title: "Evernote Import Report",
text: "Evernote file imported on " + enex.getAttribute("export-date") + " from " + enex.getAttribute("application") + " (" + enex.getAttribute("version") + ")"
@ -34,47 +38,102 @@ exports["application/enex+xml"] = function(text,fields) {
// Get all the "note" nodes
var noteNodes = doc.querySelectorAll("note");
$tw.utils.each(noteNodes,function(noteNode) {
var result = {
title: getTextContent(noteNode,"title"),
type: "text/html",
var noteTitle = getTextContent(noteNode,"title");
// get real note content node
var contentNode = noteNode.querySelector("content")
var contentText = (contentNode.textContent || "").replace(/&nbsp;/g, ' ').trim();
if(contentText) {
// The final content will be HTML instead of xml. And we will save it as wikitext, to make wiki syntax work, and remaining HTML will also work.
try {
// may error if content is not valid XML
contentNode = new DOMParser().parseFromString(contentText,"application/xml").querySelector("en-note") || contentNode;
} catch(e) {
// ignore
}
}
// process main content and metadata, and save as wikitext tiddler.
var noteResult = {
title: noteTitle.replace(illegalFilenameCharacters,"_"),
tags: [],
text: getTextContent(noteNode,"content"),
modified: convertDate(getTextContent(noteNode,"created")),
created: convertDate(getTextContent(noteNode,"created"))
modified: convertDate(getTextContent(noteNode,"updated") || getTextContent(noteNode,"created")),
modifier: getTextContent(noteNode,"author"),
created: convertDate(getTextContent(noteNode,"created")),
creator: getTextContent(noteNode,"author")
};
// process resources (images, PDFs, etc.)
$tw.utils.each(noteNode.querySelectorAll("resource"),function(resourceNode) {
// hash generated by applying https://github.com/vzhd1701/evernote-backup/pull/54
var hash = resourceNode.querySelector("data").getAttribute("hash");
var text = getTextContent(resourceNode,"data");
var mimeType = getTextContent(resourceNode,"mime");
var contentTypeInfo = $tw.config.contentTypeInfo[mimeType] || {extension:""};
var title = getTextContent(resourceNode,"resource-attributes>file-name")
// a few resources don't have title, use hash as fallback
title = title || (hash + contentTypeInfo.extension);
// replace all system reserved characters in title
title = title.replace(illegalFilenameCharacters,"_");
// prefix image title with note title, to avoid name conflicts which is quite common in web-clipped content
title = noteResult.title + "/" + title;
results.push({
title: title,
type: mimeType,
width: getTextContent(resourceNode,"width"),
height: getTextContent(resourceNode,"height"),
text: text,
// give image same modified and modifier as the note, so they can be grouped together in the "Recent"
modified: noteResult.modified,
modifier: noteResult.modifier,
created: noteResult.created,
creator: noteResult.creator
});
if(hash) {
fixAttachmentReference(contentNode, hash, mimeType, title);
}
});
// export mixed content of wikitext and HTML
noteResult.text = contentNode.innerHTML;
// remove all ` xmlns="http://www.w3.org/1999/xhtml"` attributes to save some space
noteResult.text = noteResult.text.replace(/ xmlns="http:\/\/www.w3.org\/1999\/xhtml"/g, "");
$tw.utils.each(noteNode.querySelectorAll("tag"),function(tagNode) {
result.tags.push(tagNode.textContent);
noteResult.tags.push(tagNode.textContent);
});
// If there's an update date, set modifiy date accordingly
var update = getTextContent(noteNode,"updated");
if(update) {
result.modified = convertDate(update);
noteResult.modified = convertDate(update);
}
$tw.utils.each(noteNode.querySelectorAll("note-attributes>*"),function(attrNode) {
result[attrNode.tagName] = attrNode.textContent;
});
results.push(result);
$tw.utils.each(noteNode.querySelectorAll("resource"),function(resourceNode) {
results.push({
title: getTextContent(resourceNode,"resource-attributes>file-name"),
type: getTextContent(resourceNode,"mime"),
width: getTextContent(resourceNode,"width"),
height: getTextContent(resourceNode,"height"),
text: getTextContent(resourceNode,"data")
});
noteResult[attrNode.tagName] = attrNode.textContent;
});
results.push(noteResult);
});
// Return the output tiddlers
return results;
};
/*
Get the text content of the first descendant of a node that matches a selector
node: any object exposing querySelector(), such as a DOM element
selector: CSS selector identifying the descendant to read
Returns the matched element's textContent, or "" when nothing matches or the
content is empty. A string is always returned so that callers can safely chain
string methods such as replace() on the result.
*/
function getTextContent(node,selector) {
	var match = node.querySelector(selector);
	return (match && match.textContent) || "";
}
/*
Convert a date string taken from the ENEX file into the form stored in the
created/modified fields
isoDate: date string; a falsy value is treated as ""
Removes the first "T" and the first "Z" found in the string, then appends "000"
NOTE(review): the "000" suffix presumably supplies the milliseconds part of the
TiddlyWiki date format - confirm. An empty input yields just "000".
*/
function convertDate(isoDate) {
	var stamp = isoDate || "";
	stamp = stamp.replace("T","");
	stamp = stamp.replace("Z","");
	return stamp + "000";
}
/*
Replace the <en-media> placeholders of a resource with wikitext that points at
the tiddler created for that resource
contentNode: root node of the note content to search; nothing is done if it is falsy
md5Hash: value of the `hash` attribute carried by the resource's <en-media> nodes
mimeType: MIME type of the resource; types starting with "image/" are embedded
	with [img[name]], everything else becomes a plain link [[name]]
name: title of the tiddler that holds the resource
Throws an Error if a content node is given but name is empty
*/
function fixAttachmentReference(contentNode, md5Hash, mimeType, name) {
	if(!contentNode) return;
	if(!name) {
		throw new Error("name is empty for resource hash " + md5Hash);
	}
	// The same resource can be referenced several times within one note, so every
	// matching placeholder is replaced, not just the first one
	var mediaNodes = contentNode.querySelectorAll('en-media[hash="' + md5Hash + '"]');
	if(!mediaNodes || mediaNodes.length === 0) return;
	var isImage = (mimeType || "").indexOf("image/") === 0,
		wikitext = isImage ? "[img[" + name + "]]" : "[[" + name + "]]";
	for(var index=0; index<mediaNodes.length; index++) {
		var mediaNode = mediaNodes[index];
		// A DOM node can only live in one place, so make a fresh <p> for each replacement
		mediaNode.parentNode.replaceChild($tw.utils.domMaker("p", {text: wikitext}), mediaNode);
	}
}
})();

View File

@ -5,6 +5,7 @@ This plugin contains tool to assist migration of content from Evernote ENEX file
!! Instructions
# Download or save your ENEX file from Evernote
## Use [ext[evernote-backup|https://github.com/vzhd1701/evernote-backup]] to export the ENEX file with resource hashes, so that images can be linked in the note
# Rename the file to have an `.enex` extension
# Drag the file into the TiddlyWiki browser window
## Alternatively, click the "Import" button in the "Tools" sidebar tab

File diff suppressed because one or more lines are too long