1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2024-11-10 03:49:56 +00:00
TiddlyWiki5/plugins/tiddlywiki/evernote/modules/enex-deserializer.js
lin onetwo d3f5695601
Fix: Evernote .enex image import (#7785)
* feat: add modifier info

* feat: replace image and attachment with [img[] and [[]]

* feat: import as wikitext tid

* fix: a few resources don't have title

* fix: use hash as random name for images

* fix: Firefox's DOMParser have problem in some cases

* fix: bad char in title, and useless xmlns

* Update sample-enex-with-image.xml.enex

* Update enex-deserializer.js

* Update readme.tid

* fix: some dont have modified
2023-10-15 12:40:38 +01:00

140 lines
5.4 KiB
JavaScript

/*\
title: $:/plugins/tiddlywiki/evernote/modules/enex-deserializer.js
type: application/javascript
module-type: tiddlerdeserializer
ENEX file deserializer
For details see: https://blog.evernote.com/tech/2013/08/08/evernote-export-format-enex/
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
// DOMParser = require("$:/plugins/tiddlywiki/xmldom/dom-parser").DOMParser;
// Characters that get replaced with "_" when building tiddler titles below:
// square and angle brackets, ";", ":", double quote, "/", "\", "|", "?", "*", "^", "$",
// parentheses, any whitespace character and "~".
// The "g" flag makes String.prototype.replace substitute every occurrence, not just the first.
var illegalFilenameCharacters = /[\[\]<>;\:\"\/\\\|\?\*\^\?\$\(\)\s~]/g;
/*
Parse an ENEX file into tiddlers

text: the XML source of the ENEX export
fields: not used by this deserializer

Returns an array of tiddler field objects containing, in order:
 - one "Evernote Import Report" tiddler
 - for each <note>: one tiddler per <resource> (titled "<note title>/<resource name>"),
   followed by the note tiddler itself

Throws an Error if the parsed document contains no <en-export> element.
*/
exports["application/enex+xml"] = function(text,fields) {
	// Collect output tiddlers in an array
	var results = [];
	// Parse the XML document
	var doc = new DOMParser().parseFromString(text,"application/xml");
	var enex = doc.querySelector("en-export");
	if(!enex) {
		// Firefox's DOMParser has problems in some cases.
		throw new Error('Failed to parse ENEX file, no "en-export" node found, try use Chrome/Edge to export again.');
	}
	// Output a report tiddler with information about the import
	results.push({
		title: "Evernote Import Report",
		text: "Evernote file imported on " + enex.getAttribute("export-date") + " from " + enex.getAttribute("application") + " (" + enex.getAttribute("version") + ")"
	});
	// Process every "note" node
	$tw.utils.each(doc.querySelectorAll("note"),function(noteNode) {
		var noteTitle = getTextContent(noteNode,"title");
		// The <content> element carries the note body as text; a note may lack it entirely
		var contentNode = noteNode.querySelector("content");
		var contentText = ((contentNode && contentNode.textContent) || "").replace(/&nbsp;/g, ' ').trim();
		if(contentText) {
			// The final content will be HTML instead of XML. It is saved as wikitext so that
			// wiki syntax works while the remaining HTML still renders.
			try {
				// Re-parse the body text; fall back to the raw <content> node if no <en-note> results
				contentNode = new DOMParser().parseFromString(contentText,"application/xml").querySelector("en-note") || contentNode;
			} catch(e) {
				// Deliberate best effort: keep using the raw <content> node
			}
		}
		// Main metadata of the note; the text is filled in after resources are processed
		var noteResult = {
			title: noteTitle.replace(illegalFilenameCharacters,"_"),
			tags: [],
			// Prefer the update date, fall back to the creation date when there is none
			modified: convertDate(getTextContent(noteNode,"updated") || getTextContent(noteNode,"created")),
			modifier: getTextContent(noteNode,"author"),
			created: convertDate(getTextContent(noteNode,"created")),
			creator: getTextContent(noteNode,"author")
		};
		// Counts resources of this note that have neither a file name nor a hash,
		// so that each of them still gets a distinct title
		var unnamedResourceCount = 0;
		// Process resources (images, PDFs, etc.)
		$tw.utils.each(noteNode.querySelectorAll("resource"),function(resourceNode) {
			var dataNode = resourceNode.querySelector("data");
			// hash generated by applying https://github.com/vzhd1701/evernote-backup/pull/54
			// (null when the <data> element or its hash attribute is absent)
			var hash = dataNode ? dataNode.getAttribute("hash") : null;
			var resourceData = getTextContent(resourceNode,"data");
			var mimeType = getTextContent(resourceNode,"mime");
			var contentTypeInfo = $tw.config.contentTypeInfo[mimeType] || {extension:""};
			var title = getTextContent(resourceNode,"resource-attributes>file-name");
			if(!title) {
				// A few resources don't have a file name: use the hash, or a per-note counter
				// when there is no hash either (avoids titles like "null.png" that would collide)
				unnamedResourceCount++;
				title = (hash || ("resource-" + unnamedResourceCount)) + contentTypeInfo.extension;
			}
			// Replace all reserved characters in the title
			title = title.replace(illegalFilenameCharacters,"_");
			// Prefix with the note title to avoid name conflicts, which are quite common in web-clipped content
			title = noteResult.title + "/" + title;
			results.push({
				title: title,
				type: mimeType,
				width: getTextContent(resourceNode,"width"),
				height: getTextContent(resourceNode,"height"),
				text: resourceData,
				// Give the resource the same dates and authors as the note, so they are grouped together in "Recent"
				modified: noteResult.modified,
				modifier: noteResult.modifier,
				created: noteResult.created,
				creator: noteResult.creator
			});
			if(hash) {
				fixAttachmentReference(contentNode, hash, mimeType, title);
			}
		});
		// Export mixed content of wikitext and HTML, removing all ` xmlns="http://www.w3.org/1999/xhtml"`
		// attributes to save some space. A note without any <content> gets an empty text.
		noteResult.text = contentNode ? contentNode.innerHTML.replace(/ xmlns="http:\/\/www.w3.org\/1999\/xhtml"/g, "") : "";
		$tw.utils.each(noteNode.querySelectorAll("tag"),function(tagNode) {
			noteResult.tags.push(tagNode.textContent);
		});
		// Copy every child of <note-attributes> onto the tiddler, keyed by its tag name
		$tw.utils.each(noteNode.querySelectorAll("note-attributes>*"),function(attrNode) {
			noteResult[attrNode.tagName] = attrNode.textContent;
		});
		results.push(noteResult);
	});
	// Return the output tiddlers
	return results;
};
/*
Return the text content of the first descendant of `node` matching `selector`,
or an empty string when nothing matches or the match has no text.
*/
function getTextContent(node,selector) {
	var match = node.querySelector(selector);
	if(!match) {
		return "";
	}
	return match.textContent || "";
}
/*
Convert a date string by dropping its first "T" and first "Z" and appending "000".
A missing or empty input yields just "000".
*/
function convertDate(isoDate) {
	var stamp = isoDate || "";
	// String patterns only replace the first occurrence
	stamp = stamp.replace("T","");
	stamp = stamp.replace("Z","");
	return stamp + "000";
}
/*
Replace every <en-media> element in `contentNode` whose hash attribute equals `md5Hash`
with a <p> element holding wikitext that points at the resource tiddler `name`:
an image transclusion `[img[name]]` when `mimeType` starts with "image/",
otherwise a plain link `[[name]]`.

Does nothing when `contentNode` is falsy or no matching element exists.
Throws an Error when `contentNode` is given but `name` is empty.
*/
function fixAttachmentReference(contentNode, md5Hash, mimeType, name) {
	if(!contentNode) {
		return;
	}
	if(!name) {
		throw new Error("name is empty for resource hash" + md5Hash);
	}
	// The same resource can be referenced several times in one note, so handle all matches
	var mediaNodes = contentNode.querySelectorAll('en-media[hash="' + md5Hash + '"]');
	if(mediaNodes.length === 0) {
		return;
	}
	var isImage = mimeType.indexOf("image/") === 0;
	var wikiText = isImage ? "[img["+ name + "]]" : "[["+ name + "]]";
	Array.prototype.forEach.call(mediaNodes,function(mediaNode) {
		// A fresh element is needed for each replacement; a node can only live in one place
		mediaNode.parentNode.replaceChild($tw.utils.domMaker("p", {text: wikiText}), mediaNode);
	});
}
})();