1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2025-01-03 22:10:26 +00:00
TiddlyWiki5/plugins/tiddlywiki/twitter-archivist/archivist.js
jeremy@jermolene.com 06520c8994 Wikify hashtags
2022-11-14 08:51:18 +00:00

320 lines
11 KiB
JavaScript

/*\
title: $:/plugins/tiddlywiki/twitter-archivist/archivist.js
type: application/javascript
module-type: utils
Utility class for manipulating Twitter archives
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
function TwitterArchivist(options) {
options = options || {};
this.source = options.source;
}
TwitterArchivist.prototype.loadArchive = async function(options) {
options = options || {};
const wiki = options.wiki;
await this.source.init();
// Process the manifest and profile
const manifestData = await this.loadTwitterJsData("data/manifest.js","window.__THAR_CONFIG = ",""),
profileData = await this.loadTwitterJsData("data/profile.js","window.YTD.profile.part0 = ",""),
accountData = await this.loadTwitterJsData("data/account.js","window.YTD.account.part0 = ",""),
username = manifestData.userInfo.userName,
user_id = manifestData.userInfo.accountId;
wiki.addTiddler({
title: "Twitter Archive for @" + username,
icon: "$:/core/images/twitter",
color: "#1D9CF0",
tags: "$:/tags/TwitterArchive",
user_id: user_id,
username: username,
displayname: manifestData.userInfo.displayName,
generation_date: $tw.utils.stringifyDate(new Date(manifestData.archiveInfo.generationDate)),
account_created_date: $tw.utils.stringifyDate(new Date(accountData[0].account.createdAt)),
bio: profileData[0].profile.description.bio,
website: profileData[0].profile.description.website,
location: profileData[0].profile.description.location
});
// Process the media
await this.source.processFiles("data/tweets_media","base64",function(mediaItem) {
var ext = mediaItem.filename.split(".").slice(-1)[0];
if("jpg png".split(" ").indexOf(ext) !== -1) {
var extensionInfo = $tw.utils.getFileExtensionInfo("." + ext),
type = extensionInfo ? extensionInfo.type : null;
wiki.addTiddler({
title: "Tweet Media - " + mediaItem.filename,
tags: "$:/tags/TweetMedia",
status_id: mediaItem.filename.split("-")[0],
text: mediaItem.contents,
type: type
});
}
});
// Process the favourites
const likeData = await this.loadTwitterJsData("data/like.js","window.YTD.like.part0 = ","");
$tw.utils.each(likeData,function(like) {
// Create the tweet tiddler
var tiddler = {
title: "Tweet - " + like.like.tweetId,
text: "\\rules only html entity extlink\n" + (like.like.fullText || "").replace("\n","<br>"),
status_id: like.like.tweetId,
liked_by: user_id,
tags: "$:/tags/Tweet"
};
wiki.addTiddler(tiddler);
});
// Process the tweets
const tweetData = await this.loadTwitterJsData("data/tweets.js","window.YTD.tweets.part0 = ","");
$tw.utils.each(tweetData,function(tweet) {
// Compile the tags for the tweet
var tags = ["$:/tags/Tweet"];
// Accumulate the replacements/insertions to the text as an array of {startPos:,endPos:,fnTransform:}
var modifications = [];
// Modifications for mentions
var mentions = [];
$tw.utils.each(tweet.tweet.entities.user_mentions,function(mention) {
var title = "Tweeter - " + mention.id_str;
tags.push(title);
mentions.push(mention.id_str);
wiki.addTiddler({
title: title,
screenname: "@" + mention.screen_name,
tags: "$:/tags/Tweeter",
user_id: mention.id_str,
name: mention.name
});
modifications.push({
startPos: parseInt(mention.indices[0],10),
endPos: parseInt(mention.indices[1],10),
fnTransform: function(text) {
return "<$link to=\"" + title + "\">" +
$tw.utils.htmlEncode(text.substring(mention.indices[0],mention.indices[1])) +
"</$link>";
}
});
});
// Modifications for URLs
$tw.utils.each(tweet.tweet.entities.urls,function(urlInfo) {
modifications.push({
startPos: parseInt(urlInfo.indices[0],10),
endPos: parseInt(urlInfo.indices[1],10),
fnTransform: function(text) {
return "<a href=\"" + urlInfo.expanded_url + "\" rel=\"noopener noreferrer\" target=\"_blank\">" +
$tw.utils.htmlEncode(urlInfo.display_url) +
"</a>";
}
});
});
// Modifications for hashtags
$tw.utils.each(tweet.tweet.entities.hashtags,function(hashtag) {
var title = "#" + hashtag.text;
tags.push(title);
wiki.addTiddler({
title: title,
hashtag: hashtag.text,
tags: "$:/tags/Hashtag"
});
modifications.push({
startPos: parseInt(hashtag.indices[0],10),
endPos: parseInt(hashtag.indices[1],10),
fnTransform: function(text) {
return "<$link to=\"" + title + "\">" +
"#" + $tw.utils.htmlEncode(hashtag.text) +
"</$link>";
}
});
});
// Sort the modifications by start position
modifications.sort(function(a,b) {
return a.startPos - b.startPos;
});
// Apply the modifications in reverse order
var rawText = tweet.tweet.full_text,
posText = 0,
chunks = [];
$tw.utils.each(modifications,function(modification) {
// Process any text before the modification
if(modification.startPos > posText) {
chunks.push($tw.utils.htmlEncode(rawText.substring(posText,modification.startPos)));
}
// Process the modification
chunks.push(modification.fnTransform(rawText));
// Adjust the position
posText = modification.endPos;
});
// Process any remaining text
if(posText < rawText.length) {
chunks.push($tw.utils.htmlEncode(rawText.substring(posText)));
}
// Concatenate the chunks and replace newlines with <br>
var text = chunks.join("").replace("\n","<br>");
// Create the tweet tiddler
var tiddler = {
title: "Tweet - " + tweet.tweet.id_str,
text: "\\rules only html entity extlink\n" + text,
status_id: tweet.tweet.id_str,
user_id: user_id,
favorite_count: tweet.tweet.favorite_count,
retweet_count: tweet.tweet.retweet_count,
tags: tags,
created: $tw.utils.stringifyDate(new Date(tweet.tweet.created_at)),
modified: $tw.utils.stringifyDate(new Date(tweet.tweet.created_at))
};
if(tweet.tweet.in_reply_to_status_id_str) {
tiddler.in_reply_to_status_id = tweet.tweet.in_reply_to_status_id_str;
}
if(mentions.length > 0) {
tiddler.mention_user_ids = $tw.utils.stringifyList(mentions);
}
wiki.addTiddler(tiddler);
});
};
TwitterArchivist.prototype.loadTwitterJsData = async function(filePath,prefix,suffix) {
var tweetFileData = await this.source.loadTwitterJsData(filePath);
if(prefix) {
if(tweetFileData.slice(0,prefix.length) !== prefix) {
throw "Reading Twitter JS file " + filePath + " missing prefix '" + prefix + "'";
}
tweetFileData = tweetFileData.slice(prefix.length);
}
if(suffix) {
if(tweetFileData.slice(-suffix.length) !== suffix) {
throw "Reading Twitter JS file " + filePath + " missing suffix '" + suffix + "'";
}
tweetFileData = tweetFileData.slice(0,tweetFileData.length - suffix.length);
}
return JSON.parse(tweetFileData);
};
function TwitterArchivistSourceNodeJs(options) {
options = options || {};
this.archivePath = options.archivePath;
}
TwitterArchivistSourceNodeJs.prototype.init = async function() {
};
TwitterArchivistSourceNodeJs.prototype.processFiles = async function(dirPath,encoding,callback) {
var fs = require("fs"),
path = require("path"),
dirPath = path.resolve(this.archivePath,dirPath),
filenames = fs.readdirSync(dirPath);
$tw.utils.each(filenames,function(filename) {
callback({
filename: filename,
contents: fs.readFileSync(path.resolve(dirPath,filename),encoding)
});
});
};
TwitterArchivistSourceNodeJs.prototype.loadTwitterJsData = async function(filePath) {
var fs = require("fs"),
path = require("path");
return fs.readFileSync(path.resolve(this.archivePath,filePath),"utf8");
};
function TwitterArchivistSourceBrowser(options) {
options = options || {};
}
TwitterArchivistSourceBrowser.prototype.init = async function() {
// Open directory
this.rootDirHandle = await window.showDirectoryPicker();
};
TwitterArchivistSourceBrowser.prototype.processFiles = async function(dirPath,encoding,callback) {
const dirHandle = await this.walkDirectory(dirPath.split("/"));
for await (const [filename, fileHandle] of dirHandle.entries()) {
const contents = await fileHandle.getFile();
callback({
filename: filename,
contents: arrayBufferToBase64(await contents.arrayBuffer())
});
}
};
TwitterArchivistSourceBrowser.prototype.loadTwitterJsData = async function(filePath) {
const filePathParts = filePath.split("/");
const dirHandle = await this.walkDirectory(filePathParts.slice(0,-1));
const fileHandle = await dirHandle.getFileHandle(filePathParts.slice(-1)[0]);
const contents = await fileHandle.getFile();
return await contents.text();
};
TwitterArchivistSourceBrowser.prototype.walkDirectory = async function(arrayDirectoryEntries) {
var entries = arrayDirectoryEntries.slice(0),
dirHandle = this.rootDirHandle;
while(entries.length > 0) {
dirHandle = await dirHandle.getDirectoryHandle(entries[0]);
entries.shift();
}
return dirHandle;
};
// Thanks to MatheusFelipeMarinho
// https://github.com/MatheusFelipeMarinho/venom/blob/43ead0bfffa57a536a5cff67dd909e55da9f0915/src/lib/wapi/helper/array-buffer-to-base64.js#L55
function arrayBufferToBase64(arrayBuffer) {
var base64 = '';
var encodings =
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
var bytes = new Uint8Array(arrayBuffer);
var byteLength = bytes.byteLength;
var byteRemainder = byteLength % 3;
var mainLength = byteLength - byteRemainder;
var a, b, c, d;
var chunk;
// Main loop deals with bytes in chunks of 3
for (var i = 0; i < mainLength; i = i + 3) {
// Combine the three bytes into a single integer
chunk = (bytes[i] << 16) | (bytes[i + 1] << 8) | bytes[i + 2];
// Use bitmasks to extract 6-bit segments from the triplet
a = (chunk & 16515072) >> 18; // 16515072 = (2^6 - 1) << 18
b = (chunk & 258048) >> 12; // 258048 = (2^6 - 1) << 12
c = (chunk & 4032) >> 6; // 4032 = (2^6 - 1) << 6
d = chunk & 63; // 63 = 2^6 - 1
// Convert the raw binary segments to the appropriate ASCII encoding
base64 += encodings[a] + encodings[b] + encodings[c] + encodings[d];
}
// Deal with the remaining bytes and padding
if (byteRemainder == 1) {
chunk = bytes[mainLength];
a = (chunk & 252) >> 2; // 252 = (2^6 - 1) << 2
// Set the 4 least significant bits to zero
b = (chunk & 3) << 4; // 3 = 2^2 - 1
base64 += encodings[a] + encodings[b] + '==';
} else if (byteRemainder == 2) {
chunk = (bytes[mainLength] << 8) | bytes[mainLength + 1];
a = (chunk & 64512) >> 10; // 64512 = (2^6 - 1) << 10
b = (chunk & 1008) >> 4; // 1008 = (2^6 - 1) << 4
// Set the 2 least significant bits to zero
c = (chunk & 15) << 2; // 15 = 2^4 - 1
base64 += encodings[a] + encodings[b] + encodings[c] + '=';
}
return base64;
}
exports.TwitterArchivist = TwitterArchivist;
exports.TwitterArchivistSourceNodeJs = TwitterArchivistSourceNodeJs;
exports.TwitterArchivistSourceBrowser = TwitterArchivistSourceBrowser;
})();