Fix encodebase64 and decodebase64 filters (#7683)

* Fix encodebase64 and decodebase64 filters

The documentation for encodebase64 says that the input is treated as
binary data, but in fact the input is being treated as text data, with
an extra UTF-8 encoding step being performed first.

Likewise, the decodebase64 documentation says that it outputs binary
data, but in fact it will do a UTF-8 decoding step before producing
output, which will in fact garble binary data.

This commit changes the behavior of encodebase64 and decodebase64 to
match what the documentation says they do. It also adds an optional
`text` suffix to both filters to keep the current behavior.

Finally, an optional `urlsafe` suffix is added to both filters to allow
them to use the "URL-safe" variant of base64 (using `-` instead of `+`
and `_` instead of `/`).

* Try to fix failing test

Turns out a little more than this is going to be needed.

* Fix binary base64 encoding, including unit tests

* Update base64 filter documentation

* Can't use replaceAll, too new

Have to use String.replace with a global regex instead

* Replace uses of window.btoa() in rest of code

Since window.btoa() is not available under Node.js, we'll replace all
uses of it with the $tw.utils.base64encode() function that now works
correctly for binary data.

* Add link to UTF-8 glossary definition at MDN
This commit is contained in:
Robin Munn 2023-10-18 22:08:56 +07:00 committed by GitHub
parent c185e373c5
commit 326ae61929
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 70 additions and 10 deletions

View File

@ -18,16 +18,20 @@ Export our filter functions
exports.decodebase64 = function(source,operator,options) {
var results = [];
var binary = operator.suffixes && operator.suffixes.indexOf("binary") !== -1;
var urlsafe = operator.suffixes && operator.suffixes.indexOf("urlsafe") !== -1;
source(function(tiddler,title) {
results.push($tw.utils.base64Decode(title));
results.push($tw.utils.base64Decode(title,binary,urlsafe));
});
return results;
};
exports.encodebase64 = function(source,operator,options) {
var results = [];
var binary = operator.suffixes && operator.suffixes.indexOf("binary") !== -1;
var urlsafe = operator.suffixes && operator.suffixes.indexOf("urlsafe") !== -1;
source(function(tiddler,title) {
results.push($tw.utils.base64Encode(title));
results.push($tw.utils.base64Encode(title,binary,urlsafe));
});
return results;
};

View File

@ -31,7 +31,7 @@ GitHubSaver.prototype.save = function(text,method,callback) {
headers = {
"Accept": "application/vnd.github.v3+json",
"Content-Type": "application/json;charset=UTF-8",
"Authorization": "Basic " + window.btoa(username + ":" + password),
"Authorization": "Basic " + $tw.utils.base64Encode(username + ":" + password),
"If-None-Match": ""
};
// Bail if we don't have everything we need

View File

@ -187,7 +187,7 @@ HttpClientRequest.prototype.send = function(callback) {
for (var i=0; i<len; i++) {
binary += String.fromCharCode(bytes[i]);
}
resultVariables.data = window.btoa(binary);
resultVariables.data = $tw.utils.base64Encode(binary,true);
}
self.wiki.addTiddler(new $tw.Tiddler(self.wiki.getTiddler(requestTrackerTitle),{
status: completionCode,

View File

@ -819,18 +819,41 @@ exports.hashString = function(str) {
},0);
};
/*
Base64 utility functions that work in either browser or Node.js
*/
if(typeof window !== 'undefined') {
exports.btoa = window.btoa;
exports.atob = window.atob;
} else {
exports.btoa = function(binstr) {
return Buffer.from(binstr, 'binary').toString('base64');
}
exports.atob = function(b64) {
return Buffer.from(b64, 'base64').toString('binary');
}
}
/*
Decode a base64 string
*/
exports.base64Decode = function(string64) {
return base64utf8.base64.decode.call(base64utf8,string64);
exports.base64Decode = function(string64,binary,urlsafe) {
var encoded = urlsafe ? string64.replace(/_/g,'/').replace(/-/g,'+') : string64;
if(binary) return exports.atob(encoded)
else return base64utf8.base64.decode.call(base64utf8,encoded);
};
/*
Encode a string to base64
*/
exports.base64Encode = function(string64) {
return base64utf8.base64.encode.call(base64utf8,string64);
exports.base64Encode = function(string64,binary,urlsafe) {
var encoded;
if(binary) encoded = exports.btoa(string64);
else encoded = base64utf8.base64.encode.call(base64utf8,string64);
if(urlsafe) {
encoded = encoded.replace(/\+/g,'-').replace(/\//g,'_');
}
return encoded;
};
/*

View File

@ -48,6 +48,29 @@ describe("Utility tests", function() {
expect($tw.utils.base64Decode($tw.utils.base64Encode(booksEmoji))).toBe(booksEmoji, "should round-trip correctly");
});
it("should handle base64 encoding emojis in URL-safe variant", function() {
var booksEmoji = "📚";
expect($tw.utils.base64Encode(booksEmoji, false, true)).toBe("8J-Tmg==", "if surrogate pairs are correctly treated as a single code unit then base64 should be 8J+Tmg==");
expect($tw.utils.base64Decode("8J-Tmg==", false, true)).toBe(booksEmoji);
expect($tw.utils.base64Decode($tw.utils.base64Encode(booksEmoji, false, true), false, true)).toBe(booksEmoji, "should round-trip correctly");
});
it("should handle base64 encoding binary data", function() {
var binaryData = "\xff\xfe\xfe\xff";
var encoded = $tw.utils.base64Encode(binaryData,true);
expect(encoded).toBe("//7+/w==");
var decoded = $tw.utils.base64Decode(encoded,true);
expect(decoded).toBe(binaryData, "Binary data did not round-trip correctly");
});
it("should handle base64 encoding binary data in URL-safe variant", function() {
var binaryData = "\xff\xfe\xfe\xff";
var encoded = $tw.utils.base64Encode(binaryData,true,true);
expect(encoded).toBe("__7-_w==");
var decoded = $tw.utils.base64Decode(encoded,true,true);
expect(decoded).toBe(binaryData, "Binary data did not round-trip correctly");
});
it("should handle stringifying a string array", function() {
var str = $tw.utils.stringifyList;
expect(str([])).toEqual("");

View File

@ -1,6 +1,7 @@
caption: decodebase64
op-input: a [[selection of titles|Title Selection]]
op-output: the input with base 64 decoding applied
op-suffix: optional: `binary` to produce binary output, `urlsafe` for URL-safe input
op-parameter:
op-parameter-name:
op-purpose: apply base 64 decoding to a string
@ -11,6 +12,10 @@ from-version: 5.2.6
See Mozilla Developer Network for details of [[base 64 encoding|https://developer.mozilla.org/en-US/docs/Glossary/Base64]]. TiddlyWiki uses [[library code from @nijikokun|https://gist.github.com/Nijikokun/5192472]] to handle the conversion.
The input strings must be base64 encoded. The output strings are binary data.
The input strings must be base64 encoded. The output strings are the text (or binary data) decoded from base64 format.
The optional `binary` suffix, if present, changes how the input is processed. The input is normally assumed to be [[UTF-8|https://developer.mozilla.org/en-US/docs/Glossary/UTF-8]] text encoded in base64 form (such as what the <<.op "encodebase64">> operator produces), so only certain byte sequences in the input are valid. If the input is binary data encoded in base64 format (such as an image, audio file, video file, etc.), then use the optional `binary` suffix, which will allow all byte sequences. Note that the output will then be binary, ''not'' text, and should probably not be passed into further filter operators.
The optional `urlsafe` suffix, if present, causes the decoder to assume that the base64 input uses `-` and `_` instead of `+` and `/` for the 62nd and 63rd characters of the base64 "alphabet", which is usually referred to as "URL-safe base64" or "bae64url".
<<.operator-examples "decodebase64">>

View File

@ -1,6 +1,7 @@
caption: encodebase64
op-input: a [[selection of titles|Title Selection]]
op-output: the input with base 64 encoding applied
op-suffix: optional: `binary` to treat input as binary data, `urlsafe` for URL-safe output
op-parameter:
op-parameter-name:
op-purpose: apply base 64 encoding to a string
@ -11,6 +12,10 @@ from-version: 5.2.6
See Mozilla Developer Network for details of [[base 64 encoding|https://developer.mozilla.org/en-US/docs/Glossary/Base64]]. TiddlyWiki uses [[library code from @nijikokun|https://gist.github.com/Nijikokun/5192472]] to handle the conversion.
The input strings are interpreted as binary data. The output strings are base64 encoded.
The input strings are interpreted as [[UTF-8 encoded|https://developer.mozilla.org/en-US/docs/Glossary/UTF-8]] text (or binary data instead if the `binary` suffix is present). The output strings are base64 encoded.
The optional `binary` suffix, if present, causes the input string to be interpreted as binary data instead of text. Normally, an extra UTF-8 encoding step will be added before the base64 output is produced, so that emojis and other Unicode characters will be encoded correctly. If the input is binary data, such as an image, audio file, video, etc., then the UTF-8 encoding step would produce incorrect results, so using the `binary` suffix causes the UTF-8 encoding step to be skipped.
The optional `urlsafe` suffix, if present, will use the alternate "URL-safe" base64 encoding, where `-` and `_` are used instead of `+` and `/` respectively, allowing the result to be used in URL query parameters or filenames.
<<.operator-examples "encodebase64">>