mirror of
https://github.com/Jermolene/TiddlyWiki5
synced 2024-11-23 10:07:19 +00:00
Fix incorrect base64 encoding of astral-plane text (#4813)
Most astral-plane text is emoji such as U+1F4DA BOOKS (📚), but some
languages, such as Osage, have their entire alphabet in the Supplementary
Multilingual Plane as well. For proper support of languages like Osage,
and of newer emoji, the UTF-8 encode and decode functions need to properly
handle code points above U+FFFF, each of which is represented by a surrogate
pair in JavaScript strings.
This commit is contained in:
parent
f74c49f393
commit
c23eedd069
@ -20,15 +20,27 @@
|
||||
for (i; i < string.length; i++) {
|
||||
charCode = string.charCodeAt(i);
|
||||
|
||||
if (charCode < 128)
|
||||
if (charCode < 128) {
|
||||
output += String.fromCharCode(charCode);
|
||||
else if ((charCode > 127) && (charCode < 2048))
|
||||
output += String.fromCharCode((charCode >> 6) | 192),
|
||||
} else if ((charCode > 127) && (charCode < 2048)) {
|
||||
output += String.fromCharCode((charCode >> 6) | 192);
|
||||
output += String.fromCharCode((charCode & 63) | 128);
|
||||
else
|
||||
output += String.fromCharCode((charCode >> 12) | 224),
|
||||
output += String.fromCharCode(((charCode >> 6) & 63) | 128),
|
||||
} else if ((charCode > 55295) && (charCode < 57344) && string.length > i+1) {
|
||||
// Surrogate pair
|
||||
var hiSurrogate = charCode;
|
||||
var loSurrogate = string.charCodeAt(i+1);
|
||||
i++; // Skip the low surrogate on the next loop pass
|
||||
var codePoint = (((hiSurrogate - 55296) << 10) | (loSurrogate - 56320)) + 65536;
|
||||
output += String.fromCharCode((codePoint >> 18) | 240);
|
||||
output += String.fromCharCode(((codePoint >> 12) & 63) | 128);
|
||||
output += String.fromCharCode(((codePoint >> 6) & 63) | 128);
|
||||
output += String.fromCharCode((codePoint & 63) | 128);
|
||||
} else {
|
||||
// Not a surrogate pair, or a dangling surrogate without its partner that we'll just encode as-is
|
||||
output += String.fromCharCode((charCode >> 12) | 224);
|
||||
output += String.fromCharCode(((charCode >> 6) & 63) | 128);
|
||||
output += String.fromCharCode((charCode & 63) | 128);
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
@ -41,15 +53,21 @@
|
||||
while (i < string.length) {
|
||||
charCode = string.charCodeAt(i);
|
||||
|
||||
if (charCode < 128)
|
||||
if (charCode < 128) {
|
||||
output += String.fromCharCode(charCode),
|
||||
i++;
|
||||
else if ((charCode > 191) && (charCode < 224))
|
||||
output += String.fromCharCode(((charCode & 31) << 6) | (string.charCodeAt(i + 1) & 63)),
|
||||
} else if ((charCode > 191) && (charCode < 224)) {
|
||||
output += String.fromCharCode(((charCode & 31) << 6) | (string.charCodeAt(i + 1) & 63));
|
||||
i += 2;
|
||||
else
|
||||
output += String.fromCharCode(((charCode & 15) << 12) | ((string.charCodeAt(i + 1) & 63) << 6) | (string.charCodeAt(i + 2) & 63)),
|
||||
} else if ((charCode > 223) && (charCode < 240)) {
|
||||
output += String.fromCharCode(((charCode & 15) << 12) | ((string.charCodeAt(i + 1) & 63) << 6) | (string.charCodeAt(i + 2) & 63));
|
||||
i += 3;
|
||||
} else {
|
||||
var codePoint = ((charCode & 7) << 18) | ((string.charCodeAt(i + 1) & 63) << 12) | ((string.charCodeAt(i + 2) & 63) << 6) | (string.charCodeAt(i + 3) & 63);
|
||||
// output += String.fromCodePoint(codePoint); // Can't do this because Internet Explorer doesn't have String.fromCodePoint
|
||||
output += String.fromCharCode(((codePoint - 65536) >> 10) + 55296) + String.fromCharCode(((codePoint - 65536) & 1023) + 56320); // So we do this instead
|
||||
i += 4;
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
|
@ -6,4 +6,4 @@
|
||||
//
|
||||
// copyright: MIT
|
||||
// author: Nijiko Yonskai, @nijikokun, nijikokun@gmail.com
|
||||
!function(r,e,o,t){void 0!==o.module&&o.module.exports?o.module.exports=e.apply(o):void 0!==o.define&&"function"===o.define&&o.define.amd?define("utf8",[],e):o.utf8=e.apply(o)}(0,function(){return{encode:function(r){if("string"!=typeof r)return r;r=r.replace(/\r\n/g,"\n");for(var e,o="",t=0;t<r.length;t++)(e=r.charCodeAt(t))<128?o+=String.fromCharCode(e):e>127&&e<2048?(o+=String.fromCharCode(e>>6|192),o+=String.fromCharCode(63&e|128)):(o+=String.fromCharCode(e>>12|224),o+=String.fromCharCode(e>>6&63|128),o+=String.fromCharCode(63&e|128));return o},decode:function(r){if("string"!=typeof r)return r;for(var e="",o=0,t=0;o<r.length;)(t=r.charCodeAt(o))<128?(e+=String.fromCharCode(t),o++):t>191&&t<224?(e+=String.fromCharCode((31&t)<<6|63&r.charCodeAt(o+1)),o+=2):(e+=String.fromCharCode((15&t)<<12|(63&r.charCodeAt(o+1))<<6|63&r.charCodeAt(o+2)),o+=3);return e}}},this),function(r,e,o,t){if(void 0!==o.module&&o.module.exports){if(t&&o.require)for(var n=0;n<t.length;n++)o[t[n]]=o.require(t[n]);o.module.exports=e.apply(o)}else void 0!==o.define&&"function"===o.define&&o.define.amd?define("base64",t||[],e):o.base64=e.apply(o)}(0,function(r){var e=r||this.utf8,o="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";return{encode:function(r){if(void 0===e)throw{error:"MissingMethod",message:"UTF8 Module is missing."};if("string"!=typeof r)return r;r=e.encode(r);for(var t,n,i,d,f,a,h,c="",u=0;u<r.length;)d=(t=r.charCodeAt(u++))>>2,f=(3&t)<<4|(n=r.charCodeAt(u++))>>4,a=(15&n)<<2|(i=r.charCodeAt(u++))>>6,h=63&i,isNaN(n)?a=h=64:isNaN(i)&&(h=64),c+=o.charAt(d)+o.charAt(f)+o.charAt(a)+o.charAt(h);return c},decode:function(r){if(void 0===e)throw{error:"MissingMethod",message:"UTF8 Module is missing."};if("string"!=typeof r)return r;r=r.replace(/[^A-Za-z0-9\+\/\=]/g,"");for(var 
t,n,i,d,f,a,h="",c=0;c<r.length;)t=o.indexOf(r.charAt(c++))<<2|(d=o.indexOf(r.charAt(c++)))>>4,n=(15&d)<<4|(f=o.indexOf(r.charAt(c++)))>>2,i=(3&f)<<6|(a=o.indexOf(r.charAt(c++))),h+=String.fromCharCode(t),64!=f&&(h+=String.fromCharCode(n)),64!=a&&(h+=String.fromCharCode(i));return e.decode(h)}}},this,["utf8"]);
|
||||
!function(r,e,o,t){void 0!==o.module&&o.module.exports?o.module.exports=e.apply(o):void 0!==o.define&&"function"===o.define&&o.define.amd?define("utf8",[],e):o.utf8=e.apply(o)}(0,function(){return{encode:function(r){if("string"!=typeof r)return r;r=r.replace(/\r\n/g,"\n");for(var e,o="",t=0;t<r.length;t++)if((e=r.charCodeAt(t))<128)o+=String.fromCharCode(e);else if(e>127&&e<2048)o+=String.fromCharCode(e>>6|192),o+=String.fromCharCode(63&e|128);else if(e>55295&&e<57344&&r.length>t+1){var i=e,n=r.charCodeAt(t+1);t++;var d=65536+(i-55296<<10|n-56320);o+=String.fromCharCode(d>>18|240),o+=String.fromCharCode(d>>12&63|128),o+=String.fromCharCode(d>>6&63|128),o+=String.fromCharCode(63&d|128)}else o+=String.fromCharCode(e>>12|224),o+=String.fromCharCode(e>>6&63|128),o+=String.fromCharCode(63&e|128);return o},decode:function(r){if("string"!=typeof r)return r;for(var e="",o=0,t=0;o<r.length;)if((t=r.charCodeAt(o))<128)e+=String.fromCharCode(t),o++;else if(t>191&&t<224)e+=String.fromCharCode((31&t)<<6|63&r.charCodeAt(o+1)),o+=2;else if(t>223&&t<240)e+=String.fromCharCode((15&t)<<12|(63&r.charCodeAt(o+1))<<6|63&r.charCodeAt(o+2)),o+=3;else{var i=(7&t)<<18|(63&r.charCodeAt(o+1))<<12|(63&r.charCodeAt(o+2))<<6|63&r.charCodeAt(o+3);e+=String.fromCharCode(55296+(i-65536>>10))+String.fromCharCode(56320+(i-65536&1023)),o+=4}return e}}},this),function(r,e,o,t){if(void 0!==o.module&&o.module.exports){if(t&&o.require)for(var i=0;i<t.length;i++)o[t[i]]=o.require(t[i]);o.module.exports=e.apply(o)}else void 0!==o.define&&"function"===o.define&&o.define.amd?define("base64",t||[],e):o.base64=e.apply(o)}(0,function(r){var e=r||this.utf8,o="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";return{encode:function(r){if(void 0===e)throw{error:"MissingMethod",message:"UTF8 Module is missing."};if("string"!=typeof r)return r;r=e.encode(r);for(var 
t,i,n,d,f,a,h,C="",c=0;c<r.length;)d=(t=r.charCodeAt(c++))>>2,f=(3&t)<<4|(i=r.charCodeAt(c++))>>4,a=(15&i)<<2|(n=r.charCodeAt(c++))>>6,h=63&n,isNaN(i)?a=h=64:isNaN(n)&&(h=64),C+=o.charAt(d)+o.charAt(f)+o.charAt(a)+o.charAt(h);return C},decode:function(r){if(void 0===e)throw{error:"MissingMethod",message:"UTF8 Module is missing."};if("string"!=typeof r)return r;r=r.replace(/[^A-Za-z0-9\+\/\=]/g,"");for(var t,i,n,d,f,a,h="",C=0;C<r.length;)t=o.indexOf(r.charAt(C++))<<2|(d=o.indexOf(r.charAt(C++)))>>4,i=(15&d)<<4|(f=o.indexOf(r.charAt(C++)))>>2,n=(3&f)<<6|(a=o.indexOf(r.charAt(C++))),h+=String.fromCharCode(t),64!=f&&(h+=String.fromCharCode(i)),64!=a&&(h+=String.fromCharCode(n));return e.decode(h)}}},this,["utf8"]);
|
@ -25,6 +25,19 @@ describe("Utility tests", function() {
|
||||
expect(psa(" [[Tidd\u00a0ler8]] two ")).toEqual(["Tidd\u00a0ler8","two"]);
|
||||
});
|
||||
|
||||
it("should handle base64 encoding emojis", function() {
|
||||
var booksEmoji = "📚";
|
||||
expect(booksEmoji).toBe(booksEmoji);
|
||||
// 📚 is U+1F4DA BOOKS, which is represented by surrogate pair 0xD83D 0xDCDA in Javascript
|
||||
expect(booksEmoji.length).toBe(2);
|
||||
expect(booksEmoji.charCodeAt(0)).toBe(55357); // 0xD83D
|
||||
expect(booksEmoji.charCodeAt(1)).toBe(56538); // 0xDCDA
|
||||
expect($tw.utils.base64Encode(booksEmoji)).not.toBe("7aC97bOa", "if base64 is 7aC97bOa then surrogate pairs were incorrectly treated as codepoints");
|
||||
expect($tw.utils.base64Encode(booksEmoji)).toBe("8J+Tmg==", "if surrogate pairs are correctly treated as a single code unit then base64 should be 8J+Tmg==");
|
||||
expect($tw.utils.base64Decode("8J+Tmg==")).toBe(booksEmoji);
|
||||
expect($tw.utils.base64Decode($tw.utils.base64Encode(booksEmoji))).toBe(booksEmoji, "should round-trip correctly");
|
||||
});
|
||||
|
||||
it("should handle stringifying a string array", function() {
|
||||
var str = $tw.utils.stringifyList;
|
||||
expect(str([])).toEqual("");
|
||||
|
Loading…
Reference in New Issue
Block a user