1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2024-11-27 03:57:21 +00:00

Add rawunicode suffix to (json)stringify operators (#5191)

Default with no suffix is pre-5.1.23 behavior, escaping all Unicode
characters for maximum compatibility (avoids encoding issues). New
"rawunicode" suffix allows passing through Unicode characters U+0080
and up unchanged, for cases where you know your tools are handling
encoding correctly and you want less verbose escaping.
This commit is contained in:
Robin Munn 2020-12-04 20:19:59 +07:00 committed by GitHub
parent c92f9dd404
commit 8ffe138942
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 46 additions and 25 deletions

View File

@ -77,7 +77,7 @@ exports.encodehtml = function(source,operator,options) {
exports.stringify = function(source,operator,options) {
var results = [];
source(function(tiddler,title) {
results.push($tw.utils.stringify(title));
results.push($tw.utils.stringify(title,(operator.suffix === "rawunicode")));
});
return results;
};
@ -85,7 +85,7 @@ exports.stringify = function(source,operator,options) {
exports.jsonstringify = function(source,operator,options) {
var results = [];
source(function(tiddler,title) {
results.push($tw.utils.jsonStringify(title));
results.push($tw.utils.jsonStringify(title,(operator.suffix === "rawunicode")));
});
return results;
};

View File

@ -564,7 +564,7 @@ exports.escape = function(ch) {
// Turns a string into a legal JavaScript string
// Copied from peg.js, thanks to David Majda
exports.stringify = function(s) {
exports.stringify = function(s, rawUnicode) {
/*
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
* literal except for the closing quote character, backslash, carriage return,
@ -573,19 +573,21 @@ exports.stringify = function(s) {
*
* For portability, we also escape all non-ASCII characters.
*/
var regex = rawUnicode ? /[\x00-\x1f]/g : /[\x00-\x1f\x80-\uFFFF]/g;
return (s || "")
.replace(/\\/g, '\\\\') // backslash
.replace(/"/g, '\\"') // double quote character
.replace(/'/g, "\\'") // single quote character
.replace(/\r/g, '\\r') // carriage return
.replace(/\n/g, '\\n') // line feed
.replace(/[\x00-\x1f\x80-\uFFFF]/g, exports.escape); // non-ASCII characters
.replace(regex, exports.escape); // non-ASCII characters
};
// Turns a string into a legal JSON string
// Derived from peg.js, thanks to David Majda
exports.jsonStringify = function(s) {
exports.jsonStringify = function(s, rawUnicode) {
// See http://www.json.org/
var regex = rawUnicode ? /[\x00-\x1f]/g : /[\x00-\x1f\x80-\uFFFF]/g;
return (s || "")
.replace(/\\/g, '\\\\') // backslash
.replace(/"/g, '\\"') // double quote character
@ -594,7 +596,7 @@ exports.jsonStringify = function(s) {
.replace(/\x08/g, '\\b') // backspace
.replace(/\x0c/g, '\\f') // formfeed
.replace(/\t/g, '\\t') // tab
.replace(/[\x00-\x1f\x80-\uFFFF]/g,function(s) {
.replace(regex,function(s) {
return '\\u' + $tw.utils.pad(s.charCodeAt(0).toString(16).toUpperCase(),4);
}); // non-ASCII characters
};

View File

@ -5,3 +5,5 @@ title: jsonstringify Operator (Examples)
type: text/vnd.tiddlywiki
<<.operator-example 1 """[[Title with "double quotes" and single ' and \backslash]] +[jsonstringify[]]""">>
<<.operator-example 2 """[[Accents and emojis -> äñøßπ ⌛🎄🍪🍓 without suffix]] +[jsonstringify[]]""">>
<<.operator-example 3 """[[Accents and emojis -> äñøßπ ⌛🎄🍪🍓 with rawunicode suffix]] +[jsonstringify:rawunicode[]]""">>

View File

@ -5,3 +5,5 @@ title: stringify Operator (Examples)
type: text/vnd.tiddlywiki
<<.operator-example 1 """[[Title with "double quotes" and single ' and \backslash]] +[stringify[]]""">>
<<.operator-example 2 """[[Accents and emojis -> äñøßπ ⌛🎄🍪🍓 without suffix]] +[stringify[]]""">>
<<.operator-example 3 """[[Accents and emojis -> äñøßπ ⌛🎄🍪🍓 with rawunicode suffix]] +[stringify:rawunicode[]]""">>

View File

@ -7,22 +7,29 @@ op-output: the input with JSON string encodings applied
op-parameter:
op-parameter-name:
op-purpose: apply JSON string encoding to a string
op-suffix: <<.from-version "5.1.23">> optionally, the keyword `rawunicode`
op-suffix-name: R
tags: [[Filter Operators]] [[String Operators]]
title: jsonstringify Operator
type: text/vnd.tiddlywiki
The following substitutions are made:
|!Character |!Replacement |
|`\` |`\\` |
|`"` |`\"` |
|`\r` (carriage return) |`\\r` |
|`\n` (line feed) |`\\n` |
|`\x08` (backspace) |`\\b` |
|`\x0c` (form feed) |`\\f` |
|`\t` (tab) |`\\t` |
|Characters from 0x00 to 0x1f |`\\x##` where ## is two hex digits |
|Characters from 0x80 to 0xffff |`\\u####` where #### is four hex digits |
|!Character |!Replacement |!Condition |
|`\` |`\\` |Always |
|`"` |`\"` |Always |
|Carriage return (0x0d) |`\\r` |Always |
|Line feed (0x0a) |`\\n` |Always |
|Backspace (0x08) |`\\b` |Always |
|Form feed (0x0c) |`\\f` |Always |
|Tab (0x09) |`\\t` |Always |
|Characters from 0x00 to 0x1f |`\\u####` where #### is four hex digits |Always |
|Characters from 0x80 to 0xffff|`\\u####` where #### is four hex digits |If `rawunicode` suffix is not present (default) |
|Characters from 0x80 to 0xffff|Unchanged |If `rawunicode` suffix is present <<.from-version "5.1.23">> |
<<.from-version "5.1.23">> If the suffix `rawunicode` is present, Unicode characters from 0x80 upwards (such as ß, ä, ñ or 🎄) will be passed through unchanged. Without the suffix, they will be substituted with `\\u` codes, which was the default behavior before 5.1.23.
<<.note """Technical note: When the `rawunicode` suffix is not present, characters outside the Basic Multilingual Plane, such as 🎄 and other emojis, will be encoded as a UTF-16 surrogate pair, i.e. with two `\u` sequences.""">>
Also see the [[stringify Operator]].

View File

@ -6,6 +6,8 @@ op-output: the input with ~JavaScript string encodings applied
op-parameter:
op-parameter-name:
op-purpose: apply ~JavaScript string encoding to a string
op-suffix: <<.from-version "5.1.23">> optionally, the keyword `rawunicode`
op-suffix-name: R
tags: [[Filter Operators]] [[String Operators]]
title: stringify Operator
type: text/vnd.tiddlywiki
@ -13,15 +15,21 @@ from-version: 5.1.14
The following substitutions are made:
|!Character |!Replacement |
|`\` |`\\` |
|`"` |`\"` |
|`\r` (carriage return) |`\\r` |
|`\n` (line feed) |`\\n` |
|`\x08` (backspace) |`\\b` |
|`\x0c` (form feed) |`\\f` |
|`\t` (tab) |`\\t` |
|Characters from 0x00 to 0x1f and characters from 0x80 to 0xffff |`\\u####` where #### is four hex digits |
|!Character |!Replacement |!Condition |
|`\` |`\\` |Always |
|`"` |`\"` |Always |
|Carriage return (0x0d) |`\\r` |Always |
|Line feed (0x0a) |`\\n` |Always |
|Backspace (0x08) |`\\b` |Always |
|Form feed (0x0c) |`\\f` |Always |
|Tab (0x09) |`\\t` |Always |
|Characters from 0x00 to 0x1f |`\\x##` where ## is two hex digits |Always |
|Characters from 0x80 to 0xffff|`\\u####` where #### is four hex digits |If `rawunicode` suffix is not present (default) |
|Characters from 0x80 to 0xffff|<<.from-version "5.1.23">> Unchanged |If `rawunicode` suffix is present |
<<.from-version "5.1.23">> If the suffix `rawunicode` is present, Unicode characters from 0x80 upwards (such as ß, ä, ñ or 🎄) will be passed through unchanged. Without the suffix, they will be substituted with `\\u` codes, which was the default behavior before 5.1.23.
<<.note """Technical note: When the `rawunicode` suffix is not present, characters outside the Basic Multilingual Plane, such as 🎄 and other emojis, will be encoded as a UTF-16 surrogate pair, i.e. with two `\u` sequences.""">>
Also see the [[jsonstringify Operator]].