diff --git a/core/modules/filters/encodings.js b/core/modules/filters/encodings.js index f41350791..557599c1a 100644 --- a/core/modules/filters/encodings.js +++ b/core/modules/filters/encodings.js @@ -77,7 +77,7 @@ exports.encodehtml = function(source,operator,options) { exports.stringify = function(source,operator,options) { var results = []; source(function(tiddler,title) { - results.push($tw.utils.stringify(title)); + results.push($tw.utils.stringify(title,(operator.suffix === "rawunicode"))); }); return results; }; @@ -85,7 +85,7 @@ exports.stringify = function(source,operator,options) { exports.jsonstringify = function(source,operator,options) { var results = []; source(function(tiddler,title) { - results.push($tw.utils.jsonStringify(title)); + results.push($tw.utils.jsonStringify(title,(operator.suffix === "rawunicode"))); }); return results; }; diff --git a/core/modules/utils/utils.js b/core/modules/utils/utils.js index b3c56fb8c..14d251f01 100644 --- a/core/modules/utils/utils.js +++ b/core/modules/utils/utils.js @@ -564,7 +564,7 @@ exports.escape = function(ch) { // Turns a string into a legal JavaScript string // Copied from peg.js, thanks to David Majda -exports.stringify = function(s) { +exports.stringify = function(s, rawUnicode) { /* * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string * literal except for the closing quote character, backslash, carriage return, @@ -573,19 +573,21 @@ exports.stringify = function(s) { * * For portability, we also escape all non-ASCII characters. */ + var regex = rawUnicode ? 
/[\x00-\x1f]/g : /[\x00-\x1f\x80-\uFFFF]/g; return (s || "") .replace(/\\/g, '\\\\') // backslash .replace(/"/g, '\\"') // double quote character .replace(/'/g, "\\'") // single quote character .replace(/\r/g, '\\r') // carriage return .replace(/\n/g, '\\n') // line feed - .replace(/[\x00-\x1f\x80-\uFFFF]/g, exports.escape); // non-ASCII characters + .replace(regex, exports.escape); // non-ASCII characters }; // Turns a string into a legal JSON string // Derived from peg.js, thanks to David Majda -exports.jsonStringify = function(s) { +exports.jsonStringify = function(s, rawUnicode) { // See http://www.json.org/ + var regex = rawUnicode ? /[\x00-\x1f]/g : /[\x00-\x1f\x80-\uFFFF]/g; return (s || "") .replace(/\\/g, '\\\\') // backslash .replace(/"/g, '\\"') // double quote character @@ -594,7 +596,7 @@ exports.jsonStringify = function(s) { .replace(/\x08/g, '\\b') // backspace .replace(/\x0c/g, '\\f') // formfeed .replace(/\t/g, '\\t') // tab - .replace(/[\x00-\x1f\x80-\uFFFF]/g,function(s) { + .replace(regex,function(s) { return '\\u' + $tw.utils.pad(s.charCodeAt(0).toString(16).toUpperCase(),4); }); // non-ASCII characters }; diff --git a/editions/tw5.com/tiddlers/filters/examples/jsonstringify Operator (Examples).tid b/editions/tw5.com/tiddlers/filters/examples/jsonstringify Operator (Examples).tid index 20f7bff75..ead9ffb38 100644 --- a/editions/tw5.com/tiddlers/filters/examples/jsonstringify Operator (Examples).tid +++ b/editions/tw5.com/tiddlers/filters/examples/jsonstringify Operator (Examples).tid @@ -5,3 +5,5 @@ title: jsonstringify Operator (Examples) type: text/vnd.tiddlywiki <<.operator-example 1 """[[Title with "double quotes" and single ' and \backslash]] +[jsonstringify[]]""">> +<<.operator-example 2 """[[Accents and emojis -> äñøßπ ⌛🎄🍪🍓 without suffix]] +[jsonstringify[]]""">> +<<.operator-example 3 """[[Accents and emojis -> äñøßπ ⌛🎄🍪🍓 with rawunicode suffix]] +[jsonstringify:rawunicode[]]""">> diff --git 
a/editions/tw5.com/tiddlers/filters/examples/stringify_Operator_(Examples).tid b/editions/tw5.com/tiddlers/filters/examples/stringify_Operator_(Examples).tid index 1378287f5..a664cf7d2 100644 --- a/editions/tw5.com/tiddlers/filters/examples/stringify_Operator_(Examples).tid +++ b/editions/tw5.com/tiddlers/filters/examples/stringify_Operator_(Examples).tid @@ -5,3 +5,5 @@ title: stringify Operator (Examples) type: text/vnd.tiddlywiki <<.operator-example 1 """[[Title with "double quotes" and single ' and \backslash]] +[stringify[]]""">> +<<.operator-example 2 """[[Accents and emojis -> äñøßπ ⌛🎄🍪🍓 without suffix]] +[stringify[]]""">> +<<.operator-example 3 """[[Accents and emojis -> äñøßπ ⌛🎄🍪🍓 with rawunicode suffix]] +[stringify:rawunicode[]]""">> diff --git a/editions/tw5.com/tiddlers/filters/jsonstringify Operator.tid b/editions/tw5.com/tiddlers/filters/jsonstringify Operator.tid index ff361450e..a7e4d565c 100644 --- a/editions/tw5.com/tiddlers/filters/jsonstringify Operator.tid +++ b/editions/tw5.com/tiddlers/filters/jsonstringify Operator.tid @@ -7,22 +7,29 @@ op-output: the input with JSON string encodings applied op-parameter: op-parameter-name: op-purpose: apply JSON string encoding to a string +op-suffix: <<.from-version "5.1.23">> optionally, the keyword `rawunicode` +op-suffix-name: R tags: [[Filter Operators]] [[String Operators]] title: jsonstringify Operator type: text/vnd.tiddlywiki The following substitutions are made: -|!Character |!Replacement | -|`\` |`\\` | -|`"` |`\"` | -|`\r` (carriage return) |`\\r` | -|`\n` (line feed) |`\\n` | -|`\x08` (backspace) |`\\b` | -|`\x0c` (form field) |`\\f` | -|`\t` (tab) |`\\t` | -|Characters from 0x00 to 0x1f |`\\x##` where ## is two hex digits | -|Characters from 0x80 to 0xffff |`\\u####` where #### is four hex digits | +|!Character |!Replacement |!Condition | +|`\` |`\\` |Always | +|`"` |`\"` |Always | +|Carriage return (0x0d) |`\\r` |Always | +|Line feed (0x0a) |`\\n` |Always | +|Backspace (0x08) |`\\b` |Always 
| +|Form feed (0x0c) |`\\f` |Always | +|Tab (0x09) |`\\t` |Always | +|Characters from 0x00 to 0x1f |`\\u####` where #### is four hex digits |Always | +|Characters from 0x80 to 0xffff|`\\u####` where #### is four hex digits |If `rawunicode` suffix is not present (default) | +|Characters from 0x80 to 0xffff|Unchanged |If `rawunicode` suffix is present <<.from-version "5.1.23">> | + +<<.from-version "5.1.23">> If the suffix `rawunicode` is present, Unicode characters from 0x80 upwards (such as ß, ä, ñ or 🎄) will be passed through unchanged. Without the suffix, they will be substituted with `\\u` codes, which is still the default and was the only behavior before 5.1.23. + +<<.note """Technical note: Characters outside the Basic Multilingual Plane, such as 🎄 and other emojis, will be encoded as a UTF-16 surrogate pair, i.e. with two `\u` sequences.""">> Also see the [[stringify Operator]]. diff --git a/editions/tw5.com/tiddlers/filters/stringify_Operator.tid b/editions/tw5.com/tiddlers/filters/stringify_Operator.tid index 7bd0847d9..e06be4387 100644 --- a/editions/tw5.com/tiddlers/filters/stringify_Operator.tid +++ b/editions/tw5.com/tiddlers/filters/stringify_Operator.tid @@ -6,6 +6,8 @@ op-output: the input with ~JavaScript string encodings applied op-parameter: op-parameter-name: op-purpose: apply ~JavaScript string encoding to a string +op-suffix: <<.from-version "5.1.23">> optionally, the keyword `rawunicode` +op-suffix-name: R tags: [[Filter Operators]] [[String Operators]] title: stringify Operator type: text/vnd.tiddlywiki @@ -13,15 +15,21 @@ from-version: 5.1.14 The following substitutions are made: -|!Character |!Replacement | -|`\` |`\\` | -|`"` |`\"` | -|`\r` (carriage return) |`\\r` | -|`\n` (line feed) |`\\n` | -|`\x08` (backspace) |`\\b` | -|`\x0c` (form feed) |`\\f` | -|`\t` (tab) |`\\t` | -|Characters from 0x00 to 0x1f and characters from 0x80 to 0xffff |`\\u####` where #### is four hex digits | +|!Character |!Replacement |!Condition | +|`\` |`\\` |Always | +|`"` |`\"` |Always | 
+|Carriage return (0x0d) |`\\r` |Always | +|Line feed (0x0a) |`\\n` |Always | +|Backspace (0x08) |`\\b` |Always | +|Form feed (0x0c) |`\\f` |Always | +|Tab (0x09) |`\\t` |Always | +|Characters from 0x00 to 0x1f |`\\x##` where ## is two hex digits |Always | +|Characters from 0x80 to 0xffff|`\\u####` where #### is four hex digits |If `rawunicode` suffix is not present (default) | +|Characters from 0x80 to 0xffff|<<.from-version "5.1.23">> Unchanged |If `rawunicode` suffix is present | + +<<.from-version "5.1.23">> If the suffix `rawunicode` is present, Unicode characters from 0x80 upwards (such as ß, ä, ñ or 🎄) will be passed through unchanged. Without the suffix, they will be substituted with `\\u` codes, which is still the default and was the only behavior before 5.1.23. + +<<.note """Technical note: Characters outside the Basic Multilingual Plane, such as 🎄 and other emojis, will be encoded as a UTF-16 surrogate pair, i.e. with two `\u` sequences.""">> Also see the [[jsonstringify Operator]].