From c8523bf4793d202457db9d97f27733a33aebc577 Mon Sep 17 00:00:00 2001 From: JackMacWindows Date: Sun, 18 Jun 2023 17:42:28 -0400 Subject: [PATCH] Add ability to serialize Unicode strings to JSON (#1489) --- gradle/libs.versions.toml | 2 +- .../computercraft/lua/rom/apis/textutils.lua | 107 +++++++++++++----- .../test-rom/spec/apis/textutils_spec.lua | 15 ++- 3 files changed, 91 insertions(+), 33 deletions(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index e13ca1c60..9ff0cbb6c 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -57,7 +57,7 @@ fabric-loom = "1.1.10" forgeGradle = "5.1.+" githubRelease = "2.2.12" ideaExt = "1.1.6" -illuaminate = "0.1.0-24-gdb28902" +illuaminate = "0.1.0-28-ga7efd71" librarian = "1.+" minotaur = "2.+" mixinGradle = "0.7.+" diff --git a/projects/core/src/main/resources/data/computercraft/lua/rom/apis/textutils.lua b/projects/core/src/main/resources/data/computercraft/lua/rom/apis/textutils.lua index 353dbb799..3d34c27b3 100644 --- a/projects/core/src/main/resources/data/computercraft/lua/rom/apis/textutils.lua +++ b/projects/core/src/main/resources/data/computercraft/lua/rom/apis/textutils.lua @@ -424,12 +424,31 @@ do if map[c] == nil then map[c] = hexify(c) end end - serializeJSONString = function(s) - return ('"%s"'):format(s:gsub("[\0-\x1f\"\\]", map):gsub("[\x7f-\xff]", hexify)) + serializeJSONString = function(s, options) + if options and options.unicode_strings and s:find("[\x80-\xff]") then + local retval = '"' + for _, code in utf8.codes(s) do + if code > 0xFFFF then + -- Encode the codepoint as a UTF-16 surrogate pair + code = code - 0x10000 + local high, low = bit32.extract(code, 10, 10) + 0xD800, bit32.extract(code, 0, 10) + 0xDC00 + retval = retval .. ("\\u%04X\\u%04X"):format(high, low) + elseif code <= 0x5C and map[string.char(code)] then -- 0x5C = `\`, don't run `string.char` if we don't need to + retval = retval .. 
map[string.char(code)] + elseif code < 0x20 or code >= 0x7F then + retval = retval .. ("\\u%04X"):format(code) + else + retval = retval .. string.char(code) + end + end + return retval .. '"' + else + return ('"%s"'):format(s:gsub("[\0-\x1f\"\\]", map):gsub("[\x7f-\xff]", hexify)) + end end end -local function serializeJSONImpl(t, tTracking, bNBTStyle) +local function serializeJSONImpl(t, tTracking, options) local sType = type(t) if t == empty_json_array then return "[]" elseif t == json_null then return "null" @@ -450,13 +469,14 @@ local function serializeJSONImpl(t, tTracking, bNBTStyle) local nObjectSize = 0 local nArraySize = 0 local largestArrayIndex = 0 + local bNBTStyle = options and options.nbt_style for k, v in pairs(t) do if type(k) == "string" then local sEntry if bNBTStyle then - sEntry = tostring(k) .. ":" .. serializeJSONImpl(v, tTracking, bNBTStyle) + sEntry = tostring(k) .. ":" .. serializeJSONImpl(v, tTracking, options) else - sEntry = serializeJSONString(k) .. ":" .. serializeJSONImpl(v, tTracking, bNBTStyle) + sEntry = serializeJSONString(k, options) .. ":" .. serializeJSONImpl(v, tTracking, options) end if nObjectSize == 0 then sObjectResult = sObjectResult .. sEntry @@ -473,7 +493,7 @@ local function serializeJSONImpl(t, tTracking, bNBTStyle) if t[k] == nil then --if the array is nil at index k the value is "null" as to keep the unused indexes in between used ones. sEntry = "null" else -- if the array index does not point to a nil we serialise it's content. - sEntry = serializeJSONImpl(t[k], tTracking, bNBTStyle) + sEntry = serializeJSONImpl(t[k], tTracking, options) end if nArraySize == 0 then sArrayResult = sArrayResult .. 
sEntry @@ -492,7 +512,7 @@ local function serializeJSONImpl(t, tTracking, bNBTStyle) end elseif sType == "string" then - return serializeJSONString(t) + return serializeJSONString(t, options) elseif sType == "number" or sType == "boolean" then return tostring(t) @@ -813,32 +833,57 @@ end unserialise = unserialize -- GB version ---- Returns a JSON representation of the given data. --- --- This function attempts to guess whether a table is a JSON array or --- object. However, empty tables are assumed to be empty objects - use --- @{textutils.empty_json_array} to mark an empty array. --- --- This is largely intended for interacting with various functions from the --- @{commands} API, though may also be used in making @{http} requests. --- --- @param t The value to serialise. Like @{textutils.serialise}, this should not --- contain recursive tables or functions. --- @tparam[opt] boolean bNBTStyle Whether to produce NBT-style JSON (non-quoted keys) --- instead of standard JSON. --- @treturn string The JSON representation of the input. --- @throws If the object contains a value which cannot be --- serialised. This includes functions and tables which appear multiple --- times. --- @usage textutils.serialiseJSON({ values = { 1, "2", true } }) --- @since 1.7 --- @see textutils.json_null Use to serialise a JSON `null` value. --- @see textutils.empty_json_array Use to serialise a JSON empty array. -function serializeJSON(t, bNBTStyle) +--[[- Returns a JSON representation of the given data. + +This function attempts to guess whether a table is a JSON array or +object. However, empty tables are assumed to be empty objects - use +@{textutils.empty_json_array} to mark an empty array. + +This is largely intended for interacting with various functions from the +@{commands} API, though may also be used in making @{http} requests. + +@param[1] t The value to serialise. Like @{textutils.serialise}, this should not +contain recursive tables or functions. +@tparam[1,opt] { nbt_style? 
= boolean, unicode_strings? = boolean } options Options for serialisation. +- `nbt_style`: Whether to produce NBT-style JSON (non-quoted keys) instead of standard JSON. +- `unicode_strings`: Whether to treat strings as containing UTF-8 characters instead of + using the default 8-bit character set. + +@param[2] t The value to serialise. Like @{textutils.serialise}, this should not +contain recursive tables or functions. +@tparam[2] boolean bNBTStyle Whether to produce NBT-style JSON (non-quoted keys) +instead of standard JSON. + +@treturn string The JSON representation of the input. +@throws If the object contains a value which cannot be serialised. This includes +functions and tables which appear multiple times. + +@usage Serialise a simple object + + textutils.serialiseJSON({ values = { 1, "2", true } }) + +@usage Serialise an object to an NBT-style string + + textutils.serialiseJSON({ values = { 1, "2", true } }, { nbt_style = true }) + +@since 1.7 +@changed 1.106.0 Added `options` overload and `unicode_strings` option. + +@see textutils.json_null Use to serialise a JSON `null` value. +@see textutils.empty_json_array Use to serialise a JSON empty array. 
+]] +function serializeJSON(t, options) expect(1, t, "table", "string", "number", "boolean") - expect(2, bNBTStyle, "boolean", "nil") + expect(2, options, "table", "boolean", "nil") + if type(options) == "boolean" then + options = { nbt_style = options } + elseif type(options) == "table" then + field(options, "nbt_style", "boolean", "nil") + field(options, "unicode_strings", "boolean", "nil") + end + local tTracking = {} - return serializeJSONImpl(t, tTracking, bNBTStyle or false) + return serializeJSONImpl(t, tTracking, options) end serialiseJSON = serializeJSON -- GB version diff --git a/projects/core/src/test/resources/test-rom/spec/apis/textutils_spec.lua b/projects/core/src/test/resources/test-rom/spec/apis/textutils_spec.lua index af5ad9005..53df9fb56 100644 --- a/projects/core/src/test/resources/test-rom/spec/apis/textutils_spec.lua +++ b/projects/core/src/test/resources/test-rom/spec/apis/textutils_spec.lua @@ -143,8 +143,10 @@ describe("The textutils library", function() textutils.serialiseJSON({}) textutils.serialiseJSON(false) textutils.serialiseJSON("", true) + textutils.serializeJSON("", {}) + textutils.serializeJSON(0, { nbt_style = true, unicode_strings = true }) expect.error(textutils.serialiseJSON, nil):eq("bad argument #1 (table, string, number or boolean expected, got nil)") - expect.error(textutils.serialiseJSON, "", 1):eq("bad argument #2 (boolean expected, got number)") + expect.error(textutils.serialiseJSON, "", 1):eq("bad argument #2 (table or boolean expected, got number)") end) it("serializes empty arrays", function() @@ -174,6 +176,17 @@ describe("The textutils library", function() expect(textutils.serializeJSON({ 5, "test", nil, nil, textutils.json_null })):eq('[5,"test",null,null,null]') expect(textutils.serializeJSON({ nil, nil, nil, nil, "text" })):eq('[null,null,null,null,"text"]') end) + + it("serializes NBT style", function() + expect(textutils.serializeJSON({ test = 2 }, { nbt_style = true })):eq('{test:2}') + 
expect(textutils.serializeJSON({ test = 2 }, true)):eq('{test:2}') -- old style + end) + + it("serializes Unicode strings", function() + expect(textutils.serializeJSON("\u{3053}\u{3093}\u{306B}\u{3061}\u{306F}", { unicode_strings = true })):eq([["\u3053\u3093\u306B\u3061\u306F"]]) + expect(textutils.serializeJSON("\u{1f62f}", { unicode_strings = true })):eq([["\uD83D\uDE2F"]]) + expect(textutils.serializeJSON("\\\"\u{00ff}\n\"", { unicode_strings = true })):eq('"\\\\\\"\\u00FF\\n\\""') + end) end) describe("textutils.unserializeJSON", function()