diff --git a/src/core/parse.c b/src/core/parse.c index 3734d502..4ea736a0 100644 --- a/src/core/parse.c +++ b/src/core/parse.c @@ -273,6 +273,10 @@ static int escapeu(JanetParser *p, JanetParseState *state, uint8_t c) { state->argn = (state->argn << 4) + digit; state->counter--; if (!state->counter) { + if (state->argn > 0x10FFFF) { + p->error = "invalid unicode codepoint"; + return 1; + } write_codepoint(p, state->argn); state->argn = 0; state->consumer = stringchar; @@ -291,7 +295,7 @@ static int escape1(JanetParser *p, JanetParseState *state, uint8_t c) { state->argn = 0; state->consumer = escapeh; } else if (c == 'u' || c == 'U') { - state->counter = c == 'u' ? 4 : 8; + state->counter = c == 'u' ? 4 : 6; state->argn = 0; state->consumer = escapeu; } else { diff --git a/test/suite0.janet b/test/suite0.janet index f1d4432c..c42de3b0 100644 --- a/test/suite0.janet +++ b/test/suite0.janet @@ -206,9 +206,9 @@ (def 🐮 :cow) (assert (= (string "🐼" 🦊 🐮) "🐼foxcow") "emojis 🙉 :)") (assert (not= 🦊 "🦊") "utf8 strings are not symbols and vice versa") -(assert (= "\U0001F637" "😷") "unicode escape 1") -(assert (= "\u2623" "\U00002623" "☣") "unicode escape 2") -(assert (= "\u24c2" "\U000024c2" "Ⓜ") "unicode escape 3") +(assert (= "\U01F637" "😷") "unicode escape 1") +(assert (= "\u2623" "\U002623" "☣") "unicode escape 2") +(assert (= "\u24c2" "\U0024c2" "Ⓜ") "unicode escape 3") (assert (= "\u0061" "a") "unicode escape 4") # Symbols with @ character diff --git a/tools/tm_lang_gen.janet b/tools/tm_lang_gen.janet index f19a23b8..32fcf729 100644 --- a/tools/tm_lang_gen.janet +++ b/tools/tm_lang_gen.janet @@ -308,7 +308,7 @@ match - (\\[nevr0zft"\\']|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}) + (\\[nevr0zft"\\']|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{6}) name constant.character.escape.janet