Shorten the \U escape from 8 to 6 hex digits (\UXXXXXXXX -> \UXXXXXX) and reject codepoints above 0x10FFFF.

There is no need for the two extra leading zeros, since the maximum Unicode codepoint is 0x10FFFF, which fits in six hex digits.
2025-10-25 12:47:42 +00:00 · 2020-04-05 07:09:53 -05:00
parent 810ef7401c
commit 87ecdb8112
3 changed files with 9 additions and 5 deletions
--- a/src/core/parse.c
+++ b/src/core/parse.c
@@ -273,6 +273,10 @@ static int escapeu(JanetParser *p, JanetParseState *state, uint8_t c) {
    state->argn = (state->argn << 4) + digit;
    state->counter--;
    if (!state->counter) {
        if (state->argn > 0x10FFFF) {
            p->error = "invalid unicode codepoint";
            return 1;
        }
        write_codepoint(p, state->argn);
        state->argn = 0;
        state->consumer = stringchar;
@@ -291,7 +295,7 @@ static int escape1(JanetParser *p, JanetParseState *state, uint8_t c) {
        state->argn = 0;
        state->consumer = escapeh;
    } else if (c == 'u' || c == 'U') {
-        state->counter = c == 'u' ? 4 : 8;
+        state->counter = c == 'u' ? 4 : 6;
        state->argn = 0;
        state->consumer = escapeu;
    } else {
--- a/test/suite0.janet
+++ b/test/suite0.janet
@@ -206,9 +206,9 @@
 (def 🐮 :cow)
 (assert (= (string "🐼" 🦊 🐮) "🐼foxcow") "emojis 🙉 :)")
 (assert (not= 🦊 "🦊") "utf8 strings are not symbols and vice versa")
-(assert (= "\U0001F637" "😷") "unicode escape 1")
+(assert (= "\U01F637" "😷") "unicode escape 1")
-(assert (= "\u2623" "\U00002623" "☣") "unicode escape 2")
+(assert (= "\u2623" "\U002623" "☣") "unicode escape 2")
-(assert (= "\u24c2" "\U000024c2" "Ⓜ") "unicode escape 3")
+(assert (= "\u24c2" "\U0024c2" "Ⓜ") "unicode escape 3")
 (assert (= "\u0061" "a") "unicode escape 4")
 # Symbols with @ character
--- a/tools/tm_lang_gen.janet
+++ b/tools/tm_lang_gen.janet
@@ -308,7 +308,7 @@
      <array>
        <dict>
          <key>match</key>
-          <string>(\\[nevr0zft"\\']|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})</string>
+          <string>(\\[nevr0zft"\\']|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{6})</string>
          <key>name</key>
          <string>constant.character.escape.janet</string>
        </dict>