mirror of
https://github.com/janet-lang/janet
synced 2024-11-28 11:09:54 +00:00
Change \UXXXXXXXX -> \UXXXXXX and check codepoint max.
No need to add two extra leading zeros, as the maximum Unicode codepoint is 0x10FFFF, which fits in six hex digits.
This commit is contained in:
parent
810ef7401c
commit
87ecdb8112
@ -273,6 +273,10 @@ static int escapeu(JanetParser *p, JanetParseState *state, uint8_t c) {
|
|||||||
state->argn = (state->argn << 4) + digit;
|
state->argn = (state->argn << 4) + digit;
|
||||||
state->counter--;
|
state->counter--;
|
||||||
if (!state->counter) {
|
if (!state->counter) {
|
||||||
|
if (state->argn > 0x10FFFF) {
|
||||||
|
p->error = "invalid unicode codepoint";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
write_codepoint(p, state->argn);
|
write_codepoint(p, state->argn);
|
||||||
state->argn = 0;
|
state->argn = 0;
|
||||||
state->consumer = stringchar;
|
state->consumer = stringchar;
|
||||||
@ -291,7 +295,7 @@ static int escape1(JanetParser *p, JanetParseState *state, uint8_t c) {
|
|||||||
state->argn = 0;
|
state->argn = 0;
|
||||||
state->consumer = escapeh;
|
state->consumer = escapeh;
|
||||||
} else if (c == 'u' || c == 'U') {
|
} else if (c == 'u' || c == 'U') {
|
||||||
state->counter = c == 'u' ? 4 : 8;
|
state->counter = c == 'u' ? 4 : 6;
|
||||||
state->argn = 0;
|
state->argn = 0;
|
||||||
state->consumer = escapeu;
|
state->consumer = escapeu;
|
||||||
} else {
|
} else {
|
||||||
|
@ -206,9 +206,9 @@
|
|||||||
(def 🐮 :cow)
|
(def 🐮 :cow)
|
||||||
(assert (= (string "🐼" 🦊 🐮) "🐼foxcow") "emojis 🙉 :)")
|
(assert (= (string "🐼" 🦊 🐮) "🐼foxcow") "emojis 🙉 :)")
|
||||||
(assert (not= 🦊 "🦊") "utf8 strings are not symbols and vice versa")
|
(assert (not= 🦊 "🦊") "utf8 strings are not symbols and vice versa")
|
||||||
(assert (= "\U0001F637" "😷") "unicode escape 1")
|
(assert (= "\U01F637" "😷") "unicode escape 1")
|
||||||
(assert (= "\u2623" "\U00002623" "☣") "unicode escape 2")
|
(assert (= "\u2623" "\U002623" "☣") "unicode escape 2")
|
||||||
(assert (= "\u24c2" "\U000024c2" "Ⓜ") "unicode escape 3")
|
(assert (= "\u24c2" "\U0024c2" "Ⓜ") "unicode escape 3")
|
||||||
(assert (= "\u0061" "a") "unicode escape 4")
|
(assert (= "\u0061" "a") "unicode escape 4")
|
||||||
|
|
||||||
# Symbols with @ character
|
# Symbols with @ character
|
||||||
|
@ -308,7 +308,7 @@
|
|||||||
<array>
|
<array>
|
||||||
<dict>
|
<dict>
|
||||||
<key>match</key>
|
<key>match</key>
|
||||||
<string>(\\[nevr0zft"\\']|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})</string>
|
<string>(\\[nevr0zft"\\']|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{6})</string>
|
||||||
<key>name</key>
|
<key>name</key>
|
||||||
<string>constant.character.escape.janet</string>
|
<string>constant.character.escape.janet</string>
|
||||||
</dict>
|
</dict>
|
||||||
|
Loading…
Reference in New Issue
Block a user