mirror of
https://github.com/janet-lang/janet
synced 2024-12-01 04:19:55 +00:00
Address #306 - Add unicode escapes.
Unicode escapes have the same syntax as go - \uXXXX or \UXXXXXXXX.
This commit is contained in:
parent
081d132538
commit
ae70a03383
@ -201,6 +201,8 @@ static int checkescape(uint8_t c) {
|
||||
default:
|
||||
return -1;
|
||||
case 'x':
|
||||
case 'u':
|
||||
case 'U':
|
||||
return 1;
|
||||
case 'n':
|
||||
return '\n';
|
||||
@ -228,6 +230,24 @@ static int checkescape(uint8_t c) {
|
||||
/* Forward declare */
|
||||
static int stringchar(JanetParser *p, JanetParseState *state, uint8_t c);
|
||||
|
||||
static void write_codepoint(JanetParser *p, int32_t codepoint) {
|
||||
if (codepoint <= 0x7F) {
|
||||
push_buf(p, (uint8_t) codepoint);
|
||||
} else if (codepoint <= 0x7FF) {
|
||||
push_buf(p, (uint8_t)((codepoint >> 6) & 0x1F) | 0xC0);
|
||||
push_buf(p, (uint8_t)((codepoint >> 0) & 0x3F) | 0x80);
|
||||
} else if (codepoint <= 0xFFFF) {
|
||||
push_buf(p, (uint8_t)((codepoint >> 12) & 0x0F) | 0xE0);
|
||||
push_buf(p, (uint8_t)((codepoint >> 6) & 0x3F) | 0x80);
|
||||
push_buf(p, (uint8_t)((codepoint >> 0) & 0x3F) | 0x80);
|
||||
} else {
|
||||
push_buf(p, (uint8_t)((codepoint >> 18) & 0x07) | 0xF0);
|
||||
push_buf(p, (uint8_t)((codepoint >> 12) & 0x3F) | 0x80);
|
||||
push_buf(p, (uint8_t)((codepoint >> 6) & 0x3F) | 0x80);
|
||||
push_buf(p, (uint8_t)((codepoint >> 0) & 0x3F) | 0x80);
|
||||
}
|
||||
}
|
||||
|
||||
static int escapeh(JanetParser *p, JanetParseState *state, uint8_t c) {
|
||||
int digit = to_hex(c);
|
||||
if (digit < 0) {
|
||||
@ -237,7 +257,23 @@ static int escapeh(JanetParser *p, JanetParseState *state, uint8_t c) {
|
||||
state->argn = (state->argn << 4) + digit;
|
||||
state->counter--;
|
||||
if (!state->counter) {
|
||||
push_buf(p, (state->argn & 0xFF));
|
||||
push_buf(p, (uint8_t)(state->argn & 0xFF));
|
||||
state->argn = 0;
|
||||
state->consumer = stringchar;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int escapeu(JanetParser *p, JanetParseState *state, uint8_t c) {
|
||||
int digit = to_hex(c);
|
||||
if (digit < 0) {
|
||||
p->error = "invalid hex digit in unicode escape";
|
||||
return 1;
|
||||
}
|
||||
state->argn = (state->argn << 4) + digit;
|
||||
state->counter--;
|
||||
if (!state->counter) {
|
||||
write_codepoint(p, state->argn);
|
||||
state->argn = 0;
|
||||
state->consumer = stringchar;
|
||||
}
|
||||
@ -254,6 +290,10 @@ static int escape1(JanetParser *p, JanetParseState *state, uint8_t c) {
|
||||
state->counter = 2;
|
||||
state->argn = 0;
|
||||
state->consumer = escapeh;
|
||||
} else if (c == 'u' || c == 'U') {
|
||||
state->counter = c == 'u' ? 4 : 8;
|
||||
state->argn = 0;
|
||||
state->consumer = escapeu;
|
||||
} else {
|
||||
push_buf(p, (uint8_t) e);
|
||||
state->consumer = stringchar;
|
||||
|
@ -206,6 +206,10 @@
|
||||
(def 🐮 :cow)
|
||||
(assert (= (string "🐼" 🦊 🐮) "🐼foxcow") "emojis 🙉 :)")
|
||||
(assert (not= 🦊 "🦊") "utf8 strings are not symbols and vice versa")
|
||||
(assert (= "\U0001F637" "😷") "unicode escape 1")
|
||||
(assert (= "\u2623" "\U00002623" "☣") "unicode escape 2")
|
||||
(assert (= "\u24c2" "\U000024c2" "Ⓜ") "unicode escape 3")
|
||||
(assert (= "\u0061" "a") "unicode escape 4")
|
||||
|
||||
# Symbols with @ character
|
||||
|
||||
|
@ -308,7 +308,7 @@
|
||||
<array>
|
||||
<dict>
|
||||
<key>match</key>
|
||||
<string>(\\[nevr0zft"\\']|\\x[0-9a-fA-F][0-9a-fA-f])</string>
|
||||
<string>(\\[nevr0zft"\\']|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})</string>
|
||||
<key>name</key>
|
||||
<string>constant.character.escape.janet</string>
|
||||
</dict>
|
||||
|
Loading…
Reference in New Issue
Block a user