mirror of
https://github.com/janet-lang/janet
synced 2025-07-08 04:52:53 +00:00
Use more inclusive check for the %j formatter for valid symbols.
We did not allow arbitrary utf8 to be printed with %j, even though the parser allows it. This change uses the existing built-in utf8 detection to exclude only unprintable symbols from the docstring.
This commit is contained in:
parent
8b2d278840
commit
6e38bf1578
@ -51,15 +51,15 @@ static const uint32_t symchars[8] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Check if a character is a valid symbol character
|
/* Check if a character is a valid symbol character
|
||||||
* symbol chars are A-Z, a-z, 0-9, or one of !$&*+-./:<=>@\^_~| */
|
* symbol chars are A-Z, a-z, 0-9, or one of !$&*+-./:<=>@\^_| */
|
||||||
static int is_symbol_char(uint8_t c) {
|
int janet_is_symbol_char(uint8_t c) {
|
||||||
return symchars[c >> 5] & ((uint32_t)1 << (c & 0x1F));
|
return symchars[c >> 5] & ((uint32_t)1 << (c & 0x1F));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Validate some utf8. Useful for identifiers. Only validates
|
/* Validate some utf8. Useful for identifiers. Only validates
|
||||||
* the encoding, does not check for valid code points (they
|
* the encoding, does not check for valid code points (they
|
||||||
* are less well defined than the encoding). */
|
* are less well defined than the encoding). */
|
||||||
static int valid_utf8(const uint8_t *str, int32_t len) {
|
int janet_valid_utf8(const uint8_t *str, int32_t len) {
|
||||||
int32_t i = 0;
|
int32_t i = 0;
|
||||||
int32_t j;
|
int32_t j;
|
||||||
while (i < len) {
|
while (i < len) {
|
||||||
@ -411,7 +411,7 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
|
|||||||
Janet ret;
|
Janet ret;
|
||||||
double numval;
|
double numval;
|
||||||
int32_t blen;
|
int32_t blen;
|
||||||
if (is_symbol_char(c)) {
|
if (janet_is_symbol_char(c)) {
|
||||||
push_buf(p, (uint8_t) c);
|
push_buf(p, (uint8_t) c);
|
||||||
if (c > 127) state->argn = 1; /* Use to indicate non ascii */
|
if (c > 127) state->argn = 1; /* Use to indicate non ascii */
|
||||||
return 1;
|
return 1;
|
||||||
@ -422,7 +422,7 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
|
|||||||
int start_num = start_dig || p->buf[0] == '-' || p->buf[0] == '+' || p->buf[0] == '.';
|
int start_num = start_dig || p->buf[0] == '-' || p->buf[0] == '+' || p->buf[0] == '.';
|
||||||
if (p->buf[0] == ':') {
|
if (p->buf[0] == ':') {
|
||||||
/* Don't do full utf-8 check unless we have seen non ascii characters. */
|
/* Don't do full utf-8 check unless we have seen non ascii characters. */
|
||||||
int valid = (!state->argn) || valid_utf8(p->buf + 1, blen - 1);
|
int valid = (!state->argn) || janet_valid_utf8(p->buf + 1, blen - 1);
|
||||||
if (!valid) {
|
if (!valid) {
|
||||||
p->error = "invalid utf-8 in keyword";
|
p->error = "invalid utf-8 in keyword";
|
||||||
return 0;
|
return 0;
|
||||||
@ -442,7 +442,7 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
|
|||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
/* Don't do full utf-8 check unless we have seen non ascii characters. */
|
/* Don't do full utf-8 check unless we have seen non ascii characters. */
|
||||||
int valid = (!state->argn) || valid_utf8(p->buf, blen);
|
int valid = (!state->argn) || janet_valid_utf8(p->buf, blen);
|
||||||
if (!valid) {
|
if (!valid) {
|
||||||
p->error = "invalid utf-8 in symbol";
|
p->error = "invalid utf-8 in symbol";
|
||||||
return 0;
|
return 0;
|
||||||
@ -582,7 +582,7 @@ static int root(JanetParser *p, JanetParseState *state, uint8_t c) {
|
|||||||
switch (c) {
|
switch (c) {
|
||||||
default:
|
default:
|
||||||
if (is_whitespace(c)) return 1;
|
if (is_whitespace(c)) return 1;
|
||||||
if (!is_symbol_char(c)) {
|
if (!janet_is_symbol_char(c)) {
|
||||||
p->error = "unexpected character";
|
p->error = "unexpected character";
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -261,21 +261,13 @@ void janet_to_string_b(JanetBuffer *buffer, Janet x) {
|
|||||||
|
|
||||||
/* See parse.c for full table */
|
/* See parse.c for full table */
|
||||||
|
|
||||||
static const uint32_t pp_symchars[8] = {
|
|
||||||
0x00000000, 0xf7ffec72, 0xc7ffffff, 0x07fffffe,
|
|
||||||
0x00000000, 0x00000000, 0x00000000, 0x00000000
|
|
||||||
};
|
|
||||||
|
|
||||||
static int pp_is_symbol_char(uint8_t c) {
|
|
||||||
return pp_symchars[c >> 5] & ((uint32_t)1 << (c & 0x1F));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Check if a symbol or keyword contains no symbol characters */
|
/* Check if a symbol or keyword contains no symbol characters */
|
||||||
static int contains_bad_chars(const uint8_t *sym, int issym) {
|
static int contains_bad_chars(const uint8_t *sym, int issym) {
|
||||||
int32_t len = janet_string_length(sym);
|
int32_t len = janet_string_length(sym);
|
||||||
if (len && issym && sym[0] >= '0' && sym[0] <= '9') return 1;
|
if (len && issym && sym[0] >= '0' && sym[0] <= '9') return 1;
|
||||||
|
if (!janet_valid_utf8(sym, len)) return 1;
|
||||||
for (int32_t i = 0; i < len; i++) {
|
for (int32_t i = 0; i < len; i++) {
|
||||||
if (!pp_is_symbol_char(sym[i])) return 1;
|
if (!janet_is_symbol_char(sym[i])) return 1;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -57,6 +57,8 @@
|
|||||||
|
|
||||||
/* Utils */
|
/* Utils */
|
||||||
#define janet_maphash(cap, hash) ((uint32_t)(hash) & (cap - 1))
|
#define janet_maphash(cap, hash) ((uint32_t)(hash) & (cap - 1))
|
||||||
|
int janet_valid_utf8(const uint8_t *str, int32_t len);
|
||||||
|
int janet_is_symbol_char(uint8_t c);
|
||||||
extern const char janet_base64[65];
|
extern const char janet_base64[65];
|
||||||
int32_t janet_array_calchash(const Janet *array, int32_t len);
|
int32_t janet_array_calchash(const Janet *array, int32_t len);
|
||||||
int32_t janet_kv_calchash(const JanetKV *kvs, int32_t len);
|
int32_t janet_kv_calchash(const JanetKV *kvs, int32_t len);
|
||||||
|
@ -202,6 +202,7 @@
|
|||||||
|
|
||||||
#🐙🐙🐙🐙
|
#🐙🐙🐙🐙
|
||||||
|
|
||||||
|
(defn foo [Θa Θb Θc] 0)
|
||||||
(def 🦊 :fox)
|
(def 🦊 :fox)
|
||||||
(def 🐮 :cow)
|
(def 🐮 :cow)
|
||||||
(assert (= (string "🐼" 🦊 🐮) "🐼foxcow") "emojis 🙉 :)")
|
(assert (= (string "🐼" 🦊 🐮) "🐼foxcow") "emojis 🙉 :)")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user