mirror of
https://github.com/janet-lang/janet
synced 2024-12-26 00:10:27 +00:00
Minor fixes for parser
Check length before dereferencing buffer in tokenchar. Check keywords are valid utf-8. Fix minor typos.
This commit is contained in:
parent
8372d1e499
commit
976dfc7195
@ -38,7 +38,7 @@ static int is_whitespace(uint8_t c) {
|
||||
|
||||
/* Code generated by tools/symcharsgen.c.
|
||||
* The table contains 256 bits, where each bit is 1
|
||||
* if the corresponding ascci code is a symbol char, and 0
|
||||
* if the corresponding ascii code is a symbol char, and 0
|
||||
* if not. The upper characters are also considered symbol
|
||||
* chars and are then checked for utf-8 compliance. */
|
||||
static const uint32_t symchars[8] = {
|
||||
@ -233,7 +233,7 @@ static int escapeh(JanetParser *p, JanetParseState *state, uint8_t c) {
|
||||
p->error = "invalid hex digit in hex escape";
|
||||
return 1;
|
||||
}
|
||||
state->argn = (state->argn << 4) + digit;;
|
||||
state->argn = (state->argn << 4) + digit;
|
||||
state->counter--;
|
||||
if (!state->counter) {
|
||||
push_buf(p, (state->argn & 0xFF));
|
||||
@ -329,6 +329,12 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
|
||||
int start_dig = p->buf[0] >= '0' && p->buf[0] <= '9';
|
||||
int start_num = start_dig || p->buf[0] == '-' || p->buf[0] == '+' || p->buf[0] == '.';
|
||||
if (p->buf[0] == ':') {
|
||||
/* Don't do full utf-8 check unless we have seen non ascii characters. */
|
||||
int valid = (!state->argn) || valid_utf8(p->buf + 1, blen - 1);
|
||||
if (!valid) {
|
||||
p->error = "invalid utf-8 in keyword";
|
||||
return 0;
|
||||
}
|
||||
ret = janet_keywordv(p->buf + 1, blen - 1);
|
||||
} else if (start_num && !janet_scan_number(p->buf, blen, &numval)) {
|
||||
ret = janet_wrap_number(numval);
|
||||
@ -338,7 +344,7 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
|
||||
ret = janet_wrap_false();
|
||||
} else if (!check_str_const("true", p->buf, blen)) {
|
||||
ret = janet_wrap_true();
|
||||
} else if (p->buf) {
|
||||
} else {
|
||||
if (start_dig) {
|
||||
p->error = "symbol literal cannot start with a digit";
|
||||
return 0;
|
||||
@ -351,9 +357,6 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
|
||||
}
|
||||
ret = janet_symbolv(p->buf, blen);
|
||||
}
|
||||
} else {
|
||||
p->error = "empty symbol invalid";
|
||||
return 0;
|
||||
}
|
||||
p->bufcount = 0;
|
||||
popstate(p, ret);
|
||||
|
@ -118,6 +118,16 @@
|
||||
(assert (deep= (parser/status p) (parser/status p2)) "parser 2")
|
||||
(assert (deep= (parser/state p) (parser/state p2)) "parser 3")
|
||||
|
||||
# Parser errors
|
||||
# Helper for the error tests: create a fresh parser, feed it `input`, and
# return whatever `parser/error` reports afterwards. The assertions that
# use this helper treat a non-nil result as "the parser rejected the input".
(defn parse-error [input]
|
||||
(def p (parser/new))
|
||||
(parser/consume p input)
|
||||
(parser/error p))
|
||||
|
||||
# Invalid utf-8 sequences
|
||||
(assert (not= nil (parse-error @"\xc3\x28")) "reject invalid utf-8 symbol")
|
||||
(assert (not= nil (parse-error @":\xc3\x28")) "reject invalid utf-8 keyword")
|
||||
|
||||
# String check-set
|
||||
(assert (string/check-set "abc" "a") "string/check-set 1")
|
||||
(assert (not (string/check-set "abc" "z")) "string/check-set 2")
|
||||
|
Loading…
Reference in New Issue
Block a user