1
0
mirror of https://github.com/janet-lang/janet synced 2024-12-26 08:20:27 +00:00

Minor fixes for parser

Check length before dereferencing buffer in tokenchar.
Check keywords are valid utf-8.
Fix minor typos.
This commit is contained in:
Andrew Chambers 2019-11-23 16:55:23 +13:00
parent 8372d1e499
commit 976dfc7195
2 changed files with 19 additions and 6 deletions

View File

@ -38,7 +38,7 @@ static int is_whitespace(uint8_t c) {
/* Code generated by tools/symcharsgen.c. /* Code generated by tools/symcharsgen.c.
* The table contains 256 bits, where each bit is 1 * The table contains 256 bits, where each bit is 1
* if the corresponding ascci code is a symbol char, and 0 * if the corresponding ascii code is a symbol char, and 0
* if not. The upper characters are also considered symbol * if not. The upper characters are also considered symbol
* chars and are then checked for utf-8 compliance. */ * chars and are then checked for utf-8 compliance. */
static const uint32_t symchars[8] = { static const uint32_t symchars[8] = {
@ -233,7 +233,7 @@ static int escapeh(JanetParser *p, JanetParseState *state, uint8_t c) {
p->error = "invalid hex digit in hex escape"; p->error = "invalid hex digit in hex escape";
return 1; return 1;
} }
state->argn = (state->argn << 4) + digit;; state->argn = (state->argn << 4) + digit;
state->counter--; state->counter--;
if (!state->counter) { if (!state->counter) {
push_buf(p, (state->argn & 0xFF)); push_buf(p, (state->argn & 0xFF));
@ -329,6 +329,12 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
int start_dig = p->buf[0] >= '0' && p->buf[0] <= '9'; int start_dig = p->buf[0] >= '0' && p->buf[0] <= '9';
int start_num = start_dig || p->buf[0] == '-' || p->buf[0] == '+' || p->buf[0] == '.'; int start_num = start_dig || p->buf[0] == '-' || p->buf[0] == '+' || p->buf[0] == '.';
if (p->buf[0] == ':') { if (p->buf[0] == ':') {
/* Don't do full utf-8 check unless we have seen non ascii characters. */
int valid = (!state->argn) || valid_utf8(p->buf + 1, blen - 1);
if (!valid) {
p->error = "invalid utf-8 in keyword";
return 0;
}
ret = janet_keywordv(p->buf + 1, blen - 1); ret = janet_keywordv(p->buf + 1, blen - 1);
} else if (start_num && !janet_scan_number(p->buf, blen, &numval)) { } else if (start_num && !janet_scan_number(p->buf, blen, &numval)) {
ret = janet_wrap_number(numval); ret = janet_wrap_number(numval);
@ -338,7 +344,7 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
ret = janet_wrap_false(); ret = janet_wrap_false();
} else if (!check_str_const("true", p->buf, blen)) { } else if (!check_str_const("true", p->buf, blen)) {
ret = janet_wrap_true(); ret = janet_wrap_true();
} else if (p->buf) { } else {
if (start_dig) { if (start_dig) {
p->error = "symbol literal cannot start with a digit"; p->error = "symbol literal cannot start with a digit";
return 0; return 0;
@ -351,9 +357,6 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
} }
ret = janet_symbolv(p->buf, blen); ret = janet_symbolv(p->buf, blen);
} }
} else {
p->error = "empty symbol invalid";
return 0;
} }
p->bufcount = 0; p->bufcount = 0;
popstate(p, ret); popstate(p, ret);

View File

@ -118,6 +118,16 @@
(assert (deep= (parser/status p) (parser/status p2)) "parser 2") (assert (deep= (parser/status p) (parser/status p2)) "parser 2")
(assert (deep= (parser/state p) (parser/state p2)) "parser 3") (assert (deep= (parser/state p) (parser/state p2)) "parser 3")
# Parser errors
# Helper for the parser error tests: run `input` through a brand-new parser
# and return whatever `parser/error` reports for it afterwards.
# The assertions that use this helper treat a non-nil result as "input rejected".
(defn parse-error [input]
# A fresh parser per call, so no state carries over between test inputs.
(def p (parser/new))
(parser/consume p input)
# The last form is the function's result.
(parser/error p))
# Invalid utf-8 sequences
(assert (not= nil (parse-error @"\xc3\x28")) "reject invalid utf-8 symbol")
(assert (not= nil (parse-error @":\xc3\x28")) "reject invalid utf-8 keyword")
# String check-set # String check-set
(assert (string/check-set "abc" "a") "string/check-set 1") (assert (string/check-set "abc" "a") "string/check-set 1")
(assert (not (string/check-set "abc" "z")) "string/check-set 2") (assert (not (string/check-set "abc" "z")) "string/check-set 2")