mirror of
https://github.com/janet-lang/janet
synced 2024-12-26 08:20:27 +00:00
Minor fixes for parser
Check length before dereferencing buffer in tokenchar. Check keywords are valid utf-8. Fix minor typos.
This commit is contained in:
parent
8372d1e499
commit
976dfc7195
@ -38,7 +38,7 @@ static int is_whitespace(uint8_t c) {
|
|||||||
|
|
||||||
/* Code generated by tools/symcharsgen.c.
|
/* Code generated by tools/symcharsgen.c.
|
||||||
* The table contains 256 bits, where each bit is 1
|
* The table contains 256 bits, where each bit is 1
|
||||||
* if the corresponding ascci code is a symbol char, and 0
|
* if the corresponding ascii code is a symbol char, and 0
|
||||||
* if not. The upper characters are also considered symbol
|
* if not. The upper characters are also considered symbol
|
||||||
* chars and are then checked for utf-8 compliance. */
|
* chars and are then checked for utf-8 compliance. */
|
||||||
static const uint32_t symchars[8] = {
|
static const uint32_t symchars[8] = {
|
||||||
@ -233,7 +233,7 @@ static int escapeh(JanetParser *p, JanetParseState *state, uint8_t c) {
|
|||||||
p->error = "invalid hex digit in hex escape";
|
p->error = "invalid hex digit in hex escape";
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
state->argn = (state->argn << 4) + digit;;
|
state->argn = (state->argn << 4) + digit;
|
||||||
state->counter--;
|
state->counter--;
|
||||||
if (!state->counter) {
|
if (!state->counter) {
|
||||||
push_buf(p, (state->argn & 0xFF));
|
push_buf(p, (state->argn & 0xFF));
|
||||||
@ -329,6 +329,12 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
|
|||||||
int start_dig = p->buf[0] >= '0' && p->buf[0] <= '9';
|
int start_dig = p->buf[0] >= '0' && p->buf[0] <= '9';
|
||||||
int start_num = start_dig || p->buf[0] == '-' || p->buf[0] == '+' || p->buf[0] == '.';
|
int start_num = start_dig || p->buf[0] == '-' || p->buf[0] == '+' || p->buf[0] == '.';
|
||||||
if (p->buf[0] == ':') {
|
if (p->buf[0] == ':') {
|
||||||
|
/* Don't do full utf-8 check unless we have seen non ascii characters. */
|
||||||
|
int valid = (!state->argn) || valid_utf8(p->buf + 1, blen - 1);
|
||||||
|
if (!valid) {
|
||||||
|
p->error = "invalid utf-8 in keyword";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
ret = janet_keywordv(p->buf + 1, blen - 1);
|
ret = janet_keywordv(p->buf + 1, blen - 1);
|
||||||
} else if (start_num && !janet_scan_number(p->buf, blen, &numval)) {
|
} else if (start_num && !janet_scan_number(p->buf, blen, &numval)) {
|
||||||
ret = janet_wrap_number(numval);
|
ret = janet_wrap_number(numval);
|
||||||
@ -338,7 +344,7 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
|
|||||||
ret = janet_wrap_false();
|
ret = janet_wrap_false();
|
||||||
} else if (!check_str_const("true", p->buf, blen)) {
|
} else if (!check_str_const("true", p->buf, blen)) {
|
||||||
ret = janet_wrap_true();
|
ret = janet_wrap_true();
|
||||||
} else if (p->buf) {
|
} else {
|
||||||
if (start_dig) {
|
if (start_dig) {
|
||||||
p->error = "symbol literal cannot start with a digit";
|
p->error = "symbol literal cannot start with a digit";
|
||||||
return 0;
|
return 0;
|
||||||
@ -351,9 +357,6 @@ static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
|
|||||||
}
|
}
|
||||||
ret = janet_symbolv(p->buf, blen);
|
ret = janet_symbolv(p->buf, blen);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
p->error = "empty symbol invalid";
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
p->bufcount = 0;
|
p->bufcount = 0;
|
||||||
popstate(p, ret);
|
popstate(p, ret);
|
||||||
|
@ -118,6 +118,16 @@
|
|||||||
(assert (deep= (parser/status p) (parser/status p2)) "parser 2")
|
(assert (deep= (parser/status p) (parser/status p2)) "parser 2")
|
||||||
(assert (deep= (parser/state p) (parser/state p2)) "parser 3")
|
(assert (deep= (parser/state p) (parser/state p2)) "parser 3")
|
||||||
|
|
||||||
|
# Parser errors
|
||||||
|
(defn parse-error [input]
|
||||||
|
(def p (parser/new))
|
||||||
|
(parser/consume p input)
|
||||||
|
(parser/error p))
|
||||||
|
|
||||||
|
# Invalid utf-8 sequences
|
||||||
|
(assert (not= nil (parse-error @"\xc3\x28")) "reject invalid utf-8 symbol")
|
||||||
|
(assert (not= nil (parse-error @":\xc3\x28")) "reject invalid utf-8 keyword")
|
||||||
|
|
||||||
# String check-set
|
# String check-set
|
||||||
(assert (string/check-set "abc" "a") "string/check-set 1")
|
(assert (string/check-set "abc" "a") "string/check-set 1")
|
||||||
(assert (not (string/check-set "abc" "z")) "string/check-set 2")
|
(assert (not (string/check-set "abc" "z")) "string/check-set 2")
|
||||||
|
Loading…
Reference in New Issue
Block a user