1
0
mirror of https://github.com/janet-lang/janet synced 2025-10-25 20:57:40 +00:00

Add bytecode verification for peg unmarshaling.

This commit is contained in:
Calvin Rose
2019-06-18 13:00:23 -04:00
parent a5e50a0f65
commit e88a9af2f6
2 changed files with 140 additions and 4 deletions

View File

@@ -994,10 +994,129 @@ static void peg_unmarshal(void *p, JanetMarshalContext *ctx) {
for (uint32_t j = 0; j < peg->num_constants; j++) for (uint32_t j = 0; j < peg->num_constants; j++)
constants[j] = janet_unmarshal_janet(ctx); constants[j] = janet_unmarshal_janet(ctx);
/* TODO - verify peg bytecode. This is basically iterating /* After here, no panics except for the bad: label. */
* the bytecode and making sure instructions don't reference
* memory outside the bytecode array. Otherwise, all programs /* Keep track at each index if an instruction was
* should be valid.*/ * reference (0x01) or is in a main bytecode position
* (0x02). This lets us do a linear scan and not
* need to a depth first traversal. It is stricter
* than a dfs by not allowing certain kinds of unused
* bytecode. */
uint32_t blen = peg->bytecode_len;
uint32_t clen = peg->num_constants;
uint8_t *op_flags = calloc(1, blen);
if (NULL == op_flags) {
JANET_OUT_OF_MEMORY;
}
/* verify peg bytecode */
uint32_t i = 0;
while (i < blen) {
uint32_t instr = bytecode[i];
uint32_t *rule = bytecode + i;
op_flags[i] |= 0x02;
switch (instr & 0x1F) {
case RULE_LITERAL:
i += 2 + ((rule[1] + 3) >> 2);
break;
case RULE_NCHAR:
case RULE_NOTNCHAR:
case RULE_RANGE:
case RULE_POSITION:
/* [1 word] */
i += 2;
break;
case RULE_SET:
/* [8 words] */
i += 9;
break;
case RULE_LOOK:
/* [offset, rule] */
if (rule[2] >= blen) goto bad;
op_flags[rule[2]] |= 0x1;
i += 3;
break;
case RULE_CHOICE:
case RULE_SEQUENCE:
/* [len, rules...] */
{
uint32_t len = rule[1];
for (uint32_t j = 0; j < len; j++) {
if (rule[2 + j] >= blen) goto bad;
op_flags[rule[2 + j]] |= 0x1;
}
i += 2 + len;
}
break;
case RULE_IF:
case RULE_IFNOT:
/* [rule_a, rule_b (b if not a)] */
if (rule[1] >= blen) goto bad;
if (rule[2] >= blen) goto bad;
op_flags[rule[1]] |= 0x01;
op_flags[rule[2]] |= 0x01;
i += 3;
break;
case RULE_BETWEEN:
/* [lo, hi, rule] */
if (rule[3] >= blen) goto bad;
op_flags[rule[3]] |= 0x01;
i += 4;
break;
case RULE_ARGUMENT:
case RULE_GETTAG:
/* [searchtag, tag] */
i += 3;
break;
case RULE_CONSTANT:
/* [constant, tag] */
if (rule[1] >= clen) goto bad;
i += 3;
break;
case RULE_ACCUMULATE:
case RULE_GROUP:
case RULE_CAPTURE:
/* [rule, tag] */
if (rule[1] >= blen) goto bad;
op_flags[rule[1]] |= 0x01;
i += 3;
break;
case RULE_REPLACE:
case RULE_MATCHTIME:
/* [rule, constant, tag] */
if (rule[1] >= blen) goto bad;
if (rule[2] >= clen) goto bad;
op_flags[rule[1]] |= 0x01;
i += 2;
break;
case RULE_ERROR:
case RULE_DROP:
case RULE_NOT:
/* [rule] */
if (rule[1] >= blen) goto bad;
op_flags[rule[1]] |= 0x01;
i += 2;
break;
default:
goto bad;
}
}
/* last instruction cannot overflow */
if (i != blen) goto bad;
/* Make sure all referenced instructions are actually
* in instruction positions. */
for (i = 0; i < blen; i++)
if (op_flags[i] == 0x01) goto bad;
/* Good return */
free(op_flags);
return;
bad:
free(op_flags);
janet_panic("invalid peg bytecode");
} }
static const JanetAbstractType peg_type = { static const JanetAbstractType peg_type = {

View File

@@ -131,4 +131,21 @@
(assert (peg/match p "abcdefg") "peg marshal 2") (assert (peg/match p "abcdefg") "peg marshal 2")
(assert (not (peg/match p "zabcdefg")) "peg marshal 3") (assert (not (peg/match p "zabcdefg")) "peg marshal 3")
# This should be valgrind clean.
(var pegi 3)
(defn marshpeg [p]
(assert (-> p peg/compile marshal unmarshal) (string "peg marshal " (++ pegi))))
(marshpeg '(* 1 2 (set "abcd") "asdasd" (+ "." 3)))
(marshpeg '(% (* (+ 1 2 3) (* "drop" "bear") '"hi")))
(marshpeg '(> 123 "abcd"))
(marshpeg '{:main (* 1 "hello" :main)})
(marshpeg '(range "AZ"))
(marshpeg '(if-not "abcdf" 123))
(marshpeg '(error ($)))
(marshpeg '(* "abcd" (constant :hi)))
(marshpeg ~(/ "abc" ,identity))
(marshpeg '(if-not "abcdf" 123))
(marshpeg ~(cmt "abcdf" ,identity))
(marshpeg '(group "abc"))
(end-suite) (end-suite)