mirror of
https://github.com/janet-lang/janet
synced 2024-11-28 11:09:54 +00:00
Add bytecode verification for peg unmarshaling.
This commit is contained in:
parent
a5e50a0f65
commit
e88a9af2f6
127
src/core/peg.c
127
src/core/peg.c
@ -994,10 +994,129 @@ static void peg_unmarshal(void *p, JanetMarshalContext *ctx) {
|
|||||||
for (uint32_t j = 0; j < peg->num_constants; j++)
|
for (uint32_t j = 0; j < peg->num_constants; j++)
|
||||||
constants[j] = janet_unmarshal_janet(ctx);
|
constants[j] = janet_unmarshal_janet(ctx);
|
||||||
|
|
||||||
/* TODO - verify peg bytecode. This is basically iterating
|
/* After here, no panics except for the bad: label. */
|
||||||
* the bytecode and making sure instructions don't reference
|
|
||||||
* memory outside the bytecode array. Otherwise, all programs
|
/* Keep track at each index if an instruction was
|
||||||
* should be valid.*/
|
* referenced (0x01) or is in a main bytecode position
|
||||||
|
* (0x02). This lets us do a linear scan and not
|
||||||
|
* need to do a depth first traversal. It is stricter
|
||||||
|
* than a dfs by not allowing certain kinds of unused
|
||||||
|
* bytecode. */
|
||||||
|
uint32_t blen = peg->bytecode_len;
|
||||||
|
uint32_t clen = peg->num_constants;
|
||||||
|
uint8_t *op_flags = calloc(1, blen);
|
||||||
|
if (NULL == op_flags) {
|
||||||
|
JANET_OUT_OF_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* verify peg bytecode */
|
||||||
|
uint32_t i = 0;
|
||||||
|
while (i < blen) {
|
||||||
|
uint32_t instr = bytecode[i];
|
||||||
|
uint32_t *rule = bytecode + i;
|
||||||
|
op_flags[i] |= 0x02;
|
||||||
|
switch (instr & 0x1F) {
|
||||||
|
case RULE_LITERAL:
|
||||||
|
i += 2 + ((rule[1] + 3) >> 2);
|
||||||
|
break;
|
||||||
|
case RULE_NCHAR:
|
||||||
|
case RULE_NOTNCHAR:
|
||||||
|
case RULE_RANGE:
|
||||||
|
case RULE_POSITION:
|
||||||
|
/* [1 word] */
|
||||||
|
i += 2;
|
||||||
|
break;
|
||||||
|
case RULE_SET:
|
||||||
|
/* [8 words] */
|
||||||
|
i += 9;
|
||||||
|
break;
|
||||||
|
case RULE_LOOK:
|
||||||
|
/* [offset, rule] */
|
||||||
|
if (rule[2] >= blen) goto bad;
|
||||||
|
op_flags[rule[2]] |= 0x1;
|
||||||
|
i += 3;
|
||||||
|
break;
|
||||||
|
case RULE_CHOICE:
|
||||||
|
case RULE_SEQUENCE:
|
||||||
|
/* [len, rules...] */
|
||||||
|
{
|
||||||
|
uint32_t len = rule[1];
|
||||||
|
for (uint32_t j = 0; j < len; j++) {
|
||||||
|
if (rule[2 + j] >= blen) goto bad;
|
||||||
|
op_flags[rule[2 + j]] |= 0x1;
|
||||||
|
}
|
||||||
|
i += 2 + len;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case RULE_IF:
|
||||||
|
case RULE_IFNOT:
|
||||||
|
/* [rule_a, rule_b (b if not a)] */
|
||||||
|
if (rule[1] >= blen) goto bad;
|
||||||
|
if (rule[2] >= blen) goto bad;
|
||||||
|
op_flags[rule[1]] |= 0x01;
|
||||||
|
op_flags[rule[2]] |= 0x01;
|
||||||
|
i += 3;
|
||||||
|
break;
|
||||||
|
case RULE_BETWEEN:
|
||||||
|
/* [lo, hi, rule] */
|
||||||
|
if (rule[3] >= blen) goto bad;
|
||||||
|
op_flags[rule[3]] |= 0x01;
|
||||||
|
i += 4;
|
||||||
|
break;
|
||||||
|
case RULE_ARGUMENT:
|
||||||
|
case RULE_GETTAG:
|
||||||
|
/* [searchtag, tag] */
|
||||||
|
i += 3;
|
||||||
|
break;
|
||||||
|
case RULE_CONSTANT:
|
||||||
|
/* [constant, tag] */
|
||||||
|
if (rule[1] >= clen) goto bad;
|
||||||
|
i += 3;
|
||||||
|
break;
|
||||||
|
case RULE_ACCUMULATE:
|
||||||
|
case RULE_GROUP:
|
||||||
|
case RULE_CAPTURE:
|
||||||
|
/* [rule, tag] */
|
||||||
|
if (rule[1] >= blen) goto bad;
|
||||||
|
op_flags[rule[1]] |= 0x01;
|
||||||
|
i += 3;
|
||||||
|
break;
|
||||||
|
case RULE_REPLACE:
|
||||||
|
case RULE_MATCHTIME:
|
||||||
|
/* [rule, constant, tag] */
|
||||||
|
if (rule[1] >= blen) goto bad;
|
||||||
|
if (rule[2] >= clen) goto bad;
|
||||||
|
op_flags[rule[1]] |= 0x01;
|
||||||
|
i += 2;
|
||||||
|
break;
|
||||||
|
case RULE_ERROR:
|
||||||
|
case RULE_DROP:
|
||||||
|
case RULE_NOT:
|
||||||
|
/* [rule] */
|
||||||
|
if (rule[1] >= blen) goto bad;
|
||||||
|
op_flags[rule[1]] |= 0x01;
|
||||||
|
i += 2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
goto bad;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* last instruction cannot overflow */
|
||||||
|
if (i != blen) goto bad;
|
||||||
|
|
||||||
|
/* Make sure all referenced instructions are actually
|
||||||
|
* in instruction positions. */
|
||||||
|
for (i = 0; i < blen; i++)
|
||||||
|
if (op_flags[i] == 0x01) goto bad;
|
||||||
|
|
||||||
|
/* Good return */
|
||||||
|
free(op_flags);
|
||||||
|
return;
|
||||||
|
|
||||||
|
bad:
|
||||||
|
free(op_flags);
|
||||||
|
janet_panic("invalid peg bytecode");
|
||||||
}
|
}
|
||||||
|
|
||||||
static const JanetAbstractType peg_type = {
|
static const JanetAbstractType peg_type = {
|
||||||
|
@ -131,4 +131,21 @@
|
|||||||
(assert (peg/match p "abcdefg") "peg marshal 2")
|
(assert (peg/match p "abcdefg") "peg marshal 2")
|
||||||
(assert (not (peg/match p "zabcdefg")) "peg marshal 3")
|
(assert (not (peg/match p "zabcdefg")) "peg marshal 3")
|
||||||
|
|
||||||
|
# This should be valgrind clean.
|
||||||
|
(var pegi 3)
|
||||||
|
(defn marshpeg [p]
|
||||||
|
(assert (-> p peg/compile marshal unmarshal) (string "peg marshal " (++ pegi))))
|
||||||
|
(marshpeg '(* 1 2 (set "abcd") "asdasd" (+ "." 3)))
|
||||||
|
(marshpeg '(% (* (+ 1 2 3) (* "drop" "bear") '"hi")))
|
||||||
|
(marshpeg '(> 123 "abcd"))
|
||||||
|
(marshpeg '{:main (* 1 "hello" :main)})
|
||||||
|
(marshpeg '(range "AZ"))
|
||||||
|
(marshpeg '(if-not "abcdf" 123))
|
||||||
|
(marshpeg '(error ($)))
|
||||||
|
(marshpeg '(* "abcd" (constant :hi)))
|
||||||
|
(marshpeg ~(/ "abc" ,identity))
|
||||||
|
(marshpeg '(if-not "abcdf" 123))
|
||||||
|
(marshpeg ~(cmt "abcdf" ,identity))
|
||||||
|
(marshpeg '(group "abc"))
|
||||||
|
|
||||||
(end-suite)
|
(end-suite)
|
||||||
|
Loading…
Reference in New Issue
Block a user