1
0
mirror of https://github.com/janet-lang/janet synced 2025-01-22 13:16:52 +00:00

Add integer parsing to pegs.

This commit is contained in:
Calvin Rose 2020-09-27 12:18:12 -05:00
parent 0a1d902f46
commit 45feb55483
7 changed files with 122 additions and 7 deletions

View File

@ -1,6 +1,9 @@
# Changelog
All notable changes to this project will be documented in this file.
## Unreleased - ???
- Add integer parsing forms to pegs. This makes parsing many binary protocols easier.
## 1.12.2 - 2020-09-20
- Add janet\_try and janet\_restore to C API.
- Fix `os/execute` regression on windows.

View File

@ -20,7 +20,7 @@
project('janet', 'c',
default_options : ['c_std=c99', 'b_lundef=false', 'default_library=both'],
version : '1.12.2')
version : '1.13.0')
# Global settings
janet_path = join_paths(get_option('prefix'), get_option('libdir'), 'janet')

View File

@ -27,10 +27,10 @@
#define JANETCONF_H
#define JANET_VERSION_MAJOR 1
#define JANET_VERSION_MINOR 12
#define JANET_VERSION_PATCH 2
#define JANET_VERSION_EXTRA ""
#define JANET_VERSION "1.12.2"
#define JANET_VERSION_MINOR 13
#define JANET_VERSION_PATCH 0
#define JANET_VERSION_EXTRA "-dev"
#define JANET_VERSION "1.13.0-dev"
/* #define JANET_BUILD "local" */

View File

@ -87,6 +87,12 @@ static void pushcap(PegState *s, Janet capture, uint32_t tag) {
}
}
/* Convert a uint64_t to a int64_t by wrapping to a maximum number of bytes */
static int64_t peg_convert_u64_s64(uint64_t from, int width) {
int shift = 8 * (8 - width);
return ((int64_t)(from << shift)) >> shift;
}
/* Prevent stack overflow */
#define down1(s) do { \
if (0 == --((s)->depth)) janet_panic("peg/match recursed too deeply"); \
@ -469,6 +475,47 @@ tail:
return next_text;
}
case RULE_READINT: {
uint32_t tag = rule[2];
uint32_t signedness = rule[1] & 0x10;
uint32_t endianess = rule[1] & 0x20;
int width = (int)(rule[1] & 0xF);
if (text + width > s->text_end) return NULL;
uint64_t accum = 0;
if (endianess) {
/* BE */
for (int i = 0; i < width; i++) accum = (accum << 8) | text[i];
} else {
/* LE */
for (int i = width - 1; i >= 0; i--) accum = (accum << 8) | text[i];
}
Janet capture_value;
/* We can only parse integeres of greater than 6 bytes reliable if int-types are enabled.
* Otherwise, we may lose precision, so 6 is the maximum size when int-types are disabled. */
#ifdef JANET_INT_TYPES
if (width > 6) {
if (signedness) {
capture_value = janet_wrap_s64(peg_convert_u64_s64(accum, width));
} else {
capture_value = janet_wrap_u64(accum);
}
} else
#endif
{
double double_value;
if (signedness) {
double_value = (double)(peg_convert_u64_s64(accum, width));
} else {
double_value = (double)accum;
}
capture_value = janet_wrap_number(double_value);
}
pushcap(s, capture_value, tag);
return text + width;
}
}
}
@ -876,6 +923,36 @@ static void spec_matchtime(Builder *b, int32_t argc, const Janet *argv) {
emit_3(r, RULE_MATCHTIME, subrule, cindex, tag);
}
#ifdef JANET_INT_TYPES
#define JANET_MAX_READINT_WIDTH 8
#else
#define JANET_MAX_READINT_WIDTH 6
#endif
static void spec_readint(Builder *b, int32_t argc, const Janet *argv, uint32_t mask) {
peg_arity(b, argc, 1, 2);
Reserve r = reserve(b, 3);
uint32_t tag = (argc == 2) ? emit_tag(b, argv[3]) : 0;
int32_t width = peg_getnat(b, argv[0]);
if ((width < 0) || (width > JANET_MAX_READINT_WIDTH)) {
peg_panicf(b, "width must be between 0 and %d, got %d", JANET_MAX_READINT_WIDTH, width);
}
emit_2(r, RULE_READINT, mask | ((uint32_t) width), tag);
}
static void spec_uint_le(Builder *b, int32_t argc, const Janet *argv) {
spec_readint(b, argc, argv, 0x0u);
}
static void spec_int_le(Builder *b, int32_t argc, const Janet *argv) {
spec_readint(b, argc, argv, 0x10u);
}
static void spec_uint_be(Builder *b, int32_t argc, const Janet *argv) {
spec_readint(b, argc, argv, 0x20u);
}
static void spec_int_be(Builder *b, int32_t argc, const Janet *argv) {
spec_readint(b, argc, argv, 0x30u);
}
/* Special compiler form */
typedef void (*Special)(Builder *b, int32_t argc, const Janet *argv);
typedef struct {
@ -912,6 +989,8 @@ static const SpecialPair peg_specials[] = {
{"group", spec_group},
{"if", spec_if},
{"if-not", spec_ifnot},
{"int", spec_int_le},
{"int-be", spec_int_be},
{"lenprefix", spec_lenprefix},
{"look", spec_look},
{"not", spec_not},
@ -926,6 +1005,8 @@ static const SpecialPair peg_specials[] = {
{"some", spec_some},
{"thru", spec_thru},
{"to", spec_to},
{"uint", spec_uint_le},
{"uint-be", spec_uint_be},
};
/* Compile a janet value into a rule and return the rule index. */
@ -1226,6 +1307,11 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
op_flags[rule[1]] |= 0x01;
i += 2;
break;
case RULE_READINT:
/* [ width | (endianess << 5) | (signedness << 6), tag ] */
if (rule[1] > JANET_MAX_READINT_WIDTH) goto bad;
i += 3;
break;
default:
goto bad;
}

View File

@ -1652,6 +1652,7 @@ typedef enum {
RULE_TO, /* [rule] */
RULE_THRU, /* [rule] */
RULE_LENPREFIX, /* [rule_a, rule_b (repeat rule_b rule_a times)] */
RULE_READINT, /* [(signedness << 4) | (endianess << 5) | bytewidth, tag] */
} JanetPegOpcode;
typedef struct {

View File

@ -11,9 +11,12 @@
(default e "assert error")
(++ num-tests-run)
(when x (++ num-tests-passed))
(def str (string e))
(def truncated
(if (> (length e) 40) (string (string/slice e 0 35) "...") (string e)))
(if x
(xprintf stdout "\e[32m✔\e[0m %s: %v" (string e) x)
(xprintf stdout "\n\e[31m✘\e[0m %s: %v" (string e) x))
(xprintf stdout "\e[32m✔\e[0m %s: %v" truncated x)
(xprintf stdout "\n\e[31m✘\e[0m %s: %v" truncated x))
x)
(defmacro assert-error

View File

@ -443,4 +443,26 @@
(check-match redef-b "aabeef" false)
(check-match redef-b "aaaaaa" false)
# Integer parsing
(check-deep '(int 1) "a" @[(chr "a")])
(check-deep '(uint 1) "a" @[(chr "a")])
(check-deep '(int-be 1) "a" @[(chr "a")])
(check-deep '(uint-be 1) "a" @[(chr "a")])
(check-deep '(int 1) "\xFF" @[-1])
(check-deep '(uint 1) "\xFF" @[255])
(check-deep '(int-be 1) "\xFF" @[-1])
(check-deep '(uint-be 1) "\xFF" @[255])
(check-deep '(int 2) "\xFF\x7f" @[0x7fff])
(check-deep '(int-be 2) "\x7f\xff" @[0x7fff])
(check-deep '(uint 2) "\xff\x7f" @[0x7fff])
(check-deep '(uint-be 2) "\x7f\xff" @[0x7fff])
(check-deep '(uint-be 2) "\x7f\xff" @[0x7fff])
(check-deep '(uint 8) "\xff\x7f\x00\x00\x00\x00\x00\x00" @[(int/u64 0x7fff)])
(check-deep '(int 8) "\xff\x7f\x00\x00\x00\x00\x00\x00" @[(int/s64 0x7fff)])
(check-deep '(uint 7) "\xff\x7f\x00\x00\x00\x00\x00" @[(int/u64 0x7fff)])
(check-deep '(int 7) "\xff\x7f\x00\x00\x00\x00\x00" @[(int/s64 0x7fff)])
(check-deep '(* (int 2) -1) "123" nil)
(end-suite)