mirror of
				https://github.com/janet-lang/janet
				synced 2025-11-04 09:33:02 +00:00 
			
		
		
		
	Add integer parsing to pegs.
This commit is contained in:
		@@ -1,6 +1,9 @@
 | 
			
		||||
# Changelog
 | 
			
		||||
All notable changes to this project will be documented in this file.
 | 
			
		||||
 | 
			
		||||
## Unreleased - ???
 | 
			
		||||
- Add integer parsing forms to pegs. This makes parsing many binary protocols easier.
 | 
			
		||||
 | 
			
		||||
## 1.12.2 - 2020-09-20
 | 
			
		||||
- Add janet\_try and janet\_restore to C API.
 | 
			
		||||
- Fix `os/execute` regression on windows.
 | 
			
		||||
 
 | 
			
		||||
@@ -20,7 +20,7 @@
 | 
			
		||||
 | 
			
		||||
project('janet', 'c',
 | 
			
		||||
  default_options : ['c_std=c99', 'b_lundef=false', 'default_library=both'],
 | 
			
		||||
  version : '1.12.2')
 | 
			
		||||
  version : '1.13.0')
 | 
			
		||||
 | 
			
		||||
# Global settings
 | 
			
		||||
janet_path = join_paths(get_option('prefix'), get_option('libdir'), 'janet')
 | 
			
		||||
 
 | 
			
		||||
@@ -27,10 +27,10 @@
 | 
			
		||||
#define JANETCONF_H
 | 
			
		||||
 | 
			
		||||
#define JANET_VERSION_MAJOR 1
 | 
			
		||||
#define JANET_VERSION_MINOR 12
 | 
			
		||||
#define JANET_VERSION_PATCH 2
 | 
			
		||||
#define JANET_VERSION_EXTRA ""
 | 
			
		||||
#define JANET_VERSION "1.12.2"
 | 
			
		||||
#define JANET_VERSION_MINOR 13
 | 
			
		||||
#define JANET_VERSION_PATCH 0
 | 
			
		||||
#define JANET_VERSION_EXTRA "-dev"
 | 
			
		||||
#define JANET_VERSION "1.13.0-dev"
 | 
			
		||||
 | 
			
		||||
/* #define JANET_BUILD "local" */
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -87,6 +87,12 @@ static void pushcap(PegState *s, Janet capture, uint32_t tag) {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Convert a uint64_t to an int64_t by sign-extending from its low `width` bytes.
 *
 * from:  raw accumulated value; bits above `width` bytes are discarded.
 * width: number of significant bytes.
 *
 * A width <= 0 means no bytes are significant, so the result is 0. A width >= 8
 * reinterprets all 64 bits. Handling both cases up front keeps the shift amount
 * in the range 8..56; shifting a 64-bit value by 64 or more, or by a negative
 * amount, is undefined behavior in C. */
static int64_t peg_convert_u64_s64(uint64_t from, int width) {
    if (width <= 0) return 0;
    if (width >= 8) return (int64_t) from;
    int shift = 8 * (8 - width);
    /* Move the sign bit of the narrow value up to bit 63, then shift back down
     * as a signed value so the sign bit is replicated into the upper bytes. */
    return ((int64_t)(from << shift)) >> shift;
}
 | 
			
		||||
 | 
			
		||||
/* Prevent stack overflow */
 | 
			
		||||
#define down1(s) do { \
 | 
			
		||||
    if (0 == --((s)->depth)) janet_panic("peg/match recursed too deeply"); \
 | 
			
		||||
@@ -469,6 +475,47 @@ tail:
 | 
			
		||||
            return next_text;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        case RULE_READINT: {
 | 
			
		||||
            uint32_t tag = rule[2];
 | 
			
		||||
            uint32_t signedness = rule[1] & 0x10;
 | 
			
		||||
            uint32_t endianess = rule[1] & 0x20;
 | 
			
		||||
            int width = (int)(rule[1] & 0xF);
 | 
			
		||||
            if (text + width > s->text_end) return NULL;
 | 
			
		||||
            uint64_t accum = 0;
 | 
			
		||||
            if (endianess) {
 | 
			
		||||
                /* BE */
 | 
			
		||||
                for (int i = 0; i < width; i++) accum = (accum << 8) | text[i];
 | 
			
		||||
            } else {
 | 
			
		||||
                /* LE */
 | 
			
		||||
                for (int i = width - 1; i >= 0; i--) accum = (accum << 8) | text[i];
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            Janet capture_value;
 | 
			
		||||
            /* We can only parse integers wider than 6 bytes reliably if int-types are enabled.
 | 
			
		||||
             * Otherwise, we may lose precision, so 6 is the maximum size when int-types are disabled. */
 | 
			
		||||
#ifdef JANET_INT_TYPES
 | 
			
		||||
            if (width > 6) {
 | 
			
		||||
                if (signedness) {
 | 
			
		||||
                    capture_value = janet_wrap_s64(peg_convert_u64_s64(accum, width));
 | 
			
		||||
                } else {
 | 
			
		||||
                    capture_value = janet_wrap_u64(accum);
 | 
			
		||||
                }
 | 
			
		||||
            } else
 | 
			
		||||
#endif
 | 
			
		||||
            {
 | 
			
		||||
                double double_value;
 | 
			
		||||
                if (signedness) {
 | 
			
		||||
                    double_value = (double)(peg_convert_u64_s64(accum, width));
 | 
			
		||||
                } else {
 | 
			
		||||
                    double_value = (double)accum;
 | 
			
		||||
                }
 | 
			
		||||
                capture_value = janet_wrap_number(double_value);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            pushcap(s, capture_value, tag);
 | 
			
		||||
            return text + width;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -876,6 +923,36 @@ static void spec_matchtime(Builder *b, int32_t argc, const Janet *argv) {
 | 
			
		||||
    emit_3(r, RULE_MATCHTIME, subrule, cindex, tag);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifdef JANET_INT_TYPES
 | 
			
		||||
#define JANET_MAX_READINT_WIDTH 8
 | 
			
		||||
#else
 | 
			
		||||
#define JANET_MAX_READINT_WIDTH 6
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* Compile an integer-reading special of the form (name width ?tag).
 *
 * argv[0] is the byte width and argv[1], when present, is the capture tag.
 * `mask` holds the flag bits that are OR-ed with the width into the single
 * argument word of RULE_READINT (0x10 = signed, 0x20 = big-endian, as decoded
 * by the matcher). Panics if the width is outside 0..JANET_MAX_READINT_WIDTH. */
static void spec_readint(Builder *b, int32_t argc, const Janet *argv, uint32_t mask) {
    peg_arity(b, argc, 1, 2);
    Reserve r = reserve(b, 3);
    /* The optional tag is the second argument. With argc == 2 the only valid
     * indices are 0 and 1, so reading any higher index would be out of bounds. */
    uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
    int32_t width = peg_getnat(b, argv[0]);
    if ((width < 0) || (width > JANET_MAX_READINT_WIDTH)) {
        peg_panicf(b, "width must be between 0 and %d, got %d", JANET_MAX_READINT_WIDTH, width);
    }
    emit_2(r, RULE_READINT, mask | ((uint32_t) width), tag);
}
 | 
			
		||||
 | 
			
		||||
/* Peg special `uint`: unsigned, little-endian integer (no flag bits set). */
static void spec_uint_le(Builder *b, int32_t argc, const Janet *argv) {
    const uint32_t flags = 0x0u;
    spec_readint(b, argc, argv, flags);
}
 | 
			
		||||
/* Peg special `int`: signed, little-endian integer (signedness bit 0x10 set). */
static void spec_int_le(Builder *b, int32_t argc, const Janet *argv) {
    const uint32_t flags = 0x10u;
    spec_readint(b, argc, argv, flags);
}
 | 
			
		||||
/* Peg special `uint-be`: unsigned, big-endian integer (endianness bit 0x20 set). */
static void spec_uint_be(Builder *b, int32_t argc, const Janet *argv) {
    const uint32_t flags = 0x20u;
    spec_readint(b, argc, argv, flags);
}
 | 
			
		||||
/* Peg special `int-be`: signed, big-endian integer (both 0x10 and 0x20 set). */
static void spec_int_be(Builder *b, int32_t argc, const Janet *argv) {
    const uint32_t flags = 0x10u | 0x20u;
    spec_readint(b, argc, argv, flags);
}
 | 
			
		||||
 | 
			
		||||
/* Special compiler form */
 | 
			
		||||
typedef void (*Special)(Builder *b, int32_t argc, const Janet *argv);
 | 
			
		||||
typedef struct {
 | 
			
		||||
@@ -912,6 +989,8 @@ static const SpecialPair peg_specials[] = {
 | 
			
		||||
    {"group", spec_group},
 | 
			
		||||
    {"if", spec_if},
 | 
			
		||||
    {"if-not", spec_ifnot},
 | 
			
		||||
    {"int", spec_int_le},
 | 
			
		||||
    {"int-be", spec_int_be},
 | 
			
		||||
    {"lenprefix", spec_lenprefix},
 | 
			
		||||
    {"look", spec_look},
 | 
			
		||||
    {"not", spec_not},
 | 
			
		||||
@@ -926,6 +1005,8 @@ static const SpecialPair peg_specials[] = {
 | 
			
		||||
    {"some", spec_some},
 | 
			
		||||
    {"thru", spec_thru},
 | 
			
		||||
    {"to", spec_to},
 | 
			
		||||
    {"uint", spec_uint_le},
 | 
			
		||||
    {"uint-be", spec_uint_be},
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* Compile a janet value into a rule and return the rule index. */
 | 
			
		||||
@@ -1226,6 +1307,11 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
 | 
			
		||||
                op_flags[rule[1]] |= 0x01;
 | 
			
		||||
                i += 2;
 | 
			
		||||
                break;
 | 
			
		||||
            case RULE_READINT:
 | 
			
		||||
                /* [ (signedness << 4) | (endianess << 5) | width, tag ] */
 | 
			
		||||
                if (rule[1] > JANET_MAX_READINT_WIDTH) goto bad;
 | 
			
		||||
                i += 3;
 | 
			
		||||
                break;
 | 
			
		||||
            default:
 | 
			
		||||
                goto bad;
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -1652,6 +1652,7 @@ typedef enum {
 | 
			
		||||
    RULE_TO,           /* [rule] */
 | 
			
		||||
    RULE_THRU,         /* [rule] */
 | 
			
		||||
    RULE_LENPREFIX,    /* [rule_a, rule_b (repeat rule_b rule_a times)] */
 | 
			
		||||
    RULE_READINT,      /* [(signedness << 4) | (endianess << 5) | bytewidth, tag] */
 | 
			
		||||
} JanetPegOpcode;
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
 
 | 
			
		||||
@@ -11,9 +11,12 @@
 | 
			
		||||
  (default e "assert error")
 | 
			
		||||
  (++ num-tests-run)
 | 
			
		||||
  (when x (++ num-tests-passed))
 | 
			
		||||
  (def str (string e))
 | 
			
		||||
  (def truncated
 | 
			
		||||
    (if (> (length e) 40) (string (string/slice e 0 35) "...") (string e)))
 | 
			
		||||
  (if x
 | 
			
		||||
    (xprintf stdout "\e[32m✔\e[0m %s: %v" (string e) x)
 | 
			
		||||
    (xprintf stdout "\n\e[31m✘\e[0m %s: %v" (string e) x))
 | 
			
		||||
    (xprintf stdout "\e[32m✔\e[0m %s: %v" truncated x)
 | 
			
		||||
    (xprintf stdout "\n\e[31m✘\e[0m %s: %v" truncated x))
 | 
			
		||||
  x)
 | 
			
		||||
 | 
			
		||||
(defmacro assert-error
 | 
			
		||||
 
 | 
			
		||||
@@ -443,4 +443,26 @@
 | 
			
		||||
(check-match redef-b "aabeef" false)
 | 
			
		||||
(check-match redef-b "aaaaaa" false)
 | 
			
		||||
 | 
			
		||||
# Integer parsing
 | 
			
		||||
 | 
			
		||||
(check-deep '(int 1) "a" @[(chr "a")])
 | 
			
		||||
(check-deep '(uint 1) "a" @[(chr "a")])
 | 
			
		||||
(check-deep '(int-be 1) "a" @[(chr "a")])
 | 
			
		||||
(check-deep '(uint-be 1) "a" @[(chr "a")])
 | 
			
		||||
(check-deep '(int 1) "\xFF" @[-1])
 | 
			
		||||
(check-deep '(uint 1) "\xFF" @[255])
 | 
			
		||||
(check-deep '(int-be 1) "\xFF" @[-1])
 | 
			
		||||
(check-deep '(uint-be 1) "\xFF" @[255])
 | 
			
		||||
(check-deep '(int 2) "\xFF\x7f" @[0x7fff])
 | 
			
		||||
(check-deep '(int-be 2) "\x7f\xff" @[0x7fff])
 | 
			
		||||
(check-deep '(uint 2) "\xff\x7f" @[0x7fff])
 | 
			
		||||
(check-deep '(uint-be 2) "\x7f\xff" @[0x7fff])
 | 
			
		||||
(check-deep '(uint-be 2) "\x7f\xff" @[0x7fff])
 | 
			
		||||
(check-deep '(uint 8) "\xff\x7f\x00\x00\x00\x00\x00\x00" @[(int/u64 0x7fff)])
 | 
			
		||||
(check-deep '(int 8) "\xff\x7f\x00\x00\x00\x00\x00\x00" @[(int/s64 0x7fff)])
 | 
			
		||||
(check-deep '(uint 7) "\xff\x7f\x00\x00\x00\x00\x00" @[(int/u64 0x7fff)])
 | 
			
		||||
(check-deep '(int 7) "\xff\x7f\x00\x00\x00\x00\x00" @[(int/s64 0x7fff)])
 | 
			
		||||
 | 
			
		||||
(check-deep '(* (int 2) -1) "123" nil)
 | 
			
		||||
 | 
			
		||||
(end-suite)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user