/* * Copyright (c) 2021 Calvin Rose * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ /* Use a custom double parser instead of libc's strtod for better portability * and control. * * This version has been modified for much greater flexibility in parsing, such * as choosing the radix and supporting scientific notation with any radix. * * Numbers are of the form [-+]R[rR]I.F[eE&][-+]X in pseudo-regex form, where R * is the radix, I is the integer part, F is the fractional part, and X is the * exponent. All signs, radix, decimal point, fractional part, and exponent can * be omitted. The radix is assumed to be 10 if omitted, and the E or e * separator for the exponent can only be used when the radix is 10. This is * because E is a valid digit in bases 15 or greater. For bases greater than * 10, the letters are used as digits. A through Z correspond to the digits 10 * through 35, and the lowercase letters have the same values. The radix number * is always in base 10. For example, a hexidecimal number could be written * '16rdeadbeef'. janet_scan_number also supports some c style syntax for * hexidecimal literals. The previous number could also be written * '0xdeadbeef'. */ #ifndef JANET_AMALG #include "features.h" #include #include "util.h" #endif #include #include /* Lookup table for getting values of characters when parsing numbers. Handles * digits 0-9 and a-z (and A-Z). A-Z have values of 10 to 35. */ static uint8_t digit_lookup[128] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 0xff, 0xff, 0xff, 0xff, 0xff }; #define BIGNAT_NBIT 31 #define BIGNAT_BASE 0x80000000U /* Allow for large mantissa. BigNat is a natural number. */ struct BigNat { uint32_t first_digit; /* First digit so we don't need to allocate when not needed. */ int32_t n; /* n digits */ int32_t cap; /* allocated digit capacity */ uint32_t *digits; /* Each digit is base (2 ^ 31). Digits are least significant first. */ }; /* Initialize a bignat to 0 */ static void bignat_zero(struct BigNat *x) { x->first_digit = 0; x->n = 0; x->cap = 0; x->digits = NULL; } /* Allocate n more digits for mant. Return a pointer to these digits. */ static uint32_t *bignat_extra(struct BigNat *mant, int32_t n) { int32_t oldn = mant->n; int32_t newn = oldn + n; if (mant->cap < newn) { int32_t newcap = 2 * newn; uint32_t *mem = janet_realloc(mant->digits, (size_t) newcap * sizeof(uint32_t)); if (NULL == mem) { JANET_OUT_OF_MEMORY; } mant->cap = newcap; mant->digits = mem; } mant->n = newn; return mant->digits + oldn; } /* Append a digit */ static void bignat_append(struct BigNat *mant, uint32_t dig) { bignat_extra(mant, 1)[0] = dig; } /* Multiply the mantissa mant by a factor and the add a term * in one operation. factor will be between 2 and 36^4, * term will be between 0 and 36. */ static void bignat_muladd(struct BigNat *mant, uint32_t factor, uint32_t term) { int32_t i; uint64_t carry = ((uint64_t) mant->first_digit) * factor + term; mant->first_digit = carry % BIGNAT_BASE; carry /= BIGNAT_BASE; for (i = 0; i < mant->n; i++) { carry += ((uint64_t) mant->digits[i]) * factor; mant->digits[i] = carry % BIGNAT_BASE; carry /= BIGNAT_BASE; } if (carry) bignat_append(mant, (uint32_t) carry); } /* Divide the mantissa mant by a factor. Drop the remainder. */ static void bignat_div(struct BigNat *mant, uint32_t divisor) { int32_t i; uint32_t quotient, remainder; uint64_t dividend; remainder = 0, quotient = 0; for (i = mant->n - 1; i >= 0; i--) { dividend = ((uint64_t)remainder * BIGNAT_BASE) + mant->digits[i]; if (i < mant->n - 1) mant->digits[i + 1] = quotient; quotient = (uint32_t)(dividend / divisor); remainder = (uint32_t)(dividend % divisor); mant->digits[i] = remainder; } dividend = ((uint64_t)remainder * BIGNAT_BASE) + mant->first_digit; if (mant->n && mant->digits[mant->n - 1] == 0) mant->n--; mant->first_digit = (uint32_t)(dividend / divisor); } /* Shift left by a multiple of BIGNAT_NBIT */ static void bignat_lshift_n(struct BigNat *mant, int n) { if (!n) return; int32_t oldn = mant->n; bignat_extra(mant, n); memmove(mant->digits + n, mant->digits, sizeof(uint32_t) * oldn); memset(mant->digits, 0, sizeof(uint32_t) * (n - 1)); mant->digits[n - 1] = mant->first_digit; mant->first_digit = 0; } #ifdef __GNUC__ #define clz(x) __builtin_clz(x) #else static int clz(uint32_t x) { int n = 0; if (x <= 0x0000ffff) n += 16, x <<= 16; if (x <= 0x00ffffff) n += 8, x <<= 8; if (x <= 0x0fffffff) n += 4, x <<= 4; if (x <= 0x3fffffff) n += 2, x <<= 2; if (x <= 0x7fffffff) n ++; return n; } #endif /* Extract double value from mantissa */ static double bignat_extract(struct BigNat *mant, int32_t exponent2) { uint64_t top53; int32_t n = mant->n; /* Get most significant 53 bits from mant. Bit 52 (0 indexed) should * always be 1. This is essentially a large right shift on mant.*/ if (n) { /* Two or more digits */ uint64_t d1 = mant->digits[n - 1]; /* MSD (non-zero) */ uint64_t d2 = (n == 1) ? mant->first_digit : mant->digits[n - 2]; uint64_t d3 = (n > 2) ? mant->digits[n - 3] : (n == 2) ? mant->first_digit : 0; int lz = clz((uint32_t) d1); int nbits = 32 - lz; /* First get 54 bits */ top53 = (d2 << (54 - BIGNAT_NBIT)) + (d3 >> (2 * BIGNAT_NBIT - 54)); top53 >>= nbits; top53 |= (d1 << (54 - nbits)); /* Rounding based on lowest bit of 54 */ if (top53 & 1) top53++; top53 >>= 1; if (top53 > 0x1FffffFFFFffffUL) { top53 >>= 1; exponent2++; } /* Correct exponent - to correct for large right shift to mantissa. */ exponent2 += (nbits - 53) + BIGNAT_NBIT * n; } else { /* One digit */ top53 = mant->first_digit; } return ldexp((double)top53, exponent2); } /* Read in a mantissa and exponent of a certain base, and give * back the double value. Should properly handle 0s, infinities, and * denormalized numbers. (When the exponent values are too large or small) */ static double convert( int negative, struct BigNat *mant, int32_t base, int32_t exponent) { int32_t exponent2 = 0; /* Approximate exponent in base 2 of mant and exponent. This should get us a good estimate of the final size of the * number, within * 2^32 or so. */ int64_t mant_exp2_approx = mant->n * 32 + 16; int64_t exp_exp2_approx = (int64_t)(floor(log2(base) * exponent)); int64_t exp2_approx = mant_exp2_approx + exp_exp2_approx; /* Short circuit zero, huge, and small numbers. We use the exponent range of valid IEEE754 doubles (-1022, 1023) * with a healthy buffer to allow for inaccuracies in the approximation and denormailzed numbers. */ if (mant->n == 0 && mant->first_digit == 0) return negative ? -0.0 : 0.0; if (exp2_approx > 1176) return negative ? -INFINITY : INFINITY; if (exp2_approx < -1175) return negative ? -0.0 : 0.0; /* Final value is X = mant * base ^ exponent * 2 ^ exponent2 * Get exponent to zero while holding X constant. */ /* Positive exponents are simple */ for (; exponent > 3; exponent -= 4) bignat_muladd(mant, base * base * base * base, 0); for (; exponent > 1; exponent -= 2) bignat_muladd(mant, base * base, 0); for (; exponent > 0; exponent -= 1) bignat_muladd(mant, base, 0); /* Negative exponents are tricky - we don't want to loose bits * from integer division, so we need to premultiply. */ if (exponent < 0) { int32_t shamt = 5 - exponent / 4; bignat_lshift_n(mant, shamt); exponent2 -= shamt * BIGNAT_NBIT; for (; exponent < -3; exponent += 4) bignat_div(mant, base * base * base * base); for (; exponent < -1; exponent += 2) bignat_div(mant, base * base); for (; exponent < 0; exponent += 1) bignat_div(mant, base); } return negative ? -bignat_extract(mant, exponent2) : bignat_extract(mant, exponent2); } /* Scan a real (double) from a string. If the string cannot be converted into * and integer, return 0. */ int janet_scan_number_base( const uint8_t *str, int32_t len, int32_t base, double *out) { const uint8_t *end = str + len; int seenadigit = 0; int ex = 0; int seenpoint = 0; int foundexp = 0; int neg = 0; struct BigNat mant; bignat_zero(&mant); /* Prevent some kinds of overflow bugs relating to the exponent * overflowing. For example, if a string was passed 2GB worth of 0s after * the decimal point, exponent could wrap around and become positive. It's * easier to reject ridiculously large inputs than to check for overflows. * */ if (len > INT32_MAX / 40) goto error; /* Get sign */ if (str >= end) goto error; if (*str == '-') { neg = 1; str++; } else if (*str == '+') { str++; } /* Check for leading 0x or digit digit r */ if (base == 0) { if (str + 1 < end && str[0] == '0' && str[1] == 'x') { base = 16; str += 2; } else if (str + 1 < end && str[0] >= '0' && str[0] <= '9' && str[1] == 'r') { base = str[0] - '0'; str += 2; } else if (str + 2 < end && str[0] >= '0' && str[0] <= '9' && str[1] >= '0' && str[1] <= '9' && str[2] == 'r') { base = 10 * (str[0] - '0') + (str[1] - '0'); if (base < 2 || base > 36) goto error; str += 3; } } /* If still base is 0, set to default (10) */ if (base == 0) { base = 10; } /* Skip leading zeros */ while (str < end && (*str == '0' || *str == '.')) { if (seenpoint) ex--; if (*str == '.') { if (seenpoint) goto error; seenpoint = 1; } else { seenadigit = 1; } str++; } /* Parse significant digits */ while (str < end) { if (*str == '.') { if (seenpoint) goto error; seenpoint = 1; } else if (*str == '&') { foundexp = 1; break; } else if (base == 10 && (*str == 'E' || *str == 'e')) { foundexp = 1; break; } else if (*str == '_') { if (!seenadigit) goto error; } else { int digit = digit_lookup[*str & 0x7F]; if (*str > 127 || digit >= base) goto error; if (seenpoint) ex--; bignat_muladd(&mant, base, digit); seenadigit = 1; } str++; } if (!seenadigit) goto error; /* Read exponent */ if (str < end && foundexp) { int eneg = 0; int32_t ee = 0; seenadigit = 0; str++; if (str >= end) goto error; if (*str == '-') { eneg = 1; str++; } else if (*str == '+') { str++; } /* Skip leading 0s in exponent */ while (str < end && *str == '0') { str++; seenadigit = 1; } while (str < end) { int digit = digit_lookup[*str & 0x7F]; if (*str > 127 || digit >= base) goto error; if (ee < (INT32_MAX / 40)) { ee = base * ee + digit; } str++; seenadigit = 1; } if (eneg) ex -= ee; else ex += ee; } if (!seenadigit) goto error; *out = convert(neg, &mant, base, ex); janet_free(mant.digits); return 0; error: janet_free(mant.digits); return 1; } int janet_scan_number( const uint8_t *str, int32_t len, double *out) { return janet_scan_number_base(str, len, 0, out); } #ifdef JANET_INT_TYPES static int scan_uint64( const uint8_t *str, int32_t len, uint64_t *out, int *neg) { const uint8_t *end = str + len; int seenadigit = 0; int base = 10; *neg = 0; *out = 0; uint64_t accum = 0; /* len max is INT64_MAX in base 2 with _ between each bits */ /* '2r' + 64 bits + 63 _ + sign = 130 => 150 for some leading */ /* zeros */ if (len > 150) return 0; /* Get sign */ if (str >= end) return 0; if (*str == '-') { *neg = 1; str++; } else if (*str == '+') { str++; } /* Check for leading 0x or digit digit r */ if (str + 1 < end && str[0] == '0' && str[1] == 'x') { base = 16; str += 2; } else if (str + 1 < end && str[0] >= '0' && str[0] <= '9' && str[1] == 'r') { base = str[0] - '0'; str += 2; } else if (str + 2 < end && str[0] >= '0' && str[0] <= '9' && str[1] >= '0' && str[1] <= '9' && str[2] == 'r') { base = 10 * (str[0] - '0') + (str[1] - '0'); if (base < 2 || base > 36) return 0; str += 3; } /* Skip leading zeros */ while (str < end && *str == '0') { seenadigit = 1; str++; } /* Parse significant digits */ while (str < end) { if (*str == '_') { if (!seenadigit) return 0; } else { int digit = digit_lookup[*str & 0x7F]; if (*str > 127 || digit >= base) return 0; if (accum > (UINT64_MAX - digit) / base) return 0; accum = accum * base + digit; seenadigit = 1; } str++; } if (!seenadigit) return 0; *out = accum; return 1; } int janet_scan_int64(const uint8_t *str, int32_t len, int64_t *out) { int neg; uint64_t bi; if (scan_uint64(str, len, &bi, &neg)) { if (neg && bi <= ((UINT64_MAX / 2) + 1)) { if (bi > INT64_MAX) { *out = INT64_MIN; } else { *out = -((int64_t) bi); } return 1; } if (!neg && bi <= INT64_MAX) { *out = (int64_t) bi; return 1; } } return 0; } int janet_scan_uint64(const uint8_t *str, int32_t len, uint64_t *out) { int neg; uint64_t bi; if (scan_uint64(str, len, &bi, &neg)) { if (!neg) { *out = bi; return 1; } } return 0; } #endif