/* * Copyright (c) 2018 Calvin Rose * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ /* Use a custom double parser instead of libc's strtod for better portability * and control. Also, uses a less strict rounding method than ieee to not incur * the cost of 4000 loc and dependence on arbitary precision arithmetic. There * is no plan to use arbitrary precision arithmetic for parsing numbers, and a * formal rounding mode has yet to be chosen (round towards 0 seems * reasonable). * * This version has been modified for much greater flexibility in parsing, such * as choosing the radix, supporting integer output, and returning Janets * directly. * * Numbers are of the form [-+]R[rR]I.F[eE&][-+]X where R is the radix, I is * the integer part, F is the fractional part, and X is the exponent. All * signs, radix, decimal point, fractional part, and exponent can be ommited. * The number will be considered and integer if the there is no decimal point * and no exponent. Any number greater the 2^32-1 or less than -(2^32) will be * coerced to a double. If there is an error, the function janet_scan_number will * return a janet nil. The radix is assumed to be 10 if omitted, and the E * separator for the exponent can only be used when the radix is 10. This is * because E is a vaid digit in bases 15 or greater. For bases greater than 10, * the letters are used as digitis. A through Z correspond to the digits 10 * through 35, and the lowercase letters have the same values. The radix number * is always in base 10. For example, a hexidecimal number could be written * '16rdeadbeef'. janet_scan_number also supports some c style syntax for * hexidecimal literals. The previous number could also be written * '0xdeadbeef'. Note that in this case, the number will actually be a double * as it will not fit in the range for a signed 32 bit integer. The string * '0xbeef' would parse to an integer as it is in the range of an int32_t. */ #include #include #include /* Lookup table for getting values of characters when parsing numbers. Handles * digits 0-9 and a-z (and A-Z). A-Z have values of 10 to 35. */ static uint8_t digit_lookup[128] = { 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, 0,1,2,3,4,5,6,7,8,9,0xff,0xff,0xff,0xff,0xff,0xff, 0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, 25,26,27,28,29,30,31,32,33,34,35,0xff,0xff,0xff,0xff,0xff, 0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, 25,26,27,28,29,30,31,32,33,34,35,0xff,0xff,0xff,0xff,0xff }; #define MANT_NBIT 27 #define MANT_BASE 0x8000000 /* Allow for BigInt mantissa. Mant is a natural number. */ struct Mant { uint32_t first_digit; /* First digit so we don't need to allocate when not needed. */ int32_t n; /* n digits */ int32_t cap; /* allocated digit capacity */ uint32_t *digits; /* Each digit is base (2 ^ 31). Digits are least significant first. */ }; /* Allocate n more digits for mant. Return a pointer to these digits. */ static uint32_t *mant_extra(struct Mant *mant, int32_t n) { int32_t oldn = mant->n; int32_t newn = oldn + n; if (mant->cap < newn) { int32_t newcap = 2 * newn; uint32_t *mem = realloc(mant->digits, newcap * sizeof(uint32_t)); if (NULL == mem) { JANET_OUT_OF_MEMORY; } mant->cap = newcap; mant->digits = mem; } mant->n = newn; return mant->digits + oldn; } /* Append a digit */ static void mant_append(struct Mant *mant, uint32_t dig) { mant_extra(mant, 1)[0] = dig; } /* Add term to mant */ static void mant_add(struct Mant *mant, uint32_t dig) { int32_t i; int carry = 0; uint32_t next = mant->first_digit + dig; if (next >= MANT_BASE) { next -= MANT_BASE; carry = 1; } mant->first_digit = next; for (i = 0; i < mant->n; i++) { if (!carry) return; uint32_t next = mant->digits[i] + 1; if (next >= MANT_BASE) { next = 0; } else { carry = 0; } mant->digits[i] = next; } if (carry) mant_append(mant, 1); } /* Multiply the mantissa mant by a factor */ static void mant_mul(struct Mant *mant, uint32_t factor) { int32_t i; uint64_t carry = mant->first_digit * factor; mant->first_digit = carry % MANT_BASE; carry /= MANT_BASE; for (i = 0; i < mant->n; i++) { carry += mant->digits[i] * factor; mant->digits[i] = carry % MANT_BASE; carry /= MANT_BASE; } if (carry) mant_append(mant, carry); } /* Divide the mantissa mant by a factor. Returns the remainder */ static int32_t mant_div(struct Mant *mant, uint32_t divisor) { int32_t i; uint32_t quotient, remainder; uint64_t dividend; remainder = 0; for (i = mant->n - 1; i >= 0; i--) { dividend = ((uint64_t)remainder * MANT_BASE) + mant->digits[i]; if (i < mant->n - 1) mant->digits[i + 1] = quotient; quotient = dividend / divisor; remainder = dividend % divisor; mant->digits[i] = remainder; } dividend = ((uint64_t)remainder * MANT_BASE) + mant->first_digit; if (mant->n) { if (mant->digits[mant->n - 1] == 0) mant->n--; } quotient = dividend / divisor; remainder = dividend % divisor; mant->first_digit = quotient; return remainder; } /* Shift left by a multiple of MANT_NBIT */ static void mant_lshift_n(struct Mant *mant, int n) { if (!n) return; int32_t oldn = mant->n; mant_extra(mant, n); memmove(mant->digits + n, mant->digits, sizeof(uint32_t) * oldn); memset(mant->digits, 0, sizeof(uint32_t) * (n - 1)); mant->digits[n - 1] = mant->first_digit; mant->first_digit = 0; } #ifdef __GNUC__ #define clz(x) __builtin_clz(x) #else static int clz(uint32_t x) { int n = 0; if (x <= 0x0000ffff) n += 16, x <<= 16; if (x <= 0x00ffffff) n += 8, x <<= 8; if (x <= 0x0fffffff) n += 4, x <<= 4; if (x <= 0x3fffffff) n += 2, x <<= 2; if (x <= 0x7fffffff) n ++; return n; } #endif /* Extract double value from mantissa */ static double mant_extract(struct Mant *mant, int32_t exponent2) { uint64_t top53; int32_t n = mant->n; /* Get most significant 52 bits from mant. Bit52 (0 indexed) should * always be 1. */ if (n) { /* Two or more digits */ uint64_t d1 = mant->digits[n - 1]; /* MSD (non-zero) */ uint64_t d2 = (n == 1) ? mant->first_digit : mant->digits[n - 2]; uint64_t d3 = (n > 2) ? mant->digits[n - 3] : (n == 2) ? mant->first_digit : 0; int lz = clz(d1); int nbits = 32 - lz; top53 = (d2 << (54 - MANT_NBIT)) + (d3 >> (2 * MANT_NBIT - 54)); top53 >>= nbits; top53 |= (d1 << (54 - nbits)); if (top53 & 1) top53++; top53 >>= 1; if (top53 > 0x1FffffFFFFffffUL) { top53 >>= 1; exponent2++; } exponent2 += (nbits - 53) + MANT_NBIT * n; } else { /* One digit */ top53 = mant->first_digit; } return ldexp(top53, exponent2); } /* Read in a mantissa and exponent of a certain base, and give * back the double value. Should properly handle 0s, Infinities, and * denormalized numbers. (When the exponent values are too large) */ static double convert( int negative, struct Mant *mant, int32_t base, int32_t exponent) { int32_t exponent2 = 0; /* Short circuit zero and huge numbers */ if (mant->n == 0 && mant->first_digit == 0) return 0.0; if (exponent > 1023) return negative ? -INFINITY : INFINITY; /* Final value is X = mant * base ^ exponent * 2 ^ exponent2 * Get exponent to zero while holding X constant. */ /* Positive exponents are simple */ while (exponent > 0) { mant_mul(mant, base); exponent--; } /* Negative exponents are tricky - we don't want to loose bits * from integer division, so we need to premultiply. */ if (exponent < 0) { mant_lshift_n(mant, 20 - exponent); exponent2 -= (20 - exponent) * MANT_NBIT; while (exponent < 0) { mant_div(mant, base); exponent++; } } return negative ? -mant_extract(mant, exponent2) : mant_extract(mant, exponent2); } /* Scan a real (double) from a string. If the string cannot be converted into * and integer, set *err to 1 and return 0. */ double janet_scan_number( const uint8_t *str, int32_t len, int *err) { const uint8_t *end = str + len; int seenadigit = 0; struct Mant mant = {0}; int ex = 0; int base = 10; int seenpoint = 0; int foundexp = 0; int neg = 0; /* Prevent some kinds of overflow bugs relating to the exponent * overflowing. For example, if a string was passed 2GB worth of 0s after * the decimal point, exponent could wrap around and become positive. It's * easier to reject ridiculously large inputs than to check for overflows. * */ if (len > INT32_MAX / 40) goto error; /* Get sign */ if (str >= end) goto error; if (*str == '-') { neg = 1; str++; } else if (*str == '+') { str++; } /* Check for leading 0x or digit digit r */ if (str + 1 < end && str[0] == '0' && str[1] == 'x') { base = 16; str += 2; } else if (str + 2 < end && str[0] >= '0' && str[0] <= '9' && str[1] >= '0' && str[1] <= '9' && str[2] == 'r') { base = 10 * (str[0] - '0') + (str[1] - '0'); if (base < 2 || base > 36) goto error; str += 3; } /* Skip leading zeros */ while (str < end && (*str == '0' || *str == '.')) { if (seenpoint) ex--; if (*str == '.') { if (seenpoint) goto error; seenpoint = 1; } seenadigit = 1; str++; } /* Parse significant digits */ while (str < end) { if (*str == '.') { if (seenpoint) goto error; seenpoint = 1; } else if (*str == '&') { foundexp = 1; break; } else if (base == 10 && (*str == 'E' || *str == 'e')) { foundexp = 1; break; } else if (*str != '_') { /* underscores are ignored - can be used for separator */ int digit = digit_lookup[*str & 0x7F]; if (*str > 127 || digit >= base) goto error; if (seenpoint) ex--; mant_mul(&mant, base); mant_add(&mant, digit); seenadigit = 1; } str++; } if (!seenadigit) goto error; /* Read exponent */ if (str < end && foundexp) { int eneg = 0; int ee = 0; seenadigit = 0; str++; if (str >= end) goto error; if (*str == '-') { eneg = 1; str++; } else if (*str == '+') { str++; } /* Skip leading 0s in exponent */ while (str < end && *str == '0') { str++; seenadigit = 1; } while (str < end && ee < (INT32_MAX / 40)) { int digit = digit_lookup[*str & 0x7F]; if (*str == '_') { str++; continue; } if (*str > 127 || digit >= base) goto error; ee = base * ee + digit; str++; seenadigit = 1; } if (eneg) ex -= ee; else ex += ee; } if (!seenadigit) goto error; double result = convert(neg, &mant, base, ex); free(mant.digits); *err = 0; return result; error: *err = 1; free(mant.digits); return 0.0; }