From 5c84f0f5d909b56f8e89e57990eb714340dd7df8 Mon Sep 17 00:00:00 2001 From: Calvin Rose Date: Fri, 28 Dec 2018 23:32:09 -0500 Subject: [PATCH] Work on number code for more expected behavior and better rounding. Still needs work and testing. --- src/core/multisym.c | 4 +- src/core/string.c | 2 +- src/core/strtod.c | 374 +++++++++++++++++++++++--------------- src/include/janet/janet.h | 1 - 4 files changed, 229 insertions(+), 152 deletions(-) diff --git a/src/core/multisym.c b/src/core/multisym.c index 604773bd..8a63fcbe 100644 --- a/src/core/multisym.c +++ b/src/core/multisym.c @@ -31,11 +31,11 @@ static JanetSlot multisym_parse_part(JanetCompiler *c, const uint8_t *sympart, i return janetc_cslot(janet_symbolv(sympart, len)); } else { int err = 0; - int32_t num = janet_scan_integer(sympart + 1, len - 1, &err); + double num = janet_scan_number(sympart + 1, len - 1, &err); if (err) { return janetc_resolve(c, janet_symbol(sympart + 1, len - 1)); } else { - return janetc_cslot(janet_wrap_integer(num)); + return janetc_cslot(janet_wrap_number(num)); } } } diff --git a/src/core/string.c b/src/core/string.c index 909414e7..063525c3 100644 --- a/src/core/string.c +++ b/src/core/string.c @@ -107,7 +107,7 @@ const uint8_t *janet_cstring(const char *str) { #define BUFSIZE 64 static int32_t number_to_string_impl(uint8_t *buf, double x) { - int count = snprintf((char *) buf, BUFSIZE, "%.17g", x); + int count = snprintf((char *) buf, BUFSIZE, "%g", x); return (int32_t) count; } diff --git a/src/core/strtod.c b/src/core/strtod.c index a4d59d6b..1397a7e9 100644 --- a/src/core/strtod.c +++ b/src/core/strtod.c @@ -51,6 +51,7 @@ #include #include +#include /* Lookup table for getting values of characters when parsing numbers. Handles * digits 0-9 and a-z (and A-Z). A-Z have values of 10 to 35. */ @@ -65,98 +66,217 @@ static uint8_t digit_lookup[128] = { 25,26,27,28,29,30,31,32,33,34,35,0xff,0xff,0xff,0xff,0xff }; +#define MANT_NBIT 27 +#define MANT_BASE 0x8000000 + +/* Allow for BigInt mantissa. Mant is a natural number. */ +struct Mant { + uint32_t first_digit; /* First digit so we don't need to allocate when not needed. */ + int32_t n; /* n digits */ + int32_t cap; /* allocated digit capacity */ + uint32_t *digits; /* Each digit is base (2 ^ 31). Digits are least significant first. */ +}; + +/* Print number */ +static void mant_debug(struct Mant *mant) { + printf("first_digit); + for (int i = 0; i < mant->n; i++) { + printf(" 0x%x", mant->digits[i]); + } + printf(">\n"); +} + +/* Allocate n more digits for mant. Return a pointer to these digits. */ +static uint32_t *mant_extra(struct Mant *mant, int32_t n) { + int32_t oldn = mant->n; + int32_t newn = oldn + n; + if (mant->cap < newn) { + int32_t newcap = 2 * newn; + uint32_t *mem = realloc(mant->digits, newcap * sizeof(uint32_t)); + if (NULL == mem) { + JANET_OUT_OF_MEMORY; + } + mant->cap = newcap; + mant->digits = mem; + } + mant->n = newn; + return mant->digits + oldn; +} + +/* Append a digit */ +static void mant_append(struct Mant *mant, uint32_t dig) { + mant_extra(mant, 1)[0] = dig; +} + +/* Add term to mant */ +static void mant_add(struct Mant *mant, uint32_t dig) { + int32_t i; + int carry = 0; + uint32_t next = mant->first_digit + dig; + if (next >= MANT_BASE) { + next -= MANT_BASE; + carry = 1; + } + mant->first_digit = next; + for (i = 0; i < mant->n; i++) { + if (!carry) return; + uint32_t next = mant->digits[i] + 1; + if (next >= MANT_BASE) { + next = 0; + } else { + carry = 0; + } + mant->digits[i] = next; + } + if (carry) mant_append(mant, 1); +} + +/* Multiply the mantissa mant by a factor */ +static void mant_mul(struct Mant *mant, uint32_t factor) { + int32_t i; + uint64_t carry = mant->first_digit * factor; + mant->first_digit = carry % MANT_BASE; + carry /= MANT_BASE; + for (i = 0; i < mant->n; i++) { + carry += mant->digits[i] * factor; + mant->digits[i] = carry % MANT_BASE; + carry /= MANT_BASE; + } + if (carry) mant_append(mant, carry); +} + +/* Divide the mantissa mant by a factor. Returns the remainder */ +static int32_t mant_div(struct Mant *mant, uint32_t divisor) { + int32_t i; + uint32_t quotient, remainder; + uint64_t dividend; + remainder = 0; + for (i = mant->n - 1; i >= 0; i--) { + dividend = ((uint64_t)remainder * MANT_BASE) + mant->digits[i]; + if (i < mant->n - 1) mant->digits[i + 1] = quotient; + quotient = dividend / divisor; + remainder = dividend % divisor; + mant->digits[i] = remainder; + } + dividend = ((uint64_t)remainder * MANT_BASE) + mant->first_digit; + if (mant->n) { + if (mant->digits[mant->n - 1] == 0) mant->n--; + } + quotient = dividend / divisor; + remainder = dividend % divisor; + mant->first_digit = quotient; + return remainder; +} + +/* Shift left by a multiple of MANT_NBIT */ +static void mant_lshift_n(struct Mant *mant, int n) { + if (!n) return; + int32_t oldn = mant->n; + mant_extra(mant, n); + memmove(mant->digits + n, mant->digits, sizeof(uint32_t) * oldn); + memset(mant->digits, 0, sizeof(uint32_t) * (n - 1)); + mant->digits[n - 1] = mant->first_digit; + mant->first_digit = 0; +} + +#ifdef __GNUC__ +#define clz(x) __builtin_clz(x) +#else +static int clz(uint32_t x) { + int n = 0; + if (x <= 0x0000ffff) n += 16, x <<= 16; + if (x <= 0x00ffffff) n += 8, x <<= 8; + if (x <= 0x0fffffff) n += 4, x <<= 4; + if (x <= 0x3fffffff) n += 2, x <<= 2; + if (x <= 0x7fffffff) n ++; + return n; +} +#endif + +/* Extract double value from mantissa */ +static double mant_extract(struct Mant *mant, int32_t exponent2) { + uint64_t top53; + int32_t n = mant->n; + /* Get most significant 52 bits from mant. Bit52 (0 indexed) should + * always be 1. */ + if (n) { + /* Two or more digits */ + uint64_t d1 = mant->digits[n - 1]; /* MSD (non-zero) */ + uint64_t d2 = (n == 1) ? mant->first_digit : mant->digits[n - 2]; + uint64_t d3 = (n > 2) ? mant->digits[n - 3] : (n == 2) ? mant->first_digit : 0; + int lz = clz(d1); + int nbits = 32 - lz; + top53 = (d2 << (53 - MANT_NBIT)) + (d3 >> (2 * MANT_NBIT - 53)); + top53 >>= nbits; + top53 |= (d1 << (53 - nbits)); + exponent2 += (nbits - 53) + MANT_NBIT * n; + } else { + /* One digit */ + top53 = mant->first_digit; + } + return ldexp((double) top53, exponent2); +} + /* Read in a mantissa and exponent of a certain base, and give - * back the double value. Should properly handle 0s, Inifinties, and + * back the double value. Should properly handle 0s, Infinities, and * denormalized numbers. (When the exponent values are too large) */ static double convert( int negative, - uint64_t mantissa, + struct Mant *mant, int32_t base, int32_t exponent) { int32_t exponent2 = 0; /* Short circuit zero and huge numbers */ - if (mantissa == 0) + if (mant->n == 0 && mant->first_digit == 0) return 0.0; if (exponent > 1022) return negative ? -INFINITY : INFINITY; - /* TODO add fast paths */ + /* Final value is X = mant * base ^ exponent * 2 ^ exponent2 + * Get exponent to zero while holding X constant. */ - /* Convert exponent on the base into exponent2, the power of - * 2 the will be used. Modify the mantissa as we convert. */ - if (exponent > 0) { - /* Make the mantissa large enough so no precision is lost */ - while (mantissa <= 0x03ffffffffffffffULL && exponent > 0) { - mantissa *= base; - exponent--; + /* Positive exponents are simple */ + while (exponent > 0) { + mant_mul(mant, base); + exponent--; + } + + /* Negative exponents are tricky - we don't want to loose bits + * from integer division, so we need to premultiply. */ + if (exponent < 0) { + mant_lshift_n(mant, 10 - exponent); + exponent2 -= (10 - exponent) * MANT_NBIT; + while (exponent < -3) { + mant_div(mant, (base * base) * (base * base)); + exponent += 4; } - while (exponent > 0) { - /* Allow 6 bits of room when multiplying. This is because - * the largest base is 36, which is 6 bits. The space of 6 should - * prevent overflow.*/ - mantissa >>= 1; - exponent2++; - if (mantissa <= 0x03ffffffffffffffULL) { - mantissa *= base; - exponent--; - } - } - } else { while (exponent < 0) { - mantissa <<= 1; - exponent2--; - /* Ensure that the last bit is set for minimum error - * before dividing by the base */ - if (mantissa > 0x7fffffffffffffffULL) { - mantissa /= base; - exponent++; - } + mant_div(mant, base); + exponent++; } } return negative - ? -ldexp((double) mantissa, exponent2) - : ldexp((double) mantissa, exponent2); + ? -mant_extract(mant, exponent2) + : mant_extract(mant, exponent2); } -/* Result of scanning a number source string. Will be further processed - * depending on the desired resultant type. */ -struct JanetScanRes { - uint64_t mant; - int32_t ex; - int error; - int base; - int seenpoint; - int foundexp; - int neg; -}; - -/* Get the mantissa and exponent of decimal number. The - * mantissa will be stored in a 64 bit unsigned integer (always positive). - * The exponent will be in a signed 32 bit integer. Will also check if - * the decimal point has been seen. Returns -1 if there is an invalid - * number. */ -static struct JanetScanRes janet_scan_impl( +/* Scan a real (double) from a string. If the string cannot be converted into + * and integer, set *err to 1 and return 0. */ +double janet_scan_number( const uint8_t *str, - int32_t len) { - - struct JanetScanRes res; + int32_t len, + int *err) { const uint8_t *end = str + len; - - /* Initialize flags */ int seenadigit = 0; - int gotradix = 0; - - /* Initialize result */ - res.mant = 0; - res.ex = 0; - res.error = 0; - res.base = 10; - res.seenpoint = 0; - res.foundexp = 0; - res.neg = 0; + struct Mant mant = {0}; + int ex = 0; + int base = 10; + int seenpoint = 0; + int foundexp = 0; + int neg = 0; /* Prevent some kinds of overflow bugs relating to the exponent * overflowing. For example, if a string was passed 2GB worth of 0s after @@ -168,18 +288,31 @@ static struct JanetScanRes janet_scan_impl( /* Get sign */ if (str >= end) goto error; if (*str == '-') { - res.neg = 1; + neg = 1; str++; } else if (*str == '+') { str++; } + /* Check for leading 0x or digit digit r */ + if (str + 1 < end && str[0] == '0' && str[1] == 'x') { + base = 16; + str += 2; + } else if (str + 2 < end && + str[0] >= '0' && str[0] <= '9' && + str[1] >= '0' && str[1] <= '9' && + str[2] == 'r') { + base = 10 * (str[0] - '0') + (str[1] - '0'); + if (base < 2 || base > 36) goto error; + str += 3; + } + /* Skip leading zeros */ while (str < end && (*str == '0' || *str == '.')) { - if (res.seenpoint) res.ex--; + if (seenpoint) ex--; if (*str == '.') { - if (res.seenpoint) goto error; - res.seenpoint = 1; + if (seenpoint) goto error; + seenpoint = 1; } seenadigit = 1; str++; @@ -188,37 +321,21 @@ static struct JanetScanRes janet_scan_impl( /* Parse significant digits */ while (str < end) { if (*str == '.') { - if (res.seenpoint) goto error; - res.seenpoint = 1; + if (seenpoint) goto error; + seenpoint = 1; } else if (*str == '&') { - res.foundexp = 1; + foundexp = 1; break; - } else if (res.base == 10 && (*str == 'E' || *str == 'e')) { - res.foundexp = 1; + } else if (base == 10 && (*str == 'E' || *str == 'e')) { + foundexp = 1; break; - } else if (!gotradix && (*str == 'x' || *str == 'X')) { - if (!seenadigit) goto error; - if (res.seenpoint || res.mant > 0) goto error; - res.base = 16; - res.mant = 0; - seenadigit = 0; - gotradix = 1; - } else if (!gotradix && (*str == 'r' || *str == 'R')) { - if (res.seenpoint) goto error; - if (res.mant < 2 || res.mant > 36) goto error; - res.base = (int) res.mant; - res.mant = 0; - seenadigit = 0; - gotradix = 1; } else if (*str != '_') { /* underscores are ignored - can be used for separator */ int digit = digit_lookup[*str & 0x7F]; - if (*str > 127 || digit >= res.base) goto error; - if (res.seenpoint) res.ex--; - if (res.mant > 0x00ffffffffffffff) - res.ex++; - else - res.mant = res.base * res.mant + digit; + if (*str > 127 || digit >= base) goto error; + if (seenpoint) ex--; + mant_mul(&mant, base); + mant_add(&mant, digit); seenadigit = 1; } str++; @@ -228,7 +345,7 @@ static struct JanetScanRes janet_scan_impl( goto error; /* Read exponent */ - if (str < end && res.foundexp) { + if (str < end && foundexp) { int eneg = 0; int ee = 0; seenadigit = 0; @@ -241,72 +358,33 @@ static struct JanetScanRes janet_scan_impl( str++; } /* Skip leading 0s in exponent */ - while (str < end && *str == '0') str++; + while (str < end && *str == '0') { + str++; + seenadigit = 1; + } while (str < end && ee < (INT32_MAX / 40)) { int digit = digit_lookup[*str & 0x7F]; if (*str == '_') { str++; continue; } - if (*str > 127 || digit >= res.base) goto error; - ee = res.base * ee + digit; + if (*str > 127 || digit >= base) goto error; + ee = base * ee + digit; str++; seenadigit = 1; } - if (eneg) res.ex -= ee; else res.ex += ee; + if (eneg) ex -= ee; else ex += ee; } if (!seenadigit) goto error; - return res; + double result = convert(neg, &mant, base, ex); + free(mant.digits); + return result; error: - res.error = 1; - return res; -} - -/* Scan an integer from a string. If the string cannot be converted into - * and integer, set *err to 1 and return 0. */ -int32_t janet_scan_integer( - const uint8_t *str, - int32_t len, - int *err) { - struct JanetScanRes res = janet_scan_impl(str, len); - int64_t i64; - if (res.error) goto error; - if (res.seenpoint) goto error; - if (res.ex < 0) goto error; - i64 = res.neg ? -(int64_t)res.mant : (int64_t)res.mant; - while (res.ex > 0) { - i64 *= res.base; - if (i64 > INT32_MAX || i64 < INT32_MIN) goto error; - res.ex--; - } - if (i64 > INT32_MAX || i64 < INT32_MIN) goto error; - if (NULL != err) - *err = 0; - return (int32_t) i64; - error: - if (NULL != err) - *err = 1; - return 0; -} - -/* Scan a real (double) from a string. If the string cannot be converted into - * and integer, set *err to 1 and return 0. */ -double janet_scan_number( - const uint8_t *str, - int32_t len, - int *err) { - struct JanetScanRes res = janet_scan_impl(str, len); - if (res.error) { - if (NULL != err) - *err = 1; - return 0.0; - } else { - if (NULL != err) - *err = 0; - } - return convert(res.neg, res.mant, res.base, res.ex); + *err = 1; + free(mant.digits); + return 0.0; } diff --git a/src/include/janet/janet.h b/src/include/janet/janet.h index 388bbbb6..cf3c893e 100644 --- a/src/include/janet/janet.h +++ b/src/include/janet/janet.h @@ -915,7 +915,6 @@ JANET_API int janet_dobytes(JanetTable *env, const uint8_t *bytes, int32_t len, JANET_API int janet_dostring(JanetTable *env, const char *str, const char *sourcePath, Janet *out); /* Number scanning */ -JANET_API int32_t janet_scan_integer(const uint8_t *str, int32_t len, int *err); JANET_API double janet_scan_number(const uint8_t *str, int32_t len, int *err); /* Debugging */