From d84cc5342e2250a4767aa028ebb2ec8b9547fdb6 Mon Sep 17 00:00:00 2001 From: Calvin Rose Date: Mon, 27 Nov 2017 14:03:34 -0500 Subject: [PATCH] Fix write after free bug. Remove caching from strings, tuples, and structs. Keyword style strings removed, now are just symbols. The compiler can decide to treat symbols with a leading ':' differently for mostly the same effect. This was done because as strings are no longer interned, symbols are cheaper to look up and check for equality. --- .gitignore | 1 + Makefile | 6 +- core/asm.c | 15 +-- core/cache.c | 246 ----------------------------------- core/fiber.c | 4 +- core/gc.c | 25 ++-- core/parse.c | 16 +-- core/string.c | 131 +++++++------------ core/struct.c | 56 +++++++- core/symcache.c | 244 ++++++++++++++++++++++++++++++++++ core/{cache.h => symcache.h} | 17 +-- core/syscalls.c | 2 +- core/tuple.c | 50 ++++++- core/util.c | 24 ++++ core/value.c | 74 +++-------- core/vm.c | 21 +-- dsts/minimal.dsts | 1 + include/dst/dst.h | 29 ++++- unittests/buffer_test.c | 3 +- unittests/nanbox_test.c | 4 +- 20 files changed, 492 insertions(+), 477 deletions(-) delete mode 100644 core/cache.c create mode 100644 core/symcache.c rename core/{cache.h => symcache.h} (78%) diff --git a/.gitignore b/.gitignore index 4c8ee751..e315eb64 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ tags # Valgrind files vgcore.* +core.* # Created by https://www.gitignore.io/api/c diff --git a/Makefile b/Makefile index 5871dacd..f4311981 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ PREFIX=/usr/local DST_TARGET=dst DST_XXD=xxd DEBUGGER=lldb -DST_INTERNAL_HEADERS=$(addprefix core/,cache.h opcodes.h) +DST_INTERNAL_HEADERS=$(addprefix core/,symcache.h opcodes.h) DST_HEADERS=$(addprefix include/dst/,dst.h) ############################# @@ -59,8 +59,8 @@ $(DST_XXD): libs/xxd.c ################################### DST_CORE_SOURCES=$(addprefix core/,\ - array.c asm.c buffer.c cache.c fiber.c func.c gc.c parse.c string.c\ - struct.c syscalls.c table.c tuple.c userdata.c util.c\ + array.c asm.c buffer.c fiber.c func.c gc.c parse.c string.c\ + struct.c symcache.c syscalls.c table.c tuple.c userdata.c util.c\ value.c vm.c wrap.c) DST_CORE_OBJECTS=$(patsubst %.c,%.o,$(DST_CORE_SOURCES)) diff --git a/core/asm.c b/core/asm.c index 4975e3c3..6dee2b67 100644 --- a/core/asm.c +++ b/core/asm.c @@ -533,11 +533,11 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) dst_asm_assert(&a, opts.source.type == DST_STRUCT, "expected struct for assembly source"); /* Set function arity */ - x = dst_struct_get(st, dst_wrap_symbol(dst_cstring("arity"))); + x = dst_struct_get(st, dst_csymbolv("arity")); def->arity = x.type == DST_INTEGER ? x.as.integer : 0; /* Create slot aliases */ - x = dst_struct_get(st, dst_wrap_symbol(dst_cstring("slots"))); + x = dst_struct_get(st, dst_csymbolv("slots")); if (dst_seq_view(x, &arr, &count)) { for (i = 0; i < count; i++) { DstValue v = arr[i]; @@ -557,7 +557,7 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) } /* Create environment aliases */ - x = dst_struct_get(st, dst_wrap_symbol(dst_cstring("environments"))); + x = dst_struct_get(st, dst_csymbolv("environments")); if (dst_seq_view(x, &arr, &count)) { for (i = 0; i < count; i++) { dst_asm_assert(&a, arr[i].type == DST_SYMBOL, "environment must be a symbol"); @@ -568,7 +568,7 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) } /* Parse constants */ - x = dst_struct_get(st, dst_wrap_symbol(dst_cstring("constants"))); + x = dst_struct_get(st, dst_csymbolv("constants")); if (dst_seq_view(x, &arr, &count)) { def->constants_length = count; def->constants = malloc(sizeof(DstValue) * count); @@ -603,14 +603,13 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) } /* Parse bytecode and labels */ - x = dst_struct_get(st, dst_wrap_symbol(dst_cstring("bytecode"))); + x = dst_struct_get(st, dst_csymbolv("bytecode")); if (dst_seq_view(x, &arr, &count)) { /* Do labels and find length */ uint32_t blength = 0; for (i = 0; i < count; ++i) { DstValue instr = arr[i]; - if (instr.type == DST_STRING) { - instr.type = DST_SYMBOL; + if (instr.type == DST_SYMBOL) { dst_table_put(&a.labels, instr, dst_wrap_integer(blength)); } else if (instr.type == DST_TUPLE) { blength++; @@ -627,7 +626,7 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) /* Do bytecode */ for (i = 0; i < count; ++i) { DstValue instr = arr[i]; - if (instr.type == DST_STRING) { + if (instr.type == DST_SYMBOL) { continue; } else { uint32_t op; diff --git a/core/cache.c b/core/cache.c deleted file mode 100644 index bce065d4..00000000 --- a/core/cache.c +++ /dev/null @@ -1,246 +0,0 @@ -/* -* Copyright (c) 2017 Calvin Rose -* -* Permission is hereby granted, free of charge, to any person obtaining a copy -* of this software and associated documentation files (the "Software"), to -* deal in the Software without restriction, including without limitation the -* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -* sell copies of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -* IN THE SOFTWARE. -*/ - -#include -#include "cache.h" - -/* All immutable values are cached in a global hash table. When an immutable - * value is created, this hashtable is checked to see if the value exists. If it - * does, return the cached copy instead. This trades creation time and memory for - * fast equality, which is especially useful for symbols and strings. This may not - * be useful for structs and tuples, in which case it may be removed. However, in cases - * where ther are many copies of the same tuple in the program, this approach may - * save memory. Values are removed from the cache when they are garbage collected. - */ - -/* Cache state */ -DstValue *dst_vm_cache = NULL; -uint32_t dst_vm_cache_capacity; -uint32_t dst_vm_cache_count; -uint32_t dst_vm_cache_deleted; - -/* Check if two not necesarrily finalized immutable values - * are equal. Does caching logic */ -static int dst_cache_equal(DstValue x, DstValue y) { - uint32_t i, len; - if (x.type != y.type) return 0; - switch (x.type) { - /* Don't bother implementing equality checks for all types. We only care - * about immutable data structures */ - default: - return 0; - case DST_STRING: - if (dst_string_hash(x.as.string) != dst_string_hash(y.as.string)) return 0; - if (dst_string_length(x.as.string) != dst_string_length(y.as.string)) return 0; - len = dst_string_length(x.as.string); - for (i = 0; i < len; ++i) - if (x.as.string[i] != y.as.string[i]) - return 0; - return 1; - case DST_STRUCT: - if (dst_struct_hash(x.as.st) != dst_struct_hash(y.as.st)) return 0; - if (dst_struct_length(x.as.st) != dst_struct_length(y.as.st)) return 0; - len = dst_struct_capacity(x.as.st); - for (i = 0; i < len; ++i) - if (!dst_equals(x.as.st[i], y.as.st[i])) - return 0; - return 1; - case DST_TUPLE: - if (dst_tuple_hash(x.as.tuple) != dst_tuple_hash(y.as.tuple)) return 0; - if (dst_tuple_length(x.as.tuple) != dst_tuple_length(y.as.tuple)) return 0; - len = dst_tuple_length(x.as.tuple); - for (i = 0; i < len; ++i) - if (!dst_equals(x.as.tuple[i], y.as.tuple[i])) - return 0; - return 1; - } -} - -/* Check if a value x is equal to a string. Special version of - * dst_cache_equal */ -static int dst_cache_strequal(DstValue x, const uint8_t *str, uint32_t len, uint32_t hash) { - uint32_t i; - if (x.type != DST_STRING) return 0; - if (dst_string_hash(x.as.string) != hash) return 0; - if (dst_string_length(x.as.string) != len) return 0; - for (i = 0; i < len; ++i) - if (x.as.string[i] != str[i]) - return 0; - return 1; -} - -/* Find an item in the cache and return its location. - * If the item is not found, return the location - * where one would put it. */ -static DstValue *dst_cache_find(DstValue key, int *success) { - uint32_t bounds[4]; - uint32_t i, j, index; - uint32_t hash = dst_hash(key); - DstValue *firstEmpty = NULL; - index = hash % dst_vm_cache_capacity; - bounds[0] = index; - bounds[1] = dst_vm_cache_capacity; - bounds[2] = 0; - bounds[3] = index; - for (j = 0; j < 4; j += 2) - for (i = bounds[j]; i < bounds[j+1]; ++i) { - DstValue test = dst_vm_cache[i]; - /* Check empty spots */ - if (test.type == DST_NIL) { - if (firstEmpty == NULL) - firstEmpty = dst_vm_cache + i; - goto notfound; - } - /* Check for marked deleted - use booleans as deleted */ - if (test.type == DST_BOOLEAN) { - if (firstEmpty == NULL) - firstEmpty = dst_vm_cache + i; - continue; - } - if (dst_cache_equal(test, key)) { - /* Replace first deleted */ - *success = 1; - if (firstEmpty != NULL) { - *firstEmpty = test; - dst_vm_cache[i].type = DST_BOOLEAN; - return firstEmpty; - } - return dst_vm_cache + i; - } - } - notfound: - *success = 0; - return firstEmpty; -} - -/* Find an item in the cache and return its location. - * If the item is not found, return the location - * where one would put it. Special case of dst_cache_find */ -DstValue *dst_cache_strfind( - const uint8_t *str, - uint32_t len, - uint32_t hash, - int *success) { - uint32_t bounds[4]; - uint32_t i, j, index; - DstValue *firstEmpty = NULL; - index = hash % dst_vm_cache_capacity; - bounds[0] = index; - bounds[1] = dst_vm_cache_capacity; - bounds[2] = 0; - bounds[3] = index; - for (j = 0; j < 4; j += 2) - for (i = bounds[j]; i < bounds[j+1]; ++i) { - DstValue test = dst_vm_cache[i]; - /* Check empty spots */ - if (test.type == DST_NIL) { - if (firstEmpty == NULL) - firstEmpty = dst_vm_cache + i; - goto notfound; - } - /* Check for marked deleted - use booleans as deleted */ - if (test.type == DST_BOOLEAN) { - if (firstEmpty == NULL) - firstEmpty = dst_vm_cache + i; - continue; - } - if (dst_cache_strequal(test, str, len, hash)) { - /* Replace first deleted */ - *success = 1; - if (firstEmpty != NULL) { - *firstEmpty = test; - dst_vm_cache[i].type = DST_BOOLEAN; - return firstEmpty; - } - return dst_vm_cache + i; - } - } - notfound: - *success = 0; - return firstEmpty; -} - -/* Resize the cache. */ -static void dst_cache_resize(uint32_t newCapacity) { - uint32_t i, oldCapacity; - DstValue *oldCache = dst_vm_cache; - DstValue *newCache = calloc(1, newCapacity * sizeof(DstValue)); - if (newCache == NULL) { - DST_OUT_OF_MEMORY; - } - oldCapacity = dst_vm_cache_capacity; - dst_vm_cache = newCache; - dst_vm_cache_capacity = newCapacity; - dst_vm_cache_deleted = 0; - /* Add all of the old cache entries back */ - for (i = 0; i < oldCapacity; ++i) { - int status; - DstValue *bucket; - DstValue x = oldCache[i]; - if (x.type != DST_NIL && x.type != DST_BOOLEAN) { - bucket = dst_cache_find(x, &status); - if (status || bucket == NULL) { - /* there was a problem with the algorithm. */ - break; - } - *bucket = x; - } - } - /* Free the old cache */ - free(oldCache); -} - -/* Add a value to the cache given we know it is not - * already in the cache and we have a bucket. */ -DstValue dst_cache_add_bucket(DstValue x, DstValue *bucket) { - if ((dst_vm_cache_count + dst_vm_cache_deleted) * 2 > dst_vm_cache_capacity) { - int status; - dst_cache_resize(dst_vm_cache_count * 4); - bucket = dst_cache_find(x, &status); - } - /* Add x to the cache */ - dst_vm_cache_count++; - *bucket = x; - return x; -} - -/* Add a value to the cache */ -DstValue dst_cache_add(DstValue x) { - int status = 0; - DstValue *bucket = dst_cache_find(x, &status); - if (!status) { - return dst_cache_add_bucket(x, bucket); - } else { - return *bucket; - } -} - -/* Remove a value from the cache */ -void dst_cache_remove(DstValue x) { - int status = 0; - DstValue *bucket = dst_cache_find(x, &status); - if (status) { - dst_vm_cache_count--; - dst_vm_cache_deleted++; - bucket->type = DST_BOOLEAN; - } -} diff --git a/core/fiber.c b/core/fiber.c index 8148df02..91f43ac6 100644 --- a/core/fiber.c +++ b/core/fiber.c @@ -21,7 +21,6 @@ */ #include -#include "gc.h" /* Initialize a new fiber */ DstFiber *dst_fiber(uint32_t capacity) { @@ -36,6 +35,7 @@ DstFiber *dst_fiber(uint32_t capacity) { } else { fiber->data = NULL; } + fiber->parent = NULL; return dst_fiber_reset(fiber); } @@ -45,6 +45,8 @@ DstFiber *dst_fiber_reset(DstFiber *fiber) { fiber->frametop = 0; fiber->stacktop = DST_FRAME_SIZE; fiber->status = DST_FIBER_DEAD; + fiber->parent = NULL; + fiber->ret = dst_wrap_nil(); return fiber; } diff --git a/core/gc.c b/core/gc.c index e669af0a..aaf77626 100644 --- a/core/gc.c +++ b/core/gc.c @@ -21,7 +21,7 @@ */ #include -#include "cache.h" +#include "symcache.h" /* GC State */ void *dst_vm_blocks; @@ -224,30 +224,23 @@ static void dst_mark_fiber(DstFiber *fiber) { static void dst_deinit_block(DstGCMemoryHeader *block) { void *mem = ((char *)(block + 1)); DstUserdataHeader *h = (DstUserdataHeader *)mem; - void *smem = mem + 2 * sizeof(uint32_t); switch (block->flags & DST_MEM_TYPEBITS) { default: break; /* Do nothing for non gc types */ - case DST_MEMORY_STRING: - dst_cache_remove(dst_wrap_string(smem)); + case DST_MEMORY_SYMBOL: + dst_symbol_deinit((const uint8_t *)mem + 2 * sizeof(uint32_t)); break; case DST_MEMORY_ARRAY: - free(((DstArray*) mem)->data); - break; - case DST_MEMORY_TUPLE: - dst_cache_remove(dst_wrap_tuple(smem)); + dst_array_deinit((DstArray*) mem); break; case DST_MEMORY_TABLE: - free(((DstTable*) mem)->data); - break; - case DST_MEMORY_STRUCT: - dst_cache_remove(dst_wrap_struct(smem)); + dst_table_deinit((DstTable*) mem); break; case DST_MEMORY_FIBER: free(((DstFiber *) mem)->data); break; case DST_MEMORY_BUFFER: - free(((DstBuffer *) mem)->data); + dst_buffer_deinit((DstBuffer *) mem); break; case DST_MEMORY_FUNCTION: free(((DstFunction *)mem)->envs); @@ -281,20 +274,20 @@ void dst_sweep() { DstGCMemoryHeader *previous = NULL; DstGCMemoryHeader *current = dst_vm_blocks; DstGCMemoryHeader *next; - while (current) { + while (NULL != current) { next = current->next; if (current->flags & (DST_MEM_REACHABLE | DST_MEM_DISABLED)) { previous = current; + current->flags &= ~DST_MEM_REACHABLE; } else { dst_deinit_block(current); - if (previous) { + if (NULL != previous) { previous->next = next; } else { dst_vm_blocks = next; } free(current); } - current->flags &= ~DST_MEM_REACHABLE; current = next; } } diff --git a/core/parse.c b/core/parse.c index b8cfee0b..51efb93e 100644 --- a/core/parse.c +++ b/core/parse.c @@ -117,7 +117,7 @@ static int check_str_const(const char *ref, const uint8_t *start, const uint8_t /* Quote a value */ static DstValue quote(DstValue x) { DstValue *t = dst_tuple_begin(2); - t[0] = dst_cstrings("quote"); + t[0] = dst_csymbolv("quote"); t[1] = x; return dst_wrap_tuple(dst_tuple_end(t)); } @@ -223,7 +223,7 @@ static const uint8_t *parse_recur( if (*src >= '0' && *src <= '9') { goto sym_nodigits; } else { - ret = dst_wrap_symbol(dst_string(src, tokenend - src)); + ret = dst_symbolv(src, tokenend - src); } } src = tokenend; @@ -238,18 +238,6 @@ static const uint8_t *parse_recur( goto begin; } - /* Check keyword style strings */ - case ':': - { - const uint8_t *tokenend = ++src; - while (tokenend < end && is_symbol_char(*tokenend)) - tokenend++; - if (tokenend >= end) goto unexpected_eos; - ret = dst_wrap_string(dst_string(src, tokenend - src)); - src = tokenend; - break; - } - /* String literals */ case '"': { diff --git a/core/string.c b/core/string.c index e2612026..4ccf2493 100644 --- a/core/string.c +++ b/core/string.c @@ -21,113 +21,74 @@ */ #include -#include "cache.h" - -static const char base64[] = - "0123456789" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "_="; - -/* Calculate hash for string */ -static uint32_t dst_string_calchash(const uint8_t *str, uint32_t len) { - const uint8_t *end = str + len; - uint32_t hash = 5381; - while (str < end) - hash = (hash << 5) + hash + *str++; - return hash; -} /* Begin building a string */ uint8_t *dst_string_begin(uint32_t length) { - char *data = dst_alloc(DST_MEMORY_NONE, 2 * sizeof(uint32_t) + length + 1); + char *data = dst_alloc(DST_MEMORY_STRING, 2 * sizeof(uint32_t) + length); uint8_t *str = (uint8_t *) (data + 2 * sizeof(uint32_t)); dst_string_length(str) = length; - str[length] = 0; return str; } /* Finish building a string */ const uint8_t *dst_string_end(uint8_t *str) { - DstValue check; dst_string_hash(str) = dst_string_calchash(str, dst_string_length(str)); - check = dst_cache_add(dst_wrap_string(str)); - /* Don't tag the memory of the string builder directly. If the string is - * already cached, we don't want the gc to remove it from cache when the original - * string builder is gced (check will contained the cached string) */ - dst_gc_settype(dst_string_raw(check.as.string), DST_MEMORY_STRING); - return check.as.string; + return str; } /* Load a buffer as a string */ const uint8_t *dst_string(const uint8_t *buf, uint32_t len) { uint32_t hash = dst_string_calchash(buf, len); - int status = 0; - DstValue *bucket = dst_cache_strfind(buf, len, hash, &status); - if (status) { - return bucket->as.string; - } else { - uint32_t newbufsize = len + 2 * sizeof(uint32_t) + 1; - uint8_t *str = (uint8_t *)(dst_alloc(DST_MEMORY_STRING, newbufsize) + 2 * sizeof(uint32_t)); - memcpy(str, buf, len); - dst_string_length(str) = len; - dst_string_hash(str) = hash; - str[len] = 0; - return dst_cache_add_bucket(dst_wrap_string(str), bucket).as.string; - } + char *data = dst_alloc(DST_MEMORY_STRING, 2 * sizeof(uint32_t) + len); + uint8_t *str = (uint8_t *) (data + 2 * sizeof(uint32_t)); + memcpy(str, buf, len); + dst_string_length(str) = len; + dst_string_hash(str) = hash; + return str; } -/* Helper for creating a unique string. Increment an integer - * represented as an array of integer digits. */ -static void inc_counter(uint8_t *digits, int base, int len) { - int i; - uint8_t carry = 1; - for (i = len - 1; i >= 0; --i) { - digits[i] += carry; - carry = 0; - if (digits[i] == base) { - digits[i] = 0; - carry = 1; +/* Compare two strings */ +int dst_string_compare(const uint8_t *lhs, const uint8_t *rhs) { + uint32_t xlen = dst_string_length(lhs); + uint32_t ylen = dst_string_length(rhs); + uint32_t len = xlen > ylen ? ylen : xlen; + uint32_t i; + for (i = 0; i < len; ++i) { + if (lhs[i] == rhs[i]) { + continue; + } else if (lhs[i] < rhs[i]) { + return -1; /* x is less than y */ + } else { + return 1; /* y is less than x */ } } -} - -/* Generate a unique symbol. This is used in the library function gensym. The - * symbol string data does not have GC enabled on it yet. You must manuallyb enable - * it later. */ -const uint8_t *dst_string_unique(const uint8_t *buf, uint32_t len) { - DstValue *bucket; - uint32_t hash; - uint8_t counter[6] = {63, 63, 63, 63, 63, 63}; - /* Leave spaces for 6 base 64 digits and two dashes. That means 64^6 possible suffixes, which - * is enough for resolving collisions. */ - uint32_t newlen = len + 8; - uint32_t newbufsize = newlen + 2 * sizeof(uint32_t) + 1; - uint8_t *str = (uint8_t *)(dst_alloc(DST_MEMORY_STRING, newbufsize) + 2 * sizeof(uint32_t)); - dst_string_length(str) = newlen; - memcpy(str, buf, len); - str[len] = '-'; - str[len + 1] = '-'; - str[newlen] = 0; - uint8_t *saltbuf = str + len + 2; - int status = 1; - while (status) { - int i; - inc_counter(counter, 64, 6); - for (i = 0; i < 6; ++i) - saltbuf[i] = base64[counter[i]]; - hash = dst_string_calchash(str, newlen); - bucket = dst_cache_strfind(str, newlen, hash, &status); + if (xlen == ylen) { + return 0; + } else { + return xlen < ylen ? -1 : 1; } - dst_string_hash(str) = hash; - return dst_cache_add_bucket(dst_wrap_string(str), bucket).as.string; } -/* Generate a unique string from a cstring */ -const uint8_t *dst_cstring_unique(const char *s) { - uint32_t len = 0; - while (s[len]) ++len; - return dst_string_unique((const uint8_t *)s, len); +/* Compare a dst string with a piece of memory */ +int dst_string_equalconst(const uint8_t *lhs, const uint8_t *rhs, uint32_t rlen, uint32_t rhash) { + uint32_t index; + uint32_t lhash = dst_string_hash(lhs); + uint32_t llen = dst_string_length(lhs); + if (lhs == rhs) + return 1; + if (lhash != rhash || llen != rlen) + return 0; + for (index = 0; index < llen; index++) { + if (lhs[index] != rhs[index]) + return 0; + } + return 1; +} + +/* Check if two strings are equal */ +int dst_string_equal(const uint8_t *lhs, const uint8_t *rhs) { + return dst_string_equalconst(lhs, rhs, + dst_string_length(rhs), dst_string_hash(rhs)); } /* Load a c string */ @@ -195,7 +156,7 @@ static const uint8_t *integer_to_string(int64_t x) { return dst_string(buf, integer_to_string_impl(buf, x)); } -#define HEX(i) (((uint8_t *) base64)[(i)]) +#define HEX(i) (((uint8_t *) dst_base64)[(i)]) /* Returns a string description for a pointer. Truncates * title to 12 characters */ diff --git a/core/struct.c b/core/struct.c index d7c602b3..658c0501 100644 --- a/core/struct.c +++ b/core/struct.c @@ -21,7 +21,6 @@ */ #include -#include "cache.h" /* Begin creation of a struct */ DstValue *dst_struct_begin(uint32_t count) { @@ -123,7 +122,6 @@ void dst_struct_put(DstValue *st, DstValue key, DstValue value) { /* Finish building a struct */ const DstValue *dst_struct_end(DstValue *st) { - DstValue check; if (dst_struct_hash(st) != dst_struct_length(st)) { /* Error building struct, probably duplicate values. We need to rebuild * the struct using only the values that went in. The second creation should always @@ -142,10 +140,8 @@ const DstValue *dst_struct_end(DstValue *st) { } st = newst; } - dst_struct_hash(st) = dst_calchash_array(st, dst_struct_capacity(st)); - check = dst_cache_add(dst_wrap_struct(st)); - dst_gc_settype(dst_tuple_raw(check.as.st), DST_MEMORY_STRUCT); - return check.as.st; + dst_struct_hash(st) = 0; + return (const DstValue *)st; } /* Get an item from a struct */ @@ -190,3 +186,51 @@ DstTable *dst_struct_to_table(const DstValue *st) { } return table; } + +/* Check if two structs are equal */ +int dst_struct_equal(const DstValue *lhs, const DstValue *rhs) { + uint32_t index; + uint32_t llen = dst_struct_capacity(lhs); + uint32_t rlen = dst_struct_capacity(rhs); + uint32_t lhash = dst_struct_hash(lhs); + uint32_t rhash = dst_struct_hash(rhs); + if (llen != rlen) + return 0; + if (lhash == 0) + lhash = dst_struct_hash(lhs) = dst_array_calchash(lhs, llen); + if (rhash == 0) + rhash = dst_struct_hash(rhs) = dst_array_calchash(rhs, rlen); + if (lhash != rhash) + return 0; + for (index = 0; index < llen; index++) { + if (!dst_equals(lhs[index], rhs[index])) + return 0; + } + return 1; +} + +/* Compare structs */ +int dst_struct_compare(const DstValue *lhs, const DstValue *rhs) { + uint32_t i; + uint32_t lhash = dst_struct_hash(lhs); + uint32_t rhash = dst_struct_hash(rhs); + uint32_t llen = dst_struct_capacity(lhs); + uint32_t rlen = dst_struct_capacity(rhs); + if (llen < rlen) + return -1; + if (llen > rlen) + return 1; + if (0 == lhash) + lhash = dst_struct_hash(lhs) = dst_array_calchash(lhs, llen); + if (0 == rhash) + rhash = dst_struct_hash(rhs) = dst_array_calchash(rhs, rlen); + if (lhash < rhash) + return -1; + if (lhash > rhash) + return 1; + for (i = 0; i < llen; ++i) { + int comp = dst_compare(lhs[i], rhs[i]); + if (comp != 0) return comp; + } + return 0; +} diff --git a/core/symcache.c b/core/symcache.c new file mode 100644 index 00000000..fa0a5bad --- /dev/null +++ b/core/symcache.c @@ -0,0 +1,244 @@ +/* +* Copyright (c) 2017 Calvin Rose +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to +* deal in the Software without restriction, including without limitation the +* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +* sell copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +*/ + +/* The symbol cache is an open hashtable with all active symbols in the program + * stored in it. As the primary use of symbols is table lookups and equality + * checks, all symbols are interned so that there is a single copy of it in the + * whole program. Equality is then just a pointer check. */ + +#include + +/* Cache state */ +const uint8_t **dst_vm_cache = NULL; +uint32_t dst_vm_cache_capacity = 0; +uint32_t dst_vm_cache_count = 0; +uint32_t dst_vm_cache_deleted = 0; + +/* Initialize the cache (allocate cache memory) */ +void dst_symcache_init() { + dst_vm_cache_capacity = 1024; + dst_vm_cache = calloc(1, dst_vm_cache_capacity * sizeof(const uint8_t **)); + if (NULL == dst_vm_cache) { + DST_OUT_OF_MEMORY; + } + dst_vm_cache_count = 0; + dst_vm_cache_deleted = 0; +} + +/* Deinitialize the cache (free the cache memory) */ +void dst_symcache_deinit() { + free(dst_vm_cache); + dst_vm_cache = NULL; + dst_vm_cache_capacity = 0; + dst_vm_cache_count = 0; + dst_vm_cache_deleted = 0; +} + +/* Mark an entry in the table as deleted. */ +#define DST_SYMCACHE_DELETED ((NULL) + 1) + +/* Find an item in the cache and return its location. + * If the item is not found, return the location + * where one would put it. */ +static const uint8_t **dst_symcache_findmem( + const uint8_t *str, + uint32_t len, + uint32_t hash, + int *success) { + uint32_t bounds[4]; + uint32_t i, j, index; + const uint8_t **firstEmpty = NULL; + + /* We will search two ranges - index to the end, + * and 0 to the index. */ + index = hash % dst_vm_cache_capacity; + bounds[0] = index; + bounds[1] = dst_vm_cache_capacity; + bounds[2] = 0; + bounds[3] = index; + for (j = 0; j < 4; j += 2) + for (i = bounds[j]; i < bounds[j+1]; ++i) { + const uint8_t *test = dst_vm_cache[i]; + /* Check empty spots */ + if (NULL == test) { + if (NULL == firstEmpty) + firstEmpty = dst_vm_cache + i; + goto notfound; + } + /* Check for marked deleted */ + if (DST_SYMCACHE_DELETED == test) { + if (firstEmpty == NULL) + firstEmpty = dst_vm_cache + i; + continue; + } + if (dst_string_equalconst(test, str, len, hash)) { + /* Replace first deleted */ + *success = 1; + if (firstEmpty != NULL) { + *firstEmpty = test; + dst_vm_cache[i] = DST_SYMCACHE_DELETED; + return firstEmpty; + } + return dst_vm_cache + i; + } + } + notfound: + *success = 0; + return firstEmpty; +} + +#define dst_symcache_find(str, success) \ + dst_symcache_findmem((str), dst_string_length(str), dst_string_hash(str), (success)) + +/* Resize the cache. */ +static void dst_cache_resize(uint32_t newCapacity) { + uint32_t i, oldCapacity; + const uint8_t **oldCache = dst_vm_cache; + const uint8_t **newCache = calloc(1, newCapacity * sizeof(const uint8_t **)); + if (newCache == NULL) { + DST_OUT_OF_MEMORY; + } + oldCapacity = dst_vm_cache_capacity; + dst_vm_cache = newCache; + dst_vm_cache_capacity = newCapacity; + dst_vm_cache_deleted = 0; + /* Add all of the old cache entries back */ + for (i = 0; i < oldCapacity; ++i) { + int status; + const uint8_t **bucket; + const uint8_t *x = oldCache[i]; + if (x != NULL && x != DST_SYMCACHE_DELETED) { + bucket = dst_symcache_find(x, &status); + if (status || bucket == NULL) { + /* there was a problem with the algorithm. */ + break; + } + *bucket = x; + } + } + /* Free the old cache */ + free(oldCache); +} + +/* Add an item to the cache */ +static void dst_symcache_put(const uint8_t *x, const uint8_t **bucket) { +if ((dst_vm_cache_count + dst_vm_cache_deleted) * 2 > dst_vm_cache_capacity) { + int status; + dst_cache_resize(dst_vm_cache_count * 4); + bucket = dst_symcache_find(x, &status); + } + /* Add x to the cache */ + dst_vm_cache_count++; + *bucket = x; +} + +/* Remove a symbol from the symcache */ +void dst_symbol_deinit(const uint8_t *sym) { + int status = 0; + const uint8_t **bucket = dst_symcache_find(sym, &status); + if (status) { + dst_vm_cache_count--; + dst_vm_cache_deleted++; + *bucket = DST_SYMCACHE_DELETED; + } +} + +/* Create a symbol from a byte string */ +const uint8_t *dst_symbol(const uint8_t *str, uint32_t len) { + uint32_t hash = dst_string_calchash(str, len); + uint8_t *newstr; + int success = 0; + const uint8_t **bucket = dst_symcache_findmem(str, len, hash, &success); + if (success) + return *bucket; + newstr = dst_alloc(DST_MEMORY_SYMBOL, 2 * sizeof(uint32_t) + len) + + (2 * sizeof(uint32_t)); + dst_string_hash(newstr) = hash; + dst_string_length(newstr) = len; + memcpy(newstr, str, len); + dst_symcache_put((const uint8_t *)newstr, bucket); + return newstr; +} + +/* Get a symbol from a cstring */ +const uint8_t *dst_csymbol(const char *cstr) { + uint32_t len = 0; + while (cstr[len]) len++; + return dst_symbol((const uint8_t *)cstr, len); +} + +/* Convert a string to a symbol */ +const uint8_t *dst_symbol_from_string(const uint8_t *str) { + int success = 0; + const uint8_t **bucket = dst_symcache_find(str, &success); + if (success) + return *bucket; + dst_symcache_put((const uint8_t *)str, bucket); + dst_gc_settype(dst_string_raw(str), DST_MEMORY_SYMBOL); + return str; +} + +/* Helper for creating a unique string. Increment an integer + * represented as an array of integer digits. */ +static void inc_counter(uint8_t *digits, int base, int len) { + int i; + uint8_t carry = 1; + for (i = len - 1; i >= 0; --i) { + digits[i] += carry; + carry = 0; + if (digits[i] == base) { + digits[i] = 0; + carry = 1; + } + } +} + +/* Generate a unique symbol. This is used in the library function gensym. The + * symbol will be of the format prefix--XXXXXX, where X is a base64 digit, and + * prefix is the argument passed. */ +const uint8_t *dst_symbol_gen(const uint8_t *buf, uint32_t len) { + const uint8_t **bucket; + uint32_t hash; + uint8_t counter[6] = {63, 63, 63, 63, 63, 63}; + /* Leave spaces for 6 base 64 digits and two dashes. That means 64^6 possible suffixes, which + * is enough for resolving collisions. */ + uint32_t newlen = len + 8; + uint32_t newbufsize = newlen + 2 * sizeof(uint32_t); + uint8_t *str = (uint8_t *)(dst_alloc(DST_MEMORY_SYMBOL, newbufsize) + 2 * sizeof(uint32_t)); + dst_string_length(str) = newlen; + memcpy(str, buf, len); + str[len] = '-'; + str[len + 1] = '-'; + uint8_t *saltbuf = str + len + 2; + int status = 1; + while (status) { + int i; + inc_counter(counter, 64, 6); + for (i = 0; i < 6; ++i) + saltbuf[i] = dst_base64[counter[i]]; + hash = dst_string_calchash(str, newlen); + bucket = dst_symcache_findmem(str, newlen, hash, &status); + } + dst_string_hash(str) = hash; + dst_symcache_put((const uint8_t *)str, bucket); + return (const uint8_t *)str; +} diff --git a/core/cache.h b/core/symcache.h similarity index 78% rename from core/cache.h rename to core/symcache.h index fa09f0d9..febe95ba 100644 --- a/core/cache.h +++ b/core/symcache.h @@ -20,19 +20,14 @@ * IN THE SOFTWARE. */ -#ifndef DST_CACHE_H_defined -#define DST_CACHE_H_defined +#ifndef DST_SYMCACHE_H_defined +#define DST_SYMCACHE_H_defined #include -DstValue dst_cache_add(DstValue x); -DstValue *dst_cache_strfind( - const uint8_t *str, - uint32_t len, - uint32_t hash, - int *success); -DstValue dst_cache_add_bucket(DstValue x, DstValue *bucket); - -void dst_cache_remove(DstValue x); +/* Initialize the cache (allocate cache memory) */ +void dst_symcache_init(); +void dst_symcache_deinit(); +void dst_symbol_deinit(const uint8_t *sym); #endif diff --git a/core/syscalls.c b/core/syscalls.c index e6b12348..ae76e3ba 100644 --- a/core/syscalls.c +++ b/core/syscalls.c @@ -131,7 +131,7 @@ int dst_sys_put(DstValue *argv, uint32_t argn) { return 0; } -DstCFunction dst_vm_syscalls[256] = { +const DstCFunction dst_vm_syscalls[256] = { dst_sys_print, dst_sys_asm, dst_sys_tuple, diff --git a/core/tuple.c b/core/tuple.c index 462475c4..567683cb 100644 --- a/core/tuple.c +++ b/core/tuple.c @@ -21,25 +21,22 @@ */ #include -#include "cache.h" +#include "symcache.h" /* Create a new empty tuple of the given size. This will return memory * which should be filled with DstValues. The memory will not be collected until * dst_tuple_end is called. */ DstValue *dst_tuple_begin(uint32_t length) { - char *data = dst_alloc(DST_MEMORY_NONE, 2 * sizeof(uint32_t) + length * sizeof(DstValue)); + char *data = dst_alloc(DST_MEMORY_TUPLE, 2 * sizeof(uint32_t) + length * sizeof(DstValue)); DstValue *tuple = (DstValue *)(data + (2 * sizeof(uint32_t))); dst_tuple_length(tuple) = length; + dst_tuple_hash(tuple) = 0; return tuple; } /* Finish building a tuple */ const DstValue *dst_tuple_end(DstValue *tuple) { - DstValue check; - dst_tuple_hash(tuple) = dst_calchash_array(tuple, dst_tuple_length(tuple)); - check = dst_cache_add(dst_wrap_tuple((const DstValue *) tuple)); - dst_gc_settype(dst_tuple_raw(check.as.tuple), DST_MEMORY_TUPLE); - return check.as.tuple; + return (const DstValue *)tuple; } /* Build a tuple with n values */ @@ -48,3 +45,42 @@ const DstValue *dst_tuple_n(DstValue *values, uint32_t n) { memcpy(t, values, sizeof(DstValue) * n); return dst_tuple_end(t); } + +/* Check if two tuples are equal */ +int dst_tuple_equal(const DstValue *lhs, const DstValue *rhs) { + uint32_t index; + uint32_t llen = dst_tuple_length(lhs); + uint32_t rlen = dst_tuple_length(rhs); + uint32_t lhash = dst_tuple_hash(lhs); + uint32_t rhash = dst_tuple_hash(rhs); + if (llen != rlen) + return 0; + if (lhash == 0) + lhash = dst_tuple_hash(lhs) = dst_array_calchash(lhs, llen); + if (rhash == 0) + rhash = dst_tuple_hash(rhs) = dst_array_calchash(rhs, rlen); + if (lhash != rhash) + return 0; + for (index = 0; index < llen; index++) { + if (!dst_equals(lhs[index], rhs[index])) + return 0; + } + return 1; +} + +/* Compare tuples */ +int dst_tuple_compare(const DstValue *lhs, const DstValue *rhs) { + uint32_t i; + uint32_t llen = dst_tuple_length(lhs); + uint32_t rlen = dst_tuple_length(rhs); + uint32_t count = llen < rlen ? llen : rlen; + for (i = 0; i < count; ++i) { + int comp = dst_compare(lhs[i], rhs[i]); + if (comp != 0) return comp; + } + if (llen < rlen) + return -1; + else if (llen > rlen) + return 1; + return 0; +} diff --git a/core/util.c b/core/util.c index f04f2685..b85dfb4f 100644 --- a/core/util.c +++ b/core/util.c @@ -22,6 +22,12 @@ #include +/* Base 64 lookup table for digits */ +const char dst_base64[65] = + "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "_="; /* The DST value types in order. These types can be used as * mnemonics instead of a bit pattern for type checking */ @@ -43,6 +49,24 @@ const char *dst_type_names[15] = { "userdata" }; +/* Computes hash of an array of values */ +uint32_t dst_array_calchash(const DstValue *array, uint32_t len) { + const DstValue *end = array + len; + uint32_t hash = 5381; + while (array < end) + hash = (hash << 5) + hash + dst_hash(*array++); + return hash; +} + +/* Calculate hash for string */ +uint32_t dst_string_calchash(const uint8_t *str, uint32_t len) { + const uint8_t *end = str + len; + uint32_t hash = 5381; + while (str < end) + hash = (hash << 5) + hash + *str++; + return hash; +} + /* Read both tuples and arrays as c pointers + uint32_t length. Return 1 if the * view can be constructed, 0 if an invalid type. */ int dst_seq_view(DstValue seq, const DstValue **data, uint32_t *len) { diff --git a/core/value.c b/core/value.c index 586b5955..665e7870 100644 --- a/core/value.c +++ b/core/value.c @@ -50,6 +50,12 @@ int dst_equals(DstValue x, DstValue y) { case DST_INTEGER: result = (x.as.integer == y.as.integer); break; + case DST_STRING: + result = dst_string_equal(x.as.string, y.as.string); + break; + case DST_STRUCT: + result = dst_struct_equal(x.as.st, y.as.st); + break; default: /* compare pointers */ result = (x.as.pointer == y.as.pointer); @@ -74,56 +80,33 @@ uint32_t dst_hash(DstValue x) { hash = dst_string_hash(x.as.string); break; case DST_TUPLE: - hash = dst_tuple_hash(x.as.tuple); + if (0 == dst_tuple_hash(x.as.tuple)) + hash = dst_tuple_hash(x.as.tuple) = + dst_array_calchash(x.as.tuple, dst_tuple_length(x.as.tuple)); + else + hash = dst_tuple_hash(x.as.tuple); break; case DST_STRUCT: - hash = dst_struct_hash(x.as.st); + if (0 == dst_struct_hash(x.as.st)) + hash = dst_struct_hash(x.as.st) = + dst_array_calchash(x.as.st, dst_struct_capacity(x.as.st)); + else + hash = dst_struct_hash(x.as.st); break; default: if (sizeof(double) == sizeof(void *)) { /* Assuming 8 byte pointer */ uint64_t i = x.as.integer; - hash = (i >> 32) ^ (i & 0xFFFFFFFF); + hash = (uint32_t)(i >> 32) ^ (uint32_t)(i & 0xFFFFFFFF); } else { /* Assuming 4 byte pointer (or smaller) */ - hash = (uint32_t) x.as.pointer; + hash = (uint32_t) (x.as.pointer - NULL); } break; } return hash; } -/* Computes hash of an array of values */ -uint32_t dst_calchash_array(const DstValue *array, uint32_t len) { - const DstValue *end = array + len; - uint32_t hash = 5381; - while (array < end) - hash = (hash << 5) + hash + dst_hash(*array++); - return hash; -} - -/* Compare two strings */ -int dst_string_compare(const uint8_t *lhs, const uint8_t *rhs) { - uint32_t xlen = dst_string_length(lhs); - uint32_t ylen = dst_string_length(rhs); - uint32_t len = xlen > ylen ? ylen : xlen; - uint32_t i; - for (i = 0; i < len; ++i) { - if (lhs[i] == rhs[i]) { - continue; - } else if (lhs[i] < rhs[i]) { - return -1; /* x is less than y */ - } else { - return 1; /* y is less than x */ - } - } - if (xlen == ylen) { - return 0; - } else { - return xlen < ylen ? -1 : 1; - } -} - /* Compares x to y. If they are equal retuns 0. If x is less, returns -1. * If y is less, returns 1. All types are comparable * and should have strict ordering. */ @@ -161,25 +144,10 @@ int dst_compare(DstValue x, DstValue y) { } case DST_STRING: return dst_string_compare(x.as.string, y.as.string); - /* Lower indices are most significant */ case DST_TUPLE: - { - uint32_t i; - uint32_t xlen = dst_tuple_length(x.as.tuple); - uint32_t ylen = dst_tuple_length(y.as.tuple); - uint32_t count = xlen < ylen ? xlen : ylen; - for (i = 0; i < count; ++i) { - int comp = dst_compare(x.as.tuple[i], y.as.tuple[i]); - if (comp != 0) return comp; - } - if (xlen < ylen) - return -1; - else if (xlen > ylen) - return 1; - return 0; - } - break; - /* TODO - how should structs compare by default? For now, just use pointers. */ + return dst_tuple_compare(x.as.tuple, y.as.tuple); + case DST_STRUCT: + return dst_struct_compare(x.as.st, y.as.st); default: if (x.as.string == y.as.string) { return 0; diff --git a/core/vm.c b/core/vm.c index 5f7c9353..b6dac379 100644 --- a/core/vm.c +++ b/core/vm.c @@ -22,6 +22,7 @@ #include #include "opcodes.h" +#include "symcache.h" /* VM State */ DstFiber *dst_vm_fiber; @@ -649,7 +650,7 @@ int dst_run(DstValue callee) { dst_fiber_reset(dst_vm_fiber); } if (callee.type == DST_CFUNCTION) { - dst_vm_fiber->ret.type = DST_NIL; + dst_vm_fiber->ret = dst_wrap_nil(); dst_fiber_cframe(dst_vm_fiber); return callee.as.cfunction(dst_vm_fiber->data + dst_vm_fiber->frame, 0); } else if (callee.type == DST_FUNCTION) { @@ -670,16 +671,7 @@ int dst_init() { * horrible for performance, but helps ensure * there are no memory bugs during dev */ dst_vm_memory_interval = 0; - - uint32_t initialCacheCapacity = 1024; - /* Set up the cache */ - dst_vm_cache = calloc(1, initialCacheCapacity * sizeof(DstValue)); - if (NULL == dst_vm_cache) { - return 1; - } - dst_vm_cache_capacity = dst_vm_cache == NULL ? 0 : initialCacheCapacity; - dst_vm_cache_count = 0; - dst_vm_cache_deleted = 0; + dst_symcache_init(); /* Set thread */ dst_vm_fiber = NULL; return 0; @@ -689,10 +681,5 @@ int dst_init() { void dst_deinit() { dst_clear_memory(); dst_vm_fiber = NULL; - /* Deinit the cache */ - free(dst_vm_cache); - dst_vm_cache = NULL; - dst_vm_cache_count = 0; - dst_vm_cache_capacity = 0; - dst_vm_cache_deleted = 0; + dst_symcache_deinit(); } diff --git a/dsts/minimal.dsts b/dsts/minimal.dsts index fd034b9a..0cb42c60 100644 --- a/dsts/minimal.dsts +++ b/dsts/minimal.dsts @@ -1,4 +1,5 @@ { + bork 'boop bytecode [ (load-integer 0 15) (load-integer 1 0) diff --git a/include/dst/dst.h b/include/dst/dst.h index 6f2b61e5..0d1f969c 100644 --- a/include/dst/dst.h +++ b/include/dst/dst.h @@ -282,13 +282,13 @@ extern uint32_t dst_vm_memory_interval; extern uint32_t dst_vm_next_collection; /* Immutable value cache */ -extern DstValue *dst_vm_cache; +extern const uint8_t **dst_vm_cache; extern uint32_t dst_vm_cache_capacity; extern uint32_t dst_vm_cache_count; extern uint32_t dst_vm_cache_deleted; /* Syscall table */ -extern DstCFunction dst_vm_syscalls[256]; +extern const DstCFunction dst_vm_syscalls[256]; /* GC roots - TODO consider a top level fiber pool (per thread?) */ extern DstFiber *dst_vm_fiber; @@ -323,6 +323,8 @@ void dst_buffer_push_u64(DstBuffer *buffer, uint64_t x); DstValue *dst_tuple_begin(uint32_t length); const DstValue *dst_tuple_end(DstValue *tuple); const DstValue *dst_tuple_n(DstValue *values, uint32_t n); +int dst_tuple_equal(const DstValue *lhs, const DstValue *rhs); +int dst_tuple_compare(const DstValue *lhs, const DstValue *rhs); /* String/Symbol functions */ #define dst_string_raw(s) ((uint32_t *)(s) - 2) @@ -333,15 +335,24 @@ const uint8_t *dst_string_end(uint8_t *str); const uint8_t *dst_string(const uint8_t *buf, uint32_t len); const uint8_t *dst_cstring(const char *cstring); int dst_string_compare(const uint8_t *lhs, const uint8_t *rhs); +int dst_string_equal(const uint8_t *lhs, const uint8_t *rhs); +int dst_string_equalconst(const uint8_t *lhs, const uint8_t *rhs, uint32_t rlen, uint32_t rhash); const uint8_t *dst_string_unique(const uint8_t *buf, uint32_t len); const uint8_t *dst_cstring_unique(const char *s); const uint8_t *dst_description(DstValue x); const uint8_t *dst_to_string(DstValue x); #define dst_cstringv(cstr) dst_wrap_string(dst_cstring(cstr)) -#define dst_cstrings(cstr) dst_wrap_symbol(dst_cstring(cstr)) const uint8_t *dst_formatc(const char *format, ...); void dst_puts(const uint8_t *str); +/* Symbol functions */ +const uint8_t *dst_symbol(const uint8_t *str, uint32_t len); +const uint8_t *dst_symbol_from_string(const uint8_t *str); +const uint8_t *dst_csymbol(const char *str); +const uint8_t *dst_symbol_gen(const uint8_t *buf, uint32_t len); +#define dst_symbolv(str, len) dst_wrap_symbol(dst_symbol((str), (len))) +#define dst_csymbolv(cstr) dst_wrap_symbol(dst_csymbol(cstr)) + /* Structs */ #define dst_struct_raw(t) ((uint32_t *)(t) - 2) #define dst_struct_length(t) (dst_struct_raw(t)[0]) @@ -353,6 +364,8 @@ const DstValue *dst_struct_end(DstValue *st); DstValue dst_struct_get(const DstValue *st, DstValue key); DstValue dst_struct_next(const DstValue *st, DstValue key); DstTable *dst_struct_to_table(const DstValue *st); +int dst_struct_equal(const DstValue *lhs, const DstValue *rhs); +int dst_struct_compare(const DstValue *lhs, const DstValue *rhs); /* Table functions */ DstTable *dst_table(uint32_t capacity); @@ -431,8 +444,11 @@ DstValue dst_getindex(DstValue ds, uint32_t index); void dst_setindex(DstValue ds, DstValue value, uint32_t index); /* Utils */ +extern const char dst_base64[65]; int64_t dst_real_to_integer(double real); double dst_integer_to_real(int64_t integer); +uint32_t dst_array_calchash(const DstValue *array, uint32_t len); +uint32_t dst_string_calchash(const uint8_t *str, uint32_t len); /* Parsing */ typedef enum { @@ -506,6 +522,7 @@ typedef enum DstMemoryType DstMemoryType; enum DstMemoryType { DST_MEMORY_NONE, DST_MEMORY_STRING, + DST_MEMORY_SYMBOL, DST_MEMORY_ARRAY, DST_MEMORY_TUPLE, DST_MEMORY_TABLE, @@ -518,13 +535,13 @@ enum DstMemoryType { DST_MEMORY_FUNCDEF }; -/* Prevent GC from freeing some memory. */ -#define dst_disablegc(m) (dst_gc_header(m)->flags |= DST_MEM_DISABLED, (m)) +/* Preventn GC from freeing some memory. */ +#define dst_disablegc(m) dst_gc_header(m)->flags |= DST_MEM_DISABLED /* To allocate collectable memory, one must calk dst_alloc, initialize the memory, * and then call when dst_enablegc when it is initailize and reachable by the gc (on the DST stack) */ void *dst_alloc(DstMemoryType type, size_t size); -#define dst_enablegc(m) (dst_gc_header(m)->flags &= ~DST_MEM_DISABLED, (m)) +#define dst_enablegc(m) dst_gc_header(m)->flags &= ~DST_MEM_DISABLED /* When doing C interop, it is often needed to disable GC on a value. * This is needed when a garbage collection could occur in the middle diff --git a/unittests/buffer_test.c b/unittests/buffer_test.c index 14f1df9e..7e4ef90d 100644 --- a/unittests/buffer_test.c +++ b/unittests/buffer_test.c @@ -11,8 +11,9 @@ int main() { dst_buffer_push_u8(buffer, 'l'); dst_buffer_push_u8(buffer, 'l'); dst_buffer_push_u8(buffer, 'o'); + dst_buffer_push_cstring(buffer, " world!"); assert(dst_equals( - dst_wrap_string(dst_cstring("hello")), + dst_wrap_string(dst_cstring("hello world!")), dst_wrap_string(dst_string(buffer->data, buffer->count)) )); return 0; diff --git a/unittests/nanbox_test.c b/unittests/nanbox_test.c index afc8644a..01f62123 100644 --- a/unittests/nanbox_test.c +++ b/unittests/nanbox_test.c @@ -180,7 +180,7 @@ static inline dst_t dst_nanbox_from_bits(uint64_t bits) { void dst_nanbox_print(dst_t x) { assert(dst_nanbox_checktype(x, dst_nanbox_type(x))); - printf("hex: 0x%llx, " + printf("hex: 0x%lx, " "description: ", x.u64); switch (dst_nanbox_type(x)) { case DST_T_NIL: @@ -229,7 +229,7 @@ void dst_nanbox_print(dst_t x) { printf("\n", dst_nanbox_unwrap_buffer(x)); break; default: - printf("unknown type 0x%llu\n", dst_nanbox_type(x)); + printf("unknown type 0x%lu\n", dst_nanbox_type(x)); case DST_T_REAL: printf("%.21g\n", dst_nanbox_unwrap_real(x)); break;