From 841ee3696d0da01d632476f349d582a01c816734 Mon Sep 17 00:00:00 2001 From: Calvin Rose Date: Wed, 22 Mar 2017 00:27:18 -0400 Subject: [PATCH] Add cache for strings. --- Makefile | 6 +- client/main.c | 3 + core/compile.c | 5 +- core/dict.c | 264 --------------------------------------------- core/ds.c | 12 +-- core/gc.c | 18 +++- core/parse.c | 6 +- core/serialize.c | 8 +- core/stl.c | 99 +++++++++++++---- core/stringcache.c | 120 +++++++++++++++++++++ core/stringcache.h | 15 +++ core/strings.c | 87 +++++++++++++++ core/thread.c | 6 +- core/value.c | 98 ++--------------- core/vm.c | 11 +- libs/stl.gst | 2 + 16 files changed, 342 insertions(+), 418 deletions(-) delete mode 100644 core/dict.c create mode 100644 core/stringcache.c create mode 100644 core/stringcache.h create mode 100644 core/strings.c create mode 100644 libs/stl.gst diff --git a/Makefile b/Makefile index b93e7c48..bdf6e10d 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,7 @@ CFLAGS=-std=c99 -Wall -Wextra -Wpedantic -g -I./include PREFIX=/usr/local GST_TARGET=client/gst GST_CORELIB=core/libgst.a -GST_HEADERS=$(addprefix include/gst/,\ - vm.h ds.h value.h datatypes.h gc.h util.h gst.h stl.h thread.h serialize.h) +GST_HEADERS=$(addprefix include/gst/, gst.h stl.h compile.h disasm.h parse.h) all: $(GST_TARGET) @@ -16,7 +15,7 @@ all: $(GST_TARGET) ##### The core vm and runtime ##### ################################### GST_CORE_SOURCES=$(addprefix core/,\ - compile.c disasm.c parse.c stl.c\ + compile.c disasm.c parse.c stl.c strings.c stringcache.c\ value.c vm.c ds.c gc.c thread.c serialize.c) GST_CORE_OBJECTS=$(patsubst %.c,%.o,$(GST_CORE_SOURCES)) $(GST_CORELIB): $(GST_CORE_OBJECTS) $(GST_HEADERS) @@ -48,5 +47,6 @@ clean: rm $(GST_CORELIB) || true rm $(GST_CORE_OBJECTS) || true rm $(GST_CLIENT_OBJECTS) || true + rm vgcore.* || true .PHONY: clean install run debug valgrind diff --git a/client/main.c b/client/main.c index a3bc999d..73fff0eb 100644 --- a/client/main.c +++ b/client/main.c @@ -1,6 +1,9 @@ #include #include #include +#include +#include +#include #include /* A simple repl for debugging */ diff --git a/core/compile.c b/core/compile.c index 09d460cf..34dfd02e 100644 --- a/core/compile.c +++ b/core/compile.c @@ -1,8 +1,5 @@ +#include #include -#include -#include -#include -#include /* During compilation, FormOptions are passed to ASTs * as configuration options to allow for some optimizations. */ diff --git a/core/dict.c b/core/dict.c deleted file mode 100644 index d4346503..00000000 --- a/core/dict.c +++ /dev/null @@ -1,264 +0,0 @@ -#include "dict.h" -#include "util.h" -#include "value.h" -#include "vm.h" - -/****/ -/* Bag implementation */ -/****/ - -/* Find a kv pair in a bag */ -static GstValue *gst_object_bag_find(GstDict *obj, GstValue key) { - GstValue *start = obj->data; - GstValue *end = obj->data + obj->count * 2; - while (start < end) { - if (gst_equals(*start, key)) - return start; - start += 2; - } - return NULL; -} - -/* Check for string equality */ -static int str_equal_value(GstValue v, const char *str, uint32_t len, uint32_t hash) { - uint32_t i; - if (v.type != GST_STRING) return 0; - if (gst_string_length(str) != len) return 0; - if (!gst_string_hash(str)) - gst_string_hash(str) = gst_string_calchash((uint8_t *)str); - if (gst_string_hash(str) != hash) return 0; - for (i = 0; i < len; ++i) - if (str[1] != v.data.string[i]) return 0; - return 1; -} - -/* Find key value pair with c string key */ -static GstValue *gst_object_bag_findcstring(GstDict *obj, const char *key) { - uint32_t len, hash; - for (len = 0; key[len]; ++len); - hash = gst_cstring_calchash((uint8_t *)key, len); - GstValue *start = obj->data; - GstValue *end = obj->data + obj->count * 2; - while (start < end) { - if (start->type == GST_STRING) { - uint8_t *str = start->data.string; - if (gst_string_length(str) == len) { - if (!gst_string_hash(str)) - gst_string_hash(str) = gst_string_calchash(str); - if (gst_string_hash(str) == hash) { - return start - } - } - } - start += 2; - } - return NULL; -} - -/* Remove a key from a bag */ -static void gst_object_bag_remove(GstDict *obj, GstValue key) { - GstValue *kv = gst_object_bag_find(obj, key); - if (kv != NULL) { - GstValue *lastKv = obj->data + --obj->count * 2; - kv[0] = lastKv[0]; - kv[1] = lastKv[1]; - } -} - -/* Add a key to a bag */ -static void gst_object_bag_put(Gst *vm, GstDict *obj, GstValue key, GstValue value) { - GstValue *kv = gst_object_bag_find(obj, key); - if (kv != NULL) { - /* Replace value */ - kv[1] = value; - } else { - /* Check for need to resize */ - if (obj->count + 1 > obj->capacity) { - uint32_t newCap = 2 * obj->count + 2; - GstValue *newData = gst_alloc(vm, sizeof(GstValue) * 2 * newCap); - gst_memcpy(newData, obj->data, obj->count * 2 * sizeof(GstValue)); - obj->data = newData; - obj->capacity = newCap; - } - /* Push to end */ - kv = obj->data + obj->count * 2; - kv[0] = key; - kv[1] = value; - ++obj->count; - } -} - -/****/ -/* Hashtable implementaion */ -/****/ - -/* Add a key value pair to a given array. Returns if key successfully added. */ -static void hash_putkv(GstValue *data, uint32_t cap, GstValue key, GstValue value) { - GstValue *end = data + 2 * cap; - GstValue *start = data + (gst_hash(key) % cap) * 2; - GstValue *bucket; - /* Check second half of array */ - for (bucket = start; bucket < end; bucket += 2) { - if (bucket[0].type == GST_NIL) { - bucket[0] = key; - bucket[1] = value; - return; - } - } - /* Check first half of array */ - for (bucket = data; bucket < start; bucket += 2) { - if (bucket[0].type == GST_NIL) { - bucket[0] = key; - bucket[1] = value; - return; - } - } - /* Should never reach here - data would be full */ -} - -/* Find a bucket in the hastable */ -static GstValue *hash_findkv(GstValue *data, uint32_t cap, GstValue key, GstValue **out) { - GstValue *end = data + 2 * cap; - GstValue *start = data + (gst_hash(key) % cap) * 2; - GstValue *bucket; - /* Check second half of array */ - for (bucket = start; bucket < end; bucket += 2) - if (bucket[0].type == GST_NIL) - if (bucket[1].type == GST_BOOLEAN) /* Check if just marked deleted */ - continue; - else { - *out = bucket; - return NULL; - } - else if (gst_equals(bucket[0], key)) - return bucket; - /* Check first half of array */ - for (bucket = data; bucket < start; bucket += 2) - if (bucket[0].type == GST_NIL) - if (bucket[1].type == GST_BOOLEAN) /* Check if just marked deleted */ - continue; - else { - *out = bucket; - return NULL; - } - else if (gst_equals(bucket[0], key)) - return bucket; - /* Should never reach here - data would be full */ - *out = bucket; - return NULL; -} - -/* Resize internal hashtable. Also works if currently a bag. */ -static void gst_object_rehash(Gst *vm, GstDict *obj, uint32_t capacity) { - GstValue *toData, *fromBucket, *toBucket, *toStart *fromEnd, *toEnd; - toData = gst_alloc(vm, capacity * 2 * sizeof(GstValue)); - toEnd = toData + 2 * capacity; - fromBucket = obj->data; - fromEnd = fromBucket + obj->count * 2; - for (; fromBucket < fromEnd; fromBucket += 2) { - if (fromBucket[0].type == GST_NIL) continue; - toStart = toData + (gst_hash(fromBucket[0]) % capacity) * 2; - /* Check second half of array */ - for (toBucket = toStart; toBucket < toEnd; toBucket += 2) { - if (toBucket[0].type == GST_NIL) { - toBucket[0] = fromBucket[0]; - toBucket[1] = fromBucket[1]; - goto finish_put; - } - } - /* Check first half of array */ - for (toBucket = toData; toBucket < toStart; toBucket += 2) { - if (toBucket[0].type == GST_NIL) { - toBucket[0] = fromBucket[0]; - toBucket[1] = fromBucket[1]; - goto finish_put; - } - } - /* Error if we got here - backing array to small. */ - ; - /* Continue. */ - finish_put: continue; - } - obj->capacity = capacity; - obj->data = toData; -} - -/****/ -/* Interface */ -/****/ - -/* Initialize a dictionary */ -GstDict *gst_dict(Gst *vm, uint32_t capacity) { - GstDict *dict = gst_alloc(vm, sizeof(GstDict)); - GstValue *data = gst_zalloc(vm, sizeof(GstValue) * 2 * capacity); - dict->data = data; - dict->capacity = capacity; - dict->count = 0; - dict->flags = (capacity < GST_OBJECT_BAG_THRESHOLD) ? GST_OBJECT_FLAG_ISBAG : 0; - return dict; -} - -/* Get item from dictionary */ -GstValue gst_dict_get(GstDict *dict, GstValue key) { - GstValue *bucket *notused; - if (dict->flags & GST_OBJECT_FLAG_ISBAG) { - bucket = gst_object_bag_find(dict, key); - } else { - bucket = hash_findkv(obj->data, obj->capacity, key, ¬used); - } - if (bucket != NULL) { - return bucket[1]; - } else { - GstValue ret; - ret.type = GST_NIL; - return ret; - } -} - -/* Get item with c string key */ -GstValue gst_dict_get_cstring(GstDict *dict, const char *key); - -/* Add item to dictionary */ -void gst_dict_put(Gst *vm, GstDict *obj, GstValue key, GstValue value) { - if (obj->flags & GST_OBJECT_FLAG_ISBAG) { - if (obj->count > GST_OBJECT_BAG_THRESHOLD) { - /* Change to hashtable */ - obj->flags |= GST_OBJECT_FLAG_ISBAG; - gst_object_rehash(vm, obj, 4 * obj->count); - goto put_hash; - } - gst_object_bag_put(vm, obj, key, value); - } else { - GstValue *bucket, *out; - put_hash: - bucket = hash_findkv(obj->data, obj->capacity, key, &out); - if (bucket != NULL) { - bucket[1] = value; - } else { - /* Check for resize */ - if (obj->count + 1 > obj->capacity) { - gst_object_rehash(vm, obj, 2 * (obj->count + 1)); - bucket = hash_findkv(obj->data, obj->capacity, key, &out); - } - out[0] = key; - out[1] = value; - ++obj->count; - } - } -} - -/* Remove item from dictionary */ -void gst_dict_remove(GstDict *obj, GstValue key) { - if (obj->flags & GST_OBJECT_FLAG_ISBAG) { - gst_object_bag_remove(obj, key); - } else { - GstValue *bucket, *out; - bucket = hash_findkv(obj->data, obj->capacity, key, &out); - if (bucket != NULL) { - --obj->count; - bucket[0].type = GST_NIL; - bucket[1].type = GST_BOOLEAN; - } - } -} - diff --git a/core/ds.c b/core/ds.c index 86d75435..fef3ef54 100644 --- a/core/ds.c +++ b/core/ds.c @@ -1,7 +1,4 @@ -#include -#include -#include -#include +#include /****/ /* Buffer functions */ @@ -56,12 +53,7 @@ void gst_buffer_append(Gst *vm, GstBuffer *buffer, uint8_t *string, uint32_t len /* Convert the buffer to a string */ uint8_t *gst_buffer_to_string(Gst *vm, GstBuffer *buffer) { - uint8_t *data = gst_alloc(vm, buffer->count + 2 * sizeof(uint32_t)); - data += 2 * sizeof(uint32_t); - gst_string_length(data) = buffer->count; - gst_string_hash(data) = 0; - gst_memcpy(data, buffer->data, buffer->count * sizeof(uint8_t)); - return data; + return gst_load_cstring_rawlen(vm, (char *) buffer->data, buffer->count); } /****/ diff --git a/core/gc.c b/core/gc.c index a767442e..24b91921 100644 --- a/core/gc.c +++ b/core/gc.c @@ -1,7 +1,5 @@ -#include -#include -#include -#include +#include +#include "stringcache.h" /* The metadata header associated with an allocated block of memory */ #define gc_header(mem) ((GCMemoryHeader *)(mem) - 1) @@ -11,6 +9,7 @@ typedef struct GCMemoryHeader GCMemoryHeader; struct GCMemoryHeader { GCMemoryHeader * next; uint32_t color : 1; + uint32_t tags : 31; }; /* Helper to mark function environments */ @@ -186,6 +185,10 @@ void gst_sweep(Gst *vm) { } else { vm->blocks = next; } + /* Remove from string cache */ + if (current->tags & GST_MEMTAG_STRING) { + gst_stringcache_remove(vm, (uint8_t *)(current + 1) + 2 * sizeof(uint32_t)); + } gst_raw_free(current); } else { previous = current; @@ -207,6 +210,7 @@ static void *gst_alloc_prepare(Gst *vm, char *rawBlock, uint32_t size) { mdata->next = vm->blocks; vm->blocks = mdata; mdata->color = !vm->black; + mdata->tags = 0; return rawBlock + sizeof(GCMemoryHeader); } @@ -222,6 +226,12 @@ void *gst_zalloc(Gst *vm, uint32_t size) { return gst_alloc_prepare(vm, gst_raw_calloc(1, totalSize), totalSize); } +/* Tag some memory to mark it with special properties */ +void gst_mem_tag(void *mem, uint32_t tags) { + GCMemoryHeader *mh = (GCMemoryHeader *)mem - 1; + mh->tags |= tags; +} + /* Run garbage collection */ void gst_collect(Gst *vm) { GstValue temp; diff --git a/core/parse.c b/core/parse.c index 3b19bb44..c4bd7534 100644 --- a/core/parse.c +++ b/core/parse.c @@ -1,9 +1,5 @@ -#include -#include -#include +#include #include -#include -#include static const char UNEXPECTED_CLOSING_DELIM[] = "Unexpected closing delimiter"; diff --git a/core/serialize.c b/core/serialize.c index 3dd8598a..18b7c0d9 100644 --- a/core/serialize.c +++ b/core/serialize.c @@ -1,10 +1,4 @@ -#include -#include -#include -#include -#include -#include -#include +#include /** * Data format diff --git a/core/stl.c b/core/stl.c index 6791058f..14842ba7 100644 --- a/core/stl.c +++ b/core/stl.c @@ -1,8 +1,7 @@ -/* This implements a standard library in gst. Some of this - * will eventually be ported over to gst if possible */ -#include #include -#include +#include +#include +#include /****/ /* Core */ @@ -39,21 +38,6 @@ int gst_stl_setclass(Gst *vm) { gst_c_return(vm, x); } -/* Call a function */ -int gst_stl_callforeach(Gst *vm) { - GstValue func = gst_arg(vm, 0); - uint32_t argCount = gst_count_args(vm); - uint32_t i; - if (argCount) { - for (i = 1; i < argCount; ++i) - gst_call(vm, func, 1, vm->thread->data + vm->thread->count + i); - vm->ret.type = GST_NIL; - return GST_RETURN_OK; - } else { - gst_c_throwc(vm, "expected at least one argument"); - } -} - /* Create a buffer */ int gst_stl_make_buffer(Gst *vm) { uint32_t i, count; @@ -91,7 +75,6 @@ void gst_stl_load_core(GstCompiler *c) { gst_compiler_add_global_cfunction(c, "print", gst_stl_print); gst_compiler_add_global_cfunction(c, "get-class", gst_stl_getclass); gst_compiler_add_global_cfunction(c, "set-class", gst_stl_setclass); - gst_compiler_add_global_cfunction(c, "call-for-each", gst_stl_callforeach); gst_compiler_add_global_cfunction(c, "make-buffer", gst_stl_make_buffer); gst_compiler_add_global_cfunction(c, "tostring", gst_stl_tostring); gst_compiler_add_global_cfunction(c, "exit", gst_stl_exit); @@ -101,6 +84,81 @@ void gst_stl_load_core(GstCompiler *c) { /* Parsing */ /****/ +/* Get an integer power of 10 */ +static double exp10(int power) { + if (power == 0) return 1; + if (power > 0) { + double result = 10; + int currentPower = 1; + while (currentPower * 2 <= power) { + result = result * result; + currentPower *= 2; + } + return result * exp10(power - currentPower); + } else { + return 1 / exp10(-power); + } +} + +/* Read a number from a string. Returns if successfuly + * parsed a number from the enitre input string. + * If returned 1, output is int ret.*/ +static int read_number(const uint8_t *string, const uint8_t *end, double *ret, int forceInt) { + int sign = 1, x = 0; + double accum = 0, exp = 1, place = 1; + /* Check the sign */ + if (*string == '-') { + sign = -1; + ++string; + } else if (*string == '+') { + ++string; + } + if (string >= end) return 0; + while (string < end) { + if (*string == '.' && !forceInt) { + place = 0.1; + } else if (!forceInt && (*string == 'e' || *string == 'E')) { + /* Read the exponent */ + ++string; + if (string >= end) return 0; + if (!read_number(string, end, &exp, 1)) + return 0; + exp = exp10(exp); + break; + } else { + x = *string; + if (x < '0' || x > '9') return 0; + x -= '0'; + if (place < 1) { + accum += x * place; + place *= 0.1; + } else { + accum *= 10; + accum += x; + } + } + ++string; + } + *ret = accum * sign * exp; + return 1; +} + +/* Convert string to integer */ +int gst_stl_parse_number(Gst *vm) { + GstValue ret; + double number; + uint8_t *str = gst_to_string(vm, gst_arg(vm, 0)); + uint8_t *end = str + gst_string_length(str); + if (read_number(str, end, &number, 0)) { + ret.type = GST_NUMBER; + ret.data.number = number; + } else { + ret.type = GST_NIL; + } + gst_c_return(vm, ret); + +} + /* Parse a source string into an AST */ int gst_stl_parse(Gst *vm) { uint8_t *source = gst_to_string(vm, gst_arg(vm, 0)); @@ -122,6 +180,7 @@ int gst_stl_parse(Gst *vm) { /* Load parsing */ void gst_stl_load_parse(GstCompiler *c) { gst_compiler_add_global_cfunction(c, "parse", gst_stl_parse); + gst_compiler_add_global_cfunction(c, "parse-number", gst_stl_parse_number); } /****/ diff --git a/core/stringcache.c b/core/stringcache.c new file mode 100644 index 00000000..46c36cd7 --- /dev/null +++ b/core/stringcache.c @@ -0,0 +1,120 @@ +#include +#include "stringcache.h" + +/* Dud pointer to serve as deletion marker */ +static uint8_t *deleted = (uint8_t *) "DELETED"; + +/* Initialize the string cache for a vm */ +void gst_stringcache_init(Gst *vm, uint32_t capacity) { + vm->strings = gst_raw_calloc(1, capacity * sizeof(uint8_t *)); + if (vm->strings == NULL) + GST_OUT_OF_MEMORY; + vm->stringsCapacity = capacity; + vm->stringsCount = 0; + vm->stringsDeleted = 0; +} + +/* Deinitialize the stringcache for a vm */ +void gst_stringcache_deinit(Gst *vm) { + gst_raw_free(vm->strings); + vm->stringsCapacity = 0; + vm->stringsCount = 0; + vm->stringsDeleted = 0; +} + +/* Find a string in the hashtable. Returns null if + * not found. */ +static uint8_t **gst_stringcache_find(Gst *vm, uint8_t *str, int *success) { + uint32_t bounds[4]; + uint32_t i, j, index, hash; + uint8_t **firstEmpty = NULL; + hash = gst_string_hash(str); + if (!hash) { + hash = gst_string_hash(str) = gst_string_calchash(str); + } + index = hash % vm->stringsCapacity; + bounds[0] = index; + bounds[1] = vm->stringsCapacity; + bounds[2] = 0; + bounds[3] = index; + for (j = 0; j < 4; j += 2) + for (i = bounds[j]; i < bounds[j+1]; ++i) { + uint8_t *testStr = vm->strings[i]; + /* Check empty spots */ + if (testStr == NULL) { + if (firstEmpty == NULL) + firstEmpty = vm->strings + i; + goto notfound; + } + if (testStr == deleted) { + if (firstEmpty == NULL) + firstEmpty = vm->strings + i; + continue; + } + if (gst_string_equal(testStr, str)) { + /* Replace first deleted */ + *success = 1; + if (firstEmpty != NULL) { + *firstEmpty = testStr; + vm->strings[i] = deleted; + return firstEmpty; + } + return vm->strings + i; + } + } + notfound: + *success = 0; + return firstEmpty; +} + +/* Resize the hashtable. */ +static void gst_stringcache_resize(Gst *vm, uint32_t newCapacity) { + uint32_t i, oldCapacity; + uint8_t **oldCache = vm->strings; + uint8_t **newCache = gst_raw_calloc(1, newCapacity * sizeof(uint8_t *)); + if (newCache == NULL) + GST_OUT_OF_MEMORY; + oldCapacity = vm->stringsCapacity; + vm->strings = newCache; + vm->stringsCapacity = newCapacity; + vm->stringsCount = 0; + vm->stringsDeleted = 0; + /* Add all of the old strings back */ + for (i = 0; i < oldCapacity; ++i) { + uint8_t *str = oldCache[i]; + if (str != NULL && str != deleted) + gst_stringcache_get(vm, str); + } + /* Free the old cache */ + gst_raw_free(oldCache); +} + +/* Get a string from the string cache */ +uint8_t *gst_stringcache_get(Gst *vm, uint8_t *str) { + int status = 0; + uint8_t **bucket = gst_stringcache_find(vm, str, &status); + if (status) { + return *bucket; + } else { + if ((vm->stringsCount + vm->stringsDeleted) * 2 > vm->stringsCapacity) { + gst_stringcache_resize(vm, vm->stringsCount * 4); + bucket = gst_stringcache_find(vm, str, &status); + } + vm->stringsCount++; + *bucket = str; + /* Mark the memory as string memory */ + gst_mem_tag(gst_string_raw(str), GST_MEMTAG_STRING); + return str; + } +} + +/* Remove a string from the cache */ +void gst_stringcache_remove(Gst *vm, uint8_t *str) { + int status = 0; + uint8_t **bucket = gst_stringcache_find(vm, str, &status); + if (status) { + vm->stringsCount--; + vm->stringsDeleted++; + *bucket = deleted; + } +} diff --git a/core/stringcache.h b/core/stringcache.h new file mode 100644 index 00000000..f4c2899f --- /dev/null +++ b/core/stringcache.h @@ -0,0 +1,15 @@ +#ifndef GST_STRINGCACHE_defined +#define GST_STRINGCACHE_defined + +#include + +/****/ +/* String Cache (move internal) */ +/****/ + +void gst_stringcache_init(Gst *vm, uint32_t capacity); +void gst_stringcache_deinit(Gst *vm); +uint8_t *gst_stringcache_get(Gst *vm, uint8_t *str); +void gst_stringcache_remove(Gst *vm, uint8_t *str); + +#endif diff --git a/core/strings.c b/core/strings.c new file mode 100644 index 00000000..bb3d0f46 --- /dev/null +++ b/core/strings.c @@ -0,0 +1,87 @@ +#include +#include "stringcache.h" + +uint8_t *gst_load_cstring_rawlen(Gst *vm, const char *string, uint32_t len) { + uint8_t *data = gst_alloc(vm, len + 1 + 2 * sizeof(uint32_t)); + data += 2 * sizeof(uint32_t); + gst_string_hash(data) = 0; + gst_string_length(data) = len; + gst_memcpy(data, string, len); + data[len] = 0; + /* Check string cache */ + return gst_stringcache_get(vm, data); +} + +/* Load a c string into a GST string */ +GstValue gst_load_cstring(Gst *vm, const char *string) { + GstValue ret; + ret.type = GST_STRING; + ret.data.string = gst_load_cstring_rawlen(vm, string, strlen(string)); + return ret; +} + +/* Load a c string into a GST symbol */ +GstValue gst_load_csymbol(Gst *vm, const char *string) { + GstValue ret; + ret.type = GST_SYMBOL; + ret.data.string = gst_load_cstring_rawlen(vm, string, strlen(string)); + return ret; +} + +/* Simple hash function (djb2) */ +uint32_t gst_cstring_calchash(const uint8_t *str, uint32_t len) { + const uint8_t *end = str + len; + uint32_t hash = 5381; + while (str < end) + hash = (hash << 5) + hash + *str++; + return hash; +} + +/* GST string version */ +uint32_t gst_string_calchash(const uint8_t *str) { + return gst_cstring_calchash(str, gst_string_length(str)); +} + +/* Check if two strings are equal. Does not check the string cache. */ +int gst_string_equal(const uint8_t *lhs, const uint8_t *rhs) { + uint32_t hash_l, hash_r, len, i; + if (lhs == rhs) + return 1; + /* Check lengths */ + len = gst_string_length(lhs); + if (len != gst_string_length(rhs)) return 0; + /* Check hashes */ + hash_l = gst_string_hash(lhs); + hash_r = gst_string_hash(rhs); + if (!hash_l) + hash_l = gst_string_hash(lhs) = gst_string_calchash(lhs); + if (!hash_r) + hash_r = gst_string_hash(rhs) = gst_string_calchash(rhs); + if (hash_l != hash_r) return 0; + for (i = 0; i < len; ++i) + if (lhs[i] != rhs[i]) + return 0; + return 1; +} + +/* Compares two strings */ +int gst_string_compare(const uint8_t *lhs, const uint8_t *rhs) { + uint32_t xlen = gst_string_length(lhs); + uint32_t ylen = gst_string_length(rhs); + uint32_t len = xlen > ylen ? ylen : xlen; + uint32_t i; + for (i = 0; i < len; ++i) { + if (lhs[i] == rhs[i]) { + continue; + } else if (lhs[i] < rhs[i]) { + return -1; /* x is less then y */ + } else { + return 1; /* y is less than x */ + } + } + if (xlen == ylen) { + return 0; + } else { + return xlen < ylen ? -1 : 1; + } +} diff --git a/core/thread.c b/core/thread.c index 3e6d8a56..a461e870 100644 --- a/core/thread.c +++ b/core/thread.c @@ -1,8 +1,4 @@ -#include -#include -#include -#include -#include +#include /* Create a new thread */ GstThread *gst_thread(Gst *vm, GstValue callee, uint32_t capacity) { diff --git a/core/value.c b/core/value.c index 13700cc2..de3393a3 100644 --- a/core/value.c +++ b/core/value.c @@ -1,7 +1,4 @@ -#include -#include -#include -#include +#include #include /* Boolean truth definition */ @@ -9,30 +6,6 @@ int gst_truthy(GstValue v) { return v.type != GST_NIL && !(v.type == GST_BOOLEAN && !v.data.boolean); } -static uint8_t *load_cstring(Gst *vm, const char *string, uint32_t len) { - uint8_t *data = gst_alloc(vm, len + 1 + 2 * sizeof(uint32_t)); - data += 2 * sizeof(uint32_t); - gst_string_hash(data) = 0; - gst_string_length(data) = len; - gst_memcpy(data, string, len); - data[len] = 0; - return data; -} - -GstValue gst_load_cstring(Gst *vm, const char *string) { - GstValue ret; - ret.type = GST_STRING; - ret.data.string = load_cstring(vm, string, strlen(string)); - return ret; -} - -GstValue gst_load_csymbol(Gst *vm, const char *string) { - GstValue ret; - ret.type = GST_SYMBOL; - ret.data.string = load_cstring(vm, string, strlen(string)); - return ret; -} - static uint8_t * number_to_string(Gst *vm, GstNumber x) { static const uint32_t SIZE = 20; uint8_t *data = gst_alloc(vm, SIZE + 1 + 2 * sizeof(uint32_t)); @@ -84,12 +57,12 @@ static uint8_t *string_description(Gst *vm, const char *title, uint32_t titlelen uint8_t *gst_to_string(Gst *vm, GstValue x) { switch (x.type) { case GST_NIL: - return load_cstring(vm, "nil", 3); + return gst_load_cstring_rawlen(vm, "nil", 3); case GST_BOOLEAN: if (x.data.boolean) { - return load_cstring(vm, "true", 4); + return gst_load_cstring_rawlen(vm, "true", 4); } else { - return load_cstring(vm, "false", 5); + return gst_load_cstring_rawlen(vm, "false", 5); } case GST_NUMBER: return number_to_string(vm, x.data.number); @@ -121,20 +94,6 @@ uint8_t *gst_to_string(Gst *vm, GstValue x) { return NULL; } -/* GST string version */ -uint32_t gst_string_calchash(const uint8_t *str) { - return gst_cstring_calchash(str, gst_string_length(str)); -} - -/* Simple hash function (djb2) */ -uint32_t gst_cstring_calchash(const uint8_t *str, uint32_t len) { - const uint8_t *end = str + len; - uint32_t hash = 5381; - while (str < end) - hash = (hash << 5) + hash + *str++; - return hash; -} - /* Simple hash function to get tuple hash */ static uint32_t tuple_calchash(GstValue *tuple) { uint32_t i; @@ -161,25 +120,6 @@ int gst_equals(GstValue x, GstValue y) { case GST_NUMBER: result = (x.data.number == y.data.number); break; - /* Assume that when strings are created, equal strings - * are set to the same string */ - case GST_STRING: - case GST_SYMBOL: - if (x.data.string == y.data.string) { - result = 1; - break; - } - if (gst_hash(x) != gst_hash(y) || - gst_string_length(x.data.string) != gst_string_length(y.data.string)) { - result = 0; - break; - } - if (!strncmp((char *) x.data.string, (char *) y.data.string, gst_string_length(x.data.string))) { - result = 1; - break; - } - result = 0; - break; case GST_TUPLE: if (x.data.tuple == y.data.tuple) { result = 1; @@ -203,7 +143,7 @@ int gst_equals(GstValue x, GstValue y) { break; default: /* compare pointers */ - result = (x.data.array == y.data.array); + result = (x.data.pointer == y.data.pointer); break; } } @@ -234,9 +174,8 @@ uint32_t gst_hash(GstValue x) { case GST_STRING: case GST_SYMBOL: /* Assume 0 is not hashed. */ - if (gst_string_hash(x.data.string)) - hash = gst_string_hash(x.data.string); - else + hash = gst_string_hash(x.data.string); + if (!hash) hash = gst_string_hash(x.data.string) = gst_string_calchash(x.data.string); break; case GST_TUPLE: @@ -283,28 +222,7 @@ int gst_compare(GstValue x, GstValue y) { } case GST_STRING: case GST_SYMBOL: - if (x.data.string == y.data.string) { - return 0; - } else { - uint32_t xlen = gst_string_length(x.data.string); - uint32_t ylen = gst_string_length(y.data.string); - uint32_t len = xlen > ylen ? ylen : xlen; - uint32_t i; - for (i = 0; i < len; ++i) { - if (x.data.string[i] == y.data.string[i]) { - continue; - } else if (x.data.string[i] < y.data.string[i]) { - return -1; /* x is less then y */ - } else { - return 1; /* y is less than x */ - } - } - if (xlen == ylen) { - return 0; - } else { - return xlen < ylen ? -1 : 1; - } - } + return gst_string_compare(x.data.string, y.data.string); /* Lower indices are most significant */ case GST_TUPLE: { diff --git a/core/vm.c b/core/vm.c index 7304a209..1ab60e98 100644 --- a/core/vm.c +++ b/core/vm.c @@ -1,9 +1,5 @@ -#include -#include -#include -#include -#include -#include +#include +#include "stringcache.h" /* Macros for errors in the vm */ @@ -557,6 +553,8 @@ void gst_init(Gst *vm) { /* Add thread */ vm->thread = NULL; vm->rootenv.type = GST_NIL; + /* Set up string cache */ + gst_stringcache_init(vm, 128); } /* Clear all memory associated with the VM */ @@ -565,4 +563,5 @@ void gst_deinit(Gst *vm) { vm->thread = NULL; vm->rootenv.type = GST_NIL; vm->ret.type = GST_NIL; + gst_stringcache_deinit(vm); } diff --git a/libs/stl.gst b/libs/stl.gst new file mode 100644 index 00000000..81084c1e --- /dev/null +++ b/libs/stl.gst @@ -0,0 +1,2 @@ +# The standard library +(fn + [...args]