From f456de5fac554d7cd1ae51e5f090d24e7a6f7841 Mon Sep 17 00:00:00 2001 From: Calvin Rose Date: Sun, 16 Apr 2017 09:39:41 -0400 Subject: [PATCH] Change object implementation to use open hashing. Currently using simple linear probing. --- core/compile.c | 24 +++---- core/ds.c | 165 ++++++++++++++++++++++------------------------ core/gc.c | 14 ++-- core/ids.c | 72 ++++++++++++-------- core/stl.c | 17 ++++- core/vm.c | 2 +- include/gst/gst.h | 17 ++--- 7 files changed, 158 insertions(+), 153 deletions(-) diff --git a/core/compile.c b/core/compile.c index 7a1e204f..8b22fd4f 100644 --- a/core/compile.c +++ b/core/compile.c @@ -918,21 +918,18 @@ static Slot compile_object(GstCompiler *c, FormOptions opts, GstObject *obj) { GstScope *scope = c->tail; FormOptions subOpts = form_options_default(); GstBuffer *buffer = c->buffer; - GstBucket *bucket; Slot ret; SlotTracker tracker; uint32_t i, cap; cap = obj->capacity; ret = compiler_get_target(c, opts); tracker_init(c, &tracker); - for (i = 0; i < cap; ++i) { - bucket = obj->buckets[i]; - while (bucket != NULL) { - Slot slot = compile_value(c, subOpts, bucket->key); + for (i = 0; i < cap; i += 2) { + if (obj->data[i].type != GST_NIL) { + Slot slot = compile_value(c, subOpts, obj->data[i]); compiler_tracker_push(c, &tracker, compiler_realize_slot(c, slot)); - slot = compile_value(c, subOpts, bucket->value); + slot = compile_value(c, subOpts, obj->data[i + 1]); compiler_tracker_push(c, &tracker, compiler_realize_slot(c, slot)); - bucket = bucket->next; } } compiler_tracker_free(c, scope, &tracker); @@ -1034,15 +1031,10 @@ void gst_compiler_global(GstCompiler *c, const char *name, GstValue x) { /* Add many global variables */ void gst_compiler_globals(GstCompiler *c, GstObject *env) { uint32_t i; - GstBucket *bucket; - for (i = 0; i < env->capacity; ++i) { - bucket = env->buckets[i]; - while (bucket) { - if (bucket->key.type == GST_STRING) { - compiler_declare_symbol(c, c->tail, bucket->key); - gst_array_push(c->vm, c->env, 
bucket->value); - } - bucket = bucket->next; + for (i = 0; i < env->capacity; i += 2) { + if (env->data[i].type == GST_STRING) { + compiler_declare_symbol(c, c->tail, env->data[i]); + gst_array_push(c->vm, c->env, env->data[i + 1]); } } } diff --git a/core/ds.c b/core/ds.c index 9b676117..5b45790b 100644 --- a/core/ds.c +++ b/core/ds.c @@ -153,50 +153,60 @@ void *gst_userdata(Gst *vm, uint32_t size, GstObject *meta) { /* Create a new dictionary */ GstObject* gst_object(Gst *vm, uint32_t capacity) { GstObject *o = gst_alloc(vm, sizeof(GstObject)); - GstBucket **buckets = gst_zalloc(vm, capacity * sizeof(GstBucket *)); - o->buckets = buckets; + GstValue *data = gst_zalloc(vm, capacity * sizeof(GstValue)); + o->data = data; o->capacity = capacity; o->count = 0; o->parent = NULL; + o->deleted = 0; return o; } +/* Find the bucket that contains the given key. Will also return + * bucket where key should go if not in object. */ +static GstValue *gst_object_find(GstObject *o, GstValue key) { + uint32_t index = (gst_hash(key) % (o->capacity / 2)) * 2; + uint32_t i, j; + uint32_t start[2], end[2]; + start[0] = index; end[0] = o->capacity; + start[1] = 0; end[1] = index; + for (j = 0; j < 2; ++j) + for (i = start[j]; i < end[j]; i += 2) { + if (o->data[i].type == GST_NIL) { + if (o->data[i + 1].type == GST_NIL) { + /* Empty */ + return o->data + i; + } + } else if (gst_equals(o->data[i], key)) { + return o->data + i; + } + } + return NULL; +} + /* Resize the dictionary table. 
*/ static void gst_object_rehash(Gst *vm, GstObject *o, uint32_t size) { - GstBucket **newBuckets = gst_zalloc(vm, size * sizeof(GstBucket *)); - uint32_t i, count; - for (i = 0, count = o->capacity; i < count; ++i) { - GstBucket *bucket = o->buckets[i]; - while (bucket) { - uint32_t index; - GstBucket *next = bucket->next; - index = gst_hash(bucket->key) % size; - bucket->next = newBuckets[index]; - newBuckets[index] = bucket; - bucket = next; + GstValue *olddata = o->data; + GstValue *newdata = gst_zalloc(vm, size * sizeof(GstValue)); + uint32_t i, oldcapacity; + oldcapacity = o->capacity; + o->data = newdata; + o->capacity = size; + o->deleted = 0; + for (i = 0; i < oldcapacity; i += 2) { + if (olddata[i].type != GST_NIL) { + GstValue *bucket = gst_object_find(o, olddata[i]); + bucket[0] = olddata[i]; + bucket[1] = olddata[i + 1]; } } - o->buckets = newBuckets; - o->capacity = size; -} - -/* Find the bucket that contains the given key */ -static GstBucket *gst_object_find(GstObject *o, GstValue key) { - uint32_t index = gst_hash(key) % o->capacity; - GstBucket *bucket = o->buckets[index]; - while (bucket) { - if (gst_equals(bucket->key, key)) - return bucket; - bucket = bucket->next; - } - return (GstBucket *)0; } /* Get a value out of the object */ GstValue gst_object_get(GstObject *o, GstValue key) { - GstBucket *bucket = gst_object_find(o, key); - if (bucket) { - return bucket->value; + GstValue *bucket = gst_object_find(o, key); + if (bucket && bucket[0].type != GST_NIL) { + return bucket[1]; } else { GstValue nil; nil.type = GST_NIL; @@ -205,75 +215,60 @@ GstValue gst_object_get(GstObject *o, GstValue key) { } /* Remove an entry from the dictionary */ -GstValue gst_object_remove(Gst * vm, GstObject *o, GstValue key) { - GstBucket *bucket, *previous; - uint32_t index = gst_hash(key) % o->capacity; - bucket = o->buckets[index]; - previous = (GstBucket *)0; - while (bucket) { - if (gst_equals(bucket->key, key)) { - if (previous) { - previous->next = 
bucket->next; - } else { - o->buckets[index] = bucket->next; - } - if (o->count < o->capacity / 4) { - gst_object_rehash(vm, o, o->capacity / 2); - } - --o->count; - return bucket->value; - } - previous = bucket; - bucket = bucket->next; - } - /* Return nil if we found nothing */ - { +GstValue gst_object_remove(GstObject *o, GstValue key) { + GstValue *bucket = gst_object_find(o, key); + if (bucket && bucket[0].type != GST_NIL) { + GstValue ret = bucket[1]; + o->count--; + o->deleted++; + bucket[0].type = GST_NIL; + bucket[1].type = GST_BOOLEAN; + return ret; + } else { GstValue nil; nil.type = GST_NIL; return nil; } } -/* Put a value into the dictionary. */ +/* Put a value into the object */ void gst_object_put(Gst *vm, GstObject *o, GstValue key, GstValue value) { - GstBucket *bucket, *previous; - uint32_t index = gst_hash(key) % o->capacity; if (key.type == GST_NIL) return; - /* Do a removal if value is nil */ if (value.type == GST_NIL) { - bucket = o->buckets[index]; - previous = (GstBucket *)0; - while (bucket) { - if (gst_equals(bucket->key, key)) { - if (previous) { - previous->next = bucket->next; - } else { - o->buckets[index] = bucket->next; - } - if (o->count < o->capacity / 4) { - gst_object_rehash(vm, o, o->capacity / 2); - } - --o->count; - return; - } - previous = bucket; - bucket = bucket->next; - } + gst_object_remove(o, key); } else { - bucket = gst_object_find(o, key); - if (bucket) { - bucket->value = value; + GstValue *bucket = gst_object_find(o, key); + if (bucket && bucket[0].type != GST_NIL) { + bucket[1] = value; } else { - if (o->count >= 2 * o->capacity) { - gst_object_rehash(vm, o, 2 * o->capacity); + if (!bucket || 4 * (o->count + o->deleted) >= o->capacity) { + gst_object_rehash(vm, o, 4 * o->count + 6); } - bucket = gst_alloc(vm, sizeof(GstBucket)); - bucket->next = o->buckets[index]; - bucket->value = value; - bucket->key = key; - o->buckets[index] = bucket; + bucket = gst_object_find(o, key); + bucket[0] = key; + bucket[1] = value; 
++o->count; } } } +/* Find next key in an object. Returns nil if no next key. */ +GstValue gst_object_next(GstObject *o, GstValue key) { + GstValue ret; + GstValue *bucket; + if (key.type == GST_NIL) + bucket = o->data - 2; + else + bucket = gst_object_find(o, key); + if (bucket && bucket[0].type != GST_NIL) { + GstValue *nextbucket, *end; + end = o->data + o->capacity; + for (nextbucket = bucket + 2; nextbucket < end; nextbucket += 2) { + if (nextbucket[0].type != GST_NIL) + return nextbucket[0]; + } + } + ret.type = GST_NIL; + return ret; +} + diff --git a/core/gc.c b/core/gc.c index a30ecffc..e106a00c 100644 --- a/core/gc.c +++ b/core/gc.c @@ -142,16 +142,12 @@ void gst_mark(Gst *vm, GstValueUnion x, GstType type) { case GST_OBJECT: if (gc_header(x.object)->color != vm->black) { uint32_t i; - GstBucket *bucket; gc_header(x.object)->color = vm->black; - gc_header(x.object->buckets)->color = vm->black; - for (i = 0; i < x.object->capacity; ++i) { - bucket = x.object->buckets[i]; - while (bucket) { - gc_header(bucket)->color = vm->black; - gst_mark_value(vm, bucket->key); - gst_mark_value(vm, bucket->value); - bucket = bucket->next; + gc_header(x.object->data)->color = vm->black; + for (i = 0; i < x.object->capacity; i += 2) { + if (x.object->data[i].type != GST_NIL) { + gst_mark_value(vm, x.object->data[i]); + gst_mark_value(vm, x.object->data[i + 1]); } } if (x.object->parent != NULL) { diff --git a/core/ids.c b/core/ids.c index 2c03628f..a027129b 100644 --- a/core/ids.c +++ b/core/ids.c @@ -205,32 +205,40 @@ void gst_cache_remove_struct(Gst *vm, char *structmem) { /* Begin creation of a struct */ GstValue *gst_struct_begin(Gst *vm, uint32_t count) { - char *data = gst_alloc(vm, sizeof(uint32_t) * 2 + 4 * count * sizeof(GstValue)); + char *data = gst_zalloc(vm, sizeof(uint32_t) * 2 + 4 * count * sizeof(GstValue)); GstValue *st = (GstValue *) (data + 2 * sizeof(uint32_t)); gst_struct_length(st) = count; return st; } -/* Put a kv pair into a struct that has not yet 
been fully constructed. */ -void gst_struct_put(GstValue *st, GstValue key, GstValue value) { +/* Find an item in a struct */ +static const GstValue *gst_struct_find(const GstValue *st, GstValue key) { uint32_t cap = gst_struct_capacity(st); uint32_t index = (gst_hash(key) % (cap / 2)) * 2; uint32_t i; for (i = index; i < cap; i += 2) { - if (st[i + 1].type == GST_NIL) { - st[i] = key; - st[i + 1] = value; - return; + if (st[i].type == GST_NIL || gst_equals(st[i], key)) { + return st + i; } } for (i = 0; i < index; i += 2) { - if (st[i + 1].type == GST_NIL) { - st[i] = key; - st[i + 1] = value; - return; + if (st[i].type == GST_NIL || gst_equals(st[i], key)) { + return st + i; } } - /* Should not get here if struct was initialized with proper size */ + return NULL; +} + +/* Put a kv pair into a struct that has not yet been fully constructed. + * Behavior is undefined if too many keys are added, or if a key is added + * twice. Nil keys and values are ignored. */ +void gst_struct_put(GstValue *st, GstValue key, GstValue value) { + GstValue *bucket; + if (key.type == GST_NIL || value.type == GST_NIL) return; + bucket = (GstValue *) gst_struct_find(st, key); + if (!bucket) return; + bucket[0] = key; + bucket[1] = value; } /* Finish building a struct */ @@ -246,27 +254,35 @@ const GstValue *gst_struct_end(Gst *vm, GstValue *st) { /* Get an item from a struct */ GstValue gst_struct_get(const GstValue *st, GstValue key) { + GstValue *bucket = gst_struct_find(st, key); + if (!bucket || bucket[0].type == GST_NIL) { + GstValue ret; + ret.type = GST_NIL; + return ret; + } else { + return bucket[1]; + } +} + +/* Get the next key in a struct */ +GstValue gst_struct_next(const GstValue *st, GstValue key) { GstValue ret; - uint32_t cap = gst_struct_capacity(st); - uint32_t index = (gst_hash(key) % (cap / 2)) * 2; - uint32_t i; - for (i = index; i < cap; i += 2) { - if (st[i + 1].type == GST_NIL) { - goto notfound; - } else if (gst_equals(st[i], key)) { - return st[i + 1]; + const 
GstValue *bucket; + if (key.type == GST_NIL) + bucket = st - 2; + else + bucket = gst_struct_find(st, key); + if (bucket && bucket[0].type != GST_NIL) { + const GstValue *nextbucket, *end; + end = st + gst_struct_capacity(st); + for (nextbucket = bucket + 2; nextbucket < end; nextbucket += 2) { + if (nextbucket[0].type != GST_NIL) + return nextbucket[0]; } } - for (i = 0; i < index; i += 2) { - if (st[i + 1].type == GST_NIL) { - goto notfound; - } else if (gst_equals(st[i], key)) { - return st[i + 1]; - } - } - notfound: ret.type = GST_NIL; return ret; + } /****/ diff --git a/core/stl.c b/core/stl.c index 2460047b..f19caa83 100644 --- a/core/stl.c +++ b/core/stl.c @@ -237,7 +237,7 @@ int gst_stl_object(Gst *vm) { if (count % 2 != 0) { gst_c_throwc(vm, "expected even number of arguments"); } - object = gst_object(vm, count / 2); + object = gst_object(vm, count * 2); for (i = 0; i < count; i += 2) { gst_object_put(vm, object, gst_arg(vm, i), gst_arg(vm, i + 1)); } @@ -255,7 +255,7 @@ int gst_stl_struct(Gst *vm) { if (count % 2 != 0) { gst_c_throwc(vm, "expected even number of arguments"); } - st = gst_struct_begin(vm, count / 2); + st = gst_struct_begin(vm, count * 2); for (i = 0; i < count; i += 2) { gst_struct_put(st, gst_arg(vm, i), gst_arg(vm, i + 1)); } @@ -340,6 +340,19 @@ int gst_stl_rawset(Gst *vm) { } } +/* Get next key in struct or object */ +int gst_stl_next(Gst *vm) { + GstValue ds = gst_arg(vm, 0); + GstValue key = gst_arg(vm, 1); + if (ds.type == GST_OBJECT) { + gst_c_return(vm, gst_object_next(ds.data.object, key)); + } else if (ds.type == GST_STRUCT) { + gst_c_return(vm, gst_struct_next(ds.data.st, key)); + } else { + gst_c_throwc(vm, "expected object or struct"); + } +} + /* Print values for inspection */ int gst_stl_print(Gst *vm) { uint32_t j, count; diff --git a/core/vm.c b/core/vm.c index 4424cfa4..a0880b6e 100644 --- a/core/vm.c +++ b/core/vm.c @@ -371,7 +371,7 @@ static int gst_continue_size(Gst *vm, uint32_t stackBase) { { uint32_t i = 3; 
uint32_t kvs = pc[2]; - GstObject *o = gst_object(vm, kvs + 2); + GstObject *o = gst_object(vm, 2 * kvs + 2); kvs = kvs + 3; while (i < kvs) { v1 = stack[pc[i++]]; diff --git a/include/gst/gst.h b/include/gst/gst.h index 1c45f01b..91ef0828 100644 --- a/include/gst/gst.h +++ b/include/gst/gst.h @@ -134,9 +134,6 @@ typedef struct GstFuncDef GstFuncDef; typedef struct GstFuncEnv GstFuncEnv; typedef union GstValueUnion GstValueUnion; -/* Definitely implementation details */ -typedef struct GstBucket GstBucket; - /* API Types */ typedef struct GstModuleItem GstModuleItem; @@ -208,7 +205,8 @@ struct GstBuffer { struct GstObject { uint32_t count; uint32_t capacity; - GstBucket **buckets; + uint32_t deleted; + GstValue *data; GstObject *parent; }; @@ -240,13 +238,6 @@ struct GstFunction { GstFunction *parent; }; -/* A hash table bucket in an object */ -struct GstBucket { - GstValue key; - GstValue value; - GstBucket *next; -}; - /* Contains information about userdata */ struct GstUserdataHeader { uint32_t size; @@ -381,6 +372,7 @@ GstValue *gst_struct_begin(Gst *vm, uint32_t count); void gst_struct_put(GstValue *st, GstValue key, GstValue value); const GstValue *gst_struct_end(Gst *vm, GstValue *st); GstValue gst_struct_get(const GstValue *st, GstValue key); +GstValue gst_struct_next(const GstValue *st, GstValue key); /****/ /* Object functions */ @@ -388,8 +380,9 @@ GstValue gst_struct_get(const GstValue *st, GstValue key); GstObject *gst_object(Gst *vm, uint32_t capacity); GstValue gst_object_get(GstObject *obj, GstValue key); -GstValue gst_object_remove(Gst *vm, GstObject *obj, GstValue key); +GstValue gst_object_remove(GstObject *obj, GstValue key); void gst_object_put(Gst *vm, GstObject *obj, GstValue key, GstValue value); +GstValue gst_object_next(GstObject *o, GstValue key); /****/ /* Threads */