From 0e29b52d9647ef145fec1da92fd6c2ac0ed9a83c Mon Sep 17 00:00:00 2001 From: Calvin Rose Date: Tue, 9 May 2017 13:18:07 -0400 Subject: [PATCH] Add robinhood hashing to structs. Previously, the internal structure of a struct changed when values were inserted in different orders, which was incorrect. Robinhood hashing should correct this by making the internal structure of the hashtable independent of insertion order. This, in turn, allows naive and deterministic equality, comparison, and hashing of structs. --- core/ids.c | 66 +++++++++++++++++++++++++++++++++++++---------- core/stl.c | 43 ++++++++++++++++++++++++++++++ core/vm.c | 5 +++- include/gst/gst.h | 3 ++- 4 files changed, 102 insertions(+), 15 deletions(-) diff --git a/core/ids.c b/core/ids.c index 0728ebdc..bc573b05 100644 --- a/core/ids.c +++ b/core/ids.c @@ -238,16 +238,12 @@ static const GstValue *gst_struct_find(const GstValue *st, GstValue key) { uint32_t cap = gst_struct_capacity(st); uint32_t index = (gst_hash(key) % (cap / 2)) * 2; uint32_t i; - for (i = index; i < cap; i += 2) { - if (st[i].type == GST_NIL || gst_equals(st[i], key)) { + for (i = index; i < cap; i += 2) + if (st[i].type == GST_NIL || gst_equals(st[i], key)) return st + i; - } - } - for (i = 0; i < index; i += 2) { - if (st[i].type == GST_NIL || gst_equals(st[i], key)) { + for (i = 0; i < index; i += 2) + if (st[i].type == GST_NIL || gst_equals(st[i], key)) return st + i; - } - } return NULL; } @@ -255,12 +251,56 @@ static const GstValue *gst_struct_find(const GstValue *st, GstValue key) { * Behavior is undefined if too many keys are added, or if a key is added * twice. Nil keys and values are ignored. 
*/ void gst_struct_put(GstValue *st, GstValue key, GstValue value) { - GstValue *bucket; + uint32_t cap = gst_struct_capacity(st); + uint32_t hash = gst_hash(key); + uint32_t index = (hash % (cap / 2)) * 2; + uint32_t i, j, dist; + uint32_t bounds[4] = {index, cap, 0, index}; if (key.type == GST_NIL || value.type == GST_NIL) return; - bucket = (GstValue *) gst_struct_find(st, key); - if (!bucket) return; - bucket[0] = key; - bucket[1] = value; + for (dist = 0, j = 0; j < 4; j += 2) + for (i = bounds[j]; i < bounds[j + 1]; i += 2, dist += 2) { + int status; + uint32_t otherhash, otherindex, otherdist; + /* We found an empty slot, so just add key and value */ + if (st[i].type == GST_NIL) { + st[i] = key; + st[i + 1] = value; + return; + } + /* Robinhood hashing - check if colliding kv pair + * is closer to their source than current. */ + otherhash = gst_hash(st[i]); + otherindex = (otherhash % (cap / 2)) * 2; + otherdist = (i + cap - otherindex) % cap; + if (dist < otherdist) + status = -1; + else if (otherdist < dist) + status = 1; + else if (hash < otherhash) + status = -1; + else if (otherhash < hash) + status = 1; + else + status = gst_compare(key, st[i]); + /* If other is closer to their ideal slot */ + if (status == 1) { + /* Swap current kv pair with pair in slot */ + GstValue t1, t2; + t1 = st[i]; + t2 = st[i + 1]; + st[i] = key; + st[i + 1] = value; + key = t1; + value = t2; + /* Save dist and hash of new kv pair */ + dist = otherdist; + hash = otherhash; + } else if (status == 0) { + /* This should not happen - it means + * than a key was added to the struct more than once */ + return; + } + } } /* Finish building a struct */ diff --git a/core/stl.c b/core/stl.c index ccdc6a8f..6b471fa1 100644 --- a/core/stl.c +++ b/core/stl.c @@ -427,6 +427,46 @@ int gst_stl_transfer(Gst *vm) { gst_c_return(vm, ret); } +/* Get current thread */ +int gst_stl_current(Gst *vm) { + gst_c_return(vm, gst_wrap_thread(vm->thread)); +} + +/* Get parent of a thread */ +/* TODO - 
consider implications of this function + * for sandboxing */ +int gst_stl_parent(Gst *vm) { + GstThread *t; + if (!gst_check_thread(vm, 0, &t)) + gst_c_throwc(vm, "expected thread"); + if (t->parent == NULL) + gst_c_return(vm, gst_wrap_nil()); + gst_c_return(vm, gst_wrap_thread(t->parent)); +} + +/* Get the status of a thread */ +int gst_stl_status(Gst *vm) { + GstThread *t; + const char *cstr; + if (!gst_check_thread(vm, 0, &t)) + gst_c_throwc(vm, "expected thread"); + switch (t->status) { + case GST_THREAD_PENDING: + cstr = "pending"; + break; + case GST_THREAD_ALIVE: + cstr = "alive"; + break; + case GST_THREAD_DEAD: + cstr = "dead"; + break; + case GST_THREAD_ERROR: + cstr = "error"; + break; + } + gst_c_return(vm, gst_string_cv(vm, cstr)); +} + /* Associative get */ int gst_stl_get(Gst *vm) { GstValue ret; @@ -840,6 +880,9 @@ static const GstModuleItem const std_module[] = { {"string", gst_stl_string}, {"thread", gst_stl_thread}, {"transfer", gst_stl_transfer}, + {"status", gst_stl_status}, + {"current", gst_stl_current}, + {"parent", gst_stl_parent}, {"print", gst_stl_print}, {"tostring", gst_stl_tostring}, {"exit", gst_stl_exit}, diff --git a/core/vm.c b/core/vm.c index b263272f..b6c24ed9 100644 --- a/core/vm.c +++ b/core/vm.c @@ -186,6 +186,7 @@ int gst_continue(Gst *vm) { case GST_OP_RTN: /* Return nil */ stack = gst_thread_popframe(vm, vm->thread); if (vm->thread->count < GST_FRAME_SIZE) { + vm->thread->status = GST_THREAD_DEAD; vm->ret.type = GST_NIL; return GST_RETURN_OK; } @@ -197,6 +198,7 @@ int gst_continue(Gst *vm) { temp = stack[pc[1]]; stack = gst_thread_popframe(vm, vm->thread); if (vm->thread->count < GST_FRAME_SIZE) { + vm->thread->status = GST_THREAD_DEAD; vm->ret = temp; return GST_RETURN_OK; } @@ -303,6 +305,7 @@ int gst_continue(Gst *vm) { stack = gst_thread_popframe(vm, vm->thread); if (status == GST_RETURN_OK) { if (vm->thread->count < GST_FRAME_SIZE) { + vm->thread->status = GST_THREAD_DEAD; return status; } else { 
stack[gst_frame_ret(stack)] = vm->ret; @@ -379,7 +382,7 @@ int gst_continue(Gst *vm) { vm_error: if (stack == NULL || vm->thread->parent == NULL) return GST_RETURN_ERROR; - vm->thread->status = GST_THREAD_DEAD; + vm->thread->status = GST_THREAD_ERROR; vm->thread = vm->thread->parent; stack = vm->thread->data + vm->thread->count; pc = gst_frame_pc(stack); diff --git a/include/gst/gst.h b/include/gst/gst.h index 74f094ea..0f3b4efc 100644 --- a/include/gst/gst.h +++ b/include/gst/gst.h @@ -209,7 +209,8 @@ struct GstThread { enum { GST_THREAD_PENDING = 0, GST_THREAD_ALIVE, - GST_THREAD_DEAD + GST_THREAD_DEAD, + GST_THREAD_ERROR } status; };