Add Robin Hood hashing to structs.

Previously, the internal layout of a struct depended on the
order in which its values were inserted, which was incorrect.
Robin Hood hashing corrects this by making the internal
structure of the hashtable independent of insertion order.
This, in turn, allows naive and deterministic equality, comparison, and hashing of structs.
This commit is contained in:
Calvin Rose 2017-05-09 13:18:07 -04:00
parent 8aa99556e7
commit 0e29b52d96
4 changed files with 102 additions and 15 deletions

View File

@ -238,16 +238,12 @@ static const GstValue *gst_struct_find(const GstValue *st, GstValue key) {
uint32_t cap = gst_struct_capacity(st);
uint32_t index = (gst_hash(key) % (cap / 2)) * 2;
uint32_t i;
for (i = index; i < cap; i += 2) {
if (st[i].type == GST_NIL || gst_equals(st[i], key)) {
for (i = index; i < cap; i += 2)
if (st[i].type == GST_NIL || gst_equals(st[i], key))
return st + i;
}
}
for (i = 0; i < index; i += 2) {
if (st[i].type == GST_NIL || gst_equals(st[i], key)) {
for (i = 0; i < index; i += 2)
if (st[i].type == GST_NIL || gst_equals(st[i], key))
return st + i;
}
}
return NULL;
}
@ -255,12 +251,56 @@ static const GstValue *gst_struct_find(const GstValue *st, GstValue key) {
* Behavior is undefined if too many keys are added, or if a key is added
* twice. Nil keys and values are ignored. */
void gst_struct_put(GstValue *st, GstValue key, GstValue value) {
    /* Inserts the pair with Robin Hood probing. st holds key/value pairs in
     * adjacent slots (key at even index i, value at i + 1). Ties between
     * colliding pairs are broken by hash and then by gst_compare, so the
     * final layout does not depend on the order of insertion. */
    uint32_t cap = gst_struct_capacity(st);
    uint32_t hash, index, dist;
    uint32_t i, j;
    uint32_t bounds[4];
    /* Nil keys and values are ignored. A capacity below 2 has no complete
     * kv slot and would make the modulo below divide by zero. */
    if (key.type == GST_NIL || value.type == GST_NIL || cap < 2) return;
    hash = gst_hash(key);
    index = (hash % (cap / 2)) * 2;
    /* Probe [index, cap) first, then wrap around to [0, index) */
    bounds[0] = index;
    bounds[1] = cap;
    bounds[2] = 0;
    bounds[3] = index;
    for (dist = 0, j = 0; j < 4; j += 2)
        for (i = bounds[j]; i < bounds[j + 1]; i += 2, dist += 2) {
            int status;
            uint32_t otherhash, otherindex, otherdist;
            /* We found an empty slot, so just add key and value */
            if (st[i].type == GST_NIL) {
                st[i] = key;
                st[i + 1] = value;
                return;
            }
            /* Robinhood hashing - check if colliding kv pair
             * is closer to their source than current. */
            otherhash = gst_hash(st[i]);
            otherindex = (otherhash % (cap / 2)) * 2;
            otherdist = (i + cap - otherindex) % cap;
            /* Order by probe distance, then hash, then value comparison */
            if (dist < otherdist)
                status = -1;
            else if (otherdist < dist)
                status = 1;
            else if (hash < otherhash)
                status = -1;
            else if (otherhash < hash)
                status = 1;
            else
                status = gst_compare(key, st[i]);
            /* If other is closer to their ideal slot */
            if (status == 1) {
                /* Swap current kv pair with pair in slot */
                GstValue t1, t2;
                t1 = st[i];
                t2 = st[i + 1];
                st[i] = key;
                st[i + 1] = value;
                key = t1;
                value = t2;
                /* Save dist and hash of new kv pair; the displaced pair
                 * keeps probing from the next slot. */
                dist = otherdist;
                hash = otherhash;
            } else if (status == 0) {
                /* This should not happen - it means
                 * that a key was added to the struct more than once */
                return;
            }
        }
    /* Falling out of both ranges means no free slot was found; the pair
     * currently being carried is dropped. */
}
/* Finish building a struct */

View File

@ -427,6 +427,46 @@ int gst_stl_transfer(Gst *vm) {
gst_c_return(vm, ret);
}
/* Return vm->thread to the caller, wrapped as a value */
int gst_stl_current(Gst *vm) {
    GstValue current = gst_wrap_thread(vm->thread);
    gst_c_return(vm, current);
}
/* Return the parent of the thread passed as argument 0, or nil when
 * that thread has no parent. Throws "expected thread" when
 * gst_check_thread rejects the argument. */
/* TODO - consider implications of this function
 * for sandboxing */
int gst_stl_parent(Gst *vm) {
    GstThread *thread;
    if (!gst_check_thread(vm, 0, &thread)) {
        gst_c_throwc(vm, "expected thread");
    }
    if (thread->parent != NULL) {
        gst_c_return(vm, gst_wrap_thread(thread->parent));
    }
    gst_c_return(vm, gst_wrap_nil());
}
/* Get the status of a thread as a string: "pending", "alive", "dead"
 * or "error". Throws "expected thread" when gst_check_thread rejects
 * argument 0. */
int gst_stl_status(Gst *vm) {
    GstThread *t;
    const char *cstr;
    if (!gst_check_thread(vm, 0, &t))
        gst_c_throwc(vm, "expected thread");
    switch (t->status) {
        case GST_THREAD_PENDING:
            cstr = "pending";
            break;
        case GST_THREAD_ALIVE:
            cstr = "alive";
            break;
        case GST_THREAD_DEAD:
            cstr = "dead";
            break;
        case GST_THREAD_ERROR:
            cstr = "error";
            break;
        default:
            /* Keeps cstr initialized if status ever holds a value that is
             * not one of the enumerators handled above. */
            cstr = "unknown";
            break;
    }
    gst_c_return(vm, gst_string_cv(vm, cstr));
}
/* Associative get */
int gst_stl_get(Gst *vm) {
GstValue ret;
@ -840,6 +880,9 @@ static const GstModuleItem const std_module[] = {
{"string", gst_stl_string},
{"thread", gst_stl_thread},
{"transfer", gst_stl_transfer},
{"status", gst_stl_status},
{"current", gst_stl_current},
{"parent", gst_stl_parent},
{"print", gst_stl_print},
{"tostring", gst_stl_tostring},
{"exit", gst_stl_exit},

View File

@ -186,6 +186,7 @@ int gst_continue(Gst *vm) {
case GST_OP_RTN: /* Return nil */
stack = gst_thread_popframe(vm, vm->thread);
if (vm->thread->count < GST_FRAME_SIZE) {
vm->thread->status = GST_THREAD_DEAD;
vm->ret.type = GST_NIL;
return GST_RETURN_OK;
}
@ -197,6 +198,7 @@ int gst_continue(Gst *vm) {
temp = stack[pc[1]];
stack = gst_thread_popframe(vm, vm->thread);
if (vm->thread->count < GST_FRAME_SIZE) {
vm->thread->status = GST_THREAD_DEAD;
vm->ret = temp;
return GST_RETURN_OK;
}
@ -303,6 +305,7 @@ int gst_continue(Gst *vm) {
stack = gst_thread_popframe(vm, vm->thread);
if (status == GST_RETURN_OK) {
if (vm->thread->count < GST_FRAME_SIZE) {
vm->thread->status = GST_THREAD_DEAD;
return status;
} else {
stack[gst_frame_ret(stack)] = vm->ret;
@ -379,7 +382,7 @@ int gst_continue(Gst *vm) {
vm_error:
if (stack == NULL || vm->thread->parent == NULL)
return GST_RETURN_ERROR;
vm->thread->status = GST_THREAD_DEAD;
vm->thread->status = GST_THREAD_ERROR;
vm->thread = vm->thread->parent;
stack = vm->thread->data + vm->thread->count;
pc = gst_frame_pc(stack);

View File

@ -209,7 +209,8 @@ struct GstThread {
enum {
GST_THREAD_PENDING = 0,
GST_THREAD_ALIVE,
GST_THREAD_DEAD
GST_THREAD_DEAD,
GST_THREAD_ERROR
} status;
};