1
0
mirror of https://github.com/janet-lang/janet synced 2024-11-15 05:04:49 +00:00
janet/core/ids.c
Calvin Rose 0e29b52d96 Add robinhood hashing to structs.
Previously, the internal structure of a struct changed when values
were inserted in different orders, which was incorrect.
Robin Hood hashing corrects this by
making the internal structure of the hashtable independent
of insertion order. This, in turn, allows naive and deterministic equality, comparison, and hashing of structs.
2017-05-09 13:20:28 -04:00

460 lines
15 KiB
C

/*
* Copyright (c) 2017 Calvin Rose
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <gst/gst.h>
/****/
/* Cache */
/****/
/* Hash a run of bytes.
 * Starts from the seed 5381 and, for every byte in order, replaces the
 * running value h with (h << 5) + h + byte (i.e. h * 33 + byte), wrapping
 * in 32 bits. A length of 0 yields the seed itself. */
static uint32_t gst_string_calchash(const uint8_t *str, uint32_t len) {
    uint32_t acc = 5381;
    uint32_t pos;
    for (pos = 0; pos < len; ++pos)
        acc = (acc << 5) + acc + str[pos];
    return acc;
}
/* Hash an array of values (used for tuples and for the slot array of a
 * struct). Starts from the seed 5381 and, for every element in order,
 * replaces the running value h with (h << 5) + h + gst_hash(element),
 * wrapping in 32 bits. */
static uint32_t gst_tuple_calchash(const GstValue *tuple, uint32_t len) {
    uint32_t acc = 5381;
    uint32_t pos;
    for (pos = 0; pos < len; ++pos)
        acc = (acc << 5) + acc + gst_hash(tuple[pos]);
    return acc;
}
/* Structural equality for two immutable values that are not necessarily
 * finalized (interned) yet. Used by the cache lookup.
 *
 * Only strings, structs and tuples are handled, because those are the
 * immutable data structures the cache cares about. Values of differing
 * types, and values of any other type, compare as not equal (0).
 * For the handled types the stored hashes and lengths are compared first
 * as a cheap rejection, then the contents element by element. */
static int gst_cache_equal(GstValue x, GstValue y) {
    uint32_t count, k;
    if (x.type != y.type)
        return 0;
    if (x.type == GST_STRING) {
        const uint8_t *a = x.data.string;
        const uint8_t *b = y.data.string;
        if (gst_string_hash(a) != gst_string_hash(b))
            return 0;
        if (gst_string_length(a) != gst_string_length(b))
            return 0;
        count = gst_string_length(a);
        for (k = 0; k < count; ++k) {
            if (a[k] != b[k])
                return 0;
        }
        return 1;
    }
    if (x.type == GST_STRUCT) {
        const GstValue *a = x.data.st;
        const GstValue *b = y.data.st;
        if (gst_struct_hash(a) != gst_struct_hash(b))
            return 0;
        if (gst_struct_length(a) != gst_struct_length(b))
            return 0;
        /* Compare every slot (keys, values and empty slots alike), not
         * just `length` entries. */
        count = gst_struct_capacity(a);
        for (k = 0; k < count; ++k) {
            if (!gst_equals(a[k], b[k]))
                return 0;
        }
        return 1;
    }
    if (x.type == GST_TUPLE) {
        const GstValue *a = x.data.tuple;
        const GstValue *b = y.data.tuple;
        if (gst_tuple_hash(a) != gst_tuple_hash(b))
            return 0;
        if (gst_tuple_length(a) != gst_tuple_length(b))
            return 0;
        count = gst_tuple_length(a);
        for (k = 0; k < count; ++k) {
            if (!gst_equals(a[k], b[k]))
                return 0;
        }
        return 1;
    }
    /* Equality is deliberately not implemented for any other type. */
    return 0;
}
/* Look up key in the VM cache using linear probing.
 *
 * Probing starts at hash % capacity and visits each slot at most once,
 * wrapping from the last slot to slot 0. A GST_NIL slot is empty and ends
 * the search; a GST_BOOLEAN slot marks a deleted entry and is skipped.
 *
 * On a hit, *success is set to 1 and the entry's location is returned. If a
 * deleted slot was passed on the way, the entry is first moved into the
 * earliest such slot (its old slot becomes deleted) and that new location
 * is returned.
 * On a miss, *success is set to 0 and the first empty-or-deleted slot seen
 * is returned: the place where the key would be inserted. This is NULL if
 * no such slot exists. */
static GstValue *gst_cache_find(Gst *vm, GstValue key, int *success) {
    uint32_t hash = gst_hash(key);
    uint32_t capacity = vm->cache_capacity;
    uint32_t slot = hash % capacity;
    uint32_t probed;
    GstValue *reusable = NULL;
    for (probed = 0; probed < capacity; ++probed) {
        GstValue *here = vm->cache + slot;
        if (here->type == GST_NIL) {
            /* Truly empty: the key cannot be further along the chain */
            if (reusable == NULL)
                reusable = here;
            break;
        }
        if (here->type == GST_BOOLEAN) {
            /* Deleted marker: remember the first one, keep probing */
            if (reusable == NULL)
                reusable = here;
        } else if (gst_cache_equal(*here, key)) {
            *success = 1;
            if (reusable == NULL)
                return here;
            /* Pull the entry forward into the first deleted slot */
            *reusable = *here;
            here->type = GST_BOOLEAN;
            return reusable;
        }
        /* Advance with wrap-around */
        if (++slot == capacity)
            slot = 0;
    }
    *success = 0;
    return reusable;
}
/* Replace the cache table with a zeroed one of newCapacity slots and
 * re-insert every live entry from the old table.
 *
 * The deleted-entry counter is reset because deleted markers are not
 * carried over. The old table is released with gst_raw_free.
 * GST_OUT_OF_MEMORY is invoked if the new table cannot be allocated. */
static void gst_cache_resize(Gst *vm, uint32_t newCapacity) {
    uint32_t n, previousCapacity;
    GstValue *previous = vm->cache;
    GstValue *fresh = gst_raw_calloc(1, newCapacity * sizeof(GstValue));
    if (fresh == NULL) {
        GST_OUT_OF_MEMORY;
    }
    previousCapacity = vm->cache_capacity;
    /* Install the new table first so gst_cache_find probes into it */
    vm->cache = fresh;
    vm->cache_capacity = newCapacity;
    vm->cache_deleted = 0;
    for (n = 0; n < previousCapacity; ++n) {
        int found;
        GstValue *dest;
        GstValue item = previous[n];
        /* Skip empty slots and deleted markers */
        if (item.type == GST_NIL || item.type == GST_BOOLEAN)
            continue;
        dest = gst_cache_find(vm, item, &found);
        if (found || dest == NULL) {
            /* Duplicate entry or no room: there was a problem with the
             * algorithm, so stop re-inserting. */
            break;
        }
        *dest = item;
    }
    gst_raw_free(previous);
}
/* Intern a value in the VM cache.
 *
 * If an equal value is already cached, that cached value is returned and x
 * is not inserted. Otherwise x is inserted, its backing memory is tagged
 * for the gc according to its type, and x itself is returned.
 *
 * The table is rebuilt before inserting when live plus deleted entries
 * exceed half the capacity. */
static GstValue gst_cache_add(Gst *vm, GstValue x) {
    int status = 0;
    GstValue *bucket = gst_cache_find(vm, x, &status);
    if (status)
        return *bucket;
    if ((vm->cache_count + vm->cache_deleted) * 2 > vm->cache_capacity) {
        /* The load can be dominated by deleted markers while cache_count is
         * 0. Sizing purely from cache_count would then request a table with
         * zero slots, and the lookup below would compute hash % 0. Keep a
         * small non-zero minimum instead. */
        uint32_t newCapacity = vm->cache_count * 4;
        if (newCapacity == 0)
            newCapacity = 4;
        gst_cache_resize(vm, newCapacity);
        bucket = gst_cache_find(vm, x, &status);
    }
    /* The insertion slot may be a deleted marker rather than an empty slot.
     * Overwriting it removes that marker, so stop counting it as deleted. */
    if (bucket->type == GST_BOOLEAN && vm->cache_deleted > 0)
        vm->cache_deleted--;
    /* Mark the memory for the gc */
    switch (x.type) {
        default:
            break;
        case GST_STRING:
            gst_mem_tag(gst_string_raw(x.data.string), GST_MEMTAG_STRING);
            break;
        case GST_STRUCT:
            gst_mem_tag(gst_struct_raw(x.data.st), GST_MEMTAG_STRUCT);
            break;
        case GST_TUPLE:
            gst_mem_tag(gst_tuple_raw(x.data.tuple), GST_MEMTAG_TUPLE);
            break;
    }
    /* Add x to the cache */
    vm->cache_count++;
    *bucket = x;
    return x;
}
/* Remove a value from the cache, if present.
 * The slot is turned into a deleted marker (GST_BOOLEAN) rather than being
 * emptied, and the live/deleted counters are adjusted. Does nothing when
 * the value is not cached. */
static void gst_cache_remove(Gst *vm, GstValue x) {
    int found = 0;
    GstValue *slot = gst_cache_find(vm, x, &found);
    if (!found)
        return;
    slot->type = GST_BOOLEAN;
    vm->cache_deleted++;
    vm->cache_count--;
}
/* Remove a string from the cache (called from gc).
 * strmem is the start of the raw allocation; the string bytes begin after
 * a header of two uint32_t. */
void gst_cache_remove_string(Gst *vm, char *strmem) {
    const uint8_t *payload = (const uint8_t *)(strmem + 2 * sizeof(uint32_t));
    GstValue doomed;
    doomed.type = GST_STRING;
    doomed.data.string = payload;
    gst_cache_remove(vm, doomed);
}
/* Remove a tuple from the cache (called from gc).
 * tuplemem is the start of the raw allocation; the tuple elements begin
 * after a header of two uint32_t. */
void gst_cache_remove_tuple(Gst *vm, char *tuplemem) {
    const GstValue *payload = (const GstValue *)(tuplemem + 2 * sizeof(uint32_t));
    GstValue doomed;
    doomed.type = GST_TUPLE;
    doomed.data.tuple = payload;
    gst_cache_remove(vm, doomed);
}
/* Remove a struct from the cache (called from gc).
 * structmem is the start of the raw allocation; the struct slots begin
 * after a header of two uint32_t. */
void gst_cache_remove_struct(Gst *vm, char *structmem) {
    const GstValue *payload = (const GstValue *)(structmem + 2 * sizeof(uint32_t));
    GstValue doomed;
    doomed.type = GST_STRUCT;
    doomed.data.st = payload;
    gst_cache_remove(vm, doomed);
}
/****/
/* Struct Functions */
/****/
/* Begin creation of a struct that will hold `count` key/value pairs.
 * Allocates zeroed memory for a two-uint32_t header followed by 4 * count
 * value slots, records count as the struct length, and returns a pointer
 * to the first slot. Populate the result with gst_struct_put and finalize
 * it with gst_struct_end. */
GstValue *gst_struct_begin(Gst *vm, uint32_t count) {
    char *raw = gst_zalloc(vm, sizeof(uint32_t) * 2 + 4 * count * sizeof(GstValue));
    GstValue *slots = (GstValue *)(raw + 2 * sizeof(uint32_t));
    gst_struct_length(slots) = count;
    return slots;
}
/* Find the slot for key in a struct.
 *
 * Slots are [key, value] pairs, so probing moves in steps of 2. It starts
 * at the key's home pair and wraps around once. Returns a pointer to the
 * key slot that either holds an equal key or is empty (GST_NIL), whichever
 * is met first. Returns NULL if the whole table was scanned without
 * meeting either, or if the struct has no slots at all. */
static const GstValue *gst_struct_find(const GstValue *st, GstValue key) {
    uint32_t cap = gst_struct_capacity(st);
    uint32_t pairs = cap / 2;
    uint32_t index, i;
    /* A struct begun with a count of 0 is allocated without any slots.
     * Bail out before the modulo below would divide by zero; callers treat
     * NULL as "not found". */
    if (pairs == 0)
        return NULL;
    index = (gst_hash(key) % pairs) * 2;
    for (i = index; i < cap; i += 2)
        if (st[i].type == GST_NIL || gst_equals(st[i], key))
            return st + i;
    for (i = 0; i < index; i += 2)
        if (st[i].type == GST_NIL || gst_equals(st[i], key))
            return st + i;
    return NULL;
}
/* Put a kv pair into a struct that has not yet been fully constructed.
 * Behavior is undefined if too many keys are added, or if a key is added
 * twice. Nil keys and values are ignored, as is any put into a struct
 * that has no slots.
 *
 * Slots are [key, value] pairs. Insertion uses Robin Hood probing: when the
 * incoming pair collides with a resident pair, the two are ordered by probe
 * distance from their home slot, then by hash, then by gst_compare. The
 * loser of that ordering is carried onward to the following slots. Because
 * the ordering is total, the final layout does not depend on insertion
 * order. */
void gst_struct_put(GstValue *st, GstValue key, GstValue value) {
    uint32_t cap = gst_struct_capacity(st);
    uint32_t hash, index;
    uint32_t i, j, dist;
    uint32_t bounds[4];
    /* Check for nil before doing any hashing, so ignored pairs never reach
     * the modulo below. */
    if (key.type == GST_NIL || value.type == GST_NIL) return;
    /* No room for even one pair: avoid hash % 0. */
    if (cap < 2) return;
    hash = gst_hash(key);
    index = (hash % (cap / 2)) * 2;
    /* Probe [index, cap) first, then wrap to [0, index) */
    bounds[0] = index;
    bounds[1] = cap;
    bounds[2] = 0;
    bounds[3] = index;
    for (dist = 0, j = 0; j < 4; j += 2)
    for (i = bounds[j]; i < bounds[j + 1]; i += 2, dist += 2) {
        int status;
        uint32_t otherhash, otherindex, otherdist;
        /* We found an empty slot, so just add key and value */
        if (st[i].type == GST_NIL) {
            st[i] = key;
            st[i + 1] = value;
            return;
        }
        /* Robinhood hashing - check if colliding kv pair
         * is closer to their source than current. */
        otherhash = gst_hash(st[i]);
        otherindex = (otherhash % (cap / 2)) * 2;
        otherdist = (i + cap - otherindex) % cap;
        /* status: -1 keeps the resident pair, 1 evicts it, 0 means the
         * keys compare equal. */
        if (dist < otherdist)
            status = -1;
        else if (otherdist < dist)
            status = 1;
        else if (hash < otherhash)
            status = -1;
        else if (otherhash < hash)
            status = 1;
        else
            status = gst_compare(key, st[i]);
        /* If other is closer to their ideal slot */
        if (status == 1) {
            /* Swap current kv pair with pair in slot */
            GstValue t1, t2;
            t1 = st[i];
            t2 = st[i + 1];
            st[i] = key;
            st[i + 1] = value;
            key = t1;
            value = t2;
            /* Save dist and hash of new kv pair */
            dist = otherdist;
            hash = otherhash;
        } else if (status == 0) {
            /* This should not happen - it means
             * that a key was added to the struct more than once */
            return;
        }
    }
}
/* Finish building a struct.
 * Stores the hash of the full slot array in the struct header, then interns
 * the struct through the cache. The returned pointer is the cached struct,
 * which is either st itself or an equal struct that was already cached. */
const GstValue *gst_struct_end(Gst *vm, GstValue *st) {
    GstValue candidate;
    gst_struct_hash(st) = gst_tuple_calchash(st, gst_struct_capacity(st));
    candidate.type = GST_STRUCT;
    candidate.data.st = (const GstValue *) st;
    return gst_cache_add(vm, candidate).data.st;
}
/* Get the value stored under key in a struct.
 * Returns a nil value when the key is not present. */
GstValue gst_struct_get(const GstValue *st, GstValue key) {
    GstValue missing;
    const GstValue *slot = gst_struct_find(st, key);
    /* slot[0] is the key, slot[1] the value that goes with it */
    if (slot != NULL && slot[0].type != GST_NIL)
        return slot[1];
    missing.type = GST_NIL;
    return missing;
}
/* Get the next key in a struct, in slot order.
 * Pass a nil key to get the first key. Returns nil when the given key is
 * not in the struct or when there are no more keys after it. */
GstValue gst_struct_next(const GstValue *st, GstValue key) {
    const GstValue *limit = st + gst_struct_capacity(st);
    const GstValue *cursor = st;
    if (key.type != GST_NIL) {
        cursor = gst_struct_find(st, key);
        if (cursor == NULL || cursor->type == GST_NIL)
            return gst_wrap_nil();
        /* Resume from the pair after the one holding key */
        cursor += 2;
    }
    while (cursor < limit) {
        if (cursor->type != GST_NIL)
            return *cursor;
        cursor += 2;
    }
    return gst_wrap_nil();
}
/****/
/* Tuple functions */
/****/
/* Allocate a tuple with room for `length` elements and record that length
 * in its header. The elements are left for the caller to fill in right
 * away; finish with gst_tuple_end. */
GstValue *gst_tuple_begin(Gst *vm, uint32_t length) {
    char *raw = gst_alloc(vm, 2 * sizeof(uint32_t) + length * sizeof(GstValue));
    /* Elements start after the two-uint32_t header */
    GstValue *elements = (GstValue *)(raw + (2 * sizeof(uint32_t)));
    gst_tuple_length(elements) = length;
    return elements;
}
/* Finish building a tuple.
 * Stores the hash of the elements in the tuple header, then interns the
 * tuple through the cache. The returned pointer is the cached tuple, which
 * is either the one passed in or an equal tuple that was already cached. */
const GstValue *gst_tuple_end(Gst *vm, GstValue *tuple) {
    GstValue candidate;
    gst_tuple_hash(tuple) = gst_tuple_calchash(tuple, gst_tuple_length(tuple));
    candidate.type = GST_TUPLE;
    candidate.data.tuple = (const GstValue *) tuple;
    return gst_cache_add(vm, candidate).data.tuple;
}
/****/
/* String Functions */
/****/
/* Begin building a string of `length` bytes.
 * Allocates a two-uint32_t header, the bytes, and one extra byte for a
 * terminating 0. The length and the terminator are written; the bytes
 * themselves are left for the caller to fill in before gst_string_end. */
uint8_t *gst_string_begin(Gst *vm, uint32_t length) {
    char *raw = gst_alloc(vm, 2 * sizeof(uint32_t) + length + 1);
    uint8_t *bytes = (uint8_t *) (raw + 2 * sizeof(uint32_t));
    bytes[length] = 0;
    gst_string_length(bytes) = length;
    return bytes;
}
/* Finish building a string.
 * Stores the hash of the bytes in the string header, then interns the
 * string through the cache. The returned pointer is the cached string,
 * which is either str itself or an equal string that was already cached. */
const uint8_t *gst_string_end(Gst *vm, uint8_t *str) {
    GstValue candidate;
    gst_string_hash(str) = gst_string_calchash(str, gst_string_length(str));
    candidate.type = GST_STRING;
    candidate.data.string = (const uint8_t *) str;
    return gst_cache_add(vm, candidate).data.string;
}
/* Intern the `len` bytes at buf as a string.
 *
 * The candidate string is assembled in the VM's scratch buffer, which is
 * reallocated when too small. If an equal string is already cached, that
 * one is returned and the scratch buffer stays available for reuse.
 * Otherwise the scratch memory itself becomes the cached string, so the
 * VM's scratch pointer and length are cleared. */
const uint8_t *gst_string_b(Gst *vm, const uint8_t *buf, uint32_t len) {
    GstValue probe;
    GstValue interned;
    uint8_t *str;
    /* Header (two uint32_t) + bytes + terminating 0 */
    uint32_t needed = len + 2 * sizeof(uint32_t) + 1;
    /* Ensure enough scratch memory */
    if (vm->scratch_len < needed) {
        vm->scratch = gst_alloc(vm, needed);
        vm->scratch_len = needed;
    }
    str = (uint8_t *)(vm->scratch + 2 * sizeof(uint32_t));
    gst_memcpy(str, buf, len);
    str[len] = 0;
    gst_string_length(str) = len;
    gst_string_hash(str) = gst_string_calchash(str, len);
    probe.type = GST_STRING;
    probe.data.string = (const uint8_t *) str;
    interned = gst_cache_add(vm, probe);
    if (interned.data.string == (const uint8_t *) str) {
        /* The cache now owns the scratch memory */
        vm->scratch = NULL;
        vm->scratch_len = 0;
    }
    return interned.data.string;
}
/* Intern a 0-terminated C string. The length is measured up to, and not
 * including, the terminator. */
const uint8_t *gst_string_c(Gst *vm, const char *str) {
    uint32_t n;
    for (n = 0; str[n] != '\0'; ++n) {
        /* just counting */
    }
    return gst_string_b(vm, (const uint8_t *)str, n);
}
/* Intern a 0-terminated C string and wrap the result in a GstValue of
 * type GST_STRING. */
GstValue gst_string_cv(Gst *vm, const char *str) {
    GstValue wrapped;
    wrapped.data.string = gst_string_c(vm, str);
    wrapped.type = GST_STRING;
    return wrapped;
}
/* Three-way comparison of two strings.
 * Bytes are compared in order as unsigned values; the first differing byte
 * decides. If one string is a prefix of the other, the shorter one sorts
 * first. Returns -1 if lhs < rhs, 1 if lhs > rhs, and 0 if they are equal. */
int gst_string_compare(const uint8_t *lhs, const uint8_t *rhs) {
    uint32_t llen = gst_string_length(lhs);
    uint32_t rlen = gst_string_length(rhs);
    uint32_t shared = llen < rlen ? llen : rlen;
    uint32_t k;
    for (k = 0; k < shared; ++k) {
        if (lhs[k] != rhs[k])
            return lhs[k] < rhs[k] ? -1 : 1;
    }
    if (llen == rlen)
        return 0;
    return llen < rlen ? -1 : 1;
}