From 6822400abee9d73e538b3e2e52461fcdef8f2a60 Mon Sep 17 00:00:00 2001
From: Calvin Rose <calsrose@gmail.com>
Date: Sun, 1 Jul 2018 15:49:33 -0400
Subject: [PATCH] Fix large function compilation issue.

---
 src/compiler/boot.dst   | 14 ++++----
 src/compiler/compile.c  | 10 +++---
 src/compiler/emit.c     | 55 +++++++++++++++---------------
 src/compiler/emit.h     | 10 +++---
 src/compiler/regalloc.c | 27 ++++++++-------
 src/compiler/regalloc.h | 50 +++-------------------------
 src/compiler/specials.c | 19 ++++-------
 src/core/corelib.c      | 11 ++----
 src/core/symcache.c     | 74 ++++++++++++++++++++++-------------------
 src/include/dst/dst.h   |  2 +-
 test/suite0.dst         |  3 +-
 test/suite1.dst         |  6 ++++
 12 files changed, 120 insertions(+), 161 deletions(-)

diff --git a/src/compiler/boot.dst b/src/compiler/boot.dst
index 44a1423c..9f1b8df6 100644
--- a/src/compiler/boot.dst
+++ b/src/compiler/boot.dst
@@ -266,7 +266,7 @@ value."
                                    subloop
                                    (tuple ':= bindings (tuple + bindings inc)))))
             :keys (do
-                    (def $dict (gensym "dict"))
+                    (def $dict (gensym))
                     (def preds @['and (tuple not= nil bindings)])
                     (def subloop (doone (+ i 3) preds))
                     (tuple 'do
@@ -276,9 +276,9 @@ value."
                                   subloop
                                   (tuple ':= bindings (tuple next $dict bindings)))))
             :in (do
-                  (def $len (gensym "len"))
-                  (def $i (gensym "i"))
-                  (def $indexed (gensym "indexed"))
+                  (def $len (gensym))
+                  (def $i (gensym))
+                  (def $indexed (gensym))
                   (def preds @['and (tuple < $i $len)])
                   (def subloop (doone (+ i 3) preds))
                   (tuple 'do
@@ -295,7 +295,7 @@ value."
 (defmacro for
   "Similar to loop, but accumulates the loop body into an array and returns that."
   [head & body]
-  (def $accum (gensym "accum"))
+  (def $accum (gensym))
   (tuple 'do
          (tuple 'def $accum @[])
          (tuple 'loop head
@@ -1062,8 +1062,8 @@ onvalue."
      (do
        (file.write stdout " [" source "]")
        (if source-line (file.write stdout " on line "
-         (string source-line) ", column " (string source-col))))
-     (if pc (file.write stdout " at (pc=" (string pc) ")")))
+         (string source-line) ", column " (string source-col)))))
+   (if pc (file.write stdout " (pc=" (string pc) ")"))
    (when tail (file.write stdout " (tailcall)"))
    (file.write stdout "\n"))))
 
diff --git a/src/compiler/compile.c b/src/compiler/compile.c
index 27f8aed9..a96c1073 100644
--- a/src/compiler/compile.c
+++ b/src/compiler/compile.c
@@ -101,7 +101,7 @@ DstSlot dstc_cslot(Dst x) {
 DstSlot dstc_nearslot(DstCompiler *c, DstcRegisterTemp tag) {
     DstSlot ret;
     ret.flags = DST_SLOTTYPE_ANY;
-    ret.index = dstc_getreg_temp(c, tag);
+    ret.index = dstc_allocnear(c, tag);
     ret.constant = dst_wrap_nil();
     ret.envindex = -1;
     return ret;
@@ -111,7 +111,7 @@ DstSlot dstc_nearslot(DstCompiler *c, DstcRegisterTemp tag) {
 DstSlot dstc_farslot(DstCompiler *c) {
     DstSlot ret;
     ret.flags = DST_SLOTTYPE_ANY;
-    ret.index = dstc_getreg(c);
+    ret.index = dstc_allocfar(c);
     ret.constant = dst_wrap_nil();
     ret.envindex = -1;
     return ret;
@@ -314,7 +314,7 @@ DstSlot dstc_gettarget(DstFopts opts) {
         slot.envindex = -1;
         slot.constant = dst_wrap_nil();
         slot.flags = 0;
-        slot.index = dstc_getreg_temp(opts.compiler, DSTC_REGTEMP_3);
+        slot.index = dstc_allocnear(opts.compiler, DSTC_REGTEMP_3);
     }
     return slot;
 }
@@ -399,13 +399,13 @@ static DstSlot dstc_call(DstFopts opts, DstSlot *slots, DstSlot fun) {
         dstc_pushslots(c, slots);
         int32_t fun_register;
         if (opts.flags & DST_FOPTS_TAIL) {
-            fun_register = dstc_to_reg(c, fun);
+            fun_register = dstc_regfar(c, fun, DSTC_REGTEMP_0);
             dstc_emit(c, DOP_TAILCALL | (fun_register << 8));
             retslot = dstc_cslot(dst_wrap_nil());
             retslot.flags = DST_SLOT_RETURNED;
         } else {
             retslot = dstc_gettarget(opts);
-            fun_register = dstc_to_tempreg(c, fun, DSTC_REGTEMP_0);
+            fun_register = dstc_regnear(c, fun, DSTC_REGTEMP_0);
             dstc_emit(c, DOP_CALL |
                     (retslot.index << 8) |
                     (fun_register << 16));
diff --git a/src/compiler/emit.c b/src/compiler/emit.c
index ce88a1f8..582a7245 100644
--- a/src/compiler/emit.c
+++ b/src/compiler/emit.c
@@ -25,7 +25,7 @@
 #include "emit.h"
 
 /* Get a register */
-int32_t dstc_getreg(DstCompiler *c) {
+int32_t dstc_allocfar(DstCompiler *c) {
     int32_t reg = dstc_regalloc_1(&c->scope->ra);
     if (reg > 0xFFFF) {
         dstc_cerror(c, "ran out of internal registers");
@@ -34,7 +34,7 @@ int32_t dstc_getreg(DstCompiler *c) {
 }
 
 /* Get a register less than 256 */
-int32_t dstc_getreg_temp(DstCompiler *c, DstcRegisterTemp tag) {
+int32_t dstc_allocnear(DstCompiler *c, DstcRegisterTemp tag) {
     return dstc_regalloc_temp(&c->scope->ra, tag);
 }
 
@@ -107,10 +107,10 @@ static void dstc_loadconst(DstCompiler *c, Dst k, int32_t reg) {
 }
 
 /* Convert a slot to a two byte register */
-int32_t dstc_to_reg(DstCompiler *c, DstSlot s) {
+int32_t dstc_regfar(DstCompiler *c, DstSlot s, DstcRegisterTemp tag) {
     int32_t reg;
     if (s.flags & (DST_SLOT_CONSTANT | DST_SLOT_REF)) {
-        reg = dstc_getreg(c);
+        reg = dstc_allocnear(c, tag);
         dstc_loadconst(c, s.constant, reg);
         /* If we also are a reference, deref the one element array */
         if (s.flags & DST_SLOT_REF) {
@@ -119,8 +119,8 @@ int32_t dstc_to_reg(DstCompiler *c, DstSlot s) {
                     (reg << 8) |
                     DOP_GET_INDEX);
         }
-    } else if (s.envindex >= 0 || s.index > 0xFF) {
-        reg = dstc_getreg(c);
+    } else if (s.envindex >= 0) {
+        reg = dstc_allocnear(c, tag);
         dstc_emit(c,
                 ((uint32_t)(s.index) << 24) |
                 ((uint32_t)(s.envindex) << 16) |
@@ -134,10 +134,10 @@ int32_t dstc_to_reg(DstCompiler *c, DstSlot s) {
 }
 
 /* Convert a slot to a temporary 1 byte register */
-int32_t dstc_to_tempreg(DstCompiler *c, DstSlot s, DstcRegisterTemp tag) {
+int32_t dstc_regnear(DstCompiler *c, DstSlot s, DstcRegisterTemp tag) {
     int32_t reg;
     if (s.flags & (DST_SLOT_CONSTANT | DST_SLOT_REF)) {
-        reg = dstc_getreg_temp(c, tag);
+        reg = dstc_allocnear(c, tag);
         dstc_loadconst(c, s.constant, reg);
         /* If we also are a reference, deref the one element array */
         if (s.flags & DST_SLOT_REF) {
@@ -146,15 +146,15 @@ int32_t dstc_to_tempreg(DstCompiler *c, DstSlot s, DstcRegisterTemp tag) {
                     (reg << 8) |
                     DOP_GET_INDEX);
         }
-    } else if (s.envindex >= 0 || s.index > 0xFF) {
-        reg = dstc_getreg_temp(c, tag);
+    } else if (s.envindex >= 0) {
+        reg = dstc_allocnear(c, tag);
         dstc_emit(c,
                 ((uint32_t)(s.index) << 24) |
                 ((uint32_t)(s.envindex) << 16) |
                 ((uint32_t)(reg) << 8) |
                 DOP_LOAD_UPVALUE);
     } else if (s.index > 0xFF) {
-        reg = dstc_getreg_temp(c, tag);
+        reg = dstc_allocnear(c, tag);
         dstc_emit(c,
                 ((uint32_t)(s.index) << 16) |
                 ((uint32_t)(reg) << 8) |
@@ -175,7 +175,7 @@ void dstc_free_reg(DstCompiler *c, DstSlot s, int32_t reg) {
 }
 
 /* Check if two slots are equal */
-int dstc_sequal(DstSlot lhs, DstSlot rhs) {
+static int dstc_sequal(DstSlot lhs, DstSlot rhs) {
     if (lhs.flags == rhs.flags &&
             lhs.index == rhs.index &&
             lhs.envindex == rhs.envindex) {
@@ -190,7 +190,7 @@ int dstc_sequal(DstSlot lhs, DstSlot rhs) {
 
 /* Move values from one slot to another. The destination must
  * be writeable (not a literal). */
-int dstc_copy(
+void dstc_copy(
         DstCompiler *c,
         DstSlot dest,
         DstSlot src) {
@@ -202,11 +202,11 @@ int dstc_copy(
     /* Can't write to constants */
     if (dest.flags & DST_SLOT_CONSTANT) {
         dstc_cerror(c, "cannot write to constant");
-        return 0;
+        return;
     }
 
     /* Short circuit if dest and source are equal */
-    if (dstc_sequal(dest, src)) return 0;
+    if (dstc_sequal(dest, src)) return;
 
     /* Types of slots - src */
     /* constants */
@@ -243,19 +243,19 @@ int dstc_copy(
                     (dest.index << 8) |
                     DOP_MOVE_NEAR);
         }
-        return 1;
+        return;
     }
 
     /* Process: src -> srclocal -> destlocal -> dest */
 
     /* src -> srclocal */
-    srclocal = dstc_to_tempreg(c, src, DSTC_REGTEMP_0);
+    srclocal = dstc_regnear(c, src, DSTC_REGTEMP_0);
 
     /* Pull down dest (find destlocal) */
     if (dest.flags & DST_SLOT_REF) {
         writeback = 1;
         destlocal = srclocal;
-        reflocal = dstc_getreg_temp(c, DSTC_REGTEMP_1);
+        reflocal = dstc_allocnear(c, DSTC_REGTEMP_1);
         dstc_emit(c,
                 (dstc_const(c, dest.constant) << 16) |
                 (reflocal << 8) |
@@ -302,13 +302,12 @@ int dstc_copy(
         dstc_regalloc_free(&c->scope->ra, reflocal);
     }
     dstc_free_reg(c, src, srclocal);
-    return 1;
 }
 
 /* Instruction templated emitters */
 
 static int32_t emit1s(DstCompiler *c, uint8_t op, DstSlot s, int32_t rest) {
-    int32_t reg = dstc_to_tempreg(c, s, DSTC_REGTEMP_0);
+    int32_t reg = dstc_regnear(c, s, DSTC_REGTEMP_0);
     int32_t label = dst_v_count(c->buffer);
     dstc_emit(c, op | (reg << 8) | (rest << 16));
     dstc_free_reg(c, s, reg);
@@ -316,7 +315,7 @@ static int32_t emit1s(DstCompiler *c, uint8_t op, DstSlot s, int32_t rest) {
 }
 
 int32_t dstc_emit_s(DstCompiler *c, uint8_t op, DstSlot s) {
-    int32_t reg = dstc_to_reg(c, s);
+    int32_t reg = dstc_regfar(c, s, DSTC_REGTEMP_0);
     int32_t label = dst_v_count(c->buffer);
     dstc_emit(c, op | (reg << 8));
     dstc_free_reg(c, s, reg);
@@ -345,8 +344,8 @@ int32_t dstc_emit_su(DstCompiler *c, uint8_t op, DstSlot s, uint16_t immediate)
 }
 
 static int32_t emit2s(DstCompiler *c, uint8_t op, DstSlot s1, DstSlot s2, int32_t rest) {
-    int32_t reg1 = dstc_to_tempreg(c, s1, DSTC_REGTEMP_0);
-    int32_t reg2 = dstc_to_tempreg(c, s2, DSTC_REGTEMP_1);
+    int32_t reg1 = dstc_regnear(c, s1, DSTC_REGTEMP_0);
+    int32_t reg2 = dstc_regnear(c, s2, DSTC_REGTEMP_1);
     int32_t label = dst_v_count(c->buffer);
     dstc_emit(c, op | (reg1 << 8) | (reg2 << 16) | (rest << 24));
     dstc_free_reg(c, s1, reg1);
@@ -355,8 +354,8 @@ static int32_t emit2s(DstCompiler *c, uint8_t op, DstSlot s1, DstSlot s2, int32_
 }
 
 int32_t dstc_emit_ss(DstCompiler *c, uint8_t op, DstSlot s1, DstSlot s2) {
-    int32_t reg1 = dstc_to_tempreg(c, s1, DSTC_REGTEMP_0);
-    int32_t reg2 = dstc_to_reg(c, s2);
+    int32_t reg1 = dstc_regnear(c, s1, DSTC_REGTEMP_0);
+    int32_t reg2 = dstc_regfar(c, s2, DSTC_REGTEMP_1);
     int32_t label = dst_v_count(c->buffer);
     dstc_emit(c, op | (reg1 << 8) | (reg2 << 16));
     dstc_free_reg(c, s1, reg1);
@@ -373,9 +372,9 @@ int32_t dstc_emit_ssu(DstCompiler *c, uint8_t op, DstSlot s1, DstSlot s2, uint8_
 }
 
 int32_t dstc_emit_sss(DstCompiler *c, uint8_t op, DstSlot s1, DstSlot s2, DstSlot s3) {
-    int32_t reg1 = dstc_to_tempreg(c, s1, DSTC_REGTEMP_0);
-    int32_t reg2 = dstc_to_tempreg(c, s2, DSTC_REGTEMP_1);
-    int32_t reg3 = dstc_to_tempreg(c, s3, DSTC_REGTEMP_2);
+    int32_t reg1 = dstc_regnear(c, s1, DSTC_REGTEMP_0);
+    int32_t reg2 = dstc_regnear(c, s2, DSTC_REGTEMP_1);
+    int32_t reg3 = dstc_regnear(c, s3, DSTC_REGTEMP_2);
     int32_t label = dst_v_count(c->buffer);
     dstc_emit(c, op | (reg1 << 8) | (reg2 << 16) | (reg3 << 24));
     dstc_free_reg(c, s1, reg1);
diff --git a/src/compiler/emit.h b/src/compiler/emit.h
index f4818ffe..269701b2 100644
--- a/src/compiler/emit.h
+++ b/src/compiler/emit.h
@@ -27,11 +27,11 @@
 
 void dstc_emit(DstCompiler *c, uint32_t instr);
 
-int32_t dstc_getreg(DstCompiler *c);
-int32_t dstc_getreg_temp(DstCompiler *c, DstcRegisterTemp);
+int32_t dstc_allocfar(DstCompiler *c);
+int32_t dstc_allocnear(DstCompiler *c, DstcRegisterTemp);
 
-int32_t dstc_to_reg(DstCompiler *c, DstSlot s);
-int32_t dstc_to_tempreg(DstCompiler *c, DstSlot s, DstcRegisterTemp tag);
+int32_t dstc_regfar(DstCompiler *c, DstSlot s, DstcRegisterTemp tag);
+int32_t dstc_regnear(DstCompiler *c, DstSlot s, DstcRegisterTemp tag);
 void dstc_free_reg(DstCompiler *c, DstSlot s, int32_t reg);
 
 int32_t dstc_emit_s(DstCompiler *c, uint8_t op, DstSlot s);
@@ -45,6 +45,6 @@ int32_t dstc_emit_ssu(DstCompiler *c, uint8_t op, DstSlot s1, DstSlot s2, uint8_
 int32_t dstc_emit_sss(DstCompiler *c, uint8_t op, DstSlot s1, DstSlot s2, DstSlot s3);
 
 /* Move value from one slot to another. Cannot copy to constant slots. */
-int dstc_copy(DstCompiler *c, DstSlot dest, DstSlot src);
+void dstc_copy(DstCompiler *c, DstSlot dest, DstSlot src);
 
 #endif
diff --git a/src/compiler/regalloc.c b/src/compiler/regalloc.c
index c1138c51..da936203 100644
--- a/src/compiler/regalloc.c
+++ b/src/compiler/regalloc.c
@@ -29,6 +29,7 @@ void dstc_regalloc_init(DstcRegisterAllocator *ra) {
     ra->count = 0;
     ra->capacity = 0;
     ra->max = 0;
+    ra->regtemps = 0;
 }
 
 void dstc_regalloc_deinit(DstcRegisterAllocator *ra) {
@@ -126,9 +127,16 @@ int32_t dstc_regalloc_1(DstcRegisterAllocator *ra) {
  * without being freed. */
 void dstc_regalloc_free(DstcRegisterAllocator *ra, int32_t reg) {
     /* We cannot free reserved slots */
-    if (reg < 0 || (reg >= 240 && reg <= 255))
+    if (reg < 0)
         return;
+    if (reg >= 0xF0 && reg <= 0xFF) {
+        ra->regtemps &= ~(1 << (reg - 0xF0));
+        return;
+    }
     int32_t chunk = reg >> 5;
+    /* Outside normal chunk range */
+    if (chunk >= ra->count)
+        return;
     int32_t bit = reg & 0x1F;
     ra->chunks[chunk] &= ~ithbit(bit);
 }
@@ -139,6 +147,8 @@ void dstc_regalloc_free(DstcRegisterAllocator *ra, int32_t reg) {
 int32_t dstc_regalloc_temp(DstcRegisterAllocator *ra, DstcRegisterTemp nth) {
     int32_t oldmax = ra->max;
     int32_t reg = dstc_regalloc_1(ra);
+    dst_assert(reg <= 0xFF || !(ra->regtemps & (1 << nth)), "regtemp already allocated");
+    if (reg > 0xFF) ra->regtemps |= 1 << nth;
     if (reg > 0xFF) {
         reg = 0xF0 + nth;
         ra->max = (reg > oldmax) ? reg : oldmax;
@@ -146,8 +156,9 @@ int32_t dstc_regalloc_temp(DstcRegisterAllocator *ra, DstcRegisterTemp nth) {
     return reg;
 }
 
-/* Check if a range is free. Returns the next index to check if not free,
- * -1 if free. */
+/* Disable multi-slot allocation for now. */
+
+/*
 static int32_t checkrange(DstcRegisterAllocator *ra, int32_t start, int32_t end) {
     int32_t startchunk = start / 32;
     int32_t endchunk = end / 32;
@@ -161,7 +172,6 @@ static int32_t checkrange(DstcRegisterAllocator *ra, int32_t start, int32_t end)
         uint32_t block = ra->chunks[chunk];
         uint32_t masking = mask & block;
         if (masking) {
-            /* If block is full, skip it completely. */
             int32_t nextbit = (block == 0xFFFFFFFF)
                 ? 32
                 : count_trailing_zeros(masking) + 1;
@@ -171,7 +181,6 @@ static int32_t checkrange(DstcRegisterAllocator *ra, int32_t start, int32_t end)
     return -1;
 }
 
-/* Mark a range */
 static void markrange(DstcRegisterAllocator *ra, int32_t start, int32_t end) {
     int32_t startchunk = start / 32;
     int32_t endchunk = end / 32;
@@ -185,7 +194,6 @@ static void markrange(DstcRegisterAllocator *ra, int32_t start, int32_t end) {
     }
 }
 
-/* Free a range of registers. */
 void dstc_regalloc_freerange(DstcRegisterAllocator *ra, int32_t start, int32_t n) {
     int32_t end = start + n - 1;
     int32_t startchunk = start / 32;
@@ -200,8 +208,6 @@ void dstc_regalloc_freerange(DstcRegisterAllocator *ra, int32_t start, int32_t n
     }
 }
 
-/* Allocate n contiguous registers. Returns the first register
- * in the range allocated. */
 int32_t dstc_regalloc_n(DstcRegisterAllocator *ra, int32_t n) {
     int32_t start = 0, end = 0, next = 0;
     while (next >= 0) {
@@ -210,15 +216,11 @@ int32_t dstc_regalloc_n(DstcRegisterAllocator *ra, int32_t n) {
         next = checkrange(ra, start, end);
     }
     markrange(ra, start, end);
-    /* Set max */
     if (end > ra->max)
         ra->max = end;
     return start;
 }
 
-/* Allocates registers for a function call. Tries to not move the callee,
- * but will find nargs + 1 other contiguous registers if there is not enough
- * space after the callee. */
 int32_t dstc_regalloc_call(DstcRegisterAllocator *ra, int32_t callee, int32_t nargs) {
     if (checkrange(ra, callee, callee + nargs) < 0) {
         markrange(ra, callee + 1, callee + nargs);
@@ -227,3 +229,4 @@ int32_t dstc_regalloc_call(DstcRegisterAllocator *ra, int32_t callee, int32_t na
     return dstc_regalloc_n(ra, nargs + 1);
 }
 
+*/
diff --git a/src/compiler/regalloc.h b/src/compiler/regalloc.h
index 76fd1a5f..c5367eb9 100644
--- a/src/compiler/regalloc.h
+++ b/src/compiler/regalloc.h
@@ -44,6 +44,7 @@ typedef struct {
     int32_t count; /* number of chunks in chunks */
     int32_t capacity; /* amount allocated for chunks */
     int32_t max; /* The maximum allocated register so far */
+    int32_t regtemps; /* Bitmask recording which temp registers are allocated. */
 } DstcRegisterAllocator;
 
 void dstc_regalloc_init(DstcRegisterAllocator *ra);
@@ -51,56 +52,15 @@ void dstc_regalloc_deinit(DstcRegisterAllocator *ra);
 
 int32_t dstc_regalloc_1(DstcRegisterAllocator *ra);
 void dstc_regalloc_free(DstcRegisterAllocator *ra, int32_t reg);
-void dstc_regalloc_freerange(DstcRegisterAllocator *ra, int32_t regstart, int32_t n);
 int32_t dstc_regalloc_temp(DstcRegisterAllocator *ra, DstcRegisterTemp nth);
-int32_t dstc_regalloc_n(DstcRegisterAllocator *ra, int32_t n);
-int32_t dstc_regalloc_call(DstcRegisterAllocator *ra, int32_t callee, int32_t nargs);
 void dstc_regalloc_clone(DstcRegisterAllocator *dest, DstcRegisterAllocator *src);
 void dstc_regalloc_touch(DstcRegisterAllocator *ra, int32_t reg);
 
-/* Test code */
+/* Multi-slot allocation disabled */
 /*
-#include <stdio.h>
-static void printreg(DstcRegisterAllocator *ra) {
-    printf("count=%d, cap=%d, max=%d\n", ra->count, ra->capacity, ra->max);
-    for (int row = 0; row < ra->count; row++) {
-        uint32_t chunk = ra->chunks[row];
-        putc('[', stdout);
-        for (int i = 0; i < 32; i++) {
-            putc(
-                (chunk & (1 << i))
-                    ? '*'
-                    : '.', stdout);
-        }
-        putc(']', stdout);
-        putc('\n', stdout);
-    }
-    putc('\n', stdout);
-}
-
-static void runtest(void) {
-    DstcRegisterAllocator ra, rb;
-    dstc_regalloc_init(&ra);
-    int32_t a = dstc_regalloc_1(&ra);
-    int32_t b = dstc_regalloc_1(&ra);
-    int32_t c = dstc_regalloc_1(&ra);
-    int32_t d = dstc_regalloc_1(&ra);
-    int32_t e = dstc_regalloc_1(&ra);
-    printreg(&ra);
-    dstc_regalloc_free(&ra, b);
-    dstc_regalloc_free(&ra, d);
-    printreg(&ra);
-    int32_t x = dstc_regalloc_n(&ra, 32);
-    printreg(&ra);
-    dstc_regalloc_1(&ra);
-    printreg(&ra);
-    int32_t y = dstc_regalloc_n(&ra, 101);
-    printreg(&ra);
-    dstc_regalloc_clone(&rb, &ra);
-    printreg(&rb);
-    dstc_regalloc_deinit(&ra);
-    dstc_regalloc_deinit(&rb);
-}
+int32_t dstc_regalloc_n(DstcRegisterAllocator *ra, int32_t n);
+int32_t dstc_regalloc_call(DstcRegisterAllocator *ra, int32_t callee, int32_t nargs);
+void dstc_regalloc_freerange(DstcRegisterAllocator *ra, int32_t regstart, int32_t n);
 */
 
 #endif
diff --git a/src/compiler/specials.c b/src/compiler/specials.c
index 02957097..5d7d2f71 100644
--- a/src/compiler/specials.c
+++ b/src/compiler/specials.c
@@ -63,8 +63,8 @@ static void destructure(DstCompiler *c,
                 for (i = 0; i < len; i++) {
                     DstSlot nextright;
                     Dst subval = values[i];
-                    right_register = dstc_to_tempreg(c, right, DSTC_REGTEMP_0);
-                    subval_register = dstc_getreg_temp(c, DSTC_REGTEMP_1);
+                    right_register = dstc_regnear(c, right, DSTC_REGTEMP_0);
+                    subval_register = dstc_allocnear(c, DSTC_REGTEMP_1);
                     if (i < 0x100) {
                         dstc_emit(c, DOP_GET_INDEX |
                                 (subval_register << 8) |
@@ -72,7 +72,7 @@ static void destructure(DstCompiler *c,
                                 (i << 24));
                     } else {
                         DstSlot islot = dstc_cslot(dst_wrap_integer(i));
-                        int32_t i_register = dstc_to_tempreg(c, islot, DSTC_REGTEMP_2);
+                        int32_t i_register = dstc_regnear(c, islot, DSTC_REGTEMP_2);
                         dstc_emit(c, DOP_GET_INDEX |
                                 (subval_register << 8) |
                                 (right_register << 16) |
@@ -88,8 +88,6 @@ static void destructure(DstCompiler *c,
                     dstc_free_reg(c, right, right_register);
                 }
             }
-            /* Free right */
-            dstc_freeslot(c, right);
             break;
         case DST_TABLE:
         case DST_STRUCT:
@@ -100,9 +98,9 @@ static void destructure(DstCompiler *c,
                     DstSlot nextright;
                     DstSlot kslot = dstc_value(dstc_fopts_default(c), kv->key);
 
-                    right_register = dstc_to_tempreg(c, right, DSTC_REGTEMP_0);
-                    subval_register = dstc_getreg_temp(c, DSTC_REGTEMP_1);
-                    k_register = dstc_to_tempreg(c, kslot, DSTC_REGTEMP_2);
+                    right_register = dstc_regnear(c, right, DSTC_REGTEMP_0);
+                    subval_register = dstc_allocnear(c, DSTC_REGTEMP_1);
+                    k_register = dstc_regnear(c, kslot, DSTC_REGTEMP_2);
                     dstc_emit(c, DOP_GET |
                             (subval_register << 8) |
                             (right_register << 16) |
@@ -117,11 +115,8 @@ static void destructure(DstCompiler *c,
                     dstc_free_reg(c, right, right_register);
                 }
             }
-            /* Free right */
-            dstc_freeslot(c, right);
             break;
     }
-
 }
 
 DstSlot dstc_varset(DstFopts opts, int32_t argn, const Dst *argv) {
@@ -193,7 +188,7 @@ static DstSlot namelocal(DstCompiler *c, Dst head, int32_t flags, DstSlot ret) {
             ret.index > 0xFF) {
         /* Slot is not able to be named */
         DstSlot localslot;
-        localslot.index = dstc_getreg(c);
+        localslot.index = dstc_allocfar(c);
         /* infer type? */
         localslot.flags = flags;
         localslot.envindex = -1;
diff --git a/src/core/corelib.c b/src/core/corelib.c
index 96a8ea8e..cab8bb40 100644
--- a/src/core/corelib.c
+++ b/src/core/corelib.c
@@ -169,15 +169,8 @@ int dst_core_struct(DstArgs args) {
 }
 
 int dst_core_gensym(DstArgs args) {
-    DST_MAXARITY(args, 1);
-    if (args.n == 0) {
-        DST_RETURN_SYMBOL(args, dst_symbol_gen(NULL, 0));
-    } else {
-        const uint8_t *s;
-        int32_t len;
-        DST_ARG_BYTES(s, len, args, 0);
-        DST_RETURN_SYMBOL(args, dst_symbol_gen(s, len));
-    }
+    DST_FIXARITY(args, 0);
+    DST_RETURN_SYMBOL(args, dst_symbol_gen());
 }
 
 int dst_core_gccollect(DstArgs args) {
diff --git a/src/core/symcache.c b/src/core/symcache.c
index 4d90157c..b08b3685 100644
--- a/src/core/symcache.c
+++ b/src/core/symcache.c
@@ -201,48 +201,52 @@ const uint8_t *dst_symbol_from_string(const uint8_t *str) {
     return str;
 }
 
-/* Helper for creating a unique string. Increment an integer
- * represented as an array of integer digits. */
-static void inc_counter(uint8_t *digits, int base, int len) {
-    int i;
-    uint8_t carry = 1;
-    for (i = len - 1; i >= 0; --i) {
-        digits[i] += carry;
-        carry = 0;
-        if (digits[i] == base) {
-            digits[i] = 0;
-            carry = 1;
+/* Store counter for gensym to avoid quadratic behavior */
+DST_THREAD_LOCAL uint8_t gensym_counter[8] = {'_', '0', '0', '0', '0', '0', '0', 0};
+
+/* Increment the gensym buffer. Digits run 0-9, a-z, A-Z (base 62). */
+static void inc_gensym(void) {
+    for (int i = sizeof(gensym_counter) - 2; i; i--) {
+        uint8_t digit = gensym_counter[i];
+        if (digit != 'Z') {
+            /* No carry needed: '9' -> 'a' and 'z' -> 'A' hop the ASCII gaps. */
+            gensym_counter[i] = (digit == '9') ? 'a' :
+                (digit == 'z') ? 'A' : (uint8_t)(digit + 1);
+            break;
+        } else {
+            /* Past the last digit: wrap to '0' and carry into the next position. */
+            gensym_counter[i] = '0';
         }
     }
 }
 
 /* Generate a unique symbol. This is used in the library function gensym. The
- * symbol will be of the format prefix_XXXXXX, where X is a base64 digit, and
- * prefix is the argument passed.  */
-const uint8_t *dst_symbol_gen(const uint8_t *buf, int32_t len) {
+ * symbol will be of the format _XXXXXX, where each X is one of the 62 digits
+ * 0-9, a-z, A-Z. There is no caller-supplied prefix, for speed. */
+const uint8_t *dst_symbol_gen(void) {
     const uint8_t **bucket = NULL;
+    uint8_t *sym;
     int32_t hash = 0;
-    uint8_t counter[6] = {63, 63, 63, 63, 63, 63};
+    int status;
     /* Leave spaces for 6 base 64 digits and two dashes. That means 64^6 possible suffixes, which
      * is enough for resolving collisions. */
-    int32_t newlen = len + 7;
-    int32_t newbufsize = newlen + 2 * sizeof(int32_t) + 1;
-    uint8_t *str = (uint8_t *)dst_gcalloc(DST_MEMORY_SYMBOL, newbufsize) + 2 * sizeof(int32_t);
-    dst_string_length(str) = newlen;
-    memcpy(str, buf, len);
-    str[len] = '_';
-    str[newlen] = 0;
-    uint8_t *saltbuf = str + len + 1;
-    int status = 1;
-    while (status) {
-        int i;
-        inc_counter(counter, 64, 6);
-        for (i = 0; i < 6; ++i)
-            saltbuf[i] = dst_base64[counter[i]];
-        hash = dst_string_calchash(str, newlen);
-        bucket = dst_symcache_findmem(str, newlen, hash, &status);
-    }
-    dst_string_hash(str) = hash;
-    dst_symcache_put((const uint8_t *)str, bucket);
-    return (const uint8_t *)str;
+    do {
+        hash = dst_string_calchash(
+                gensym_counter,
+                sizeof(gensym_counter) - 1);
+        bucket = dst_symcache_findmem(
+                gensym_counter,
+                sizeof(gensym_counter) - 1,
+                hash,
+                &status);
+    } while (status && (inc_gensym(), 1));
+    sym = (uint8_t *) dst_gcalloc(
+            DST_MEMORY_SYMBOL,
+            2 * sizeof(int32_t) + sizeof(gensym_counter)) +
+        (2 * sizeof(int32_t));
+    memcpy(sym, gensym_counter, sizeof(gensym_counter));
+    dst_string_length(sym) = sizeof(gensym_counter) - 1;
+    dst_string_hash(sym) = hash;
+    dst_symcache_put((const uint8_t *)sym, bucket);
+    return (const uint8_t *)sym;
 }
diff --git a/src/include/dst/dst.h b/src/include/dst/dst.h
index c579d5da..f048fcbe 100644
--- a/src/include/dst/dst.h
+++ b/src/include/dst/dst.h
@@ -129,7 +129,7 @@ void dst_puts(const uint8_t *str);
 const uint8_t *dst_symbol(const uint8_t *str, int32_t len);
 const uint8_t *dst_symbol_from_string(const uint8_t *str);
 const uint8_t *dst_csymbol(const char *str);
-const uint8_t *dst_symbol_gen(const uint8_t *buf, int32_t len);
+const uint8_t *dst_symbol_gen(void);
 #define dst_symbolv(str, len) dst_wrap_symbol(dst_symbol((str), (len)))
 #define dst_csymbolv(cstr) dst_wrap_symbol(dst_csymbol(cstr))
 
diff --git a/test/suite0.dst b/test/suite0.dst
index 57b48e0f..e51ed92c 100644
--- a/test/suite0.dst
+++ b/test/suite0.dst
@@ -238,12 +238,11 @@
 # Gensym tests
 
 (assert (not= (gensym) (gensym)) "two gensyms not equal")
-(assert (not= (gensym 'abc) (gensym 'abc)) "two gensyms with arg not equal")
 ((fn []
 	(def syms (table))
 	(var count 0)
 	(while (< count 128)
-		(put syms (gensym 'beep) true)
+		(put syms (gensym) true)
 		(:= count (+ 1 count)))
 	(assert (= (length syms) 128) "many symbols")))
 
diff --git a/test/suite1.dst b/test/suite1.dst
index 2e3c36b8..5e175b07 100644
--- a/test/suite1.dst
+++ b/test/suite1.dst
@@ -161,4 +161,10 @@
 (testmarsh @{1 2 3 4}  "marshal table")
 (testmarsh {1 2 3 4}  "marshal struct")
 
+# Large functions
+(def manydefs (for [i :range [0 300]] (tuple 'def (gensym) (string "value_" i))))
+(array.push manydefs (tuple * 10000 3 5 7 9))
+(def f (compile (tuple.prepend manydefs 'do) *env*))
+(assert (= (f) (* 10000 3 5 7 9)) "long function compilation")
+
 (end-suite)