Initial rewrite of compiler

This commit is contained in:
bakpakin 2017-12-14 19:33:45 -05:00
parent a2ee6ffe5c
commit 1293d2e301
6 changed files with 569 additions and 78 deletions

View File

@ -31,7 +31,7 @@ PREFIX=/usr/local
DST_TARGET=dst
DST_XXD=xxd
DEBUGGER=lldb
DST_INTERNAL_HEADERS=$(addprefix core/,symcache.h opcodes.h strtod.h)
DST_INTERNAL_HEADERS=$(addprefix core/,symcache.h opcodes.h strtod.h compile.h)
DST_HEADERS=$(addprefix include/dst/,dst.h)
#############################

View File

@ -21,76 +21,333 @@
*/
#include <dst/dst.h>
#include "compile.h"
#define DST_LOCAL_FLAG_MUTABLE 1
static void dst_compile_cleanup(DstCompiler *c) {
/* Compiler typedefs */
typedef struct DstCompiler DstCompiler;
typedef struct FormOptions FormOptions;
typedef struct SlotTracker SlotTracker;
typedef struct DstScope DstScope;
typedef struct DstCFunctionOptimizer DstCFunctionOptimizer;
#define DST_SLOT_CONSTANT 1
#define DST_SLOT_TEMP 2
#define DST_SLOT_RETURNED 4
#define DST_SLOT_NIL 8
/* A stack slot */
struct DstSlot {
int32_t index;
uint32_t flags;
uint32_t types; /* bit set of possible primitive types */
DstValue constant; /* If the slot has a constant value */
}
/* Most forms that return a constant will not generate any bytecode */
/* Record a compile error on the compiler and return an error slot.
 * sourcemap[0] and sourcemap[1] are read as the integer bounds of the
 * offending source range; m is stored as the error message.
 * The returned slot carries DST_SLOT_ERROR and a nil constant. */
DstSlot dst_compile_error(DstCompiler *c, const DstValue *sourcemap, const uint8_t *m) {
    DstSlot ret;
    c->error_start = dst_unwrap_integer(sourcemap[0]);
    c->error_end = dst_unwrap_integer(sourcemap[1]);
    /* NOTE(review): DstCompiler.error is declared as a DstValue in compile.h
     * while m is a raw string pointer - confirm which type is intended. */
    c->error = m;
    /* Fill in every field so callers never read an indeterminate value */
    ret.flags = DST_SLOT_ERROR;
    ret.index = 0;
    ret.envindex = 0;
    ret.constant = dst_wrap_nil();
    return ret;
}
/* Special forms that need support */
/* cond
* while (continue, break)
* quote
* fn
* def
* var
* do
/* Same as dst_compile_error, but the message is given as a C string that is
 * first converted with dst_cstring. */
DstSlot dst_compile_cerror(DstCompiler *c, const DstValue *sourcemap, const char *m) {
    const uint8_t *message = dst_cstring(m);
    return dst_compile_error(c, sourcemap, message);
}
/* Use these to get sub options. They will traverse the source map so
* compiler errors make sense. Then modify the returned options. */
/* Return a copy of opts narrowed to the element of opts.x at the given index.
 * The source map is narrowed with dst_parse_submap_index so that errors
 * reported for the sub form point at the right place. */
DstFormOptions dst_compile_getopts_index(DstFormOptions opts, int32_t index) {
    const DstValue *sourcemap = dst_parse_submap_index(opts.sourcemap, index);
    DstValue nextval = dst_getindex(opts.x, index);
    opts.x = nextval;
    opts.sourcemap = sourcemap;
    return opts;
}
/* Return a copy of opts whose form is the key itself and whose source map is
 * the sub map found for that key with dst_parse_submap_key. */
DstFormOptions dst_compile_getopts_key(DstFormOptions opts, DstValue key) {
    const DstValue *sourcemap = dst_parse_submap_key(opts.sourcemap, key);
    opts.x = key;
    opts.sourcemap = sourcemap;
    return opts;
}
/* Return a copy of opts narrowed to the value stored under key in opts.x.
 * The source map is narrowed with dst_parse_submap_value. */
DstFormOptions dst_compile_getopts_value(DstFormOptions opts, DstValue key) {
    const DstValue *sourcemap = dst_parse_submap_value(opts.sourcemap, key);
    DstValue nextval = dst_get(opts.x, key);
    opts.x = nextval;
    opts.sourcemap = sourcemap;
    return opts;
}
/* Enter a new scope. Pushes a freshly initialized DstScope onto the
 * compiler's scope stack, growing the stack when it is full. If newfn is
 * non-zero the scope is flagged as the start of a new function. */
void dst_compile_scope(DstCompiler *c, int newfn) {
    DstScope *scope;
    int32_t nextslot = 0;
    /* Grow when the stack is full (count == cap), not only when overfull */
    if (c->scopecount >= c->scopecap) {
        int32_t newcap = 2 * c->scopecount + 2;
        DstScope *grown = realloc(c->scopes, sizeof(DstScope) * (size_t) newcap);
        if (NULL == grown) {
            DST_OUT_OF_MEMORY;
        }
        c->scopes = grown;
        c->scopecap = newcap;
    }
    /* A nested scope inside the same function keeps numbering slots where
     * its parent stopped; dst_compile_popscope copies nextslot back to the
     * parent, so starting again at 0 would hand out slots already in use. */
    if (!newfn && c->scopecount > 0) {
        nextslot = c->scopes[c->scopecount - 1].nextslot;
    }
    scope = c->scopes + c->scopecount++;
    dst_array_init(&scope->constants, 0);
    dst_table_init(&scope->symbols, 4);
    scope->envs = NULL;
    scope->envcount = 0;
    scope->envcap = 0;
    scope->slots = NULL;
    scope->slotcount = 0;
    scope->slotcap = 0;
    scope->freeslots = NULL;
    scope->freeslotcount = 0;
    scope->freeslotcap = 0;
    scope->buffer_offset = c->buffer.count;
    scope->nextslot = nextslot;
    scope->lastslot = -1;
    scope->flags = newfn ? DST_SCOPE_FUNCTION : 0;
}
DstSlot dst_slot_nil() {
DstSlot ret;
ret.index = 0;
ret.flags = (1 << DST_TYPE_NIL) | DST_SLOT_CONSTANT;
ret.constant = dst_wrap_nil();
return ret;
}
/* Leave a scope. Does not build closure.
 * Pops the top scope off the compiler's scope stack and releases the memory
 * it owns. When the popped scope is not a function scope and a parent scope
 * remains, the parent takes over the popped scope's next slot number and
 * its list of free slots. */
void dst_compile_popscope(DstCompiler *c) {
    DstScope *scope;
    dst_assert(c->scopecount, "could not pop scope");
    scope = c->scopes + --c->scopecount;
    /* Move free slots to parent scope if not a new function */
    if (!(scope->flags & DST_SCOPE_FUNCTION) && c->scopecount) {
        DstScope *topscope = c->scopes + c->scopecount - 1;
        int32_t newcount = topscope->freeslotcount + scope->freeslotcount;
        topscope->nextslot = scope->nextslot;
        if (topscope->freeslotcap < newcount) {
            int32_t *grown = realloc(topscope->freeslots, sizeof(int32_t) * (size_t) newcount);
            if (NULL == grown) {
                DST_OUT_OF_MEMORY;
            }
            topscope->freeslots = grown;
            topscope->freeslotcap = newcount;
        }
        /* memcpy must not be given a NULL pointer, even for zero bytes */
        if (scope->freeslotcount > 0) {
            memcpy(
                topscope->freeslots + topscope->freeslotcount,
                scope->freeslots,
                sizeof(int32_t) * (size_t) scope->freeslotcount);
        }
        topscope->freeslotcount = newcount;
    }
    dst_table_deinit(&scope->symbols);
    dst_array_deinit(&scope->constants);
    free(scope->slots);
    free(scope->freeslots);
    free(scope->envs);
}
#define dst_compile_topscope(c) ((c)->scopes + (c)->scopecount - 1)
/* Hand out a slot index in the top scope. A previously freed slot is reused
 * when one is available; otherwise the next unused index is taken. */
static int32_t dst_compile_slotalloc(DstCompiler *c) {
    DstScope *top = dst_compile_topscope(c);
    if (top->freeslotcount != 0) {
        top->freeslotcount -= 1;
        return top->freeslots[top->freeslotcount];
    }
    return top->nextslot++;
}
/* Check whether a slot can satisfy the types accepted by the form options.
 * Both structures keep their type bit set in the low bits of `flags`
 * (DST_SLOTTYPE_ANY); the bits above that mask are unrelated option and slot
 * flags and must not take part in the comparison.
 * Returns the non-zero set of shared type bits, or 0 when there is none. */
int dst_compile_slotmatch(DstFormOptions opts, DstSlot slot) {
    return (int) (opts.flags & slot.flags & DST_SLOTTYPE_ANY);
}
/* Create a local (non constant) slot with the given flags. The slot index
 * comes from dst_compile_slotalloc and the constant is nil. */
DstSlot dst_compile_normalslot(DstCompiler *c, uint32_t flags) {
    DstSlot slot;
    slot.index = dst_compile_slotalloc(c);
    slot.envindex = 0;
    slot.flags = flags;
    slot.constant = dst_wrap_nil();
    return slot;
}
/* Create a slot that stands for the constant x. No stack slot is allocated:
 * the index is -1, and the flags hold the type bit of x together with
 * DST_SLOT_CONSTANT. The compiler argument is not used. */
DstSlot dst_compile_constantslot(DstCompiler *c, DstValue x) {
    DstSlot slot;
    slot.constant = x;
    slot.envindex = 0;
    slot.index = -1;
    slot.flags = DST_SLOT_CONSTANT | (1 << dst_type(x));
    return slot;
}
/* Give a slot back to the top scope so that its index can be reused.
 * Constant and error slots own no index and are ignored. */
void dst_compile_freeslot(DstCompiler *c, DstSlot slot) {
    DstScope *top = dst_compile_topscope(c);
    int32_t needed = top->freeslotcount + 1;
    if ((slot.flags & (DST_SLOT_CONSTANT | DST_SLOT_ERROR)) != 0) {
        return;
    }
    /* Double the list when the new entry does not fit */
    if (needed > top->freeslotcap) {
        int32_t grown = needed * 2;
        top->freeslots = realloc(top->freeslots, sizeof(int32_t) * grown);
        if (top->freeslots == NULL) {
            DST_OUT_OF_MEMORY;
        }
        top->freeslotcap = grown;
    }
    top->freeslots[needed - 1] = slot.index;
    top->freeslotcount = needed;
}
/* Free every slot stored in the array, first element to last.
 * NOTE(review): each element is handed to dst_compile_freeslot, which takes
 * a DstSlot - confirm that the element type of DstArray matches. */
void dst_compile_freeslotarray(DstCompiler *c, DstArray *slots) {
    int32_t at = 0;
    while (at < slots->count) {
        dst_compile_freeslot(c, slots->data[at]);
        at++;
    }
}
/*
* The mechanism for passing environments to closures is a bit complicated,
* but ensures a few properties.
* * Environments are on the stack unless they need to be closurized
* * Environments can be shared between closures
* * A single closure can access any of multiple parent environments in constant time (no linked lists)
*
* FuncDefs all have a list of environment indices that are inherited from the
* parent function, as well as a flag indicating if the closure's own stack variables
* are needed in a nested closure. The list of indices says which of the parent environments
* go into which environment slot for the new closure. This allows closures to use whatever environments
* they need to, as well as pass these environments to sub closures. To access the direct parent's environment,
* the FuncDef must copy the 0th parent environment. If a closure does not need to export its own stack
* variables for creating closures, it must keep the 0th entry in the env table set to NULL.
*
* TODO - check if this code is a bottleneck and search for better data structures.
*/
#define DST_OPTIMIZER_CONSTANTS 1
#define DST_OPTIMIZER_BYTECODE 2
#define DST_OPTIMIZER_PARTIAL_CONSTANTS 4
/* Look a symbol up in the compiler's scopes and return the slot bound to it.
 * Scopes are searched from the top of the scope stack downwards. When the
 * symbol is bound nowhere, an "unknown symbol" compile error is recorded via
 * dst_compile_error and its error slot is returned. Constant and error slots
 * are returned as found. For any other slot found below the top scope, the
 * owning scope is flagged with DST_SCOPE_ENV, environment entries are added
 * to the scopes above it, and the returned copy gets its envindex set. */
DstSlot dst_compile_resolve(
DstCompiler *c,
const DstValue *sourcemap,
const uint8_t *sym) {
/* NOTE(review): the comment and the two struct lines that follow look like
* leftovers of the previous revision that the diff view interleaved into this
* function - confirm and drop them. */
/* A grouping of optimization on a cfunction given certain conditions
* on the arguments (such as all constants, or some known types). The appropriate
* optimizations should be tried before compiling a normal function call. */
struct DstCFunctionOptimizer {
uint32_t flags; /* Indicate what kind of optimizations can be performed */
DstSlot ret;
DstScope *scope = dst_compile_topscope(c);
int32_t env_index = 0;
int foundlocal;
/* Search scopes for symbol, starting from top */
/* NOTE(review): the loop ends by stepping scope to one element before
* c->scopes, which is not a valid pointer to form - consider an index. */
while (scope >= c->scopes) {
/* NOTE(review): symbols is passed by value here but by address to
* dst_table_init and dst_table_deinit - confirm the expected argument. */
DstValue check = dst_table_get(scope->symbols, dst_wrap_symbol(sym));
/* An integer entry is used as an index into the scope's slot list */
if (dst_checktype(check, DST_INTEGER)) {
ret = scope->slots[dst_unwrap_integer(check)];
goto found;
}
scope--;
}
/* Symbol not found */
return dst_compile_error(c, sourcemap, dst_formatc("unknown symbol %q", sym));
/* Symbol was found */
found:
/* Constants and errors can be returned immediately (they are stateless) */
if (ret.flags & (DST_SLOT_CONSTANT | DST_SLOT_ERROR))
return ret;
/* non-local scope needs to expose its environment */
foundlocal = scope == dst_compile_topscope(c);
if (!foundlocal) {
scope->flags |= DST_SCOPE_ENV;
/* First use: create the env list with entry 0 set to -1 */
if (scope->envcount < 1) {
scope->envcount = 1;
scope->envs = malloc(sizeof(int32_t) * 10);
if (NULL == scope->envs) {
DST_OUT_OF_MEMORY;
}
scope->envcap = 10;
scope->envs[0] = -1;
}
scope++;
}
/* Propagate env up to current scope */
/* NOTE(review): when the symbol is local this loop still runs once for the
* top scope and may append an env entry to it - confirm that is intended. */
while (scope <= dst_compile_topscope(c)) {
int32_t j;
int32_t newcount = scope->envcount + 1;
int scopefound = 0;
/* Check if scope already has env. If so, break */
for (j = 1; j < scope->envcount; j++) {
if (scope->envs[j] == env_index) {
scopefound = 1;
env_index = j;
break;
}
}
if (!scopefound) {
/* NOTE(review): env_index is overwritten before it is stored below, so
* the entry records its own position instead of the index it came from
* in the previous scope - confirm against the DstScope.envs contract. */
env_index = scope->envcount;
/* Ensure capacity for adding scope */
if (newcount > scope->envcap) {
int32_t newcap = 2 * newcount;
scope->envs = realloc(scope->envs, sizeof(int32_t) * newcap);
if (NULL == scope->envs) {
DST_OUT_OF_MEMORY;
}
scope->envcap = newcap;
}
scope->envs[scope->envcount] = env_index;
scope->envcount = newcount;
}
scope++;
}
/* Take the slot from the upper scope, and set its envindex before returning. */
if (!foundlocal) {
ret.envindex = env_index;
}
return ret;
}
/* A lexical scope during compilation */
struct DstScope {
DstArray slots;
DstArray freeslots;
DstArray constants;
DstTable symbols; /* Positive numbers are stack slots, negative are negative constant indices */
uint32_t flags;
int32_t nextslot;
/* Compile an array */
/* Compile a single value by dispatching on the type of opts.x.
 * Symbols that are non-empty and do not start with ':' are resolved through
 * the scopes; all other symbols and every type without a dedicated compiler
 * become constant slots. Tuples, arrays, structs and tables go to their own
 * compilers. The compiler's recursion guard is decremented for the duration
 * of the call; when it is exhausted an error slot is returned instead. */
DstSlot dst_compile_value(DstFormOptions opts) {
    DstSlot ret;
    if (opts.compiler->recursion_guard <= 0) {
        return dst_compile_cerror(opts.compiler, opts.sourcemap, "recursed too deeply");
    }
    opts.compiler->recursion_guard--;
    switch (dst_type(opts.x)) {
        default:
            ret = dst_compile_constantslot(opts.compiler, opts.x);
            break;
        case DST_SYMBOL:
            {
                const uint8_t *sym = dst_unwrap_symbol(opts.x);
                if (dst_string_length(sym) > 0 && sym[0] != ':')
                    ret = dst_compile_resolve(opts.compiler, opts.sourcemap, sym);
                else
                    ret = dst_compile_constantslot(opts.compiler, opts.x);
                break;
            }
        case DST_TUPLE:
            ret = dst_compile_tuple(opts);
            break;
        case DST_ARRAY:
            ret = dst_compile_array(opts);
            break;
        case DST_STRUCT:
            ret = dst_compile_struct(opts);
            break;
        case DST_TABLE:
            ret = dst_compile_table(opts);
            break;
    }
    /* Restore the guard on the way out */
    opts.compiler->recursion_guard++;
    return ret;
}
/* Compilation state */
struct DstCompiler {
DstValue error;
jmp_buf on_error;
DstArray scopes;
DstBuffer buffer;
int recursion_guard;
};
DstSlot dst_compile_targetslot(DstFormOptions opts, DstSlot s);
/* Compiler state */
struct DstFormOptions {
DstCompiler *compiler;
DstValue x;
uint32_t flags;
uint32_t types; /* bit set of accepted primitive types */
int32_t target_slot;
};
/* Coerce any slot into the target slot. If no target is specified, return
* the slot unaltered. Otherwise, move and load upvalues as necessary to set the slot. */
DstSlot dst_compile_coercetargetslot(DstFormOptions opts, DstSlot s);

220
core/compile.h Normal file
View File

@ -0,0 +1,220 @@
/*
* Copyright (c) 2017 Calvin Rose
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef DST_COMPILE_H
#define DST_COMPILE_H
#include <dst/dst.h>
/* Compiler typedefs. DstSlot and DstFormOptions are used by name in the
 * declarations below, so they need typedefs like the other structures. */
typedef struct DstCompiler DstCompiler;
typedef struct DstSlot DstSlot;
typedef struct DstFormOptions DstFormOptions;
typedef struct FormOptions FormOptions;
typedef struct SlotTracker SlotTracker;
typedef struct DstScope DstScope;
typedef struct DstCFunctionOptimizer DstCFunctionOptimizer;
/* Flag bits stored in DstSlot.flags. All of them lie above the low 16 bits
 * covered by DST_SLOTTYPE_ANY. */
#define DST_SLOT_CONSTANT 0x10000
#define DST_SLOT_TEMP 0x20000
#define DST_SLOT_RETURNED 0x40000
#define DST_SLOT_NIL 0x80000
#define DST_SLOT_MUTABLE 0x100000
#define DST_SLOT_ERROR 0x200000
/* Mask of the low 16 bits of the flags */
#define DST_SLOTTYPE_ANY 0xFFFF
/* A stack slot */
struct DstSlot {
    int32_t index;
    int32_t envindex; /* 0 is local, positive number is an upvalue */
    uint32_t flags;
    DstValue constant; /* If the slot has a constant value */
};
/* Most forms that return a constant will not generate any bytecode */
/* Special forms that need support */
/* cond
* while (continue, break)
* quote
* fn
* def
* var
* varset
* do
*/
#define DST_OPTIMIZER_CONSTANTS 0x10000
#define DST_OPTIMIZER_BYTECODE 0x20000
#define DST_OPTIMIZER_PARTIAL_CONSTANTS 0x40000
#define DST_OPTIMIZER_SYSCALL 0x80000
/* A grouping of optimization on a cfunction given certain conditions
 * on the arguments (such as all constants, or some known types). The appropriate
 * optimizations should be tried before compiling a normal function call. */
struct DstCFunctionOptimizer {
    uint32_t flags; /* Indicate what kind of optimizations can be performed. */
    /* Also what kind of types can be returned */
    int32_t syscall;
};
/* Flag bits stored in DstScope.flags. Each flag is a distinct bit so that
 * the flags can be combined and tested with bitwise operators. */
#define DST_SCOPE_FUNCTION 1
#define DST_SCOPE_LASTSLOT 2
#define DST_SCOPE_FIRSTSLOT 4
#define DST_SCOPE_ENV 8
/* A lexical scope during compilation */
struct DstScope {
    DstArray constants; /* Constants for the funcdef */
    DstTable symbols; /* Map symbols -> Slot indices */
    /* Hold all slots in use. Data structures that store
     * slots should link them to this data structure */
    DstSlot *slots;
    int32_t slotcount;
    int32_t slotcap;
    /* A vector of freed slots. */
    int32_t *freeslots;
    int32_t freeslotcount;
    int32_t freeslotcap;
    int32_t lastslot;
    int32_t nextslot;
    /* Referenced closure environments. The values at each index correspond
     * to which index to get the environment from in the parent. The environment
     * that corresponds to the direct parent's stack will always have value 0. */
    int32_t *envs;
    int32_t envcount;
    int32_t envcap;
    int32_t buffer_offset; /* Where the bytecode for this scope begins */
    uint32_t flags;
};
/* Compilation state shared by all form compilers */
struct DstCompiler {
/* Stack of scopes: number in use, allocated capacity, and storage */
int32_t scopecount;
int32_t scopecap;
DstScope *scopes;
/* Each new scope records buffer.count as its buffer_offset */
DstBuffer buffer;
DstBuffer mapbuffer;
/* Source range of the last error, as set by dst_compile_error */
int32_t error_start;
int32_t error_end;
/* NOTE(review): dst_compile_error stores a const uint8_t * here - confirm
* whether this should be a DstValue or a string pointer. */
DstValue error;
/* Remaining nesting depth; dst_compile_value fails when it reaches 0 */
int recursion_guard;
};
/* Option bits for DstFormOptions.flags. They lie above the low 16 bits. */
#define DST_FOPTS_TAIL 0x10000
#define DST_FOPTS_FORCESLOT 0x20000
/* Everything a form compiler needs to compile one form */
struct DstFormOptions {
DstCompiler *compiler;
DstValue x; /* The form being compiled */
const DstValue *sourcemap; /* Source map that goes with x */
uint32_t flags; /* bit set of accepted primitive types */
};
/* Outcome of a compilation, reported in DstCompileResults.status */
typedef enum DstCompileStatus {
DST_COMPILE_OK,
DST_COMPILE_ERROR
} DstCompileStatus;
/* Results of compilation, as returned by dst_compile.
* NOTE(review): presumably funcdef is meaningful on DST_COMPILE_OK and error
* on DST_COMPILE_ERROR - confirm against the implementation of dst_compile. */
typedef struct DstCompileResults {
DstCompileStatus status;
DstFuncDef *funcdef;
const uint8_t *error;
} DstCompileResults;
/* Options handed to dst_compile. The typedef name is required because
 * dst_compile takes a DstCompileOptions by value. */
typedef struct DstCompileOptions {
    uint32_t flags;
    const DstValue *sourcemap;
    DstValue src;
    int32_t target;
} DstCompileOptions;
/* Compiler handlers. Used to compile different kinds of expressions. */
typedef DstSlot (*DstFormCompiler)(DstFormOptions opts);
/* Dispatch to correct form compiler */
DstSlot dst_compile_value(DstFormOptions opts);
/* Compile basic types. The names must match the calls made by
 * dst_compile_value (dst_compile_array, dst_compile_struct, dst_compile_table). */
DstSlot dst_compile_constant(DstFormOptions opts);
DstSlot dst_compile_symbol(DstFormOptions opts);
DstSlot dst_compile_array(DstFormOptions opts);
DstSlot dst_compile_struct(DstFormOptions opts);
DstSlot dst_compile_table(DstFormOptions opts);
/* Tuple compilation will handle much of the work */
DstSlot dst_compile_tuple(DstFormOptions opts);
/* Compile special forms */
DstSlot dst_compile_do(DstFormOptions opts);
DstSlot dst_compile_fn(DstFormOptions opts);
DstSlot dst_compile_cond(DstFormOptions opts);
DstSlot dst_compile_while(DstFormOptions opts);
DstSlot dst_compile_quote(DstFormOptions opts);
DstSlot dst_compile_def(DstFormOptions opts);
DstSlot dst_compile_var(DstFormOptions opts);
DstSlot dst_compile_varset(DstFormOptions opts);
/* Compile source code into FuncDef. */
DstCompileResults dst_compile(DstCompileOptions opts);
/****************************************************/
DstSlot dst_compile_error(DstCompiler *c, const DstValue *sourcemap, const uint8_t *m);
DstSlot dst_compile_cerror(DstCompiler *c, const DstValue *sourcemap, const char *m);
/* Use these to get sub options. They will traverse the source map so
* compiler errors make sense. Then modify the returned options. */
DstFormOptions dst_compile_getopts_index(DstFormOptions opts, int32_t index);
DstFormOptions dst_compile_getopts_key(DstFormOptions opts, DstValue key);
DstFormOptions dst_compile_getopts_value(DstFormOptions opts, DstValue key);
/* Scope and slot management. dst_compile_popscope is defined as returning
 * nothing, and dst_compile_normalslot stores its second argument as the
 * slot's flags; the prototypes are kept in step with those definitions. */
void dst_compile_scope(DstCompiler *c, int newfn);
void dst_compile_popscope(DstCompiler *c);
int dst_compile_slotmatch(DstFormOptions opts, DstSlot slot);
DstSlot dst_compile_normalslot(DstCompiler *c, uint32_t flags);
DstSlot dst_compile_constantslot(DstCompiler *c, DstValue x);
void dst_compile_freeslot(DstCompiler *c, DstSlot slot);
void dst_compile_freeslotarray(DstCompiler *c, DstArray *slots);
/* Search for a symbol */
DstSlot dst_compile_resolve(DstCompiler *c, const DstValue *sourcemap, const uint8_t *sym);
/* Get a local slot that can be used as the destination for whatever is compiling. */
DstSlot dst_compile_targetslot(DstFormOptions opts, DstSlot s);
/* Coerce any slot into the target slot. If no target is specified, return
* the slot unaltered. Otherwise, move and load upvalues as necessary to set the slot. */
DstSlot dst_compile_coercetargetslot(DstFormOptions opts, DstSlot s);
DstSlot dst_compile_realizeslot(DstCompiler *c, DstSlot s);
DstSlot dst_compile_returnslot(DstCompiler *c, DstSlot s);
#endif

View File

@ -298,9 +298,7 @@ void *dst_alloc(DstMemoryType type, size_t size) {
size_t total = size + sizeof(DstGCMemoryHeader);
/* Make sure everything is inited */
if (NULL == dst_vm_cache) {
DST_PLEASE_INIT;
}
dst_assert(NULL != dst_vm_cache, "please initialize dst before use");
void *mem = malloc(total);
/* Check for bad malloc */

View File

@ -630,7 +630,7 @@ const uint8_t *dst_formatc(const char *format, ...) {
}
case 'q':
{
const uint8_t *str = dst_to_string(va_arg(args, DstValue));
const uint8_t *str = va_arg(args, const uint8_t *);
dst_escape_string_b(bufp, str);
break;
}

View File

@ -85,18 +85,31 @@
#define DST_LITTLE_ENDIAN 1
#endif
/* Handle runtime errors. dst_exit(m) prints the message together with the
 * file and line of the call site and terminates the process. It can be
 * replaced by defining dst_exit before this point. */
#ifndef dst_exit
#include <stdlib.h>
#include <stdio.h>
#define dst_exit(m) do { \
    printf("runtime error at line %d in file %s: %s\n",\
        __LINE__,\
        __FILE__,\
        (m));\
    exit(-1);\
} while (0)
#endif

/* dst_assert(c, m) calls dst_exit(m) when c is false. When DST_NOASSERT is
 * defined the check expands to an empty statement (c is not evaluated), so
 * code that uses dst_assert still compiles with assertions switched off. */
#ifndef DST_NOASSERT
#define dst_assert(c, m) do { \
    if (!(c)) dst_exit((m)); \
} while (0)
#elif !defined(dst_assert)
#define dst_assert(c, m) do { } while (0)
#endif
/* What to do when out of memory. The default prints a message and exits
 * with status 1; <stdlib.h> is included here because exit is declared there. */
#ifndef DST_OUT_OF_MEMORY
#include <stdio.h>
#include <stdlib.h>
#define DST_OUT_OF_MEMORY do { printf("out of memory\n"); exit(1); } while (0)
#endif
/* What to do when dst is used in an uninitialized state. The default prints
 * a message and exits with status 1; <stdlib.h> is included here because
 * exit is declared there. */
#ifndef DST_PLEASE_INIT
#include <stdio.h>
#include <stdlib.h>
#define DST_PLEASE_INIT do { printf("dst is uninitialized\n"); exit(1); } while (0)
#endif
#define DST_INTEGER_MIN INT32_MIN
#define DST_INTEGER_MAX INT32_MAX
@ -717,13 +730,16 @@ enum DstMemoryType {
void *dst_alloc(DstMemoryType type, size_t size);
/* Clear the DST_MEM_DISABLED flag in the GC header of m. The expansion is
 * parenthesized so that it behaves as a single expression wherever it is used. */
#define dst_enablegc(m) (dst_gc_header(m)->flags &= ~DST_MEM_DISABLED)
/* When doing C interop, it is often needed to disable GC on a value.
* This is needed when a garbage collection could occur in the middle
* of a c function. This could happen, for example, if one calls back
* into dst inside of a c function. The pin and unpin functions toggle
* garbage collection on a value when needed. Note that no dst functions
* will call gc when you don't want it to. GC only happens automatically
* in the interpreter loop. */
/* When doing C interop, it is often needed to disable GC on a value. This is
* needed when a garbage collection could occur in the middle of a c function.
* This could happen, for example, if one calls back into dst inside of a c
* function. The pin and unpin functions toggle garbage collection on a value
* when needed. Note that no dst functions will call gc when you don't want it
* to. GC only happens automatically in the interpreter loop. Pinning values
* will NOT recursively pin sub values.
*
* Be careful when bypassing garbage collection like this. It can easily
* lead to memory leaks or other undesirable side effects. */
void dst_pin(DstValue x);
void dst_unpin(DstValue x);