More work on a proof of concept frontend.

Basic frontend being prototyped in examples/sysir/frontend.janet. Still
a lot of work needs to be done here, and some of this code will
eventually move to C most likely, but this is a good way to better
exercise our backend.

Type inference - at the very least _forward_ inference, is the most
needed change here. While one could do this in the compiler
frontend, doing so in sysir/asm is not so much of an issue. "Inference"
here means inserting "bind" instructions when there is only a single
type that will work correctly.
This commit is contained in:
Calvin Rose 2024-05-05 14:45:00 -05:00
parent f582fe1f69
commit ef2dfcd7c3
2 changed files with 243 additions and 7 deletions

View File

@ -0,0 +1,186 @@
# Make a language frontend for the sysir.
# Dialect:
# TODO -
# * basic types
# * constants
# * sequence (do)
# * basic arithmetic
# * bindings
# * branch (if)
# * looping
# * returns
# * tail call returns
# * function definitions
# * arrays (declaration, loads, stores)
# * ...
# insight - using : inside symbols for types can be used to allow manipulating symbols with macros
(def slot-to-name @[])
(def name-to-slot @{})
(def type-to-name @[])
(def name-to-type @{})
(defn get-slot
[&opt new-name]
(def next-slot (length slot-to-name))
(array/push slot-to-name new-name)
(if new-name (put name-to-slot new-name next-slot))
next-slot)
(defn named-slot
[name]
(assert (get name-to-slot name)))
(defn make-type
[&opt new-name]
(def next-type (length type-to-name))
(array/push type-to-name new-name)
(if new-name (put name-to-type new-name next-type))
next-type)
(defn named-type
[name]
(def t (get name-to-type name))
(assert t)
t)
(defn setup-default-types
[into]
(defn add-prim-type
[name native-name]
(array/push into ~(type-prim ,name ,native-name))
(make-type name))
(add-prim-type 'float 'f32)
(add-prim-type 'double 'f64))
(defn type-extract
"Given a symbol:type combination, extract the proper name and the type separately"
[combined-name &opt default-type]
(def parts (string/split ":" combined-name 0 1))
(def [name tp] parts)
[(symbol name) (symbol (or tp default-type))])
(var do-binop nil)
(defn visit1
"Take in a form and compile code and put it into `into`. Return result slot."
[code into]
(cond
# Compile a constant
(number? code)
(let [slot (get-slot)
slottype 'double]
(array/push into ~(bind ,slot ,slottype))
(array/push into ~(constant ,slot ,code))
slot)
# Binding
(symbol? code)
(named-slot code)
# Compile forms
(and (tuple? code) (= :parens (tuple/type code)))
(do
(assert (> (length code) 0))
(def [op & args] code)
(case op
# Arithmetic
'+ (do-binop 'add args into)
'- (do-binop 'subtract args into)
'* (do-binop 'multiply args into)
'/ (do-binop 'divide args into)
# Type hinting
'the
(do
(assert (= 2 (length args)))
(def [xtype x] args)
(def result (visit1 x into))
(array/push into ~(bind ,result ,xtype))
result)
# Named bindings
# TODO - type inference
'def
(do
(assert (= 2 (length args)))
(def [full-name value] args)
(assert (symbol? full-name))
(def [name tp] (type-extract full-name 'double))
(def result (visit1 value into))
(def slot (get-slot name))
(array/push into ~(bind ,slot ,tp))
(array/push into ~(move ,slot ,result))
slot)
# Assignment
'set
(do
(assert (= 2 (length args)))
(def [to x] args)
(def result (visit1 x into))
(def toslot (get-slot to))
(array/push into ~(move ,toslot ,result))
toslot)
# Return
'return
(do
(assert (>= 1 (length args)))
(if (empty? args)
(array/push into '(return))
(do
(def [x] args)
(array/push into ~(return ,(visit1 x into)))))
nil)
# Sequence of operations
'do
(do
(var ret nil)
(each form args (set ret (visit1 form into)))
ret)
(errorf "unknown form %V" code)))
(errorf "cannot compile %V" code)))
(varfn do-binop
[opcode args into]
(var final nil)
(each arg args
(def right (visit1 arg into))
(set final
(if final
(let [result (get-slot)]
# TODO - finish type inference - we should be able to omit the bind
# call and sysir should be able to infer the type
(array/push into ~(bind ,result double))
(array/push into ~(,opcode ,result ,final ,right))
result)
right)))
(assert final))
###
###
###
(def myprog
'(do
(def xyz (+ 1 2 3))
(def abc (* 4 5 6))
(return (/ abc xyz))))
(defn dotest
[]
(def ctx (sysir/context))
(def ir-asm
@['(link-name "entry")
'(parameter-count 0)])
(setup-default-types ir-asm)
(visit1 myprog ir-asm)
(printf "%.99M" ir-asm)
(sysir/asm ctx ir-asm)
(print (sysir/to-c ctx)))
(dotest)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2023 Calvin Rose
* Copyright (c) 2024 Calvin Rose
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
@ -30,8 +30,10 @@
* [ ] named fields (for debugging mostly)
* [x] named registers and types
* [x] better type errors (perhaps mostly for compiler debugging - full type system goes on top)
* [ ] switch internal use of uint32_t everywhere to type struct wrappers for safety
* [ ] support for switch-case
* [ ] x86/x64 machine code target
* [ ] LLVM target
* [ ] target specific extensions - custom instructions and custom primitives
* [ ] better casting semantics
* [x] separate pointer arithmetic from generalized arithmetic (easier to instrument code for safety)?
@ -950,11 +952,59 @@ static void tcheck_equal(JanetSysIR *sysir, uint32_t reg1, uint32_t reg2) {
}
}
static int tcheck_cast_type(JanetSysIR *sysir, uint32_t td, uint32_t ts) {
JanetSysIRLinkage *linkage = sysir->linkage;
if (td == ts) return 0; /* trivial case */
JanetPrim primd = linkage->type_defs[td].prim;
JanetPrim prims = linkage->type_defs[ts].prim;
if (primd == prims) {
/* Pointer casting should be stricter than in C - for now, we
* allow casts as long as size and aligment are identical. Also
* no casting between arrays and pointers
*
* TODO - check array and pointer types have same alignment
switch (primd) {
default:
return -1;
case JANET_PRIM_ARRAY:
return tcheck_cast_type(sysir, linkage->type_defs[td].array.type, linkage->type_defs[ts].array.type);
case JANET_PRIM_POINTER:
return tcheck_cast_type(sysir, linkage->type_defs[td].pointer.type, linkage->type_defs[ts].pointer.type);
}
return 0;
*/
return -1; /* TODO */
}
/* Check that both src and dest are primitive numerics */
for (int i = 0; i < 2; i++) {
JanetPrim p = i ? prims : primd;
switch (p) {
default:
break;
case JANET_PRIM_STRUCT:
case JANET_PRIM_UNION:
case JANET_PRIM_UNKNOWN:
case JANET_PRIM_ARRAY:
case JANET_PRIM_POINTER:
return -1;
}
}
return 0;
}
static void tcheck_cast(JanetSysIR *sysir, uint32_t dest, uint32_t src) {
(void) sysir;
(void) dest;
(void) src;
/* TODO - casting rules */
uint32_t td = sysir->types[dest];
uint32_t ts = sysir->types[src];
int notok = tcheck_cast_type(sysir, td, ts);
if (notok) {
janet_panicf("type failure, %V cannot be cast to %V",
tname(sysir, ts),
tname(sysir, td));
}
}
static void tcheck_constant(JanetSysIR *sysir, uint32_t dest, Janet c) {
@ -1239,7 +1289,6 @@ static void emit_binop(JanetSysIR *ir, JanetBuffer *buffer, JanetBuffer *tempbuf
/* Add nested for loops for any dimensionality of array */
while (linkage->type_defs[operand_type].prim == JANET_PRIM_ARRAY) {
/* TODO - handle fixed_count == SIZE_MAX */
janet_formatb(buffer, "for (size_t _j%u = 0; _j%u < %u; _j%u++) ",
index_index, index_index,
linkage->type_defs[operand_type].array.fixed_count,
@ -1277,7 +1326,7 @@ void janet_sys_ir_lower_to_c(JanetSysIRLinkage *linkage, JanetBuffer *buffer) {
#define EMITBINOP(OP) emit_binop(ir, buffer, tempbuf, instruction, OP)
/* Prelude */
janet_formatb(buffer, "#include <stdint.h>\n\n");
janet_formatb(buffer, "#include <stddef.h>\n\n");
/* Emit type defs */
for (uint32_t j = 0; j < (uint32_t) linkage->ir_ordered->count; j++) {
@ -1457,9 +1506,11 @@ void janet_sys_ir_lower_to_c(JanetSysIRLinkage *linkage, JanetBuffer *buffer) {
}
janet_formatb(buffer, ");\n");
break;
case JANET_SYSOP_CAST:
janet_formatb(buffer, "_r%u = (_t%u) _r%u;\n", instruction.two.dest, ir->types[instruction.two.dest], instruction.two.src);
case JANET_SYSOP_CAST: {
uint32_t to = ir->types[instruction.two.dest];
janet_formatb(buffer, "_r%u = (_t%u) (_r%u);\n", instruction.two.dest, to, instruction.two.src);
break;
}
case JANET_SYSOP_MOVE:
janet_formatb(buffer, "_r%u = _r%u;\n", instruction.two.dest, instruction.two.src);
break;
@ -1517,7 +1568,6 @@ static int sysir_gcmark(void *p, size_t s) {
return 0;
}
static int sysir_context_gc(void *p, size_t s) {
JanetSysIRLinkage *linkage = (JanetSysIRLinkage *)p;
(void) s;