From b31791200bc53cd0ac7e6c75cc43625794e4828a Mon Sep 17 00:00:00 2001 From: Calvin Rose Date: Sun, 6 May 2018 13:28:09 -0400 Subject: [PATCH] Add long string syntax to dst. --- doc/bytecode.md | 17 ++-- natives/hello/.gitignore | 76 +++++++++++++++ natives/hello/Build/Makefile | 178 ----------------------------------- natives/sqlite3/.gitignore | 59 ++++++++++++ natives/sqlite3/main.c | 4 + src/parser/parse.c | 133 +++++++++++++++----------- src/tools/symcharsgen.c | 64 +++++++++++++ 7 files changed, 289 insertions(+), 242 deletions(-) create mode 100644 natives/hello/.gitignore delete mode 100644 natives/hello/Build/Makefile create mode 100644 src/tools/symcharsgen.c diff --git a/doc/bytecode.md b/doc/bytecode.md index 90261f88..ffe6e7e2 100644 --- a/doc/bytecode.md +++ b/doc/bytecode.md @@ -82,22 +82,23 @@ as well as constants referenced by the function. ## C Functions -Dst uses c functions to bridge to native code. A C function +Dst uses C functions to bridge to native code. A C function (`DstCFunction *` in C) is a C function pointer that can be called like a normal dst closure. From the perspective of the bytecode instruction set, there is no difference -in invoking a c function and invoking a normal dst function. +in invoking a C function and invoking a normal dst function. ## Bytecode Format -Dst bytecode presents an interface to virtual machine with a large number +Dst bytecode presents an interface to a virtual machine with a large number of identical registers that can hold any Dst value (`Dst *` in C). Most instructions have a destination register, and 1 or 2 source register. Registers are simply named with positive integers. Each instruction is a 32 bit integer, meaning that the instruction set is a constant width instruction set like MIPS. The opcode of each instruction is the least significant -byte of the instruction. This means there are 256 possible opcodes, but half of those -are reserved, so 128 possible opcodes. The current implementation uses about half of these. +byte of the instruction. The highest bit of +this leading byte is reserved for debugging purpose, so there are 128 possible opcodes encodable +with this scheme. The current implementation uses about half of these possible opcodes. ``` X - Payload bits @@ -114,7 +115,7 @@ There are a few instruction variants that divide these payload bits. * 0 arg - Used for noops, returning nil, or other instructions that take no arguments. The payload is essentially ignored. -* 1 arg - All payload bits correspond to a single value, usually a signed or a signed integer/ +* 1 arg - All payload bits correspond to a single value, usually a signed or unsigned integer. Used for instructions of 1 argument, like returning a value, yielding a value to the parent fiber, or doing a jump. * 2 arg - Payload is split into byte 2 and bytes 3 and 4. @@ -139,13 +140,13 @@ Each instruction is also listed with a signature, which are the arguments the in expects. There are a handful of instruction signatures, which combine the arity and type of the instruction. The assembler does not do any typechecking per closure, but does prevent jumping to invalid instructions and -failiure to return or error. +failure to return or error. ### Notation * The $ prefix indicates that a instruction parameter is acting as a virtual register (slot). If a parameter does not have the $ suffix in the description, it is acting as some kind - of literal (usually an unisgned integer for indexes, and a signed integer for literal integers). + of literal (usually an unsigned integer for indexes, and a signed integer for literal integers). * Some operators in the description have the suffix 'i' or 'r'. These indicate that these operators correspond to integers or real numbers only, respectively. All diff --git a/natives/hello/.gitignore b/natives/hello/.gitignore new file mode 100644 index 00000000..65e5b53d --- /dev/null +++ b/natives/hello/.gitignore @@ -0,0 +1,76 @@ + +# Created by https://www.gitignore.io/api/c + +### C ### +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + + +# End of https://www.gitignore.io/api/c + +# Created by https://www.gitignore.io/api/cmake + +### CMake ### +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +build + + +# End of https://www.gitignore.io/api/cmake diff --git a/natives/hello/Build/Makefile b/natives/hello/Build/Makefile deleted file mode 100644 index 38639cab..00000000 --- a/natives/hello/Build/Makefile +++ /dev/null @@ -1,178 +0,0 @@ -# CMAKE generated file: DO NOT EDIT! -# Generated by "Unix Makefiles" Generator, CMake Version 3.9 - -# Default target executed when no arguments are given to make. -default_target: all - -.PHONY : default_target - -# Allow only one "make -f Makefile2" at a time, but pass parallelism. -.NOTPARALLEL: - - -#============================================================================= -# Special targets provided by cmake. - -# Disable implicit rules so canonical targets will work. -.SUFFIXES: - - -# Remove some rules from gmake that .SUFFIXES does not remove. -SUFFIXES = - -.SUFFIXES: .hpux_make_needs_suffix_list - - -# Suppress display of executed commands. -$(VERBOSE).SILENT: - - -# A target that is always out of date. -cmake_force: - -.PHONY : cmake_force - -#============================================================================= -# Set environment variables for the build. - -# The shell in which to execute make rules. -SHELL = /bin/sh - -# The CMake executable. -CMAKE_COMMAND = /usr/bin/cmake - -# The command to remove a file. -RM = /usr/bin/cmake -E remove -f - -# Escaping for special characters. -EQUALS = = - -# The top-level source directory on which CMake was run. -CMAKE_SOURCE_DIR = /home/calvin/code/dst/natives/hello - -# The top-level build directory on which CMake was run. -CMAKE_BINARY_DIR = /home/calvin/code/dst/natives/hello/Build - -#============================================================================= -# Targets provided globally by CMake. - -# Special rule for the target rebuild_cache -rebuild_cache: - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." - /usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) -.PHONY : rebuild_cache - -# Special rule for the target rebuild_cache -rebuild_cache/fast: rebuild_cache - -.PHONY : rebuild_cache/fast - -# Special rule for the target edit_cache -edit_cache: - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." - /usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. -.PHONY : edit_cache - -# Special rule for the target edit_cache -edit_cache/fast: edit_cache - -.PHONY : edit_cache/fast - -# The main all target -all: cmake_check_build_system - $(CMAKE_COMMAND) -E cmake_progress_start /home/calvin/code/dst/natives/hello/Build/CMakeFiles /home/calvin/code/dst/natives/hello/Build/CMakeFiles/progress.marks - $(MAKE) -f CMakeFiles/Makefile2 all - $(CMAKE_COMMAND) -E cmake_progress_start /home/calvin/code/dst/natives/hello/Build/CMakeFiles 0 -.PHONY : all - -# The main clean target -clean: - $(MAKE) -f CMakeFiles/Makefile2 clean -.PHONY : clean - -# The main clean target -clean/fast: clean - -.PHONY : clean/fast - -# Prepare targets for installation. -preinstall: all - $(MAKE) -f CMakeFiles/Makefile2 preinstall -.PHONY : preinstall - -# Prepare targets for installation. -preinstall/fast: - $(MAKE) -f CMakeFiles/Makefile2 preinstall -.PHONY : preinstall/fast - -# clear depends -depend: - $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 -.PHONY : depend - -#============================================================================= -# Target rules for targets named hello - -# Build rule for target. -hello: cmake_check_build_system - $(MAKE) -f CMakeFiles/Makefile2 hello -.PHONY : hello - -# fast build rule for target. -hello/fast: - $(MAKE) -f CMakeFiles/hello.dir/build.make CMakeFiles/hello.dir/build -.PHONY : hello/fast - -main.o: main.c.o - -.PHONY : main.o - -# target to build an object file -main.c.o: - $(MAKE) -f CMakeFiles/hello.dir/build.make CMakeFiles/hello.dir/main.c.o -.PHONY : main.c.o - -main.i: main.c.i - -.PHONY : main.i - -# target to preprocess a source file -main.c.i: - $(MAKE) -f CMakeFiles/hello.dir/build.make CMakeFiles/hello.dir/main.c.i -.PHONY : main.c.i - -main.s: main.c.s - -.PHONY : main.s - -# target to generate assembly for a file -main.c.s: - $(MAKE) -f CMakeFiles/hello.dir/build.make CMakeFiles/hello.dir/main.c.s -.PHONY : main.c.s - -# Help Target -help: - @echo "The following are some of the valid targets for this Makefile:" - @echo "... all (the default if no target is provided)" - @echo "... clean" - @echo "... depend" - @echo "... rebuild_cache" - @echo "... hello" - @echo "... edit_cache" - @echo "... main.o" - @echo "... main.i" - @echo "... main.s" -.PHONY : help - - - -#============================================================================= -# Special targets to cleanup operation of make. - -# Special rule to run CMake to check the build system integrity. -# No rule that depends on this can have commands that come from listfiles -# because they might be regenerated. -cmake_check_build_system: - $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 -.PHONY : cmake_check_build_system - diff --git a/natives/sqlite3/.gitignore b/natives/sqlite3/.gitignore index d5e61ece..be614b55 100644 --- a/natives/sqlite3/.gitignore +++ b/natives/sqlite3/.gitignore @@ -15,3 +15,62 @@ build # End of https://www.gitignore.io/api/cmake + +# Created by https://www.gitignore.io/api/c + +### C ### +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + + +# End of https://www.gitignore.io/api/c diff --git a/natives/sqlite3/main.c b/natives/sqlite3/main.c index 1486b24c..9594cc94 100644 --- a/natives/sqlite3/main.c +++ b/natives/sqlite3/main.c @@ -20,6 +20,10 @@ * IN THE SOFTWARE. */ +#ifdef ONEFILE +#include "sqlite3.c" +#endif + #include "sqlite3.h" #include diff --git a/src/parser/parse.c b/src/parser/parse.c index 244fc199..1989a8a0 100644 --- a/src/parser/parse.c +++ b/src/parser/parse.c @@ -44,51 +44,14 @@ static int is_whitespace(uint8_t c) { || c == ','; } -/* Code gen - -printf("static uint32_t symchars[8] = {\n\t"); -for (int i = 0; i < 256; i += 32) { - uint32_t block = 0; - for (int j = 0; j < 32; j++) { - block |= is_symbol_char_gen(i + j) << j; - } - printf("0x%08x%s", block, (i == (256 - 32)) ? "" : ", "); -} -printf("\n};\n"); - -static int is_symbol_char_gen(uint8_t c) { - if (c >= 'a' && c <= 'z') return 1; - if (c >= 'A' && c <= 'Z') return 1; - if (c >= '0' && c <= '9') return 1; - return (c == '!' || - c == '$' || - c == '%' || - c == '&' || - c == '*' || - c == '+' || - c == '-' || - c == '.' || - c == '/' || - c == ':' || - c == '<' || - c == '?' || - c == '=' || - c == '>' || - c == '@' || - c == '\\' || - c == '^' || - c == '_' || - c == '~' || - c == '|'); -} - -The table contains 256 bits, where each bit is 1 -if the corresponding ascci code is a symbol char, and 0 -if not. The upper characters are also considered symbol -chars and are then checked for utf-8 compliance. */ +/* Code generated by tools/symcharsgen.c. + * The table contains 256 bits, where each bit is 1 + * if the corresponding ascci code is a symbol char, and 0 + * if not. The upper characters are also considered symbol + * chars and are then checked for utf-8 compliance. */ static const uint32_t symchars[8] = { - 0x00000000, 0xF7ffec72, 0xd7ffffff, 0x57fffffe, - 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff + 0x00000000, 0xf7ffec72, 0xc7ffffff, 0x57fffffe, + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; /* Check if a character is a valid symbol character @@ -247,6 +210,20 @@ static int escape1(DstParser *p, DstParseState *state, uint8_t c) { return 1; } +static int stringend(DstParser *p, DstParseState *state) { + Dst ret; + if (state->flags & PFLAG_BUFFER) { + DstBuffer *b = dst_buffer(dst_v_count(p->buf)); + dst_buffer_push_bytes(b, p->buf, dst_v_count(p->buf)); + ret = dst_wrap_buffer(b); + } else { + ret = dst_wrap_string(dst_string(p->buf, dst_v_count(p->buf))); + } + dst_v_empty(p->buf); + popstate(p, ret); + return 1; +} + static int stringchar(DstParser *p, DstParseState *state, uint8_t c) { /* Enter escape */ if (c == '\\') { @@ -255,18 +232,7 @@ static int stringchar(DstParser *p, DstParseState *state, uint8_t c) { } /* String end */ if (c == '"') { - /* String end */ - Dst ret; - if (state->flags & PFLAG_BUFFER) { - DstBuffer *b = dst_buffer(dst_v_count(p->buf)); - dst_buffer_push_bytes(b, p->buf, dst_v_count(p->buf)); - ret = dst_wrap_buffer(b); - } else { - ret = dst_wrap_string(dst_string(p->buf, dst_v_count(p->buf))); - } - dst_v_empty(p->buf); - popstate(p, ret); - return 1; + return stringend(p, state); } /* normal char */ dst_v_push(p->buf, c); @@ -407,6 +373,55 @@ static int dotable(DstParser *p, DstParseState *state, uint8_t c) { return root(p, state, c); } +#define PFLAG_INSTRING 8 +#define PFLAG_END_CANDIDATE 16 +static int longstring(DstParser *p, DstParseState *state, uint8_t c) { + if (state->flags & PFLAG_INSTRING) { + /* We are inside the long string */ + if (c == '\\') { + state->flags |= PFLAG_END_CANDIDATE; + state->flags &= ~PFLAG_INSTRING; + state->qcount = 0; /* Use qcount to keep track of number of '=' seen */ + return 1; + } + dst_v_push(p->buf, c); + return 1; + } else if (state->flags & PFLAG_END_CANDIDATE) { + int i; + /* We are checking a potential end of the string */ + if (c == '\\' && state->qcount == state->argn) { + return stringend(p, state); + } + if (c == '=' && state->qcount < state->argn) { + state->qcount++; + return 1; + } + /* Failed end candidate */ + dst_v_push(p->buf, '\\'); + for (i = 0; i < state->qcount; i++) { + dst_v_push(p->buf, '='); + } + dst_v_push(p->buf, c); + state->qcount = 0; + state->flags &= ~PFLAG_END_CANDIDATE; + state->flags |= PFLAG_INSTRING; + return 1; + } else { + /* We are at beginning of string */ + switch (c) { + default: + p->error = "unexpected character in long string delimiter"; + return 1; + case '\\': + state->flags |= PFLAG_INSTRING; + return 1; + case '=': + state->argn++; + return 1; + } + } +} + static int ampersand(DstParser *p, DstParseState *state, uint8_t c) { (void) state; dst_v_pop(p->states); @@ -417,6 +432,9 @@ static int ampersand(DstParser *p, DstParseState *state, uint8_t c) { case '"': pushstate(p, stringchar, PFLAG_BUFFER); return 1; + case '\\': + pushstate(p, longstring, PFLAG_BUFFER); + return 1; case '[': pushstate(p, doarray, PFLAG_CONTAINER | PFLAG_SQRBRACKETS); return 1; @@ -453,6 +471,9 @@ static int root(DstParser *p, DstParseState *state, uint8_t c) { case '@': pushstate(p, ampersand, 0); return 1; + case '\\': + pushstate(p, longstring, 0); + return 1; case ')': case ']': case '}': diff --git a/src/tools/symcharsgen.c b/src/tools/symcharsgen.c new file mode 100644 index 00000000..f436905e --- /dev/null +++ b/src/tools/symcharsgen.c @@ -0,0 +1,64 @@ +/* +* Copyright (c) 2018 Calvin Rose +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to +* deal in the Software without restriction, including without limitation the +* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +* sell copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +*/ + +#include +#include + +static int is_symbol_char_gen(uint8_t c) { + if (c & 0x80) return 1; + if (c >= 'a' && c <= 'z') return 1; + if (c >= 'A' && c <= 'Z') return 1; + if (c >= '0' && c <= '9') return 1; + return (c == '!' || + c == '$' || + c == '%' || + c == '&' || + c == '*' || + c == '+' || + c == '-' || + c == '.' || + c == '/' || + c == ':' || + c == '<' || + c == '?' || + c == '=' || + c == '>' || + c == '@' || + c == '^' || + c == '_' || + c == '~' || + c == '|'); +} + +int main() { + printf("static const uint32_t symchars[8] = {\n "); + for (int i = 0; i < 256; i += 32) { + uint32_t block = 0; + for (int j = 0; j < 32; j++) { + block |= is_symbol_char_gen(i + j) << j; + } + printf("0x%08x%s", block, (i == (256 - 32)) ? "" : ", "); + } + printf("\n};\n"); + return 0; +} +