1
0
mirror of https://github.com/janet-lang/janet synced 2024-12-25 07:50:27 +00:00

Add long string syntax to dst.

This commit is contained in:
Calvin Rose 2018-05-06 13:28:09 -04:00
parent 181a38f412
commit b31791200b
7 changed files with 289 additions and 242 deletions

View File

@ -82,22 +82,23 @@ as well as constants referenced by the function.
## C Functions
Dst uses c functions to bridge to native code. A C function
Dst uses C functions to bridge to native code. A C function
(`DstCFunction *` in C) is a C function pointer that can be called like
a normal dst closure. From the perspective of the bytecode instruction set, there is no difference
in invoking a c function and invoking a normal dst function.
in invoking a C function and invoking a normal dst function.
## Bytecode Format
Dst bytecode presents an interface to virtual machine with a large number
Dst bytecode presents an interface to a virtual machine with a large number
of identical registers that can hold any Dst value (`Dst *` in C). Most instructions
have a destination register and 1 or 2 source registers. Registers are simply
named with positive integers.
Each instruction is a 32 bit integer, meaning that the instruction set is a constant
width instruction set like MIPS. The opcode of each instruction is the least significant
byte of the instruction. This means there are 256 possible opcodes, but half of those
are reserved, so 128 possible opcodes. The current implementation uses about half of these.
byte of the instruction. The highest bit of
this leading byte is reserved for debugging purposes, so there are 128 possible opcodes encodable
with this scheme. The current implementation uses about half of these possible opcodes.
```
X - Payload bits
@ -114,7 +115,7 @@ There are a few instruction variants that divide these payload bits.
* 0 arg - Used for noops, returning nil, or other instructions that take no
arguments. The payload is essentially ignored.
* 1 arg - All payload bits correspond to a single value, usually a signed or a signed integer/
* 1 arg - All payload bits correspond to a single value, usually a signed or unsigned integer.
Used for instructions of 1 argument, like returning a value, yielding a value to the parent fiber,
or doing a jump.
* 2 arg - Payload is split into byte 2 and bytes 3 and 4.
@ -139,13 +140,13 @@ Each instruction is also listed with a signature, which are the arguments the in
expects. There are a handful of instruction signatures, which combine the arity and type
of the instruction. The assembler does not
do any typechecking per closure, but does prevent jumping to invalid instructions and
failiure to return or error.
failure to return or error.
### Notation
* The $ prefix indicates that an instruction parameter is acting as a virtual register (slot).
If a parameter does not have the $ prefix in the description, it is acting as some kind
of literal (usually an unisgned integer for indexes, and a signed integer for literal integers).
of literal (usually an unsigned integer for indexes, and a signed integer for literal integers).
* Some operators in the description have the suffix 'i' or 'r'. These indicate
that these operators correspond to integers or real numbers only, respectively. All

76
natives/hello/.gitignore vendored Normal file
View File

@ -0,0 +1,76 @@
# Created by https://www.gitignore.io/api/c
### C ###
# Prerequisites
*.d
# Object files
*.o
*.ko
*.obj
*.elf
# Linker output
*.ilk
*.map
*.exp
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
*.idb
*.pdb
# Kernel Module Compile Results
*.mod*
*.cmd
.tmp_versions/
modules.order
Module.symvers
Mkfile.old
dkms.conf
# End of https://www.gitignore.io/api/c
# Created by https://www.gitignore.io/api/cmake
### CMake ###
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
build
# End of https://www.gitignore.io/api/cmake

View File

@ -1,178 +0,0 @@
# CMAKE generated file: DO NOT EDIT!
# Generated by "Unix Makefiles" Generator, CMake Version 3.9
# Default target executed when no arguments are given to make.
default_target: all
.PHONY : default_target
# Allow only one "make -f Makefile2" at a time, but pass parallelism.
.NOTPARALLEL:
#=============================================================================
# Special targets provided by cmake.
# Disable implicit rules so canonical targets will work.
.SUFFIXES:
# Remove some rules from gmake that .SUFFIXES does not remove.
SUFFIXES =
.SUFFIXES: .hpux_make_needs_suffix_list
# Suppress display of executed commands.
$(VERBOSE).SILENT:
# A target that is always out of date.
cmake_force:
.PHONY : cmake_force
#=============================================================================
# Set environment variables for the build.
# The shell in which to execute make rules.
SHELL = /bin/sh
# The CMake executable.
CMAKE_COMMAND = /usr/bin/cmake
# The command to remove a file.
RM = /usr/bin/cmake -E remove -f
# Escaping for special characters.
EQUALS = =
# The top-level source directory on which CMake was run.
CMAKE_SOURCE_DIR = /home/calvin/code/dst/natives/hello
# The top-level build directory on which CMake was run.
CMAKE_BINARY_DIR = /home/calvin/code/dst/natives/hello/Build
#=============================================================================
# Targets provided globally by CMake.
# Special rule for the target rebuild_cache
rebuild_cache:
@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
/usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
.PHONY : rebuild_cache
# Special rule for the target rebuild_cache
rebuild_cache/fast: rebuild_cache
.PHONY : rebuild_cache/fast
# Special rule for the target edit_cache
edit_cache:
@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..."
/usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available.
.PHONY : edit_cache
# Special rule for the target edit_cache
edit_cache/fast: edit_cache
.PHONY : edit_cache/fast
# The main all target
all: cmake_check_build_system
$(CMAKE_COMMAND) -E cmake_progress_start /home/calvin/code/dst/natives/hello/Build/CMakeFiles /home/calvin/code/dst/natives/hello/Build/CMakeFiles/progress.marks
$(MAKE) -f CMakeFiles/Makefile2 all
$(CMAKE_COMMAND) -E cmake_progress_start /home/calvin/code/dst/natives/hello/Build/CMakeFiles 0
.PHONY : all
# The main clean target
clean:
$(MAKE) -f CMakeFiles/Makefile2 clean
.PHONY : clean
# The main clean target
clean/fast: clean
.PHONY : clean/fast
# Prepare targets for installation.
preinstall: all
$(MAKE) -f CMakeFiles/Makefile2 preinstall
.PHONY : preinstall
# Prepare targets for installation.
preinstall/fast:
$(MAKE) -f CMakeFiles/Makefile2 preinstall
.PHONY : preinstall/fast
# clear depends
depend:
$(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
.PHONY : depend
#=============================================================================
# Target rules for targets named hello
# Build rule for target.
hello: cmake_check_build_system
$(MAKE) -f CMakeFiles/Makefile2 hello
.PHONY : hello
# fast build rule for target.
hello/fast:
$(MAKE) -f CMakeFiles/hello.dir/build.make CMakeFiles/hello.dir/build
.PHONY : hello/fast
main.o: main.c.o
.PHONY : main.o
# target to build an object file
main.c.o:
$(MAKE) -f CMakeFiles/hello.dir/build.make CMakeFiles/hello.dir/main.c.o
.PHONY : main.c.o
main.i: main.c.i
.PHONY : main.i
# target to preprocess a source file
main.c.i:
$(MAKE) -f CMakeFiles/hello.dir/build.make CMakeFiles/hello.dir/main.c.i
.PHONY : main.c.i
main.s: main.c.s
.PHONY : main.s
# target to generate assembly for a file
main.c.s:
$(MAKE) -f CMakeFiles/hello.dir/build.make CMakeFiles/hello.dir/main.c.s
.PHONY : main.c.s
# Help Target
help:
@echo "The following are some of the valid targets for this Makefile:"
@echo "... all (the default if no target is provided)"
@echo "... clean"
@echo "... depend"
@echo "... rebuild_cache"
@echo "... hello"
@echo "... edit_cache"
@echo "... main.o"
@echo "... main.i"
@echo "... main.s"
.PHONY : help
#=============================================================================
# Special targets to cleanup operation of make.
# Special rule to run CMake to check the build system integrity.
# No rule that depends on this can have commands that come from listfiles
# because they might be regenerated.
cmake_check_build_system:
$(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
.PHONY : cmake_check_build_system

View File

@ -15,3 +15,62 @@ build
# End of https://www.gitignore.io/api/cmake
# Created by https://www.gitignore.io/api/c
### C ###
# Prerequisites
*.d
# Object files
*.o
*.ko
*.obj
*.elf
# Linker output
*.ilk
*.map
*.exp
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
*.idb
*.pdb
# Kernel Module Compile Results
*.mod*
*.cmd
.tmp_versions/
modules.order
Module.symvers
Mkfile.old
dkms.conf
# End of https://www.gitignore.io/api/c

View File

@ -20,6 +20,10 @@
* IN THE SOFTWARE.
*/
#ifdef ONEFILE
#include "sqlite3.c"
#endif
#include "sqlite3.h"
#include <dst/dst.h>

View File

@ -44,51 +44,14 @@ static int is_whitespace(uint8_t c) {
|| c == ',';
}
/* Code gen
printf("static uint32_t symchars[8] = {\n\t");
for (int i = 0; i < 256; i += 32) {
uint32_t block = 0;
for (int j = 0; j < 32; j++) {
block |= is_symbol_char_gen(i + j) << j;
}
printf("0x%08x%s", block, (i == (256 - 32)) ? "" : ", ");
}
printf("\n};\n");
static int is_symbol_char_gen(uint8_t c) {
if (c >= 'a' && c <= 'z') return 1;
if (c >= 'A' && c <= 'Z') return 1;
if (c >= '0' && c <= '9') return 1;
return (c == '!' ||
c == '$' ||
c == '%' ||
c == '&' ||
c == '*' ||
c == '+' ||
c == '-' ||
c == '.' ||
c == '/' ||
c == ':' ||
c == '<' ||
c == '?' ||
c == '=' ||
c == '>' ||
c == '@' ||
c == '\\' ||
c == '^' ||
c == '_' ||
c == '~' ||
c == '|');
}
The table contains 256 bits, where each bit is 1
if the corresponding ascci code is a symbol char, and 0
if not. The upper characters are also considered symbol
chars and are then checked for utf-8 compliance. */
/* Code generated by tools/symcharsgen.c.
* The table contains 256 bits, where each bit is 1
* if the corresponding ASCII code is a symbol char, and 0
* if not. The upper characters are also considered symbol
* chars and are then checked for utf-8 compliance. */
static const uint32_t symchars[8] = {
0x00000000, 0xF7ffec72, 0xd7ffffff, 0x57fffffe,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
0x00000000, 0xf7ffec72, 0xc7ffffff, 0x57fffffe,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
/* Check if a character is a valid symbol character
@ -247,6 +210,20 @@ static int escape1(DstParser *p, DstParseState *state, uint8_t c) {
return 1;
}
/* Finish the string literal currently accumulated in p->buf.
 *
 * The bytes collected so far are turned into a Dst value: a mutable buffer
 * when the state carries PFLAG_BUFFER, an immutable string otherwise. The
 * scratch buffer is then emptied and the finished value is handed to
 * popstate. Always returns 1. */
static int stringend(DstParser *p, DstParseState *state) {
    Dst result;
    if (!(state->flags & PFLAG_BUFFER)) {
        /* Plain string literal: build the string directly from the bytes. */
        result = dst_wrap_string(dst_string(p->buf, dst_v_count(p->buf)));
    } else {
        /* Buffer literal: allocate a buffer sized for the contents, then
         * copy the accumulated bytes into it. */
        DstBuffer *buffer = dst_buffer(dst_v_count(p->buf));
        dst_buffer_push_bytes(buffer, p->buf, dst_v_count(p->buf));
        result = dst_wrap_buffer(buffer);
    }
    /* Reset the scratch buffer before handing the value to popstate. */
    dst_v_empty(p->buf);
    popstate(p, result);
    return 1;
}
static int stringchar(DstParser *p, DstParseState *state, uint8_t c) {
/* Enter escape */
if (c == '\\') {
@ -255,18 +232,7 @@ static int stringchar(DstParser *p, DstParseState *state, uint8_t c) {
}
/* String end */
if (c == '"') {
/* String end */
Dst ret;
if (state->flags & PFLAG_BUFFER) {
DstBuffer *b = dst_buffer(dst_v_count(p->buf));
dst_buffer_push_bytes(b, p->buf, dst_v_count(p->buf));
ret = dst_wrap_buffer(b);
} else {
ret = dst_wrap_string(dst_string(p->buf, dst_v_count(p->buf)));
}
dst_v_empty(p->buf);
popstate(p, ret);
return 1;
return stringend(p, state);
}
/* normal char */
dst_v_push(p->buf, c);
@ -407,6 +373,55 @@ static int dotable(DstParser *p, DstParseState *state, uint8_t c) {
return root(p, state, c);
}
/* State flags used only by the long string parser.
 * NOTE(review): the values 8 and 16 are presumably chosen so they do not
 * collide with the other PFLAG_* bits defined elsewhere - verify. */
#define PFLAG_INSTRING 8
#define PFLAG_END_CANDIDATE 16

/* Parser state function for long strings.
 *
 * This state is pushed after a leading backslash has been seen. The rest of
 * the opening delimiter is zero or more '=' characters followed by a second
 * backslash; the number of '=' is recorded in state->argn. The string body
 * is taken verbatim (no escape processing) until a closing delimiter made of
 * a backslash, exactly state->argn '=' characters and another backslash.
 *
 * The function works in three phases, selected by state->flags:
 *   - neither flag set:      reading the opening delimiter
 *   - PFLAG_INSTRING:        reading the string body
 *   - PFLAG_END_CANDIDATE:   a backslash was seen in the body and we are
 *                            checking whether it starts the closing delimiter;
 *                            state->qcount counts the '=' matched so far
 *
 * On a malformed opening delimiter p->error is set. Returns 1 in every path
 * that does not finish the string; when the string ends, the return value of
 * stringend is passed through. */
static int longstring(DstParser *p, DstParseState *state, uint8_t c) {
    if (state->flags & PFLAG_INSTRING) {
        /* We are inside the long string */
        if (c == '\\') {
            /* Possible start of the closing delimiter */
            state->flags |= PFLAG_END_CANDIDATE;
            state->flags &= ~PFLAG_INSTRING;
            state->qcount = 0; /* Use qcount to keep track of number of '=' seen */
            return 1;
        }
        dst_v_push(p->buf, c);
        return 1;
    } else if (state->flags & PFLAG_END_CANDIDATE) {
        int i;
        /* We are checking a potential end of the string */
        if (c == '\\' && state->qcount == state->argn) {
            return stringend(p, state);
        }
        if (c == '=' && state->qcount < state->argn) {
            state->qcount++;
            return 1;
        }
        /* Failed end candidate: the backslash and the '=' characters that
         * were tentatively consumed are ordinary string content. */
        dst_v_push(p->buf, '\\');
        for (i = 0; i < state->qcount; i++) {
            dst_v_push(p->buf, '=');
        }
        state->qcount = 0;
        if (c == '\\') {
            /* The character that broke the candidate is itself a backslash,
             * so it may begin the real closing delimiter. Stay in the
             * end-candidate phase instead of swallowing it as content;
             * otherwise input such as "\\=\" with a one-'=' delimiter would
             * never be recognized as closed. */
            return 1;
        }
        dst_v_push(p->buf, c);
        state->flags &= ~PFLAG_END_CANDIDATE;
        state->flags |= PFLAG_INSTRING;
        return 1;
    } else {
        /* We are at beginning of string */
        switch (c) {
            case '=':
                /* One more level in the delimiter */
                state->argn++;
                return 1;
            case '\\':
                /* Opening delimiter complete, body starts with next char */
                state->flags |= PFLAG_INSTRING;
                return 1;
            default:
                p->error = "unexpected character in long string delimiter";
                return 1;
        }
    }
}
static int ampersand(DstParser *p, DstParseState *state, uint8_t c) {
(void) state;
dst_v_pop(p->states);
@ -417,6 +432,9 @@ static int ampersand(DstParser *p, DstParseState *state, uint8_t c) {
case '"':
pushstate(p, stringchar, PFLAG_BUFFER);
return 1;
case '\\':
pushstate(p, longstring, PFLAG_BUFFER);
return 1;
case '[':
pushstate(p, doarray, PFLAG_CONTAINER | PFLAG_SQRBRACKETS);
return 1;
@ -453,6 +471,9 @@ static int root(DstParser *p, DstParseState *state, uint8_t c) {
case '@':
pushstate(p, ampersand, 0);
return 1;
case '\\':
pushstate(p, longstring, 0);
return 1;
case ')':
case ']':
case '}':

64
src/tools/symcharsgen.c Normal file
View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2018 Calvin Rose
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdint.h>
/* Classify a byte for the symbol character table.
 *
 * Returns 1 when the byte may appear in a symbol and 0 otherwise. Accepted
 * bytes are: every byte with the high bit set, ASCII letters and digits,
 * and the punctuation characters listed in the switch below. */
static int is_symbol_char_gen(uint8_t c) {
    /* Any byte with the top bit set is accepted. */
    if (c >= 0x80) {
        return 1;
    }
    /* ASCII alphanumerics. */
    if (('a' <= c && c <= 'z') ||
        ('A' <= c && c <= 'Z') ||
        ('0' <= c && c <= '9')) {
        return 1;
    }
    /* Permitted punctuation. */
    switch (c) {
        case '!': case '$': case '%': case '&':
        case '*': case '+': case '-': case '.':
        case '/': case ':': case '<': case '?':
        case '=': case '>': case '@': case '^':
        case '_': case '~': case '|':
            return 1;
        default:
            return 0;
    }
}
/* Print the C source for the symchars lookup table to stdout.
 *
 * The table has 8 words of 32 bits, one bit per byte value 0..255. Bit j of
 * word i/32 is set when is_symbol_char_gen(i + j) reports that the byte is a
 * symbol character. Returns 0. */
int main(void) {
    printf("static const uint32_t symchars[8] = {\n ");
    for (int i = 0; i < 256; i += 32) {
        uint32_t block = 0;
        for (int j = 0; j < 32; j++) {
            /* Widen to uint32_t before shifting: the classifier returns an
             * int, and shifting a signed 1 left by 31 bits is undefined
             * behavior. */
            block |= (uint32_t) is_symbol_char_gen((uint8_t) (i + j)) << j;
        }
        /* Cast so the argument type matches the %lx conversion exactly. */
        printf("0x%08lx%s", (unsigned long) block, (i == (256 - 32)) ? "" : ", ");
    }
    printf("\n};\n");
    return 0;
}