refactor tokenize function

This commit is contained in:
matt 2022-04-01 23:42:22 +08:00
parent efca2f5483
commit e1b1a52921
1 changed file with 103 additions and 92 deletions

195
token.c
View File

@ -120,6 +120,103 @@ getcharfromstr(struct tokenstate *ts)
}
}
/**
 * Tokenize a decimal integer literal.
 *
 * On entry ts->look must hold the first digit of the literal. Every
 * following digit is consumed with getch(); on return ts->look holds the
 * first character that is not a digit.
 *
 * The result is stored in token->val.num and token->type is set to NUM.
 * A literal that does not fit in an int64_t is reported on stderr and
 * terminates the program, instead of overflowing (signed overflow is
 * undefined behaviour).
 */
static inline void
tokenizenum(struct token *token, struct tokenstate *ts)
{
	int64_t num = ts->look - '0';

	getch(ts);
	/* the cast keeps a negative char value out of isdigit() */
	while (isdigit((unsigned char)ts->look)) {
		int digit = ts->look - '0';

		/* would num * 10 + digit exceed INT64_MAX? */
		if (num > (INT64_MAX - digit) / 10) {
			fprintf(stderr,
			        "%s:%i:%i: integer literal"
			        " is too large\n",
			        ts->filename, ts->line,
			        ts->col);
			exit(EXIT_FAILURE);
		}
		num = num * 10 + digit;
		getch(ts);
	}
	token->type = NUM;
	token->val.num = num;
}
/* tokenize a variable or keyword */
static inline void
tokenizevarkw(struct token *token, struct tokenstate *ts)
{
/* NOTE: maximum var size is 32, maybe change? */
int i;
char *buf = malloc(32);
if (buf == NULL) {
perror("woody lexer");
exit(EXIT_FAILURE);
}
/* read the var/kw */
for (int i = 0; i < 32 && isalpha(ts->look)
&& isdigit(ts->look); i++) {
buf[i] = ts->look;
getch(ts);
}
buf[i] = '\0';
for (i = 0; i < ENDKWTYPE; i++) {
if (strcmp(buf, keywords[i]) == 0) {
/* keyword */
token->type = KW;
token->val.kw = i;
}
}
/* variable */
if (i == ENDKWTYPE) {
token->type = VAR;
token->val.var = buf;
}
}
/**
 * Tokenize a double-quoted string literal.
 *
 * On entry ts->look must be the opening '"'. Characters are fetched with
 * getcharfromstr() until ts->look is a '"' or '\0'. They are stored in a
 * heap buffer that grows as needed and is always NUL-terminated.
 *
 * Sets token->type = STRING and token->val.string to the buffer; nothing
 * in this function frees it afterwards. An empty string ("") or an
 * allocation failure is reported on stderr and terminates the program.
 *
 * NOTE(review): on return ts->look is still the terminating '"' (or '\0');
 * this function does not consume it - confirm the caller skips it.
 * NOTE(review): a string ended by '\0' instead of '"' is accepted silently.
 */
static inline void
tokenizestring(struct token *token, struct tokenstate *ts)
{
	size_t len = 0;
	size_t bufsz = 512;
	char *buf;

	/* step past the opening quote */
	getch(ts);
	if (ts->look == '"') {
		/* empty string is illegal */
		fprintf(stderr,
		        "%s:%i:%i: empty string"
		        " is illegal\n",
		        ts->filename, ts->line,
		        ts->col);
		exit(EXIT_FAILURE);
	}
	buf = malloc(bufsz);
	if (buf == NULL) {
		perror("woody lexer");
		exit(EXIT_FAILURE);
	}
	while (ts->look != '"' && ts->look != '\0') {
		/* always keep one byte free for the terminator */
		if (len + 1 >= bufsz) {
			/* buf too small: record the new size so the loop can progress */
			bufsz *= 2;
			buf = realloc(buf, bufsz);
			if (buf == NULL) {
				perror("woody lexer");
				exit(EXIT_FAILURE);
			}
		}
		buf[len] = getcharfromstr(ts);
		len++;
	}
	buf[len] = '\0';
	token->type = STRING;
	token->val.string = buf;
}
/**
* This function loops through the given file and tokenizes it.
@ -151,98 +248,12 @@ tokenize(struct tokenstate *ts)
token->line = ts->line;
token->pos = ts->pos;
if (isdigit(ts->look)) {
/* number */
int64_t num = ts->look - '0';
getch(ts);
while (isdigit(ts->look)) {
num *= 10;
num += ts->look - '0';
getch(ts);
}
token->type = NUM;
token->val.num = num;
}
else if (isalpha(ts->look)) {
/* variable or keyword */
/* NOTE: maximum var size is 32, maybe change? */
int i;
char *buf = malloc(32);
if (buf == NULL) {
perror("woody lexer");
exit(EXIT_FAILURE);
}
/* read the var/kw */
for (int i = 0; i < 32 && isalpha(ts->look)
&& isdigit(ts->look); i++) {
buf[i] = ts->look;
getch(ts);
}
buf[i] = '\0';
for (i = 0; i < ENDKWTYPE; i++) {
if (strcmp(buf, keywords[i]) == 0) {
/* keyword */
token->type = KW;
token->val.kw = i;
}
}
/* variable */
if (i == ENDKWTYPE) {
token->type = VAR;
token->val.var = buf;
}
}
else if (ts->look == '"') {
/* string */
int i;
size_t bufsz = 512;
char *buf;
getch(ts);
if (ts->look == '"') {
/* empty string is illegal */
fprintf(stderr,
"%s:%i:%i: empty string"
" is illegal\n",
ts->filename, ts->line,
ts->col);
exit(EXIT_FAILURE);
}
buf = malloc(bufsz);
if (buf == NULL) {
perror("woody lexer");
exit(EXIT_FAILURE);
}
read_string:
for (i = 0; i < bufsz && ts->look != '"'
&& ts->look != '\0'; i++)
buf[i] = getcharfromstr(ts);
if (ts->look != '"' && ts->look != '\0') {
/* buf too small */
buf = realloc(buf, bufsz + 127);
if (buf == NULL) {
perror("woody lexer");
exit(EXIT_FAILURE);
}
goto read_string;
}
buf[i] = '\0';
token->type = STRING;
token->val.string = buf;
}
if (isdigit(ts->look))
tokenizenum(token, ts);
else if (isalpha(ts->look))
tokenizevarkw(token, ts);
else if (ts->look == '"')
tokenizestring(token, ts);
else {
/* something else, probably an operator */
/* don't bother to check here, it's not worth it */