From e1b1a52921a00d906bb071d0f4f2c219fbd37cd7 Mon Sep 17 00:00:00 2001
From: matt
Date: Fri, 1 Apr 2022 23:42:22 +0800
Subject: [PATCH] refactor tokenize function

Move the number, variable/keyword and string lexing out of tokenize()
into helpers. While moving, fix the helpers: identifiers are read with
isalpha || isdigit into a NUL terminated buffer, keyword matches are no
longer overwritten as VAR, the string buffer really grows, and number
literals that overflow int64_t are rejected.
---
 token.c | 215 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 123 insertions(+), 92 deletions(-)

diff --git a/token.c b/token.c
index 8250bed..bf3760a 100644
--- a/token.c
+++ b/token.c
@@ -120,6 +120,123 @@ getcharfromstr(struct tokenstate *ts)
 	}
 }
 
+/*
+ * tokenize a decimal number; ts->look must be the first digit.
+ * exits with an error if the literal does not fit in an int64_t.
+ */
+static inline void
+tokenizenum(struct token *token, struct tokenstate *ts)
+{
+	int64_t num = ts->look - '0';
+
+	getch(ts);
+	while (isdigit(ts->look)) {
+		int digit = ts->look - '0';
+
+		/* check before multiplying: signed overflow is undefined */
+		if (num > (INT64_MAX - digit) / 10) {
+			fprintf(stderr,
+					"%s:%i:%i: number is too large\n",
+					ts->filename, ts->line, ts->col);
+			exit(EXIT_FAILURE);
+		}
+		num = num * 10 + digit;
+		getch(ts);
+	}
+
+	token->type = NUM;
+	token->val.num = num;
+}
+
+/*
+ * tokenize a variable or keyword; ts->look must be the first letter.
+ * a name found in keywords[] becomes a KW token, anything else becomes
+ * a VAR token that owns the malloc'd name.
+ */
+static inline void
+tokenizevarkw(struct token *token, struct tokenstate *ts)
+{
+	/* NOTE: maximum var size is 32 (including the NUL), maybe change? */
+	enum { VARBUFSZ = 32 };
+	int i;
+	char *buf = malloc(VARBUFSZ);
+	if (buf == NULL) {
+		perror("woody lexer");
+		exit(EXIT_FAILURE);
+	}
+
+	/* read the var/kw, always leaving room for the terminating NUL */
+	for (i = 0; i < VARBUFSZ - 1 && (isalpha(ts->look)
+			|| isdigit(ts->look)); i++) {
+		buf[i] = ts->look;
+		getch(ts);
+	}
+	buf[i] = '\0';
+
+	for (i = 0; i < ENDKWTYPE; i++) {
+		if (strcmp(buf, keywords[i]) == 0) {
+			/* keyword: the name is not kept, so release it */
+			free(buf);
+			token->type = KW;
+			token->val.kw = i;
+			return;
+		}
+	}
+
+	/* no keyword matched, so this is a variable */
+	token->type = VAR;
+	token->val.var = buf;
+}
+
+/*
+ * tokenize a string literal; ts->look must be the opening '"'.
+ * reading stops when ts->look is the closing '"' or '\0'; neither is
+ * consumed here. the STRING token owns the malloc'd, NUL terminated text.
+ */
+static inline void
+tokenizestring(struct token *token, struct tokenstate *ts)
+{
+	size_t i = 0;
+	size_t bufsz = 512;
+	char *buf;
+
+	getch(ts);
+	if (ts->look == '"') {
+		/* empty string is illegal */
+		fprintf(stderr,
+				"%s:%i:%i: empty string"
+				" is illegal\n",
+				ts->filename, ts->line,
+				ts->col);
+		exit(EXIT_FAILURE);
+	}
+
+	buf = malloc(bufsz);
+	if (buf == NULL) {
+		perror("woody lexer");
+		exit(EXIT_FAILURE);
+	}
+
+	while (ts->look != '"' && ts->look != '\0') {
+		/* grow while there is still room for this char and the NUL */
+		if (i + 1 >= bufsz) {
+			char *tmp;
+
+			bufsz *= 2;
+			tmp = realloc(buf, bufsz);
+			if (tmp == NULL) {
+				perror("woody lexer");
+				exit(EXIT_FAILURE);
+			}
+			buf = tmp;
+		}
+		buf[i++] = getcharfromstr(ts);
+	}
+	buf[i] = '\0';
+
+	token->type = STRING;
+	token->val.string = buf;
+}
 
 /**
  * This function loops through the given file and tokenizes it.
@@ -151,98 +268,12 @@ tokenize(struct tokenstate *ts)
 		token->line = ts->line;
 		token->pos = ts->pos;
 
-		if (isdigit(ts->look)) {
-			/* number */
-			int64_t num = ts->look - '0';
-
-			getch(ts);
-			while (isdigit(ts->look)) {
-				num *= 10;
-				num += ts->look - '0';
-				getch(ts);
-			}
-
-			token->type = NUM;
-			token->val.num = num;
-		}
-		else if (isalpha(ts->look)) {
-			/* variable or keyword */
-			/* NOTE: maximum var size is 32, maybe change? */
-			int i;
-			char *buf = malloc(32);
-			if (buf == NULL) {
-				perror("woody lexer");
-				exit(EXIT_FAILURE);
-			}
-
-			/* read the var/kw */
-			for (int i = 0; i < 32 && isalpha(ts->look)
-					&& isdigit(ts->look); i++) {
-				buf[i] = ts->look;
-				getch(ts);
-			}
-			buf[i] = '\0';
-
-			for (i = 0; i < ENDKWTYPE; i++) {
-				if (strcmp(buf, keywords[i]) == 0) {
-					/* keyword */
-					token->type = KW;
-					token->val.kw = i;
-				}
-			}
-
-			/* variable */
-			if (i == ENDKWTYPE) {
-				token->type = VAR;
-				token->val.var = buf;
-			}
-
-		}
-		else if (ts->look == '"') {
-			/* string */
-			int i;
-			size_t bufsz = 512;
-			char *buf;
-
-			getch(ts);
-			if (ts->look == '"') {
-				/* empty string is illegal */
-				fprintf(stderr,
-						"%s:%i:%i: empty string"
-						" is illegal\n",
-						ts->filename, ts->line,
-						ts->col);
-				exit(EXIT_FAILURE);
-			}
-
-			buf = malloc(bufsz);
-			if (buf == NULL) {
-				perror("woody lexer");
-				exit(EXIT_FAILURE);
-			}
-
-read_string:
-			for (i = 0; i < bufsz && ts->look != '"'
-					&& ts->look != '\0'; i++)
-				buf[i] = getcharfromstr(ts);
-
-			if (ts->look != '"' && ts->look != '\0') {
-				/* buf too small */
-				buf = realloc(buf, bufsz + 127);
-				if (buf == NULL) {
-					perror("woody lexer");
-					exit(EXIT_FAILURE);
-				}
-
-				goto read_string;
-			}
-
-			buf[i] = '\0';
-
-			token->type = STRING;
-			token->val.string = buf;
-
-		}
+		if (isdigit(ts->look))
+			tokenizenum(token, ts);
+		else if (isalpha(ts->look))
+			tokenizevarkw(token, ts);
+		else if (ts->look == '"')
+			tokenizestring(token, ts);
 		else {
 			/* something else, probably an operator */
 			/* don't bother to check here, it's not worth it */