diff options
-rw-r--r-- | include/lexer/char_info.h | 17 | ||||
-rw-r--r-- | include/lexer/token.h | 11 | ||||
-rw-r--r-- | src/lexer/char_info.c | 16 | ||||
-rw-r--r-- | src/lexer/lexer.c | 17 |
4 files changed, 50 insertions, 11 deletions
diff --git a/include/lexer/char_info.h b/include/lexer/char_info.h index 59db2be..d9205b0 100644 --- a/include/lexer/char_info.h +++ b/include/lexer/char_info.h @@ -31,6 +31,7 @@ #define _LEXER_CHAR_INFO_H 1 #include <stdint.h> +#include "lexer/token.h" #define CHAR_HORZ_WS (1 << 0) #define CHAR_VERT_WS (1 << 1) @@ -38,6 +39,7 @@ #define CHAR_XDIGIT (1 << 3) #define CHAR_UPPER (1 << 4) #define CHAR_LOWER (1 << 5) +#define CHAR_SINGLE (1 << 6) #define CHAR_HEX (CHAR_DIGIT | CHAR_XDIGIT) #define CHAR_XUPPER (CHAR_XDIGIT | CHAR_UPPER) @@ -46,8 +48,21 @@ #define CHAR_ALPHA (CHAR_UPPER | CHAR_LOWER) #define CHAR_ALNUM (CHAR_ALPHA | CHAR_DIGIT) +#define CHAR_SINGLE_SHIFT 8 +#define MAKE_SINGLE(type) ((type << CHAR_SINGLE_SHIFT) | CHAR_SINGLE) +#define CHAR_COMMA MAKE_SINGLE(TOK_COMMA) +#define CHAR_DOT MAKE_SINGLE(TOK_DOT) +#define CHAR_COLON MAKE_SINGLE(TOK_COLON) +#define CHAR_SEMI MAKE_SINGLE(TOK_SEMICOLON) +#define CHAR_LPAREN MAKE_SINGLE(TOK_LPAREN) +#define CHAR_RPAREN MAKE_SINGLE(TOK_RPAREN) +#define CHAR_LCURLY MAKE_SINGLE(TOK_LCURLY) +#define CHAR_RCURLY MAKE_SINGLE(TOK_RCURLY) +#define CHAR_LSQUARE MAKE_SINGLE(TOK_LSQUARE) +#define CHAR_RSQUARE MAKE_SINGLE(TOK_RSQUARE) + #define CHAR_INFO_COUNT 256 -extern uint8_t char_info[CHAR_INFO_COUNT]; +extern uint16_t char_info[CHAR_INFO_COUNT]; #endif /* !_LEXER_CHAR_INFO_H */ diff --git a/include/lexer/token.h b/include/lexer/token.h index ab8231d..f51b5e9 100644 --- a/include/lexer/token.h +++ b/include/lexer/token.h @@ -39,6 +39,17 @@ enum token_kind { TOK_IDENTIFIER, + TOK_COMMA, + TOK_DOT, + TOK_COLON, + TOK_SEMICOLON, + TOK_LPAREN, + TOK_RPAREN, + TOK_LCURLY, + TOK_RCURLY, + TOK_LSQUARE, + TOK_RSQUARE, + /* * Do not modify this ordering without updating parser/types.c */ diff --git a/src/lexer/char_info.c b/src/lexer/char_info.c index d66c651..144bbe7 100644 --- a/src/lexer/char_info.c +++ b/src/lexer/char_info.c @@ -29,7 +29,7 @@ #include "lexer/char_info.h" -uint8_t char_info[CHAR_INFO_COUNT] = { +uint16_t char_info[CHAR_INFO_COUNT] = { /* NUL SOH STX ETX EOT ENQ ACK BEL @@ -60,8 +60,8 @@ uint8_t char_info[CHAR_INFO_COUNT] = { */ CHAR_HORZ_WS, 0 , 0 , 0 , 0 , 0 , 0 , 0 , - 0 , 0 , 0 , 0 , - 0 , 0 , 0 , 0 , + CHAR_LPAREN , CHAR_RPAREN , 0 , 0 , + CHAR_COMMA , 0 , CHAR_DOT , 0 , /* 0 1 2 3 @@ -71,7 +71,7 @@ uint8_t char_info[CHAR_INFO_COUNT] = { */ CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , - CHAR_DIGIT , CHAR_DIGIT , 0 , 0 , + CHAR_DIGIT , CHAR_DIGIT , CHAR_COLON , CHAR_SEMI , 0 , 0 , 0 , 0 , /* @@ -93,8 +93,8 @@ uint8_t char_info[CHAR_INFO_COUNT] = { */ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , - CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , 0 , - 0 , 0 , 0 , 0 , + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_LSQUARE, + 0 , CHAR_RSQUARE, 0 , 0 , /* ` a b c @@ -115,6 +115,6 @@ uint8_t char_info[CHAR_INFO_COUNT] = { */ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , - CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , 0 , - 0 , 0 , 0 , 0 , + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LCURLY , + 0 , CHAR_RCURLY , 0 , 0 , }; diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 1431789..e7f277e 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -73,6 +73,9 @@ lex_identifier(struct lexer *ctx, struct token *tok) bool lexer_next(struct lexer *ctx, struct token *tok) { + char ch; + uint16_t ch_info; + if (ctx == NULL || tok == NULL) { return false; } @@ -84,12 +87,22 @@ lexer_next(struct lexer *ctx, struct token *tok) tok->line = ctx->line; tok->col = (int)(tok->pos - ctx->line_start) + 1; - if (char_info[(int)*ctx->pos] & CHAR_ALPHA || *ctx->pos == '_') { + ch = *ctx->pos; + ch_info = char_info[(int)ch]; + + if (ch_info & CHAR_ALPHA || ch == '_') { lex_identifier(ctx, tok); return true; } - if (*ctx->pos == '\0') { + if (ch_info & CHAR_SINGLE) { + tok->kind = ch_info >> CHAR_SINGLE_SHIFT; + tok->len = 1; + ctx->pos++; + return true; + } + + if (ch == '\0') { tok->kind = TOK_EOF; return true; } |