summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/lexer/char_info.h 17
-rw-r--r-- include/lexer/token.h 11
-rw-r--r-- src/lexer/char_info.c 16
-rw-r--r-- src/lexer/lexer.c 17
4 files changed, 50 insertions, 11 deletions
diff --git a/include/lexer/char_info.h b/include/lexer/char_info.h
index 59db2be..d9205b0 100644
--- a/include/lexer/char_info.h
+++ b/include/lexer/char_info.h
@@ -31,6 +31,7 @@
#define _LEXER_CHAR_INFO_H 1
#include <stdint.h>
+#include "lexer/token.h"
#define CHAR_HORZ_WS (1 << 0)
#define CHAR_VERT_WS (1 << 1)
@@ -38,6 +39,7 @@
#define CHAR_XDIGIT (1 << 3)
#define CHAR_UPPER (1 << 4)
#define CHAR_LOWER (1 << 5)
+#define CHAR_SINGLE (1 << 6)
#define CHAR_HEX (CHAR_DIGIT | CHAR_XDIGIT)
#define CHAR_XUPPER (CHAR_XDIGIT | CHAR_UPPER)
@@ -46,8 +48,21 @@
#define CHAR_ALPHA (CHAR_UPPER | CHAR_LOWER)
#define CHAR_ALNUM (CHAR_ALPHA | CHAR_DIGIT)
+/*
+ * Single-character tokens: a char_info entry that has CHAR_SINGLE set also
+ * carries a token kind in the bits at and above CHAR_SINGLE_SHIFT, which the
+ * lexer recovers with a right shift. Table entries are uint16_t, so the
+ * token kind must fit in the 8 bits above the shift.
+ *
+ * The argument is parenthesized so that an expression argument cannot be
+ * regrouped by the shift operator.
+ */
+#define CHAR_SINGLE_SHIFT 8
+#define MAKE_SINGLE(type) (((type) << CHAR_SINGLE_SHIFT) | CHAR_SINGLE)
+#define CHAR_COMMA MAKE_SINGLE(TOK_COMMA)
+#define CHAR_DOT MAKE_SINGLE(TOK_DOT)
+#define CHAR_COLON MAKE_SINGLE(TOK_COLON)
+#define CHAR_SEMI MAKE_SINGLE(TOK_SEMICOLON)
+#define CHAR_LPAREN MAKE_SINGLE(TOK_LPAREN)
+#define CHAR_RPAREN MAKE_SINGLE(TOK_RPAREN)
+#define CHAR_LCURLY MAKE_SINGLE(TOK_LCURLY)
+#define CHAR_RCURLY MAKE_SINGLE(TOK_RCURLY)
+#define CHAR_LSQUARE MAKE_SINGLE(TOK_LSQUARE)
+#define CHAR_RSQUARE MAKE_SINGLE(TOK_RSQUARE)
+
#define CHAR_INFO_COUNT 256
-extern uint8_t char_info[CHAR_INFO_COUNT];
+extern uint16_t char_info[CHAR_INFO_COUNT];
#endif /* !_LEXER_CHAR_INFO_H */
diff --git a/include/lexer/token.h b/include/lexer/token.h
index ab8231d..f51b5e9 100644
--- a/include/lexer/token.h
+++ b/include/lexer/token.h
@@ -39,6 +39,17 @@ enum token_kind {
TOK_IDENTIFIER,
+ TOK_COMMA,
+ TOK_DOT,
+ TOK_COLON,
+ TOK_SEMICOLON,
+ TOK_LPAREN,
+ TOK_RPAREN,
+ TOK_LCURLY,
+ TOK_RCURLY,
+ TOK_LSQUARE,
+ TOK_RSQUARE,
+
/*
* Do not modify this ordering without updating parser/types.c
*/
diff --git a/src/lexer/char_info.c b/src/lexer/char_info.c
index d66c651..144bbe7 100644
--- a/src/lexer/char_info.c
+++ b/src/lexer/char_info.c
@@ -29,7 +29,7 @@
#include "lexer/char_info.h"
-uint8_t char_info[CHAR_INFO_COUNT] = {
+uint16_t char_info[CHAR_INFO_COUNT] = {
/*
NUL SOH STX ETX
EOT ENQ ACK BEL
@@ -60,8 +60,8 @@ uint8_t char_info[CHAR_INFO_COUNT] = {
*/
CHAR_HORZ_WS, 0 , 0 , 0 ,
0 , 0 , 0 , 0 ,
- 0 , 0 , 0 , 0 ,
- 0 , 0 , 0 , 0 ,
+ CHAR_LPAREN , CHAR_RPAREN , 0 , 0 ,
+ CHAR_COMMA , 0 , CHAR_DOT , 0 ,
/*
0 1 2 3
@@ -71,7 +71,7 @@ uint8_t char_info[CHAR_INFO_COUNT] = {
*/
CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT ,
CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT ,
- CHAR_DIGIT , CHAR_DIGIT , 0 , 0 ,
+ CHAR_DIGIT , CHAR_DIGIT , CHAR_COLON , CHAR_SEMI ,
0 , 0 , 0 , 0 ,
/*
@@ -93,8 +93,8 @@ uint8_t char_info[CHAR_INFO_COUNT] = {
*/
CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
- CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , 0 ,
- 0 , 0 , 0 , 0 ,
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_LSQUARE,
+ 0 , CHAR_RSQUARE, 0 , 0 ,
/*
` a b c
@@ -115,6 +115,6 @@ uint8_t char_info[CHAR_INFO_COUNT] = {
*/
CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
- CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , 0 ,
- 0 , 0 , 0 , 0 ,
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LCURLY ,
+ 0 , CHAR_RCURLY , 0 , 0 ,
};
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index 1431789..e7f277e 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -73,6 +73,9 @@ lex_identifier(struct lexer *ctx, struct token *tok)
bool
lexer_next(struct lexer *ctx, struct token *tok)
{
+ char ch;
+ uint16_t ch_info;
+
if (ctx == NULL || tok == NULL) {
return false;
}
@@ -84,12 +87,22 @@ lexer_next(struct lexer *ctx, struct token *tok)
tok->line = ctx->line;
tok->col = (int)(tok->pos - ctx->line_start) + 1;
- if (char_info[(int)*ctx->pos] & CHAR_ALPHA || *ctx->pos == '_') {
+ ch = *ctx->pos;
+ /*
+  * Index the table through unsigned char: plain char may be signed, in
+  * which case a byte >= 0x80 would convert to a negative int and read
+  * before the start of char_info[].
+  */
+ ch_info = char_info[(unsigned char)ch];
+
+ if (ch_info & CHAR_ALPHA || ch == '_') {
lex_identifier(ctx, tok);
return true;
}
- if (*ctx->pos == '\0') {
+ if (ch_info & CHAR_SINGLE) {
+ tok->kind = ch_info >> CHAR_SINGLE_SHIFT;
+ tok->len = 1;
+ ctx->pos++;
+ return true;
+ }
+
+ if (ch == '\0') {
tok->kind = TOK_EOF;
return true;
}