summaryrefslogtreecommitdiff
path: root/include/lexer
diff options
context:
space:
mode:
author Ian Moffett <ian@osmora.org> 2024-11-01 23:46:08 -0400
committer Ian Moffett <ian@osmora.org> 2024-11-01 23:46:08 -0400
commit a515dfb3b8f8e999362db7a6b52b3104c03b750a (patch)
tree d0180f0cbc39d9c3e367af30791ad774e4d419ff /include/lexer
Import quark sources
Signed-off-by: Ian Moffett <ian@osmora.org>
Diffstat (limited to 'include/lexer')
-rw-r--r-- include/lexer/char_info.h 60
-rw-r--r-- include/lexer/keywords.h 15
-rw-r--r-- include/lexer/token.h 92
3 files changed, 167 insertions, 0 deletions
diff --git a/include/lexer/char_info.h b/include/lexer/char_info.h
new file mode 100644
index 0000000..5987bbf
--- /dev/null
+++ b/include/lexer/char_info.h
@@ -0,0 +1,60 @@
+/*
+ * Character info table.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#ifndef _LEXER_CHAR_INFO_H
+#define _LEXER_CHAR_INFO_H
+
+#include <stdint.h>
+#include "lexer/token.h"
+
+#define CHAR_HORZ_WS (1 << 0) /* single-bit character-class flags, bits 0-7 */
+#define CHAR_VERT_WS (1 << 1)
+#define CHAR_DIGIT (1 << 2)
+#define CHAR_XDIGIT (1 << 3)
+#define CHAR_UPPER (1 << 4)
+#define CHAR_LOWER (1 << 5)
+#define CHAR_OPER (1 << 6) /* also OR'ed in by MAKE_OPER() */
+#define CHAR_SINGLE (1 << 7) /* also OR'ed in by MAKE_SINGLE() */
+
+#define CHAR_HEX (CHAR_DIGIT | CHAR_XDIGIT) /* composite masks: unions of the flags above */
+#define CHAR_XUPPER (CHAR_XDIGIT | CHAR_UPPER)
+#define CHAR_XLOWER (CHAR_XDIGIT | CHAR_LOWER)
+#define CHAR_WHITESPACE (CHAR_HORZ_WS | CHAR_VERT_WS)
+#define CHAR_ALPHA (CHAR_UPPER | CHAR_LOWER)
+#define CHAR_ALNUM (CHAR_ALPHA | CHAR_DIGIT)
+
+#define CHAR_SINGLE_SHIFT 8 /* token kind is stored above the 8 flag bits */
+#define MAKE_SINGLE(kind) (((kind) << CHAR_SINGLE_SHIFT) | CHAR_SINGLE) /* pack kind with the CHAR_SINGLE flag; kind is parenthesized so any expression is safe */
+#define CHAR_COMMA MAKE_SINGLE(TK_COMMA)
+#define CHAR_DOT MAKE_SINGLE(TK_DOT)
+#define CHAR_COLON MAKE_SINGLE(TK_COLON)
+#define CHAR_SEMI MAKE_SINGLE(TK_SEMICOLON)
+#define CHAR_LPAREN MAKE_SINGLE(TK_LPAREN)
+#define CHAR_RPAREN MAKE_SINGLE(TK_RPAREN)
+#define CHAR_LBRACE MAKE_SINGLE(TK_LBRACE)
+#define CHAR_RBRACE MAKE_SINGLE(TK_RBRACE)
+#define CHAR_LBRACK MAKE_SINGLE(TK_LBRACK)
+#define CHAR_RBRACK MAKE_SINGLE(TK_RBRACK)
+#define CHAR_TILDE MAKE_SINGLE(TK_TILDE)
+#define CHAR_EQUALS MAKE_SINGLE(TK_EQUALS)
+
+#define CHAR_OPER_SHIFT 8 /* token kind is stored above the 8 flag bits */
+#define MAKE_OPER(kind) (((kind) << CHAR_OPER_SHIFT) | CHAR_OPER) /* pack kind with the CHAR_OPER flag; kind is parenthesized so any expression is safe */
+#define CHAR_PLUS MAKE_OPER(TK_PLUS) /* each kind used below is directly followed by its TK_*_EQUALS in token_kind_t */
+#define CHAR_MINUS MAKE_OPER(TK_MINUS)
+#define CHAR_STAR MAKE_OPER(TK_STAR)
+#define CHAR_SLASH MAKE_OPER(TK_SLASH)
+#define CHAR_PERCENT MAKE_OPER(TK_PERCENT)
+#define CHAR_EXCLAIM MAKE_OPER(TK_EXCLAMATION)
+#define CHAR_LESS MAKE_OPER(TK_LESS_THAN)
+#define CHAR_GREATER MAKE_OPER(TK_GREATER_THAN)
+#define CHAR_CARET MAKE_OPER(TK_CARET)
+#define CHAR_AMPER MAKE_OPER(TK_AMPERSAND)
+#define CHAR_PIPE MAKE_OPER(TK_PIPE)
+
+extern uint16_t char_info[256]; /* declaration only; NOTE(review): presumably indexed by byte value and holding the CHAR_* values above - confirm at the definition */
+
+#endif /* !_LEXER_CHAR_INFO_H */
diff --git a/include/lexer/keywords.h b/include/lexer/keywords.h
new file mode 100644
index 0000000..7da7dc1
--- /dev/null
+++ b/include/lexer/keywords.h
@@ -0,0 +1,15 @@
+/*
+ * Keyword hashmap.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#ifndef _LEXER_KEYWORDS_H
+#define _LEXER_KEYWORDS_H
+
+#include "lexer/token.h"
+
+token_kind_t keywords_find(struct token *tok); /* NOTE(review): presumably maps tok to its keyword kind via the keyword hashmap - confirm at the definition */
+void keywords_init(void); /* NOTE(review): presumably must run before keywords_find() - confirm at the definition */
+
+#endif /* !_LEXER_KEYWORDS_H */
diff --git a/include/lexer/token.h b/include/lexer/token.h
new file mode 100644
index 0000000..e0a9ea3
--- /dev/null
+++ b/include/lexer/token.h
@@ -0,0 +1,92 @@
+/*
+ * Token definitions.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#ifndef _LEXER_TOKEN_H
+#define _LEXER_TOKEN_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "hash.h"
+
+typedef enum { /* token kinds; no explicit initializers, so values are consecutive from 0 */
+ TK_UNKNOWN, /* first enumerator, so its value is 0 */
+ TK_EOF,
+
+ TK_IDENTIFIER,
+ TK_NUMBER,
+ TK_STRING,
+ TK_CHARACTER,
+
+ /* Keywords */
+ TK_TYPE,
+ TK_ENUM,
+ TK_STRUCT,
+
+ /*
+ * Operators.
+ * NOTE: lex_oper() requires that TK_*_EQUALS
+ * immediately follows TK_*.
+ */
+ TK_PLUS,
+ TK_PLUS_EQUALS,
+ TK_PLUS_PLUS,
+ TK_MINUS,
+ TK_MINUS_EQUALS,
+ TK_MINUS_MINUS,
+ TK_ARROW,
+ TK_STAR,
+ TK_STAR_EQUALS,
+ TK_SLASH,
+ TK_SLASH_EQUALS,
+ TK_PERCENT,
+ TK_PERCENT_EQUALS,
+ TK_EXCLAMATION,
+ TK_EXCLAMATION_EQUALS,
+ TK_LESS_THAN,
+ TK_LESS_THAN_EQUALS,
+ TK_SHIFT_LEFT,
+ TK_SHIFT_LEFT_EQUALS,
+ TK_GREATER_THAN,
+ TK_GREATER_THAN_EQUALS,
+ TK_SHIFT_RIGHT,
+ TK_SHIFT_RIGHT_EQUALS,
+ TK_CARET,
+ TK_CARET_EQUALS,
+ TK_AMPERSAND,
+ TK_AMPERSAND_EQUALS,
+ TK_PIPE,
+ TK_PIPE_EQUALS,
+ TK_TILDE, /* not followed by a TK_*_EQUALS enumerator */
+ TK_EQUALS, /* not followed by a TK_*_EQUALS enumerator */
+
+ /* Miscellaneous */
+ TK_COMMA,
+ TK_DOT,
+ TK_COLON,
+ TK_SEMICOLON,
+ TK_LPAREN,
+ TK_RPAREN,
+ TK_LBRACE,
+ TK_RBRACE,
+ TK_LBRACK,
+ TK_RBRACK /* last enumerator; append new kinds with care, since the order above is relied upon */
+} token_kind_t;
+
+struct token { /* one token; NOTE(review): field meanings below are inferred from names - confirm against the lexer */
+ token_kind_t kind;
+
+ char *fname; /* presumably the source file name */
+ int line, col; /* presumably the token's position in the source */
+ char *pos; /* presumably points at the token text... */
+ size_t len; /* ...with len giving its length */
+
+ union { /* anonymous union: hash and value share the same storage */
+ hash_t hash; /* hash_t is declared in "hash.h" */
+ uint64_t value;
+ };
+};
+
+#endif /* !_LEXER_TOKEN_H */