summaryrefslogtreecommitdiff
path: root/src/lexer
diff options
context:
space:
mode:
authorIan Moffett <ian@osmora.org>2025-06-07 19:25:40 -0400
committerIan Moffett <ian@osmora.org>2025-06-07 19:25:40 -0400
commitc395bce5617a4529036ef75e89be336b396eb880 (patch)
treefb0f7935131f433183f2f4778279e80d1a19b88b /src/lexer
Import OCC sources to OSMORA.ORG
Signed-off-by: Ian Moffett <ian@osmora.org> Signed-off-by: Quinn Stephens <quinn@osmora.org>
Diffstat (limited to 'src/lexer')
-rw-r--r--src/lexer/char_info.c120
-rw-r--r--src/lexer/keywords.c98
-rw-r--r--src/lexer/lexer.c116
3 files changed, 334 insertions, 0 deletions
diff --git a/src/lexer/char_info.c b/src/lexer/char_info.c
new file mode 100644
index 0000000..d66c651
--- /dev/null
+++ b/src/lexer/char_info.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2025 Quinn Stephens and the OSMORA team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "lexer/char_info.h"
+
+/*
+ * Character classification table, indexed by character code.
+ *
+ * The 128 initializers below cover the ASCII range 0x00-0x7F, sixteen
+ * codes per group, with a comment above each group naming the characters
+ * in order. Each entry holds the CHAR_* flag(s) for that character; 0
+ * means the character has no class here (note that '_' is 0, so users of
+ * this table test for it separately).
+ *
+ * TAB, CR and space are horizontal whitespace; LF, VT and FF are vertical
+ * whitespace. A-F and a-f use CHAR_XUPPER/CHAR_XLOWER instead of
+ * CHAR_UPPER/CHAR_LOWER - presumably these mark hex-digit letters and
+ * also imply the plain letter class; confirm in lexer/char_info.h.
+ *
+ * NOTE(review): CHAR_INFO_COUNT is defined elsewhere; if it is larger
+ * than 128, the remaining entries are implicitly zero.
+ */
+uint8_t char_info[CHAR_INFO_COUNT] = {
+ /*
+ NUL SOH STX ETX
+ EOT ENQ ACK BEL
+ BS TAB LF VT
+ FF CR SO SI
+ */
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+ 0 , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_VERT_WS,
+ CHAR_VERT_WS, CHAR_HORZ_WS, 0 , 0 ,
+
+ /*
+ DLE DC1 DC2 DC3
+ DC4 NAK SYN ETB
+ CAN EM SUB ESC
+ FS GS RS US
+ */
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+
+ /*
+ ! " #
+ $ % & '
+ ( ) * +
+ , - . /
+ */
+ CHAR_HORZ_WS, 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+
+ /*
+ 0 1 2 3
+ 4 5 6 7
+ 8 9 : ;
+ < = > ?
+ */
+ CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT ,
+ CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT ,
+ CHAR_DIGIT , CHAR_DIGIT , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+
+ /*
+ @ A B C
+ D E F G
+ H I J K
+ L M N O
+ */
+ 0 , CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER ,
+ CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER , CHAR_UPPER ,
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
+
+ /*
+ P Q R S
+ T U V W
+ X Y Z [
+ \ ] ^ _
+ */
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , 0 ,
+ 0 , 0 , 0 , 0 ,
+
+ /*
+ ` a b c
+ d e f g
+ h i j k
+ l m n o
+ */
+ 0 , CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER ,
+ CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER , CHAR_LOWER ,
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
+
+ /*
+ p q r s
+ t u v w
+ x y z {
+ | } ~ DEL
+ */
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , 0 ,
+ 0 , 0 , 0 , 0 ,
+};
diff --git a/src/lexer/keywords.c b/src/lexer/keywords.c
new file mode 100644
index 0000000..7bb6b47
--- /dev/null
+++ b/src/lexer/keywords.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2025 Quinn Stephens and the OSMORA team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "hash.h"
+#include "lexer/keywords.h"
+#include "log.h"
+
+/* Number of entries in the keyword table below */
+#define KEYWORD_COUNT 1
+
+/* Number of rows backing the keyword hashmap */
+#define KEYWORD_MAP_ROWS 16
+
+/* Row storage and the hashmap itself; wired together in keywords_init() */
+static struct list map_rows[KEYWORD_MAP_ROWS];
+static struct hashmap map;
+
+/*
+ * Keyword spellings and the token kind each one maps to.
+ * The table is only ever read, so it is const.
+ */
+static const struct {
+    const char *str;
+    enum token_kind value;
+} info[KEYWORD_COUNT] = {
+    { "void", TK_VOID }
+};
+
+/*
+ * Allocate a keyword record for `str` and insert it into the keyword map.
+ *
+ * The record stores the length of the spelling, its token kind and the
+ * hash of the spelling; the spelling itself is not stored in the record.
+ * If the allocation fails, an error is logged and the keyword is skipped.
+ *
+ * str:   NUL-terminated keyword spelling
+ * value: token kind produced when the keyword is matched
+ */
+static void
+add_keyword(const char *str, enum token_kind value)
+{
+    struct keyword *kwd;
+
+    /* Size the allocation from the pointer so it follows the type of kwd */
+    kwd = malloc(sizeof *kwd);
+    if (kwd == NULL) {
+        log_error("Failed to allocate memory for keyword \"%s\"\n", str);
+        return;
+    }
+
+    kwd->len = strlen(str);
+    kwd->value = value;
+
+    /* The map is keyed by the hash of the spelling */
+    kwd->hashmap_entry.hash = hash(str, kwd->len);
+    hashmap_add(map, &kwd->hashmap_entry);
+}
+
+/*
+ * Look up the keyword whose hash equals tok->hash.
+ *
+ * Returns the keyword record when one is found and its length equals
+ * tok->len, otherwise NULL.
+ *
+ * NOTE(review): only the hash and the length are compared, not the
+ * characters, so a same-length hash collision would be reported as a
+ * keyword. The cast also assumes the hashmap entry is the first member
+ * of struct keyword - confirm in lexer/keywords.h.
+ */
+struct keyword *
+keywords_find(struct token *tok)
+{
+    struct keyword *match = (struct keyword *)hashmap_find(map, tok->hash);
+
+    if (match == NULL) {
+        return NULL;
+    }
+
+    /* Reject candidates whose length differs from the token's */
+    if (match->len != tok->len) {
+        return NULL;
+    }
+
+    return match;
+}
+
+/*
+ * Set up the keyword hashmap and register every entry of the keyword
+ * table.
+ *
+ * Only the first call does any work. Each registration allocates a
+ * keyword record, so repeating the setup would re-initialize the map
+ * and allocate a fresh set of records; later calls therefore return
+ * immediately.
+ */
+void
+keywords_init(void)
+{
+    static bool initialized = false;
+
+    if (initialized) {
+        return;
+    }
+
+    log_debug("Initializing keywords...\n");
+
+    /* Initialize hashmap */
+    map.rows = map_rows;
+    map.row_count = KEYWORD_MAP_ROWS;
+    hashmap_init(map);
+
+    /* Register all keywords */
+    for (int k = 0; k < KEYWORD_COUNT; k++) {
+        add_keyword(info[k].str, info[k].value);
+    }
+
+    initialized = true;
+}
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
new file mode 100644
index 0000000..183bd76
--- /dev/null
+++ b/src/lexer/lexer.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2025 Quinn Stephens and the OSMORA team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "lexer.h"
+#include "lexer/char_info.h"
+#include "lexer/keywords.h"
+#include "log.h"
+
+/*
+ * Return the classification flags for the character at `p`.
+ *
+ * The character is converted to unsigned char before indexing so that
+ * bytes >= 0x80 cannot become a negative index where plain char is
+ * signed. Codes outside the table have no flags.
+ */
+static uint8_t
+char_flags(const char *p)
+{
+    unsigned char c = (unsigned char)*p;
+
+    if (c >= CHAR_INFO_COUNT) {
+        return 0;
+    }
+
+    return char_info[c];
+}
+
+/*
+ * Advance ctx->pos past any run of whitespace.
+ *
+ * Each vertical-whitespace character bumps the line counter and moves
+ * line_start to the character after it. The loop stops at the first
+ * character without a whitespace flag, which includes the terminating
+ * NUL.
+ */
+static void
+skip_whitespace(struct lexer *ctx)
+{
+    for (;;) {
+        uint8_t flags = char_flags(ctx->pos);
+
+        if (!(flags & CHAR_WHITESPACE)) {
+            break;
+        }
+
+        if (flags & CHAR_VERT_WS) {
+            ctx->line++;
+            ctx->line_start = ctx->pos + 1;
+        }
+
+        ctx->pos++;
+    }
+}
+
+/*
+ * Lex an identifier or keyword starting at tok->pos.
+ *
+ * The caller has already checked that the first character may start an
+ * identifier. On return ctx->pos is just past the identifier, tok->len
+ * and tok->hash describe it, and tok->kind is the keyword's token kind
+ * when keywords_find() matches, otherwise TK_IDENTIFIER.
+ */
+static void
+lex_identifier(struct lexer *ctx, struct token *tok)
+{
+    struct keyword *kwd;
+
+    /* Find end of identifier; the first character is already accepted */
+    ctx->pos++;
+    while ((char_flags(ctx->pos) & CHAR_ALNUM) || *ctx->pos == '_') {
+        ctx->pos++;
+    }
+
+    /* Calculate length and hash */
+    tok->len = (size_t)(ctx->pos - tok->pos);
+    tok->hash = hash(tok->pos, tok->len);
+
+    /* Look for a keyword matching the identifier */
+    kwd = keywords_find(tok);
+    if (kwd != NULL) {
+        tok->kind = kwd->value;
+    } else {
+        tok->kind = TK_IDENTIFIER;
+    }
+}
+
+/*
+ * Produce the next token from the lexer's input.
+ *
+ * Skips leading whitespace, records the token's position, line and
+ * 1-based column, then classifies it:
+ *   - a letter or '_' starts an identifier or keyword;
+ *   - the terminating NUL yields TK_EOF with length 0, and the position
+ *     is not advanced, so further calls keep returning TK_EOF;
+ *   - any other character yields TK_UNKNOWN with length 1 and is
+ *     consumed, so repeated calls always make progress.
+ *
+ * tok->hash is only set for identifiers and keywords.
+ *
+ * Returns false only when ctx or tok is NULL, true otherwise.
+ */
+bool
+lexer_next(struct lexer *ctx, struct token *tok)
+{
+    if (ctx == NULL || tok == NULL) {
+        return false;
+    }
+
+    skip_whitespace(ctx);
+
+    tok->pos = ctx->pos;
+    tok->line = ctx->line;
+    tok->col = (int)(tok->pos - ctx->line_start) + 1;
+
+    if ((char_flags(ctx->pos) & CHAR_ALPHA) || *ctx->pos == '_') {
+        lex_identifier(ctx, tok);
+        return true;
+    }
+
+    if (*ctx->pos == '\0') {
+        tok->kind = TK_EOF;
+        tok->len = 0;
+        return true;
+    }
+
+    /* Consume the unrecognized character so the next call moves on */
+    tok->kind = TK_UNKNOWN;
+    tok->len = 1;
+    ctx->pos++;
+    return true;
+}
+
+/*
+ * Prepare `ctx` to lex the NUL-terminated string `src`.
+ *
+ * Logs a debug message first, then validates the arguments. On success
+ * the position and line start both point at the beginning of `src`, the
+ * line counter is 1, and keywords_init() has been called.
+ *
+ * Returns false when ctx or src is NULL, true otherwise.
+ */
+bool
+lexer_init(struct lexer *ctx, const char *src)
+{
+    log_debug("Initializing lexer...\n");
+
+    if (ctx != NULL && src != NULL) {
+        ctx->pos = src;
+        ctx->line_start = src;
+        ctx->line = 1;
+
+        /* TODO: Only do this once */
+        keywords_init();
+
+        return true;
+    }
+
+    return false;
+}