summary refs log tree commit diff
path: root/src/lexer/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer/lexer.c')
-rw-r--r-- src/lexer/lexer.c 116
1 files changed, 116 insertions, 0 deletions
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
new file mode 100644
index 0000000..183bd76
--- /dev/null
+++ b/src/lexer/lexer.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2025 Quinn Stephens and the OSMORA team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "lexer.h"
+#include "lexer/char_info.h"
+#include "lexer/keywords.h"
+#include "log.h"
+
+/*
+ * Advance ctx->pos past a run of whitespace characters.
+ *
+ * The scan stops at the first character that char_info does not flag as
+ * CHAR_WHITESPACE. Each character that is also flagged CHAR_VERT_WS
+ * increments ctx->line and moves ctx->line_start to the character that
+ * follows it, so columns computed later are relative to the current line.
+ */
+static void
+skip_whitespace(struct lexer *ctx)
+{
+    unsigned char c;
+
+    for (;;) {
+        /*
+         * Index the table through unsigned char: plain char may be
+         * signed, and a byte >= 0x80 would otherwise become a negative
+         * array index.
+         * NOTE(review): assumes char_info has an entry for every byte
+         * value (0-255) - confirm against lexer/char_info.h.
+         */
+        c = (unsigned char)*ctx->pos;
+        if (!(char_info[c] & CHAR_WHITESPACE)) {
+            break;
+        }
+
+        if (char_info[c] & CHAR_VERT_WS) {
+            ctx->line++;
+            ctx->line_start = ctx->pos + 1;
+        }
+
+        ctx->pos++;
+    }
+}
+
+/*
+ * Lex an identifier or keyword whose first character is at tok->pos.
+ *
+ * The caller must already have set tok->pos to ctx->pos and checked that
+ * the character there may start an identifier; that first character is
+ * consumed without being re-examined. On return:
+ *   - ctx->pos points at the first character that is neither flagged
+ *     CHAR_ALNUM nor an underscore,
+ *   - tok->len and tok->hash describe the identifier text,
+ *   - tok->kind is the matching keyword's value if keywords_find()
+ *     returns one, otherwise TK_IDENTIFIER.
+ */
+static void
+lex_identifier(struct lexer *ctx, struct token *tok)
+{
+    struct keyword *kwd;
+    unsigned char c;
+
+    /* Find end of identifier */
+    for (;;) {
+        ctx->pos++;
+
+        /*
+         * Index the table through unsigned char: plain char may be
+         * signed, and a byte >= 0x80 would otherwise become a negative
+         * array index.
+         * NOTE(review): assumes char_info has an entry for every byte
+         * value (0-255) - confirm against lexer/char_info.h.
+         */
+        c = (unsigned char)*ctx->pos;
+        if (!(char_info[c] & CHAR_ALNUM) && c != '_') {
+            break;
+        }
+    }
+
+    /* Calculate length and hash */
+    tok->len = (size_t)(ctx->pos - tok->pos);
+    tok->hash = hash(tok->pos, tok->len);
+
+    /* Look for a keyword matching the identifier */
+    kwd = keywords_find(tok);
+    if (kwd != NULL) {
+        tok->kind = kwd->value;
+    } else {
+        tok->kind = TK_IDENTIFIER;
+    }
+}
+
+/*
+ * Read the next token from the lexer's source text into *tok.
+ *
+ * Leading whitespace is skipped, then tok->pos, tok->line and the 1-based
+ * tok->col are recorded for the token start. The token is classified as:
+ *   - an identifier or keyword (see lex_identifier()) when it starts with
+ *     a CHAR_ALPHA character or an underscore;
+ *   - TK_EOF with length 0 at the terminating NUL; the position is not
+ *     advanced, so further calls keep returning TK_EOF;
+ *   - TK_UNKNOWN with length 1 for anything else; the character is
+ *     consumed so a caller looping over tokens always makes progress.
+ *
+ * tok->hash is only written for identifiers and keywords.
+ *
+ * Returns false if ctx or tok is NULL, true otherwise.
+ */
+bool
+lexer_next(struct lexer *ctx, struct token *tok)
+{
+    unsigned char c;
+
+    if (ctx == NULL || tok == NULL) {
+        return false;
+    }
+
+    skip_whitespace(ctx);
+
+    tok->pos = ctx->pos;
+    tok->line = ctx->line;
+    tok->col = (int)(tok->pos - ctx->line_start) + 1;
+
+    /*
+     * Index the table through unsigned char: plain char may be signed, and
+     * a byte >= 0x80 would otherwise become a negative array index.
+     * NOTE(review): assumes char_info has an entry for every byte value
+     * (0-255) - confirm against lexer/char_info.h.
+     */
+    c = (unsigned char)*ctx->pos;
+
+    if ((char_info[c] & CHAR_ALPHA) || c == '_') {
+        lex_identifier(ctx, tok);
+        return true;
+    }
+
+    if (c == '\0') {
+        tok->kind = TK_EOF;
+        tok->len = 0;
+        return true;
+    }
+
+    /* Unrecognized character: report it and step over it */
+    tok->kind = TK_UNKNOWN;
+    tok->len = 1;
+    ctx->pos++;
+    return true;
+}
+
+/*
+ * Prepare a lexer to read tokens from a source string.
+ *
+ * The lexer stores pointers into src rather than copying it. Scanning
+ * starts at the beginning of src on line 1, and keywords_init() is called
+ * on every successful initialization.
+ *
+ * Returns false if ctx or src is NULL, true otherwise.
+ */
+bool
+lexer_init(struct lexer *ctx, const char *src)
+{
+    log_debug("Initializing lexer...\n");
+
+    if (ctx == NULL) {
+        return false;
+    }
+    if (src == NULL) {
+        return false;
+    }
+
+    /* Both cursors start at the first character of the first line */
+    ctx->line = 1;
+    ctx->pos = src;
+    ctx->line_start = ctx->pos;
+
+    /* TODO: Only do this once */
+    keywords_init();
+
+    return true;
+}