summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Quinn Stephens <quinn@osmora.org> 2025-06-07 23:03:12 -0400
committer: Quinn Stephens <quinn@osmora.org> 2025-06-07 23:03:12 -0400
commit: 405d0c32ba8a6a065c2a8767295490e4add20498 (patch)
tree: 13163943514d9b9e5583e62128c9f92e8d3dd60e /src
parent: c395bce5617a4529036ef75e89be336b396eb880 (diff)
Refactor and begin parser
* Added token flags
* Added `int` keyword
* Moved code from main.c to parser/parser.c
* Began work on parsing declarations

Signed-off-by: Quinn Stephens <quinn@osmora.org>
Diffstat (limited to 'src')
-rw-r--r-- src/lexer/keywords.c 15
-rw-r--r-- src/lexer/lexer.c 4
-rw-r--r-- src/main.c 35
-rw-r--r-- src/parser/parser.c 99
4 files changed, 116 insertions, 37 deletions
diff --git a/src/lexer/keywords.c b/src/lexer/keywords.c
index 7bb6b47..30eb2a1 100644
--- a/src/lexer/keywords.c
+++ b/src/lexer/keywords.c
@@ -34,7 +34,7 @@
#include "lexer/keywords.h"
#include "log.h"
-#define KEYWORD_COUNT 1
+#define KEYWORD_COUNT 2
#define KEYWORD_MAP_ROWS 16
@@ -43,13 +43,15 @@ static struct hashmap map;
static struct {
const char *str;
- enum token_kind value;
+ enum token_kind tok_kind;
+ uint8_t tok_flags;
} info[KEYWORD_COUNT] = {
- { "void", TK_VOID }
+ { "void", TK_VOID, TF_BUILTIN_TYPE },
+ { "int" , TK_INT , TF_BUILTIN_TYPE }
};
static void
-add_keyword(const char *str, enum token_kind value)
+add_keyword(const char *str, enum token_kind tok_kind, uint8_t tok_flags)
{
struct keyword *kwd;
@@ -60,7 +62,8 @@ add_keyword(const char *str, enum token_kind value)
}
kwd->len = strlen(str);
- kwd->value = value;
+ kwd->tok_kind = tok_kind;
+ kwd->tok_flags = tok_flags;
kwd->hashmap_entry.hash = hash(str, kwd->len);
hashmap_add(map, &kwd->hashmap_entry);
@@ -93,6 +96,6 @@ keywords_init(void)
/* Register all keywords */
for (int k = 0; k < KEYWORD_COUNT; k++) {
- add_keyword(info[k].str, info[k].value);
+ add_keyword(info[k].str, info[k].tok_kind, info[k].tok_flags);
}
}
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index 183bd76..9be4cb1 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -63,7 +63,8 @@ lex_identifier(struct lexer *ctx, struct token *tok)
/* Look for a keyword matching the identifier */
kwd = keywords_find(tok);
if (kwd != NULL) {
- tok->kind = kwd->value;
+ tok->kind = kwd->tok_kind;
+ tok->flags |= kwd->tok_flags;
} else {
tok->kind = TK_IDENTIFIER;
}
@@ -78,6 +79,7 @@ lexer_next(struct lexer *ctx, struct token *tok)
skip_whitespace(ctx);
+ tok->flags = TF_NONE;
tok->pos = ctx->pos;
tok->line = ctx->line;
tok->col = (int)(tok->pos - ctx->line_start) + 1;
diff --git a/src/main.c b/src/main.c
index e7a3225..c42239d 100644
--- a/src/main.c
+++ b/src/main.c
@@ -27,53 +27,28 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <stdarg.h>
-#include <stdio.h>
#include <stdlib.h>
#include "lexer.h"
#include "log.h"
+#include "parser.h"
-static const char *src = "void main";
-
-static void
-tok_error(struct token *tok, const char *fmt, ...)
-{
- va_list args;
-
- fprintf(stderr, "\033[1;97m%d:%d: \033[1;91merror: \033[0m", tok->line, tok->col);
- va_start(args, fmt);
- vfprintf(stderr, fmt, args);
- va_end(args);
-}
+static const char *src = "int test\nint main";
int
main(int argc, char **argv)
{
struct lexer lexer;
- struct token tok;
(void)argc;
(void)argv;
if (!lexer_init(&lexer, src)) {
+ log_error("Failed to initialize lexer\n");
return EXIT_FAILURE;
}
- while (lexer_next(&lexer, &tok)) {
- if (tok.kind == TK_EOF) {
- return EXIT_SUCCESS;
- }
-
- if (tok.kind == TK_UNKNOWN) {
- tok_error(&tok, "unrecognized token\n");
- return EXIT_FAILURE;
- }
-
- if (tok.kind == TK_VOID) {
- log_debug("got void\n");
- } else if (tok.kind == TK_IDENTIFIER) {
- log_debug("got identifier \"%.*s\"\n", tok.len, tok.pos);
- }
+ if (!parser_parse(&lexer)) {
+ return EXIT_FAILURE;
}
return EXIT_SUCCESS;
diff --git a/src/parser/parser.c b/src/parser/parser.c
new file mode 100644
index 0000000..065bb32
--- /dev/null
+++ b/src/parser/parser.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2025 Quinn Stephens and the OSMORA team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "lexer.h"
+#include "log.h"
+#include "parser.h"
+
+/*
+ * Print an error message for a token to stderr.
+ *
+ * The message is prefixed with the token's line and column and an "error:"
+ * label wrapped in ANSI escape sequences; `fmt` and the arguments after it
+ * are then formatted printf-style. No trailing newline is appended.
+ */
+static void
+tok_error(struct token *tok, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    fprintf(stderr, "\033[1;97m%d:%d: \033[1;91merror: \033[0m", tok->line, tok->col);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+}
+
+/*
+ * Parse a single declaration: a type keyword followed by an identifier.
+ *
+ * On entry `tok` holds the current token, which must be `void` or `int`.
+ * On success the declared name is logged and `tok` is advanced to the token
+ * that follows the identifier.
+ *
+ * Returns true on success; false if the tokens do not form a declaration
+ * or if the lexer fails to produce the next token.
+ */
+static bool
+parse_declaration(struct lexer *lexer, struct token *tok)
+{
+    /*
+     * TODO: Support more types.
+     */
+    if (tok->kind != TK_VOID && tok->kind != TK_INT) {
+        tok_error(tok, "expected \"void\" or \"int\"\n");
+        return false;
+    }
+
+    /* A failed lexer_next() must not be mistaken for a valid token */
+    if (!lexer_next(lexer, tok)) {
+        log_error("failed to get next token\n");
+        return false;
+    }
+
+    if (tok->kind != TK_IDENTIFIER) {
+        tok_error(tok, "expected identifier\n");
+        return false;
+    }
+
+    log_debug("Parsed declaration of \"%.*s\"\n", tok->len, tok->pos);
+
+    /* Advance past the identifier so the caller sees the following token */
+    if (!lexer_next(lexer, tok)) {
+        log_error("failed to get next token\n");
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Parse every token produced by `lexer` as a sequence of declarations.
+ *
+ * Returns true once TK_EOF is reached without error. Returns false, after
+ * reporting the problem, if the lexer fails to produce the first token, a
+ * token is unrecognized, a token cannot start a declaration, or a
+ * declaration fails to parse.
+ */
+bool
+parser_parse(struct lexer *lexer)
+{
+    struct token tok;
+
+    log_debug("Parsing...\n");
+
+    if (!lexer_next(lexer, &tok)) {
+        log_error("failed to get first token\n");
+        return false;
+    }
+
+    while (tok.kind != TK_EOF) {
+        if (tok.kind == TK_UNKNOWN) {
+            tok_error(&tok, "unrecognized token\n");
+            return false;
+        }
+
+        /*
+         * Only declarations are handled so far. Any other token must be
+         * rejected explicitly: nothing would consume it, so the loop would
+         * never advance.
+         */
+        if (!(tok.flags & TF_BUILTIN_TYPE) && tok.kind != TK_IDENTIFIER) {
+            tok_error(&tok, "unexpected token\n");
+            return false;
+        }
+
+        /* parse_declaration() leaves the following token in `tok` */
+        if (!parse_declaration(lexer, &tok)) {
+            return false;
+        }
+    }
+
+    return true;
+}