diff options
author | Quinn Stephens <quinn@osmora.org> | 2025-06-07 23:03:12 -0400 |
---|---|---|
committer | Quinn Stephens <quinn@osmora.org> | 2025-06-07 23:03:12 -0400 |
commit | 405d0c32ba8a6a065c2a8767295490e4add20498 (patch) | |
tree | 13163943514d9b9e5583e62128c9f92e8d3dd60e /src | |
parent | c395bce5617a4529036ef75e89be336b396eb880 (diff) |
Refactor and begin parser
* Added token flags
* Added `int` keyword
* Moved code from main.c to parser/parser.c
* Began work on parsing declarations
Signed-off-by: Quinn Stephens <quinn@osmora.org>
Diffstat (limited to 'src')
-rw-r--r-- | src/lexer/keywords.c | 15 | ||||
-rw-r--r-- | src/lexer/lexer.c | 4 | ||||
-rw-r--r-- | src/main.c | 35 | ||||
-rw-r--r-- | src/parser/parser.c | 99 |
4 files changed, 116 insertions, 37 deletions
diff --git a/src/lexer/keywords.c b/src/lexer/keywords.c index 7bb6b47..30eb2a1 100644 --- a/src/lexer/keywords.c +++ b/src/lexer/keywords.c @@ -34,7 +34,7 @@ #include "lexer/keywords.h" #include "log.h" -#define KEYWORD_COUNT 1 +#define KEYWORD_COUNT 2 #define KEYWORD_MAP_ROWS 16 @@ -43,13 +43,15 @@ static struct hashmap map; static struct { const char *str; - enum token_kind value; + enum token_kind tok_kind; + uint8_t tok_flags; } info[KEYWORD_COUNT] = { - { "void", TK_VOID } + { "void", TK_VOID, TF_BUILTIN_TYPE }, + { "int" , TK_INT , TF_BUILTIN_TYPE } }; static void -add_keyword(const char *str, enum token_kind value) +add_keyword(const char *str, enum token_kind tok_kind, uint8_t tok_flags) { struct keyword *kwd; @@ -60,7 +62,8 @@ add_keyword(const char *str, enum token_kind value) } kwd->len = strlen(str); - kwd->value = value; + kwd->tok_kind = tok_kind; + kwd->tok_flags = tok_flags; kwd->hashmap_entry.hash = hash(str, kwd->len); hashmap_add(map, &kwd->hashmap_entry); @@ -93,6 +96,6 @@ keywords_init(void) /* Register all keywords */ for (int k = 0; k < KEYWORD_COUNT; k++) { - add_keyword(info[k].str, info[k].value); + add_keyword(info[k].str, info[k].tok_kind, info[k].tok_flags); } } diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 183bd76..9be4cb1 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -63,7 +63,8 @@ lex_identifier(struct lexer *ctx, struct token *tok) /* Look for a keyword matching the identifier */ kwd = keywords_find(tok); if (kwd != NULL) { - tok->kind = kwd->value; + tok->kind = kwd->tok_kind; + tok->flags |= kwd->tok_flags; } else { tok->kind = TK_IDENTIFIER; } @@ -78,6 +79,7 @@ lexer_next(struct lexer *ctx, struct token *tok) skip_whitespace(ctx); + tok->flags = TF_NONE; tok->pos = ctx->pos; tok->line = ctx->line; tok->col = (int)(tok->pos - ctx->line_start) + 1; @@ -27,53 +27,28 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <stdarg.h> -#include <stdio.h> #include <stdlib.h> #include "lexer.h" #include "log.h" +#include "parser.h" -static const char *src = "void main"; - -static void -tok_error(struct token *tok, const char *fmt, ...) -{ - va_list args; - - fprintf(stderr, "\033[1;97m%d:%d: \033[1;91merror: \033[0m", tok->line, tok->col); - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); -} +static const char *src = "int test\nint main"; int main(int argc, char **argv) { struct lexer lexer; - struct token tok; (void)argc; (void)argv; if (!lexer_init(&lexer, src)) { + log_error("Failed to initialize lexer\n"); return EXIT_FAILURE; } - while (lexer_next(&lexer, &tok)) { - if (tok.kind == TK_EOF) { - return EXIT_SUCCESS; - } - - if (tok.kind == TK_UNKNOWN) { - tok_error(&tok, "unrecognized token\n"); - return EXIT_FAILURE; - } - - if (tok.kind == TK_VOID) { - log_debug("got void\n"); - } else if (tok.kind == TK_IDENTIFIER) { - log_debug("got identifier \"%.*s\"\n", tok.len, tok.pos); - } + if (!parser_parse(&lexer)) { + return EXIT_FAILURE; } return EXIT_SUCCESS; diff --git a/src/parser/parser.c b/src/parser/parser.c new file mode 100644 index 0000000..065bb32 --- /dev/null +++ b/src/parser/parser.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2025 Quinn Stephens and the OSMORA team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. 
Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "lexer.h"
+#include "log.h"
+#include "parser.h"
+
+/*
+ * Report an error located at a token.
+ *
+ * Writes a "<line>:<col>: error: " prefix to stderr (position in bold
+ * bright white, "error:" in bold bright red, using ANSI SGR escapes),
+ * followed by the printf-style message built from @fmt and the variadic
+ * arguments.  No newline is appended; callers put it in @fmt.
+ *
+ * @tok: Token whose line and column are printed
+ * @fmt: printf-style format string for the message
+ */
+static void
+tok_error(const struct token *tok, const char *fmt, ...)
+{
+    va_list args;
+
+    fprintf(stderr, "\033[1;97m%d:%d: \033[1;91merror: \033[0m", tok->line, tok->col);
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+}
+
+/*
+ * Parse a declaration of the form "<type> <identifier>".
+ *
+ * On entry @tok holds the current token, which must be the "void" or
+ * "int" keyword.  On success @tok holds the token that follows the
+ * declared identifier.
+ *
+ * @lexer: Lexer that further tokens are read from
+ * @tok: Current token on entry; token after the declaration on success
+ *
+ * Returns true if a declaration was parsed, false after reporting an error.
+ */
+static bool
+parse_declaration(struct lexer *lexer, struct token *tok)
+{
+    /*
+     * TODO: Support more types.
+     */
+    if (tok->kind != TK_VOID && tok->kind != TK_INT) {
+        tok_error(tok, "expected \"void\" or \"int\"\n");
+        return false;
+    }
+
+    /*
+     * Check the lexer result the same way parser_parse() does for the
+     * first token instead of inspecting @tok after a failed read.
+     *
+     * NOTE(review): assumes lexer_next() returns true when it produces
+     * TK_EOF, as the TK_EOF loop in parser_parse() implies - confirm.
+     */
+    if (!lexer_next(lexer, tok)) {
+        log_error("failed to get next token\n");
+        return false;
+    }
+
+    if (tok->kind != TK_IDENTIFIER) {
+        tok_error(tok, "expected identifier\n");
+        return false;
+    }
+
+    /* The precision consumed by "%.*s" must be passed as an int */
+    log_debug("Parsed declaration of \"%.*s\"\n", (int)tok->len, tok->pos);
+
+    /* Step past the identifier so the caller sees the following token */
+    if (!lexer_next(lexer, tok)) {
+        log_error("failed to get next token\n");
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Parse every token produced by @lexer until TK_EOF.
+ *
+ * The input is currently handled as a plain sequence of declarations;
+ * each one is delegated to parse_declaration().
+ *
+ * @lexer: Initialized lexer to read tokens from
+ *
+ * Returns true if TK_EOF was reached without an error, false otherwise.
+ */
+bool
+parser_parse(struct lexer *lexer)
+{
+    struct token tok;
+
+    log_debug("Parsing...\n");
+
+    if (!lexer_next(lexer, &tok)) {
+        log_error("failed to get first token\n");
+        return false;
+    }
+
+    while (tok.kind != TK_EOF) {
+        if (tok.kind == TK_UNKNOWN) {
+            tok_error(&tok, "unrecognized token\n");
+            return false;
+        }
+
+        /*
+         * Reject anything that cannot start a declaration.  Without this
+         * the token would never be consumed and the loop's status flag
+         * would be read before ever being assigned.
+         */
+        if (!(tok.flags & TF_BUILTIN_TYPE) && tok.kind != TK_IDENTIFIER) {
+            tok_error(&tok, "unexpected token\n");
+            return false;
+        }
+
+        if (!parse_declaration(lexer, &tok)) {
+            return false;
+        }
+    }
+
+    return true;
+}