summaryrefslogtreecommitdiff
path: root/compiler/parser
diff options
context:
space:
mode:
authorIan Moffett <ian@osmora.org>2024-11-01 23:46:08 -0400
committerIan Moffett <ian@osmora.org>2024-11-01 23:46:08 -0400
commita515dfb3b8f8e999362db7a6b52b3104c03b750a (patch)
treed0180f0cbc39d9c3e367af30791ad774e4d419ff /compiler/parser
Import quark sources
Signed-off-by: Ian Moffett <ian@osmora.org>
Diffstat (limited to 'compiler/parser')
-rw-r--r--compiler/parser/parser.c84
-rw-r--r--compiler/parser/type.c254
2 files changed, 338 insertions, 0 deletions
diff --git a/compiler/parser/parser.c b/compiler/parser/parser.c
new file mode 100644
index 0000000..aeec48b
--- /dev/null
+++ b/compiler/parser/parser.c
@@ -0,0 +1,84 @@
+/*
+ * Quark parser.
+ * Turns tokens into an AST (Abstract Syntax Tree).
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "debug.h"
+#include "hashmap.h"
+#include "parser/type.h"
+#include "parser.h"
+
+/*
+ * Register a built-in type in a type registry.
+ *
+ * map:    hashmap the new type entry is added to
+ * name:   NUL-terminated type name; the pointer itself is stored (the
+ *         string is not copied), so it must outlive the registry
+ * size:   value stored in the type's `size` field
+ * n_ptrs: value stored in the type's `n_ptrs` field
+ *
+ * Terminates the process if the type cannot be allocated.
+ */
+static void
+add_builtin(struct hashmap *map, char *name, size_t size, int n_ptrs)
+{
+    struct type *type;
+    size_t name_len;
+
+    name_len = strlen(name);
+
+    /* calloc: fields not assigned below start zeroed, not indeterminate */
+    type = calloc(1, sizeof(*type));
+    if (type == NULL) {
+        fprintf(stderr, "out of memory while registering built-in type \"%s\"\n", name);
+        exit(EXIT_FAILURE);
+    }
+
+    type->hashmap_entry.hash = hash_data(name, name_len);
+    type->name = name;
+    type->name_len = name_len;
+    type->size = size;
+    type->n_ptrs = n_ptrs;
+
+    hashmap_add(map, &type->hashmap_entry);
+}
+
+/*
+ * Print an error diagnostic for a token to stderr.
+ *
+ * The output is a bold "file:line:col: " prefix, a red "error:" tag,
+ * and then the printf-style message built from `fmt` and the arguments
+ * that follow it.  No newline is appended; callers put it in `fmt`.
+ */
+void
+tok_error(struct token *tok, const char *fmt, ...)
+{
+    FILE *out = stderr;
+    va_list args;
+
+    /* Location prefix and coloured severity tag */
+    fprintf(out, "\033[1m%s:%d:%d: \033[31merror:\033[0m ",
+            tok->fname, tok->line, tok->col);
+
+    /* Caller-supplied message */
+    va_start(args, fmt);
+    vfprintf(out, fmt, args);
+    va_end(args);
+}
+
+/*
+ * Print a warning diagnostic for a token to stdout.
+ *
+ * The output is a bold "file:line:col: " prefix, a yellow "warning:"
+ * tag, and then the printf-style message built from `fmt` and the
+ * arguments that follow it.  No newline is appended.
+ *
+ * NOTE(review): this writes to stdout while tok_error() writes to
+ * stderr -- confirm the split is intentional.
+ */
+void
+tok_warn(struct token *tok, const char *fmt, ...)
+{
+    FILE *out = stdout;
+    va_list args;
+
+    /* Location prefix and coloured severity tag */
+    fprintf(out, "\033[1m%s:%d:%d: \033[33mwarning:\033[0m ",
+            tok->fname, tok->line, tok->col);
+
+    /* Caller-supplied message */
+    va_start(args, fmt);
+    vfprintf(out, fmt, args);
+    va_end(args);
+}
+
+/*
+ * Parse the whole token stream of `ctx`.
+ *
+ * Reads tokens until TK_EOF.  A TK_TYPE token starts a type definition
+ * and is handed to parse_type(); any other token is reported as
+ * unexpected and skipped.
+ */
+void
+parser_parse(struct parser *ctx)
+{
+    debug("Parsing...\n");
+
+    for (next_token(ctx); ctx->tok.kind != TK_EOF; ) {
+        if (ctx->tok.kind == TK_TYPE) {
+            /* parse_type() advances the token stream itself */
+            parse_type(ctx);
+            continue;
+        }
+
+        /* Nothing else is valid at top level: report it and move on */
+        tok_error(&ctx->tok, "unexpected \"%.*s\"\n", (int)ctx->tok.len, ctx->tok.pos);
+        next_token(ctx);
+    }
+}
+
+/*
+ * Prepare `ctx` for parsing `source`.
+ *
+ * Initializes the embedded lexer with `source` and registers the
+ * built-in types in ctx->types.  "any" is registered with size 0,
+ * which parse_type_ref() only accepts behind a pointer.
+ *
+ * NOTE(review): ctx->types is used here but not set up by this
+ * function -- presumably the caller initializes it; confirm.
+ */
+void
+parser_init(struct parser *ctx, char *source)
+{
+    /* Built-in types, registered in table order */
+    static const struct {
+        char *name;
+        size_t size;
+        int n_ptrs;
+    } builtins[] = {
+        { "uint32", 4, 0 },
+        { "any", 0, 0 },
+    };
+    size_t i;
+
+    debug("Initializing parser...\n");
+    lexer_init(&ctx->lexer, source);
+
+    for (i = 0; i < sizeof(builtins) / sizeof(builtins[0]); i++) {
+        add_builtin(ctx->types, builtins[i].name, builtins[i].size, builtins[i].n_ptrs);
+    }
+}
diff --git a/compiler/parser/type.c b/compiler/parser/type.c
new file mode 100644
index 0000000..4ac9327
--- /dev/null
+++ b/compiler/parser/type.c
@@ -0,0 +1,254 @@
+/*
+ * Type parser.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include "debug.h"
+#include "hashmap.h"
+#include "lexer/token.h"
+#include "parser/type.h"
+#include "parser.h"
+
+#define HASHMAP_ROWS 8
+
+/*
+ * Allocate a new type named after the token `tok`.
+ *
+ * The hash, name pointer and name length are taken from the token; the
+ * name is not copied, so it keeps pointing at wherever tok->pos points.
+ * All other fields start zeroed, so a field that a later parsing step
+ * does not assign reads as 0 rather than as an indeterminate value.
+ *
+ * Never returns NULL: if allocation fails, the error is reported at
+ * `tok` and the process exits.
+ */
+static struct type *
+type_new(struct token *tok)
+{
+    struct type *typ;
+
+    typ = calloc(1, sizeof(*typ));
+    if (typ == NULL) {
+        tok_error(tok, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+
+    typ->hashmap_entry.hash = tok->hash;
+    typ->name = tok->pos;
+    typ->name_len = tok->len;
+
+    return typ;
+}
+
+/*
+ * Parse a reference to an existing type: a type name followed by zero
+ * or more "*" tokens.
+ *
+ * On entry the current token is the type name.  On success the
+ * resolved type and the number of "*" tokens are stored through
+ * `typ_out` and `n_ptrs_out`, the current token is the first one after
+ * the stars, and true is returned.
+ *
+ * Returns false (outputs untouched) after reporting an error when the
+ * name is not in ctx->types, or when a type of size 0 is referenced
+ * without any "*".  In the first case no token has been consumed.
+ */
+static bool
+parse_type_ref(struct parser *ctx, struct type **typ_out, int *n_ptrs_out)
+{
+    struct type *found;
+    int stars;
+
+    debug("Parsing type reference...\n");
+
+    /* Resolve the current token in the type registry by its hash */
+    found = (struct type*)hashmap_find(ctx->types, ctx->tok.hash);
+    if (found == NULL) {
+        tok_error(&ctx->tok, "type \"%.*s\" not found\n", (int)ctx->tok.len, ctx->tok.pos);
+        return false;
+    }
+
+    /* Every "*" after the name adds one level of indirection */
+    for (stars = 0; next_token(ctx)->kind == TK_STAR; stars++) {
+        continue;
+    }
+
+    /* A zero-sized type may only appear behind at least one pointer */
+    if (stars < 1 && found->size == 0) {
+        tok_error(&ctx->tok, "type \"%.*s\" can only be used in a pointer\n", (int)found->name_len, found->name);
+        return false;
+    }
+
+    *n_ptrs_out = stars;
+    *typ_out = found;
+    return true;
+}
+
+/*
+ * Parse the right-hand side of a type alias ("name" or "name*...").
+ *
+ * Marks `typ` as TYK_ALIAS, stores the pointer depth of the referenced
+ * type in typ->n_ptrs and derives typ->size: the size of a pointer if
+ * the alias is a pointer, otherwise the size of the referenced type.
+ *
+ * Returns false if the type reference could not be parsed.
+ */
+static bool
+parse_alias(struct parser *ctx, struct type *typ)
+{
+    struct type *target;
+
+    debug("Parsing type alias definition...\n");
+
+    typ->kind = TYK_ALIAS;
+
+    if (!parse_type_ref(ctx, &target, &typ->n_ptrs)) {
+        return false;
+    }
+
+    if (typ->n_ptrs < 1) {
+        /* Plain alias: same size as the aliased type */
+        typ->size = target->size;
+    } else {
+        /* Pointer alias: pointer-sized regardless of the target */
+        typ->size = sizeof(void*);
+    }
+
+    return true;
+}
+
+/*
+ * Parse an enum body: "enum { A, B, C }".
+ *
+ * On entry the current token is "enum".  Members are identifiers
+ * separated by ","; a trailing comma and an empty body are accepted.
+ * Each member gets the next value counting up from 0 and is added to
+ * typ->members keyed by the hash of its name.  On normal completion
+ * the current token is the one after "}".
+ *
+ * Returns false if "{" is missing.  Terminates the process if memory
+ * cannot be allocated.
+ *
+ * NOTE(review): syntax errors inside the braces are reported but still
+ * return true, so the caller's ";" check is what rejects the
+ * definition -- confirm this is intended.
+ * NOTE(review): typ->size is not assigned for enums here.
+ */
+static bool
+parse_enum(struct parser *ctx, struct type *typ)
+{
+    struct enum_member *mem;
+    uint64_t value;
+
+    debug("Parsing enum definition...\n");
+
+    typ->kind = TYK_ENUM;
+    typ->n_ptrs = 0;    /* an enum is not itself a pointer type */
+
+    if (next_token(ctx)->kind != TK_LBRACE) {
+        tok_error(&ctx->tok, "expected \"{\" after \"enum\"\n");
+        return false;
+    }
+
+    typ->members.rows = malloc(HASHMAP_ROWS * sizeof(struct list));
+    if (typ->members.rows == NULL) {
+        tok_error(&ctx->tok, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    typ->members.n_rows = HASHMAP_ROWS;
+    hashmap_init(&typ->members);
+
+    value = 0;
+    next_token(ctx);
+    while (ctx->tok.kind != TK_RBRACE) {
+        if (ctx->tok.kind != TK_IDENTIFIER) {
+            tok_error(&ctx->tok, "expected enum member name\n");
+            return true;
+        }
+
+        mem = malloc(sizeof(*mem));
+        if (mem == NULL) {
+            tok_error(&ctx->tok, "out of memory\n");
+            exit(EXIT_FAILURE);
+        }
+        mem->hashmap_entry.hash = ctx->tok.hash;
+        mem->name = ctx->tok.pos;
+        mem->name_len = ctx->tok.len;
+        mem->value = value++;
+        hashmap_add(&typ->members, &mem->hashmap_entry);
+
+        /* "," continues the list (and may directly precede "}") */
+        if (next_token(ctx)->kind == TK_COMMA) {
+            next_token(ctx);
+            continue;
+        }
+
+        if (ctx->tok.kind != TK_RBRACE) {
+            tok_error(&ctx->tok, "expected \",\" or \"}\" after enum member name\n");
+            return true;
+        }
+    }
+
+    /* Step past "}" */
+    next_token(ctx);
+    return true;
+}
+
+/*
+ * Parse a struct body: "struct { type name; type* name; ... }".
+ *
+ * On entry the current token is "struct".  Each member is a type
+ * reference followed by the member name and ";".  Members are laid out
+ * back to back (no padding): a member's offset is the sum of the sizes
+ * of the members before it, and a pointer member has the size of a
+ * pointer.  Members are added to typ->members keyed by the hash of the
+ * member name.  On normal completion typ->size holds the summed member
+ * sizes and the current token is the one after "}".
+ *
+ * Returns false if "{" is missing.  Terminates the process if memory
+ * cannot be allocated.
+ *
+ * NOTE(review): syntax errors inside the braces are reported but still
+ * return true, so the caller's ";" check is what rejects the
+ * definition -- confirm this is intended.
+ */
+static bool
+parse_struct(struct parser *ctx, struct type *typ)
+{
+    struct struct_member *mem;
+    struct type *mem_typ;
+    int mem_n_ptrs;
+    uint64_t off;
+
+    debug("Parsing struct definition...\n");
+
+    typ->kind = TYK_STRUCT;
+    typ->n_ptrs = 0;    /* a struct is not itself a pointer type */
+
+    if (next_token(ctx)->kind != TK_LBRACE) {
+        tok_error(&ctx->tok, "expected \"{\" after \"struct\"\n");
+        return false;
+    }
+
+    typ->members.rows = malloc(HASHMAP_ROWS * sizeof(struct list));
+    if (typ->members.rows == NULL) {
+        tok_error(&ctx->tok, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    typ->members.n_rows = HASHMAP_ROWS;
+    hashmap_init(&typ->members);
+
+    off = 0;
+    next_token(ctx);
+    while (ctx->tok.kind != TK_RBRACE) {
+        if (ctx->tok.kind != TK_IDENTIFIER) {
+            tok_error(&ctx->tok, "expected type name\n");
+            return true;
+        }
+
+        /*
+         * Parse into locals first so that nothing is allocated (and
+         * nothing can leak) until the member is known to be well formed.
+         */
+        if (!parse_type_ref(ctx, &mem_typ, &mem_n_ptrs)) {
+            return true;
+        }
+
+        if (ctx->tok.kind != TK_IDENTIFIER) {
+            tok_error(&ctx->tok, "expected struct member name\n");
+            return true;
+        }
+
+        mem = malloc(sizeof(*mem));
+        if (mem == NULL) {
+            tok_error(&ctx->tok, "out of memory\n");
+            exit(EXIT_FAILURE);
+        }
+
+        /* Key the member by its own name, not by its type's name */
+        mem->hashmap_entry.hash = ctx->tok.hash;
+        mem->name = ctx->tok.pos;
+        mem->name_len = ctx->tok.len;
+        mem->typ = mem_typ;
+        mem->n_ptrs = mem_n_ptrs;
+        mem->off = off;
+
+        if (mem->n_ptrs >= 1) {
+            mem->size = sizeof(void*);
+        } else {
+            mem->size = mem->typ->size;
+        }
+        off += mem->size;
+
+        hashmap_add(&typ->members, &mem->hashmap_entry);
+
+        if (next_token(ctx)->kind != TK_SEMICOLON) {
+            tok_error(&ctx->tok, "expected \";\" after struct member name\n");
+            return true;
+        }
+        next_token(ctx);
+    }
+
+    /* The struct occupies the sum of its members' sizes */
+    typ->size = off;
+
+    /* Step past "}" */
+    next_token(ctx);
+    return true;
+}
+
+/*
+ * Parse a type definition:
+ *
+ *     type NAME : <existing type> ;
+ *     type NAME : enum { ... } ;
+ *     type NAME : struct { ... } ;
+ *
+ * On entry the current token is "type".  On success the new type is
+ * added to ctx->types and the current token is the one after ";".
+ * On any error a diagnostic is printed, the partially built type is
+ * freed, and the function returns without registering anything.
+ *
+ * NOTE(review): only the type object itself is freed on error; a
+ * member table already allocated for an enum or struct is not
+ * released here.
+ */
+void
+parse_type(struct parser *ctx)
+{
+    struct type *typ;
+    bool success;
+
+    debug("Parsing type definition...\n");
+
+    /* Type name */
+    if (next_token(ctx)->kind != TK_IDENTIFIER) {
+        tok_error(&ctx->tok, "expected identifier after \"type\"\n");
+        return;
+    }
+
+    /* Ensure type does not already exist */
+    typ = (struct type*)hashmap_find(ctx->types, ctx->tok.hash);
+    if (typ != NULL) {
+        tok_error(&ctx->tok, "type \"%.*s\" already defined\n", (int)ctx->tok.len, ctx->tok.pos);
+        return;
+    }
+
+    /* Create type */
+    typ = type_new(&ctx->tok);
+    if (typ == NULL) {
+        tok_error(&ctx->tok, "out of memory\n");
+        return;
+    }
+
+    if (next_token(ctx)->kind != TK_COLON) {
+        tok_error(&ctx->tok, "expected \":\" after type name\n");
+        goto discard;
+    }
+
+    /* The token after ":" selects the kind of definition */
+    next_token(ctx);
+    switch (ctx->tok.kind) {
+    case TK_IDENTIFIER:
+        success = parse_alias(ctx, typ);
+        break;
+    case TK_ENUM:
+        success = parse_enum(ctx, typ);
+        break;
+    case TK_STRUCT:
+        success = parse_struct(ctx, typ);
+        break;
+    default:
+        tok_error(&ctx->tok, "expected type name, \"enum\" or \"struct\" after \":\"\n");
+        goto discard;
+    }
+
+    if (!success) {
+        goto discard;
+    }
+
+    if (ctx->tok.kind != TK_SEMICOLON) {
+        tok_error(&ctx->tok, "expected \";\" after type definition\n");
+        goto discard;
+    }
+    next_token(ctx);
+
+    /* Add type to parser's registry */
+    hashmap_add(ctx->types, &typ->hashmap_entry);
+    return;
+
+discard:
+    free(typ);
+}