summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorIan Moffett <ian@osmora.org>2024-11-01 23:46:08 -0400
committerIan Moffett <ian@osmora.org>2024-11-01 23:46:08 -0400
commita515dfb3b8f8e999362db7a6b52b3104c03b750a (patch)
treed0180f0cbc39d9c3e367af30791ad774e4d419ff /compiler
Import quark sources
Signed-off-by: Ian Moffett <ian@osmora.org>
Diffstat (limited to 'compiler')
-rw-r--r--compiler/debug.c20
-rw-r--r--compiler/hash.c35
-rw-r--r--compiler/hashmap.c39
-rw-r--r--compiler/lexer/char_info.c106
-rw-r--r--compiler/lexer/keywords.c73
-rw-r--r--compiler/lexer/lexer.c282
-rw-r--r--compiler/main.c310
-rw-r--r--compiler/parser/parser.c84
-rw-r--r--compiler/parser/type.c254
9 files changed, 1203 insertions, 0 deletions
diff --git a/compiler/debug.c b/compiler/debug.c
new file mode 100644
index 0000000..d14bca6
--- /dev/null
+++ b/compiler/debug.c
@@ -0,0 +1,20 @@
+/*
+ * Logging utilities.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "debug.h"
+
+void
+__debug(const char *fmt, ...)
+{
+ va_list ap;
+
+ printf("\033[90mdebug:\033[0m ");
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+}
diff --git a/compiler/hash.c b/compiler/hash.c
new file mode 100644
index 0000000..a63691c
--- /dev/null
+++ b/compiler/hash.c
@@ -0,0 +1,35 @@
+/*
+ * 32-bit FNV-1a hash function.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#include "hash.h"
+
+hash_t
+hash_data(const void *data, size_t length)
+{
+ hash_t hash;
+
+ hash = FNV_OFFSET_BASIS;
+ for (size_t i = 0; i < length; i++) {
+ hash ^= ((uint8_t*)data)[i];
+ hash *= FNV_PRIME;
+ }
+
+ return hash;
+}
+
+hash_t
+hash_string(const char *str)
+{
+ hash_t hash;
+
+ hash = FNV_OFFSET_BASIS;
+ while (*str) {
+ hash ^= (uint8_t)*str++;
+ hash *= FNV_PRIME;
+ }
+
+ return hash;
+}
diff --git a/compiler/hashmap.c b/compiler/hashmap.c
new file mode 100644
index 0000000..0fb2e14
--- /dev/null
+++ b/compiler/hashmap.c
@@ -0,0 +1,39 @@
+/*
+ * Tiny hashmap implementation.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#include <stddef.h>
+#include "hashmap.h"
+
+void
+hashmap_add(struct hashmap *map, struct hashmap_entry *entry)
+{
+ list_prepend(&map->rows[entry->hash % map->n_rows], &entry->list_entry);
+}
+
+struct hashmap_entry *
+hashmap_find(struct hashmap *map, hash_t hash)
+{
+ struct hashmap_entry *head, *entry;
+
+ entry = head = (struct hashmap_entry*)(map->rows[hash % map->n_rows].head);
+ do {
+ if (entry->hash == hash) {
+ return entry;
+ }
+
+ entry = (struct hashmap_entry*)(entry->list_entry.next);
+ } while (entry != head);
+
+ return NULL;
+}
+
+void
+hashmap_init(struct hashmap *map)
+{
+ for (size_t r = 0; r < map->n_rows; r++) {
+ list_init(&map->rows[r]);
+ }
+}
diff --git a/compiler/lexer/char_info.c b/compiler/lexer/char_info.c
new file mode 100644
index 0000000..97ecb7d
--- /dev/null
+++ b/compiler/lexer/char_info.c
@@ -0,0 +1,106 @@
+/*
+ * Character info table.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#include "lexer/char_info.h"
+
+uint16_t char_info[256] = {
+ /*
+ NUL SOH STX ETX
+ EOT ENQ ACK BEL
+ */
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+ /*
+ BS TAB LF VT
+ FF CR SO SI
+ */
+ 0 , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_VERT_WS,
+ CHAR_VERT_WS, CHAR_HORZ_WS, 0 , 0 ,
+ /*
+ DLE DC1 DC2 DC3
+ DC4 NAK SYN ETB
+ */
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+ /*
+ CAN EM SUB ESC
+ FS GS RS US
+ */
+ 0 , 0 , 0 , 0 ,
+ 0 , 0 , 0 , 0 ,
+ /*
+ ! " #
+ $ % & '
+ */
+ CHAR_HORZ_WS, CHAR_EXCLAIM, 0 , 0 ,
+ 0 , CHAR_PERCENT, CHAR_AMPER , 0 ,
+ /*
+ ( ) * +
+ , - . /
+ */
+ CHAR_LPAREN , CHAR_RPAREN , CHAR_STAR , CHAR_PLUS ,
+ CHAR_COMMA , CHAR_MINUS , CHAR_DOT , CHAR_SLASH ,
+ /*
+ 0 1 2 3
+ 4 5 6 7
+ */
+ CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT ,
+ CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT ,
+ /*
+ 8 9 : ;
+ < = > ?
+ */
+ CHAR_DIGIT , CHAR_DIGIT , CHAR_COLON , CHAR_SEMI ,
+ CHAR_LESS , CHAR_EQUALS , CHAR_GREATER, 0 ,
+ /*
+ @ A B C
+ D E F G
+ */
+ 0 , CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER ,
+ CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER , CHAR_UPPER ,
+ /*
+ H I J K
+ L M N O
+ */
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
+ /*
+ P Q R S
+ T U V W
+ */
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
+ /*
+ X Y Z [
+ \ ] ^ _
+ */
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_LBRACK ,
+ 0 , CHAR_RBRACK , CHAR_CARET , 0 ,
+ /*
+ ` a b c
+ d e f g
+ */
+ 0 , CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER ,
+ CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER , CHAR_LOWER ,
+ /*
+ h i j k
+ l m n o
+ */
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
+ /*
+ p q r s
+ t u v w
+ */
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
+ /*
+ x y z {
+ | } ~
+ */
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LBRACE ,
+ CHAR_PIPE , CHAR_RBRACE , CHAR_TILDE , 0 ,
+};
diff --git a/compiler/lexer/keywords.c b/compiler/lexer/keywords.c
new file mode 100644
index 0000000..0e95048
--- /dev/null
+++ b/compiler/lexer/keywords.c
@@ -0,0 +1,73 @@
+/*
+ * Keyword hashmap.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include "hash.h"
+#include "hashmap.h"
+#include "debug.h"
+#include "lexer/keywords.h"
+
+#define HASHMAP_ROWS 8
+
+struct keyword {
+ struct hashmap_entry hashmap_entry;
+ size_t len;
+ token_kind_t value;
+};
+
+static bool initialized = false;
+static struct list keywords_rows[HASHMAP_ROWS];
+static struct hashmap keywords;
+
+static void
+add_keyword(char *name, token_kind_t value)
+{
+ size_t len;
+ struct keyword *kwd;
+
+ len = strlen(name);
+ kwd = malloc(sizeof(struct keyword));
+ kwd->hashmap_entry.hash = hash_data(name, len);
+ kwd->len = len;
+ kwd->value = value;
+
+ hashmap_add(&keywords, &kwd->hashmap_entry);
+}
+
+token_kind_t
+keywords_find(struct token *tok)
+{
+ struct keyword *kwd;
+
+ kwd = (struct keyword*)hashmap_find(&keywords, tok->hash);
+ if (kwd == NULL || kwd->len != tok->len) {
+ return TK_UNKNOWN;
+ }
+
+ return kwd->value;
+}
+
+void
+keywords_init(void)
+{
+ if (initialized) {
+ return;
+ }
+
+ debug("Initializing keywords...\n");
+
+ keywords.rows = keywords_rows;
+ keywords.n_rows = HASHMAP_ROWS;
+ hashmap_init(&keywords);
+
+ add_keyword("type", TK_TYPE);
+ add_keyword("enum", TK_ENUM);
+ add_keyword("struct", TK_STRUCT);
+
+ initialized = true;
+}
diff --git a/compiler/lexer/lexer.c b/compiler/lexer/lexer.c
new file mode 100644
index 0000000..06f7e89
--- /dev/null
+++ b/compiler/lexer/lexer.c
@@ -0,0 +1,282 @@
+/*
+ * Quark lexer (lexical analyzer).
+ * Turns source code into tokens.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#include "debug.h"
+#include "lexer.h"
+#include "lexer/char_info.h"
+#include "lexer/keywords.h"
+
+static void
+skip_ignored(struct lexer *ctx)
+{
+ while (char_info[(int)*ctx->pos] & CHAR_WHITESPACE) {
+ if (char_info[(int)*ctx->pos] & CHAR_VERT_WS) {
+ ctx->line++;
+ ctx->line_start = ctx->pos + 1;
+ }
+
+ ctx->pos++;
+ }
+}
+
+static void
+parse_num_hex(struct lexer *ctx, struct token *tok)
+{
+ ctx->pos += 2;
+ tok->value = 0;
+ while (char_info[(int)*ctx->pos] & CHAR_HEX) {
+ if (char_info[(int)*ctx->pos] == CHAR_XLOWER) {
+ tok->value |= *ctx->pos++ - 'a' + 0xa;
+ } else if (char_info[(int)*ctx->pos] == CHAR_XUPPER) {
+ tok->value |= *ctx->pos++ - 'A' + 0xa;
+ } else {
+ tok->value |= *ctx->pos++ - '0';
+ }
+ }
+}
+
+static void
+parse_num_dec(struct lexer *ctx, struct token *tok)
+{
+ ctx->pos++;
+ tok->value = *ctx->pos - '0';
+ while (char_info[(int)*ctx->pos] & CHAR_DIGIT) {
+ tok->value *= 10;
+ tok->value += *ctx->pos++ - '0';
+ }
+}
+
+static void
+parse_num_bin(struct lexer *ctx, struct token *tok)
+{
+ ctx->pos += 2;
+ tok->value = 0;
+ while (*ctx->pos == '0' || *ctx->pos == '1') {
+ tok->value <<= 1;
+ tok->value |= *ctx->pos++ - '0';
+ }
+}
+
+static void
+lex_ident(struct lexer *ctx, struct token *tok)
+{
+ /* Find end of identifier */
+ ctx->pos++;
+ while (char_info[(int)*ctx->pos] & CHAR_ALNUM || *ctx->pos == '_') {
+ ctx->pos++;
+ }
+
+ tok->len = (size_t)(ctx->pos - tok->pos);
+ tok->hash = hash_data(tok->pos, tok->len);
+
+ /* Determine if this is a keyword or just an identifier */
+ tok->kind = keywords_find(tok);
+ if (tok->kind == TK_UNKNOWN) {
+ tok->kind = TK_IDENTIFIER;
+ return;
+ }
+}
+
+static void
+lex_oper(struct lexer *ctx, struct token *tok)
+{
+ tok->len = 1;
+
+ switch (*ctx->pos) {
+ case '+':
+ if (ctx->pos[1] == '+') {
+ tok->kind = TK_PLUS_PLUS;
+ tok->len = 2;
+ } else if (ctx->pos[1] == '=') {
+ tok->kind = TK_PLUS_EQUALS;
+ tok->len = 2;
+ } else {
+ tok->kind = TK_PLUS;
+ }
+
+ break;
+ case '-':
+ if (ctx->pos[1] == '>') {
+ tok->kind = TK_ARROW;
+ tok->len = 2;
+ } if (ctx->pos[1] == '-') {
+ tok->kind = TK_MINUS_MINUS;
+ tok->len = 2;
+ } else if (ctx->pos[1] == '=') {
+ tok->kind = TK_MINUS_EQUALS;
+ tok->len = 2;
+ } else {
+ tok->kind = TK_MINUS;
+ }
+
+ break;
+ case '<':
+ if (ctx->pos[1] == '=') {
+ tok->kind = TK_LESS_THAN_EQUALS;
+ tok->len = 2;
+ } else if (ctx->pos[1] == '<') {
+ if (ctx->pos[2] == '=') {
+ tok->kind = TK_SHIFT_LEFT_EQUALS;
+ tok->len = 3;
+ } else {
+ tok->kind = TK_SHIFT_LEFT;
+ tok->len = 2;
+ }
+ } else {
+ tok->kind = TK_LESS_THAN;
+ }
+
+ break;
+ case '>':
+ if (ctx->pos[1] == '=') {
+ tok->kind = TK_GREATER_THAN_EQUALS;
+ tok->len = 2;
+ } else if (ctx->pos[1] == '>') {
+ if (ctx->pos[2] == '=') {
+ tok->kind = TK_SHIFT_RIGHT_EQUALS;
+ tok->len = 3;
+ } else {
+ tok->kind = TK_SHIFT_RIGHT;
+ tok->len = 2;
+ }
+ } else {
+ tok->kind = TK_GREATER_THAN;
+ }
+
+ break;
+ default:
+ tok->kind = char_info[(int)*ctx->pos] >> CHAR_OPER_SHIFT;
+ if (ctx->pos[1] == '=') {
+ tok->kind++;
+ tok->len = 2;
+ }
+
+ break;
+ }
+
+ ctx->pos += tok->len;
+}
+
+static void
+lex_str(struct lexer *ctx, struct token *tok)
+{
+ /* Find end of string */
+ ctx->pos++;
+ while (*ctx->pos != '"') {
+ if (*ctx->pos == '\\' && ctx->pos[1] == '\"') {
+ ctx->pos++;
+ }
+
+ ctx->pos++;
+ }
+ ctx->pos++;
+
+ tok->kind = TK_STRING;
+ tok->len = (size_t)(ctx->pos - tok->pos) - 1;
+}
+
+static void
+lex_char(struct lexer *ctx, struct token *tok)
+{
+ /* Find end of character */
+ ctx->pos++;
+ while (*ctx->pos != '\'') {
+ if (*ctx->pos == '\\' && ctx->pos[1] == '\'') {
+ ctx->pos++;
+ }
+
+ ctx->pos++;
+ }
+ ctx->pos++;
+
+ tok->kind = TK_CHARACTER;
+ tok->len = (size_t)(ctx->pos - tok->pos) - 1;
+}
+
+void
+lexer_next(struct lexer *ctx, struct token *tok)
+{
+ if (tok == NULL) {
+ return;
+ }
+
+ if (ctx == NULL) {
+ tok->kind = TK_UNKNOWN;
+ return;
+ }
+
+ skip_ignored(ctx);
+
+ /* Initialize token */
+ tok->kind = TK_UNKNOWN;
+ tok->pos = ctx->pos;
+ tok->line = ctx->line;
+ tok->col = (int)(tok->pos - ctx->line_start) + 1;
+
+ if (char_info[(int)*ctx->pos] & CHAR_ALPHA || *ctx->pos == '_') {
+ lex_ident(ctx, tok);
+ return;
+ }
+
+ if (char_info[(int)*ctx->pos] & CHAR_SINGLE) {
+ tok->kind = char_info[(int)*ctx->pos] >> CHAR_SINGLE_SHIFT;
+ tok->len = 1;
+ ctx->pos++;
+ return;
+ }
+
+ if (char_info[(int)*ctx->pos] & CHAR_OPER) {
+ lex_oper(ctx, tok);
+ return;
+ }
+
+ if (char_info[(int)*ctx->pos] & CHAR_DIGIT) {
+ tok->kind = TK_NUMBER;
+
+ if (*ctx->pos == '0' && ctx->pos[1] == 'x') {
+ parse_num_hex(ctx, tok);
+ } else if (*ctx->pos == '0' && ctx->pos[1] == 'b') {
+ parse_num_bin(ctx, tok);
+ } else {
+ parse_num_dec(ctx, tok);
+ }
+
+ tok->len = (size_t)(ctx->pos - tok->pos);
+ return;
+ }
+
+ if (*ctx->pos == '"') {
+ lex_str(ctx, tok);
+ return;
+ }
+
+ if (*ctx->pos == '\'') {
+ lex_char(ctx, tok);
+ return;
+ }
+
+ if (*ctx->pos == '\0') {
+ tok->kind = TK_EOF;
+ return;
+ }
+}
+
+void
+lexer_init(struct lexer *ctx, char *source)
+{
+ if (ctx == NULL || source == NULL) {
+ return;
+ }
+
+ debug("Initializing lexer...\n");
+
+ ctx->pos = source;
+ ctx->line_start = ctx->pos;
+ ctx->line = 1;
+
+ keywords_init();
+}
diff --git a/compiler/main.c b/compiler/main.c
new file mode 100644
index 0000000..da261bc
--- /dev/null
+++ b/compiler/main.c
@@ -0,0 +1,310 @@
+/*
+ * Compiler command-line interface.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "debug.h"
+#include "hashmap.h"
+#include "parser/type.h"
+#include "parser.h"
+
+#define HASHMAP_ROWS 16
+
+#define OID_O 0x00
+
+struct option {
+ char *name;
+ char *value;
+};
+
+static char *argv0;
+static char *in_fname;
+static struct option options[] = {
+ [OID_O] = { "o", NULL }
+};
+
+__attribute__((noreturn)) static void
+cmd_fatal(const char *fmt, ...)
+{
+ va_list ap;
+
+ fprintf(stderr, "\033[1m%s: \033[31mfatal error:\033[0m ", argv0);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr, "compilation terminated.\n");
+ exit(EXIT_FAILURE);
+}
+
+static void
+cmd_error(const char *fmt, ...)
+{
+ va_list ap;
+
+ fprintf(stderr, "\033[1m%s: \033[31merror:\033[0m ", argv0);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+}
+
+static char *
+load_text_file(char *filename)
+{
+ FILE *fp;
+ size_t size;
+ char *buf;
+
+ fp = fopen(filename, "rb");
+ if (fp == NULL) {
+ cmd_error("%s: %s\n", filename, strerror(errno));
+ return NULL;
+ }
+
+ /* Get file size */
+ fseek(fp, 0, SEEK_END);
+ size = (size_t)ftell(fp);
+ fseek(fp, 0, SEEK_SET);
+ if (size < sizeof(char)) {
+ cmd_error("%s: empty file\n", filename);
+ fclose(fp);
+ return NULL;
+ }
+
+ /* Read entire file */
+ buf = malloc(size + sizeof(char));
+ if (fread(buf, 1, size, fp) != size) {
+ cmd_error("%s: %s\n", filename, strerror(errno));
+ free(buf);
+ fclose(fp);
+ return NULL;
+ }
+
+ fclose(fp);
+
+ /* Terminate string */
+ buf[size] = '\0';
+
+ return buf;
+}
+
+static struct option *
+find_option(char *name)
+{
+ for (int o = 0; o < (int)(sizeof(options) / sizeof(struct option)); o++) {
+ if (strcmp(options[o].name, name) == 0) {
+ return &options[0];
+ }
+ }
+
+ return NULL;
+}
+
+static void
+set_out_fname(void)
+{
+ size_t len;
+ char *buf;
+ char *dot;
+
+ /* Skip if set already */
+ if (options[OID_O].value != NULL) {
+ return;
+ }
+
+ len = strlen(in_fname);
+ buf = malloc(len + 4);
+ strcpy(buf, in_fname);
+
+ /* Append extension after last '.' or at end */
+ dot = strrchr(buf, '.');
+ if (dot == NULL) {
+ strcpy(buf + len, ".txt");
+ } else {
+ strcpy(buf + (dot - buf), ".txt");
+ }
+
+ options[OID_O].value = buf;
+}
+
+static void
+parse_args(int argc, char **argv)
+{
+ struct option *opt;
+
+ in_fname = NULL;
+ for (int a = 1; a < argc; a++) {
+ /* Set input filename */
+ if (argv[a][0] != '-') {
+ if (in_fname != NULL) {
+ cmd_error("multiple input files specified\n");
+ } else {
+ in_fname = argv[a];
+ }
+
+ continue;
+ }
+
+ /* Identify option */
+ opt = find_option(argv[a] + 1);
+ if (opt == NULL) {
+ cmd_error("unrecognized command-line option '%s'\n", argv[a]);
+ continue;
+ }
+
+ /* Set option value */
+ a++;
+ if (a >= argc) {
+ cmd_error("missing value after '%s'\n", argv[a - 1]);
+ }
+ opt->value = argv[a];
+ }
+
+ if (in_fname == NULL) {
+ cmd_fatal("no input files\n");
+ }
+
+ set_out_fname();
+}
+
+static void
+print_alias(struct type *typ)
+{
+ printf("alias (%lu bytes, %d pointer(s))", typ->size, typ->n_ptrs);
+}
+
+static void
+print_enum(struct type *typ)
+{
+ struct enum_member *mem;
+
+ printf("enum {\n");
+
+ for (size_t r = 0; r < typ->members.n_rows; r++) {
+ mem = (struct enum_member*)typ->members.rows[r].head;
+ if (mem == (struct enum_member*)&typ->members.rows[r]) {
+ continue;
+ }
+
+ do {
+ printf(" %.*s,\n", (int)mem->name_len, mem->name);
+
+ mem = (struct enum_member*)mem->hashmap_entry.list_entry.next;
+ } while (mem != (struct enum_member*)&typ->members.rows[r]);
+ }
+
+ printf("}");
+}
+
+static void
+print_struct(struct type *typ)
+{
+ struct struct_member *mem;
+
+ printf("struct {\n");
+
+ for (size_t r = 0; r < typ->members.n_rows; r++) {
+ mem = (struct struct_member*)typ->members.rows[r].head;
+ if (mem == (struct struct_member*)&typ->members.rows[r]) {
+ continue;
+ }
+
+ do {
+ printf(" %.*s %.*s; (%d pointer(s))\n", (int)mem->typ->name_len, mem->typ->name, (int)mem->name_len, mem->name, mem->n_ptrs);
+
+ mem = (struct struct_member*)mem->hashmap_entry.list_entry.next;
+ } while (mem != (struct struct_member*)&typ->members.rows[r]);
+ }
+
+ printf("}");
+}
+
+static void
+print_type(struct type *typ)
+{
+ printf("type %.*s: ", (int)typ->name_len, typ->name);
+
+ switch (typ->kind) {
+ case TYK_ALIAS:
+ print_alias(typ);
+ break;
+ case TYK_ENUM:
+ print_enum(typ);
+ break;
+ case TYK_STRUCT:
+ print_struct(typ);
+ break;
+ default:
+ printf("(unknown)");
+ break;
+ }
+
+ printf(";\n");
+
+}
+
+int
+main(int argc, char **argv)
+{
+ char *input;
+ struct parser parser;
+ struct list *types_rows, *procs_rows;
+ struct hashmap types, procs;
+ struct type *typ;
+
+ argv0 = argv[0];
+ if (argc < 2) {
+ cmd_fatal("no input files\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Parse command-line arguments */
+ parse_args(argc, argv);
+
+ /* Load input file */
+ input = load_text_file(in_fname);
+ if (input == NULL) {
+ exit(EXIT_FAILURE);
+ }
+
+ /* Initialize hashmaps */
+ types_rows = malloc(HASHMAP_ROWS * sizeof(struct list));
+ types.rows = types_rows;
+ types.n_rows = HASHMAP_ROWS;
+ hashmap_init(&types);
+ procs_rows = malloc(HASHMAP_ROWS * sizeof(struct list));
+ procs.rows = procs_rows;
+ procs.n_rows = HASHMAP_ROWS;
+ hashmap_init(&procs);
+
+ /* Parse input */
+ parser.tok.fname = in_fname;
+ parser.types = &types;
+ parser.procs = &procs;
+ parser_init(&parser, input);
+ parser_parse(&parser);
+
+ /* Print parsed types */
+ for (size_t r = 0; r < types.n_rows; r++) {
+ typ = (struct type*)types.rows[r].head;
+ if (typ == (struct type*)&types.rows[r]) {
+ continue;
+ }
+
+ do {
+ print_type(typ);
+ typ = (struct type*)typ->hashmap_entry.list_entry.next;
+ } while (typ != (struct type*)&types.rows[r]);
+ }
+
+ free(procs_rows);
+ free(types_rows);
+ free(input);
+ return 0;
+}
diff --git a/compiler/parser/parser.c b/compiler/parser/parser.c
new file mode 100644
index 0000000..aeec48b
--- /dev/null
+++ b/compiler/parser/parser.c
@@ -0,0 +1,84 @@
+/*
+ * Quark parser.
+ * Turns tokens into an AST (Abstract Syntax Tree).
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "debug.h"
+#include "hashmap.h"
+#include "parser/type.h"
+#include "parser.h"
+
+static void
+add_builtin(struct hashmap *map, char *name, size_t size, int n_ptrs)
+{
+ struct type *type;
+ size_t name_len;
+
+ name_len = strlen(name);
+
+ type = malloc(sizeof(struct type));
+ type->hashmap_entry.hash = hash_data(name, name_len);
+ type->name = name;
+ type->name_len = name_len;
+ type->size = size;
+ type->n_ptrs = n_ptrs;
+
+ hashmap_add(map, &type->hashmap_entry);
+}
+
+void
+tok_error(struct token *tok, const char *fmt, ...)
+{
+ va_list ap;
+
+ fprintf(stderr, "\033[1m%s:%d:%d: \033[31merror:\033[0m ", tok->fname, tok->line, tok->col);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+}
+
+void
+tok_warn(struct token *tok, const char *fmt, ...)
+{
+ va_list ap;
+
+ printf("\033[1m%s:%d:%d: \033[33mwarning:\033[0m ", tok->fname, tok->line, tok->col);
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+}
+
+void
+parser_parse(struct parser *ctx)
+{
+ debug("Parsing...\n");
+
+ next_token(ctx);
+ while (ctx->tok.kind != TK_EOF) {
+ switch (ctx->tok.kind) {
+ case TK_TYPE:
+ parse_type(ctx);
+ break;
+ default:
+ tok_error(&ctx->tok, "unexpected \"%.*s\"\n", (int)ctx->tok.len, ctx->tok.pos);
+ next_token(ctx);
+ break;
+ }
+ }
+}
+
+void
+parser_init(struct parser *ctx, char *source)
+{
+ debug("Initializing parser...\n");
+ lexer_init(&ctx->lexer, source);
+
+ add_builtin(ctx->types, "uint32", 4, 0);
+ add_builtin(ctx->types, "any", 0, 0);
+}
diff --git a/compiler/parser/type.c b/compiler/parser/type.c
new file mode 100644
index 0000000..4ac9327
--- /dev/null
+++ b/compiler/parser/type.c
@@ -0,0 +1,254 @@
+/*
+ * Type parser.
+ * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team.
+ * Provided under the BSD 3-Clause license.
+ */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include "debug.h"
+#include "hashmap.h"
+#include "lexer/token.h"
+#include "parser/type.h"
+#include "parser.h"
+
+#define HASHMAP_ROWS 8
+
+static struct type *
+type_new(struct token *tok)
+{
+ struct type *typ;
+
+ typ = malloc(sizeof(struct type));
+ typ->hashmap_entry.hash = tok->hash;
+ typ->name = tok->pos;
+ typ->name_len = tok->len;
+
+ return typ;
+}
+
+static bool
+parse_type_ref(struct parser *ctx, struct type **typ_out, int *n_ptrs_out)
+{
+ struct type *typ;
+ int n_ptrs;
+
+ debug("Parsing type reference...\n");
+
+ /* Find type */
+ typ = (struct type*)hashmap_find(ctx->types, ctx->tok.hash);
+ if (typ == NULL) {
+ tok_error(&ctx->tok, "type \"%.*s\" not found\n", (int)ctx->tok.len, ctx->tok.pos);
+ return false;
+ }
+
+ /* Find number of pointers */
+ n_ptrs = 0;
+ while (next_token(ctx)->kind == TK_STAR) {
+ n_ptrs++;
+ }
+
+ /* Ensure number of pointers is allowed */
+ if (typ->size == 0 && n_ptrs < 1) {
+ tok_error(&ctx->tok, "type \"%.*s\" can only be used in a pointer\n", (int)typ->name_len, typ->name);
+ return false;
+ }
+
+ *typ_out = typ;
+ *n_ptrs_out = n_ptrs;
+ return true;
+}
+
+static bool
+parse_alias(struct parser *ctx, struct type *typ)
+{
+ struct type *base;
+
+ debug("Parsing type alias definition...\n");
+
+ typ->kind = TYK_ALIAS;
+
+ if (!parse_type_ref(ctx, &base, &typ->n_ptrs)) {
+ return false;
+ }
+
+ if (typ->n_ptrs >= 1) {
+ typ->size = sizeof(void*);
+ } else {
+ typ->size = base->size;
+ }
+
+ return true;
+}
+
+static bool
+parse_enum(struct parser *ctx, struct type *typ)
+{
+ struct enum_member *mem;
+ uint64_t value;
+
+ debug("Parsing enum definition...\n");
+
+ typ->kind = TYK_ENUM;
+
+ if (next_token(ctx)->kind != TK_LBRACE) {
+ tok_error(&ctx->tok, "expected \"{\" after \"enum\"\n");
+ return false;
+ }
+
+ typ->members.rows = malloc(HASHMAP_ROWS * sizeof(struct list));
+ typ->members.n_rows = HASHMAP_ROWS;
+ hashmap_init(&typ->members);
+
+ value = 0;
+ next_token(ctx);
+ while (ctx->tok.kind != TK_RBRACE) {
+ if (ctx->tok.kind != TK_IDENTIFIER) {
+ tok_error(&ctx->tok, "expected enum member name\n");
+ return true;
+ }
+
+ mem = malloc(sizeof(struct enum_member));
+ mem->hashmap_entry.hash = ctx->tok.hash;
+ mem->name = ctx->tok.pos;
+ mem->name_len = ctx->tok.len;
+ mem->value = value++;
+ hashmap_add(&typ->members, &mem->hashmap_entry);
+
+ if (next_token(ctx)->kind == TK_COMMA) {
+ next_token(ctx);
+ continue;
+ }
+
+ if (ctx->tok.kind != TK_RBRACE) {
+ tok_error(&ctx->tok, "expected \",\" or \"}\" after enum member name\n");
+ return true;
+ }
+ }
+
+ next_token(ctx);
+ return true;
+}
+
+static bool
+parse_struct(struct parser *ctx, struct type *typ)
+{
+ struct struct_member *mem;
+ uint64_t off;
+
+ debug("Parsing struct definition...\n");
+
+ typ->kind = TYK_STRUCT;
+
+ if (next_token(ctx)->kind != TK_LBRACE) {
+ tok_error(&ctx->tok, "expected \"{\" after \"struct\"\n");
+ return false;
+ }
+
+ typ->members.rows = malloc(HASHMAP_ROWS * sizeof(struct list));
+ typ->members.n_rows = HASHMAP_ROWS;
+ hashmap_init(&typ->members);
+
+ off = 0;
+ next_token(ctx);
+ while (ctx->tok.kind != TK_RBRACE) {
+ if (ctx->tok.kind != TK_IDENTIFIER) {
+ tok_error(&ctx->tok, "expected type name\n");
+ return true;
+ }
+
+ mem = malloc(sizeof(struct struct_member));
+ mem->hashmap_entry.hash = ctx->tok.hash;
+ mem->name = ctx->tok.pos;
+ mem->name_len = ctx->tok.len;
+ mem->off = off;
+
+ if (!parse_type_ref(ctx, &mem->typ, &mem->n_ptrs)) {
+ return true;
+ }
+
+ if (ctx->tok.kind != TK_IDENTIFIER) {
+ tok_error(&ctx->tok, "expected struct member name\n");
+ return true;
+ }
+ mem->name = ctx->tok.pos;
+ mem->name_len = ctx->tok.len;
+
+ if (mem->n_ptrs >= 1) {
+ mem->size = sizeof(void*);
+ } else {
+ mem->size = mem->typ->size;
+ }
+ off += mem->size;
+
+ hashmap_add(&typ->members, &mem->hashmap_entry);
+
+ if (next_token(ctx)->kind != TK_SEMICOLON) {
+ tok_error(&ctx->tok, "expected \";\" after struct member name\n");
+ return true;
+ }
+ next_token(ctx);
+ }
+
+ next_token(ctx);
+ return true;
+}
+
+void
+parse_type(struct parser *ctx)
+{
+ struct type *typ;
+ bool success;
+
+ debug("Parsing type definition...\n");
+
+ /* Type name */
+ if (next_token(ctx)->kind != TK_IDENTIFIER) {
+ tok_error(&ctx->tok, "expected identifier after \"type\"\n");
+ return;
+ }
+
+ /* Ensure type does not already exist */
+ typ = (struct type*)hashmap_find(ctx->types, ctx->tok.hash);
+ if (typ != NULL) {
+ tok_error(&ctx->tok, "type \"%.*s\" already defined\n", (int)ctx->tok.len, ctx->tok.pos);
+ return;
+ }
+
+ /* Create type */
+ typ = type_new(&ctx->tok);
+
+ if (next_token(ctx)->kind != TK_COLON) {
+ tok_error(&ctx->tok, "expected \":\" after type name\n");
+ free(typ);
+ return;
+ }
+
+ next_token(ctx);
+ if (ctx->tok.kind == TK_IDENTIFIER) {
+ success = parse_alias(ctx, typ);
+ } else if (ctx->tok.kind == TK_ENUM) {
+ success = parse_enum(ctx, typ);
+ } else if (ctx->tok.kind == TK_STRUCT) {
+ success = parse_struct(ctx, typ);
+ } else {
+ tok_error(&ctx->tok, "expected type name or \"enum\" after \":\"\n");
+ free(typ);
+ return;
+ }
+
+ if (!success) {
+ free(typ);
+ return;
+ }
+
+ if (ctx->tok.kind != TK_SEMICOLON) {
+ tok_error(&ctx->tok, "expected \";\" after type definition\n");
+ free(typ);
+ return;
+ }
+ next_token(ctx);
+
+ /* Add type to parser's registry */
+ hashmap_add(ctx->types, &typ->hashmap_entry);
+}