diff options
author | Ian Moffett <ian@osmora.org> | 2024-11-01 23:46:08 -0400 |
---|---|---|
committer | Ian Moffett <ian@osmora.org> | 2024-11-01 23:46:08 -0400 |
commit | a515dfb3b8f8e999362db7a6b52b3104c03b750a (patch) | |
tree | d0180f0cbc39d9c3e367af30791ad774e4d419ff |
Import quark sources
Signed-off-by: Ian Moffett <ian@osmora.org>
-rw-r--r-- | .gitattributes | 5 | ||||
-rw-r--r-- | .gitignore | 13 | ||||
-rw-r--r-- | LICENSE | 28 | ||||
-rw-r--r-- | Makefile | 32 | ||||
-rw-r--r-- | compiler/debug.c | 20 | ||||
-rw-r--r-- | compiler/hash.c | 35 | ||||
-rw-r--r-- | compiler/hashmap.c | 39 | ||||
-rw-r--r-- | compiler/lexer/char_info.c | 106 | ||||
-rw-r--r-- | compiler/lexer/keywords.c | 73 | ||||
-rw-r--r-- | compiler/lexer/lexer.c | 282 | ||||
-rw-r--r-- | compiler/main.c | 310 | ||||
-rw-r--r-- | compiler/parser/parser.c | 84 | ||||
-rw-r--r-- | compiler/parser/type.c | 254 | ||||
-rw-r--r-- | include/debug.h | 18 | ||||
-rw-r--r-- | include/hash.h | 21 | ||||
-rw-r--r-- | include/hashmap.h | 34 | ||||
-rw-r--r-- | include/lexer.h | 22 | ||||
-rw-r--r-- | include/lexer/char_info.h | 60 | ||||
-rw-r--r-- | include/lexer/keywords.h | 15 | ||||
-rw-r--r-- | include/lexer/token.h | 92 | ||||
-rw-r--r-- | include/list.h | 58 | ||||
-rw-r--r-- | include/parser.h | 34 | ||||
-rw-r--r-- | include/parser/ast.h | 20 | ||||
-rw-r--r-- | include/parser/type.h | 57 | ||||
-rw-r--r-- | test.c | 1 | ||||
-rw-r--r-- | test.q | 13 |
26 files changed, 1726 insertions, 0 deletions
diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..652ede7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +# Git attributes. +# Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. +# Provided under the BSD 3-Clause license. + +* text=auto eol=lf diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7ec5f2c --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# Git-ignored paths. +# Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. +# Provided under the BSD 3-Clause license. + +quarkc +.vscode/ +.vs/ +*.a +*.d +*.o +*.so +*.elf +*.exe @@ -0,0 +1,28 @@ +BSD 3-Clause License + +Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b758a0a --- /dev/null +++ b/Makefile @@ -0,0 +1,32 @@ +# Build configuration. +# Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. +# Provided under the BSD 3-Clause license. + +ENABLE_DEBUG = 1 +CC = clang +LD = ld.ldd + +EXENAME = quarkc +OFILES = $(addprefix compiler/,debug.o hash.o hashmap.o lexer/char_info.o lexer/keywords.o lexer/lexer.o parser/type.o parser/parser.o main.o) +CFLAGS = -Wall -Wextra -Iinclude +LDFLAGS = + +ifeq ($(ENABLE_DEBUG),1) +CFLAGS += -DENABLE_DEBUG +endif + +.PHONY: all +all: $(EXENAME) + +$(EXENAME): $(OFILES) + @echo Linking $@... + @$(CC) $^ $(LDFLAGS) -o $@ + +%.o: %.c + @echo Compiling $<... + @$(CC) -c $< $(CFLAGS) -o $@ + +.PHONY: clean +clean: + @echo Cleaning... + @rm -f $(OFILES) diff --git a/compiler/debug.c b/compiler/debug.c new file mode 100644 index 0000000..d14bca6 --- /dev/null +++ b/compiler/debug.c @@ -0,0 +1,20 @@ +/* + * Logging utilities. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stdarg.h> +#include <stdio.h> +#include "debug.h" + +void +__debug(const char *fmt, ...) +{ + va_list ap; + + printf("\033[90mdebug:\033[0m "); + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} diff --git a/compiler/hash.c b/compiler/hash.c new file mode 100644 index 0000000..a63691c --- /dev/null +++ b/compiler/hash.c @@ -0,0 +1,35 @@ +/* + * 32-bit FNV-1a hash function. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include "hash.h" + +hash_t +hash_data(const void *data, size_t length) +{ + hash_t hash; + + hash = FNV_OFFSET_BASIS; + for (size_t i = 0; i < length; i++) { + hash ^= ((uint8_t*)data)[i]; + hash *= FNV_PRIME; + } + + return hash; +} + +hash_t +hash_string(const char *str) +{ + hash_t hash; + + hash = FNV_OFFSET_BASIS; + while (*str) { + hash ^= (uint8_t)*str++; + hash *= FNV_PRIME; + } + + return hash; +} diff --git a/compiler/hashmap.c b/compiler/hashmap.c new file mode 100644 index 0000000..0fb2e14 --- /dev/null +++ b/compiler/hashmap.c @@ -0,0 +1,39 @@ +/* + * Tiny hashmap implementation. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stddef.h> +#include "hashmap.h" + +void +hashmap_add(struct hashmap *map, struct hashmap_entry *entry) +{ + list_prepend(&map->rows[entry->hash % map->n_rows], &entry->list_entry); +} + +struct hashmap_entry * +hashmap_find(struct hashmap *map, hash_t hash) +{ + struct hashmap_entry *head, *entry; + + entry = head = (struct hashmap_entry*)(map->rows[hash % map->n_rows].head); + do { + if (entry->hash == hash) { + return entry; + } + + entry = (struct hashmap_entry*)(entry->list_entry.next); + } while (entry != head); + + return NULL; +} + +void +hashmap_init(struct hashmap *map) +{ + for (size_t r = 0; r < map->n_rows; r++) { + list_init(&map->rows[r]); + } +} diff --git a/compiler/lexer/char_info.c b/compiler/lexer/char_info.c new file mode 100644 index 0000000..97ecb7d --- /dev/null +++ b/compiler/lexer/char_info.c @@ -0,0 +1,106 @@ +/* + * Character info table. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include "lexer/char_info.h" + +uint16_t char_info[256] = { + /* + NUL SOH STX ETX + EOT ENQ ACK BEL + */ + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , + /* + BS TAB LF VT + FF CR SO SI + */ + 0 , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_VERT_WS, + CHAR_VERT_WS, CHAR_HORZ_WS, 0 , 0 , + /* + DLE DC1 DC2 DC3 + DC4 NAK SYN ETB + */ + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , + /* + CAN EM SUB ESC + FS GS RS US + */ + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , + /* + ! " # + $ % & ' + */ + CHAR_HORZ_WS, CHAR_EXCLAIM, 0 , 0 , + 0 , CHAR_PERCENT, CHAR_AMPER , 0 , + /* + ( ) * + + , - . / + */ + CHAR_LPAREN , CHAR_RPAREN , CHAR_STAR , CHAR_PLUS , + CHAR_COMMA , CHAR_MINUS , CHAR_DOT , CHAR_SLASH , + /* + 0 1 2 3 + 4 5 6 7 + */ + CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , + CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , + /* + 8 9 : ; + < = > ? + */ + CHAR_DIGIT , CHAR_DIGIT , CHAR_COLON , CHAR_SEMI , + CHAR_LESS , CHAR_EQUALS , CHAR_GREATER, 0 , + /* + @ A B C + D E F G + */ + 0 , CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER , + CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER , CHAR_UPPER , + /* + H I J K + L M N O + */ + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , + /* + P Q R S + T U V W + */ + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , + /* + X Y Z [ + \ ] ^ _ + */ + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_LBRACK , + 0 , CHAR_RBRACK , CHAR_CARET , 0 , + /* + ` a b c + d e f g + */ + 0 , CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER , + CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER , CHAR_LOWER , + /* + h i j k + l m n o + */ + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , + /* + p q r s + t u v w + */ + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , + /* + x y z { + | } ~ + */ + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LBRACE , + CHAR_PIPE , CHAR_RBRACE , CHAR_TILDE , 0 , +}; diff --git a/compiler/lexer/keywords.c b/compiler/lexer/keywords.c new file mode 100644 index 0000000..0e95048 --- /dev/null +++ b/compiler/lexer/keywords.c @@ -0,0 +1,73 @@ +/* + * Keyword hashmap. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include "hash.h" +#include "hashmap.h" +#include "debug.h" +#include "lexer/keywords.h" + +#define HASHMAP_ROWS 8 + +struct keyword { + struct hashmap_entry hashmap_entry; + size_t len; + token_kind_t value; +}; + +static bool initialized = false; +static struct list keywords_rows[HASHMAP_ROWS]; +static struct hashmap keywords; + +static void +add_keyword(char *name, token_kind_t value) +{ + size_t len; + struct keyword *kwd; + + len = strlen(name); + kwd = malloc(sizeof(struct keyword)); + kwd->hashmap_entry.hash = hash_data(name, len); + kwd->len = len; + kwd->value = value; + + hashmap_add(&keywords, &kwd->hashmap_entry); +} + +token_kind_t +keywords_find(struct token *tok) +{ + struct keyword *kwd; + + kwd = (struct keyword*)hashmap_find(&keywords, tok->hash); + if (kwd == NULL || kwd->len != tok->len) { + return TK_UNKNOWN; + } + + return kwd->value; +} + +void +keywords_init(void) +{ + if (initialized) { + return; + } + + debug("Initializing keywords...\n"); + + keywords.rows = keywords_rows; + keywords.n_rows = HASHMAP_ROWS; + hashmap_init(&keywords); + + add_keyword("type", TK_TYPE); + add_keyword("enum", TK_ENUM); + add_keyword("struct", TK_STRUCT); + + initialized = true; +} diff --git a/compiler/lexer/lexer.c b/compiler/lexer/lexer.c new file mode 100644 index 0000000..06f7e89 --- /dev/null +++ b/compiler/lexer/lexer.c @@ -0,0 +1,282 @@ +/* + * Quark lexer (lexical analyzer). + * Turns source code into tokens. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include "debug.h" +#include "lexer.h" +#include "lexer/char_info.h" +#include "lexer/keywords.h" + +static void +skip_ignored(struct lexer *ctx) +{ + while (char_info[(int)*ctx->pos] & CHAR_WHITESPACE) { + if (char_info[(int)*ctx->pos] & CHAR_VERT_WS) { + ctx->line++; + ctx->line_start = ctx->pos + 1; + } + + ctx->pos++; + } +} + +static void +parse_num_hex(struct lexer *ctx, struct token *tok) +{ + ctx->pos += 2; + tok->value = 0; + while (char_info[(int)*ctx->pos] & CHAR_HEX) { + if (char_info[(int)*ctx->pos] == CHAR_XLOWER) { + tok->value |= *ctx->pos++ - 'a' + 0xa; + } else if (char_info[(int)*ctx->pos] == CHAR_XUPPER) { + tok->value |= *ctx->pos++ - 'A' + 0xa; + } else { + tok->value |= *ctx->pos++ - '0'; + } + } +} + +static void +parse_num_dec(struct lexer *ctx, struct token *tok) +{ + ctx->pos++; + tok->value = *ctx->pos - '0'; + while (char_info[(int)*ctx->pos] & CHAR_DIGIT) { + tok->value *= 10; + tok->value += *ctx->pos++ - '0'; + } +} + +static void +parse_num_bin(struct lexer *ctx, struct token *tok) +{ + ctx->pos += 2; + tok->value = 0; + while (*ctx->pos == '0' || *ctx->pos == '1') { + tok->value <<= 1; + tok->value |= *ctx->pos++ - '0'; + } +} + +static void +lex_ident(struct lexer *ctx, struct token *tok) +{ + /* Find end of identifier */ + ctx->pos++; + while (char_info[(int)*ctx->pos] & CHAR_ALNUM || *ctx->pos == '_') { + ctx->pos++; + } + + tok->len = (size_t)(ctx->pos - tok->pos); + tok->hash = hash_data(tok->pos, tok->len); + + /* Determine if this is a keyword or just an identifier */ + tok->kind = keywords_find(tok); + if (tok->kind == TK_UNKNOWN) { + tok->kind = TK_IDENTIFIER; + return; + } +} + +static void +lex_oper(struct lexer *ctx, struct token *tok) +{ + tok->len = 1; + + switch (*ctx->pos) { + case '+': + if (ctx->pos[1] == '+') { + tok->kind = TK_PLUS_PLUS; + tok->len = 2; + } else if (ctx->pos[1] == '=') { + tok->kind = TK_PLUS_EQUALS; + tok->len = 2; + } else { + tok->kind = TK_PLUS; + } + + break; + case '-': + if (ctx->pos[1] == '>') { + tok->kind = TK_ARROW; + tok->len = 2; + } if (ctx->pos[1] == '-') { + tok->kind = TK_MINUS_MINUS; + tok->len = 2; + } else if (ctx->pos[1] == '=') { + tok->kind = TK_MINUS_EQUALS; + tok->len = 2; + } else { + tok->kind = TK_MINUS; + } + + break; + case '<': + if (ctx->pos[1] == '=') { + tok->kind = TK_LESS_THAN_EQUALS; + tok->len = 2; + } else if (ctx->pos[1] == '<') { + if (ctx->pos[2] == '=') { + tok->kind = TK_SHIFT_LEFT_EQUALS; + tok->len = 3; + } else { + tok->kind = TK_SHIFT_LEFT; + tok->len = 2; + } + } else { + tok->kind = TK_LESS_THAN; + } + + break; + case '>': + if (ctx->pos[1] == '=') { + tok->kind = TK_GREATER_THAN_EQUALS; + tok->len = 2; + } else if (ctx->pos[1] == '>') { + if (ctx->pos[2] == '=') { + tok->kind = TK_SHIFT_RIGHT_EQUALS; + tok->len = 3; + } else { + tok->kind = TK_SHIFT_RIGHT; + tok->len = 2; + } + } else { + tok->kind = TK_GREATER_THAN; + } + + break; + default: + tok->kind = char_info[(int)*ctx->pos] >> CHAR_OPER_SHIFT; + if (ctx->pos[1] == '=') { + tok->kind++; + tok->len = 2; + } + + break; + } + + ctx->pos += tok->len; +} + +static void +lex_str(struct lexer *ctx, struct token *tok) +{ + /* Find end of string */ + ctx->pos++; + while (*ctx->pos != '"') { + if (*ctx->pos == '\\' && ctx->pos[1] == '\"') { + ctx->pos++; + } + + ctx->pos++; + } + ctx->pos++; + + tok->kind = TK_STRING; + tok->len = (size_t)(ctx->pos - tok->pos) - 1; +} + +static void +lex_char(struct lexer *ctx, struct token *tok) +{ + /* Find end of character */ + ctx->pos++; + while (*ctx->pos != '\'') { + if (*ctx->pos == '\\' && ctx->pos[1] == '\'') { + ctx->pos++; + } + + ctx->pos++; + } + ctx->pos++; + + tok->kind = TK_CHARACTER; + tok->len = (size_t)(ctx->pos - tok->pos) - 1; +} + +void +lexer_next(struct lexer *ctx, struct token *tok) +{ + if (tok == NULL) { + return; + } + + if (ctx == NULL) { + tok->kind = TK_UNKNOWN; + return; + } + + skip_ignored(ctx); + + /* Initialize token */ + tok->kind = TK_UNKNOWN; + tok->pos = ctx->pos; + tok->line = ctx->line; + tok->col = (int)(tok->pos - ctx->line_start) + 1; + + if (char_info[(int)*ctx->pos] & CHAR_ALPHA || *ctx->pos == '_') { + lex_ident(ctx, tok); + return; + } + + if (char_info[(int)*ctx->pos] & CHAR_SINGLE) { + tok->kind = char_info[(int)*ctx->pos] >> CHAR_SINGLE_SHIFT; + tok->len = 1; + ctx->pos++; + return; + } + + if (char_info[(int)*ctx->pos] & CHAR_OPER) { + lex_oper(ctx, tok); + return; + } + + if (char_info[(int)*ctx->pos] & CHAR_DIGIT) { + tok->kind = TK_NUMBER; + + if (*ctx->pos == '0' && ctx->pos[1] == 'x') { + parse_num_hex(ctx, tok); + } else if (*ctx->pos == '0' && ctx->pos[1] == 'b') { + parse_num_bin(ctx, tok); + } else { + parse_num_dec(ctx, tok); + } + + tok->len = (size_t)(ctx->pos - tok->pos); + return; + } + + if (*ctx->pos == '"') { + lex_str(ctx, tok); + return; + } + + if (*ctx->pos == '\'') { + lex_char(ctx, tok); + return; + } + + if (*ctx->pos == '\0') { + tok->kind = TK_EOF; + return; + } +} + +void +lexer_init(struct lexer *ctx, char *source) +{ + if (ctx == NULL || source == NULL) { + return; + } + + debug("Initializing lexer...\n"); + + ctx->pos = source; + ctx->line_start = ctx->pos; + ctx->line = 1; + + keywords_init(); +} diff --git a/compiler/main.c b/compiler/main.c new file mode 100644 index 0000000..da261bc --- /dev/null +++ b/compiler/main.c @@ -0,0 +1,310 @@ +/* + * Compiler command-line interface. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "debug.h" +#include "hashmap.h" +#include "parser/type.h" +#include "parser.h" + +#define HASHMAP_ROWS 16 + +#define OID_O 0x00 + +struct option { + char *name; + char *value; +}; + +static char *argv0; +static char *in_fname; +static struct option options[] = { + [OID_O] = { "o", NULL } +}; + +__attribute__((noreturn)) static void +cmd_fatal(const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "\033[1m%s: \033[31mfatal error:\033[0m ", argv0); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fprintf(stderr, "compilation terminated.\n"); + exit(EXIT_FAILURE); +} + +static void +cmd_error(const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "\033[1m%s: \033[31merror:\033[0m ", argv0); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static char * +load_text_file(char *filename) +{ + FILE *fp; + size_t size; + char *buf; + + fp = fopen(filename, "rb"); + if (fp == NULL) { + cmd_error("%s: %s\n", filename, strerror(errno)); + return NULL; + } + + /* Get file size */ + fseek(fp, 0, SEEK_END); + size = (size_t)ftell(fp); + fseek(fp, 0, SEEK_SET); + if (size < sizeof(char)) { + cmd_error("%s: empty file\n", filename); + fclose(fp); + return NULL; + } + + /* Read entire file */ + buf = malloc(size + sizeof(char)); + if (fread(buf, 1, size, fp) != size) { + cmd_error("%s: %s\n", filename, strerror(errno)); + free(buf); + fclose(fp); + return NULL; + } + + fclose(fp); + + /* Terminate string */ + buf[size] = '\0'; + + return buf; +} + +static struct option * +find_option(char *name) +{ + for (int o = 0; o < (int)(sizeof(options) / sizeof(struct option)); o++) { + if (strcmp(options[o].name, name) == 0) { + return &options[0]; + } + } + + return NULL; +} + +static void +set_out_fname(void) +{ + size_t len; + char *buf; + char *dot; + + /* Skip if set already */ + if (options[OID_O].value != NULL) { + return; + } + + len = strlen(in_fname); + buf = malloc(len + 4); + strcpy(buf, in_fname); + + /* Append extension after last '.' or at end */ + dot = strrchr(buf, '.'); + if (dot == NULL) { + strcpy(buf + len, ".txt"); + } else { + strcpy(buf + (dot - buf), ".txt"); + } + + options[OID_O].value = buf; +} + +static void +parse_args(int argc, char **argv) +{ + struct option *opt; + + in_fname = NULL; + for (int a = 1; a < argc; a++) { + /* Set input filename */ + if (argv[a][0] != '-') { + if (in_fname != NULL) { + cmd_error("multiple input files specified\n"); + } else { + in_fname = argv[a]; + } + + continue; + } + + /* Identify option */ + opt = find_option(argv[a] + 1); + if (opt == NULL) { + cmd_error("unrecognized command-line option '%s'\n", argv[a]); + continue; + } + + /* Set option value */ + a++; + if (a >= argc) { + cmd_error("missing value after '%s'\n", argv[a - 1]); + } + opt->value = argv[a]; + } + + if (in_fname == NULL) { + cmd_fatal("no input files\n"); + } + + set_out_fname(); +} + +static void +print_alias(struct type *typ) +{ + printf("alias (%lu bytes, %d pointer(s))", typ->size, typ->n_ptrs); +} + +static void +print_enum(struct type *typ) +{ + struct enum_member *mem; + + printf("enum {\n"); + + for (size_t r = 0; r < typ->members.n_rows; r++) { + mem = (struct enum_member*)typ->members.rows[r].head; + if (mem == (struct enum_member*)&typ->members.rows[r]) { + continue; + } + + do { + printf(" %.*s,\n", (int)mem->name_len, mem->name); + + mem = (struct enum_member*)mem->hashmap_entry.list_entry.next; + } while (mem != (struct enum_member*)&typ->members.rows[r]); + } + + printf("}"); +} + +static void +print_struct(struct type *typ) +{ + struct struct_member *mem; + + printf("struct {\n"); + + for (size_t r = 0; r < typ->members.n_rows; r++) { + mem = (struct struct_member*)typ->members.rows[r].head; + if (mem == (struct struct_member*)&typ->members.rows[r]) { + continue; + } + + do { + printf(" %.*s %.*s; (%d pointer(s))\n", (int)mem->typ->name_len, mem->typ->name, (int)mem->name_len, mem->name, mem->n_ptrs); + + mem = (struct struct_member*)mem->hashmap_entry.list_entry.next; + } while (mem != (struct struct_member*)&typ->members.rows[r]); + } + + printf("}"); +} + +static void +print_type(struct type *typ) +{ + printf("type %.*s: ", (int)typ->name_len, typ->name); + + switch (typ->kind) { + case TYK_ALIAS: + print_alias(typ); + break; + case TYK_ENUM: + print_enum(typ); + break; + case TYK_STRUCT: + print_struct(typ); + break; + default: + printf("(unknown)"); + break; + } + + printf(";\n"); + +} + +int +main(int argc, char **argv) +{ + char *input; + struct parser parser; + struct list *types_rows, *procs_rows; + struct hashmap types, procs; + struct type *typ; + + argv0 = argv[0]; + if (argc < 2) { + cmd_fatal("no input files\n"); + exit(EXIT_FAILURE); + } + + /* Parse command-line arguments */ + parse_args(argc, argv); + + /* Load input file */ + input = load_text_file(in_fname); + if (input == NULL) { + exit(EXIT_FAILURE); + } + + /* Initialize hashmaps */ + types_rows = malloc(HASHMAP_ROWS * sizeof(struct list)); + types.rows = types_rows; + types.n_rows = HASHMAP_ROWS; + hashmap_init(&types); + procs_rows = malloc(HASHMAP_ROWS * sizeof(struct list)); + procs.rows = procs_rows; + procs.n_rows = HASHMAP_ROWS; + hashmap_init(&procs); + + /* Parse input */ + parser.tok.fname = in_fname; + parser.types = &types; + parser.procs = &procs; + parser_init(&parser, input); + parser_parse(&parser); + + /* Print parsed types */ + for (size_t r = 0; r < types.n_rows; r++) { + typ = (struct type*)types.rows[r].head; + if (typ == (struct type*)&types.rows[r]) { + continue; + } + + do { + print_type(typ); + typ = (struct type*)typ->hashmap_entry.list_entry.next; + } while (typ != (struct type*)&types.rows[r]); + } + + free(procs_rows); + free(types_rows); + free(input); + return 0; +} diff --git a/compiler/parser/parser.c b/compiler/parser/parser.c new file mode 100644 index 0000000..aeec48b --- /dev/null +++ b/compiler/parser/parser.c @@ -0,0 +1,84 @@ +/* + * Quark parser. + * Turns tokens into an AST (Abstract Syntax Tree). + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "debug.h" +#include "hashmap.h" +#include "parser/type.h" +#include "parser.h" + +static void +add_builtin(struct hashmap *map, char *name, size_t size, int n_ptrs) +{ + struct type *type; + size_t name_len; + + name_len = strlen(name); + + type = malloc(sizeof(struct type)); + type->hashmap_entry.hash = hash_data(name, name_len); + type->name = name; + type->name_len = name_len; + type->size = size; + type->n_ptrs = n_ptrs; + + hashmap_add(map, &type->hashmap_entry); +} + +void +tok_error(struct token *tok, const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "\033[1m%s:%d:%d: \033[31merror:\033[0m ", tok->fname, tok->line, tok->col); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void +tok_warn(struct token *tok, const char *fmt, ...) +{ + va_list ap; + + printf("\033[1m%s:%d:%d: \033[33mwarning:\033[0m ", tok->fname, tok->line, tok->col); + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + +void +parser_parse(struct parser *ctx) +{ + debug("Parsing...\n"); + + next_token(ctx); + while (ctx->tok.kind != TK_EOF) { + switch (ctx->tok.kind) { + case TK_TYPE: + parse_type(ctx); + break; + default: + tok_error(&ctx->tok, "unexpected \"%.*s\"\n", (int)ctx->tok.len, ctx->tok.pos); + next_token(ctx); + break; + } + } +} + +void +parser_init(struct parser *ctx, char *source) +{ + debug("Initializing parser...\n"); + lexer_init(&ctx->lexer, source); + + add_builtin(ctx->types, "uint32", 4, 0); + add_builtin(ctx->types, "any", 0, 0); +} diff --git a/compiler/parser/type.c b/compiler/parser/type.c new file mode 100644 index 0000000..4ac9327 --- /dev/null +++ b/compiler/parser/type.c @@ -0,0 +1,254 @@ +/* + * Type parser. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stdbool.h> +#include <stdlib.h> +#include "debug.h" +#include "hashmap.h" +#include "lexer/token.h" +#include "parser/type.h" +#include "parser.h" + +#define HASHMAP_ROWS 8 + +static struct type * +type_new(struct token *tok) +{ + struct type *typ; + + typ = malloc(sizeof(struct type)); + typ->hashmap_entry.hash = tok->hash; + typ->name = tok->pos; + typ->name_len = tok->len; + + return typ; +} + +static bool +parse_type_ref(struct parser *ctx, struct type **typ_out, int *n_ptrs_out) +{ + struct type *typ; + int n_ptrs; + + debug("Parsing type reference...\n"); + + /* Find type */ + typ = (struct type*)hashmap_find(ctx->types, ctx->tok.hash); + if (typ == NULL) { + tok_error(&ctx->tok, "type \"%.*s\" not found\n", (int)ctx->tok.len, ctx->tok.pos); + return false; + } + + /* Find number of pointers */ + n_ptrs = 0; + while (next_token(ctx)->kind == TK_STAR) { + n_ptrs++; + } + + /* Ensure number of pointers is allowed */ + if (typ->size == 0 && n_ptrs < 1) { + tok_error(&ctx->tok, "type \"%.*s\" can only be used in a pointer\n", (int)typ->name_len, typ->name); + return false; + } + + *typ_out = typ; + *n_ptrs_out = n_ptrs; + return true; +} + +static bool +parse_alias(struct parser *ctx, struct type *typ) +{ + struct type *base; + + debug("Parsing type alias definition...\n"); + + typ->kind = TYK_ALIAS; + + if (!parse_type_ref(ctx, &base, &typ->n_ptrs)) { + return false; + } + + if (typ->n_ptrs >= 1) { + typ->size = sizeof(void*); + } else { + typ->size = base->size; + } + + return true; +} + +static bool +parse_enum(struct parser *ctx, struct type *typ) +{ + struct enum_member *mem; + uint64_t value; + + debug("Parsing enum definition...\n"); + + typ->kind = TYK_ENUM; + + if (next_token(ctx)->kind != TK_LBRACE) { + tok_error(&ctx->tok, "expected \"{\" after \"enum\"\n"); + return false; + } + + typ->members.rows = malloc(HASHMAP_ROWS * sizeof(struct list)); + typ->members.n_rows = HASHMAP_ROWS; + hashmap_init(&typ->members); + + value = 0; + next_token(ctx); + while (ctx->tok.kind != TK_RBRACE) { + if (ctx->tok.kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected enum member name\n"); + return true; + } + + mem = malloc(sizeof(struct enum_member)); + mem->hashmap_entry.hash = ctx->tok.hash; + mem->name = ctx->tok.pos; + mem->name_len = ctx->tok.len; + mem->value = value++; + hashmap_add(&typ->members, &mem->hashmap_entry); + + if (next_token(ctx)->kind == TK_COMMA) { + next_token(ctx); + continue; + } + + if (ctx->tok.kind != TK_RBRACE) { + tok_error(&ctx->tok, "expected \",\" or \"}\" after enum member name\n"); + return true; + } + } + + next_token(ctx); + return true; +} + +static bool +parse_struct(struct parser *ctx, struct type *typ) +{ + struct struct_member *mem; + uint64_t off; + + debug("Parsing struct definition...\n"); + + typ->kind = TYK_STRUCT; + + if (next_token(ctx)->kind != TK_LBRACE) { + tok_error(&ctx->tok, "expected \"{\" after \"struct\"\n"); + return false; + } + + typ->members.rows = malloc(HASHMAP_ROWS * sizeof(struct list)); + typ->members.n_rows = HASHMAP_ROWS; + hashmap_init(&typ->members); + + off = 0; + next_token(ctx); + while (ctx->tok.kind != TK_RBRACE) { + if (ctx->tok.kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected type name\n"); + return true; + } + + mem = malloc(sizeof(struct struct_member)); + mem->hashmap_entry.hash = ctx->tok.hash; + mem->name = ctx->tok.pos; + mem->name_len = ctx->tok.len; + mem->off = off; + + if (!parse_type_ref(ctx, &mem->typ, &mem->n_ptrs)) { + return true; + } + + if (ctx->tok.kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected struct member name\n"); + return true; + } + mem->name = ctx->tok.pos; + mem->name_len = ctx->tok.len; + + if (mem->n_ptrs >= 1) { + mem->size = sizeof(void*); + } else { + mem->size = mem->typ->size; + } + off += mem->size; + + hashmap_add(&typ->members, &mem->hashmap_entry); + + if (next_token(ctx)->kind != TK_SEMICOLON) { + tok_error(&ctx->tok, "expected \";\" after struct member name\n"); + return true; + } + next_token(ctx); + } + + next_token(ctx); + return true; +} + +void +parse_type(struct parser *ctx) +{ + struct type *typ; + bool success; + + debug("Parsing type definition...\n"); + + /* Type name */ + if (next_token(ctx)->kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected identifier after \"type\"\n"); + return; + } + + /* Ensure type does not already exist */ + typ = (struct type*)hashmap_find(ctx->types, ctx->tok.hash); + if (typ != NULL) { + tok_error(&ctx->tok, "type \"%.*s\" already defined\n", (int)ctx->tok.len, ctx->tok.pos); + return; + } + + /* Create type */ + typ = type_new(&ctx->tok); + + if (next_token(ctx)->kind != TK_COLON) { + tok_error(&ctx->tok, "expected \":\" after type name\n"); + free(typ); + return; + } + + next_token(ctx); + if (ctx->tok.kind == TK_IDENTIFIER) { + success = parse_alias(ctx, typ); + } else if (ctx->tok.kind == TK_ENUM) { + success = parse_enum(ctx, typ); + } else if (ctx->tok.kind == TK_STRUCT) { + success = parse_struct(ctx, typ); + } else { + tok_error(&ctx->tok, "expected type name or \"enum\" after \":\"\n"); + free(typ); + return; + } + + if (!success) { + free(typ); + return; + } + + if (ctx->tok.kind != TK_SEMICOLON) { + tok_error(&ctx->tok, "expected \";\" after type definition\n"); + free(typ); + return; + } + next_token(ctx); + + /* Add type to parser's registry */ + hashmap_add(ctx->types, &typ->hashmap_entry); +} diff --git a/include/debug.h b/include/debug.h new file mode 100644 index 0000000..29434be --- /dev/null +++ b/include/debug.h @@ -0,0 +1,18 @@ +/* + * Debugging utilities. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#ifndef _DEBUG_H +#define _DEBUG_H + +void __debug(const char *fmt, ...); + +#if defined(ENABLE_DEBUG) +#define debug __debug +#else +#define debug(fmt, ...) +#endif + +#endif /* !_DEBUG_H */ diff --git a/include/hash.h b/include/hash.h new file mode 100644 index 0000000..f39c6da --- /dev/null +++ b/include/hash.h @@ -0,0 +1,21 @@ +/* + * 32-bit FNV-1a hash function. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#ifndef _HASH_H +#define _HASH_H + +#include <stdint.h> +#include <stdlib.h> + +#define FNV_PRIME 0x01000193 +#define FNV_OFFSET_BASIS 0x811c9dc5 + +typedef uint32_t hash_t; + +hash_t hash_data(const void *data, size_t length); +hash_t hash_string(const char *str); + +#endif /* !_HASH_H */ diff --git a/include/hashmap.h b/include/hashmap.h new file mode 100644 index 0000000..26dfc61 --- /dev/null +++ b/include/hashmap.h @@ -0,0 +1,34 @@ +/* + * Tiny hashmap implementation. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#ifndef _HASHMAP_H +#define _HASHMAP_H + +#include <stddef.h> +#include "list.h" +#include "hash.h" + +struct hashmap_entry { + struct list_entry list_entry; + hash_t hash; +}; + +struct hashmap { + struct list *rows; + size_t n_rows; +}; + +static inline void +hashmap_remove(struct hashmap_entry *entry) +{ + list_remove(&entry->list_entry); +} + +void hashmap_add(struct hashmap *map, struct hashmap_entry *entry); +struct hashmap_entry *hashmap_find(struct hashmap *map, hash_t hash); +void hashmap_init(struct hashmap *map); + +#endif /* !_HASHMAP_H */ diff --git a/include/lexer.h b/include/lexer.h new file mode 100644 index 0000000..99431ec --- /dev/null +++ b/include/lexer.h @@ -0,0 +1,22 @@ +/* + * Quark lexer (lexical analyzer). + * Turns source code into tokens. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#ifndef _LEXER_H +#define _LEXER_H + +#include "lexer/token.h" + +struct lexer { + char *pos; + char *line_start; + int line; +}; + +void lexer_next(struct lexer *ctx, struct token *tok); +void lexer_init(struct lexer *ctx, char *source); + +#endif /* !_LEXER_H */ diff --git a/include/lexer/char_info.h b/include/lexer/char_info.h new file mode 100644 index 0000000..5987bbf --- /dev/null +++ b/include/lexer/char_info.h @@ -0,0 +1,60 @@ +/* + * Character info table. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#ifndef _LEXER_CHAR_INFO_H +#define _LEXER_CHAR_INFO_H + +#include <stdint.h> +#include "lexer/token.h" + +#define CHAR_HORZ_WS (1 << 0) +#define CHAR_VERT_WS (1 << 1) +#define CHAR_DIGIT (1 << 2) +#define CHAR_XDIGIT (1 << 3) +#define CHAR_UPPER (1 << 4) +#define CHAR_LOWER (1 << 5) +#define CHAR_OPER (1 << 6) +#define CHAR_SINGLE (1 << 7) + +#define CHAR_HEX (CHAR_DIGIT | CHAR_XDIGIT) +#define CHAR_XUPPER (CHAR_XDIGIT | CHAR_UPPER) +#define CHAR_XLOWER (CHAR_XDIGIT | CHAR_LOWER) +#define CHAR_WHITESPACE (CHAR_HORZ_WS | CHAR_VERT_WS) +#define CHAR_ALPHA (CHAR_UPPER | CHAR_LOWER) +#define CHAR_ALNUM (CHAR_ALPHA | CHAR_DIGIT) + +#define CHAR_SINGLE_SHIFT 8 +#define MAKE_SINGLE(kind) ((kind << CHAR_SINGLE_SHIFT) | CHAR_SINGLE) +#define CHAR_COMMA MAKE_SINGLE(TK_COMMA) +#define CHAR_DOT MAKE_SINGLE(TK_DOT) +#define CHAR_COLON MAKE_SINGLE(TK_COLON) +#define CHAR_SEMI MAKE_SINGLE(TK_SEMICOLON) +#define CHAR_LPAREN MAKE_SINGLE(TK_LPAREN) +#define CHAR_RPAREN MAKE_SINGLE(TK_RPAREN) +#define CHAR_LBRACE MAKE_SINGLE(TK_LBRACE) +#define CHAR_RBRACE MAKE_SINGLE(TK_RBRACE) +#define CHAR_LBRACK MAKE_SINGLE(TK_LBRACK) +#define CHAR_RBRACK MAKE_SINGLE(TK_RBRACK) +#define CHAR_TILDE MAKE_SINGLE(TK_TILDE) +#define CHAR_EQUALS MAKE_SINGLE(TK_EQUALS) + +#define CHAR_OPER_SHIFT 8 +#define MAKE_OPER(kind) ((kind << CHAR_OPER_SHIFT) | CHAR_OPER) +#define CHAR_PLUS MAKE_OPER(TK_PLUS) +#define CHAR_MINUS MAKE_OPER(TK_MINUS) +#define CHAR_STAR MAKE_OPER(TK_STAR) +#define CHAR_SLASH MAKE_OPER(TK_SLASH) +#define CHAR_PERCENT MAKE_OPER(TK_PERCENT) +#define CHAR_EXCLAIM MAKE_OPER(TK_EXCLAMATION) +#define CHAR_LESS MAKE_OPER(TK_LESS_THAN) +#define CHAR_GREATER MAKE_OPER(TK_GREATER_THAN) +#define CHAR_CARET MAKE_OPER(TK_CARET) +#define CHAR_AMPER MAKE_OPER(TK_AMPERSAND) +#define CHAR_PIPE MAKE_OPER(TK_PIPE) + +extern uint16_t char_info[256]; + +#endif /* !_LEXER_CHAR_INFO_H */ diff --git a/include/lexer/keywords.h b/include/lexer/keywords.h new file mode 100644 index 0000000..7da7dc1 --- /dev/null +++ b/include/lexer/keywords.h @@ -0,0 +1,15 @@ +/* + * Keyword hashmap. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#ifndef _LEXER_KEYWORDS_H +#define _LEXER_KEYWORDS_H + +#include "lexer/token.h" + +token_kind_t keywords_find(struct token *tok); +void keywords_init(void); + +#endif /* !_LEXER_KEYWORDS_H */ diff --git a/include/lexer/token.h b/include/lexer/token.h new file mode 100644 index 0000000..e0a9ea3 --- /dev/null +++ b/include/lexer/token.h @@ -0,0 +1,92 @@ +/* + * Token definitions. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#ifndef _LEXER_TOKEN_H +#define _LEXER_TOKEN_H + +#include <stddef.h> +#include <stdint.h> +#include "hash.h" + +typedef enum { + TK_UNKNOWN, + TK_EOF, + + TK_IDENTIFIER, + TK_NUMBER, + TK_STRING, + TK_CHARACTER, + + /* Keywords */ + TK_TYPE, + TK_ENUM, + TK_STRUCT, + + /* + * Operators. + * NOTE: lex_oper() requires that TK_*_EQUALS + * immediately follows TK_*. + */ + TK_PLUS, + TK_PLUS_EQUALS, + TK_PLUS_PLUS, + TK_MINUS, + TK_MINUS_EQUALS, + TK_MINUS_MINUS, + TK_ARROW, + TK_STAR, + TK_STAR_EQUALS, + TK_SLASH, + TK_SLASH_EQUALS, + TK_PERCENT, + TK_PERCENT_EQUALS, + TK_EXCLAMATION, + TK_EXCLAMATION_EQUALS, + TK_LESS_THAN, + TK_LESS_THAN_EQUALS, + TK_SHIFT_LEFT, + TK_SHIFT_LEFT_EQUALS, + TK_GREATER_THAN, + TK_GREATER_THAN_EQUALS, + TK_SHIFT_RIGHT, + TK_SHIFT_RIGHT_EQUALS, + TK_CARET, + TK_CARET_EQUALS, + TK_AMPERSAND, + TK_AMPERSAND_EQUALS, + TK_PIPE, + TK_PIPE_EQUALS, + TK_TILDE, + TK_EQUALS, + + /* Miscellaneous */ + TK_COMMA, + TK_DOT, + TK_COLON, + TK_SEMICOLON, + TK_LPAREN, + TK_RPAREN, + TK_LBRACE, + TK_RBRACE, + TK_LBRACK, + TK_RBRACK +} token_kind_t; + +struct token { + token_kind_t kind; + + char *fname; + int line, col; + char *pos; + size_t len; + + union { + hash_t hash; + uint64_t value; + }; +}; + +#endif /* !_LEXER_TOKEN_H */ diff --git a/include/list.h b/include/list.h new file mode 100644 index 0000000..0364f7c --- /dev/null +++ b/include/list.h @@ -0,0 +1,58 @@ +/* + * Tiny doubly-linked list implementation. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#ifndef _LIST_H +#define _LIST_H + +#include <stddef.h> + +struct list_entry { + struct list_entry *prev; + struct list_entry *next; +}; + +struct list { + struct list_entry *tail; + struct list_entry *head; + size_t length; +}; + +static inline void +list_remove(struct list_entry *entry) +{ + entry->prev->next = entry->next; + entry->next->prev = entry->prev; +} + +static inline void +list_append(struct list *list, struct list_entry *entry) +{ + entry->prev = list->tail; + entry->next = (struct list_entry*)list; + entry->prev->next = entry; + list->tail = entry; + list->length++; +} + +static inline void +list_prepend(struct list *list, struct list_entry *entry) +{ + entry->next = list->head; + entry->prev = (struct list_entry*)list; + entry->next->prev = entry; + list->head = entry; + list->length++; +} + +static inline void +list_init(struct list *list) +{ + list->tail = (struct list_entry*)list; + list->head = (struct list_entry*)list; + list->length = 0; +} + +#endif /* !_LIST_H */ diff --git a/include/parser.h b/include/parser.h new file mode 100644 index 0000000..d5e7acf --- /dev/null +++ b/include/parser.h @@ -0,0 +1,34 @@ +/* + * Quark parser. + * Turns tokens into an AST (Abstract Syntax Tree). + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#ifndef _PARSER_H +#define _PARSER_H + +#include "lexer/token.h" +#include "lexer.h" + +struct parser { + struct lexer lexer; + struct token tok; + struct hashmap *types; + struct hashmap *procs; +}; + +static inline struct token * +next_token(struct parser *ctx) +{ + lexer_next(&ctx->lexer, &ctx->tok); + return &ctx->tok; +} + +void tok_error(struct token *tok, const char *fmt, ...); +void tok_warn(struct token *tok, const char *fmt, ...); + +void parser_parse(struct parser *ctx); +void parser_init(struct parser *ctx, char *source); + +#endif /* !_PARSER_H */ diff --git a/include/parser/ast.h b/include/parser/ast.h new file mode 100644 index 0000000..03be1bd --- /dev/null +++ b/include/parser/ast.h @@ -0,0 +1,20 @@ +/* + * AST (Abstract Syntax Tree) definitions. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#ifndef _PARSER_AST_H +#define _PARSER_AST_H + +enum ast_node_kind { + NK_UNKNOWN, + + NK_ +}; + +struct ast_node { + enum ast_node_kind kind; +}; + +#endif /* !_PARSER_AST_H */ diff --git a/include/parser/type.h b/include/parser/type.h new file mode 100644 index 0000000..6a939ab --- /dev/null +++ b/include/parser/type.h @@ -0,0 +1,57 @@ +/* + * Type parser. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#ifndef _PARSER_TYPE_H +#define _PARSER_TYPE_H + +#include <stddef.h> +#include "hashmap.h" +#include "parser.h" + +enum type_kind { + TYK_ALIAS, + TYK_ENUM, + TYK_STRUCT +}; + +struct enum_member { + struct hashmap_entry hashmap_entry; + + char *name; + size_t name_len; + + uint64_t value; +}; + +struct struct_member { + struct hashmap_entry hashmap_entry; + + char *name; + size_t name_len; + + size_t off, size; + struct type *typ; + int n_ptrs; +}; + +struct type { + struct hashmap_entry hashmap_entry; + + enum type_kind kind; + char *name; + size_t name_len; + + size_t size; + + union { + int n_ptrs; + struct hashmap members; + }; +}; + +void parse_type(struct parser *ctx); + +#endif /* !_PARSER_TYPE_H */ @@ -0,0 +1 @@ +int main
\ No newline at end of file @@ -0,0 +1,13 @@ +type EfiStatus: uint32; +type EfiHandle: any*; + +type TestEnum: enum { + nog, + bal +}; + +type TestStruct: struct { + EfiStatus status; + EfiHandle imageHandle; + any* systemTable; +}; |