diff options
author | Ian Moffett <ian@osmora.org> | 2024-11-01 23:46:08 -0400 |
---|---|---|
committer | Ian Moffett <ian@osmora.org> | 2024-11-01 23:46:08 -0400 |
commit | a515dfb3b8f8e999362db7a6b52b3104c03b750a (patch) | |
tree | d0180f0cbc39d9c3e367af30791ad774e4d419ff /compiler |
Import quark sources
Signed-off-by: Ian Moffett <ian@osmora.org>
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/debug.c | 20 | ||||
-rw-r--r-- | compiler/hash.c | 35 | ||||
-rw-r--r-- | compiler/hashmap.c | 39 | ||||
-rw-r--r-- | compiler/lexer/char_info.c | 106 | ||||
-rw-r--r-- | compiler/lexer/keywords.c | 73 | ||||
-rw-r--r-- | compiler/lexer/lexer.c | 282 | ||||
-rw-r--r-- | compiler/main.c | 310 | ||||
-rw-r--r-- | compiler/parser/parser.c | 84 | ||||
-rw-r--r-- | compiler/parser/type.c | 254 |
9 files changed, 1203 insertions, 0 deletions
diff --git a/compiler/debug.c b/compiler/debug.c new file mode 100644 index 0000000..d14bca6 --- /dev/null +++ b/compiler/debug.c @@ -0,0 +1,20 @@ +/* + * Logging utilities. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stdarg.h> +#include <stdio.h> +#include "debug.h" + +void +__debug(const char *fmt, ...) +{ + va_list ap; + + printf("\033[90mdebug:\033[0m "); + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} diff --git a/compiler/hash.c b/compiler/hash.c new file mode 100644 index 0000000..a63691c --- /dev/null +++ b/compiler/hash.c @@ -0,0 +1,35 @@ +/* + * 32-bit FNV-1a hash function. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include "hash.h" + +hash_t +hash_data(const void *data, size_t length) +{ + hash_t hash; + + hash = FNV_OFFSET_BASIS; + for (size_t i = 0; i < length; i++) { + hash ^= ((uint8_t*)data)[i]; + hash *= FNV_PRIME; + } + + return hash; +} + +hash_t +hash_string(const char *str) +{ + hash_t hash; + + hash = FNV_OFFSET_BASIS; + while (*str) { + hash ^= (uint8_t)*str++; + hash *= FNV_PRIME; + } + + return hash; +} diff --git a/compiler/hashmap.c b/compiler/hashmap.c new file mode 100644 index 0000000..0fb2e14 --- /dev/null +++ b/compiler/hashmap.c @@ -0,0 +1,39 @@ +/* + * Tiny hashmap implementation. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stddef.h> +#include "hashmap.h" + +void +hashmap_add(struct hashmap *map, struct hashmap_entry *entry) +{ + list_prepend(&map->rows[entry->hash % map->n_rows], &entry->list_entry); +} + +struct hashmap_entry * +hashmap_find(struct hashmap *map, hash_t hash) +{ + struct hashmap_entry *head, *entry; + + entry = head = (struct hashmap_entry*)(map->rows[hash % map->n_rows].head); + do { + if (entry->hash == hash) { + return entry; + } + + entry = (struct hashmap_entry*)(entry->list_entry.next); + } while (entry != head); + + return NULL; +} + +void +hashmap_init(struct hashmap *map) +{ + for (size_t r = 0; r < map->n_rows; r++) { + list_init(&map->rows[r]); + } +} diff --git a/compiler/lexer/char_info.c b/compiler/lexer/char_info.c new file mode 100644 index 0000000..97ecb7d --- /dev/null +++ b/compiler/lexer/char_info.c @@ -0,0 +1,106 @@ +/* + * Character info table. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include "lexer/char_info.h" + +uint16_t char_info[256] = { + /* + NUL SOH STX ETX + EOT ENQ ACK BEL + */ + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , + /* + BS TAB LF VT + FF CR SO SI + */ + 0 , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_VERT_WS, + CHAR_VERT_WS, CHAR_HORZ_WS, 0 , 0 , + /* + DLE DC1 DC2 DC3 + DC4 NAK SYN ETB + */ + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , + /* + CAN EM SUB ESC + FS GS RS US + */ + 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , + /* + ! " # + $ % & ' + */ + CHAR_HORZ_WS, CHAR_EXCLAIM, 0 , 0 , + 0 , CHAR_PERCENT, CHAR_AMPER , 0 , + /* + ( ) * + + , - . / + */ + CHAR_LPAREN , CHAR_RPAREN , CHAR_STAR , CHAR_PLUS , + CHAR_COMMA , CHAR_MINUS , CHAR_DOT , CHAR_SLASH , + /* + 0 1 2 3 + 4 5 6 7 + */ + CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , + CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , + /* + 8 9 : ; + < = > ? + */ + CHAR_DIGIT , CHAR_DIGIT , CHAR_COLON , CHAR_SEMI , + CHAR_LESS , CHAR_EQUALS , CHAR_GREATER, 0 , + /* + @ A B C + D E F G + */ + 0 , CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER , + CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER , CHAR_UPPER , + /* + H I J K + L M N O + */ + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , + /* + P Q R S + T U V W + */ + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , + /* + X Y Z [ + \ ] ^ _ + */ + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_LBRACK , + 0 , CHAR_RBRACK , CHAR_CARET , 0 , + /* + ` a b c + d e f g + */ + 0 , CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER , + CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER , CHAR_LOWER , + /* + h i j k + l m n o + */ + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , + /* + p q r s + t u v w + */ + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , + /* + x y z { + | } ~ + */ + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LBRACE , + CHAR_PIPE , CHAR_RBRACE , CHAR_TILDE , 0 , +}; diff --git a/compiler/lexer/keywords.c b/compiler/lexer/keywords.c new file mode 100644 index 0000000..0e95048 --- /dev/null +++ b/compiler/lexer/keywords.c @@ -0,0 +1,73 @@ +/* + * Keyword hashmap. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include "hash.h" +#include "hashmap.h" +#include "debug.h" +#include "lexer/keywords.h" + +#define HASHMAP_ROWS 8 + +struct keyword { + struct hashmap_entry hashmap_entry; + size_t len; + token_kind_t value; +}; + +static bool initialized = false; +static struct list keywords_rows[HASHMAP_ROWS]; +static struct hashmap keywords; + +static void +add_keyword(char *name, token_kind_t value) +{ + size_t len; + struct keyword *kwd; + + len = strlen(name); + kwd = malloc(sizeof(struct keyword)); + kwd->hashmap_entry.hash = hash_data(name, len); + kwd->len = len; + kwd->value = value; + + hashmap_add(&keywords, &kwd->hashmap_entry); +} + +token_kind_t +keywords_find(struct token *tok) +{ + struct keyword *kwd; + + kwd = (struct keyword*)hashmap_find(&keywords, tok->hash); + if (kwd == NULL || kwd->len != tok->len) { + return TK_UNKNOWN; + } + + return kwd->value; +} + +void +keywords_init(void) +{ + if (initialized) { + return; + } + + debug("Initializing keywords...\n"); + + keywords.rows = keywords_rows; + keywords.n_rows = HASHMAP_ROWS; + hashmap_init(&keywords); + + add_keyword("type", TK_TYPE); + add_keyword("enum", TK_ENUM); + add_keyword("struct", TK_STRUCT); + + initialized = true; +} diff --git a/compiler/lexer/lexer.c b/compiler/lexer/lexer.c new file mode 100644 index 0000000..06f7e89 --- /dev/null +++ b/compiler/lexer/lexer.c @@ -0,0 +1,282 @@ +/* + * Quark lexer (lexical analyzer). + * Turns source code into tokens. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include "debug.h" +#include "lexer.h" +#include "lexer/char_info.h" +#include "lexer/keywords.h" + +static void +skip_ignored(struct lexer *ctx) +{ + while (char_info[(int)*ctx->pos] & CHAR_WHITESPACE) { + if (char_info[(int)*ctx->pos] & CHAR_VERT_WS) { + ctx->line++; + ctx->line_start = ctx->pos + 1; + } + + ctx->pos++; + } +} + +static void +parse_num_hex(struct lexer *ctx, struct token *tok) +{ + ctx->pos += 2; + tok->value = 0; + while (char_info[(int)*ctx->pos] & CHAR_HEX) { + if (char_info[(int)*ctx->pos] == CHAR_XLOWER) { + tok->value |= *ctx->pos++ - 'a' + 0xa; + } else if (char_info[(int)*ctx->pos] == CHAR_XUPPER) { + tok->value |= *ctx->pos++ - 'A' + 0xa; + } else { + tok->value |= *ctx->pos++ - '0'; + } + } +} + +static void +parse_num_dec(struct lexer *ctx, struct token *tok) +{ + ctx->pos++; + tok->value = *ctx->pos - '0'; + while (char_info[(int)*ctx->pos] & CHAR_DIGIT) { + tok->value *= 10; + tok->value += *ctx->pos++ - '0'; + } +} + +static void +parse_num_bin(struct lexer *ctx, struct token *tok) +{ + ctx->pos += 2; + tok->value = 0; + while (*ctx->pos == '0' || *ctx->pos == '1') { + tok->value <<= 1; + tok->value |= *ctx->pos++ - '0'; + } +} + +static void +lex_ident(struct lexer *ctx, struct token *tok) +{ + /* Find end of identifier */ + ctx->pos++; + while (char_info[(int)*ctx->pos] & CHAR_ALNUM || *ctx->pos == '_') { + ctx->pos++; + } + + tok->len = (size_t)(ctx->pos - tok->pos); + tok->hash = hash_data(tok->pos, tok->len); + + /* Determine if this is a keyword or just an identifier */ + tok->kind = keywords_find(tok); + if (tok->kind == TK_UNKNOWN) { + tok->kind = TK_IDENTIFIER; + return; + } +} + +static void +lex_oper(struct lexer *ctx, struct token *tok) +{ + tok->len = 1; + + switch (*ctx->pos) { + case '+': + if (ctx->pos[1] == '+') { + tok->kind = TK_PLUS_PLUS; + tok->len = 2; + } else if (ctx->pos[1] == '=') { + tok->kind = TK_PLUS_EQUALS; + tok->len = 2; + } else { + tok->kind = TK_PLUS; + } + + break; + case '-': + if (ctx->pos[1] == '>') { + tok->kind = TK_ARROW; + tok->len = 2; + } if (ctx->pos[1] == '-') { + tok->kind = TK_MINUS_MINUS; + tok->len = 2; + } else if (ctx->pos[1] == '=') { + tok->kind = TK_MINUS_EQUALS; + tok->len = 2; + } else { + tok->kind = TK_MINUS; + } + + break; + case '<': + if (ctx->pos[1] == '=') { + tok->kind = TK_LESS_THAN_EQUALS; + tok->len = 2; + } else if (ctx->pos[1] == '<') { + if (ctx->pos[2] == '=') { + tok->kind = TK_SHIFT_LEFT_EQUALS; + tok->len = 3; + } else { + tok->kind = TK_SHIFT_LEFT; + tok->len = 2; + } + } else { + tok->kind = TK_LESS_THAN; + } + + break; + case '>': + if (ctx->pos[1] == '=') { + tok->kind = TK_GREATER_THAN_EQUALS; + tok->len = 2; + } else if (ctx->pos[1] == '>') { + if (ctx->pos[2] == '=') { + tok->kind = TK_SHIFT_RIGHT_EQUALS; + tok->len = 3; + } else { + tok->kind = TK_SHIFT_RIGHT; + tok->len = 2; + } + } else { + tok->kind = TK_GREATER_THAN; + } + + break; + default: + tok->kind = char_info[(int)*ctx->pos] >> CHAR_OPER_SHIFT; + if (ctx->pos[1] == '=') { + tok->kind++; + tok->len = 2; + } + + break; + } + + ctx->pos += tok->len; +} + +static void +lex_str(struct lexer *ctx, struct token *tok) +{ + /* Find end of string */ + ctx->pos++; + while (*ctx->pos != '"') { + if (*ctx->pos == '\\' && ctx->pos[1] == '\"') { + ctx->pos++; + } + + ctx->pos++; + } + ctx->pos++; + + tok->kind = TK_STRING; + tok->len = (size_t)(ctx->pos - tok->pos) - 1; +} + +static void +lex_char(struct lexer *ctx, struct token *tok) +{ + /* Find end of character */ + ctx->pos++; + while (*ctx->pos != '\'') { + if (*ctx->pos == '\\' && ctx->pos[1] == '\'') { + ctx->pos++; + } + + ctx->pos++; + } + ctx->pos++; + + tok->kind = TK_CHARACTER; + tok->len = (size_t)(ctx->pos - tok->pos) - 1; +} + +void +lexer_next(struct lexer *ctx, struct token *tok) +{ + if (tok == NULL) { + return; + } + + if (ctx == NULL) { + tok->kind = TK_UNKNOWN; + return; + } + + skip_ignored(ctx); + + /* Initialize token */ + tok->kind = TK_UNKNOWN; + tok->pos = ctx->pos; + tok->line = ctx->line; + tok->col = (int)(tok->pos - ctx->line_start) + 1; + + if (char_info[(int)*ctx->pos] & CHAR_ALPHA || *ctx->pos == '_') { + lex_ident(ctx, tok); + return; + } + + if (char_info[(int)*ctx->pos] & CHAR_SINGLE) { + tok->kind = char_info[(int)*ctx->pos] >> CHAR_SINGLE_SHIFT; + tok->len = 1; + ctx->pos++; + return; + } + + if (char_info[(int)*ctx->pos] & CHAR_OPER) { + lex_oper(ctx, tok); + return; + } + + if (char_info[(int)*ctx->pos] & CHAR_DIGIT) { + tok->kind = TK_NUMBER; + + if (*ctx->pos == '0' && ctx->pos[1] == 'x') { + parse_num_hex(ctx, tok); + } else if (*ctx->pos == '0' && ctx->pos[1] == 'b') { + parse_num_bin(ctx, tok); + } else { + parse_num_dec(ctx, tok); + } + + tok->len = (size_t)(ctx->pos - tok->pos); + return; + } + + if (*ctx->pos == '"') { + lex_str(ctx, tok); + return; + } + + if (*ctx->pos == '\'') { + lex_char(ctx, tok); + return; + } + + if (*ctx->pos == '\0') { + tok->kind = TK_EOF; + return; + } +} + +void +lexer_init(struct lexer *ctx, char *source) +{ + if (ctx == NULL || source == NULL) { + return; + } + + debug("Initializing lexer...\n"); + + ctx->pos = source; + ctx->line_start = ctx->pos; + ctx->line = 1; + + keywords_init(); +} diff --git a/compiler/main.c b/compiler/main.c new file mode 100644 index 0000000..da261bc --- /dev/null +++ b/compiler/main.c @@ -0,0 +1,310 @@ +/* + * Compiler command-line interface. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "debug.h" +#include "hashmap.h" +#include "parser/type.h" +#include "parser.h" + +#define HASHMAP_ROWS 16 + +#define OID_O 0x00 + +struct option { + char *name; + char *value; +}; + +static char *argv0; +static char *in_fname; +static struct option options[] = { + [OID_O] = { "o", NULL } +}; + +__attribute__((noreturn)) static void +cmd_fatal(const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "\033[1m%s: \033[31mfatal error:\033[0m ", argv0); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fprintf(stderr, "compilation terminated.\n"); + exit(EXIT_FAILURE); +} + +static void +cmd_error(const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "\033[1m%s: \033[31merror:\033[0m ", argv0); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static char * +load_text_file(char *filename) +{ + FILE *fp; + size_t size; + char *buf; + + fp = fopen(filename, "rb"); + if (fp == NULL) { + cmd_error("%s: %s\n", filename, strerror(errno)); + return NULL; + } + + /* Get file size */ + fseek(fp, 0, SEEK_END); + size = (size_t)ftell(fp); + fseek(fp, 0, SEEK_SET); + if (size < sizeof(char)) { + cmd_error("%s: empty file\n", filename); + fclose(fp); + return NULL; + } + + /* Read entire file */ + buf = malloc(size + sizeof(char)); + if (fread(buf, 1, size, fp) != size) { + cmd_error("%s: %s\n", filename, strerror(errno)); + free(buf); + fclose(fp); + return NULL; + } + + fclose(fp); + + /* Terminate string */ + buf[size] = '\0'; + + return buf; +} + +static struct option * +find_option(char *name) +{ + for (int o = 0; o < (int)(sizeof(options) / sizeof(struct option)); o++) { + if (strcmp(options[o].name, name) == 0) { + return &options[0]; + } + } + + return NULL; +} + +static void +set_out_fname(void) +{ + size_t len; + char *buf; + char *dot; + + /* Skip if set already */ + if (options[OID_O].value != NULL) { + return; + } + + len = strlen(in_fname); + buf = malloc(len + 4); + strcpy(buf, in_fname); + + /* Append extension after last '.' or at end */ + dot = strrchr(buf, '.'); + if (dot == NULL) { + strcpy(buf + len, ".txt"); + } else { + strcpy(buf + (dot - buf), ".txt"); + } + + options[OID_O].value = buf; +} + +static void +parse_args(int argc, char **argv) +{ + struct option *opt; + + in_fname = NULL; + for (int a = 1; a < argc; a++) { + /* Set input filename */ + if (argv[a][0] != '-') { + if (in_fname != NULL) { + cmd_error("multiple input files specified\n"); + } else { + in_fname = argv[a]; + } + + continue; + } + + /* Identify option */ + opt = find_option(argv[a] + 1); + if (opt == NULL) { + cmd_error("unrecognized command-line option '%s'\n", argv[a]); + continue; + } + + /* Set option value */ + a++; + if (a >= argc) { + cmd_error("missing value after '%s'\n", argv[a - 1]); + } + opt->value = argv[a]; + } + + if (in_fname == NULL) { + cmd_fatal("no input files\n"); + } + + set_out_fname(); +} + +static void +print_alias(struct type *typ) +{ + printf("alias (%lu bytes, %d pointer(s))", typ->size, typ->n_ptrs); +} + +static void +print_enum(struct type *typ) +{ + struct enum_member *mem; + + printf("enum {\n"); + + for (size_t r = 0; r < typ->members.n_rows; r++) { + mem = (struct enum_member*)typ->members.rows[r].head; + if (mem == (struct enum_member*)&typ->members.rows[r]) { + continue; + } + + do { + printf(" %.*s,\n", (int)mem->name_len, mem->name); + + mem = (struct enum_member*)mem->hashmap_entry.list_entry.next; + } while (mem != (struct enum_member*)&typ->members.rows[r]); + } + + printf("}"); +} + +static void +print_struct(struct type *typ) +{ + struct struct_member *mem; + + printf("struct {\n"); + + for (size_t r = 0; r < typ->members.n_rows; r++) { + mem = (struct struct_member*)typ->members.rows[r].head; + if (mem == (struct struct_member*)&typ->members.rows[r]) { + continue; + } + + do { + printf(" %.*s %.*s; (%d pointer(s))\n", (int)mem->typ->name_len, mem->typ->name, (int)mem->name_len, mem->name, mem->n_ptrs); + + mem = (struct struct_member*)mem->hashmap_entry.list_entry.next; + } while (mem != (struct struct_member*)&typ->members.rows[r]); + } + + printf("}"); +} + +static void +print_type(struct type *typ) +{ + printf("type %.*s: ", (int)typ->name_len, typ->name); + + switch (typ->kind) { + case TYK_ALIAS: + print_alias(typ); + break; + case TYK_ENUM: + print_enum(typ); + break; + case TYK_STRUCT: + print_struct(typ); + break; + default: + printf("(unknown)"); + break; + } + + printf(";\n"); + +} + +int +main(int argc, char **argv) +{ + char *input; + struct parser parser; + struct list *types_rows, *procs_rows; + struct hashmap types, procs; + struct type *typ; + + argv0 = argv[0]; + if (argc < 2) { + cmd_fatal("no input files\n"); + exit(EXIT_FAILURE); + } + + /* Parse command-line arguments */ + parse_args(argc, argv); + + /* Load input file */ + input = load_text_file(in_fname); + if (input == NULL) { + exit(EXIT_FAILURE); + } + + /* Initialize hashmaps */ + types_rows = malloc(HASHMAP_ROWS * sizeof(struct list)); + types.rows = types_rows; + types.n_rows = HASHMAP_ROWS; + hashmap_init(&types); + procs_rows = malloc(HASHMAP_ROWS * sizeof(struct list)); + procs.rows = procs_rows; + procs.n_rows = HASHMAP_ROWS; + hashmap_init(&procs); + + /* Parse input */ + parser.tok.fname = in_fname; + parser.types = &types; + parser.procs = &procs; + parser_init(&parser, input); + parser_parse(&parser); + + /* Print parsed types */ + for (size_t r = 0; r < types.n_rows; r++) { + typ = (struct type*)types.rows[r].head; + if (typ == (struct type*)&types.rows[r]) { + continue; + } + + do { + print_type(typ); + typ = (struct type*)typ->hashmap_entry.list_entry.next; + } while (typ != (struct type*)&types.rows[r]); + } + + free(procs_rows); + free(types_rows); + free(input); + return 0; +} diff --git a/compiler/parser/parser.c b/compiler/parser/parser.c new file mode 100644 index 0000000..aeec48b --- /dev/null +++ b/compiler/parser/parser.c @@ -0,0 +1,84 @@ +/* + * Quark parser. + * Turns tokens into an AST (Abstract Syntax Tree). + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "debug.h" +#include "hashmap.h" +#include "parser/type.h" +#include "parser.h" + +static void +add_builtin(struct hashmap *map, char *name, size_t size, int n_ptrs) +{ + struct type *type; + size_t name_len; + + name_len = strlen(name); + + type = malloc(sizeof(struct type)); + type->hashmap_entry.hash = hash_data(name, name_len); + type->name = name; + type->name_len = name_len; + type->size = size; + type->n_ptrs = n_ptrs; + + hashmap_add(map, &type->hashmap_entry); +} + +void +tok_error(struct token *tok, const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "\033[1m%s:%d:%d: \033[31merror:\033[0m ", tok->fname, tok->line, tok->col); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void +tok_warn(struct token *tok, const char *fmt, ...) +{ + va_list ap; + + printf("\033[1m%s:%d:%d: \033[33mwarning:\033[0m ", tok->fname, tok->line, tok->col); + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + +void +parser_parse(struct parser *ctx) +{ + debug("Parsing...\n"); + + next_token(ctx); + while (ctx->tok.kind != TK_EOF) { + switch (ctx->tok.kind) { + case TK_TYPE: + parse_type(ctx); + break; + default: + tok_error(&ctx->tok, "unexpected \"%.*s\"\n", (int)ctx->tok.len, ctx->tok.pos); + next_token(ctx); + break; + } + } +} + +void +parser_init(struct parser *ctx, char *source) +{ + debug("Initializing parser...\n"); + lexer_init(&ctx->lexer, source); + + add_builtin(ctx->types, "uint32", 4, 0); + add_builtin(ctx->types, "any", 0, 0); +} diff --git a/compiler/parser/type.c b/compiler/parser/type.c new file mode 100644 index 0000000..4ac9327 --- /dev/null +++ b/compiler/parser/type.c @@ -0,0 +1,254 @@ +/* + * Type parser. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stdbool.h> +#include <stdlib.h> +#include "debug.h" +#include "hashmap.h" +#include "lexer/token.h" +#include "parser/type.h" +#include "parser.h" + +#define HASHMAP_ROWS 8 + +static struct type * +type_new(struct token *tok) +{ + struct type *typ; + + typ = malloc(sizeof(struct type)); + typ->hashmap_entry.hash = tok->hash; + typ->name = tok->pos; + typ->name_len = tok->len; + + return typ; +} + +static bool +parse_type_ref(struct parser *ctx, struct type **typ_out, int *n_ptrs_out) +{ + struct type *typ; + int n_ptrs; + + debug("Parsing type reference...\n"); + + /* Find type */ + typ = (struct type*)hashmap_find(ctx->types, ctx->tok.hash); + if (typ == NULL) { + tok_error(&ctx->tok, "type \"%.*s\" not found\n", (int)ctx->tok.len, ctx->tok.pos); + return false; + } + + /* Find number of pointers */ + n_ptrs = 0; + while (next_token(ctx)->kind == TK_STAR) { + n_ptrs++; + } + + /* Ensure number of pointers is allowed */ + if (typ->size == 0 && n_ptrs < 1) { + tok_error(&ctx->tok, "type \"%.*s\" can only be used in a pointer\n", (int)typ->name_len, typ->name); + return false; + } + + *typ_out = typ; + *n_ptrs_out = n_ptrs; + return true; +} + +static bool +parse_alias(struct parser *ctx, struct type *typ) +{ + struct type *base; + + debug("Parsing type alias definition...\n"); + + typ->kind = TYK_ALIAS; + + if (!parse_type_ref(ctx, &base, &typ->n_ptrs)) { + return false; + } + + if (typ->n_ptrs >= 1) { + typ->size = sizeof(void*); + } else { + typ->size = base->size; + } + + return true; +} + +static bool +parse_enum(struct parser *ctx, struct type *typ) +{ + struct enum_member *mem; + uint64_t value; + + debug("Parsing enum definition...\n"); + + typ->kind = TYK_ENUM; + + if (next_token(ctx)->kind != TK_LBRACE) { + tok_error(&ctx->tok, "expected \"{\" after \"enum\"\n"); + return false; + } + + typ->members.rows = malloc(HASHMAP_ROWS * sizeof(struct list)); + typ->members.n_rows = HASHMAP_ROWS; + hashmap_init(&typ->members); + + value = 0; + next_token(ctx); + while (ctx->tok.kind != TK_RBRACE) { + if (ctx->tok.kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected enum member name\n"); + return true; + } + + mem = malloc(sizeof(struct enum_member)); + mem->hashmap_entry.hash = ctx->tok.hash; + mem->name = ctx->tok.pos; + mem->name_len = ctx->tok.len; + mem->value = value++; + hashmap_add(&typ->members, &mem->hashmap_entry); + + if (next_token(ctx)->kind == TK_COMMA) { + next_token(ctx); + continue; + } + + if (ctx->tok.kind != TK_RBRACE) { + tok_error(&ctx->tok, "expected \",\" or \"}\" after enum member name\n"); + return true; + } + } + + next_token(ctx); + return true; +} + +static bool +parse_struct(struct parser *ctx, struct type *typ) +{ + struct struct_member *mem; + uint64_t off; + + debug("Parsing struct definition...\n"); + + typ->kind = TYK_STRUCT; + + if (next_token(ctx)->kind != TK_LBRACE) { + tok_error(&ctx->tok, "expected \"{\" after \"struct\"\n"); + return false; + } + + typ->members.rows = malloc(HASHMAP_ROWS * sizeof(struct list)); + typ->members.n_rows = HASHMAP_ROWS; + hashmap_init(&typ->members); + + off = 0; + next_token(ctx); + while (ctx->tok.kind != TK_RBRACE) { + if (ctx->tok.kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected type name\n"); + return true; + } + + mem = malloc(sizeof(struct struct_member)); + mem->hashmap_entry.hash = ctx->tok.hash; + mem->name = ctx->tok.pos; + mem->name_len = ctx->tok.len; + mem->off = off; + + if (!parse_type_ref(ctx, &mem->typ, &mem->n_ptrs)) { + return true; + } + + if (ctx->tok.kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected struct member name\n"); + return true; + } + mem->name = ctx->tok.pos; + mem->name_len = ctx->tok.len; + + if (mem->n_ptrs >= 1) { + mem->size = sizeof(void*); + } else { + mem->size = mem->typ->size; + } + off += mem->size; + + hashmap_add(&typ->members, &mem->hashmap_entry); + + if (next_token(ctx)->kind != TK_SEMICOLON) { + tok_error(&ctx->tok, "expected \";\" after struct member name\n"); + return true; + } + next_token(ctx); + } + + next_token(ctx); + return true; +} + +void +parse_type(struct parser *ctx) +{ + struct type *typ; + bool success; + + debug("Parsing type definition...\n"); + + /* Type name */ + if (next_token(ctx)->kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected identifier after \"type\"\n"); + return; + } + + /* Ensure type does not already exist */ + typ = (struct type*)hashmap_find(ctx->types, ctx->tok.hash); + if (typ != NULL) { + tok_error(&ctx->tok, "type \"%.*s\" already defined\n", (int)ctx->tok.len, ctx->tok.pos); + return; + } + + /* Create type */ + typ = type_new(&ctx->tok); + + if (next_token(ctx)->kind != TK_COLON) { + tok_error(&ctx->tok, "expected \":\" after type name\n"); + free(typ); + return; + } + + next_token(ctx); + if (ctx->tok.kind == TK_IDENTIFIER) { + success = parse_alias(ctx, typ); + } else if (ctx->tok.kind == TK_ENUM) { + success = parse_enum(ctx, typ); + } else if (ctx->tok.kind == TK_STRUCT) { + success = parse_struct(ctx, typ); + } else { + tok_error(&ctx->tok, "expected type name or \"enum\" after \":\"\n"); + free(typ); + return; + } + + if (!success) { + free(typ); + return; + } + + if (ctx->tok.kind != TK_SEMICOLON) { + tok_error(&ctx->tok, "expected \";\" after type definition\n"); + free(typ); + return; + } + next_token(ctx); + + /* Add type to parser's registry */ + hashmap_add(ctx->types, &typ->hashmap_entry); +} |