diff options
author | Ian Moffett <ian@osmora.org> | 2024-11-01 23:46:08 -0400 |
---|---|---|
committer | Ian Moffett <ian@osmora.org> | 2024-11-01 23:46:08 -0400 |
commit | a515dfb3b8f8e999362db7a6b52b3104c03b750a (patch) | |
tree | d0180f0cbc39d9c3e367af30791ad774e4d419ff /compiler/parser |
Import quark sources
Signed-off-by: Ian Moffett <ian@osmora.org>
Diffstat (limited to 'compiler/parser')
-rw-r--r-- | compiler/parser/parser.c | 84 | ||||
-rw-r--r-- | compiler/parser/type.c | 254 |
2 files changed, 338 insertions, 0 deletions
diff --git a/compiler/parser/parser.c b/compiler/parser/parser.c new file mode 100644 index 0000000..aeec48b --- /dev/null +++ b/compiler/parser/parser.c @@ -0,0 +1,84 @@ +/* + * Quark parser. + * Turns tokens into an AST (Abstract Syntax Tree). + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "debug.h" +#include "hashmap.h" +#include "parser/type.h" +#include "parser.h" + +static void +add_builtin(struct hashmap *map, char *name, size_t size, int n_ptrs) +{ + struct type *type; + size_t name_len; + + name_len = strlen(name); + + type = malloc(sizeof(struct type)); + type->hashmap_entry.hash = hash_data(name, name_len); + type->name = name; + type->name_len = name_len; + type->size = size; + type->n_ptrs = n_ptrs; + + hashmap_add(map, &type->hashmap_entry); +} + +void +tok_error(struct token *tok, const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "\033[1m%s:%d:%d: \033[31merror:\033[0m ", tok->fname, tok->line, tok->col); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void +tok_warn(struct token *tok, const char *fmt, ...) +{ + va_list ap; + + printf("\033[1m%s:%d:%d: \033[33mwarning:\033[0m ", tok->fname, tok->line, tok->col); + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + +void +parser_parse(struct parser *ctx) +{ + debug("Parsing...\n"); + + next_token(ctx); + while (ctx->tok.kind != TK_EOF) { + switch (ctx->tok.kind) { + case TK_TYPE: + parse_type(ctx); + break; + default: + tok_error(&ctx->tok, "unexpected \"%.*s\"\n", (int)ctx->tok.len, ctx->tok.pos); + next_token(ctx); + break; + } + } +} + +void +parser_init(struct parser *ctx, char *source) +{ + debug("Initializing parser...\n"); + lexer_init(&ctx->lexer, source); + + add_builtin(ctx->types, "uint32", 4, 0); + add_builtin(ctx->types, "any", 0, 0); +} diff --git a/compiler/parser/type.c b/compiler/parser/type.c new file mode 100644 index 0000000..4ac9327 --- /dev/null +++ b/compiler/parser/type.c @@ -0,0 +1,254 @@ +/* + * Type parser. + * Copyright (c) 2023-2024, Quinn Stephens and the OSMORA team. + * Provided under the BSD 3-Clause license. + */ + +#include <stdbool.h> +#include <stdlib.h> +#include "debug.h" +#include "hashmap.h" +#include "lexer/token.h" +#include "parser/type.h" +#include "parser.h" + +#define HASHMAP_ROWS 8 + +static struct type * +type_new(struct token *tok) +{ + struct type *typ; + + typ = malloc(sizeof(struct type)); + typ->hashmap_entry.hash = tok->hash; + typ->name = tok->pos; + typ->name_len = tok->len; + + return typ; +} + +static bool +parse_type_ref(struct parser *ctx, struct type **typ_out, int *n_ptrs_out) +{ + struct type *typ; + int n_ptrs; + + debug("Parsing type reference...\n"); + + /* Find type */ + typ = (struct type*)hashmap_find(ctx->types, ctx->tok.hash); + if (typ == NULL) { + tok_error(&ctx->tok, "type \"%.*s\" not found\n", (int)ctx->tok.len, ctx->tok.pos); + return false; + } + + /* Find number of pointers */ + n_ptrs = 0; + while (next_token(ctx)->kind == TK_STAR) { + n_ptrs++; + } + + /* Ensure number of pointers is allowed */ + if (typ->size == 0 && n_ptrs < 1) { + tok_error(&ctx->tok, "type \"%.*s\" can only be used in a pointer\n", (int)typ->name_len, typ->name); + return false; + } + + *typ_out = typ; + *n_ptrs_out = n_ptrs; + return true; +} + +static bool +parse_alias(struct parser *ctx, struct type *typ) +{ + struct type *base; + + debug("Parsing type alias definition...\n"); + + typ->kind = TYK_ALIAS; + + if (!parse_type_ref(ctx, &base, &typ->n_ptrs)) { + return false; + } + + if (typ->n_ptrs >= 1) { + typ->size = sizeof(void*); + } else { + typ->size = base->size; + } + + return true; +} + +static bool +parse_enum(struct parser *ctx, struct type *typ) +{ + struct enum_member *mem; + uint64_t value; + + debug("Parsing enum definition...\n"); + + typ->kind = TYK_ENUM; + + if (next_token(ctx)->kind != TK_LBRACE) { + tok_error(&ctx->tok, "expected \"{\" after \"enum\"\n"); + return false; + } + + typ->members.rows = malloc(HASHMAP_ROWS * sizeof(struct list)); + typ->members.n_rows = HASHMAP_ROWS; + hashmap_init(&typ->members); + + value = 0; + next_token(ctx); + while (ctx->tok.kind != TK_RBRACE) { + if (ctx->tok.kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected enum member name\n"); + return true; + } + + mem = malloc(sizeof(struct enum_member)); + mem->hashmap_entry.hash = ctx->tok.hash; + mem->name = ctx->tok.pos; + mem->name_len = ctx->tok.len; + mem->value = value++; + hashmap_add(&typ->members, &mem->hashmap_entry); + + if (next_token(ctx)->kind == TK_COMMA) { + next_token(ctx); + continue; + } + + if (ctx->tok.kind != TK_RBRACE) { + tok_error(&ctx->tok, "expected \",\" or \"}\" after enum member name\n"); + return true; + } + } + + next_token(ctx); + return true; +} + +static bool +parse_struct(struct parser *ctx, struct type *typ) +{ + struct struct_member *mem; + uint64_t off; + + debug("Parsing struct definition...\n"); + + typ->kind = TYK_STRUCT; + + if (next_token(ctx)->kind != TK_LBRACE) { + tok_error(&ctx->tok, "expected \"{\" after \"struct\"\n"); + return false; + } + + typ->members.rows = malloc(HASHMAP_ROWS * sizeof(struct list)); + typ->members.n_rows = HASHMAP_ROWS; + hashmap_init(&typ->members); + + off = 0; + next_token(ctx); + while (ctx->tok.kind != TK_RBRACE) { + if (ctx->tok.kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected type name\n"); + return true; + } + + mem = malloc(sizeof(struct struct_member)); + mem->hashmap_entry.hash = ctx->tok.hash; + mem->name = ctx->tok.pos; + mem->name_len = ctx->tok.len; + mem->off = off; + + if (!parse_type_ref(ctx, &mem->typ, &mem->n_ptrs)) { + return true; + } + + if (ctx->tok.kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected struct member name\n"); + return true; + } + mem->name = ctx->tok.pos; + mem->name_len = ctx->tok.len; + + if (mem->n_ptrs >= 1) { + mem->size = sizeof(void*); + } else { + mem->size = mem->typ->size; + } + off += mem->size; + + hashmap_add(&typ->members, &mem->hashmap_entry); + + if (next_token(ctx)->kind != TK_SEMICOLON) { + tok_error(&ctx->tok, "expected \";\" after struct member name\n"); + return true; + } + next_token(ctx); + } + + next_token(ctx); + return true; +} + +void +parse_type(struct parser *ctx) +{ + struct type *typ; + bool success; + + debug("Parsing type definition...\n"); + + /* Type name */ + if (next_token(ctx)->kind != TK_IDENTIFIER) { + tok_error(&ctx->tok, "expected identifier after \"type\"\n"); + return; + } + + /* Ensure type does not already exist */ + typ = (struct type*)hashmap_find(ctx->types, ctx->tok.hash); + if (typ != NULL) { + tok_error(&ctx->tok, "type \"%.*s\" already defined\n", (int)ctx->tok.len, ctx->tok.pos); + return; + } + + /* Create type */ + typ = type_new(&ctx->tok); + + if (next_token(ctx)->kind != TK_COLON) { + tok_error(&ctx->tok, "expected \":\" after type name\n"); + free(typ); + return; + } + + next_token(ctx); + if (ctx->tok.kind == TK_IDENTIFIER) { + success = parse_alias(ctx, typ); + } else if (ctx->tok.kind == TK_ENUM) { + success = parse_enum(ctx, typ); + } else if (ctx->tok.kind == TK_STRUCT) { + success = parse_struct(ctx, typ); + } else { + tok_error(&ctx->tok, "expected type name or \"enum\" after \":\"\n"); + free(typ); + return; + } + + if (!success) { + free(typ); + return; + } + + if (ctx->tok.kind != TK_SEMICOLON) { + tok_error(&ctx->tok, "expected \";\" after type definition\n"); + free(typ); + return; + } + next_token(ctx); + + /* Add type to parser's registry */ + hashmap_add(ctx->types, &typ->hashmap_entry); +} |