diff options
Diffstat (limited to 'src/lexer/lexer.c')
-rw-r--r-- | src/lexer/lexer.c | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c new file mode 100644 index 0000000..183bd76 --- /dev/null +++ b/src/lexer/lexer.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2025 Quinn Stephens and the OSMORA team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "lexer.h" +#include "lexer/char_info.h" +#include "lexer/keywords.h" +#include "log.h" + +static void +skip_whitespace(struct lexer *ctx) +{ + while (char_info[(int)*ctx->pos] & CHAR_WHITESPACE) { + if (char_info[(int)*ctx->pos] & CHAR_VERT_WS) { + ctx->line++; + ctx->line_start = ctx->pos + 1; + } + + ctx->pos++; + } +} + +static void +lex_identifier(struct lexer *ctx, struct token *tok) +{ + struct keyword *kwd; + + /* Find end of identifier */ + ctx->pos++; + while (char_info[(int)*ctx->pos] & CHAR_ALNUM || *ctx->pos == '_') { + ctx->pos++; + } + + /* Calculate length and hash */ + tok->len = (size_t)(ctx->pos - tok->pos); + tok->hash = hash(tok->pos, tok->len); + + /* Look for a keyword matching the identifier */ + kwd = keywords_find(tok); + if (kwd != NULL) { + tok->kind = kwd->value; + } else { + tok->kind = TK_IDENTIFIER; + } +} + +bool +lexer_next(struct lexer *ctx, struct token *tok) +{ + if (ctx == NULL || tok == NULL) { + return false; + } + + skip_whitespace(ctx); + + tok->pos = ctx->pos; + tok->line = ctx->line; + tok->col = (int)(tok->pos - ctx->line_start) + 1; + + if (char_info[(int)*ctx->pos] & CHAR_ALPHA || *ctx->pos == '_') { + lex_identifier(ctx, tok); + return true; + } + + if (*ctx->pos == '\0') { + tok->kind = TK_EOF; + return true; + } + + tok->kind = TK_UNKNOWN; + return true; +} + +bool +lexer_init(struct lexer *ctx, const char *src) +{ + log_debug("Initializing lexer...\n"); + + if (ctx == NULL || src == NULL) { + return false; + } + + ctx->pos = src; + ctx->line_start = ctx->pos; + ctx->line = 1; + + /* TODO: Only do this once */ + keywords_init(); + + return true; +} |