/* * Copyright (c) 2025 Quinn Stephens and the OSMORA team. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "lexer.h" #include "lexer/char_info.h" #include "lexer/keywords.h" #include "log.h" static void skip_whitespace(struct lexer *ctx) { while (char_info[(int)*ctx->pos] & CHAR_WHITESPACE) { if (char_info[(int)*ctx->pos] & CHAR_VERT_WS) { ctx->line++; ctx->line_start = ctx->pos + 1; } ctx->pos++; } } static void lex_identifier(struct lexer *ctx, struct token *tok) { struct keyword *kwd; /* Find end of identifier */ ctx->pos++; while (char_info[(int)*ctx->pos] & CHAR_ALNUM || *ctx->pos == '_') { ctx->pos++; } /* Calculate length and hash */ tok->len = (size_t)(ctx->pos - tok->pos); tok->hash = hash(tok->pos, tok->len); /* Look for a keyword matching the identifier */ kwd = keywords_find(tok); if (kwd != NULL) { tok->kind = kwd->tok_kind; tok->flags |= kwd->tok_flags; } else { tok->kind = TOK_IDENTIFIER; } } bool lexer_next(struct lexer *ctx, struct token *tok) { char ch; uint16_t ch_info; if (ctx == NULL || tok == NULL) { return false; } skip_whitespace(ctx); tok->flags = TF_NONE; tok->pos = ctx->pos; tok->line = ctx->line; tok->col = (int)(tok->pos - ctx->line_start) + 1; ch = *ctx->pos; ch_info = char_info[(int)ch]; if (ch_info & CHAR_ALPHA || ch == '_') { lex_identifier(ctx, tok); return true; } if (ch_info & CHAR_SINGLE) { tok->kind = ch_info >> CHAR_SINGLE_SHIFT; tok->len = 1; ctx->pos++; return true; } if (ch == '\0') { tok->kind = TOK_EOF; return true; } tok->kind = TOK_UNKNOWN; return true; } bool lexer_init(struct lexer *ctx, const char *src) { log_debug("initializing lexer...\n"); if (ctx == NULL || src == NULL) { return false; } ctx->pos = src; ctx->line_start = ctx->pos; ctx->line = 1; /* TODO: Only do this once */ keywords_init(); return true; }