summaryrefslogtreecommitdiff
path: root/src/lexer/lexer.c
blob: 9be4cb1aa4f0527df426112c8897e1bc084060ae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/*
 * Copyright (c) 2025 Quinn Stephens and the OSMORA team.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "lexer.h"
#include "lexer/char_info.h"
#include "lexer/keywords.h"
#include "log.h"

/*
 * Advance the lexer past a run of whitespace characters.
 *
 * Every character flagged CHAR_VERT_WS increments ctx->line and moves
 * ctx->line_start to the character following it, so that columns computed
 * afterwards are relative to the new line.
 *
 * @ctx: Lexer state; ctx->pos, ctx->line and ctx->line_start are updated.
 */
static void
skip_whitespace(struct lexer *ctx)
{
    for (;;) {
        /*
         * Index the table through unsigned char: plain char may be signed,
         * in which case a byte >= 0x80 would yield a negative index.
         * NOTE(review): assumes char_info covers all 256 byte values -
         * confirm against lexer/char_info.h.
         */
        unsigned char c = (unsigned char)*ctx->pos;

        if (!(char_info[c] & CHAR_WHITESPACE)) {
            break;
        }

        if (char_info[c] & CHAR_VERT_WS) {
            ctx->line++;
            ctx->line_start = ctx->pos + 1;
        }

        ctx->pos++;
    }
}

/*
 * Lex an identifier or keyword that begins at tok->pos.
 *
 * The first character is consumed unconditionally; the caller is expected
 * to have verified that it may start an identifier. Subsequent characters
 * are consumed while they are flagged CHAR_ALNUM or are an underscore.
 *
 * On return tok->len and tok->hash describe the lexeme, and tok->kind is
 * either the matching keyword's token kind (with the keyword's flags OR'ed
 * into tok->flags) or TK_IDENTIFIER.
 *
 * @ctx: Lexer state; ctx->pos is left just past the identifier.
 * @tok: Token to fill in; tok->pos must already point at the first character.
 */
static void
lex_identifier(struct lexer *ctx, struct token *tok)
{
    struct keyword *kwd;

    /* Find end of identifier */
    ctx->pos++;
    for (;;) {
        /*
         * Index the table through unsigned char: plain char may be signed,
         * in which case a byte >= 0x80 would yield a negative index.
         * NOTE(review): assumes char_info covers all 256 byte values -
         * confirm against lexer/char_info.h.
         */
        unsigned char c = (unsigned char)*ctx->pos;

        if (!((char_info[c] & CHAR_ALNUM) || c == '_')) {
            break;
        }

        ctx->pos++;
    }

    /* Calculate length and hash */
    tok->len = (size_t)(ctx->pos - tok->pos);
    tok->hash = hash(tok->pos, tok->len);

    /* Look for a keyword matching the identifier */
    kwd = keywords_find(tok);
    if (kwd != NULL) {
        tok->kind = kwd->tok_kind;
        tok->flags |= kwd->tok_flags;
    } else {
        tok->kind = TK_IDENTIFIER;
    }
}

/*
 * Produce the next token from the input.
 *
 * Leading whitespace is skipped, then tok->pos, tok->line and tok->col
 * (1-based, relative to the current line start) are recorded and tok->flags
 * is reset to TF_NONE. The token is then classified as:
 *   - an identifier or keyword, when it starts with an alphabetic character
 *     or an underscore;
 *   - TK_EOF, when the terminating '\0' is reached (length 0, not consumed);
 *   - TK_UNKNOWN, for any other character (length 1, consumed).
 *
 * @ctx: Lexer state.
 * @tok: Token to fill in.
 *
 * Returns false if ctx or tok is NULL, true otherwise.
 */
bool
lexer_next(struct lexer *ctx, struct token *tok)
{
    unsigned char c;

    if (ctx == NULL || tok == NULL) {
        return false;
    }

    skip_whitespace(ctx);

    tok->flags = TF_NONE;
    tok->pos = ctx->pos;
    tok->line = ctx->line;
    tok->col = (int)(tok->pos - ctx->line_start) + 1;

    /*
     * Index the table through unsigned char: plain char may be signed,
     * in which case a byte >= 0x80 would yield a negative index.
     * NOTE(review): assumes char_info covers all 256 byte values -
     * confirm against lexer/char_info.h.
     */
    c = (unsigned char)*ctx->pos;

    if ((char_info[c] & CHAR_ALPHA) || c == '_') {
        lex_identifier(ctx, tok);
        return true;
    }

    if (c == '\0') {
        /* Stay on the terminator so repeated calls keep returning EOF */
        tok->len = 0;
        tok->kind = TK_EOF;
        return true;
    }

    /*
     * Consume the unrecognized byte; otherwise every subsequent call would
     * return the same TK_UNKNOWN token and the lexer would never progress.
     */
    ctx->pos++;
    tok->len = 1;
    tok->kind = TK_UNKNOWN;
    return true;
}

/*
 * Prepare a lexer to scan the given source text.
 *
 * The lexer is positioned at the start of src on line 1, and the keyword
 * table is initialized via keywords_init().
 *
 * @ctx: Lexer state to initialize.
 * @src: Source text to scan.
 *
 * Returns false if ctx or src is NULL, true otherwise.
 */
bool
lexer_init(struct lexer *ctx, const char *src)
{
    log_debug("Initializing lexer...\n");

    if (!ctx || !src) {
        return false;
    }

    /* Begin on the first line, with the line start at the first character */
    ctx->line = 1;
    ctx->pos = src;
    ctx->line_start = ctx->pos;

    /* TODO: Only do this once */
    keywords_init();

    return true;
}