1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
/*
* Copyright (c) 2025 Quinn Stephens and the OSMORA team.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "lexer.h"
#include "lexer/char_info.h"
#include "lexer/keywords.h"
#include "log.h"
/*
 * Advance the lexer position past a run of whitespace characters.
 *
 * For every character flagged CHAR_VERT_WS the line counter is incremented
 * and line_start is moved to the character that follows it, so that column
 * numbers computed afterwards are relative to the current line.
 *
 * @ctx: Lexer context (not checked for NULL here)
 *
 * NOTE(review): the loop ends at the first character whose char_info entry
 * lacks CHAR_WHITESPACE; this presumably includes the terminating '\0' -
 * confirm against the table.
 */
static void
skip_whitespace(struct lexer *ctx)
{
    /*
     * Index the table through unsigned char: plain char may be signed, in
     * which case a byte >= 0x80 would turn into a negative index.
     * Assumes char_info has an entry for every unsigned char value -
     * TODO confirm.
     */
    while (char_info[(unsigned char)*ctx->pos] & CHAR_WHITESPACE) {
        if (char_info[(unsigned char)*ctx->pos] & CHAR_VERT_WS) {
            ctx->line++;
            ctx->line_start = ctx->pos + 1;
        }

        ctx->pos++;
    }
}
/*
 * Lex an identifier or keyword.
 *
 * The first character is consumed unconditionally; the caller is expected
 * to have validated it. On entry tok->pos must point at that first
 * character, since the token length is measured from tok->pos to the
 * position where scanning stops. tok->flags must already be initialized
 * because keyword flags are OR-ed into it.
 *
 * @ctx: Lexer context, positioned at the start of the identifier
 * @tok: Token to fill in (len, hash, kind and possibly flags are written)
 */
static void
lex_identifier(struct lexer *ctx, struct token *tok)
{
    struct keyword *kwd;

    /*
     * Find end of identifier.
     *
     * The table is indexed through unsigned char: plain char may be signed,
     * in which case a byte >= 0x80 would turn into a negative index.
     * Assumes char_info has an entry for every unsigned char value -
     * TODO confirm.
     */
    ctx->pos++;
    while ((char_info[(unsigned char)*ctx->pos] & CHAR_ALNUM) ||
           *ctx->pos == '_') {
        ctx->pos++;
    }

    /* Calculate length and hash */
    tok->len = (size_t)(ctx->pos - tok->pos);
    tok->hash = hash(tok->pos, tok->len);

    /* Look for a keyword matching the identifier */
    kwd = keywords_find(tok);
    if (kwd != NULL) {
        tok->kind = kwd->tok_kind;
        tok->flags |= kwd->tok_flags;
    } else {
        tok->kind = TK_IDENTIFIER;
    }
}
/*
 * Produce the next token from the input.
 *
 * Leading whitespace is skipped, then the token's flags, start position,
 * line and 1-based column are recorded. Identifiers and keywords are
 * handed to lex_identifier(); a '\0' yields TK_EOF; anything else yields
 * TK_UNKNOWN.
 *
 * For TK_EOF and TK_UNKNOWN the lexer position is not advanced and
 * tok->len / tok->hash are not written.
 *
 * @ctx: Lexer context
 * @tok: Token to fill in
 *
 * Returns false if either argument is NULL, true otherwise (including
 * for TK_EOF and TK_UNKNOWN tokens).
 */
bool
lexer_next(struct lexer *ctx, struct token *tok)
{
    unsigned char c;

    if (ctx == NULL || tok == NULL) {
        return false;
    }

    skip_whitespace(ctx);

    tok->flags = TF_NONE;
    tok->pos = ctx->pos;
    tok->line = ctx->line;
    tok->col = (int)(tok->pos - ctx->line_start) + 1;

    /*
     * Read the current byte as unsigned char before using it as a table
     * index: plain char may be signed, in which case a byte >= 0x80 would
     * turn into a negative index.
     * Assumes char_info has an entry for every unsigned char value -
     * TODO confirm.
     */
    c = (unsigned char)*ctx->pos;

    if ((char_info[c] & CHAR_ALPHA) || c == '_') {
        lex_identifier(ctx, tok);
        return true;
    }

    if (c == '\0') {
        tok->kind = TK_EOF;
        return true;
    }

    tok->kind = TK_UNKNOWN;
    return true;
}
/*
 * Prepare a lexer context for reading from a source buffer.
 *
 * A debug message is logged first, before the arguments are checked.
 * On success the context starts at the beginning of `src` on line 1 and
 * keywords_init() is invoked.
 *
 * @ctx: Lexer context to initialize
 * @src: Source text to lex
 *
 * Returns true on success, false if either argument is NULL.
 */
bool
lexer_init(struct lexer *ctx, const char *src)
{
    log_debug("Initializing lexer...\n");

    if (ctx != NULL && src != NULL) {
        /* Begin on the first line, at the first character */
        ctx->line = 1;
        ctx->pos = src;
        ctx->line_start = ctx->pos;

        /* TODO: Only do this once */
        keywords_init();
        return true;
    }

    return false;
}
|