summaryrefslogtreecommitdiff
path: root/usr.bin/oasm/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/oasm/lex.c')
-rw-r--r--usr.bin/oasm/lex.c294
1 files changed, 294 insertions, 0 deletions
diff --git a/usr.bin/oasm/lex.c b/usr.bin/oasm/lex.c
new file mode 100644
index 0000000..afbe21d
--- /dev/null
+++ b/usr.bin/oasm/lex.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/errno.h>
+#include <string.h>
+#include <oasm/state.h>
+#include <oasm/lex.h>
+#include <oasm/log.h>
+
+#define is_num(c) ((c) >= '0' && (c) <= '9')
+
+static char putback = '\0';
+
+/* Instruction mnemonic strings */
+#define S_IMN_MOV "mov"
+#define S_IMN_ADD "add"
+#define S_IMN_SUB "sub"
+#define S_IMN_DIV "div"
+#define S_IMN_INC "inc"
+#define S_IMN_DEC "dec"
+
+/*
+ * Returns 0 if a char is counted as a
+ * skippable token. Otherwise, -1
+ */
+static inline int
+lex_skippable(struct oasm_state *state, char c)
+{
+ switch (c) {
+ case ' ': return 0;
+ case '\f': return 0;
+ case '\t': return 0;
+ case '\r': return 0;
+ case '\n':
+ ++state->line;
+ return 0;
+ }
+
+ return -1;
+}
+
+/*
+ * Put back a token to grab later
+ *
+ * @c: Character to put back
+ */
+static inline char
+lex_putback(char c)
+{
+ putback = c;
+ return c;
+}
+
+/*
+ * Grab a character from the input file
+ * descriptor.
+ */
+static char
+lex_cin(struct oasm_state *state)
+{
+ char retval;
+
+ if (putback != '\0') {
+ retval = putback;
+ putback = '\0';
+ return retval;
+ }
+
+ if (read(state->in_fd, &retval, 1) <= 0) {
+ return '\0';
+ }
+ return retval;
+}
+
+/*
+ * Nom an operation, directive or any kind
+ * of raw string (unquoted/builtin) and return
+ * memory allocated by strdup() pointing to the
+ * string.
+ *
+ * @state: OASM state pointer
+ * @res: Resulting string
+ *
+ * Returns 0 on success. Greater than zero
+ * value of the last character if a comma or
+ * space was not buffered.
+ */
+static int
+lex_nomstr(struct oasm_state *state, char **res)
+{
+ char buf[256];
+ int retval = 0, n = 0;
+ int tmp;
+
+ memset(buf, 0, sizeof(buf));
+
+ /*
+ * We are filling the buffer containing
+ * the operation or directive.
+ *
+ * Keep going until we hit a space or comman (^)
+ * Examples of such strings (everything in '[]'):
+ *
+ * [mov] [x0], [#1]
+ * ^ ^
+ */
+ while ((tmp = lex_cin(state)) != 0) {
+ if (tmp == ' ' || tmp == ',') {
+ retval = tmp;
+ break;
+ }
+ if (tmp == '\n') {
+ ++state->line;
+ retval = tmp;
+ break;
+
+ }
+
+ buf[n++] = tmp;
+ }
+
+ *res = strdup(buf);
+ return retval;
+}
+
+static tt_t
+token_arith(char *p)
+{
+ if (strcmp(p, S_IMN_MOV) == 0) {
+ return TT_MOV;
+ } else if (strcmp(p, S_IMN_INC) == 0) {
+ return TT_INC;
+ } else if (strcmp(p, S_IMN_DEC) == 0) {
+ return TT_DEC;
+ } else if (strcmp(p, S_IMN_ADD) == 0) {
+ return TT_ADD;
+ } else if (strcmp(p, S_IMN_SUB) == 0) {
+ return TT_SUB;
+ } else if (strcmp(p, S_IMN_DIV) == 0) {
+ return TT_DIV;
+ }
+
+ return TT_UNKNOWN;
+}
+
+static tt_t
+token_xreg(char *p)
+{
+ int num;
+
+ if (p[0] != 'x') {
+ return TT_UNKNOWN;
+ }
+
+ if (!is_num(p[1])) {
+ return TT_UNKNOWN;
+ }
+
+ num = atoi(&p[1]);
+ switch (num) {
+ case 0: return TT_X0;
+ case 1: return TT_X1;
+ case 2: return TT_X2;
+ case 3: return TT_X3;
+ case 4: return TT_X4;
+ case 5: return TT_X5;
+ case 6: return TT_X6;
+ case 7: return TT_X7;
+ case 8: return TT_X8;
+ case 9: return TT_X9;
+ case 10: return TT_X10;
+ case 11: return TT_X11;
+ case 12: return TT_X12;
+ case 13: return TT_X13;
+ case 14: return TT_X14;
+ case 15: return TT_X15;
+ }
+
+ return TT_UNKNOWN;
+}
+
+static tt_t
+token_operand(char *p)
+{
+ /* Is this a numeric constant? */
+ if (p[0] == '#') {
+ return TT_IMM;
+ }
+
+ return TT_UNKNOWN;
+}
+
+static tt_t
+token_reg(char *p)
+{
+ tt_t tok;
+
+ if ((tok = token_xreg(p)) != TT_UNKNOWN) {
+ return tok;
+ }
+
+ return TT_UNKNOWN;
+}
+
+int
+lex_tok(struct oasm_state *state, struct oasm_token *ttp)
+{
+ char *p;
+ char c = ' ';
+ int tmp;
+ tt_t tok;
+
+ if (state == NULL || ttp == NULL) {
+ return -EINVAL;
+ }
+
+ /*
+ * Grab characters. If they are skippable,
+ * don't use them.
+ */
+ while (lex_skippable(state, c) == 0) {
+ if ((c = lex_cin(state)) == 0) {
+ return -1;
+ }
+ }
+
+ switch (c) {
+ case '\n':
+ ++state->line;
+ return 0;
+ case '\0':
+ return -1;
+ case ',':
+ return TT_COMMA;
+ default:
+ ttp->type = TT_UNKNOWN;
+ ttp->raw = NULL;
+
+ lex_putback(c);
+ lex_nomstr(state, &p);
+
+ /* Arithmetic operation? */
+ if ((tok = token_arith(p)) != TT_UNKNOWN) {
+ ttp->type = tok;
+ ttp->raw = p;
+ return 0;
+ }
+
+ /* Register? */
+ if ((tok = token_reg(p)) != TT_UNKNOWN) {
+ ttp->is_reg = 1;
+ ttp->type = tok;
+ ttp->raw = p;
+ return 0;
+ }
+
+ /* Immediate operand? */
+ if ((tok = token_operand(p)) != TT_UNKNOWN) {
+ ttp->type = tok;
+ ttp->raw = p;
+ return 0;
+ }
+ oasm_err("bad token \"%s\"\n", p);
+ return -1;
+ }
+
+ return 0;
+}