diff options
Diffstat (limited to 'usr.bin/oasm')
-rw-r--r-- | usr.bin/oasm/emit.c | 334 | ||||
-rw-r--r-- | usr.bin/oasm/include/oasm/emit.h | 113 | ||||
-rw-r--r-- | usr.bin/oasm/include/oasm/lex.h | 36 | ||||
-rw-r--r-- | usr.bin/oasm/lex.c | 25 | ||||
-rw-r--r-- | usr.bin/oasm/oasm.c | 10 | ||||
-rw-r--r-- | usr.bin/oasm/parse.c | 101 |
6 files changed, 615 insertions, 4 deletions
diff --git a/usr.bin/oasm/emit.c b/usr.bin/oasm/emit.c new file mode 100644 index 0000000..cef90d7 --- /dev/null +++ b/usr.bin/oasm/emit.c @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include <sys/errno.h>
+#include <oasm/emit.h>
+#include <oasm/log.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Write a buffer to the output file descriptor
+ * held in the emit state.
+ *
+ * @state: Emit state (out_fd must be valid)
+ * @p: Start of the bytes to write
+ * @len: Number of bytes to write
+ *
+ * write() is allowed to transfer fewer bytes than
+ * requested, so keep going until the whole buffer
+ * is out. A failed write is logged and the rest of
+ * the buffer is dropped.
+ */
+static inline void
+emit_bytes(struct emit_state *state, void *p, size_t len)
+{
+    const uint8_t *buf = p;
+    ssize_t n;
+
+    while (len > 0) {
+        n = write(state->out_fd, buf, len);
+        if (n <= 0) {
+            oasm_err("[emit error]: failed to write output\n");
+            return;
+        }
+
+        buf += n;
+        len -= (size_t)n;
+    }
+}
+
+/*
+ * Convert an IR register to an OSMX64
+ * valid register value that can be encoded
+ * into the instruction.
+ *
+ * @ir: Token type to convert (TT_X0..TT_X15)
+ *
+ * Returns the matching OSMX64_R_X<n> value, or
+ * OSMX64_R_BAD if `ir' is not an X<n> register.
+ */
+static inline reg_t
+ir_to_reg(tt_t ir)
+{
+    switch (ir) {
+    case TT_X0: return OSMX64_R_X0;
+    case TT_X1: return OSMX64_R_X1;
+    case TT_X2: return OSMX64_R_X2;
+    case TT_X3: return OSMX64_R_X3;
+    case TT_X4: return OSMX64_R_X4;
+    case TT_X5: return OSMX64_R_X5;
+    case TT_X6: return OSMX64_R_X6;
+    case TT_X7: return OSMX64_R_X7;
+    case TT_X8: return OSMX64_R_X8;
+    case TT_X9: return OSMX64_R_X9;
+    case TT_X10: return OSMX64_R_X10;
+    case TT_X11: return OSMX64_R_X11;
+    case TT_X12: return OSMX64_R_X12;
+    case TT_X13: return OSMX64_R_X13;
+    case TT_X14: return OSMX64_R_X14;
+    case TT_X15: return OSMX64_R_X15;
+    default:
+        /* Anything else is not an X<n> register */
+        break;
+    }
+
+    return OSMX64_R_BAD;
+}
+
+/*
+ * Encode a MOV instruction
+ *
+ * mov [r], <imm>
+ *
+ * Only the immediate source form is encoded; any
+ * other second operand is rejected.
+ *
+ * Returns the next token on success,
+ * otherwise NULL.
+ */
+static struct oasm_token *
+emit_encode_mov(struct emit_state *state, struct oasm_token *tok)
+{
+    inst_t curinst = {0};
+    reg_t rd;
+
+    if (state == NULL || tok == NULL) {
+        return NULL;
+    }
+
+    /*
+     * Next token should be a register. Running out
+     * of tokens here means the operand list was cut
+     * short, so say so instead of failing silently.
+     */
+    tok = TAILQ_NEXT(tok, link);
+    if (tok == NULL) {
+        oasm_err("[emit error]: bad 'mov' order\n");
+        return NULL;
+    }
+    if (!tok_is_xreg(tok->type)) {
+        oasm_err("[emit error]: bad 'mov' order\n");
+        return NULL;
+    }
+
+    rd = ir_to_reg(tok->type);
+    if (rd == OSMX64_R_BAD) {
+        oasm_err("[emit error]: got bad reg in 'mov'\n");
+        return NULL;
+    }
+
+    /* Next token should be an IMM */
+    tok = TAILQ_NEXT(tok, link);
+    if (tok == NULL) {
+        oasm_err("[emit error]: bad 'mov' order\n");
+        return NULL;
+    }
+    if (tok->type != TT_IMM) {
+        oasm_err("[emit error]: expected <imm>\n");
+        return NULL;
+    }
+
+    curinst.opcode = OSMX64_MOV_IMM;
+    curinst.rd = rd;
+    curinst.imm = tok->imm;
+    emit_bytes(state, &curinst, sizeof(curinst));
+
+    /*
+     * This is also NULL when the immediate was the
+     * last token queued, i.e. the end of the stream.
+     */
+    return TAILQ_NEXT(tok, link);
+}
+
+/*
+ * Encode a INC/DEC instruction
+ *
+ * inc/dec [r]
+ *
+ * Returns the next token on success,
+ * otherwise NULL.
+ */
+static struct oasm_token *
+emit_encode_incdec(struct emit_state *state, struct oasm_token *tok)
+{
+    inst_t curinst = {0};
+    reg_t rd;
+    uint8_t opcode = OSMX64_INC;
+    const char *inst_str = "inc";
+
+    if (state == NULL || tok == NULL) {
+        return NULL;
+    }
+
+    /* Same encoding for both, only the opcode differs */
+    if (tok->type == TT_DEC) {
+        inst_str = "dec";
+        opcode = OSMX64_DEC;
+    }
+
+    /* Next token should be a register */
+    tok = TAILQ_NEXT(tok, link);
+    if (tok == NULL) {
+        oasm_err("[emit error]: bad '%s' order\n", inst_str);
+        return NULL;
+    }
+    if (!tok_is_xreg(tok->type)) {
+        oasm_err("[emit error]: bad '%s' order\n", inst_str);
+        return NULL;
+    }
+
+    rd = ir_to_reg(tok->type);
+    if (rd == OSMX64_R_BAD) {
+        oasm_err("[emit error]: got bad reg in '%s'\n", inst_str);
+        return NULL;
+    }
+
+    curinst.opcode = opcode;
+    curinst.rd = rd;
+    curinst.unused = 0;
+    emit_bytes(state, &curinst, sizeof(curinst));
+    return TAILQ_NEXT(tok, link);
+}
+
+/*
+ * Encode an ADD instruction
+ *
+ * add [r], <imm>
+ *
+ * Returns the next token on success,
+ * otherwise NULL.
+ */
+static struct oasm_token *
+emit_encode_add(struct emit_state *state, struct oasm_token *tok)
+{
+    inst_t curinst = {0};
+    reg_t rd;
+
+    if (state == NULL || tok == NULL) {
+        return NULL;
+    }
+
+    /*
+     * The next operand must be an X<n>
+     * register.
+     */
+    tok = TAILQ_NEXT(tok, link);
+    if (tok == NULL) {
+        oasm_err("[emit error]: bad 'add' order\n");
+        return NULL;
+    }
+    if (!tok_is_xreg(tok->type)) {
+        oasm_err("[emit error]: bad 'add' order\n");
+        return NULL;
+    }
+
+    /* Get the register and validate it */
+    rd = ir_to_reg(tok->type);
+    if (rd == OSMX64_R_BAD) {
+        oasm_err("[emit error]: got bad reg in 'add'\n");
+        return NULL;
+    }
+
+    /* The next token should be an <imm> */
+    tok = TAILQ_NEXT(tok, link);
+    if (tok == NULL) {
+        oasm_err("[emit error]: expected <imm> in 'add'\n");
+        return NULL;
+    }
+    if (tok->type != TT_IMM) {
+        oasm_err("[emit error]: expected <imm> in 'add'\n");
+        return NULL;
+    }
+
+    curinst.opcode = OSMX64_ADD;
+    curinst.rd = rd;
+    curinst.imm = tok->imm;
+    emit_bytes(state, &curinst, sizeof(curinst));
+    return TAILQ_NEXT(tok, link);
+}
+
+/*
+ * Encode a HLT instruction
+ *
+ * 'hlt' - no operands
+ *
+ * Returns the next token on success,
+ * otherwise NULL.
+ */
+static struct oasm_token *
+emit_encode_hlt(struct emit_state *state, struct oasm_token *tok)
+{
+    inst_t curinst = {0};
+
+    if (state == NULL || tok == NULL) {
+        return NULL;
+    }
+
+    curinst.opcode = OSMX64_HLT;
+    curinst.rd = 0;
+    curinst.unused = 0;
+    emit_bytes(state, &curinst, sizeof(curinst));
+    return TAILQ_NEXT(tok, link);
+}
+
+/*
+ * Queue a token on the emit state's IR list so it
+ * can be encoded later by emit_process().
+ *
+ * @state: Emit state to queue onto
+ * @tp: Token to copy
+ *
+ * Returns 0 on success, -EINVAL on a NULL argument
+ * and -ENOMEM if the copy cannot be allocated.
+ */
+int
+emit_osxm64(struct emit_state *state, struct oasm_token *tp)
+{
+    struct oasm_token *toknew;
+
+    if (state == NULL || tp == NULL) {
+        return -EINVAL;
+    }
+
+    /*
+     * We need to create a copy of the object as the
+     * caller will likely end up destroying it.
+     *
+     * The copy is shallow: `raw' still points at the
+     * caller's string, and emit_destroy() frees it.
+     */
+    toknew = malloc(sizeof(*toknew));
+    if (toknew == NULL) {
+        return -ENOMEM;
+    }
+
+    memcpy(toknew, tp, sizeof(*toknew));
+    TAILQ_INSERT_TAIL(&state->ir, toknew, link);
+    return 0;
+}
+
+/*
+ * Prepare an emit state for use.
+ *
+ * Returns 0 on success, -EINVAL if `state' is NULL.
+ */
+int
+emit_init(struct emit_state *state)
+{
+    if (state == NULL) {
+        return -EINVAL;
+    }
+
+    state->last_token = TT_UNKNOWN;
+    state->is_init = 1;
+    TAILQ_INIT(&state->ir);
+    return 0;
+}
+
+/*
+ * Release every token queued on the emit state,
+ * including the `raw' string each one carries.
+ *
+ * Returns 0 on success, -EINVAL if `state' is NULL.
+ */
+int
+emit_destroy(struct emit_state *state)
+{
+    struct oasm_token *curtok;
+
+    if (state == NULL) {
+        return -EINVAL;
+    }
+
+    /*
+     * Pop from the head so the list never refers
+     * to memory that has already been freed.
+     */
+    while ((curtok = TAILQ_FIRST(&state->ir)) != NULL) {
+        TAILQ_REMOVE(&state->ir, curtok, link);
+        free(curtok->raw);
+        free(curtok);
+    }
+
+    return 0;
+}
+
+/*
+ * Walk the queued tokens and write the encoded
+ * instructions to the output of `oasm'.
+ *
+ * Returns 0 once the walk ends, -EINVAL on a NULL
+ * argument and -1 if `emit' was never initialized.
+ *
+ * An encoder returning NULL ends the walk. That is
+ * both its error value and what it returns for the
+ * last instruction, so encode errors are only
+ * visible through what the encoder logged.
+ */
+int
+emit_process(struct oasm_state *oasm, struct emit_state *emit)
+{
+    struct oasm_token *curtok;
+
+    if (oasm == NULL || emit == NULL) {
+        return -EINVAL;
+    }
+    if (!emit->is_init) {
+        return -1;
+    }
+
+    emit->out_fd = oasm->out_fd;
+    curtok = TAILQ_FIRST(&emit->ir);
+    while (curtok != NULL) {
+        switch (curtok->type) {
+        case TT_MOV:
+            curtok = emit_encode_mov(emit, curtok);
+            break;
+        case TT_INC:
+        case TT_DEC:
+            curtok = emit_encode_incdec(emit, curtok);
+            break;
+        case TT_ADD:
+            curtok = emit_encode_add(emit, curtok);
+            break;
+        case TT_HLT:
+            curtok = emit_encode_hlt(emit, curtok);
+            break;
+        default:
+            /* Not an instruction we encode, skip it */
+            curtok = TAILQ_NEXT(curtok, link);
+            break;
+        }
+    }
+
+    return 0;
+}
diff --git a/usr.bin/oasm/include/oasm/emit.h b/usr.bin/oasm/include/oasm/emit.h
new file mode 100644
index 0000000..b0a2fd1
--- /dev/null
+++ b/usr.bin/oasm/include/oasm/emit.h
@@ -0,0 +1,113 @@
+/* Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _EMIT_H_
+#define _EMIT_H_
+
+#include <sys/queue.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <oasm/lex.h>
+#include <oasm/state.h>
+
+/*
+ * The OSMX64 architecture has 32-bit instructions
+ * that are encoded in the following manner:
+ *
+ * - [0:7]: Opcode
+ * - [11:8]: Register
+ * - [31:12]: Reserved
+ *
+ * The values below define various operation
+ * codes.
+ *
+ * NOTE(review): inst_t below stores the register in
+ * a full byte followed by a 16-bit immediate, which
+ * is not the 4-bit register / reserved layout listed
+ * above. Confirm which one the ISA actually uses.
+ *
+ * NOTE(review): the opcode values go from 0x09
+ * straight to 0x10, leaving 0x0A-0x0F unassigned.
+ * Confirm that gap is intentional.
+ */
+#define OSMX64_NOP 0x00 /* No-operation */
+#define OSMX64_ADD 0x01 /* Add operation */
+#define OSMX64_SUB 0x02 /* Sub operation */
+#define OSMX64_MUL 0x03 /* Multiply operation */
+#define OSMX64_DIV 0x04 /* Divide operation */
+#define OSMX64_INC 0x05 /* Increment operation */
+#define OSMX64_DEC 0x06 /* Decrement operation */
+#define OSMX64_OR 0x07 /* Bitwise OR operation */
+#define OSMX64_XOR 0x08 /* Bitwise XOR operation */
+#define OSMX64_AND 0x09 /* Bitwise AND operation */
+#define OSMX64_NOT 0x10 /* Bitwise NOT operation */
+#define OSMX64_SLL 0x11 /* Shift left logical operation */
+#define OSMX64_SRL 0x12 /* Shift right logical operation */
+#define OSMX64_MOV_IMM 0x13 /* Data move operation from IMM */
+#define OSMX64_HLT 0x14 /* Halt the processor */
+
+/*
+ * OSMX64 register definitions
+ *
+ * X0..X15 map to 0x00..0x0F. OSMX64_R_BAD is the
+ * value used to report "not a register".
+ */
+#define OSMX64_R_X0 0x00
+#define OSMX64_R_X1 0x01
+#define OSMX64_R_X2 0x02
+#define OSMX64_R_X3 0x03
+#define OSMX64_R_X4 0x04
+#define OSMX64_R_X5 0x05
+#define OSMX64_R_X6 0x06
+#define OSMX64_R_X7 0x07
+#define OSMX64_R_X8 0x08
+#define OSMX64_R_X9 0x09
+#define OSMX64_R_X10 0x0A
+#define OSMX64_R_X11 0x0B
+#define OSMX64_R_X12 0x0C
+#define OSMX64_R_X13 0x0D
+#define OSMX64_R_X14 0x0E
+#define OSMX64_R_X15 0x0F
+#define OSMX64_R_BAD 0xFF
+
+/* Encoded register number (one of OSMX64_R_*) */
+typedef uint8_t reg_t;
+/* Immediate operand value */
+typedef uint16_t imm_t;
+
+/*
+ * OSMX64 instruction
+ *
+ * @opcode: One of the OSMX64_* operation codes
+ * @rd: Destination register (OSMX64_R_*)
+ * @imm: Immediate operand, for opcodes that take one
+ * @unused: Same storage as @imm; zeroed when the
+ *          instruction has no immediate
+ */
+typedef struct {
+    uint8_t opcode;
+    uint8_t rd;
+    union {
+        uint16_t imm;
+        uint16_t unused;
+    };
+} inst_t;
+
+/*
+ * State used while emitting machine code
+ *
+ * @last_token: Reset to TT_UNKNOWN by emit_init()
+ * @is_init: Set by emit_init(), checked by emit_process()
+ * @out_fd: Output file descriptor, taken from the
+ *          oasm state by emit_process()
+ * @ir: Queue of token copies added by emit_osxm64()
+ */
+struct emit_state {
+    tt_t last_token;
+    uint8_t is_init : 1;
+    int out_fd;
+    TAILQ_HEAD(, oasm_token) ir;
+};
+
+/* Set up an emit state before any token is queued */
+int emit_init(struct emit_state *state);
+/* Free every queued token along with its raw string */
+int emit_destroy(struct emit_state *state);
+/* Encode the queued tokens and write them to the output of `oasm' */
+int emit_process(struct oasm_state *oasm, struct emit_state *emit);
+/* Queue a copy of `tp'; returns 0 on success, negative on failure */
+int emit_osxm64(struct emit_state *state, struct oasm_token *tp);
+
+#endif /* !_EMIT_H_ */
diff --git a/usr.bin/oasm/include/oasm/lex.h b/usr.bin/oasm/include/oasm/lex.h
index 9e293e6..6ffaf79 100644
--- a/usr.bin/oasm/include/oasm/lex.h
+++ 
b/usr.bin/oasm/include/oasm/lex.h
@@ -30,6 +30,8 @@
 #ifndef _OASM_LEX_H_
 #define _OASM_LEX_H_
 
+#include <sys/queue.h>
+#include <sys/cdefs.h>
 #include <stdint.h>
 
 struct oasm_state;
@@ -93,6 +95,7 @@ typedef enum {
     TT_SUB, /* 'sub' */
     TT_MUL, /* 'mul' */
     TT_DIV, /* 'div' */
+    TT_HLT, /* 'hlt' */
 
     /* Register ops */
     TT_MOV, /* 'mov' */
@@ -113,9 +116,42 @@ typedef enum {
 struct oasm_token {
     tt_t type;
     uint8_t is_reg : 1;
+    uint16_t imm; /* Value of a TT_IMM operand */
     char *raw;
+    TAILQ_ENTRY(oasm_token) link; /* Queue linkage for the emit IR list */
 };
 
 int lex_tok(struct oasm_state *state, struct oasm_token *ttp);
+
+/*
+ * Check if a token is an X<n> register.
+ * Returns true on match.
+ *
+ * @tok: Token type to test
+ *
+ * NOTE(review): this uses bool/true/false, but none of
+ * the includes visible in this hunk is <stdbool.h>.
+ * Confirm it is pulled in by another header.
+ */
+__always_inline static inline bool
+tok_is_xreg(tt_t tok)
+{
+    switch (tok) {
+    case TT_X0:
+    case TT_X1:
+    case TT_X2:
+    case TT_X3:
+    case TT_X4:
+    case TT_X5:
+    case TT_X6:
+    case TT_X7:
+    case TT_X8:
+    case TT_X9:
+    case TT_X10:
+    case TT_X11:
+    case TT_X12:
+    case TT_X13:
+    case TT_X14:
+    case TT_X15:
+        return true;
+    }
+
+    /* Any other token type is not an X<n> register */
+    return false;
+}
+
 #endif /* !_OASM_LEX_H_ */
diff --git a/usr.bin/oasm/lex.c b/usr.bin/oasm/lex.c
index afbe21d..b3af2b1 100644
--- a/usr.bin/oasm/lex.c
+++ b/usr.bin/oasm/lex.c
@@ -29,6 +29,7 @@
 
 #include <sys/errno.h>
 #include <string.h>
+#include <stdlib.h>
 #include <oasm/state.h>
 #include <oasm/lex.h>
 #include <oasm/log.h>
@@ -44,6 +45,7 @@ static char putback = '\0';
 #define S_IMN_DIV "div"
 #define S_IMN_INC "inc"
 #define S_IMN_DEC "dec"
+#define S_IMN_HLT "hlt"
 
 /*
  * Returns 0 if a char is counted as a
@@ -66,6 +68,20 @@ lex_skippable(struct oasm_state *state, char c)
 }
 
 /*
+ * For cleaning up allocated sources
+ * during error conditions
+ *
+ * @p: Memory to free
+ *
+ * free(NULL) is already a no-op, so the NULL
+ * check below is only defensive.
+ */
+static inline void
+lex_try_free(void *p)
+{
+    if (p != NULL) {
+        free(p);
+    }
+}
+
+/*
  * Put back a token to grab later
  *
  * @c: Character to put back
@@ -164,6 +180,8 @@ token_arith(char *p)
         return TT_SUB;
     } else if (strcmp(p, S_IMN_DIV) == 0) {
         return TT_DIV;
+    } else if (strcmp(p, S_IMN_HLT) == 0) {
+        return TT_HLT;
     }
 
     return TT_UNKNOWN;
@@ -231,7 
+249,7 @@ token_reg(char *p)
 int
 lex_tok(struct oasm_state *state, struct oasm_token *ttp)
 {
-    char *p;
+    char *p = NULL;
     char c = ' ';
     int tmp;
     tt_t tok;
@@ -282,11 +300,16 @@ lex_tok(struct oasm_state *state, struct oasm_token *ttp)
 
     /* Immediate operand? */
     if ((tok = token_operand(p)) != TT_UNKNOWN) {
+        if (tok == TT_IMM) {
+            ttp->imm = atoi(&p[1]);
+        }
+
         ttp->type = tok;
         ttp->raw = p;
         return 0;
     }
 
     oasm_err("bad token \"%s\"\n", p);
+    lex_try_free(p);
     return -1;
 }
diff --git a/usr.bin/oasm/oasm.c b/usr.bin/oasm/oasm.c
index 844f004..6c37778 100644
--- a/usr.bin/oasm/oasm.c
+++ b/usr.bin/oasm/oasm.c
@@ -47,7 +47,7 @@ oasm_start(struct oasm_state *state)
 int
 main(int argc, char **argv)
 {
-    if (argc < 2) {
+    if (argc < 3) {
         printf("oasm: usage: oasm <file> <output>\n");
         return -1;
     }
@@ -58,8 +58,16 @@ main(int argc, char **argv)
         return -1;
     }
 
+    /*
+     * O_CREAT requires a mode argument, and O_TRUNC
+     * keeps bytes of an older, longer output file
+     * from surviving past the new end.
+     */
+    g_state.out_fd = open(argv[2], O_CREAT | O_WRONLY | O_TRUNC, 0644);
+    if (g_state.out_fd < 0) {
+        printf("could not open output \"%s\"\n", argv[2]);
+        close(g_state.in_fd);
+        return -1;
+    }
+
     g_state.filename = argv[1];
     oasm_start(&g_state);
     close(g_state.in_fd);
+    close(g_state.out_fd);
     return 0;
 }
diff --git a/usr.bin/oasm/parse.c b/usr.bin/oasm/parse.c
index 7c5b94c..6851935 100644
--- a/usr.bin/oasm/parse.c
+++ b/usr.bin/oasm/parse.c
@@ -29,17 +29,20 @@
 #include <stdint.h>
 #include <stddef.h>
 #include <stdlib.h>
+#include <oasm/emit.h>
 #include <oasm/state.h>
 #include <oasm/lex.h>
 #include <oasm/parse.h>
 #include <oasm/log.h>
 
+static struct emit_state emit_state;
 static const char *tokstr[] = {
     [ TT_UNKNOWN] = "bad",
     [ TT_ADD ] = "add",
     [ TT_SUB ] = "sub",
     [ TT_MUL ] = "mul",
     [ TT_DIV ] = "div",
+    [ TT_HLT ] = "hlt",
     [ TT_COMMA ] = ",",
     [ TT_INC ] = "inc",
     [ TT_DEC ] = "dec",
@@ -95,20 +98,114 @@ static const char *tokstr[] = {
     [ TT_V7 ] = "v7",
 };
 
+/*
+ * Validate a register operand and queue it
+ * for the emitter.
+ *
+ * @state: OASM state (`last' holds the previous token type)
+ * @tok: Register token to check
+ *
+ * Returns 0 on success, otherwise a value
+ * less than zero.
+ */
+static int
+parse_reg(struct oasm_state *state, struct oasm_token *tok)
+{
+    const char *p;
+
+    /* Valid instructions that go with regs */
+    switch (state->last) {
+    case TT_MOV:
+    case TT_DEC:
+    case TT_INC:
+    case TT_ADD:
+        break;
+    default:
+        p = tokstr[state->last];
+        oasm_err("bad instruction '%s' for regop\n", p);
+        return -1;
+    }
+
+    if (!tok_is_xreg(tok->type)) {
+        p = tokstr[tok->type];
+        oasm_err("bad register \"%s\"\n", p);
+        return -1;
+    }
+
+    /* Only remember the token once it is known good */
+    state->last = tok->type;
+    return emit_osxm64(&emit_state, tok);
+}
+
+/*
+ * Placeholder for immediate operand checks,
+ * currently accepts everything.
+ */
+static int
+parse_imm(struct oasm_token *tok, tt_t last)
+{
+    return 0;
+}
+
+/*
+ * Check a single token against the previous one
+ * and queue it for the emitter.
+ *
+ * @state: OASM state (`last' holds the previous token type)
+ * @tok: Token that was just lexed
+ *
+ * Returns 0 on success, otherwise a value less
+ * than zero. On failure the token has not been
+ * queued, so the caller still owns `tok->raw'.
+ */
+static int
+parse_tok(struct oasm_state *state, struct oasm_token *tok)
+{
+    const char *p;
+
+    switch (tok->type) {
+    case TT_HLT:
+    case TT_MOV:
+    case TT_ADD:
+    case TT_DEC:
+    case TT_INC:
+        /*
+         * Mnemonics are queued exactly once. Queueing
+         * a token twice would hand the same `raw'
+         * pointer to the emitter twice.
+         */
+        state->last = tok->type;
+        return emit_osxm64(&emit_state, tok);
+    case TT_IMM:
+        /* An immediate may only follow an X<n> register */
+        if (!tok_is_xreg(state->last)) {
+            p = tokstr[state->last];
+            oasm_err("expected X<n> but got %s\n", p);
+            return -1;
+        }
+        return emit_osxm64(&emit_state, tok);
+    default:
+        if (!tok->is_reg) {
+            oasm_err("syntax error\n");
+            return -1;
+        }
+        return parse_reg(state, tok);
+    }
+}
+
 void
 parse_enter(struct oasm_state *state)
 {
     struct oasm_token tok;
+    const char *type, *raw;
     int error = 0;
 
+    emit_init(&emit_state);
+
     for (;;) {
+        /* Never carry a stale string over from the last token */
+        tok.raw = NULL;
         error = lex_tok(state, &tok);
         if (error < 0) {
             break;
         }
 
-        if (tok.raw != NULL) {
-            free(tok.raw);
+        if (parse_tok(state, &tok) < 0) {
+            /* Rejected tokens were never queued, free them here */
+            free(tok.raw);
+            break;
         }
+
+        type = tokstr[tok.type];
+        raw = (tok.raw != NULL) ? tok.raw : "";
+        oasm_debug("got token type %s (%s)\n", type, raw);
     }
+
+    /* Process then destroy the emit state */
+    emit_process(state, &emit_state);
+    emit_destroy(&emit_state);
 }
|