From 986a736de0418bf01f52f2e44fdf58f23f04e615 Mon Sep 17 00:00:00 2001 From: Ian Moffett Date: Wed, 1 Oct 2025 19:51:26 -0400 Subject: np: parse: Create proc AST, handle end/begin, ... This commit introduces AST object types and AST integer type definitions. We also now keep track of how deep we are in begin/end tags. - Introduce parsing for TT_BEGIN - Introduce parsing for TT_END Signed-off-by: Ian Moffett --- src/sys/include/np/ast.h | 14 ++++++- src/sys/include/os/np.h | 4 ++ src/sys/np/core/np_parse.c | 92 +++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 103 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/sys/include/np/ast.h b/src/sys/include/np/ast.h index 8e516d7..9232226 100644 --- a/src/sys/include/np/ast.h +++ b/src/sys/include/np/ast.h @@ -51,17 +51,27 @@ typedef enum { AST_I64 } ast_itype_t; +/* + * AST node type + */ +typedef enum { + AT_BAD_OBJTYPE, /* Bad */ + AST_PROC, /* Procedure */ +} ast_type_t; + /* * Represents an AST node * * @ident: Identifier - * @token: Token type + * @num_type: Integer type + * @type: Object type * @left: Left node * @right: Right node */ struct ast_node { char *ident; - tt_t token; + ast_itype_t num_type; + ast_type_t type; struct ast_node *left; struct ast_node *right; }; diff --git a/src/sys/include/os/np.h b/src/sys/include/os/np.h index c3921cb..9b04f0e 100644 --- a/src/sys/include/os/np.h +++ b/src/sys/include/os/np.h @@ -49,6 +49,8 @@ * @lex_st: Lexer state * @ast_root: Parse tree * @ccache: Character cache (temporary store for lexer) + * @in_func: Is set if we are inside a function + * @begin_depth: How deep in "begin" we are */ struct np_work { char *source; @@ -58,6 +60,8 @@ struct np_work { struct ptrbox *work_mem; struct ast_node *ast_root; char ccache; + uint8_t in_func : 1; + uint8_t begin_depth; }; /* diff --git a/src/sys/np/core/np_parse.c b/src/sys/np/core/np_parse.c index 4973ac0..34c5f0b 100644 --- a/src/sys/np/core/np_parse.c +++ b/src/sys/np/core/np_parse.c @@ -35,6 +35,9 @@ #include #include #include +#include + +#define MAX_BEGIN_DEPTH 8 #define pr_trace(fmt, ...) printf("pirho.parse: " fmt, ##__VA_ARGS__) #define pr_error(fmt, ...) printf("pirho.parse: error: " fmt, ##__VA_ARGS__) @@ -168,25 +171,38 @@ parse_type(struct np_work *work, struct lex_token *tok) * Parse a procedure / function * * @work: Input work + * @npp: AST node pointer result * @tok: Current token * * Returns zero on success */ static int -parse_proc(struct np_work *work, struct lex_token *tok) +parse_proc(struct np_work *work, struct ast_node **npp, struct lex_token *tok) { + char *ident; + struct ast_node *np; + ast_itype_t ret_type = AST_BAD_TYPE; tt_t tt; if (work == NULL || tok == NULL) { return -EINVAL; } + if (npp == NULL) { + return -EINVAL; + } + /* We need the identifier */ tt = parse_expect(work, "proc", TT_IDENT, tok); if (tt == TT_NONE) { return -1; } + ident = ptrbox_strdup(tok->val_str, work->work_mem); + if (ident == NULL) { + return -ENOMEM; + } + /* Expect the left paren */ tt = parse_expect(work, "", TT_LPAREN, tok); if (tt == TT_NONE) { @@ -212,7 +228,8 @@ parse_proc(struct np_work *work, struct lex_token *tok) } /* And now the return type */ - if (parse_type(work, tok) == AST_BAD_TYPE) { + ret_type = parse_type(work, tok); + if (ret_type == AST_BAD_TYPE) { pr_error( "line %d: expected valid type, got %s\n", work->line_no, @@ -221,6 +238,24 @@ parse_proc(struct np_work *work, struct lex_token *tok) return -1; } + /* Need a 'begin' */ + tt = parse_expect(work, "", TT_BEGIN, tok); + if (tt == TT_NONE) { + return -1; + } + + np = ast_alloc(work); + if (np == NULL) { + pr_error("could not alloc AST node\n"); + return -ENOMEM; + } + + ++work->begin_depth; + work->in_func = 1; + np->num_type = ret_type; + np->type = AST_PROC; + np->ident = ident; + *npp = np; return 0; } @@ -228,24 +263,60 @@ parse_proc(struct np_work *work, struct lex_token *tok) * Parse a token * * @work: Input work + * @root: Root AST node * @tok: Current token */ static int -parse_token(struct np_work *work, struct lex_token *tok) +parse_token(struct np_work *work, struct ast_node *root, struct lex_token *tok) { tt_t tt; int error; + struct ast_node *np; /* * XXX: wrapped in "[]" indicates optional * + * TT_BEGIN => nil + * TT_END => nil * TT_PROC => proc (..., ...) [ -> ] */ switch (tok->token) { + case TT_BEGIN: + /* Don't exceed the max depth */ + if (work->begin_depth >= MAX_BEGIN_DEPTH) { + pr_error("line %d: max depth reached\n", work->line_no); + return -1; + } + + ++work->begin_depth; + case TT_END: + /* Do the begin statements match? */ + if (work->begin_depth > 0) { + --work->begin_depth; + break; + } + + pr_error( + "line %d: got 'end' statement but no equal 'begin' statements\n", + work->line_no + ); + return -1; case TT_PROC: - if ((error = parse_proc(work, tok)) != 0) { + /* Can't be nested */ + if (work->in_func) { + pr_error( + "line %d: nested functions not supported\n", + work->line_no + ); + return -1; + } + + if ((error = parse_proc(work, &np, tok)) != 0) { return -1; } + + root->left = NULL; /* arguments */ + root->right = np; break; } @@ -255,6 +326,7 @@ parse_token(struct np_work *work, struct lex_token *tok) int parse_work(struct np_work *work) { + struct ast_node *root; struct lex_token tok; int error = 0; @@ -265,6 +337,7 @@ parse_work(struct np_work *work) /* Get the AST root node */ work->ast_root = ast_alloc(work); + root = work->ast_root; if (work->ast_root == NULL) { pr_error("failed to alloc root AST|n"); return -ENOMEM; @@ -285,10 +358,19 @@ parse_work(struct np_work *work) return -1; } - if (parse_token(work, &tok) < 0) { + if (parse_token(work, root, &tok) < 0) { return -1; } } + /* + * If there are more begin clauses than end + * clauses, someone mismatched them. + */ + if (work->begin_depth > 0) { + pr_error("line %d: expected 'end' statement\n", work->line_no); + return -1; + } + return 0; } -- cgit v1.2.3