diff options
Diffstat (limited to 'sys/kern')
35 files changed, 4686 insertions, 240 deletions
diff --git a/sys/kern/disk_engine.c b/sys/kern/disk_engine.c new file mode 100644 index 0000000..1061165 --- /dev/null +++ b/sys/kern/disk_engine.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/syscall.h> +#include <sys/syslog.h> +#include <sys/systm.h> +#include <sys/disk.h> +#include <vm/dynalloc.h> + +#define pr_trace(fmt, ...) 
kprintf("disk: " fmt, ##__VA_ARGS__) +#define pr_error(...) pr_trace(__VA_ARGS__) + +/* + * Clones a disk parameter structure passed + * by a user. The structure returned is safe + * to be accessed freely by the kernel. + * + * @u_param: Contains user-side pointer + * @res: Resulting safe data + * + * Returns zero on success, otherwise a less than + * zero value is returned. + */ +static int +disk_param_clone(struct disk_param *u_param, struct disk_param *res) +{ + void *data; + int error; + + if (u_param == NULL) { + pr_error("disk_param_clone: got NULL u_param\n"); + return -EINVAL; + } + + error = copyin(u_param, res, sizeof(*res)); + if (error < 0) { + return error; + } + + /* + * If these parameters do not have a valid cookie, fuck + * that object, something is not right with it... + */ + if (res->cookie != DISK_PARAM_COOKIE) { + pr_error("disk_param_clone: erroneous params (bad cookie)\n"); + return -EACCES; + } + + data = dynalloc(res->size); + if (data == NULL) { + pr_error("disk_param_clone: out of memory\n"); + return -ENOMEM; + } + + error = copyin(res->buf, data, res->size); + if (error < 0) { + pr_error("failed to copy in param data\n"); + dynfree(data); + return error; + } + + res->u_buf = res->buf; + res->buf = data; + return 0; +} + +/* + * Deallocate a kernel managed disk parameter + * structure created by disk_param_clone() + * + * @param: Params to free + * + * Returns zero on success, otherwise a less than + * zero value is returned. + */ +static int +disk_param_free(struct disk_param *param) +{ + if (param == NULL) { + return -EINVAL; + } + + if (param->cookie != DISK_PARAM_COOKIE) { + return -EACCES; + } + + dynfree(param->buf); + return 0; +} + +/* + * Perform an operation on a disk. 
+ * + * @id: ID of disk to operate on + * @opcode: Operation to perform (see DISK_IO_*) + * @u_param: User side disk parameters + * + * Returns a less than zero value on error + */ +static ssize_t +disk_mux_io(diskid_t id, diskop_t opcode, struct disk_param *u_param) +{ + struct disk_param param; + struct disk *dp; + ssize_t retval = -EIO; + int error; + + if (u_param == NULL) { + return -EINVAL; + } + + error = disk_param_clone(u_param, ¶m); + if (error < 0) { + return error; + } + + /* First, attempt to acquire the disk */ + error = disk_get_id(id, &dp); + if (error < 0) { + pr_error("disk_mux_io: no such device (id=%d)\n", id); + return error; + } + + switch (opcode) { + case DISK_IO_READ: + retval = disk_read( + id, + param.blk, + param.buf, + param.size + ); + + /* Write back the data to the user program */ + error = copyout(param.buf, param.u_buf, param.size); + if (error < 0) { + retval = error; + } + break; + case DISK_IO_WRITE: + retval = disk_write( + id, + param.blk, + param.buf, + param.size + ); + break; + case DISK_IO_QUERY: + retval = disk_query( + id, + param.buf + ); + + /* Write back info to user program */ + error = copyout(param.buf, param.u_buf, param.size); + if (error < 0) { + retval = error; + } + break; + } + + disk_param_free(¶m); + return retval; +} + +/* + * Disk I/O multiplexer syscall + * + * arg0: disk id + * arg1: opcode + * arg2: disk params + */ +scret_t +sys_disk(struct syscall_args *scargs) +{ + struct disk_param *u_param = (void *)scargs->arg2; + diskid_t id = scargs->arg0; + diskop_t opcode = scargs->arg1; + + return disk_mux_io(id, opcode, u_param); +} diff --git a/sys/kern/driver_blacklist.c b/sys/kern/driver_blacklist.c new file mode 100644 index 0000000..982d5c9 --- /dev/null +++ b/sys/kern/driver_blacklist.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/queue.h> +#include <sys/driver.h> +#include <vm/dynalloc.h> +#include <string.h> + +#define BLACKLIST_SIZE 64 + +/* + * A driver blacklist entry + * + * @name: Name of driver to be blacklisted + * @buckets: To handle collisions + */ +struct blacklist_entry { + char *name; + TAILQ_ENTRY(blacklist_entry) link; + TAILQ_HEAD(, blacklist_entry) buckets; +}; + +static struct blacklist_entry blacklist[BLACKLIST_SIZE]; + +static uint32_t +fnv1_hash(const char *s) +{ + uint32_t hash = 2166136261UL; + const uint8_t *p = (uint8_t *)s; + + while (*p != '\0') { + hash ^= *p; + hash = hash * 0x01000193; + ++p; + } + + return hash; +} + +/* + * Returns a bucket in case of collision + */ +static struct blacklist_entry * +blacklist_collide(struct blacklist_entry *entp, const char *name) +{ + struct blacklist_entry *tmp; + + if (entp->name == NULL) { + return NULL; + } + + TAILQ_FOREACH(tmp, &entp->buckets, link) { + if (strcmp(name, tmp->name) == 0) { + return tmp; + } + } + + return NULL; +} + +/* + * Mark a driver to be ignored during startup. + * Blacklisted drivers will not be ran. + * + * @name: Name of driver (e.g., 'ahci') + */ +int +driver_blacklist(const char *name) +{ + struct blacklist_entry *ent; + struct blacklist_entry *bucket; + size_t name_len; + uint32_t hash; + + if (name == NULL) { + return -EINVAL; + } + + hash = fnv1_hash(name); + ent = &blacklist[hash % BLACKLIST_SIZE]; + if (ent->name != NULL) { + bucket = dynalloc(sizeof(*bucket)); + if (bucket == NULL) { + return -EINVAL; + } + TAILQ_INSERT_TAIL(&ent->buckets, bucket, link); + return 0; + } + + name_len = strlen(name); + ent->name = dynalloc(name_len + 1); + if (ent->name == NULL) { + return -ENOMEM; + } + memcpy(ent->name, name, name_len + 1); + return 0; +} + +/* + * Checks if a driver name is in the blacklist. + * Returns 0 if not, otherwise 1. 
+ */ +int +driver_blacklist_check(const char *name) +{ + struct blacklist_entry *ent; + uint32_t hash; + + if (name == NULL) { + return -EINVAL; + } + + hash = fnv1_hash(name); + ent = &blacklist[hash % BLACKLIST_SIZE]; + if (ent->name == NULL) { + return 0; + } + + if (strcmp(ent->name, name) == 0) { + return 1; + } + + ent = blacklist_collide(ent, name); + if (ent != NULL) { + return 1; + } + + return 0; +} + +/* + * Initialize each entry in the driver + * blacklist + */ +void +driver_blacklist_init(void) +{ + for (size_t i = 0; i < BLACKLIST_SIZE; ++i) { + blacklist[i].name = NULL; + TAILQ_INIT(&blacklist[i].buckets); + } +} diff --git a/sys/kern/driver_subr.c b/sys/kern/driver_subr.c new file mode 100644 index 0000000..a0f9f73 --- /dev/null +++ b/sys/kern/driver_subr.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/driver.h> +#include <sys/proc.h> +#include <sys/cdefs.h> +#include <sys/syslog.h> +#include <sys/panic.h> +#include <dev/timer.h> +#include <machine/sync.h> + +/* + * Initialize early drivers + * + * XXX: This should *NOT* be called directly, + * use DRIVERS_SCHED() instead. + */ +void +__driver_init_td(void) +{ + const struct driver *dp; + struct driver_var *var; + struct proc *td; + uintptr_t start, end; + + td = this_td(); + start = (uintptr_t)__driversd_init_start; + end = (uintptr_t)__driversd_init_end; + + for (dp = (void *)start; (uintptr_t)dp < end; ++dp) { + var = dp->data; + + /* + * Check the blacklist to see if this driver + * is marked to be ignored. If so, just continue + * to the next. 
+ */ + if (driver_blacklist_check(dp->name)) { + continue; + } + + if (var->deferred) { + dp->init(); + var->deferred = 0; + } + } + + exit1(td, 0); + __builtin_unreachable(); +} diff --git a/sys/kern/exec_elf64.c b/sys/kern/exec_elf64.c index 3767b0b..8dc87dc 100644 --- a/sys/kern/exec_elf64.c +++ b/sys/kern/exec_elf64.c @@ -49,11 +49,43 @@ #define PHDR(HDRP, IDX) \ (void *)((uintptr_t)HDRP + (HDRP)->e_phoff + (HDRP->e_phentsize * IDX)) +#define SHDR(HDRP, IDX) \ + (void *)((uintptr_t)HDRP + (HDRP)->e_shoff + (HDRP->e_shentsize * IDX)) + struct elf_file { char *data; size_t size; }; +static int +elf_parse_shdrs(Elf64_Ehdr *eh) +{ + Elf64_Shdr *shp; + uint32_t nshdr; + + if (eh == NULL) { + return -EINVAL; + } + + nshdr = eh->e_shnum; + for (uint32_t i = 0; i < nshdr; ++i) { + shp = SHDR(eh, i); + + /* Drop null entries */ + if (shp->sh_type == SHT_NULL) { + continue; + } + + switch (shp->sh_type) { + case SHT_NOBITS: + memset((void *)shp->sh_addr, 0x0, shp->sh_size); + break; + } + } + + return 0; +} + /* * Load the file and give back an "elf_file" * structure. 
@@ -80,7 +112,7 @@ elf_get_file(const char *pathname, struct elf_file *res) getattr_args.res = &vattr; getattr_args.vp = vp; - status = vfs_vop_getattr(vp, &getattr_args); + status = vfs_vop_getattr(&getattr_args); if (status != 0) goto done; @@ -192,6 +224,7 @@ elf64_load(const char *pathname, struct proc *td, struct exec_prog *prog) if ((status = elf64_verify(hdr)) != 0) goto done; + memset(loadmap, 0, sizeof(loadmap)); pcbp = &td->pcb; start = -1; end = 0; @@ -242,6 +275,7 @@ elf64_load(const char *pathname, struct proc *td, struct exec_prog *prog) } } + elf_parse_shdrs(hdr); memcpy(prog->loadmap, loadmap, sizeof(loadmap)); prog->start = start; prog->end = end; diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 667bb97..4a0f7a8 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -35,14 +35,26 @@ #include <sys/exec.h> #include <sys/driver.h> #include <sys/panic.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <dev/acpi/uacpi.h> #include <dev/cons/cons.h> #include <dev/acpi/acpi.h> #include <machine/cpu.h> #include <machine/cdefs.h> #include <vm/vm.h> +#include <vm/stat.h> #include <string.h> -static struct proc proc0; +#define _START_PATH "/usr/sbin/init" +#if defined(_INSTALL_MEDIA) +#define _START_ARG "/usr/sbin/install" +#else +#define _START_ARG NULL +#endif /* _INSTALL_MEDIA */ + +struct proc g_proc0; +struct proc *g_init; static void copyright(void) @@ -56,9 +68,10 @@ start_init(void) { struct proc *td = this_td(); struct execve_args execve_args; - char *argv[] = { "/usr/bin/osh", NULL }; + char *argv[] = { _START_PATH, _START_ARG, NULL }; char *envp[] = { NULL }; + kprintf("starting init...\n"); execve_args.pathname = argv[0]; execve_args.argv = argv; execve_args.envp = envp; @@ -92,6 +105,9 @@ main(void) /* Init the virtual file system */ vfs_init(); + /* Init vmstats */ + vm_stat_init(); + /* Expose the console to devfs */ cons_expose(); @@ -99,14 +115,25 @@ main(void) md_intoff(); sched_init(); + memset(&g_proc0, 0, 
sizeof(g_proc0)); + sysctl_clearstr(KERN_HOSTNAME); + /* Startup pid 1 */ - memset(&proc0, 0, sizeof(proc0.tf)); - fork1(&proc0, 0, start_init, NULL); + spawn(&g_proc0, start_init, NULL, 0, &g_init); + md_inton(); - /* Load all drivers */ + uacpi_init(); + + /* Load all early drivers */ DRIVERS_INIT(); - /* Bootstrap APs and here we go! */ + /* Only log to kmsg from here */ + syslog_silence(true); + + /* + * Bootstrap APs, schedule all other drivers + * and here we go! + */ mp_bootstrap_aps(&g_bsp_ci); sched_enter(); __builtin_unreachable(); diff --git a/sys/kern/kern_accnt.c b/sys/kern/kern_accnt.c new file mode 100644 index 0000000..51905e7 --- /dev/null +++ b/sys/kern/kern_accnt.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * System Accounting + */ + +#include <sys/sched.h> +#include <sys/schedvar.h> +#include <sys/proc.h> +#include <fs/ctlfs.h> +#include <machine/cpu.h> +#include <string.h> + +/* Called within kern_sched.c */ +void sched_accnt_init(void); + +static struct ctlops sched_stat_ctl; +volatile size_t g_nthreads; + +static int +ctl_stat_read(struct ctlfs_dev *cdp, struct sio_txn *sio) +{ + struct sched_stat stat; + + if (sio->len > sizeof(stat)) { + sio->len = sizeof(stat); + } + + sched_stat(&stat); + memcpy(sio->buf, &stat, sio->len); + return sio->len; +} + +static uint16_t +cpu_nhlt(void) +{ + uint16_t nhlt = 0; + struct cpu_info *ci; + + for (size_t i = 0; i < CPU_MAX; ++i) { + ci = cpu_get(i); + if (ci == NULL) { + continue; + } + if (!ci->online) { + ++nhlt; + } + } + + return nhlt; +} + +/* + * Get scheduler accounting information + * + * @statp: Info gets copied here + */ +void +sched_stat(struct sched_stat *statp) +{ + struct sched_cpu *cpustat; + + statp->nproc = atomic_load_64(&g_nthreads); + statp->ncpu = cpu_count(); + statp->quantum_usec = DEFAULT_TIMESLICE_USEC; + statp->nhlt = cpu_nhlt(); + + /* + * Setup the per-cpu info/statistics + */ + for (int i = 0; i < CPU_MAX; ++i) { + cpustat = cpu_get_stat(i); + if (cpustat == NULL) { + break; + } + + statp->cpus[i] = *cpustat; + } +} + +void +sched_accnt_init(void) +{ + char devname[] = "sched"; + struct ctlfs_dev ctl; + + /* + * Register some accounting information in 
+ * '/ctl/sched/stat' + */ + ctl.mode = 0444; + ctlfs_create_node(devname, &ctl); + ctl.devname = devname; + ctl.ops = &sched_stat_ctl; + ctlfs_create_entry("stat", &ctl); +} + +static struct ctlops sched_stat_ctl = { + .read = ctl_stat_read, + .write = NULL +}; diff --git a/sys/kern/kern_cpu.c b/sys/kern/kern_cpu.c new file mode 100644 index 0000000..69d44c4 --- /dev/null +++ b/sys/kern/kern_cpu.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/systm.h> +#include <sys/sysctl.h> +#include <sys/types.h> + +/* + * Report the number of processors that are online + * in the machine. + * + * @count: Number of processors active + * + * Returns zero on success, otherwise a less + * than zero value is returned. + */ +int +cpu_report_count(uint32_t count) +{ + struct sysctl_args args; + int error, name = HW_NCPU; + + args.name = &name; + args.nlen = 1; + args.oldlenp = 0; + args.oldp = NULL; + args.newp = &count; + args.newlen = sizeof(count); + + if ((error = sysctl(&args)) != 0) { + return error; + } + + return 0; +} diff --git a/sys/kern/kern_cred.c b/sys/kern/kern_cred.c new file mode 100644 index 0000000..017b22a --- /dev/null +++ b/sys/kern/kern_cred.c @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/ucred.h> +#include <sys/proc.h> + +int +setuid(uid_t new) +{ + struct proc *td; + struct ucred *cur_cred; + + td = this_td(); + cur_cred = &td->cred; + + /* + * Only root can become other users. If you are not + * root, fuck off. 
+ */ + if (cur_cred->ruid != 0) { + return -EPERM; + } + + spinlock_acquire(&cur_cred->lock); + cur_cred->euid = new; + cur_cred->ruid = new; + spinlock_release(&cur_cred->lock); + return 0; +} + +uid_t +getuid(void) +{ + struct proc *td; + + td = this_td(); + if (td == NULL) { + return -1; + } + + return td->cred.ruid; +} + +/* + * setuid() syscall + * + * arg0: `new' + */ +scret_t +sys_setuid(struct syscall_args *scargs) +{ + return setuid(scargs->arg0); +} + +scret_t +sys_getuid(struct syscall_args *scargs) +{ + return getuid(); +} diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index d122e89..83845f6 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -41,6 +41,7 @@ /* * Allocate a file descriptor. * + * @td: Process to allocate from (null for CURRENT) * @fd_out: Pointer to allocated file descriptor output. * * This routine will create a new file descriptor @@ -49,10 +50,13 @@ * Returns 0 on success. */ int -fd_alloc(struct filedesc **fd_out) +fd_alloc(struct proc *td, struct filedesc **fd_out) { struct filedesc *fd; - struct proc *td = this_td(); + + if (td == NULL) { + td = this_td(); + } /* Find free fd table entry */ for (size_t i = 3; i < PROC_MAX_FILEDES; ++i) { @@ -85,12 +89,15 @@ fd_alloc(struct filedesc **fd_out) * Fetch a file descriptor from a file descriptor * number. 
* + * @td: Process to get fd from (NULL for current) * @fdno: File descriptor to fetch */ struct filedesc * -fd_get(unsigned int fdno) +fd_get(struct proc *td, unsigned int fdno) { - struct proc *td = this_td(); + if (td == NULL) { + td = this_td(); + } if (fdno > PROC_MAX_FILEDES) { return NULL; @@ -111,7 +118,7 @@ fd_close(unsigned int fd) struct filedesc *filedes; struct proc *td; - if ((filedes = fd_get(fd)) == NULL) { + if ((filedes = fd_get(NULL, fd)) == NULL) { return -EBADF; } @@ -149,18 +156,32 @@ fd_rw(unsigned int fd, void *buf, size_t count, uint8_t write) { char *kbuf = NULL; ssize_t n; + uint32_t seal; struct filedesc *filedes; struct sio_txn sio; scret_t retval = 0; + if (fd > PROC_MAX_FILEDES) { + return -EBADF; + } + if (count > SSIZE_MAX) { retval = -EINVAL; goto done; } - filedes = fd_get(fd); - kbuf = dynalloc(count); + filedes = fd_get(NULL, fd); + seal = filedes->flags; + /* Check the seal */ + if (write && !ISSET(seal, O_ALLOW_WR)) { + return -EPERM; + } + if (!write && ISSET(seal, O_WRONLY)) { + return -EPERM; + } + + kbuf = dynalloc(count); if (kbuf == NULL) { retval = -ENOMEM; goto done; @@ -187,6 +208,7 @@ fd_rw(unsigned int fd, void *buf, size_t count, uint8_t write) sio.buf = kbuf; sio.offset = filedes->offset; + spinlock_acquire(&filedes->lock); if (write) { /* Copy in user buffer */ if (copyin(buf, kbuf, count) < 0) { @@ -205,19 +227,52 @@ fd_rw(unsigned int fd, void *buf, size_t count, uint8_t write) goto done; } + /* End of file? 
*/ + if (n == 0) { + retval = 0; + goto done; + } + if (copyout(kbuf, buf, count) < 0) { retval = -EFAULT; goto done; } } - retval = count; + + /* Increment the offset per read */ + filedes->offset += n; + retval = n; done: if (kbuf != NULL) { dynfree(kbuf); } + spinlock_release(&filedes->lock); return retval; } +static int +fd_do_create(const char *path, struct nameidata *ndp) +{ + struct vop_create_args cargs; + struct vnode *dirvp = ndp->vp; + const struct vops *vops = dirvp->vops; + int error; + + if (vops->create == NULL) { + return -EINVAL; + } + + cargs.path = path; + cargs.ppath = ndp->path; + cargs.dirvp = dirvp; + cargs.vpp = &ndp->vp; + if ((error = vops->create(&cargs)) < 0) { + return error; + } + + return 0; +} + int fd_read(unsigned int fd, void *buf, size_t count) { @@ -236,28 +291,35 @@ fd_write(unsigned int fd, void *buf, size_t count) * * @pathname: Path of file to open. * @flags: Flags to use. - * - * TODO: Use of flags. */ int fd_open(const char *pathname, int flags) { int error; + const struct vops *vops; struct filedesc *filedes; struct nameidata nd; nd.path = pathname; - nd.flags = 0; + nd.flags = ISSET(flags, O_CREAT) ? NAMEI_WANTPARENT : 0; if ((error = namei(&nd)) < 0) { return error; } - if ((error = fd_alloc(&filedes)) != 0) { + if ((error = fd_alloc(NULL, &filedes)) != 0) { vfs_release_vnode(nd.vp); return error; } + vops = nd.vp->vops; + if (ISSET(flags, O_CREAT) && vops->create != NULL) { + error = fd_do_create(pathname, &nd); + } + if (error < 0) { + return error; + } + filedes->vp = nd.vp; filedes->flags = flags; return filedes->fdno; @@ -266,18 +328,25 @@ fd_open(const char *pathname, int flags) /* * Duplicate a file descriptor. New file descriptor * points to the same vnode. 
+ * + * @td: Process of fd to dup (NULL for current) + * @fd: File descriptor to dup */ int -fd_dup(int fd) +fd_dup(struct proc *td, int fd) { int error; struct filedesc *new_desc, *tmp; - tmp = fd_get(fd); + if (td == NULL) { + td = this_td(); + } + + tmp = fd_get(td, fd); if (tmp == NULL) return -EBADF; - if ((error = fd_alloc(&new_desc)) != 0) + if ((error = fd_alloc(td, &new_desc)) != 0) return error; /* Ref that vnode before we point to it */ @@ -285,3 +354,51 @@ fd_dup(int fd) new_desc->vp = tmp->vp; return new_desc->fdno; } + +off_t +fd_seek(int fildes, off_t offset, int whence) +{ + struct filedesc *tmp; + struct vattr attr; + struct vop_getattr_args getattr_args; + + tmp = fd_get(NULL, fildes); + if (tmp == NULL) { + return -EBADF; + } + + getattr_args.vp = tmp->vp; + getattr_args.res = &attr; + if ((vfs_vop_getattr(&getattr_args)) < 0) { + return -EPIPE; + } + + switch (whence) { + case SEEK_SET: + tmp->offset = offset; + break; + case SEEK_CUR: + tmp->offset += offset; + break; + case SEEK_END: + tmp->offset = attr.size + offset; + break; + default: + return -EINVAL; + } + + return tmp->offset; +} + +/* + * Update file offset + * + * arg0: `filedes' + * arg1: `offset' + * arg2: `whence' + */ +scret_t +sys_lseek(struct syscall_args *scargs) +{ + return fd_seek(scargs->arg0, scargs->arg1, scargs->arg2); +} diff --git a/sys/kern/kern_disk.c b/sys/kern/kern_disk.c new file mode 100644 index 0000000..a3fa05e --- /dev/null +++ b/sys/kern/kern_disk.c @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/errno.h> +#include <sys/syslog.h> +#include <sys/sio.h> +#include <sys/param.h> +#include <sys/panic.h> +#include <sys/spinlock.h> +#include <sys/device.h> +#include <sys/disk.h> +#include <vm/dynalloc.h> +#include <assert.h> +#include <string.h> + +#define pr_trace(fmt, ...) kprintf("disk: " fmt, ##__VA_ARGS__) +#define pr_error(...) pr_trace(__VA_ARGS__) + +#define DEFAULT_BSIZE 512 /* Default block size in bytes */ +#define DISKQ_COOKIE 0xD9EA /* Verification cookie */ + +/* + * The maximum disks supported by the kernel + * is defined by the `DISK_MAX' kconf(9) option. + * + * We define a default of 16 if that option is not + * specified. 
+ */
+#if defined(__DISK_MAX)
+#define DISK_MAX __DISK_MAX
+#else
+#define DISK_MAX 16     /* Maximum disks */
+#endif
+
+/*
+ * We set a hard limit at 64 disks to prevent misconfiguration as
+ * it is unlikely that one would ever have that many on a single
+ * instance. Though of course, anything is possible, so one may
+ * patch the hard limit defined below to a higher value if needed.
+ *
+ * NOTE(review): the comparison below is strict, so a DISK_MAX of
+ * exactly 64 is rejected as well.
+ */
+__static_assert(DISK_MAX < 64, "DISK_MAX exceeds hard limit");
+
+/*
+ * The disk queue stores descriptors of disks that
+ * are registered with the system. This allows for
+ * easy and simplified access of the storage medium.
+ *
+ * XXX: An array would be more efficient, however disks
+ *      could be detached or swapped during runtime thus
+ *      making the usage of queues a more sane design.
+ *
+ *      This also provides the added benefit of lazy-allocation
+ *      so memory isn't wasted and only allocated when we actually
+ *      have a disk descriptor that it would be used to store.
+ */
+static struct spinlock diskq_lock;
+static TAILQ_HEAD(, disk) diskq;
+static uint16_t disk_count = 0;
+static uint16_t diskq_cookie = 0;
+
+/*
+ * Verify that a disk descriptor has been properly
+ * initialized by comparing against the cookie field.
+ *
+ * @dp: Descriptor to check (asserted to be non-NULL)
+ *
+ * Returns a value of zero if valid, otherwise a less
+ * than zero value is returned.
+ */
+__always_inline static inline int
+check_disk_cookie(struct disk *dp)
+{
+    __assert(dp != NULL);
+    return (dp->cookie == DISKQ_COOKIE) ? 0 : -1;
+}
+
+/*
+ * Verify if the disk queue is initialized and
+ * ready for descriptors to be added.
+ *
+ * Returns a value of zero if it has already been
+ * initialized, otherwise a value less than zero
+ * is returned after check_diskq() initializes
+ * the disk queue.
+ *
+ * NOTE(review): the cookie test and TAILQ_INIT() are done
+ * without taking diskq_lock here.
+ */
+static inline int
+check_diskq(void)
+{
+    if (diskq_cookie != DISKQ_COOKIE) {
+        TAILQ_INIT(&diskq);
+        diskq_cookie = DISKQ_COOKIE;
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Acquire a disk descriptor through a zero-based
+ * disk index.
+ * Returns a pointer to the disk descriptor
+ * on success, otherwise NULL is returned.
+ *
+ * @id: Disk index
+ *
+ * XXX: This is the lockless internal implementation,
+ *      please use disk_get_id() instead.
+ */
+static struct disk *
+__disk_get_id(diskid_t id)
+{
+    struct disk *dp;
+
+    if (id >= disk_count) {
+        return NULL;
+    }
+
+    dp = TAILQ_FIRST(&diskq);
+    if (dp == NULL) {
+        return NULL;
+    }
+
+    /*
+     * Now, we start at the first disk entry and
+     * traverse the list. If the ID of a disk matches
+     * the ID we are looking for, return it.
+     */
+    while (dp != NULL) {
+        if (dp->id == id) {
+            return dp;
+        }
+
+        dp = TAILQ_NEXT(dp, link);
+    }
+
+    /* Nothing found :( */
+    return NULL;
+}
+
+/*
+ * Attempt to perform a read/write operation on
+ * a disk.
+ *
+ * @id: ID of disk to operate on
+ * @blk: Block offset to start the transfer at
+ * @buf: Buffer to read data into (or to write data from)
+ * @len: Number of bytes to transfer; rounded up to a
+ *       multiple of V_BSIZE before it is handed to the
+ *       driver, so `buf' must be at least that large
+ * @write: If true, do a write
+ *
+ * Returns whatever the driver callback returns, -ENOTSUP
+ * if the driver lacks the requested callback, -EIO if the
+ * disk has no bdevsw or the error from disk_get_id().
+ *
+ * XXX: The size in which blocks are read at is in
+ *      virtual blocks which is defined by V_BSIZE
+ *      in sys/disk.h
+ *
+ * NOTE(review): the byte offset is computed from `dp->bsize'
+ * rather than V_BSIZE - confirm which unit `blk' is meant
+ * to be in.
+ */
+static ssize_t
+disk_rw(diskid_t id, blkoff_t blk, void *buf, size_t len, bool write)
+{
+    const struct bdevsw *bdev;
+    struct sio_txn sio;
+    struct disk *dp;
+    int error;
+
+    len = ALIGN_UP(len, V_BSIZE);
+
+    /* Attempt to grab the disk object */
+    error = disk_get_id(id, &dp);
+    if (error < 0) {
+        return error;
+    }
+
+    /* Sanity check, should not happen */
+    bdev = dp->bdev;
+    if (__unlikely(bdev == NULL)) {
+        return -EIO;
+    }
+
+    /* Prepare the buffer */
+    sio.buf = buf;
+    sio.offset = blk * dp->bsize;
+    sio.len = len;
+
+    /* Handle writes */
+    if (write) {
+        if (bdev->write == NULL) {
+            return -ENOTSUP;
+        }
+
+        return bdev->write(dp->dev, &sio, 0);
+    }
+
+    /* Do we support this operation? */
+    if (bdev->read == NULL) {
+        return -ENOTSUP;
+    }
+
+    return bdev->read(dp->dev, &sio, 0);
+}
+
+/*
+ * Register a disk with the system so that it may
+ * be accessible independently of its device major
+ * and minor numbers
+ *
+ * @name: Name of the disk (copied into the descriptor)
+ * @dev: Device number later passed to the `bdev' callbacks
+ * @bdev: Block device operations associated with device
+ * @flags: Not referenced by this function
+ *
+ * Returns zero on success, otherwise a less than zero
+ * value is returned: -EINVAL for NULL arguments, -EAGAIN
+ * when DISK_MAX disks are already registered, -E2BIG if
+ * the name does not fit and -ENOMEM on allocation failure.
+ */
+int
+disk_add(const char *name, dev_t dev, const struct bdevsw *bdev, int flags)
+{
+    struct disk *dp;
+    size_t name_len;
+
+    if (name == NULL || bdev == NULL) {
+        return -EINVAL;
+    }
+
+    /* Disk queue must be initialized */
+    check_diskq();
+
+    /*
+     * There is a limit to how many can be added. This is
+     * only a cheap early-out, the authoritative check is
+     * repeated under the queue lock below.
+     */
+    if (disk_count >= DISK_MAX) {
+        pr_error("disk_add: disk limit %d/%d reached\n",
+            disk_count, DISK_MAX);
+        return -EAGAIN;
+    }
+
+    /* Is the disk name of correct length? */
+    name_len = strlen(name);
+    if (name_len >= sizeof(dp->name) - 1) {
+        /* `%d' expects an int, name_len is a size_t */
+        pr_error("disk_add: name too big (len=%d)\n", (int)name_len);
+        return -E2BIG;
+    }
+
+    dp = dynalloc(sizeof(*dp));
+    if (dp == NULL) {
+        pr_error("failed to allocate disk\n");
+        return -ENOMEM;
+    }
+
+    /* Initialize the descriptor */
+    memset(dp, 0, sizeof(*dp));
+    memcpy(dp->name, name, name_len);
+    dp->cookie = DISKQ_COOKIE;
+    dp->bdev = bdev;
+    dp->dev = dev;
+    dp->bsize = DEFAULT_BSIZE;
+
+    /*
+     * We are to panic if the virtual blocksize
+     * defined is not a multiple of any hardware
+     * block size
+     */
+    if ((V_BSIZE & (dp->bsize - 1)) != 0) {
+        panic("virtual block size not hw bsize aligned\n");
+    }
+
+    /*
+     * Hand out the ID and link the descriptor in one
+     * critical section so that two concurrent callers
+     * can neither receive the same ID nor push the
+     * count past DISK_MAX.
+     */
+    spinlock_acquire(&diskq_lock);
+    if (disk_count >= DISK_MAX) {
+        spinlock_release(&diskq_lock);
+        dynfree(dp);
+        pr_error("disk_add: disk limit %d/%d reached\n",
+            disk_count, DISK_MAX);
+        return -EAGAIN;
+    }
+
+    dp->id = disk_count++;
+    TAILQ_INSERT_TAIL(&diskq, dp, link);
+    spinlock_release(&diskq_lock);
+    return 0;
+}
+
+/*
+ * Acquire a disk descriptor by using a zero-based
+ * index.
+ *
+ * @id: Disk index (0: primary)
+ * @res: Resulting disk descriptor
+ *
+ * Returns zero on success, otherwise a less than
+ * zero value is returned.
+ */
+int
+disk_get_id(diskid_t id, struct disk **res)
+{
+    int error;
+    struct disk *dp;
+
+    if (res == NULL) {
+        return -EINVAL;
+    }
+
+    if (id >= disk_count) {
+        return -ENODEV;
+    }
+
+    /* Grab the disk */
+    spinlock_acquire(&diskq_lock);
+    dp = __disk_get_id(id);
+    spinlock_release(&diskq_lock);
+
+    /* Did it even exist? */
+    if (dp == NULL) {
+        return -ENODEV;
+    }
+
+    /* Should not fail but make sure */
+    error = check_disk_cookie(dp);
+    if (__unlikely(error < 0)) {
+        panic("disk_get_id: got bad disk object\n");
+    }
+
+    *res = dp;
+    return 0;
+}
+
+/*
+ * Allocate a memory buffer that may be used for
+ * disk I/O.
+ *
+ * @id: ID of disk buffer will be used for (only used
+ *      to verify that such a disk exists)
+ * @len: Length to allocate, rounded up to a multiple
+ *       of V_BSIZE
+ *
+ * Returns the buffer, or NULL if `len' is zero, the disk
+ * does not exist or the allocation fails.
+ */
+void *
+disk_buf_alloc(diskid_t id, size_t len)
+{
+    struct disk *dp;
+    void *buf;
+
+    if (len == 0) {
+        return NULL;
+    }
+
+    /* Attempt to acquire the disk */
+    if (disk_get_id(id, &dp) < 0) {
+        return NULL;
+    }
+
+    /*
+     * Here we will align the buffer size by the
+     * virtual block size to ensure it is big enough.
+     */
+    len = ALIGN_UP(len, V_BSIZE);
+    buf = dynalloc(len);
+    return buf;
+}
+
+/*
+ * Free a memory buffer that was allocated by
+ * disk_buf_alloc()
+ *
+ * @p: Buffer to release, NULL is ignored
+ */
+void
+disk_buf_free(void *p)
+{
+    if (p != NULL) {
+        dynfree(p);
+    }
+}
+
+/*
+ * Attempt to perform a read operation on
+ * a disk.
+ *
+ * @id: ID of disk to operate on
+ * @blk: Block offset to read at
+ * @buf: Buffer to read data into
+ * @len: Number of bytes to read
+ *
+ * The transfer goes through a block-aligned bounce
+ * buffer, only the first `len' bytes are copied back
+ * into `buf'.
+ *
+ * Returns the value of disk_rw() on success, -EINVAL if
+ * `buf' is NULL, -ENOMEM if no bounce buffer could be
+ * obtained, or the negative error from disk_rw().
+ */
+ssize_t
+disk_read(diskid_t id, blkoff_t blk, void *buf, size_t len)
+{
+    ssize_t retval;
+    char *tmp;
+
+    if (buf == NULL) {
+        return -EINVAL;
+    }
+
+    tmp = disk_buf_alloc(id, len);
+    if (tmp == NULL) {
+        return -ENOMEM;
+    }
+
+    retval = disk_rw(id, blk, tmp, len, false);
+    if (retval >= 0) {
+        memcpy(buf, tmp, len);
+    }
+
+    disk_buf_free(tmp);
+    return retval;
+}
+
+/*
+ * Attempt to perform a write operation on
+ * a disk.
+ *
+ * @id: ID of disk to operate on
+ * @blk: Block offset to write at
+ * @buf: Buffer containing data to write
+ * @len: Number of bytes to write
+ *
+ * The data is staged in a block-aligned bounce buffer.
+ * disk_rw() rounds the length up to a multiple of V_BSIZE,
+ * so the bytes past `len' are zeroed instead of being
+ * handed to the driver uninitialized.
+ *
+ * Returns the value of disk_rw(), -EINVAL if `buf' is NULL
+ * or -ENOMEM if no bounce buffer could be obtained.
+ */
+ssize_t
+disk_write(diskid_t id, blkoff_t blk, const void *buf, size_t len)
+{
+    ssize_t retval;
+    char *tmp;
+
+    if (buf == NULL) {
+        return -EINVAL;
+    }
+
+    tmp = disk_buf_alloc(id, len);
+    if (tmp == NULL) {
+        return -ENOMEM;
+    }
+
+    /* Same rounding as disk_buf_alloc() and disk_rw() */
+    memset(tmp, 0, ALIGN_UP(len, V_BSIZE));
+    memcpy(tmp, buf, len);
+    retval = disk_rw(id, blk, tmp, len, true);
+    disk_buf_free(tmp);
+    return retval;
+}
+
+/*
+ * Attempt to request attributes from a specific
+ * device.
+ *
+ * @id: ID of disk to query
+ * @res: Resulting information goes here
+ *
+ * This function returns zero on success, otherwise
+ * a less than zero value is returned: -EINVAL if `res'
+ * is NULL, -EIO if the disk has no bdevsw, -ENOTSUP if
+ * the driver has no `bsize' callback, or the error
+ * from disk_get_id().
+ */
+int
+disk_query(diskid_t id, struct disk_info *res)
+{
+    const struct bdevsw *bdev;
+    struct disk *dp;
+    int error;
+
+    if (res == NULL) {
+        return -EINVAL;
+    }
+
+    /* Attempt to grab the disk */
+    error = disk_get_id(id, &dp);
+    if (error < 0) {
+        pr_error("disk_query: bad disk ID %d\n", id);
+        return error;
+    }
+
+    bdev = dp->bdev;
+    if (__unlikely(bdev == NULL)) {
+        pr_error("disk_query: no bdev for disk %d\n", id);
+        return -EIO;
+    }
+
+    /* Mirror disk_rw(): a missing callback is "not supported" */
+    if (bdev->bsize == NULL) {
+        return -ENOTSUP;
+    }
+
+    res->block_size = dp->bsize;
+    res->vblock_size = V_BSIZE;
+    res->n_block = bdev->bsize(dp->dev);
+    return 0;
+}
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index bf6a26e..2a53b8a 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -37,6 +37,7 @@
 #include <vm/map.h>
 #include <vm/physmem.h>
 #include <machine/pcb.h>
+#include <machine/cdefs.h>
 #include <string.h>

 /*
@@ -87,6 +88,7 @@ execve(struct proc *td, const struct execve_args *args)
     release_stack(td);

     /* Save program state */
+    md_intoff();
     memcpy(&td->exec, &prog, sizeof(td->exec));

     /* Set new stack and map it to userspace */
@@ -99,7 +101,7 @@ execve(struct proc *td, const struct execve_args *args)
     stack_top = td->stack_base + (PROC_STACK_SIZE - 1);

     /* Setup registers, signals and stack */
-    md_td_stackinit(td, (void *)(stack_top + VM_HIGHER_HALF), &prog);
+
stack_top = md_td_stackinit(td, (void *)(stack_top + VM_HIGHER_HALF), &prog);
     setregs(td, &prog, stack_top);
     signals_init(td);

diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 75ab0e9..af697d7 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -30,15 +30,24 @@
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/syslog.h>
+#include <sys/atomic.h>
+#include <sys/panic.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <dev/cons/cons.h>
 #include <vm/physmem.h>
 #include <vm/dynalloc.h>
 #include <vm/vm.h>
 #include <vm/map.h>
 #include <machine/pcb.h>
+#include <machine/cpu.h>

 #define pr_trace(fmt, ...) kprintf("exit: " fmt, ##__VA_ARGS__)
 #define pr_error(...) pr_trace(__VA_ARGS__)

+extern volatile size_t g_nthreads;
+extern struct proc g_init;
+
 static void
 unload_td(struct proc *td)
 {
@@ -48,6 +57,11 @@ unload_td(struct proc *td)
     struct pcb *pcbp;
     size_t len;

+    sched_detach(td);
+    if (ISSET(td->flags, PROC_KTD)) {
+        return;
+    }
+
     execp = &td->exec;
     auxvalp = &execp->auxval;
     pcbp = &td->pcb;
@@ -72,56 +86,138 @@ unload_td(struct proc *td)
     }
 }

+/*
+ * Release the resources held by a process: its file
+ * descriptors, its loaded program (through unload_td()),
+ * its stack and finally its address space.
+ *
+ * @td: Process to reap
+ *
+ * The process structure itself is not freed here.
+ */
+void
+proc_reap(struct proc *td)
+{
+    struct pcb *pcbp;
+    struct filedesc *fdp;
+    vaddr_t stack_va;
+    paddr_t stack_pa;
+
+    cons_detach();
+
+    /*
+     * Clear out all fds
+     *
+     * NOTE(review): the scan starts at slot 4 and descriptors
+     * whose refcnt is not 1 are left as they are - confirm
+     * that both are intended.
+     */
+    for (size_t i = 4; i < PROC_MAX_FILEDES; ++i) {
+        fdp = td->fds[i];
+        if (fdp == NULL) {
+            continue;
+        }
+        if (fdp->refcnt == 1) {
+            vfs_release_vnode(fdp->vp);
+            dynfree(fdp);
+
+            /*
+             * Clear the table slot itself. Resetting only
+             * the local would leave a dangling pointer to
+             * the freed descriptor in td->fds[].
+             */
+            td->fds[i] = NULL;
+        }
+    }
+
+    pcbp = &td->pcb;
+    unload_td(td);
+
+    /*
+     * User space stacks are identity mapped and
+     * kernel space stacks are not.
+     */
+    if (ISSET(td->flags, PROC_KTD)) {
+        stack_va = td->stack_base;
+        stack_pa = td->stack_base - VM_HIGHER_HALF;
+    } else {
+        stack_va = td->stack_base;
+        stack_pa = td->stack_base;
+        vm_unmap(pcbp->addrsp, stack_va, PROC_STACK_SIZE);
+    }
+
+    vm_free_frame(stack_pa, PROC_STACK_PAGES);
+    pmap_destroy_vas(pcbp->addrsp);
+}
+
 /*
  * Kill a thread and deallocate its resources.
*
  * @td: Thread to exit
  */
 int
-exit1(struct proc *td)
+exit1(struct proc *td, int flags)
 {
-    struct pcb *pcbp;
-    struct proc *curtd;
-    uintptr_t stack;
+    struct proc *curtd, *procp;
+    struct proc *parent;
+    struct cpu_info *ci;
     pid_t target_pid, curpid;

+    /*
+     * PID 1 is never allowed to exit, treat it as fatal.
+     *
+     * NOTE(review): `flags' is not referenced anywhere in
+     * this function body.
+     */
+    if (td->pid == 1) {
+        panic("init died\n");
+    }
+
+    ci = this_cpu();
     target_pid = td->pid;
     curtd = this_td();
-    pcbp = &td->pcb;

     curpid = curtd->pid;
-    stack = td->stack_base;
+    td->flags |= PROC_EXITING;
+    parent = td->parent;

-    /*
-     * If this is on the higher half, it is kernel
-     * mapped and we need to convert it to a physical
-     * address.
-     */
-    if (stack >= VM_HIGHER_HALF) {
-        stack -= VM_HIGHER_HALF;
+    /* We have one less process in the system! */
+    atomic_dec_64(&g_nthreads);
+
+    /*
+     * Reassign children to init
+     *
+     * NOTE(review): only the `parent' pointer of each child
+     * is rewritten; nothing here moves the children onto
+     * another process' leafq.
+     */
+    if (td->nleaves > 0) {
+        TAILQ_FOREACH(procp, &td->leafq, leaf_link) {
+            procp->parent = &g_init;
+        }
     }

-    unload_td(td);
-    vm_unmap(pcbp->addrsp, td->stack_base, PROC_STACK_SIZE);
-    vm_free_frame(stack, PROC_STACK_PAGES);
+    /* Resources are only reaped here when killing another process */
+    if (target_pid != curpid) {
+        proc_reap(td);
+    }

-    pmap_destroy_vas(pcbp->addrsp);
-    dynfree(td);
+    /* NOTE(review): td->data is left pointing at freed memory */
+    if (td->data != NULL) {
+        dynfree(td->data);
+    }
+
+    /*
+     * Only free the process structure if we aren't
+     * being waited on, otherwise let it be so the
+     * parent can examine what's left of it.
+     */
+    if (!ISSET(td->flags, PROC_WAITED)) {
+        dynfree(td);
+    } else {
+        td->flags |= PROC_ZOMB;
+        td->flags &= ~PROC_WAITED;
+    }

     /*
      * If we are the thread exiting, reenter the scheduler
      * and do not return.
      */
-    if (target_pid == curpid)
+    if (target_pid == curpid) {
+        /*
+         * If the thread is exiting on a core that is not
+         * preemptable, something is not right.
+         */
+        if (__unlikely(!sched_preemptable())) {
+            panic("exit1: cpu %d not preemptable\n", ci->id);
+        }
+
+        /*
+         * `td' may already have been freed above, so only the
+         * `parent' pointer saved earlier is used from here on.
+         */
+        ci->curtd = NULL;
+        if (parent->pid == 0)
+            sched_enter();
+
+        /* Clear the parent's sleep flag before leaving */
+        parent->flags &= ~PROC_SLEEP;
         sched_enter();
+    }

     return 0;
 }

+/*
+ * Terminate the calling process.
+ *
+ * arg0: Exit status.
+ *
+ * The status is stored in the process before exit1() is
+ * called on it with a `flags' value of zero.
+ */
 scret_t
 sys_exit(struct syscall_args *scargs)
 {
-    exit1(this_td());
+    struct proc *td = this_td();
+
+    td->exit_status = scargs->arg0;
+    exit1(td, 0);
     __builtin_unreachable();
 }
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index abb7707..2755ea0 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -27,61 +27,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */

-#include <sys/mman.h>
-#include <sys/tree.h>
 #include <sys/types.h>
 #include <sys/proc.h>
-#include <sys/errno.h>
-#include <sys/sched.h>
-#include <sys/signal.h>
-#include <vm/dynalloc.h>
-#include <string.h>

-static size_t nthreads = 0;

-/*
- * Fork1 - fork and direct a thread to 'ip'
- *
- * @cur: Current process.
- * @flags: Flags to set.
- * @ip: Location for new thread to start at.
- * @newprocp: Will contain new thread if not NULL.
- */
-int
-fork1(struct proc *cur, int flags, void(*ip)(void), struct proc **newprocp)
-{
-    struct proc *newproc;
-    struct mmap_lgdr *mlgdr;
-    int status = 0;
-
-    newproc = dynalloc(sizeof(*newproc));
-    if (newproc == NULL)
-        return -ENOMEM;
-
-    mlgdr = dynalloc(sizeof(*mlgdr));
-    if (mlgdr == NULL)
-        return -ENOMEM;
-
-    memset(newproc, 0, sizeof(*newproc));
-    status = md_fork(newproc, cur, (uintptr_t)ip);
-    if (status != 0)
-        goto done;
-
-    /* Set proc output if we can */
-    if (newprocp != NULL)
-        *newprocp = newproc;
-
-    /* Initialize the mmap ledger */
-    mlgdr->nbytes = 0;
-    RBT_INIT(lgdr_entries, &mlgdr->hd);
-    newproc->mlgdr = mlgdr;
-
-    newproc->pid = ++nthreads;
-    signals_init(newproc);
-    sched_enqueue_td(newproc);
-done:
-    if (status != 0)
-        dynfree(newproc);
-
-    return status;
-}
diff --git a/sys/kern/kern_krq.c b/sys/kern/kern_krq.c
new file mode 100644
index 0000000..c12a98c
--- /dev/null
+++ b/sys/kern/kern_krq.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/syscall.h> +#include <sys/krq.h> +#include <sys/errno.h> +#include <sys/spinlock.h> +#include <sys/driver.h> +#include <sys/syslog.h> + +static struct spinlock krq_lock = {0}; + +/* + * Load a kernel runtime quantum (KRQ) + * + * @arg0: path + * + * XXX: If the 'path' argument is NULL, all deferrable + * drivers are loaded. + * + * TODO: Handle non-null paths where a completly seperate + * module/krq can be loaded. 
+ */
+scret_t
+sys_inject(struct syscall_args *scargs)
+{
+    /* Only the "no path" form (arg0 == 0) is accepted for now */
+    if (scargs->arg0 != 0) {
+        return -EINVAL;
+    }
+
+    /* DRIVERS_SCHED() is run with krq_lock held */
+    spinlock_acquire(&krq_lock);
+    DRIVERS_SCHED();
+    spinlock_release(&krq_lock);
+    return 0;
+}
diff --git a/sys/kern/kern_panic.c b/sys/kern/kern_panic.c
index 950ea8f..13b4964 100644
--- a/sys/kern/kern_panic.c
+++ b/sys/kern/kern_panic.c
@@ -31,6 +31,25 @@
 #include <sys/spinlock.h>
 #include <sys/syslog.h>
 #include <sys/reboot.h>
+#include <dev/cons/cons.h>
+#include <machine/cdefs.h>
+#include <machine/cpu.h>
+#include <string.h>
+
+#if defined(__PANIC_SCR)
+#define PANIC_SCR __PANIC_SCR
+#else
+#define PANIC_SCR 0
+#endif
+
+/*
+ * Write a NUL-terminated string straight to the root
+ * console screen through cons_putstr().
+ *
+ * @str: String to print
+ */
+static void
+panic_puts(const char *str)
+{
+    size_t len;
+
+    len = strlen(str);
+    cons_putstr(&g_root_scr, str, len);
+}

 /*
  * Burn and sizzle - the core logic that really ends
@@ -47,14 +66,40 @@ bas(bool do_trace, int reboot_type)
     spinlock_acquire(&lock);    /* Never released */

     if (do_trace) {
-        kprintf(OMIT_TIMESTAMP "** backtrace\n");
+        panic_puts(" ** backtrace\n");
         md_backtrace();
     }

+    panic_puts("\n-- ALL CORES HAVE BEEN HALTED --\n");
     cpu_reboot(reboot_type);
     __builtin_unreachable();
 }

+/*
+ * Recolor, rewind and clear the root console screen
+ * before the panic message is written. Nothing is done
+ * when the screen has no framebuffer memory attached.
+ */
+static void
+panic_screen(void)
+{
+    struct cons_screen *scr = &g_root_scr;
+
+    if (scr->fb_mem != NULL) {
+        scr->bg = 0x8B0000;
+        scr->fg = 0xAABBAA;
+        cons_reset_cursor(scr);
+        cons_clear_scr(scr, 0x393B39);
+    }
+}
+
+/*
+ * Print the panic message and hand over to bas(),
+ * which does not return.
+ *
+ * @fmt: Format string of the message
+ * @ap: Arguments for `fmt'
+ *
+ * The root screen lock is released unconditionally before
+ * printing (presumably so a holder that was interrupted by
+ * the panic cannot block the output - confirm).
+ */
+static void
+do_panic(const char *fmt, va_list *ap)
+{
+    syslog_silence(false);
+    spinlock_release(&g_root_scr.lock);
+    panic_puts("panic: ");
+    vkprintf(fmt, ap);
+    bas(true, REBOOT_HALT);
+
+    __builtin_unreachable();
+}
+
 /*
  * Tells the user something terribly wrong happened then
  * halting the system as soon as possible.
@@ -69,11 +114,15 @@ panic(const char *fmt, ...)
 {
     va_list ap;

-    va_start(ap, fmt);
-    kprintf(OMIT_TIMESTAMP "panic: ");
-    vkprintf(fmt, &ap);
-    bas(true, REBOOT_HALT);
+    /* Shut everything else up */
+    md_intoff();
+    cpu_halt_others();
+    if (PANIC_SCR) {
+        panic_screen();
+    }

+    va_start(ap, fmt);
+    do_panic(fmt, &ap);
     __builtin_unreachable();
 }

@@ -89,7 +138,6 @@ hcf(const char *fmt, ...)
 {
     va_list ap;

-
     if (fmt != NULL) {
         va_start(ap, fmt);
         kprintf(OMIT_TIMESTAMP);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
new file mode 100644
index 0000000..8bc5680
--- /dev/null
+++ b/sys/kern/kern_proc.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/cdefs.h>
+#include <sys/vnode.h>
+#include <sys/tree.h>
+#include <sys/syscall.h>
+#include <sys/filedesc.h>
+#include <sys/fcntl.h>
+#include <string.h>
+#include <crc32.h>
+
+extern volatile size_t g_nthreads;
+
+/*
+ * Get the PID of the calling process.
+ *
+ * Returns the PID, or -1 if there is no current process.
+ */
+pid_t
+getpid(void)
+{
+    struct proc *td;
+
+    td = this_td();
+    if (td == NULL) {
+        return -1;
+    }
+
+    return td->pid;
+}
+
+/*
+ * Get the PID of the parent of the calling process.
+ *
+ * Returns the parent's PID, or -1 if there is no current
+ * process or it has no parent.
+ */
+pid_t
+getppid(void)
+{
+    struct proc *td;
+
+    td = this_td();
+    if (td == NULL) {
+        return -1;
+    }
+    if (td->parent == NULL) {
+        return -1;
+    }
+
+    return td->parent->pid;
+}
+
+/*
+ * Write a core dump of a process to "/tmp/core.<pid>".
+ *
+ * @td: Process to dump
+ * @fault_addr: Faulting address to record in the dump
+ *
+ * This is best effort: if the file cannot be opened or its
+ * vnode cannot be obtained, nothing is written.
+ */
+void
+proc_coredump(struct proc *td, uintptr_t fault_addr)
+{
+    struct coredump core;
+    struct sio_txn sio;
+    struct filedesc *fdp;
+    struct vnode *vp;
+    char pathname[128];
+    int fd;
+
+    snprintf(pathname, sizeof(pathname), "/tmp/core.%d", td->pid);
+    fd = fd_open(pathname, O_RDWR | O_CREAT);
+
+    /* ... Hopefully not */
+    if (__unlikely(fd < 0)) {
+        return;
+    }
+
+    /* fd_get() may fail, don't dereference its result blindly */
+    fdp = fd_get(NULL, fd);
+    if (__unlikely(fdp == NULL || fdp->vp == NULL)) {
+        fd_close(fd);
+        return;
+    }
+    vp = fdp->vp;
+
+    /*
+     * Zero the dump first so that padding and any field not
+     * set below are neither checksummed nor written to the
+     * file as whatever happened to be on the stack.
+     */
+    memset(&core, 0, sizeof(core));
+    core.pid = td->pid;
+    core.fault_addr = fault_addr;
+    memcpy(&core.tf, &td->tf, sizeof(td->tf));
+
+    /* assumes `checksum' is the last member - TODO confirm */
+    core.checksum = crc32(&core, sizeof(core) - sizeof(core.checksum));
+
+    sio.buf = &core;
+    sio.len = sizeof(core);
+    sio.offset = 0;
+
+    /* Write the core file */
+    vfs_vop_write(vp, &sio);
+    fd_close(fd);
+}
+
+/*
+ * Perform the machine independent setup of a new process.
+ *
+ * @td: Process being initialized
+ * @parent: Process that `td' becomes a leaf of
+ *
+ * Links `td' into the parent's leaf queue, bumps the leaf
+ * and global thread counters, copies the parent's `cred',
+ * creates an empty mmap ledger, sets PROC_WAITED and
+ * initializes signals.
+ *
+ * Returns zero on success or -ENOMEM if the mmap ledger
+ * cannot be allocated (nothing is modified in that case).
+ */
+int
+proc_init(struct proc *td, struct proc *parent)
+{
+    struct mmap_lgdr *mlgdr;
+
+    mlgdr = dynalloc(sizeof(*mlgdr));
+    if (mlgdr == NULL) {
+        return -ENOMEM;
+    }
+
+    /* Add to parent leafq */
+    TAILQ_INSERT_TAIL(&parent->leafq, td, leaf_link);
+    atomic_inc_int(&parent->nleaves);
+    atomic_inc_64(&g_nthreads);
+    td->parent = parent;
+    td->exit_status = -1;
+    td->cred = parent->cred;
+
+    /* Initialize the mmap ledger */
+    mlgdr->nbytes = 0;
+    RBT_INIT(lgdr_entries, &mlgdr->hd);
+    td->mlgdr = mlgdr;
+    td->flags |= PROC_WAITED;
+    signals_init(td);
+    return 0;
+}
+
+/*
+ * Syscall wrapper around getpid(), takes no arguments.
+ */
+scret_t
+sys_getpid(struct syscall_args *scargs)
+{
+    return getpid();
+}
+
+/*
+ * Syscall wrapper around getppid(), takes no arguments.
+ */
+scret_t
+sys_getppid(struct syscall_args *scargs)
+{
+    return getppid();
+}
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
index 4bbe5a0..9c5e215 100644
--- a/sys/kern/kern_sched.c
+++ b/sys/kern/kern_sched.c
@@ -34,6 +34,7 @@
 #include <sys/param.h>
 #include <sys/syslog.h>
 #include <sys/atomic.h>
+#include <dev/cons/cons.h>
 #include <machine/frame.h>
 #include <machine/cpu.h>
 #include <machine/cdefs.h>
@@ -44,7 +45,8 @@

 #define pr_trace(fmt, ...)
kprintf("ksched: " fmt, ##__VA_ARGS__)

-void sched_switch(struct trapframe *tf);
+void md_sched_switch(struct trapframe *tf);
+void sched_accnt_init(void);

 static sched_policy_t policy = SCHED_POLICY_MLFQ;

@@ -63,7 +65,7 @@ __cacheline_aligned static struct spinlock tdq_lock = {0};
 /*
  * Perform timer oneshot
  */
-static inline void
+void
 sched_oneshot(bool now)
 {
     struct timer timer;
@@ -77,39 +79,75 @@ sched_oneshot(bool now)
 }

 /*
- * Save thread state and enqueue it back into one
- * of the ready queues.
+ * Returns true if a processor is associated
+ * with a specific thread
+ *
+ * @ci: CPU that wants to take 'td'
+ * @td: Thread to check against (must not be NULL)
  */
-static void
-sched_save_td(struct proc *td, struct trapframe *tf)
+static bool
+cpu_is_assoc(struct cpu_info *ci, struct proc *td)
 {
     /*
-     * Save trapframe to process structure only
-     * if PROC_EXEC is not set.
+     * If we are not pinned, any processor is
+     * associated.
      */
-    if (!ISSET(td->flags, PROC_EXEC)) {
-        memcpy(&td->tf, tf, sizeof(td->tf));
+    if (!ISSET(td->flags, PROC_PINNED)) {
+        return true;
     }

-    sched_enqueue_td(td);
+    return ci->id == td->affinity;
 }

-static struct proc *
+struct proc *
 sched_dequeue_td(void)
 {
     struct sched_queue *queue;
     struct proc *td = NULL;
+    struct cpu_info *ci;
+    uint32_t ncpu = 0;

     spinlock_acquire(&tdq_lock);
+    ci = this_cpu();

     for (size_t i = 0; i < SCHED_NQUEUE; ++i) {
         queue = &qlist[i];
-        if (!TAILQ_EMPTY(&queue->q)) {
-            td = TAILQ_FIRST(&queue->q);
-            TAILQ_REMOVE(&queue->q, td, link);
-            spinlock_release(&tdq_lock);
-            return td;
+        if (TAILQ_EMPTY(&queue->q)) {
+            continue;
         }
+
+        td = TAILQ_FIRST(&queue->q);
+        if (td == NULL) {
+            continue;
+        }
+
+        /*
+         * Walk the queue until we find a thread that is
+         * both awake and allowed to run here. If we are on
+         * a multicore system and this isn't our process,
+         * don't take it. Some threads might be pinned to a
+         * specific processor.
+         *
+         * Both conditions are tested on every candidate in
+         * a single pass: `td' is never handed to
+         * cpu_is_assoc() once it has become NULL and a
+         * sleeping thread can not slip through after an
+         * affinity mismatch.
+         */
+        ncpu = cpu_count();
+        while (td != NULL) {
+            if (!ISSET(td->flags, PROC_SLEEP) &&
+                (ncpu <= 1 || cpu_is_assoc(ci, td))) {
+                break;
+            }
+
+            td = TAILQ_NEXT(td, link);
+        }
+
+        /* Nothing runnable in this queue, try the next one */
+        if (td == NULL) {
+            continue;
+        }
+
+        TAILQ_REMOVE(&queue->q, td, link);
+        spinlock_release(&tdq_lock);
+        return td;
     }

     /* We got nothing */
@@ -141,6 +179,9 @@ this_td(void)
     struct cpu_info *ci;

     ci = this_cpu();
+    if (ci == NULL) {
+        return NULL;
+    }
     return ci->curtd;
 }

@@ -177,62 +218,21 @@ td_pri_update(struct proc *td)
 }

 /*
- * Perform a context switch.
+ * MI work to be done during a context
+ * switch. Called by md_sched_switch()
  */
 void
-sched_switch(struct trapframe *tf)
+mi_sched_switch(struct proc *from)
 {
-    struct cpu_info *ci;
-    struct pcb *pcbp;
-    struct proc *next_td, *td;
-    bool use_current = true;
-
-    ci = this_cpu();
-    td = ci->curtd;
-
-    if (td != NULL) {
-        dispatch_signals(td);
-        td_pri_update(td);
-    }
-
-    /*
-     * Get the next thread and use it only if it isn't
-     * in the middle of an exit, exec, or whatever.
-     */
-    do {
-        if ((next_td = sched_dequeue_td()) == NULL) {
-            sched_oneshot(false);
+    if (from != NULL) {
+        if (from->pid == 0)
             return;
-        }

-        /*
-         * If we are in the middle of an exec, don't use this
-         * thread.
-         */
-        if (ISSET(next_td->flags, PROC_EXEC)) {
-            use_current = false;
-        }
-
-        /*
-         * Don't use this thread if we are currently
-         * exiting.
-         */
-        if (ISSET(next_td->flags, PROC_EXITING)) {
-            use_current = false;
-        }
-    } while (!use_current);
-
-    /* Save the previous thread */
-    if (td != NULL) {
-        sched_save_td(td, tf);
+        dispatch_signals(from);
+        td_pri_update(from);
     }

-    memcpy(tf, &next_td->tf, sizeof(*tf));
-    ci->curtd = next_td;
-    pcbp = &next_td->pcb;
-
-    pmap_switch_vas(pcbp->addrsp);
-    sched_oneshot(false);
+    cons_detach();
 }

 /*
@@ -242,9 +242,8 @@ void
 sched_enter(void)
 {
     md_inton();
-    md_sync_all();
+    sched_oneshot(false);
     for (;;) {
-        sched_oneshot(false);
         md_pause();
     }
 }
@@ -252,14 +251,154 @@ sched_enter(void)
 void
 sched_yield(void)
 {
-    struct proc *td = this_td();
+    struct proc *td;
+    struct cpu_info *ci = this_cpu();

-    if (td != NULL) {
-        td->rested = true;
+    if ((td = ci->curtd) == NULL) {
+        return;
     }

+    td->rested = true;
+
+    /* FIXME: Hang yielding when waited on */
+    if (ISSET(td->flags, PROC_WAITED)) {
+        return;
+    }
+
+    /*
+     * Give up the processor: clear the current thread of
+     * this CPU, arm the oneshot timer with interrupts on
+     * and halt. Once execution resumes after the halt,
+     * interrupts are turned off again and `td' is put
+     * back as this CPU's current thread.
+     */
+    ci->curtd = NULL;
+    md_inton();
     sched_oneshot(false);
-    while (td->rested);
+
+    md_hlt();
+    md_intoff();
+    ci->curtd = td;
+}
+
+/*
+ * Remove a thread from the ready queue that matches
+ * its current priority.
+ *
+ * @td: Thread to detach
+ *
+ * NOTE(review): TAILQ_REMOVE() is issued unconditionally,
+ * so this presumably requires `td' to be linked on
+ * qlist[td->priority] at the time of the call - confirm
+ * against callers.
+ */
+void
+sched_detach(struct proc *td)
+{
+    struct sched_queue *queue;
+
+    spinlock_acquire(&tdq_lock);
+    queue = &qlist[td->priority];
+
+    TAILQ_REMOVE(&queue->q, td, link);
+    spinlock_release(&tdq_lock);
+}
+
+/*
+ * Pin a process to a specific processor
+ *
+ * @td: Process to pin
+ * @cpu: Logical processor ID to pin `td' to.
+ *
+ * XXX: 'cpu' is a machine independent value, representing
+ *      CPU<n>
+ */
+void
+proc_pin(struct proc *td, affinity_t cpu)
+{
+    td->affinity = cpu;
+    td->flags |= PROC_PINNED;
+}
+
+/*
+ * Unpin a pinned process, allowing it to be
+ * picked up by any processor
+ *
+ * @td: Process to unpin
+ */
+void
+proc_unpin(struct proc *td)
+{
+    td->affinity = 0;
+    td->flags &= ~PROC_PINNED;
+}
+
+/*
+ * Suspend a process for a specified amount
+ * of time.
+ * This calling process will yield for
+ * the amount of time specified in 'tv'
+ *
+ * @td: Process to suspend (NULL for current)
+ * @tv: Time value to use
+ *
+ * XXX: 'tv' being NULL is equivalent to calling
+ *      sched_detach()
+ *
+ * NOTE(review): the wait loop goes through sched_yield(),
+ * which acts on the current thread of this CPU regardless
+ * of which `td' was passed in.
+ */
+void
+sched_suspend(struct proc *td, const struct timeval *tv)
+{
+    struct timer tmr;
+    const time_t USEC_PER_SEC = 1000000;
+    ssize_t usec;
+    time_t usec_cur, usec_now, usec_tmp;
+    bool have_timer = true;
+    tmrr_status_t tmr_status;
+
+    if (td == NULL)
+        td = this_td();
+    if (__unlikely(td == NULL))
+        return;
+
+    if (tv == NULL) {
+        sched_detach(td);
+        return;
+    }
+
+    /*
+     * Now, we need a generic timer so that we can compute
+     * how much time has elapsed since this process has
+     * requested to be suspended. However, we cannot assume
+     * that it would be present. If the lookup fails, all we
+     * can do is try to estimate how much time went by which
+     * works fine too, just not as accurate.
+     *
+     * `tmr' is only inspected when the request succeeded
+     * and the callback that is tested is the one that is
+     * actually called below, as we need microsecond
+     * precision.
+     */
+    tmr_status = req_timer(TIMER_GP, &tmr);
+    if (tmr_status != TMRR_SUCCESS) {
+        have_timer = false;
+    } else if (tmr.get_time_usec == NULL) {
+        have_timer = false;
+    }
+
+    /*
+     * Compute the max time in microseconds that
+     * we will wait. We are using both tv->tv_sec
+     * and tv->tv_usec
+     */
+    usec = tv->tv_usec;
+    usec += tv->tv_sec * USEC_PER_SEC;
+    usec_cur = (have_timer) ? tmr.get_time_usec() : 0;
+
+    for (;;) {
+        sched_yield();
+
+        /*
+         * If we have a timer in our paws, compute how much
+         * time went by. Otherwise we estimate by subtracting
+         * the scheduler quantum.
+         *
+         * The reference point is moved forward on every pass
+         * so that only the time spent in the last yield is
+         * subtracted, not the total since the start over and
+         * over again.
+         *
+         * XXX: The timing here works decently as intended. However,
+         *      it would be nice to smoothen out any jitter. Such can
+         *      probably be done by subtracting 'usec' by the exponential
+         *      moving average of 'usec_tmp' rather than the raw original
+         *      value.
+         */
+        if (have_timer) {
+            usec_now = tmr.get_time_usec();
+            usec_tmp = usec_now - usec_cur;
+            usec_cur = usec_now;
+        } else {
+            usec_tmp = DEFAULT_TIMESLICE_USEC;
+        }
+
+        /* We are done here! */
+        usec -= usec_tmp;
+        if (usec <= 0) {
+            break;
+        }
+    }
 }

 void
@@ -272,4 +411,6 @@ sched_init(void)

     pr_trace("prepared %d queues (policy=0x%x)\n",
         SCHED_NQUEUE, policy);
+
+    sched_accnt_init();
 }
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 58bd52d..044de7b 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -58,6 +58,12 @@ static struct sigaction sa_tab[] = {
         .sa_flags = 0,
         .sa_sigaction = NULL
     },
+    [SIGTERM] = {
+        .sa_handler = sigterm_default,
+        .sa_mask = 0,
+        .sa_flags = 0,
+        .sa_sigaction = NULL
+    }
 };

 /*
diff --git a/sys/kern/kern_socket.c b/sys/kern/kern_socket.c
new file mode 100644
index 0000000..d0fbe19
--- /dev/null
+++ b/sys/kern/kern_socket.c
@@ -0,0 +1,1009 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/socket.h> +#include <sys/sio.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/time.h> +#include <sys/namei.h> +#include <sys/sched.h> +#include <sys/errno.h> +#include <sys/syslog.h> +#include <sys/filedesc.h> +#include <sys/fcntl.h> +#include <sys/vnode.h> +#include <vm/dynalloc.h> +#include <string.h> + +#define pr_trace(fmt, ...) kprintf("socket: " fmt, ##__VA_ARGS__) +#define pr_error(...) pr_trace(__VA_ARGS__) + +static struct vops socket_vops; + +/* + * This table maps socket option names to + * lengths of their underlying structure. + * + * This is used for bounds/length checking within + * setsockopt() + */ +static size_t sockopt_lentab[_SO_MAX] = { + [ SO_RCVTIMEO ] = sizeof(struct timeval) +}; + +/* + * Get a kernel socket structure from a + * file descriptor. + * + * @sockfd: File descriptor to lookup + * @res: Result pointer + * + * Returns zero on success, otherwise a less + * than zero errno. + */ +static int +get_ksock(int sockfd, struct ksocket **res) +{ + struct ksocket *ksock; + struct filedesc *fdesc; + struct vnode *vp; + + if (res == NULL) { + return -EINVAL; + } + + /* Grab the file descriptor */ + fdesc = fd_get(NULL, sockfd); + if (fdesc == NULL) { + return -EBADF; + } + + /* Is this even a socket? 
*/ + if ((vp = fdesc->vp) == NULL) { + return -ENOTSOCK; + } + if (vp->type != VSOCK) { + return -ENOTSOCK; + } + + ksock = vp->data; + if (__unlikely(ksock == NULL)) { + return -EIO; + } + + *res = ksock; + return 0; +} + +/* + * VFS reclaim callback for the socket + * layer + * + * Returns zero on success, otherwise a less + * than zero errno. + */ +static int +socket_reclaim(struct vnode *vp) +{ + struct ksocket *ksock; + struct sockopt *opt; + + /* Is this even a socket? */ + if (vp->type != VSOCK) { + return -ENOTSOCK; + } + + /* Is there any data attached? */ + if ((ksock = vp->data) == NULL) { + return -EIO; + } + + /* Free up any used options */ + for (int i = 0; i < _SO_MAX; ++i) { + opt = ksock->opt[i]; + if (opt != NULL) { + dynfree(opt); + ksock->opt[i] = NULL; + } + } + + fd_close(ksock->sockfd); + mutex_free(ksock->mtx); + dynfree(ksock); + return 0; +} + +/* + * Create a socket file from the sockaddr + * structure + * + * @ksock: Socket to create a file for + * @sockaddr_un: domain sockaddr + */ +static int +socket_mkfile(struct ksocket *ksock, struct sockaddr_un *un) +{ + struct filedesc *fdesc; + struct vnode *vp; + int fd; + + fd = fd_open(un->sun_path, O_CREAT | O_RDONLY); + if (fd < 0) { + return fd; + } + + /* Grab the actual handle now */ + fdesc = fd_get(NULL, fd); + if (fdesc == NULL) { + fd_close(fd); + return -EIO; + } + + /* Hijack the vnode */ + vp = fdesc->vp; + vp->type = VSOCK; + vp->vops = &socket_vops; + vp->data = ksock; + return fd; +} + +/* + * Connect to a domain socket - used by connect() + * + * @sockfd: Socket file descriptor + * @ksock: Current ksock + * @un: Current sockaddr_un + */ +static int +connect_domain(int sockfd, struct ksocket *ksock, struct sockaddr_un *un) +{ + int error; + struct nameidata ndp; + struct filedesc *filedesc; + struct vnode *vp; + + ndp.path = un->sun_path; + ndp.flags = 0; + if ((error = namei(&ndp)) < 0) { + return error; + } + + vp = ndp.vp; + filedesc = fd_get(NULL, sockfd); + if (filedesc == 
NULL) { + pr_error("connect: no filedesc for current\n"); + return -EIO; + } + + filedesc->vp = vp; + return 0; +} + +/* + * Wait until data is received for the + * recv() function. + * + * @sockfd: Socket we are waiting on + * + * Returns zero on success, otherwise a less + * than zero value is returned. + */ +static int +socket_rx_wait(int sockfd) +{ + struct ksocket *ksock; + struct sockopt *opt; + struct timeval tv; + int error; + + if (ksock == NULL) { + return -EINVAL; + } + + error = get_ksock(sockfd, &ksock); + if (error < 0) { + return error; + } + + /* + * If the socket does not have this option set, + * we will assume that there is no timeout value. + */ + opt = ksock->opt[SO_RCVTIMEO]; + if (opt == NULL) { + return 0; + } + + memcpy(&tv, opt->data, opt->len); + sched_suspend(NULL, &tv); + return 0; +} + +/* + * Send data to socket - POSIX send(2) core + * + * @sockfd: File descriptor that backs this socket + * @buf: Buffer containing data to transmit + * @size: Size of the buffer + * @flags: Optional flags + * + * Returns zero on success, otherwise a less + * than zero errno. 
+ */ +ssize_t +send(int sockfd, const void *buf, size_t size, int flags) +{ + struct ksocket *ksock; + struct sockbuf *sbuf; + struct netbuf *netbuf; + size_t tail; + int error; + + /* Size cannot be zero */ + if (size == 0) { + return -EINVAL; + } + + if ((error = get_ksock(sockfd, &ksock)) < 0) { + return error; + } + + sbuf = &ksock->buf; + netbuf = &sbuf->buf; + mutex_acquire(ksock->mtx, 0); + + /* Make sure we dont overflow */ + if (netbuf->len > sbuf->watermark) { + mutex_release(ksock->mtx); + return -ENOBUFS; + } + + if (netbuf->len == 0) { + sbuf->head = 0; + sbuf->tail = 0; + } + + /* Clamp the size if needed */ + if ((netbuf->len + size) > sbuf->watermark) { + size = sbuf->watermark - netbuf->len; + } + if (size == 0) { + return -ENOBUFS; + } + + /* Copy the new data */ + tail = sbuf->tail; + memcpy(&netbuf->data[tail], buf, size); + + sbuf->tail += size; + netbuf->len += size; + mutex_release(ksock->mtx); + return size; +} + +/* + * Recv data from socket - POSIX recv(2) core + * + * @sockfd: File descriptor that backs this socket + * @buf: RX buffer + * @size: Size of the buffer + * @flags: Optional flags + * + * Returns length on success, otherwise a less + * than zero errno. + */ +ssize_t +recv(int sockfd, void *buf, size_t len, int flags) +{ + struct ksocket *ksock; + struct sockbuf *sbuf; + struct netbuf *netbuf; + size_t head; + ssize_t retval = len; + int error; + + /* Length cannot be zero */ + if (len == 0) { + return -EINVAL; + } + + if ((error = get_ksock(sockfd, &ksock)) < 0) { + return error; + } + + sbuf = &ksock->buf; + netbuf = &sbuf->buf; + mutex_acquire(ksock->mtx, 0); + + /* Is it empty? 
*/ + if (netbuf->len == 0) { + sbuf->head = 0; + sbuf->tail = 0; + retval = -EAGAIN; + goto done; + } + + if (len > netbuf->len) { + len = netbuf->len; + } + + head = sbuf->head; + memcpy(buf, &netbuf->data[head], len); + sbuf->head = (sbuf->head + len) % NETBUF_LEN; +done: + mutex_release(ksock->mtx); + return retval; +} + +/* + * POSIX socket(7) core + * + * @domain: Address family (see AF_*) + * @type: Socket type + * @protocol: Socket protocol + * + * Returns zero on success, otherwise a less + * than zero errno. + */ +int +socket(int domain, int type, int protocol) +{ + struct ksocket *ksock = NULL; + struct sockbuf *sbuf = NULL; + struct proc *td = this_td(); + int fd, error = -1; + + ksock = dynalloc(sizeof(*ksock)); + if (ksock == NULL) { + error = -ENOMEM; + goto fail; + } + + memset(ksock, 0, sizeof(*ksock)); + sbuf = &ksock->buf; + sbuf->head = 0; + sbuf->tail = 0; + + switch (domain) { + case AF_UNIX: + { + struct sockaddr_un *un; + + un = &ksock->un; + sbuf->watermark = NETBUF_LEN; + + /* Set up a path and create a socket file */ + un->sun_family = domain; + snprintf(un->sun_path, sizeof(un->sun_path), "/tmp/%d-sock0", td->pid); + fd = socket_mkfile(ksock, un); + } + return fd; + default: + error = -EINVAL; + break; + } + +fail: + if (ksock != NULL) + dynfree(ksock); + + fd_close(fd); + return error; +} + +/* + * Bind address to socket - POSIX bind(2) core + * + * @sockfd: File descriptor + * @addr: Address to bind + * @len: Sockaddr len + * + * Returns zero on success, otherwise a less + * than zero errno. 
+ */ +int +bind(int sockfd, const struct sockaddr *addr, socklen_t len) +{ + struct proc *td; + struct ksocket *ksock; + struct cmsg_list *clp; + int error; + + if ((error = get_ksock(sockfd, &ksock)) < 0) { + kprintf("error=%d\n", error); + return error; + } + + /* Create the new mutex lock */ + ksock->mtx = mutex_new("ksocket"); + if (ksock->mtx == NULL) { + return -ENOMEM; + } + + /* Mark ourselves as the owner */ + td = this_td(); + ksock->owner = td; + + /* Initialize the cmsg list queue */ + clp = &ksock->cmsg_list; + TAILQ_INIT(&clp->list); + clp->is_init = 1; + return 0; +} + +/* + * Set socket options - POSIX setsockopt(3) core + * + * @sockfd: File descriptor of socket + * @level: Protocol level + * @v: Options value + * @len: Length of data pointed to by 'v' + */ +int +setsockopt(int sockfd, int level, int name, const void *v, socklen_t len) +{ + struct ksocket *ksock; + struct sockopt *opt; + size_t exp_len; + int error; + + /* Must have a valid fd */ + if (sockfd < 0) { + return -EBADF; + } + + /* Ensure value and length are valid */ + if (v == NULL || len == 0) { + return -EINVAL; + } + + /* Verify the name */ + if (name >= _SO_MAX) { + return -EINVAL; + } + + /* Grab a new socket */ + if ((error = get_ksock(sockfd, &ksock)) < 0) { + return error; + } + + /* Clamp the input length as needed */ + exp_len = sockopt_lentab[name]; + if (len > exp_len) { + len = exp_len; + } + + /* + * Here we will grab the socket options. If it is + * NULL, we'll need to allocate one. 
+ */ + if ((opt = ksock->opt[name]) == NULL) { + opt = dynalloc(sizeof(*opt) + len); + + if (opt == NULL) { + return -ENOMEM; + } + + opt->len = len; + ksock->opt[name] = opt; + } + + memcpy(opt->data, v, len); + opt->len = len; + return 0; +} + +/* + * Connect to a socket + * + * @sockfd: File descriptor to connect + * @addr: Address to connect to + * @len: Length of address + */ +int +connect(int sockfd, const struct sockaddr *addr, socklen_t len) +{ + struct ksocket *ksock; + int error = -1; + + if ((error = get_ksock(sockfd, &ksock)) < 0) { + return error; + } + + switch (addr->sa_family) { + case AF_UNIX: + { + struct sockaddr_un *un; + + un = (struct sockaddr_un *)addr; + if (un->sun_path[0] == '\0') { + pr_error("connect: bad socket path\n"); + return -1; + } + + /* Wait for the connection to be established */ + do { + error = connect_domain(sockfd, ksock, un); + if (error != 0) { + sched_yield(); + } + } while (error != 0); + + return 0; + } + } + + return -1; +} + +/* + * Send socket control message - POSIX.1-2008 + * + * @socket: Socket to transmit on + * @msg: Further arguments + * @flags: Optional flags + * + * Returns zero on success, otherwise a less + * than zero errno. 
+ */ +ssize_t +sendmsg(int socket, const struct msghdr *msg, int flags) +{ + struct ksocket *ksock; + struct cmsg *cmsg; + struct sockaddr_un *un; + struct cmsg_list *clp; + size_t control_len = 0; + int error; + + if ((error = get_ksock(socket, &ksock)) < 0) { + return error; + } + + /* We cannot do sendmsg() non domain sockets */ + un = &ksock->un; + if (un->sun_family != AF_UNIX) { + return -EBADF; + } + + control_len = MALIGN(msg->msg_controllen); + + /* Allocate a new cmsg */ + cmsg = dynalloc(control_len + sizeof(struct cmsg)); + if (cmsg == NULL) { + return -EINVAL; + } + + memcpy(cmsg->buf, msg->msg_control, control_len); + clp = &ksock->cmsg_list; + cmsg->control_len = control_len; + TAILQ_INSERT_TAIL(&clp->list, cmsg, link); + return 0; +} + +/* + * Receive socket control message - POSIX.1‐2017 + * + * @socket: Socket to receive on + * @msg: Further arguments + * @flags: Optional flags + * + * Returns zero on success, otherwise a less + * than zero errno. + */ +ssize_t +recvmsg(int socket, struct msghdr *msg, int flags) +{ + struct ksocket *ksock; + struct sockaddr_un *un; + struct cmsg *cmsg, *tmp; + struct cmsghdr *cmsghdr; + struct cmsg_list *clp; + uint8_t *fds; + int error; + + if (socket < 0) { + return -EINVAL; + } + + /* Grab the socket descriptor */ + if ((error = get_ksock(socket, &ksock)) < 0) { + return error; + } + + /* Must be a unix domain socket */ + un = &ksock->un; + if (un->sun_family != AF_UNIX) { + return -EBADF; + } + + /* Grab the control message list */ + clp = &ksock->cmsg_list; + cmsg = TAILQ_FIRST(&clp->list); + + /* Empty? 
*/ + while (cmsg == NULL) { + sched_yield(); + cmsg = TAILQ_FIRST(&clp->list); + } + + while (cmsg != NULL) { + cmsghdr = &cmsg->hdr; + + /* Check the control message type */ + switch (cmsghdr->cmsg_type) { + case SCM_RIGHTS: + { + fds = (uint8_t *)CMSG_DATA(cmsghdr); + pr_trace("SCM_RIGHTS -> fd %d (from pid %d)\n", fds[0], + ksock->owner->pid); + + break; + } + } + + tmp = cmsg; + cmsg = TAILQ_NEXT(cmsg, link); + + TAILQ_REMOVE(&clp->list, tmp, link); + dynfree(tmp); + } + + return 0; +} + +/* + * socket(7) syscall + * + * arg0: domain + * arg1: type + * arg2: protocol + */ +scret_t +sys_socket(struct syscall_args *scargs) +{ + int domain = scargs->arg0; + int type = scargs->arg1; + int protocol = scargs->arg2; + + return socket(domain, type, protocol); +} + +/* + * bind(2) syscall + * + * arg0: sockfd + * arg1: addr + * arg2: len + */ +scret_t +sys_bind(struct syscall_args *scargs) +{ + const struct sockaddr *u_addr = (void *)scargs->arg1; + struct sockaddr addr_copy; + int sockfd = scargs->arg0; + int len = scargs->arg2; + int error; + + error = copyin(u_addr, &addr_copy, sizeof(addr_copy)); + if (error < 0) { + return error; + } + + return bind(sockfd, &addr_copy, len); +} + +/* + * recv(2) syscall + * + * arg0: sockfd + * arg1: buf + * arg2: size + * arg3: flags + */ +scret_t +sys_recv(struct syscall_args *scargs) +{ + char buf[NETBUF_LEN]; + void *u_buf = (void *)scargs->arg1; + int sockfd = scargs->arg0; + size_t len = scargs->arg2; + int error, flags = scargs->arg3; + + if (len > sizeof(buf)) { + return -ENOBUFS; + } + + for (;;) { + error = recv(sockfd, buf, len, flags); + if (error <= 0 && error != -EAGAIN) { + break; + } + + /* + * Wait for data to be ready on the socket. + * If a less than zero value is returned, don't + * handle timeouts. 
+ */ + error = socket_rx_wait(sockfd); + if (error < 0) { + continue; + } + + /* Try one more time, obey timeout */ + error = recv(sockfd, buf, len, flags); + if (error == -EAGAIN) { + return error; + } + break; + } + + if (error < 0) { + pr_error("sys_recv: recv() fail (fd=%d)\n", sockfd); + return error; + } + + error = copyout(buf, u_buf, len); + return (error == 0) ? len : error; +} + +/* + * send(2) syscall + * + * arg0: sockfd + * arg1: buf + * arg2: size + * arg3: flags + */ +scret_t +sys_send(struct syscall_args *scargs) +{ + char buf[NETBUF_LEN]; + const void *u_buf = (void *)scargs->arg1; + int sockfd = scargs->arg0; + size_t len = scargs->arg2; + int error, flags = scargs->arg3; + + if (len > sizeof(buf)) { + return -ENOBUFS; + } + + error = copyin(u_buf, buf, len); + if (error < 0) { + pr_error("sys_send: copyin() failure (fd=%d)\n", sockfd); + return error; + } + + return send(sockfd, buf, len, flags); +} + +/* + * recvmsg(3) syscall + * + * arg0: socket + * arg1: msg + * arg2: flags + */ +scret_t +sys_recvmsg(struct syscall_args *scargs) +{ + struct msghdr *u_msg = (void *)scargs->arg1; + void *u_control, *control = NULL; + size_t controllen; + struct iovec msg_iov; + struct msghdr msg; + ssize_t retval; + int socket = scargs->arg0; + int flags = scargs->arg2; + int error; + + /* Read the message header */ + error = copyin(u_msg, &msg, sizeof(msg)); + if (error < 0) { + pr_error("sys_recvmsg: bad msg\n"); + return error; + } + + /* Grab the message I/O vector */ + error = uio_copyin(msg.msg_iov, &msg_iov, msg.msg_iovlen); + if (error < 0) { + return error; + } + + /* Save control fields */ + u_control = msg.msg_control; + controllen = msg.msg_controllen; + + /* Allocate a new control field to copy in */ + control = dynalloc(controllen); + msg.msg_control = control; + if (msg.msg_control == NULL) { + uio_copyin_clean(&msg_iov, msg.msg_iovlen); + return -ENOMEM; + } + + memset(msg.msg_control, 0, controllen); + error = copyin(u_control, msg.msg_control, 
controllen); + if (error < 0) { + retval = error; + goto done; + } + + /* + * Now wait until we get a control + * message + */ + msg.msg_iov = &msg_iov; + for (;;) { + retval = recvmsg(socket, &msg, flags); + if (retval == 0) { + break; + } + + sched_yield(); + } +done: + uio_copyin_clean(&msg_iov, msg.msg_iovlen); + dynfree(control); + return retval; +} + +/* + * sendmsg(3) syscall + * + * arg0: socket + * arg1: msg + * arg2: flags + */ +scret_t +sys_sendmsg(struct syscall_args *scargs) +{ + struct iovec msg_iov; + struct msghdr *u_msg = (void *)scargs->arg1; + struct msghdr msg; + ssize_t retval; + int socket = scargs->arg0; + int flags = scargs->arg2; + int error; + + /* Read the message header */ + error = copyin(u_msg, &msg, sizeof(msg)); + if (error < 0) { + pr_error("sys_sendmsg: bad msg\n"); + return error; + } + + /* Grab the message I/O vector */ + error = uio_copyin(msg.msg_iov, &msg_iov, msg.msg_iovlen); + if (error < 0) { + return error; + } + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &msg_iov; + + for (;;) { + retval = sendmsg(socket, &msg, flags); + if (retval == 0) { + break; + } + sched_yield(); + } + uio_copyin_clean(&msg_iov, msg.msg_iovlen); + return retval; +} + +/* + * connect(3) syscall + * + * arg0: sockfd + * arg1: address + * arg2: len + */ +scret_t +sys_connect(struct syscall_args *scargs) +{ + char buf[256]; + struct sockaddr *u_addr = (void *)scargs->arg1; + struct sockaddr *sockaddr; + int error; + int sockfd = scargs->arg0; + socklen_t len = scargs->arg2; + + if (len >= sizeof(buf)) { + pr_error("sys_connect: address too big\n"); + return -E2BIG; + } + + error = copyin(u_addr, buf, len); + if (error < 0) { + pr_error("sys_connect: bad 'address'\n"); + return error; + } + + sockaddr = (struct sockaddr *)buf; + return connect(sockfd, sockaddr, len); +} + +/* + * POSIX setsockopt(3) syscall + * + * arg0: sockfd + * arg1: level + * arg2: name + * arg3: data + * arg4: len + */ +scret_t +sys_setsockopt(struct 
syscall_args *scargs) +{ + int sockfd = scargs->arg0; + int level = scargs->arg1; + int name = scargs->arg2; + void *u_data = (void *)scargs->arg3; + socklen_t len = scargs->arg4; + void *data; + size_t exp_len; + int retval; + + /* Verify that the name is correct */ + if (name >= _SO_MAX) { + return -EINVAL; + } + + /* Clamp length as needed */ + exp_len = sockopt_lentab[name]; + if (len > exp_len) { + len = exp_len; + } + + data = dynalloc(len); + if (data == NULL) { + return -ENOMEM; + } + + /* Grab data from userland */ + retval = copyin(u_data, data, len); + if (retval < 0) { + dynfree(data); + return retval; + } + + retval = setsockopt(sockfd, level, name, data, len); + dynfree(data); + return retval; +} + +static struct vops socket_vops = { + .read = NULL, + .write = NULL, + .reclaim = socket_reclaim, +}; diff --git a/sys/kern/kern_spawn.c b/sys/kern/kern_spawn.c new file mode 100644 index 0000000..7962ced --- /dev/null +++ b/sys/kern/kern_spawn.c @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/spawn.h> +#include <sys/wait.h> +#include <sys/proc.h> +#include <sys/exec.h> +#include <sys/mman.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/syslog.h> +#include <sys/syscall.h> +#include <sys/signal.h> +#include <sys/limits.h> +#include <sys/sched.h> +#include <vm/dynalloc.h> +#include <string.h> + +#define pr_trace(fmt, ...) kprintf("spawn: " fmt, ##__VA_ARGS__) +#define pr_error(...) 
pr_trace(__VA_ARGS__) + +#define ARGVP_MAX (ARG_MAX / sizeof(void *)) + +static size_t next_pid = 1; + +/* + * TODO: envp + */ +struct spawn_args { + char path[PATH_MAX]; + char argv_blk[ARG_MAX]; + char *argv[ARGVP_MAX]; +}; + +static inline void +try_free_data(void *p) +{ + if (p != NULL) { + dynfree(p); + } +} + +static void +spawn_thunk(void) +{ + const char *path; + char pathbuf[PATH_MAX]; + struct proc *cur; + struct execve_args execve_args; + struct spawn_args *args; + char *envp[] = { NULL }; + + cur = this_td(); + args = cur->data; + path = args->path; + memset(pathbuf, 0, sizeof(pathbuf)); + memcpy(pathbuf, path, strlen(path)); + + execve_args.pathname = pathbuf; + execve_args.argv = (char **)&args->argv[0]; + execve_args.envp = envp; + path = NULL; + + if (execve(cur, &execve_args) != 0) { + pr_error("execve failed, aborting\n"); + exit1(this_td(), 0); + } + __builtin_unreachable(); +} + +pid_t +waitpid(pid_t pid, int *wstatus, int options) +{ + struct proc *child, *td; + pid_t ret; + + td = this_td(); + child = get_child(td, pid); + + if (child == NULL) { + return -1; + } + + /* Wait for it to be done */ + while (!ISSET(child->flags, PROC_ZOMB)) { + sched_yield(); + } + + + /* Give back the status */ + if (wstatus != NULL) { + copyout(&child->exit_status, wstatus, sizeof(*wstatus)); + } + + ret = child->pid; + proc_reap(child); + return ret; +} + +/* + * Spawn a new process + * + * @cur: Parent (current) process. + * @func: Address of start code. + * @p: Data to pass to new process (used for user procs) + * @flags: Spawn flags. + * @newprocp: If not NULL, will contain the new process. + * + * Returns the PID of the child on success, otherwise an + * errno value that is less than zero. + * + * XXX: `p` is only used by sys_spawn and should be set + * to NULL if called in the kernel. 
+ */ +pid_t +spawn(struct proc *cur, void(*func)(void), void *p, int flags, struct proc **newprocp) +{ + struct proc *newproc; + int error; + pid_t pid; + + newproc = dynalloc(sizeof(*newproc)); + if (newproc == NULL) { + pr_error("could not alloc proc (-ENOMEM)\n"); + try_free_data(p); + return -ENOMEM; + } + + memset(newproc, 0, sizeof(*newproc)); + error = md_spawn(newproc, cur, (uintptr_t)func); + if (error < 0) { + dynfree(newproc); + try_free_data(p); + pr_error("error initializing proc\n"); + return error; + } + + /* Set proc output if we can */ + if (newprocp != NULL) { + *newprocp = newproc; + } + + if (!ISSET(cur->flags, PROC_LEAFQ)) { + TAILQ_INIT(&cur->leafq); + cur->flags |= PROC_LEAFQ; + } + + error = proc_init(newproc, cur); + if (error < 0) { + dynfree(newproc); + try_free_data(p); + pr_error("error initializing proc\n"); + return error; + } + + newproc->data = p; + newproc->pid = next_pid++; + sched_enqueue_td(newproc); + pid = newproc->pid; + return pid; +} + +/* + * Get the child of a process by PID. + * + * @cur: Parent process. + * @pid: Child PID. + * + * Returns NULL if no child was found. + */ +struct proc * +get_child(struct proc *cur, pid_t pid) +{ + struct proc *procp; + + TAILQ_FOREACH(procp, &cur->leafq, leaf_link) { + if (procp == NULL) { + continue; + } + if (procp->pid == pid) { + return procp; + } + } + + return NULL; +} + +/* + * arg0: PID + * arg1: wstatus + * arg2: options + * + * Returns PID of terminated child, returns + * -1 on failure. 
+ */ +scret_t +sys_waitpid(struct syscall_args *scargs) +{ + pid_t pid; + int *u_wstatus; + int options; + + pid = scargs->arg0; + u_wstatus = (void *)scargs->arg1; + options = scargs->arg2; + return waitpid(pid, u_wstatus, options); +} + +/* + * arg0: The file /path/to/executable + * arg1: Argv + * arg2: Envp (TODO) + * arg3: Optional flags (`flags') + */ +scret_t +sys_spawn(struct syscall_args *scargs) +{ + struct spawn_args *args; + char *path; + const char *u_path, **u_argv; + const char *u_p = NULL; + struct proc *td; + int flags, error; + size_t len, bytes_copied = 0; + size_t argv_i = 0; + + td = this_td(); + u_path = (const char *)scargs->arg0; + u_argv = (const char **)scargs->arg1; + flags = scargs->arg3; + + args = dynalloc(sizeof(*args)); + if (args == NULL) { + return -ENOMEM; + } + + error = copyinstr(u_path, args->path, sizeof(args->path)); + if (error < 0) { + dynfree(args); + return error; + } + + memset(args->argv, 0, ARG_MAX); + for (size_t i = 0; i < ARG_MAX - 1; ++i) { + error = copyin(&u_argv[argv_i], &u_p, sizeof(u_p)); + if (error < 0) { + dynfree(args); + return error; + } + if (u_p == NULL) { + args->argv[argv_i++] = NULL; + break; + } + + path = &args->argv_blk[i]; + error = copyinstr(u_p, path, ARG_MAX - bytes_copied); + if (error < 0) { + dynfree(args); + return error; + } + + args->argv[argv_i++] = &args->argv_blk[i]; + len = strlen(path); + bytes_copied += (len + 1); + i += len; + } + + return spawn(td, spawn_thunk, args, flags, NULL); +} diff --git a/sys/kern/kern_stub.c b/sys/kern/kern_stub.c index 8603fd5..a9a56ac 100644 --- a/sys/kern/kern_stub.c +++ b/sys/kern/kern_stub.c @@ -40,8 +40,10 @@ sigfpe_default(int signo) static struct proc *td; td = this_td(); - kprintf("Floating point exception (pid=%d)\n", td->pid); - exit1(td); + syslog_silence(false); + kprintf(OMIT_TIMESTAMP "Floating point exception (pid=%d)\n", td->pid); + syslog_silence(true); + exit1(td, 0); } void @@ -50,8 +52,10 @@ sigkill_default(int signo) static struct 
proc *td; td = this_td(); - kprintf("Terminated (pid=%d)\n", td->pid); - exit1(td); + syslog_silence(false); + kprintf(OMIT_TIMESTAMP "Terminated (pid=%d)\n", td->pid); + syslog_silence(true); + exit1(td, 0); } void @@ -60,8 +64,22 @@ sigsegv_default(int signo) static struct proc *td; td = this_td(); - kprintf("Segmentation fault (pid=%d)\n", td->pid); - exit1(td); + syslog_silence(false); + kprintf(OMIT_TIMESTAMP "Segmentation fault (pid=%d)\n", td->pid); + syslog_silence(true); + exit1(td, 0); +} + +void +sigterm_default(int signo) +{ + static struct proc *td; + + td = this_td(); + syslog_silence(false); + kprintf(OMIT_TIMESTAMP "Terminated (pid=%d)\n", td->pid); + syslog_silence(true); + exit1(td, 0); } int @@ -75,3 +93,9 @@ dev_nowrite(void) { return -ENOTSUP; } + +int +dev_nobsize(void) +{ + return -ENOTSUP; +} diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c index f437ec7..8a08f33 100644 --- a/sys/kern/kern_subr.c +++ b/sys/kern/kern_subr.c @@ -29,9 +29,12 @@ #include <sys/proc.h> #include <sys/types.h> +#include <sys/param.h> #include <sys/errno.h> +#include <sys/mman.h> #include <sys/exec.h> #include <sys/systm.h> +#include <vm/vm.h> #include <string.h> /* @@ -45,6 +48,8 @@ static bool check_uaddr(const void *uaddr) { vaddr_t stack_start, stack_end; + struct mmap_lgdr *lp; + struct mmap_entry find, *res; struct exec_prog exec; struct proc *td; uintptr_t addr; @@ -61,6 +66,22 @@ check_uaddr(const void *uaddr) if (addr >= stack_start && addr <= stack_end) return true; + /* Try to grab the mmap ledger */ + if ((lp = td->mlgdr) == NULL) { + return false; + } + + /* + * Now give an attempt at looking through the + * mmap ledger. Perhaps this memory was allocated + * in the user heap? 
+ */ + find.va_start = ALIGN_DOWN(addr, DEFAULT_PAGESIZE); + res = RBT_FIND(lgdr_entries, &lp->hd, &find); + if (res != NULL) { + return true; + } + return false; } diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 57b27d0..7660f1f 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -28,19 +28,20 @@ */ #include <sys/types.h> +#include <sys/mutex.h> #include <sys/systm.h> #include <sys/errno.h> +#include <sys/sched.h> #include <sys/atomic.h> #include <sys/syslog.h> #include <sys/spinlock.h> +#include <machine/cdefs.h> #include <dev/timer.h> +#include <string.h> #define pr_trace(fmt, ...) kprintf("synch: " fmt, ##__VA_ARGS__) #define pr_error(...) pr_trace(__VA_ARGS__) -/* XXX: Be very careful with this */ -static struct spinlock __syslock; - /* * Returns 0 on success, returns non-zero value * on timeout/failure. @@ -80,7 +81,10 @@ spinlock_usleep(struct spinlock *lock, size_t usec_max) void spinlock_acquire(struct spinlock *lock) { - while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE)); + sched_preempt_set(false); + while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE)) { + md_pause(); + } } /* @@ -104,35 +108,66 @@ spinlock_try_acquire(struct spinlock *lock) return 1; } - while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE)); - return 0; + return __atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE); } void spinlock_release(struct spinlock *lock) { __atomic_clear(&lock->lock, __ATOMIC_RELEASE); + sched_preempt_set(true); } /* - * Attempt to hold the system-wide lock, returns 1 - * if already held. - * - * XXX: Only use for CRITICAL code sections. 
+ * Create a new mutex lock object */ -int -syslock(void) +struct mutex * +mutex_new(const char *name) { - return spinlock_try_acquire(&__syslock); + struct mutex *mtx; + size_t namelen; + + mtx = dynalloc(sizeof(*mtx)); + if (mtx == NULL) { + return NULL; + } + + mtx->lock = 0; + namelen = strlen(name); + + /* Don't overflow the name buffer */ + if (namelen >= MUTEX_NAME_LEN) { + namelen = MUTEX_NAME_LEN - 1; + } + + memcpy(mtx->name, name, namelen); + return mtx; } /* - * Release the system-wide lock + * Acquire a mutex * - * XXX: Only use for CRITICAL code sections. + * @mtx: Mutex to acquire + * @flags: Optional flags */ +int +mutex_acquire(struct mutex *mtx, int flags) +{ + while (__atomic_test_and_set(&mtx->lock, __ATOMIC_ACQUIRE)) { + sched_yield(); + } + + return 0; +} + +void +mutex_release(struct mutex *mtx) +{ + __atomic_clear(&mtx->lock, __ATOMIC_RELEASE); +} + void -sysrel(void) +mutex_free(struct mutex *mtx) { - spinlock_release(&__syslock); + dynfree(mtx); } diff --git a/sys/kern/kern_syscall.c b/sys/kern/kern_syscall.c index 986d82a..c352b9c 100644 --- a/sys/kern/kern_syscall.c +++ b/sys/kern/kern_syscall.c @@ -29,9 +29,16 @@ #include <sys/syscall.h> #include <sys/sysctl.h> +#include <sys/socket.h> +#include <sys/reboot.h> #include <sys/types.h> +#include <sys/ucred.h> +#include <sys/disk.h> +#include <sys/time.h> +#include <sys/mman.h> #include <sys/proc.h> #include <sys/vfs.h> +#include <sys/krq.h> scret_t(*g_sctab[])(struct syscall_args *) = { NULL, /* SYS_none */ @@ -42,6 +49,28 @@ scret_t(*g_sctab[])(struct syscall_args *) = { sys_stat, /* SYS_stat */ sys_sysctl, /* SYS_sysctl */ sys_write, /* SYS_write */ + sys_spawn, /* SYS_spawn */ + sys_reboot, /* SYS_reboot */ + sys_mmap, /* SYS_mmap */ + sys_munmap, /* SYS_munap */ + sys_access, /* SYS_access */ + sys_lseek, /* SYS_lseek */ + sys_sleep, /* SYS_sleep */ + sys_inject, /* SYS_inject */ + sys_getpid, /* SYS_getpid */ + sys_getppid, /* SYS_getppid */ + sys_setuid, /* SYS_setuid */ + 
sys_getuid, /* SYS_getuid */ + sys_waitpid, /* SYS_waitpid */ + sys_socket, /* SYS_socket */ + sys_bind, /* SYS_bind */ + sys_recv, /* SYS_recv */ + sys_send, /* SYS_send */ + sys_sendmsg, /* SYS_sendmsg */ + sys_recvmsg, /* SYS_recvmsg */ + sys_connect, /* SYS_connect */ + sys_setsockopt, /* SYS_setsockopt */ + sys_disk, /* SYS_disk */ }; const size_t MAX_SYSCALLS = NELEM(g_sctab); diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 7679aa1..1f5e578 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -33,12 +33,16 @@ #include <sys/errno.h> #include <sys/systm.h> #include <vm/dynalloc.h> +#include <vm/vm.h> #include <string.h> #define HYRA_RELEASE "Hyra/" HYRA_ARCH " " \ HYRA_VERSION " " \ HYRA_BUILDDATE +extern size_t g_nthreads; +static uint32_t pagesize = DEFAULT_PAGESIZE; +static char machine[] = HYRA_ARCH; static char hyra[] = "Hyra"; static char hyra_version[] = HYRA_VERSION; static char osrelease[] = HYRA_RELEASE; @@ -49,10 +53,20 @@ static char osrelease[] = HYRA_RELEASE; * allocated through dynalloc(9). 
*/ static struct sysctl_entry common_optab[] = { + /* 'kern.*' */ [KERN_OSTYPE] = { KERN_OSTYPE, SYSCTL_OPTYPE_STR_RO, hyra }, [KERN_OSRELEASE] = { KERN_OSRELEASE, SYSCTL_OPTYPE_STR_RO, &osrelease }, [KERN_VERSION] = { KERN_VERSION, SYSCTL_OPTYPE_STR_RO, &hyra_version }, - [KERN_VCACHE_TYPE] = { KERN_VCACHE_TYPE, SYSCTL_OPTYPE_STR, NULL } + [KERN_VCACHE_TYPE] = { KERN_VCACHE_TYPE, SYSCTL_OPTYPE_STR, NULL }, + [KERN_HOSTNAME] = { KERN_HOSTNAME, SYSCTL_OPTYPE_STR, NULL }, + + /* 'hw.*' */ + [HW_PAGESIZE] = { HW_PAGESIZE, SYSCTL_OPTYPE_INT_RO, &pagesize }, + [HW_NCPU] = { HW_NCPU, SYSCTL_OPTYPE_INT, NULL }, + [HW_MACHINE] = {HW_MACHINE, SYSCTL_OPTYPE_STR_RO, &machine }, + + /* 'proc.*' */ + [PROC_COUNT] = { PROC_COUNT, SYSCTL_OPTYPE_INT_RO, &g_nthreads } }; static int @@ -91,19 +105,18 @@ static int do_sysctl(struct sysctl_args *args) { struct sysctl_args new_args; - size_t name_len, oldlenp; + size_t name_len = 1, oldlenp = 0; int *name = NULL; void *oldp = NULL, *newp = NULL; - int retval = 0; - - if (args->oldlenp == NULL) { - return -EINVAL; - } - - name_len = args->nlen; - retval = copyin(args->oldlenp, &oldlenp, sizeof(oldlenp)); - if (retval != 0) { - goto done; + int retval = 0, have_oldlen = 0; + + if (args->oldlenp != NULL) { + have_oldlen = 1; + name_len = args->nlen; + retval = copyin(args->oldlenp, &oldlenp, sizeof(oldlenp)); + if (retval != 0) { + goto done; + } } /* Copy in newp if it is set */ @@ -124,25 +137,30 @@ do_sysctl(struct sysctl_args *args) return retval; } - oldp = dynalloc(oldlenp); - retval = copyin(args->oldp, oldp, oldlenp); - if (retval != 0) { - return retval; + if (oldlenp != 0) { + oldp = dynalloc(oldlenp); + retval = copyin(args->oldp, oldp, oldlenp); + if (retval != 0) { + return retval; + } } /* Prepare the arguments for the sysctl call */ new_args.name = name; new_args.nlen = name_len; new_args.oldp = oldp; - new_args.oldlenp = &oldlenp; + new_args.oldlenp = (have_oldlen) ? 
&oldlenp : NULL; new_args.newp = newp; + new_args.newlen = args->newlen; retval = sysctl(&new_args); if (retval != 0) { goto done; } - copyout(oldp, args->oldp, oldlenp); + if (oldlenp != 0) { + copyout(oldp, args->oldp, oldlenp); + } done: if (name != NULL) dynfree(name); @@ -154,6 +172,33 @@ done: return retval; } +/* + * Clear a writable sysctl string variable to the + * value of "(undef)" + * + * @name: Name to clear + */ +int +sysctl_clearstr(int name) +{ + struct sysctl_args args; + char val[] = "(undef)"; + int error; + + args.name = &name; + args.nlen = 1; + args.oldlenp = 0; + args.oldp = NULL; + args.newp = val; + args.newlen = sizeof(val); + + if ((error = sysctl(&args)) != 0) { + return error; + } + + return 0; +} + int sysctl(struct sysctl_args *args) { diff --git a/sys/kern/kern_syslog.c b/sys/kern/kern_syslog.c index 10bf348..c7f51f7 100644 --- a/sys/kern/kern_syslog.c +++ b/sys/kern/kern_syslog.c @@ -28,9 +28,14 @@ */ #include <sys/syslog.h> +#include <sys/cdefs.h> +#include <sys/sio.h> #include <sys/spinlock.h> +#include <sys/device.h> +#include <sys/errno.h> #include <dev/cons/cons.h> #include <dev/timer.h> +#include <fs/devfs.h> #include <stdarg.h> #include <string.h> @@ -40,21 +45,105 @@ #define SERIAL_DEBUG 0 #endif +#if defined(__USER_KMSG) +#define USER_KMSG __USER_KMSG +#else +#define USER_KMSG 0 +#endif + +#define KBUF_SIZE (1 << 16) + +/* Sanity check */ +__static_assert(KBUF_SIZE <= (1 << 16), "KBUF_SIZE too high!"); + /* Global logger lock */ -static struct spinlock lock = {0}; +static struct spinlock kmsg_lock = {0}; +static bool no_cons_log = false; + +/* Kernel message buffer */ +static char kmsg[KBUF_SIZE]; +static size_t kmsg_i = 0; +static struct cdevsw kmsg_cdevw; + +static void +kmsg_append(const char *s, size_t len) +{ + spinlock_acquire(&kmsg_lock); + if ((kmsg_i + len) >= KBUF_SIZE) { + kmsg_i = 0; + } + + for (size_t i = 0; i < len; ++i) { + kmsg[kmsg_i + i] = s[i]; + } + kmsg_i += len; + spinlock_release(&kmsg_lock); +} + 
+/* + * Character device function. + */ +static int +kmsg_read(dev_t dev, struct sio_txn *sio, int flags) +{ + size_t len, offset, j; + size_t bytes_read = 0; + char *p = sio->buf; + + spinlock_acquire(&kmsg_lock); + len = sio->len; + offset = sio->offset; + + if (len == 0) { + spinlock_release(&kmsg_lock); + return -EINVAL; + } + if (offset >= kmsg_i) { + spinlock_release(&kmsg_lock); + return 0; + } + + for (size_t i = 0; i < len; ++i) { + j = offset + i; + if (j > kmsg_i) { + break; + } + + p[i] = kmsg[j]; + ++bytes_read; + } + + spinlock_release(&kmsg_lock); + return bytes_read; +} static void syslog_write(const char *s, size_t len) { - const char *p = s; + const char *p; + size_t l; - while (len--) { - cons_putch(&g_root_scr, *p); - if (SERIAL_DEBUG) { + if (SERIAL_DEBUG) { + p = s; + l = len; + while (l--) { serial_putc(*p); + ++p; } - ++p; } + + kmsg_append(s, len); + + /* + * If the USER_KMSG option is disabled in kconf, + * do not log to the console if everything else + * has already started. + */ + if (!USER_KMSG && no_cons_log) { + return; + } + + cons_putstr(&g_root_scr, s, len); } /* @@ -105,10 +194,42 @@ kprintf(const char *fmt, ...) syslog_write(timestamp, strlen(timestamp)); } - spinlock_acquire(&lock); va_start(ap, fmt); vkprintf(fmt_p, &ap); va_end(ap); - spinlock_release(&lock); } + +/* + * Silence kernel messages in if the system + * is already operating in a user context. + * + * XXX: This is ignored if the kconf USER_KMSG + * option is set to "no". A kmsg device file + * is also created on the first call. 
+ */ +void +syslog_silence(bool option) +{ + static bool once = false; + static char devname[] = "kmsg"; + devmajor_t major; + dev_t dev; + + if (!once) { + once = true; + major = dev_alloc_major(); + dev = dev_alloc(major); + + dev_register(major, dev, &kmsg_cdevw); + devfs_create_entry(devname, major, dev, 0444); + + } + + no_cons_log = option; +} + +static struct cdevsw kmsg_cdevw = { + .read = kmsg_read, + .write = nowrite +}; diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c new file mode 100644 index 0000000..e741157 --- /dev/null +++ b/sys/kern/kern_time.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/syscall.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/cdefs.h> +#include <dev/timer.h> +#include <machine/cdefs.h> + +/* + * arg0: Timespec + * arg1: Remaining timeval + */ +scret_t +sys_sleep(struct syscall_args *scargs) +{ + struct timespec ts; + struct timer tmr; + size_t timeout_msec; + tmrr_status_t status; + int error; + + error = copyin((void *)scargs->arg0, &ts, sizeof(ts)); + if (error < 0) { + return error; + } + + if (ts.tv_nsec >= 1000000000) { + return -EINVAL; + } + + status = req_timer(TIMER_GP, &tmr); + if (__unlikely(status != TMRR_SUCCESS)) { + return -ENOTSUP; + } + if (__unlikely(tmr.msleep == NULL)) { + return -ENOTSUP; + } + + timeout_msec = ts.tv_nsec / 1000000; + timeout_msec += ts.tv_sec * 1000; + tmr.msleep(timeout_msec); + return 0; +} diff --git a/sys/kern/kern_uio.c b/sys/kern/kern_uio.c new file mode 100644 index 0000000..2ec1532 --- /dev/null +++ b/sys/kern/kern_uio.c @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/limits.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/filedesc.h> + +/* + * Clean up after a UIO copyin() operation + * + * @iov: iovec copy to clean up + * @iovcnt: Number of iovec entries + */ +void +uio_copyin_clean(struct iovec *iov, int iovcnt) +{ + for (int i = 0; i < iovcnt; ++i) { + if (iov[i].iov_base == NULL) { + continue; + } + + dynfree(iov[i].iov_base); + iov[i].iov_base = NULL; + } +} + +/* + * Read data into POSIX.1‐2017 iovec + * + * @filedes: File descriptor number + * @iov: I/O vector to read file into + * @iovnt: Number of I/O vectors + */ +ssize_t +readv(int filedes, const struct iovec *iov, int iovcnt) +{ + void *base; + size_t len; + ssize_t tmp, bytes_read = 0; + + if (filedes < 0) { + return -EINVAL; + } + + /* + * Make sure that this conforms to our max + * iovec limit. + */ + if (iovcnt > IOVEC_MAX) { + return -EINVAL; + } + + /* + * Go through each I/O vector and read a chunk + * of data into one. + */ + for (int i = 0; i < iovcnt; ++i) { + base = iov[i].iov_base; + len = iov[i].iov_len; + + /* + * If we encounter a base that is NULL, + * or if the length to read is an invalid + * value of zero. We can just assume this + * is some sort of weird list termination? + */ + if (base == NULL || len == 0) { + break; + } + + /* Read the file into this base */ + tmp = fd_read(filedes, base, len); + + /* Did anything go wrong? 
*/ + if (tmp < 0) { + return tmp; + } + + /* No more data */ + if (tmp == 0) { + break; + } + + /* Read more bytes */ + bytes_read += tmp; + } + + return bytes_read; +} + +/* + * Write data from POSIX.1‐2017 iovec + * + * @filedes: File descriptor number + * @iov: I/O vector to write to file + * @iovnt: Number of I/O vectors + */ +ssize_t +writev(int filedes, const struct iovec *iov, int iovcnt) +{ + void *base; + size_t len; + ssize_t bytes_written = 0; + ssize_t tmp; + + if (filedes < 0) { + return -EINVAL; + } + + /* + * Are we within the limits? Return an + * error if not. + */ + if (iovcnt > IOVEC_MAX) { + return -EINVAL; + } + + for (int i = 0; i < iovcnt; ++i) { + base = iov[i].iov_base; + len = iov[i].iov_len; + + /* + * These are invalid, whatever these are, + * terminate our walk through. + */ + if (base == NULL || len == 0) { + break; + } + + /* Write the data from the iovec */ + tmp = fd_write(filedes, base, len); + + /* Was there an error? */ + if (tmp < 0) { + return tmp; + } + + /* No more data to read? */ + if (tmp == 0) { + break; + } + + bytes_written += tmp; + } + + return bytes_written; +} + +/* + * Validate iovecs coming in from userland + * and copy it to a kernel buffer. + * + * XXX: A new buffer is allocated in k_iov[i]->iov_base + * and must be freed with dynfree() after use. + * + * @u_iov: Userspace source iovecs + * @k_iov: Kernel destination iovec + * @iovcnt: Number of iovecs to copy + */ +int +uio_copyin(const struct iovec *u_iov, struct iovec *k_iov, int iovcnt) +{ + struct iovec *iov_dest; + const struct iovec *iov_src; + size_t len; + void *old_base; + int error; + + if (u_iov == NULL || k_iov == NULL) { + return -EINVAL; + } + + for (int i = 0; i < iovcnt; ++i) { + iov_dest = &k_iov[i]; + iov_src = &u_iov[i]; + error = copyin(iov_src, iov_dest, sizeof(*iov_dest)); + + if (error < 0) { + uio_copyin_clean(iov_dest, i + 1); + return error; + } + + /* + * Save the old base so that we may copy the data to + * the new kernel buffer. 
First we'd need to allocate + * one of course. + */ + old_base = iov_dest->iov_base; + len = iov_dest->iov_len; + iov_dest->iov_base = dynalloc(len); + + /* Did it fail? */ + if (iov_dest->iov_base == NULL) { + uio_copyin_clean(iov_dest, i + 1); + return -ENOMEM; + } + + /* Copy actual data in */ + error = copyin(old_base, iov_dest->iov_base, len); + if (error < 0) { + uio_copyin_clean(iov_dest, i + 1); + return error; + } + } + + return 0; +} + + +/* + * Validate iovecs going out from kernel space (us) + * before actually sending it out. + * + * @k_iov: Kernel iovec to copyout + * @u_iov: Userspace destination + * @iovcnt: Number of iovecs + */ +int +uio_copyout(const struct iovec *k_iov, struct iovec *u_iov, int iovcnt) +{ + struct iovec iov_shadow, *iov_dest; + const struct iovec *iov_src; + int error; + + for (int i = 0; i < iovcnt; ++i) { + iov_dest = &u_iov[i]; + iov_src = &k_iov[i]; + + /* Grab a shadow copy */ + error = copyin(iov_src, &iov_shadow, sizeof(iov_shadow)); + if (error < 0) { + return error; + } + + /* Copy out actual data */ + error = copyout(iov_src->iov_base, iov_dest->iov_base, iov_dest->iov_len); + if (error < 0) { + return error; + } + } + + return 0; +} diff --git a/sys/kern/kern_vsr.c b/sys/kern/kern_vsr.c new file mode 100644 index 0000000..c59be1e --- /dev/null +++ b/sys/kern/kern_vsr.c @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/vsr.h> +#include <sys/proc.h> +#include <sys/param.h> +#include <sys/limits.h> +#include <sys/syslog.h> +#include <vm/dynalloc.h> +#include <string.h> + +#define pr_trace(fmt, ...) kprintf("vsr: " fmt, ##__VA_ARGS__) +#define pr_error(...) pr_trace(__VA_ARGS__) + +static uint32_t +fnv1_hash(const char *s) +{ + uint32_t hash = 2166136261UL; + const uint8_t *p = (uint8_t *)s; + + while (*p != '\0') { + hash ^= *p; + hash = hash * 0x01000193; + ++p; + } + + return hash; +} + +/* + * Add a VSR capsule to a domain. 
+ */ +static void +vsr_domain_add(struct vsr_domain *vsp, struct vsr_capsule *cap) +{ + struct vsr_table *tab; + struct vsr_capsule **slot; + uint32_t hash; + + if (vsp == NULL || cap == NULL) { + return; + } + + if (cap->name == NULL) { + pr_error("vsr_domain_add: cap->name == NULL\n"); + return; + } + + tab = &vsp->table; + hash = fnv1_hash(cap->name); + slot = &tab->capsules[hash % VSR_MAX_CAPSULE]; + + /* If this slot is free, set it */ + if (*slot == NULL) { + *slot = cap; + return; + } + + /* Handle collision */ + TAILQ_INSERT_TAIL(&(*slot)->buckets, cap, link); +} + +/* + * Handle VSR domain hashmap collisions. + * + * @slot: Slot that we have collided with + * @name: Name to lookup + * + * Returns the pointer to the actual capsule if the + * collision has been resolved, otherwise, NULL if the + * entry to look up was not found. + */ +static struct vsr_capsule * +vsr_domain_clash(struct vsr_capsule *slot, const char *name) +{ + struct vsr_capsule *cap_ent; + + TAILQ_FOREACH(cap_ent, &slot->buckets, link) { + if (cap_ent == NULL) { + continue; + } + + if (strcmp(cap_ent->name, name) == 0) { + return cap_ent; + } + } + + return NULL; +} + +/* + * Lookup a capsule within a VSR domain + * by name. + * + * @vsp: Domain to lookup within + * @name: Name to use as lookup key + * + * Returns NULL if no entry was found. 
+ */ +static struct vsr_capsule * +vfs_domain_lookup(struct vsr_domain *vsp, const char *name) +{ + uint32_t hash; + struct vsr_table *tab; + struct vsr_capsule **slot; + + if (vsp == NULL || name == NULL) { + return NULL; + } + + tab = &vsp->table; + hash = fnv1_hash(name); + slot = &tab->capsules[hash % VSR_MAX_CAPSULE]; + + if (*slot == NULL) { + return NULL; + } + + if (strcmp((*slot)->name, name) != 0) { + return vsr_domain_clash(*slot, name); + } + + return *slot; +} + +/* + * Destroy a VSR capsule + * + * @capule: Capsule to destroy + */ +static void +vsr_destroy_capsule(struct vsr_capsule *capsule) +{ + struct vsr_capsule *bucket; + struct capsule_ops *ops; + + if (capsule->name != NULL) { + dynfree(capsule->name); + capsule->name = NULL; + } + + ops = &capsule->ops; + if (ops->reclaim != NULL) { + ops->reclaim(capsule, 0); + } + + TAILQ_FOREACH(bucket, &capsule->buckets, link) { + if (bucket == NULL) { + continue; + } + vsr_destroy_capsule(bucket); + } + + /* Release any held locks */ + mutex_release(&capsule->lock); +} + +/* + * Destroy a VSR table + * + * @tab: Table to destroy. + */ +static void +vsr_destroy_table(struct vsr_table *tab) +{ + struct vsr_capsule *capsule; + + if (tab == NULL) { + pr_error("vsr_destroy_table: tab is NULL\n"); + return; + } + + for (int i = 0; i < VSR_MAX_CAPSULE; ++i) { + if ((capsule = tab->capsules[i]) == NULL) { + continue; + } + + vsr_destroy_capsule(capsule); + } +} + +/* + * Allocate a new VSR capsule and add it to + * VSR domain. + * + * @type: Domain type (e.g., VSR_FILE) + * @name: Capsule name (e.g., "mod0.data") + * @sz: Length of capsulized data + */ +struct vsr_capsule * +vsr_new_capsule(struct proc *td, vsr_domain_t type, const char *name) +{ + struct vsr_capsule *capsule; + struct vsr_domain *domain; + + /* Valid args? */ + if (type >= VSR_MAX_DOMAIN || td == NULL) { + return NULL; + } + + /* + * The VSR domain must be registered for + * us to add any capsules to it. 
+ */ + if ((domain = td->vsr_tab[type]) == NULL) { + pr_error("VSR domain %d not registered\n", type); + return NULL; + } + + /* Allocate a new capsule */ + capsule = dynalloc(sizeof(*capsule)); + if (capsule == NULL) { + return NULL; + } + + memset(capsule, 0, sizeof(*capsule)); + capsule->name = strdup(name); + + TAILQ_INIT(&capsule->buckets); + vsr_domain_add(domain, capsule); + return capsule; +} + +/* + * Allocate a new VSR domain and add it to + * a specific process. + * + * @type: VSR type (e.g., VSR_FILE) + */ +struct vsr_domain * +vsr_new_domain(struct proc *td, vsr_domain_t type) +{ + struct vsr_domain *domain; + + /* Valid args? */ + if (type >= VSR_MAX_DOMAIN || td == NULL) { + return NULL; + } + + /* + * Do not overwrite the entry if it is + * already allocated and log this anomalous + * activity. + */ + if (td->vsr_tab[type] != NULL) { + pr_error("[security]: type %d already allocated\n", type); + return NULL; + } + + domain = dynalloc(sizeof(*domain)); + if (domain == NULL) { + return NULL; + } + + /* Initialize the domain */ + memset(domain, 0, sizeof(*domain)); + domain->type = type; + + td->vsr_tab[type] = domain; + return domain; +} + +/* + * Lookup a capsule by name for the current + * process. + */ +struct vsr_capsule * +vsr_lookup_capsule(struct proc *td, vsr_domain_t type, const char *name) +{ + struct vsr_domain *domain; + + if (td == NULL) { + return NULL; + } + + /* + * The VSR domain must be registered for + * us to lookup any capsules from it. 
+ */ + if ((domain = td->vsr_tab[type]) == NULL) { + pr_error("VSR domain %d not registered\n", type); + return NULL; + } + + return vfs_domain_lookup(domain, name); +} + +/* + * Initialize per-process domains + */ +void +vsr_init_domains(struct proc *td) +{ + if (vsr_new_domain(td, VSR_FILE) == NULL) { + pr_error("failed to initialize VSR file domain\n"); + } +} + +/* + * Destroy per-process domains + */ +void +vsr_destroy_domains(struct proc *td) +{ + struct vsr_domain *domain; + + if (td == NULL) { + return; + } + + for (int i = 0; i < VSR_MAX_DOMAIN; ++i) { + if ((domain = td->vsr_tab[i]) == NULL) { + continue; + } + + vsr_destroy_table(&domain->table); + } +} diff --git a/sys/kern/kern_work.c b/sys/kern/kern_work.c new file mode 100644 index 0000000..918af89 --- /dev/null +++ b/sys/kern/kern_work.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/panic.h> +#include <sys/proc.h> +#include <sys/sched.h> +#include <sys/syslog.h> +#include <sys/workqueue.h> +#include <vm/dynalloc.h> +#include <string.h> + +#define pr_trace(fmt, ...) kprintf("workq: " fmt, ##__VA_ARGS__) +#define pr_error(...) pr_trace(__VA_ARGS__) + +extern struct proc g_proc0; + +/* + * The workqueue cookie value that is used for + * verifying if a workqueue object is properly + * set up or not. + */ +#define WQ_COOKIE 0xFC0B + +/* + * A worker services work in the queue + * and there is one per workqueue. + */ +static void +workqueue_worker(void) +{ + struct proc *td; + struct workqueue *wqp; + struct work *wp; + + td = this_td(); + if ((wqp = td->data) == NULL) { + panic("no workqueue in thread\n"); + } + + /* + * Weird things can happen, just be careful + * here... + */ + if (wqp->cookie != WQ_COOKIE) { + panic("bad WQ_COOKIE in worker\n"); + } + + for (;;) { + mutex_acquire(wqp->lock, 0); + wp = TAILQ_FIRST(&wqp->work); + + /* Try again later if empty */ + if (wp == NULL) { + mutex_release(wqp->lock); + sched_yield(); + continue; + } + + wp->func(wqp, wp); + TAILQ_REMOVE(&wqp->work, wp, link); + + /* + * Decrement the amount of work that is + * left to get done. Check for underflows + * which should not happen unless something + * clobbers the fields. 
+ */ + if ((--wqp->nwork) < 0) { + panic("wqp nwork underflow\n"); + } + + mutex_release(wqp->lock); + sched_yield(); + } +} + +/* + * Allocates a new work queue that may be used + * to hold queued up tasks. + * + * @name: Name to give the workqueue + * @max_work: Maximum number of jobs to be added + * @ipl: IPL that the work must operate in + * + * Returns a pointer to the new workqueue on success, + * otherwise a value of NULL is returned. + */ +struct workqueue * +workqueue_new(const char *name, size_t max_work, int ipl) +{ + struct workqueue *wqp; + struct proc *td; + + td = this_td(); + if (__unlikely(td == NULL)) { + pr_error("no thread in workqueue_new()\n"); + return NULL; + } + + wqp = dynalloc(sizeof(*wqp)); + if (wqp == NULL) { + return NULL; + } + + wqp->name = strdup(name); + TAILQ_INIT(&wqp->work); + wqp->ipl = ipl; + wqp->max_work = max_work; + wqp->nwork = 0; + wqp->cookie = WQ_COOKIE; + wqp->lock = mutex_new(wqp->name); + + /* + * We need to spawn the work thread which + * is behind the management of this specific + * workqueue. It typically does something like + * dequeuing at the head of the workqueue, performing + * the work, cleaning up as needed and dequeuing the + * next and waiting if there are none yet. + */ + spawn( + &g_proc0, workqueue_worker, + wqp, 0, + &wqp->worktd + ); + + return wqp; +} + +/* + * Enqueue a work item onto a specific + * workqueue. + * + * @wqp: Pointer to specific workqueue + * @name: Name to set for work unit + * @wp: Pointer to work that should be enqueued + * + * Returns zero on success, otherwise a less than + * zero value is returned. 
+ */ +int +workqueue_enq(struct workqueue *wqp, const char *name, struct work *wp) +{ + if (wqp == NULL || wp == NULL) { + return -EINVAL; + } + + if (name == NULL) { + return -EINVAL; + } + + /* Verify that we have a valid workqueue */ + if (__unlikely(wqp->cookie != WQ_COOKIE)) { + panic("workq: bad cookie on work enqueue\n"); + } + + wp->name = strdup(name); + mutex_acquire(wqp->lock, 0); + + /* + * If we have reached the max amount of jobs + * that we can enqueue here, just log it and + * bail. + */ + if (wqp->nwork >= wqp->max_work) { + pr_error("max jobs reached for '%s'\n", wqp->name); + mutex_release(wqp->lock); + return -EAGAIN; + } + + TAILQ_INSERT_TAIL(&wqp->work, wp, link); + ++wqp->nwork; + mutex_release(wqp->lock); + return 0; +} + +/* + * Destroy a workqueue and free resources + * associated with it. + * + * @wqp: Pointer to workqueue to destroy + * + * Returns zero on success, otherwise a less + * than zero value is returned. + */ +int +workqueue_destroy(struct workqueue *wqp) +{ + if (wqp == NULL) { + return -EINVAL; + } + + /* Should not happen but just make sure */ + if (__unlikely(wqp->cookie != WQ_COOKIE)) { + panic("workq: bad cookie on destroy\n"); + } + + /* Free the name if we have it */ + if (wqp->name != NULL) { + dynfree(wqp->name); + } + + if (wqp->lock != NULL) { + mutex_free(wqp->lock); + } + + /* Brutally murder any workthreads */ + if (wqp->worktd != NULL) { + exit1(wqp->worktd, 0); + wqp->worktd = NULL; + } + + /* + * Zero before we free for security reasons, we + * don't really know what will be queued up but + * for certain things, it is best if we make it + * as if it never existed in the first place. + * + * XXX: There is no need to free the workqueue here as + * we had to pass it to spawn() to run the worker. + * + * During an exit, spawn() will free the thread data + * meaning this is already cleaned up. 
+ */ + memset(wqp, 0, sizeof(*wqp)); + return 0; +} + +/* + * Cleanup after work + * + * @wp: Work to clean up + */ +int +work_destroy(struct work *wp) +{ + if (wp == NULL) { + return -EINVAL; + } + + if (wp->name != NULL) { + dynfree(wp->name); + } + + return 0; +} diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c index 8c1bc74..bc7f8b0 100644 --- a/sys/kern/vfs_init.c +++ b/sys/kern/vfs_init.c @@ -37,7 +37,8 @@ struct vnode *g_root_vnode = NULL; static struct fs_info fs_list[] = { {MOUNT_RAMFS, &g_initramfs_vfsops, 0, 0}, {MOUNT_DEVFS, &g_devfs_vfsops, 0, 0}, - {MOUNT_CTLFS, &g_ctlfs_vfsops, 0, 0} + {MOUNT_CTLFS, &g_ctlfs_vfsops, 0, 0}, + {MOUNT_TMPFS, &g_tmpfs_vfsops, 0, 0} }; void diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index d04b812..7320102 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -29,6 +29,7 @@ #include <sys/namei.h> #include <sys/vnode.h> +#include <sys/param.h> #include <sys/mount.h> #include <sys/errno.h> #include <vm/dynalloc.h> @@ -118,20 +119,60 @@ vfs_get_fname_at(const char *path, size_t idx) } /* + * Count the number of components that exist within + * a path minus the delimiter as well as any redundant + * delimiters. + * + * @path: Path to count + */ +static uint8_t +namei_num_cnp(const char *path) +{ + const char *p = path; + uint8_t count = 0; + + while (*p != '\0') { + /* Skip redundant delimiters */ + if (p[0] == '/' && p[1] == '/') { + ++p; + continue; + } + + if (*p == '/') { + ++count; + } + ++p; + } + + /* Don't count leading slash */ + if (*(p - 1) == '/') { + --count; + } + + return count; +} + +/* * Search for a path within a mountpoint. * * @mp: Mountpoint to search in. * @path: Path to search for. 
+ * @ndp: Namei data pointer */ static struct vnode * -namei_mp_search(struct mount *mp, const char *path) +namei_mp_search(struct mount *mp, const char *path, struct nameidata *ndp) { struct vop_lookup_args lookup_args; struct vnode *vp = mp->vp; + uint8_t n_cnp = 0; char *name; int status; - for (size_t i = 1;; ++i) { + n_cnp = namei_num_cnp(path); + if (ISSET(ndp->flags, NAMEI_WANTPARENT)) { + --n_cnp; + } + for (size_t i = 1; i < n_cnp; ++i) { name = vfs_get_fname_at(path, i); if (name == NULL) break; @@ -140,7 +181,7 @@ namei_mp_search(struct mount *mp, const char *path) lookup_args.dirvp = vp; lookup_args.vpp = &vp; - status = vfs_vop_lookup(vp, &lookup_args); + status = vfs_vop_lookup(&lookup_args); dynfree(name); if (status != 0) { @@ -193,7 +234,7 @@ namei(struct nameidata *ndp) lookup_args.name = path; lookup_args.dirvp = g_root_vnode; lookup_args.vpp = &vp; - status = vfs_vop_lookup(lookup_args.dirvp, &lookup_args); + status = vfs_vop_lookup(&lookup_args); /* Did we find it in the root */ if (status == 0) { @@ -212,7 +253,7 @@ namei(struct nameidata *ndp) /* If the name matches, search within */ if (strcmp(mp->name, name) == 0) - vp = namei_mp_search(mp, path); + vp = namei_mp_search(mp, path, ndp); /* Did we find it at this mountpoint? 
*/ if (vp != NULL) { diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index da0a4f9..69417d0 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -141,8 +141,9 @@ vfs_release_vnode(struct vnode *vp) } int -vfs_vop_lookup(struct vnode *vp, struct vop_lookup_args *args) +vfs_vop_lookup(struct vop_lookup_args *args) { + const struct vnode *vp = args->dirvp; const struct vops *vops = vp->vops; if (vops == NULL) @@ -180,8 +181,9 @@ vfs_vop_write(struct vnode *vp, struct sio_txn *sio) } int -vfs_vop_getattr(struct vnode *vp, struct vop_getattr_args *args) +vfs_vop_getattr(struct vop_getattr_args *args) { + const struct vnode *vp = args->vp; const struct vops *vops = vp->vops; if (vops == NULL) diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 6f2d683..d15ecf1 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -43,7 +43,7 @@ static int vfs_dostat(const char *path, struct stat *sbuf) { char pathbuf[PATH_MAX]; - struct vattr *attr; + struct vattr attr; struct stat st; struct vnode *vp; struct vop_getattr_args gattr; @@ -54,11 +54,11 @@ vfs_dostat(const char *path, struct stat *sbuf) return -EINVAL; } - if ((copyinstr(path, pathbuf, sizeof(path))) < 0) { + if ((copyinstr(path, pathbuf, sizeof(pathbuf))) < 0) { return -EFAULT; } - nd.path = path; + nd.path = pathbuf; nd.flags = 0; if ((error = namei(&nd)) != 0) { @@ -67,19 +67,42 @@ vfs_dostat(const char *path, struct stat *sbuf) vp = nd.vp; gattr.vp = vp; - error = vfs_vop_getattr(vp, &gattr); + gattr.res = &attr; + error = vfs_vop_getattr(&gattr); if (error != 0) { return error; } - attr = gattr.res; memset(&st, VNOVAL, sizeof(st)); /* Copy stat data to userspace statbuf */ - st.st_mode = attr->mode; - st.st_size = attr->size; + st.st_mode = attr.mode; + st.st_size = attr.size; copyout(&st, sbuf, sizeof(*sbuf)); + vfs_release_vnode(vp); + return 0; +} + +static int +vfs_doaccess(const char *path) +{ + struct nameidata nd; + char pathbuf[PATH_MAX]; + int error; + + if 
((copyinstr(path, pathbuf, sizeof(pathbuf))) < 0) { + return -EFAULT; + } + + nd.path = pathbuf; + nd.flags = 0; + + if ((error = namei(&nd)) != 0) { + return error; + } + + vfs_release_vnode(nd.vp); return 0; } @@ -149,3 +172,14 @@ sys_stat(struct syscall_args *scargs) { return vfs_dostat((const char *)scargs->arg0, (void *)scargs->arg1); } + +/* + * Check if a file can be accessed. + * + * @arg0: path + */ +scret_t +sys_access(struct syscall_args *scargs) +{ + return vfs_doaccess((const char *)scargs->arg0); +} diff --git a/sys/kern/vfs_vcache.c b/sys/kern/vfs_vcache.c index 25e244c..6c08caf 100644 --- a/sys/kern/vfs_vcache.c +++ b/sys/kern/vfs_vcache.c @@ -161,7 +161,7 @@ vfs_vcache_migrate(int newtype) args.oldp = NULL; args.oldlenp = NULL; args.newp = sysctl_val; - args.newlen = strlen(sysctl_val); + args.newlen = strlen(sysctl_val) + 1; if ((retval = sysctl(&args)) != 0) { return retval; |