summary refs log tree commit diff
path: root/sys/kern
diff options
context:
space:
mode:
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/disk_engine.c208
-rw-r--r--sys/kern/driver_blacklist.c170
-rw-r--r--sys/kern/driver_subr.c76
-rw-r--r--sys/kern/exec_elf64.c36
-rw-r--r--sys/kern/init_main.c39
-rw-r--r--sys/kern/kern_accnt.c128
-rw-r--r--sys/kern/kern_cpu.c61
-rw-r--r--sys/kern/kern_cred.c87
-rw-r--r--sys/kern/kern_descrip.c147
-rw-r--r--sys/kern/kern_disk.c475
-rw-r--r--sys/kern/kern_exec.c4
-rw-r--r--sys/kern/kern_exit.c115
-rw-r--r--sys/kern/kern_fork.c33
-rw-r--r--sys/kern/kern_krq.c61
-rw-r--r--sys/kern/kern_panic.c60
-rw-r--r--sys/kern/kern_proc.c143
-rw-r--r--sys/kern/kern_sched.c285
-rw-r--r--sys/kern/kern_sig.c6
-rw-r--r--sys/kern/kern_socket.c1009
-rw-r--r--sys/kern/kern_spawn.c155
-rw-r--r--sys/kern/kern_stub.c36
-rw-r--r--sys/kern/kern_subr.c21
-rw-r--r--sys/kern/kern_synch.c69
-rw-r--r--sys/kern/kern_syscall.c28
-rw-r--r--sys/kern/kern_sysctl.c81
-rw-r--r--sys/kern/kern_syslog.c137
-rw-r--r--sys/kern/kern_time.c73
-rw-r--r--sys/kern/kern_uio.c272
-rw-r--r--sys/kern/kern_vsr.c344
-rw-r--r--sys/kern/kern_work.c274
-rw-r--r--sys/kern/vfs_init.c3
-rw-r--r--sys/kern/vfs_lookup.c51
-rw-r--r--sys/kern/vfs_subr.c6
-rw-r--r--sys/kern/vfs_syscalls.c46
-rw-r--r--sys/kern/vfs_vcache.c2
35 files changed, 4519 insertions, 222 deletions
diff --git a/sys/kern/disk_engine.c b/sys/kern/disk_engine.c
new file mode 100644
index 0000000..1061165
--- /dev/null
+++ b/sys/kern/disk_engine.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/syscall.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+#include <sys/disk.h>
+#include <vm/dynalloc.h>
+
+#define pr_trace(fmt, ...) kprintf("disk: " fmt, ##__VA_ARGS__)
+#define pr_error(...) pr_trace(__VA_ARGS__)
+
+/*
+ * Clones a disk parameter structure passed
+ * by a user. The structure returned is safe
+ * to be accessed freely by the kernel.
+ *
+ * @u_param: Contains user-side pointer
+ * @res: Resulting safe data
+ *
+ * Returns zero on success, otherwise a less than
+ * zero value is returned.
+ */
+static int
+disk_param_clone(struct disk_param *u_param, struct disk_param *res)
+{
+ void *data;
+ int error;
+
+ if (u_param == NULL) {
+ pr_error("disk_param_clone: got NULL u_param\n");
+ return -EINVAL;
+ }
+
+ error = copyin(u_param, res, sizeof(*res));
+ if (error < 0) {
+ return error;
+ }
+
+ /*
+ * If these parameters do not have a valid cookie, fuck
+ * that object, something is not right with it...
+ */
+ if (res->cookie != DISK_PARAM_COOKIE) {
+ pr_error("disk_param_clone: erroneous params (bad cookie)\n");
+ return -EACCES;
+ }
+
+ data = dynalloc(res->size);
+ if (data == NULL) {
+ pr_error("disk_param_clone: out of memory\n");
+ return -ENOMEM;
+ }
+
+ error = copyin(res->buf, data, res->size);
+ if (error < 0) {
+ pr_error("failed to copy in param data\n");
+ dynfree(data);
+ return error;
+ }
+
+ res->u_buf = res->buf;
+ res->buf = data;
+ return 0;
+}
+
+/*
+ * Deallocate a kernel managed disk parameter
+ * structure created by disk_param_clone()
+ *
+ * @param: Params to free
+ *
+ * Returns zero on success, otherwise a less than
+ * zero value is returned.
+ */
+static int
+disk_param_free(struct disk_param *param)
+{
+ if (param == NULL) {
+ return -EINVAL;
+ }
+
+ if (param->cookie != DISK_PARAM_COOKIE) {
+ return -EACCES;
+ }
+
+ dynfree(param->buf);
+ return 0;
+}
+
+/*
+ * Perform an operation on a disk.
+ *
+ * @id: ID of disk to operate on
+ * @opcode: Operation to perform (see DISK_IO_*)
+ * @u_param: User side disk parameters
+ *
+ * Returns a less than zero value on error
+ */
+static ssize_t
+disk_mux_io(diskid_t id, diskop_t opcode, struct disk_param *u_param)
+{
+ struct disk_param param;
+ struct disk *dp;
+ ssize_t retval = -EIO;
+ int error;
+
+ if (u_param == NULL) {
+ return -EINVAL;
+ }
+
+ error = disk_param_clone(u_param, &param);
+ if (error < 0) {
+ return error;
+ }
+
+ /* First, attempt to acquire the disk */
+ error = disk_get_id(id, &dp);
+ if (error < 0) {
+ pr_error("disk_mux_io: no such device (id=%d)\n", id);
+ return error;
+ }
+
+ switch (opcode) {
+ case DISK_IO_READ:
+ retval = disk_read(
+ id,
+ param.blk,
+ param.buf,
+ param.size
+ );
+
+ /* Write back the data to the user program */
+ error = copyout(param.buf, param.u_buf, param.size);
+ if (error < 0) {
+ retval = error;
+ }
+ break;
+ case DISK_IO_WRITE:
+ retval = disk_write(
+ id,
+ param.blk,
+ param.buf,
+ param.size
+ );
+ break;
+ case DISK_IO_QUERY:
+ retval = disk_query(
+ id,
+ param.buf
+ );
+
+ /* Write back info to user program */
+ error = copyout(param.buf, param.u_buf, param.size);
+ if (error < 0) {
+ retval = error;
+ }
+ break;
+ }
+
+ disk_param_free(&param);
+ return retval;
+}
+
+/*
+ * Disk I/O multiplexer syscall
+ *
+ * arg0: disk id
+ * arg1: opcode
+ * arg2: disk params
+ */
+scret_t
+sys_disk(struct syscall_args *scargs)
+{
+ struct disk_param *u_param = (void *)scargs->arg2;
+ diskid_t id = scargs->arg0;
+ diskop_t opcode = scargs->arg1;
+
+ return disk_mux_io(id, opcode, u_param);
+}
diff --git a/sys/kern/driver_blacklist.c b/sys/kern/driver_blacklist.c
new file mode 100644
index 0000000..982d5c9
--- /dev/null
+++ b/sys/kern/driver_blacklist.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/queue.h>
+#include <sys/driver.h>
+#include <vm/dynalloc.h>
+#include <string.h>
+
+#define BLACKLIST_SIZE 64
+
+/*
+ * A driver blacklist entry
+ *
+ * @name: Name of driver to be blacklisted
+ * @buckets: To handle collisions
+ */
+struct blacklist_entry {
+ char *name;
+ TAILQ_ENTRY(blacklist_entry) link;
+ TAILQ_HEAD(, blacklist_entry) buckets;
+};
+
+static struct blacklist_entry blacklist[BLACKLIST_SIZE];
+
/*
 * Hash a NUL-terminated string using the 32-bit
 * FNV scheme (xor-then-multiply ordering).
 */
static uint32_t
fnv1_hash(const char *s)
{
    /* 32-bit FNV offset basis and FNV prime */
    uint32_t hash = 0x811C9DC5UL;
    const uint8_t *p;

    for (p = (const uint8_t *)s; *p != '\0'; ++p) {
        hash ^= *p;
        hash *= 0x01000193UL;
    }

    return hash;
}
+
+/*
+ * Returns a bucket in case of collision
+ */
+static struct blacklist_entry *
+blacklist_collide(struct blacklist_entry *entp, const char *name)
+{
+ struct blacklist_entry *tmp;
+
+ if (entp->name == NULL) {
+ return NULL;
+ }
+
+ TAILQ_FOREACH(tmp, &entp->buckets, link) {
+ if (strcmp(name, tmp->name) == 0) {
+ return tmp;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Mark a driver to be ignored during startup.
+ * Blacklisted drivers will not be ran.
+ *
+ * @name: Name of driver (e.g., 'ahci')
+ */
+int
+driver_blacklist(const char *name)
+{
+ struct blacklist_entry *ent;
+ struct blacklist_entry *bucket;
+ size_t name_len;
+ uint32_t hash;
+
+ if (name == NULL) {
+ return -EINVAL;
+ }
+
+ hash = fnv1_hash(name);
+ ent = &blacklist[hash % BLACKLIST_SIZE];
+ if (ent->name != NULL) {
+ bucket = dynalloc(sizeof(*bucket));
+ if (bucket == NULL) {
+ return -EINVAL;
+ }
+ TAILQ_INSERT_TAIL(&ent->buckets, bucket, link);
+ return 0;
+ }
+
+ name_len = strlen(name);
+ ent->name = dynalloc(name_len + 1);
+ if (ent->name == NULL) {
+ return -ENOMEM;
+ }
+ memcpy(ent->name, name, name_len + 1);
+ return 0;
+}
+
+/*
+ * Checks if a driver name is in the blacklist.
+ * Returns 0 if not, otherwise 1.
+ */
+int
+driver_blacklist_check(const char *name)
+{
+ struct blacklist_entry *ent;
+ uint32_t hash;
+
+ if (name == NULL) {
+ return -EINVAL;
+ }
+
+ hash = fnv1_hash(name);
+ ent = &blacklist[hash % BLACKLIST_SIZE];
+ if (ent->name == NULL) {
+ return 0;
+ }
+
+ if (strcmp(ent->name, name) == 0) {
+ return 1;
+ }
+
+ ent = blacklist_collide(ent, name);
+ if (ent != NULL) {
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Initialize each entry in the driver
+ * blacklist
+ */
+void
+driver_blacklist_init(void)
+{
+ for (size_t i = 0; i < BLACKLIST_SIZE; ++i) {
+ blacklist[i].name = NULL;
+ TAILQ_INIT(&blacklist[i].buckets);
+ }
+}
diff --git a/sys/kern/driver_subr.c b/sys/kern/driver_subr.c
new file mode 100644
index 0000000..a0f9f73
--- /dev/null
+++ b/sys/kern/driver_subr.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/driver.h>
+#include <sys/proc.h>
+#include <sys/cdefs.h>
+#include <sys/syslog.h>
+#include <sys/panic.h>
+#include <dev/timer.h>
+#include <machine/sync.h>
+
+/*
+ * Initialize early drivers
+ *
+ * XXX: This should *NOT* be called directly,
+ * use DRIVERS_SCHED() instead.
+ */
+void
+__driver_init_td(void)
+{
+ const struct driver *dp;
+ struct driver_var *var;
+ struct proc *td;
+ uintptr_t start, end;
+
+ td = this_td();
+ start = (uintptr_t)__driversd_init_start;
+ end = (uintptr_t)__driversd_init_end;
+
+ for (dp = (void *)start; (uintptr_t)dp < end; ++dp) {
+ var = dp->data;
+
+ /*
+ * Check the blacklist to see if this driver
+ * is marked to be ignored. If so, just continue
+ * to the next.
+ */
+ if (driver_blacklist_check(dp->name)) {
+ continue;
+ }
+
+ if (var->deferred) {
+ dp->init();
+ var->deferred = 0;
+ }
+ }
+
+ exit1(td, 0);
+ __builtin_unreachable();
+}
diff --git a/sys/kern/exec_elf64.c b/sys/kern/exec_elf64.c
index 3767b0b..8dc87dc 100644
--- a/sys/kern/exec_elf64.c
+++ b/sys/kern/exec_elf64.c
@@ -49,11 +49,43 @@
#define PHDR(HDRP, IDX) \
(void *)((uintptr_t)HDRP + (HDRP)->e_phoff + (HDRP->e_phentsize * IDX))
+#define SHDR(HDRP, IDX) \
+ (void *)((uintptr_t)HDRP + (HDRP)->e_shoff + (HDRP->e_shentsize * IDX))
+
struct elf_file {
char *data;
size_t size;
};
+static int
+elf_parse_shdrs(Elf64_Ehdr *eh)
+{
+ Elf64_Shdr *shp;
+ uint32_t nshdr;
+
+ if (eh == NULL) {
+ return -EINVAL;
+ }
+
+ nshdr = eh->e_shnum;
+ for (uint32_t i = 0; i < nshdr; ++i) {
+ shp = SHDR(eh, i);
+
+ /* Drop null entries */
+ if (shp->sh_type == SHT_NULL) {
+ continue;
+ }
+
+ switch (shp->sh_type) {
+ case SHT_NOBITS:
+ memset((void *)shp->sh_addr, 0x0, shp->sh_size);
+ break;
+ }
+ }
+
+ return 0;
+}
+
/*
* Load the file and give back an "elf_file"
* structure.
@@ -80,7 +112,7 @@ elf_get_file(const char *pathname, struct elf_file *res)
getattr_args.res = &vattr;
getattr_args.vp = vp;
- status = vfs_vop_getattr(vp, &getattr_args);
+ status = vfs_vop_getattr(&getattr_args);
if (status != 0)
goto done;
@@ -192,6 +224,7 @@ elf64_load(const char *pathname, struct proc *td, struct exec_prog *prog)
if ((status = elf64_verify(hdr)) != 0)
goto done;
+ memset(loadmap, 0, sizeof(loadmap));
pcbp = &td->pcb;
start = -1;
end = 0;
@@ -242,6 +275,7 @@ elf64_load(const char *pathname, struct proc *td, struct exec_prog *prog)
}
}
+ elf_parse_shdrs(hdr);
memcpy(prog->loadmap, loadmap, sizeof(loadmap));
prog->start = start;
prog->end = end;
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 670d68c..4a0f7a8 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -35,14 +35,26 @@
#include <sys/exec.h>
#include <sys/driver.h>
#include <sys/panic.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <dev/acpi/uacpi.h>
#include <dev/cons/cons.h>
#include <dev/acpi/acpi.h>
#include <machine/cpu.h>
#include <machine/cdefs.h>
#include <vm/vm.h>
+#include <vm/stat.h>
#include <string.h>
-static struct proc proc0;
+#define _START_PATH "/usr/sbin/init"
+#if defined(_INSTALL_MEDIA)
+#define _START_ARG "/usr/sbin/install"
+#else
+#define _START_ARG NULL
+#endif /* _INSTALL_MEDIA */
+
+struct proc g_proc0;
+struct proc *g_init;
static void
copyright(void)
@@ -56,9 +68,10 @@ start_init(void)
{
struct proc *td = this_td();
struct execve_args execve_args;
- char *argv[] = { "/usr/bin/osh", NULL };
+ char *argv[] = { _START_PATH, _START_ARG, NULL };
char *envp[] = { NULL };
+ kprintf("starting init...\n");
execve_args.pathname = argv[0];
execve_args.argv = argv;
execve_args.envp = envp;
@@ -92,6 +105,9 @@ main(void)
/* Init the virtual file system */
vfs_init();
+ /* Init vmstats */
+ vm_stat_init();
+
/* Expose the console to devfs */
cons_expose();
@@ -99,14 +115,25 @@ main(void)
md_intoff();
sched_init();
+ memset(&g_proc0, 0, sizeof(g_proc0));
+ sysctl_clearstr(KERN_HOSTNAME);
+
/* Startup pid 1 */
- memset(&proc0, 0, sizeof(proc0.tf));
- spawn(&proc0, start_init, NULL, 0, NULL);
+ spawn(&g_proc0, start_init, NULL, 0, &g_init);
+ md_inton();
- /* Load all drivers */
+ uacpi_init();
+
+ /* Load all early drivers */
DRIVERS_INIT();
- /* Bootstrap APs and here we go! */
+ /* Only log to kmsg from here */
+ syslog_silence(true);
+
+ /*
+ * Bootstrap APs, schedule all other drivers
+ * and here we go!
+ */
mp_bootstrap_aps(&g_bsp_ci);
sched_enter();
__builtin_unreachable();
diff --git a/sys/kern/kern_accnt.c b/sys/kern/kern_accnt.c
new file mode 100644
index 0000000..51905e7
--- /dev/null
+++ b/sys/kern/kern_accnt.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * System Accounting
+ */
+
+#include <sys/sched.h>
+#include <sys/schedvar.h>
+#include <sys/proc.h>
+#include <fs/ctlfs.h>
+#include <machine/cpu.h>
+#include <string.h>
+
+/* Called within kern_sched.c */
+void sched_accnt_init(void);
+
+static struct ctlops sched_stat_ctl;
+volatile size_t g_nthreads;
+
+static int
+ctl_stat_read(struct ctlfs_dev *cdp, struct sio_txn *sio)
+{
+ struct sched_stat stat;
+
+ if (sio->len > sizeof(stat)) {
+ sio->len = sizeof(stat);
+ }
+
+ sched_stat(&stat);
+ memcpy(sio->buf, &stat, sio->len);
+ return sio->len;
+}
+
+static uint16_t
+cpu_nhlt(void)
+{
+ uint16_t nhlt = 0;
+ struct cpu_info *ci;
+
+ for (size_t i = 0; i < CPU_MAX; ++i) {
+ ci = cpu_get(i);
+ if (ci == NULL) {
+ continue;
+ }
+ if (!ci->online) {
+ ++nhlt;
+ }
+ }
+
+ return nhlt;
+}
+
+/*
+ * Get scheduler accounting information
+ *
+ * @statp: Info gets copied here
+ */
+void
+sched_stat(struct sched_stat *statp)
+{
+ struct sched_cpu *cpustat;
+
+ statp->nproc = atomic_load_64(&g_nthreads);
+ statp->ncpu = cpu_count();
+ statp->quantum_usec = DEFAULT_TIMESLICE_USEC;
+ statp->nhlt = cpu_nhlt();
+
+ /*
+ * Setup the per-cpu info/statistics
+ */
+ for (int i = 0; i < CPU_MAX; ++i) {
+ cpustat = cpu_get_stat(i);
+ if (cpustat == NULL) {
+ break;
+ }
+
+ statp->cpus[i] = *cpustat;
+ }
+}
+
+void
+sched_accnt_init(void)
+{
+ char devname[] = "sched";
+ struct ctlfs_dev ctl;
+
+ /*
+ * Register some accounting information in
+ * '/ctl/sched/stat'
+ */
+ ctl.mode = 0444;
+ ctlfs_create_node(devname, &ctl);
+ ctl.devname = devname;
+ ctl.ops = &sched_stat_ctl;
+ ctlfs_create_entry("stat", &ctl);
+}
+
+static struct ctlops sched_stat_ctl = {
+ .read = ctl_stat_read,
+ .write = NULL
+};
diff --git a/sys/kern/kern_cpu.c b/sys/kern/kern_cpu.c
new file mode 100644
index 0000000..69d44c4
--- /dev/null
+++ b/sys/kern/kern_cpu.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/types.h>
+
+/*
+ * Report the number of processors that are online
+ * in the machine.
+ *
+ * @count: Number of processors active
+ *
+ * Returns zero on success, otherwise a less
+ * than zero value is returned.
+ */
+int
+cpu_report_count(uint32_t count)
+{
+ struct sysctl_args args;
+ int error, name = HW_NCPU;
+
+ args.name = &name;
+ args.nlen = 1;
+ args.oldlenp = 0;
+ args.oldp = NULL;
+ args.newp = &count;
+ args.newlen = sizeof(count);
+
+ if ((error = sysctl(&args)) != 0) {
+ return error;
+ }
+
+ return 0;
+}
diff --git a/sys/kern/kern_cred.c b/sys/kern/kern_cred.c
new file mode 100644
index 0000000..017b22a
--- /dev/null
+++ b/sys/kern/kern_cred.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/ucred.h>
+#include <sys/proc.h>
+
+int
+setuid(uid_t new)
+{
+ struct proc *td;
+ struct ucred *cur_cred;
+
+ td = this_td();
+ cur_cred = &td->cred;
+
+ /*
+ * Only root can become other users. If you are not
+ * root, fuck off.
+ */
+ if (cur_cred->ruid != 0) {
+ return -EPERM;
+ }
+
+ spinlock_acquire(&cur_cred->lock);
+ cur_cred->euid = new;
+ cur_cred->ruid = new;
+ spinlock_release(&cur_cred->lock);
+ return 0;
+}
+
+uid_t
+getuid(void)
+{
+ struct proc *td;
+
+ td = this_td();
+ if (td == NULL) {
+ return -1;
+ }
+
+ return td->cred.ruid;
+}
+
+/*
+ * setuid() syscall
+ *
+ * arg0: `new'
+ */
+scret_t
+sys_setuid(struct syscall_args *scargs)
+{
+ return setuid(scargs->arg0);
+}
+
+scret_t
+sys_getuid(struct syscall_args *scargs)
+{
+ return getuid();
+}
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index d122e89..83845f6 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -41,6 +41,7 @@
/*
* Allocate a file descriptor.
*
+ * @td: Process to allocate from (null for CURRENT)
* @fd_out: Pointer to allocated file descriptor output.
*
* This routine will create a new file descriptor
@@ -49,10 +50,13 @@
* Returns 0 on success.
*/
int
-fd_alloc(struct filedesc **fd_out)
+fd_alloc(struct proc *td, struct filedesc **fd_out)
{
struct filedesc *fd;
- struct proc *td = this_td();
+
+ if (td == NULL) {
+ td = this_td();
+ }
/* Find free fd table entry */
for (size_t i = 3; i < PROC_MAX_FILEDES; ++i) {
@@ -85,12 +89,15 @@ fd_alloc(struct filedesc **fd_out)
* Fetch a file descriptor from a file descriptor
* number.
*
+ * @td: Process to get fd from (NULL for current)
* @fdno: File descriptor to fetch
*/
struct filedesc *
-fd_get(unsigned int fdno)
+fd_get(struct proc *td, unsigned int fdno)
{
- struct proc *td = this_td();
+ if (td == NULL) {
+ td = this_td();
+ }
if (fdno > PROC_MAX_FILEDES) {
return NULL;
@@ -111,7 +118,7 @@ fd_close(unsigned int fd)
struct filedesc *filedes;
struct proc *td;
- if ((filedes = fd_get(fd)) == NULL) {
+ if ((filedes = fd_get(NULL, fd)) == NULL) {
return -EBADF;
}
@@ -149,18 +156,32 @@ fd_rw(unsigned int fd, void *buf, size_t count, uint8_t write)
{
char *kbuf = NULL;
ssize_t n;
+ uint32_t seal;
struct filedesc *filedes;
struct sio_txn sio;
scret_t retval = 0;
+ if (fd > PROC_MAX_FILEDES) {
+ return -EBADF;
+ }
+
if (count > SSIZE_MAX) {
retval = -EINVAL;
goto done;
}
- filedes = fd_get(fd);
- kbuf = dynalloc(count);
+ filedes = fd_get(NULL, fd);
+ seal = filedes->flags;
+ /* Check the seal */
+ if (write && !ISSET(seal, O_ALLOW_WR)) {
+ return -EPERM;
+ }
+ if (!write && ISSET(seal, O_WRONLY)) {
+ return -EPERM;
+ }
+
+ kbuf = dynalloc(count);
if (kbuf == NULL) {
retval = -ENOMEM;
goto done;
@@ -187,6 +208,7 @@ fd_rw(unsigned int fd, void *buf, size_t count, uint8_t write)
sio.buf = kbuf;
sio.offset = filedes->offset;
+ spinlock_acquire(&filedes->lock);
if (write) {
/* Copy in user buffer */
if (copyin(buf, kbuf, count) < 0) {
@@ -205,19 +227,52 @@ fd_rw(unsigned int fd, void *buf, size_t count, uint8_t write)
goto done;
}
+ /* End of file? */
+ if (n == 0) {
+ retval = 0;
+ goto done;
+ }
+
if (copyout(kbuf, buf, count) < 0) {
retval = -EFAULT;
goto done;
}
}
- retval = count;
+
+ /* Increment the offset per read */
+ filedes->offset += n;
+ retval = n;
done:
if (kbuf != NULL) {
dynfree(kbuf);
}
+ spinlock_release(&filedes->lock);
return retval;
}
+static int
+fd_do_create(const char *path, struct nameidata *ndp)
+{
+ struct vop_create_args cargs;
+ struct vnode *dirvp = ndp->vp;
+ const struct vops *vops = dirvp->vops;
+ int error;
+
+ if (vops->create == NULL) {
+ return -EINVAL;
+ }
+
+ cargs.path = path;
+ cargs.ppath = ndp->path;
+ cargs.dirvp = dirvp;
+ cargs.vpp = &ndp->vp;
+ if ((error = vops->create(&cargs)) < 0) {
+ return error;
+ }
+
+ return 0;
+}
+
int
fd_read(unsigned int fd, void *buf, size_t count)
{
@@ -236,28 +291,35 @@ fd_write(unsigned int fd, void *buf, size_t count)
*
* @pathname: Path of file to open.
* @flags: Flags to use.
- *
- * TODO: Use of flags.
*/
int
fd_open(const char *pathname, int flags)
{
int error;
+ const struct vops *vops;
struct filedesc *filedes;
struct nameidata nd;
nd.path = pathname;
- nd.flags = 0;
+ nd.flags = ISSET(flags, O_CREAT) ? NAMEI_WANTPARENT : 0;
if ((error = namei(&nd)) < 0) {
return error;
}
- if ((error = fd_alloc(&filedes)) != 0) {
+ if ((error = fd_alloc(NULL, &filedes)) != 0) {
vfs_release_vnode(nd.vp);
return error;
}
+ vops = nd.vp->vops;
+ if (ISSET(flags, O_CREAT) && vops->create != NULL) {
+ error = fd_do_create(pathname, &nd);
+ }
+ if (error < 0) {
+ return error;
+ }
+
filedes->vp = nd.vp;
filedes->flags = flags;
return filedes->fdno;
@@ -266,18 +328,25 @@ fd_open(const char *pathname, int flags)
/*
* Duplicate a file descriptor. New file descriptor
* points to the same vnode.
+ *
+ * @td: Process of fd to dup (NULL for current)
+ * @fd: File descriptor to dup
*/
int
-fd_dup(int fd)
+fd_dup(struct proc *td, int fd)
{
int error;
struct filedesc *new_desc, *tmp;
- tmp = fd_get(fd);
+ if (td == NULL) {
+ td = this_td();
+ }
+
+ tmp = fd_get(td, fd);
if (tmp == NULL)
return -EBADF;
- if ((error = fd_alloc(&new_desc)) != 0)
+ if ((error = fd_alloc(td, &new_desc)) != 0)
return error;
/* Ref that vnode before we point to it */
@@ -285,3 +354,51 @@ fd_dup(int fd)
new_desc->vp = tmp->vp;
return new_desc->fdno;
}
+
+off_t
+fd_seek(int fildes, off_t offset, int whence)
+{
+ struct filedesc *tmp;
+ struct vattr attr;
+ struct vop_getattr_args getattr_args;
+
+ tmp = fd_get(NULL, fildes);
+ if (tmp == NULL) {
+ return -EBADF;
+ }
+
+ getattr_args.vp = tmp->vp;
+ getattr_args.res = &attr;
+ if ((vfs_vop_getattr(&getattr_args)) < 0) {
+ return -EPIPE;
+ }
+
+ switch (whence) {
+ case SEEK_SET:
+ tmp->offset = offset;
+ break;
+ case SEEK_CUR:
+ tmp->offset += offset;
+ break;
+ case SEEK_END:
+ tmp->offset = attr.size + offset;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return tmp->offset;
+}
+
+/*
+ * Update file offset
+ *
+ * arg0: `filedes'
+ * arg1: `offset'
+ * arg2: `whence'
+ */
+scret_t
+sys_lseek(struct syscall_args *scargs)
+{
+ return fd_seek(scargs->arg0, scargs->arg1, scargs->arg2);
+}
diff --git a/sys/kern/kern_disk.c b/sys/kern/kern_disk.c
new file mode 100644
index 0000000..a3fa05e
--- /dev/null
+++ b/sys/kern/kern_disk.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+#include <sys/sio.h>
+#include <sys/param.h>
+#include <sys/panic.h>
+#include <sys/spinlock.h>
+#include <sys/device.h>
+#include <sys/disk.h>
+#include <vm/dynalloc.h>
+#include <assert.h>
+#include <string.h>
+
+#define pr_trace(fmt, ...) kprintf("disk: " fmt, ##__VA_ARGS__)
+#define pr_error(...) pr_trace(__VA_ARGS__)
+
+#define DEFAULT_BSIZE 512 /* Default block size in bytes */
+#define DISKQ_COOKIE 0xD9EA /* Verification cookie */
+
+/*
+ * The maximum disks supported by the kernel
+ * is defined by the `DISK_MAX' kconf(9) option.
+ *
+ * We define a default of 16 if that option is not
+ * specified.
+ */
+#if defined(__DISK_MAX)
+#define DISK_MAX __DISK_MAX
+#else
+#define DISK_MAX 16 /* Maximum disks */
+#endif
+
+/*
+ * We set a hard limit at 64 disks to prevent misconfiguration as
+ * it is unlikely that one would ever have that many on a single
+ * instance. Though of course, anything is possible, so one may
+ * patch the hard limit defined below to a higher value if needed.
+ */
+__static_assert(DISK_MAX < 64, "DISK_MAX exceeds hard limit");
+
+/*
+ * The disk queue stores descriptors of disks that
+ * are registered with the system. This allows for
+ * easy and simplified access of the storage medium.
+ *
+ * XXX: An array would be more efficent, however disks
+ * could be detached or swapped during runtime thus
+ * making the usage of queues a more sane design.
+ *
+ * This also provides the added benefit of lazy-allocation
+ * so memory isn't wasted and only allocated when we actually
+ * have a disk descriptor that it would be used to store.
+ */
+static struct spinlock diskq_lock;
+static TAILQ_HEAD(, disk) diskq;
+static uint16_t disk_count = 0;
+static uint16_t diskq_cookie = 0;
+
+/*
+ * Validate a disk descriptor's initialization cookie.
+ *
+ * @dp: Descriptor to check (must not be NULL)
+ *
+ * Returns zero when the cookie matches DISKQ_COOKIE,
+ * otherwise a less than zero value is returned.
+ */
+__always_inline static inline int
+check_disk_cookie(struct disk *dp)
+{
+    __assert(dp != NULL);
+    if (dp->cookie != DISKQ_COOKIE) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Verify if the disk queue is initialized and
+ * ready for descriptors to be added.
+ *
+ * Returns a value of zero if it has already been
+ * initialized, otherwise a value less than zero
+ * is returned after check_diskq() initializes
+ * the disk queue.
+ *
+ * NOTE(review): this lazy initialization is not lock
+ * protected; presumably the first disk_add() happens
+ * before concurrent callers exist -- confirm.
+ */
+static inline int
+check_diskq(void)
+{
+    if (diskq_cookie != DISKQ_COOKIE) {
+        /* First call: set up the queue and mark it live */
+        TAILQ_INIT(&diskq);
+        diskq_cookie = DISKQ_COOKIE;
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Lockless lookup of a disk descriptor by its
+ * zero-based index.  Walks the disk queue and
+ * returns the matching entry, or NULL when the
+ * index is out of range or absent.
+ *
+ * @id: Disk index
+ *
+ * XXX: This is the lockless internal implementation,
+ *      please use disk_get_id() instead.
+ */
+static struct disk *
+__disk_get_id(diskid_t id)
+{
+    struct disk *dp;
+
+    if (id >= disk_count) {
+        return NULL;
+    }
+
+    /*
+     * Scan the queue front-to-back and hand back the
+     * first descriptor whose ID matches.  An empty
+     * queue simply falls through to NULL.
+     */
+    TAILQ_FOREACH(dp, &diskq, link) {
+        if (dp->id == id) {
+            return dp;
+        }
+    }
+
+    /* Nothing found :( */
+    return NULL;
+}
+
+/*
+ * Attempt to perform a read/write operation on
+ * a disk.
+ *
+ * @id: ID of disk to operate on
+ * @blk: Block offset to read at
+ * @buf: Buffer to read data into
+ * @len: Number of bytes to read
+ * @write: If true, do a write
+ *
+ * XXX: The size in which blocks are read at is in
+ * virtual blocks which is defined by V_BSIZE
+ * in sys/disk.h
+ */
+static ssize_t
+disk_rw(diskid_t id, blkoff_t blk, void *buf, size_t len, bool write)
+{
+    const struct bdevsw *bdev;
+    struct sio_txn sio;
+    struct disk *dp;
+    int error;
+
+    /*
+     * Transfers are performed in whole virtual blocks, so
+     * round the length up.  `buf' must be at least this big;
+     * disk_buf_alloc() sizes its buffers the same way.
+     */
+    len = ALIGN_UP(len, V_BSIZE);
+
+    /* Attempt to grab the disk object */
+    error = disk_get_id(id, &dp);
+    if (error < 0) {
+        return error;
+    }
+
+    /* Sanity check, should not happen */
+    bdev = dp->bdev;
+    if (__unlikely(bdev == NULL)) {
+        return -EIO;
+    }
+
+    /* Prepare the buffer; byte offset is in hardware block units */
+    sio.buf = buf;
+    sio.offset = blk * dp->bsize;
+    sio.len = len;
+
+    /* Handle writes */
+    if (write) {
+        if (bdev->write == NULL) {
+            return -ENOTSUP;
+        }
+
+        return bdev->write(dp->dev, &sio, 0);
+    }
+
+    /* Do we support this operation? */
+    if (bdev->read == NULL) {
+        return -ENOTSUP;
+    }
+
+    return bdev->read(dp->dev, &sio, 0);
+}
+
+/*
+ * Register a disk with the system so that it may
+ * be accessible independently of its device major
+ * and minor numbers
+ *
+ * @name: Name of the disk
+ * @dev: Device minor
+ * @bdev: Block device operations associated with device
+ *
+ * Returns zero on success, otherwise a less than zero
+ * value is returned.
+ */
+int
+disk_add(const char *name, dev_t dev, const struct bdevsw *bdev, int flags)
+{
+    struct disk *dp;
+    size_t name_len;
+
+    if (name == NULL || bdev == NULL) {
+        return -EINVAL;
+    }
+
+    /* Disk queue must be initialized */
+    check_diskq();
+
+    /* There is a limit to how many can be added */
+    if (disk_count >= DISK_MAX) {
+        pr_error("disk_add: disk limit %d/%d reached\n",
+            disk_count, DISK_MAX);
+        return -EAGAIN;
+    }
+
+    /* Is the disk name of correct length? */
+    name_len = strlen(name);
+    if (name_len >= sizeof(dp->name) - 1) {
+        /* %zu: `name_len' is a size_t (passing it to %d is UB) */
+        pr_error("disk_add: name too big (len=%zu)\n", name_len);
+        return -E2BIG;
+    }
+
+    dp = dynalloc(sizeof(*dp));
+    if (dp == NULL) {
+        pr_error("failed to allocate disk\n");
+        return -ENOMEM;
+    }
+
+    /* Initialize the descriptor; memset leaves the name NUL terminated */
+    memset(dp, 0, sizeof(*dp));
+    memcpy(dp->name, name, name_len);
+    dp->cookie = DISKQ_COOKIE;
+    dp->bdev = bdev;
+    dp->dev = dev;
+    dp->id = disk_count++;
+    dp->bsize = DEFAULT_BSIZE;
+
+    /*
+     * We are to panic if the virtual blocksize
+     * defined is not a multiple of any hardware
+     * block size
+     */
+    if ((V_BSIZE & (dp->bsize - 1)) != 0) {
+        panic("virtual block size not hw bsize aligned\n");
+    }
+
+    /* Now we can add it to the queue */
+    spinlock_acquire(&diskq_lock);
+    TAILQ_INSERT_TAIL(&diskq, dp, link);
+    spinlock_release(&diskq_lock);
+    return 0;
+}
+
+/*
+ * Acquire a disk descriptor by using a zero-based
+ * index.
+ *
+ * @id: Disk index (0: primary)
+ * @res: Resulting disk descriptor
+ *
+ * Returns zero on success, otherwise a less than
+ * zero value is returned.
+ */
+int
+disk_get_id(diskid_t id, struct disk **res)
+{
+    struct disk *dp;
+
+    if (res == NULL) {
+        return -EINVAL;
+    }
+    if (id >= disk_count) {
+        return -ENODEV;
+    }
+
+    /* Look the descriptor up under the queue lock */
+    spinlock_acquire(&diskq_lock);
+    dp = __disk_get_id(id);
+    spinlock_release(&diskq_lock);
+    if (dp == NULL) {
+        return -ENODEV;
+    }
+
+    /*
+     * A bad cookie on a queued descriptor means internal
+     * state corruption -- there is nothing sane to do.
+     */
+    if (__unlikely(check_disk_cookie(dp) < 0)) {
+        panic("disk_get_id: got bad disk object\n");
+    }
+
+    *res = dp;
+    return 0;
+}
+
+/*
+ * Allocate a memory buffer that may be used for
+ * disk I/O.
+ *
+ * @id: ID of disk buffer will be used for
+ * @len: Length to allocate
+ */
+void *
+disk_buf_alloc(diskid_t id, size_t len)
+{
+    struct disk *dp;
+
+    if (len == 0) {
+        return NULL;
+    }
+
+    /* The disk must exist for the buffer to be meaningful */
+    if (disk_get_id(id, &dp) < 0) {
+        return NULL;
+    }
+
+    /*
+     * Round the length up to the virtual block size so
+     * the buffer can hold a whole number of virtual
+     * blocks, matching what disk_rw() will transfer.
+     */
+    return dynalloc(ALIGN_UP(len, V_BSIZE));
+}
+
+/*
+ * Release a buffer obtained from disk_buf_alloc().
+ * Passing NULL is a harmless no-op.
+ */
+void
+disk_buf_free(void *p)
+{
+    if (p == NULL) {
+        return;
+    }
+
+    dynfree(p);
+}
+
+/*
+ * Attempt to perform a read operation on
+ * a disk.
+ *
+ * @id: ID of disk to operate on
+ * @blk: Block offset to read at
+ * @buf: Buffer to read data into
+ * @len: Number of bytes to read
+ */
+ssize_t
+disk_read(diskid_t id, blkoff_t blk, void *buf, size_t len)
+{
+    char *bounce;
+    ssize_t n;
+
+    /* Stage the transfer through an aligned bounce buffer */
+    bounce = disk_buf_alloc(id, len);
+    if (bounce == NULL) {
+        return -ENOMEM;
+    }
+
+    n = disk_rw(id, blk, bounce, len, false);
+    if (n >= 0) {
+        /* Success: hand the requested bytes to the caller */
+        memcpy(buf, bounce, len);
+    }
+
+    disk_buf_free(bounce);
+    return n;
+}
+
+/*
+ * Attempt to perform a write operation on
+ * a disk.
+ *
+ * @id: ID of disk to operate on
+ * @blk: Block offset to read at
+ * @buf: Buffer containing data to write
+ * @len: Number of bytes to read
+ */
+ssize_t
+disk_write(diskid_t id, blkoff_t blk, const void *buf, size_t len)
+{
+    ssize_t retval;
+    size_t buf_len;
+    char *tmp;
+
+    tmp = disk_buf_alloc(id, len);
+    if (tmp == NULL) {
+        return -ENOMEM;
+    }
+
+    /*
+     * The bounce buffer is rounded up to the virtual block
+     * size but only `len' bytes are copied in; zero the
+     * alignment padding so stale heap contents are never
+     * written out to the medium.
+     */
+    buf_len = ALIGN_UP(len, V_BSIZE);
+    if (buf_len > len) {
+        memset(&tmp[len], 0, buf_len - len);
+    }
+
+    memcpy(tmp, buf, len);
+    retval = disk_rw(id, blk, tmp, len, true);
+    disk_buf_free(tmp);
+    return retval;
+}
+
+/*
+ * Attempt to request attributes from a specific
+ * device.
+ *
+ * @id: ID of disk to query
+ * @res: Resulting information goes here
+ *
+ * This function returns zero on success, otherwise
+ * a less than zero value is returned.
+ */
+int
+disk_query(diskid_t id, struct disk_info *res)
+{
+    const struct bdevsw *bdev;
+    struct disk *dp;
+    int error;
+
+    if (res == NULL) {
+        return -EINVAL;
+    }
+
+    /* Attempt to grab the disk */
+    error = disk_get_id(id, &dp);
+    if (error < 0) {
+        pr_error("disk_query: bad disk ID %d\n", id);
+        return error;
+    }
+
+    bdev = dp->bdev;
+    if (__unlikely(bdev == NULL)) {
+        pr_error("disk_query: no bdev for disk %d\n", id);
+        return -EIO;
+    }
+
+    /* Fill out the caller's info structure */
+    res->block_size = dp->bsize;
+    res->vblock_size = V_BSIZE;
+    /* NOTE(review): despite the name, bdev->bsize() appears to supply the block count here -- confirm */
+    res->n_block = bdev->bsize(dp->dev);
+    return 0;
+}
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index bf6a26e..2a53b8a 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -37,6 +37,7 @@
#include <vm/map.h>
#include <vm/physmem.h>
#include <machine/pcb.h>
+#include <machine/cdefs.h>
#include <string.h>
/*
@@ -87,6 +88,7 @@ execve(struct proc *td, const struct execve_args *args)
release_stack(td);
/* Save program state */
+ md_intoff();
memcpy(&td->exec, &prog, sizeof(td->exec));
/* Set new stack and map it to userspace */
@@ -99,7 +101,7 @@ execve(struct proc *td, const struct execve_args *args)
stack_top = td->stack_base + (PROC_STACK_SIZE - 1);
/* Setup registers, signals and stack */
- md_td_stackinit(td, (void *)(stack_top + VM_HIGHER_HALF), &prog);
+ stack_top = md_td_stackinit(td, (void *)(stack_top + VM_HIGHER_HALF), &prog);
setregs(td, &prog, stack_top);
signals_init(td);
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 24c7e74..af697d7 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -30,15 +30,24 @@
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/syslog.h>
+#include <sys/atomic.h>
+#include <sys/panic.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <dev/cons/cons.h>
#include <vm/physmem.h>
#include <vm/dynalloc.h>
#include <vm/vm.h>
#include <vm/map.h>
#include <machine/pcb.h>
+#include <machine/cpu.h>
#define pr_trace(fmt, ...) kprintf("exit: " fmt, ##__VA_ARGS__)
#define pr_error(...) pr_trace(__VA_ARGS__)
+extern volatile size_t g_nthreads;
+extern struct proc g_init;
+
static void
unload_td(struct proc *td)
{
@@ -48,6 +57,11 @@ unload_td(struct proc *td)
struct pcb *pcbp;
size_t len;
+ sched_detach(td);
+ if (ISSET(td->flags, PROC_KTD)) {
+ return;
+ }
+
execp = &td->exec;
auxvalp = &execp->auxval;
pcbp = &td->pcb;
@@ -72,48 +86,92 @@ unload_td(struct proc *td)
}
}
+void
+proc_reap(struct proc *td)
+{
+ struct pcb *pcbp;
+ struct filedesc *fdp;
+ vaddr_t stack_va;
+ paddr_t stack_pa;
+
+ cons_detach();
+
+ /* Clear out all fds */
+ for (size_t i = 4; i < PROC_MAX_FILEDES; ++i) {
+ fdp = td->fds[i];
+ if (fdp == NULL) {
+ continue;
+ }
+ if (fdp->refcnt == 1) {
+ vfs_release_vnode(fdp->vp);
+ dynfree(fdp);
+ fdp = NULL;
+ }
+ }
+
+ pcbp = &td->pcb;
+ unload_td(td);
+
+ /*
+ * User space stacks are identity mapped and
+ * kernel space stacks are not.
+ */
+ if (ISSET(td->flags, PROC_KTD)) {
+ stack_va = td->stack_base;
+ stack_pa = td->stack_base - VM_HIGHER_HALF;
+ } else {
+ stack_va = td->stack_base;
+ stack_pa = td->stack_base;
+ vm_unmap(pcbp->addrsp, stack_va, PROC_STACK_SIZE);
+ }
+
+ vm_free_frame(stack_pa, PROC_STACK_PAGES);
+ pmap_destroy_vas(pcbp->addrsp);
+}
+
/*
* Kill a thread and deallocate its resources.
*
* @td: Thread to exit
*/
int
-exit1(struct proc *td)
+exit1(struct proc *td, int flags)
{
- struct pcb *pcbp;
struct proc *curtd, *procp;
- uintptr_t stack;
+ struct proc *parent;
+ struct cpu_info *ci;
pid_t target_pid, curpid;
+ if (td->pid == 1) {
+ panic("init died\n");
+ }
+
+ ci = this_cpu();
target_pid = td->pid;
curtd = this_td();
- pcbp = &td->pcb;
curpid = curtd->pid;
- stack = td->stack_base;
+
td->flags |= PROC_EXITING;
+ parent = td->parent;
- /* If we have any children, kill them too */
+ /* We have one less process in the system! */
+ atomic_dec_64(&g_nthreads);
+
+ /* Reassign children to init */
if (td->nleaves > 0) {
TAILQ_FOREACH(procp, &td->leafq, leaf_link) {
- exit1(procp);
+ procp->parent = &g_init;
}
}
- /*
- * If this is on the higher half, it is kernel
- * mapped and we need to convert it to a physical
- * address.
- */
- if (stack >= VM_HIGHER_HALF) {
- stack -= VM_HIGHER_HALF;
+ if (target_pid != curpid) {
+ proc_reap(td);
}
- unload_td(td);
- vm_unmap(pcbp->addrsp, td->stack_base, PROC_STACK_SIZE);
- vm_free_frame(stack, PROC_STACK_PAGES);
-
- pmap_destroy_vas(pcbp->addrsp);
+ if (td->data != NULL) {
+ dynfree(td->data);
+ }
/*
* Only free the process structure if we aren't
@@ -124,14 +182,29 @@ exit1(struct proc *td)
dynfree(td);
} else {
td->flags |= PROC_ZOMB;
+ td->flags &= ~PROC_WAITED;
}
/*
* If we are the thread exiting, reenter the scheduler
* and do not return.
*/
- if (target_pid == curpid)
+ if (target_pid == curpid) {
+ /*
+ * If the thread is exiting on a core that is not
+ * preemptable, something is not right.
+ */
+ if (__unlikely(!sched_preemptable())) {
+ panic("exit1: cpu %d not preemptable\n", ci->id);
+ }
+
+ ci->curtd = NULL;
+ if (parent->pid == 0)
+ sched_enter();
+
+ parent->flags &= ~PROC_SLEEP;
sched_enter();
+ }
return 0;
}
@@ -145,6 +218,6 @@ sys_exit(struct syscall_args *scargs)
struct proc *td = this_td();
td->exit_status = scargs->arg0;
- exit1(td);
+ exit1(td, 0);
__builtin_unreachable();
}
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
new file mode 100644
index 0000000..2755ea0
--- /dev/null
+++ b/sys/kern/kern_fork.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/proc.h>
+
+
diff --git a/sys/kern/kern_krq.c b/sys/kern/kern_krq.c
new file mode 100644
index 0000000..c12a98c
--- /dev/null
+++ b/sys/kern/kern_krq.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/syscall.h>
+#include <sys/krq.h>
+#include <sys/errno.h>
+#include <sys/spinlock.h>
+#include <sys/driver.h>
+#include <sys/syslog.h>
+
+static struct spinlock krq_lock = {0};
+
+/*
+ * Load a kernel runtime quantum (KRQ)
+ *
+ * @arg0: path
+ *
+ * XXX: If the 'path' argument is NULL, all deferrable
+ * drivers are loaded.
+ *
+ * TODO: Handle non-null paths where a completly seperate
+ * module/krq can be loaded.
+ */
+scret_t
+sys_inject(struct syscall_args *scargs)
+{
+    /* Only the "load all deferred drivers" form (path == NULL) is supported */
+    if (scargs->arg0 != 0) {
+        return -EINVAL;
+    }
+
+    /* Serialize driver loading; DRIVERS_SCHED() runs the deferred inits */
+    spinlock_acquire(&krq_lock);
+    DRIVERS_SCHED();
+    spinlock_release(&krq_lock);
+    return 0;
+}
diff --git a/sys/kern/kern_panic.c b/sys/kern/kern_panic.c
index 950ea8f..13b4964 100644
--- a/sys/kern/kern_panic.c
+++ b/sys/kern/kern_panic.c
@@ -31,6 +31,25 @@
#include <sys/spinlock.h>
#include <sys/syslog.h>
#include <sys/reboot.h>
+#include <dev/cons/cons.h>
+#include <machine/cdefs.h>
+#include <machine/cpu.h>
+#include <string.h>
+
+#if defined(__PANIC_SCR)
+#define PANIC_SCR __PANIC_SCR
+#else
+#define PANIC_SCR 0
+#endif
+
+/*
+ * Write a string directly to the root console screen,
+ * bypassing kprintf/syslog (used on the panic path where
+ * normal logging may be silenced or its locks wedged).
+ */
+static void
+panic_puts(const char *str)
+{
+    size_t len;
+
+    len = strlen(str);
+    cons_putstr(&g_root_scr, str, len);
+}
/*
* Burn and sizzle - the core logic that really ends
@@ -47,14 +66,40 @@ bas(bool do_trace, int reboot_type)
spinlock_acquire(&lock); /* Never released */
if (do_trace) {
- kprintf(OMIT_TIMESTAMP "** backtrace\n");
+ panic_puts(" ** backtrace\n");
md_backtrace();
}
+ panic_puts("\n-- ALL CORES HAVE BEEN HALTED --\n");
cpu_reboot(reboot_type);
__builtin_unreachable();
}
+/*
+ * Repaint the root console for the panic display.
+ * Only touches the screen when a framebuffer is mapped.
+ */
+static void
+panic_screen(void)
+{
+    struct cons_screen *scr = &g_root_scr;
+
+    if (scr->fb_mem != NULL) {
+        /* Dark red background, pale foreground, grey clear color */
+        scr->bg = 0x8B0000;
+        scr->fg = 0xAABBAA;
+        cons_reset_cursor(scr);
+        cons_clear_scr(scr, 0x393B39);
+    }
+}
+
+/*
+ * Final stage of panic(): un-silence the log, print the
+ * formatted message, then halt via bas().  Never returns.
+ *
+ * NOTE(review): the console lock is force-released here,
+ * presumably in case the panicking CPU still holds it --
+ * safe only because other cores were halted by the caller.
+ */
+static void
+do_panic(const char *fmt, va_list *ap)
+{
+    syslog_silence(false);
+    spinlock_release(&g_root_scr.lock);
+    panic_puts("panic: ");
+    vkprintf(fmt, ap);
+    bas(true, REBOOT_HALT);
+
+    __builtin_unreachable();
+}
+
/*
* Tells the user something terribly wrong happened then
* halting the system as soon as possible.
@@ -69,11 +114,15 @@ panic(const char *fmt, ...)
{
va_list ap;
- va_start(ap, fmt);
- kprintf(OMIT_TIMESTAMP "panic: ");
- vkprintf(fmt, &ap);
- bas(true, REBOOT_HALT);
+ /* Shut everything else up */
+ md_intoff();
+ cpu_halt_others();
+ if (PANIC_SCR) {
+ panic_screen();
+ }
+ va_start(ap, fmt);
+ do_panic(fmt, &ap);
__builtin_unreachable();
}
@@ -89,7 +138,6 @@ hcf(const char *fmt, ...)
{
va_list ap;
-
if (fmt != NULL) {
va_start(ap, fmt);
kprintf(OMIT_TIMESTAMP);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
new file mode 100644
index 0000000..8bc5680
--- /dev/null
+++ b/sys/kern/kern_proc.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/cdefs.h>
+#include <sys/vnode.h>
+#include <sys/tree.h>
+#include <sys/syscall.h>
+#include <sys/filedesc.h>
+#include <sys/fcntl.h>
+#include <string.h>
+#include <crc32.h>
+
+extern volatile size_t g_nthreads;
+
+/*
+ * Return the PID of the calling process, or -1
+ * when no process context is available.
+ */
+pid_t
+getpid(void)
+{
+    struct proc *self = this_td();
+
+    if (self == NULL) {
+        return -1;
+    }
+
+    return self->pid;
+}
+
+/*
+ * Return the parent PID of the calling process, or -1
+ * when there is no process context or no parent.
+ */
+pid_t
+getppid(void)
+{
+    struct proc *self = this_td();
+
+    if (self == NULL || self->parent == NULL) {
+        return -1;
+    }
+
+    return self->parent->pid;
+}
+
+/*
+ * Write a checksummed core dump of `td' to /tmp/core.<pid>.
+ *
+ * @td: Faulting process
+ * @fault_addr: Address that triggered the fault
+ *
+ * Best-effort only: we are already on a fatal path, so
+ * failures are silently ignored.
+ */
+void
+proc_coredump(struct proc *td, uintptr_t fault_addr)
+{
+    struct coredump core;
+    struct sio_txn sio;
+    struct filedesc *fdp;
+    char pathname[128];
+    int fd;
+
+    snprintf(pathname, sizeof(pathname), "/tmp/core.%d", td->pid);
+    fd = fd_open(pathname, O_RDWR | O_CREAT);
+
+    /* ... Hopefully not */
+    if (__unlikely(fd < 0)) {
+        return;
+    }
+
+    core.pid = td->pid;
+    core.fault_addr = fault_addr;
+    memcpy(&core.tf, &td->tf, sizeof(td->tf));
+
+    /* Checksum covers everything but the checksum field itself */
+    core.checksum = crc32(&core, sizeof(core) - sizeof(core.checksum));
+
+    /*
+     * Guard the descriptor lookup: dereferencing a NULL
+     * fd_get() result would fault inside the fault path.
+     */
+    fdp = fd_get(NULL, fd);
+    if (__unlikely(fdp == NULL)) {
+        fd_close(fd);
+        return;
+    }
+
+    sio.buf = &core;
+    sio.len = sizeof(core);
+    sio.offset = 0;
+
+    /* Write the core file */
+    vfs_vop_write(fdp->vp, &sio);
+    fd_close(fd);
+}
+
+/*
+ * Initialize a freshly allocated process and link it
+ * beneath `parent'.
+ *
+ * @td: Process to initialize
+ * @parent: Parent process to attach it to
+ *
+ * Returns zero on success, -ENOMEM when the mmap ledger
+ * cannot be allocated.
+ *
+ * NOTE(review): the leafq insertion itself is not lock
+ * protected (only the counters are atomic); presumably
+ * callers serialize process creation -- confirm.
+ */
+int
+proc_init(struct proc *td, struct proc *parent)
+{
+    struct mmap_lgdr *mlgdr;
+
+    mlgdr = dynalloc(sizeof(*mlgdr));
+    if (mlgdr == NULL) {
+        return -ENOMEM;
+    }
+
+    /* Add to parent leafq */
+    TAILQ_INSERT_TAIL(&parent->leafq, td, leaf_link);
+    atomic_inc_int(&parent->nleaves);
+    atomic_inc_64(&g_nthreads);
+    td->parent = parent;
+    td->exit_status = -1;
+    /* Credentials are shared with the parent by direct copy */
+    td->cred = parent->cred;
+
+    /* Initialize the mmap ledger */
+    mlgdr->nbytes = 0;
+    RBT_INIT(lgdr_entries, &mlgdr->hd);
+    td->mlgdr = mlgdr;
+    /* NOTE(review): new processes start with PROC_WAITED set -- see wait/yield interplay */
+    td->flags |= PROC_WAITED;
+    signals_init(td);
+    return 0;
+}
+
+/* getpid() syscall entry point: return the caller's PID */
+scret_t
+sys_getpid(struct syscall_args *scargs)
+{
+    return getpid();
+}
+
+/* getppid() syscall entry point: return the caller's parent PID */
+scret_t
+sys_getppid(struct syscall_args *scargs)
+{
+    return getppid();
+}
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
index 4bbe5a0..9c5e215 100644
--- a/sys/kern/kern_sched.c
+++ b/sys/kern/kern_sched.c
@@ -34,6 +34,7 @@
#include <sys/param.h>
#include <sys/syslog.h>
#include <sys/atomic.h>
+#include <dev/cons/cons.h>
#include <machine/frame.h>
#include <machine/cpu.h>
#include <machine/cdefs.h>
@@ -44,7 +45,8 @@
#define pr_trace(fmt, ...) kprintf("ksched: " fmt, ##__VA_ARGS__)
-void sched_switch(struct trapframe *tf);
+void md_sched_switch(struct trapframe *tf);
+void sched_accnt_init(void);
static sched_policy_t policy = SCHED_POLICY_MLFQ;
@@ -63,7 +65,7 @@ __cacheline_aligned static struct spinlock tdq_lock = {0};
/*
* Perform timer oneshot
*/
-static inline void
+void
sched_oneshot(bool now)
{
struct timer timer;
@@ -77,39 +79,75 @@ sched_oneshot(bool now)
}
/*
- * Save thread state and enqueue it back into one
- * of the ready queues.
+ * Returns true if a processor is associated
+ * with a specific thread
+ *
+ * @ci: CPU that wants to take 'td'
+ * @td: Thread to check against
*/
-static void
-sched_save_td(struct proc *td, struct trapframe *tf)
+static bool
+cpu_is_assoc(struct cpu_info *ci, struct proc *td)
{
/*
- * Save trapframe to process structure only
- * if PROC_EXEC is not set.
+ * If we are not pinned, any processor is
+ * associated.
*/
- if (!ISSET(td->flags, PROC_EXEC)) {
- memcpy(&td->tf, tf, sizeof(td->tf));
+ if (!ISSET(td->flags, PROC_PINNED)) {
+ return true;
}
- sched_enqueue_td(td);
+ return ci->id == td->affinity;
}
-static struct proc *
+struct proc *
sched_dequeue_td(void)
{
struct sched_queue *queue;
struct proc *td = NULL;
+ struct cpu_info *ci;
+ uint32_t ncpu = 0;
spinlock_acquire(&tdq_lock);
+ ci = this_cpu();
for (size_t i = 0; i < SCHED_NQUEUE; ++i) {
queue = &qlist[i];
- if (!TAILQ_EMPTY(&queue->q)) {
- td = TAILQ_FIRST(&queue->q);
- TAILQ_REMOVE(&queue->q, td, link);
- spinlock_release(&tdq_lock);
- return td;
+ if (TAILQ_EMPTY(&queue->q)) {
+ continue;
}
+
+ td = TAILQ_FIRST(&queue->q);
+ if (td == NULL) {
+ continue;
+ }
+
+ while (ISSET(td->flags, PROC_SLEEP)) {
+ td = TAILQ_NEXT(td, link);
+ if (td == NULL) {
+ break;
+ }
+ }
+
+ /*
+ * If we are on a multicore system and this isn't
+ * our process, don't take it. Some threads might
+ * be pinned to a specific processor.
+ */
+ ncpu = cpu_count();
+ while (!cpu_is_assoc(ci, td) && ncpu > 1) {
+ td = TAILQ_NEXT(td, link);
+ if (td == NULL) {
+ break;
+ }
+ }
+
+ if (td == NULL) {
+ continue;
+ }
+
+ TAILQ_REMOVE(&queue->q, td, link);
+ spinlock_release(&tdq_lock);
+ return td;
}
/* We got nothing */
@@ -141,6 +179,9 @@ this_td(void)
struct cpu_info *ci;
ci = this_cpu();
+ if (ci == NULL) {
+ return NULL;
+ }
return ci->curtd;
}
@@ -177,62 +218,21 @@ td_pri_update(struct proc *td)
}
/*
- * Perform a context switch.
+ * MI work to be done during a context
+ * switch. Called by md_sched_switch()
*/
void
-sched_switch(struct trapframe *tf)
+mi_sched_switch(struct proc *from)
{
- struct cpu_info *ci;
- struct pcb *pcbp;
- struct proc *next_td, *td;
- bool use_current = true;
-
- ci = this_cpu();
- td = ci->curtd;
-
- if (td != NULL) {
- dispatch_signals(td);
- td_pri_update(td);
- }
-
- /*
- * Get the next thread and use it only if it isn't
- * in the middle of an exit, exec, or whatever.
- */
- do {
- if ((next_td = sched_dequeue_td()) == NULL) {
- sched_oneshot(false);
+ if (from != NULL) {
+ if (from->pid == 0)
return;
- }
- /*
- * If we are in the middle of an exec, don't use this
- * thread.
- */
- if (ISSET(next_td->flags, PROC_EXEC)) {
- use_current = false;
- }
-
- /*
- * Don't use this thread if we are currently
- * exiting.
- */
- if (ISSET(next_td->flags, PROC_EXITING)) {
- use_current = false;
- }
- } while (!use_current);
-
- /* Save the previous thread */
- if (td != NULL) {
- sched_save_td(td, tf);
+ dispatch_signals(from);
+ td_pri_update(from);
}
- memcpy(tf, &next_td->tf, sizeof(*tf));
- ci->curtd = next_td;
- pcbp = &next_td->pcb;
-
- pmap_switch_vas(pcbp->addrsp);
- sched_oneshot(false);
+ cons_detach();
}
/*
@@ -242,9 +242,8 @@ void
sched_enter(void)
{
md_inton();
- md_sync_all();
+ sched_oneshot(false);
for (;;) {
- sched_oneshot(false);
md_pause();
}
}
@@ -252,14 +251,154 @@ sched_enter(void)
void
sched_yield(void)
{
- struct proc *td = this_td();
+ struct proc *td;
+ struct cpu_info *ci = this_cpu();
- if (td != NULL) {
- td->rested = true;
+ if ((td = ci->curtd) == NULL) {
+ return;
}
+ td->rested = true;
+
+ /* FIXME: Hang yielding when waited on */
+ if (ISSET(td->flags, PROC_WAITED)) {
+ return;
+ }
+
+ ci->curtd = NULL;
+ md_inton();
sched_oneshot(false);
- while (td->rested);
+
+ md_hlt();
+ md_intoff();
+ ci->curtd = td;
+}
+
+/*
+ * Remove a process from its scheduler ready queue so it
+ * can no longer be dequeued and run.
+ *
+ * @td: Process to detach
+ *
+ * NOTE(review): assumes `td' is currently enqueued on
+ * qlist[td->priority]; TAILQ_REMOVE on an unqueued entry
+ * would corrupt the queue -- confirm all callers.
+ */
+void
+sched_detach(struct proc *td)
+{
+    struct sched_queue *queue;
+
+    spinlock_acquire(&tdq_lock);
+    queue = &qlist[td->priority];
+
+    TAILQ_REMOVE(&queue->q, td, link);
+    spinlock_release(&tdq_lock);
+}
+
+/*
+ * Pin a process to a specific processor
+ *
+ * @td: Process to pin
+ * @cpu: Logical processor ID to pin `td' to.
+ *
+ * XXX: 'cpu' is a machine independent value, representing
+ * CPU<n>
+ */
+void
+proc_pin(struct proc *td, affinity_t cpu)
+{
+    /* Record the target CPU, then mark the thread as pinned */
+    td->affinity = cpu;
+    td->flags |= PROC_PINNED;
+}
+
+/*
+ * Unpin a pinned process, allowing it to be
+ * picked up by any processor
+ *
+ * @td: Process to unpin
+ */
+void
+proc_unpin(struct proc *td)
+{
+    /* Reset affinity and allow any CPU to pick this thread up */
+    td->affinity = 0;
+    td->flags &= ~PROC_PINNED;
+}
+
+/*
+ * Suspend a process for a specified amount
+ * of time. This calling process will yield for
+ * the amount of time specified in 'tv'
+ *
+ * @td: Process to suspend (NULL for current)
+ * @tv: Time value to use
+ *
+ * XXX: 'tv' being NULL is equivalent to calling
+ * sched_detach()
+ */
+void
+sched_suspend(struct proc *td, const struct timeval *tv)
+{
+    struct timer tmr;
+    const time_t USEC_PER_SEC = 1000000;
+    ssize_t usec;
+    time_t usec_cur, usec_now, usec_tmp;
+    bool have_timer = false;
+    tmrr_status_t tmr_status;
+
+    if (td == NULL)
+        td = this_td();
+    if (__unlikely(td == NULL))
+        return;
+
+    if (tv == NULL) {
+        sched_detach(td);
+        return;
+    }
+
+    /*
+     * Now, we need a generic timer so that we can compute
+     * how much time has elapsed since this process has
+     * requested to be suspended. However, we cannot assume
+     * that it would be present. If the lookup fails, all we
+     * can do is try to estimate how much time went by which
+     * works fine too, just not as accurate.
+     *
+     * Note: `tmr' is only valid to inspect when req_timer()
+     * succeeds, and microsecond precision means we must
+     * check get_time_usec -- the routine actually called
+     * below -- not get_time_sec.
+     */
+    tmr_status = req_timer(TIMER_GP, &tmr);
+    if (tmr_status == TMRR_SUCCESS && tmr.get_time_usec != NULL) {
+        have_timer = true;
+    }
+
+    /*
+     * Compute the max time in microseconds that
+     * we will wait. We are using both tv->tv_sec
+     * and tv->tv_usec
+     */
+    usec = tv->tv_usec;
+    usec += tv->tv_sec * USEC_PER_SEC;
+    usec_cur = (have_timer) ? tmr.get_time_usec() : 0;
+
+    for (;;) {
+        sched_yield();
+
+        /*
+         * If we have a timer in our paws, compute how much
+         * time went by. Otherwise we estimate by subtracting
+         * the scheduler quantum.
+         *
+         * The reference point is advanced each pass so only
+         * the delta for this iteration is subtracted from
+         * the budget (subtracting the cumulative elapsed
+         * time repeatedly would cut the sleep short).
+         *
+         * XXX: The timing here works decently as intended. However,
+         *      it would be nice to smoothen out any jitter. Such can
+         *      probably be done by subtracting 'usec' by the exponential
+         *      moving average of 'usec_tmp' rather than the raw original
+         *      value.
+         */
+        if (have_timer) {
+            usec_now = tmr.get_time_usec();
+            usec_tmp = usec_now - usec_cur;
+            usec_cur = usec_now;
+        } else {
+            usec_tmp = DEFAULT_TIMESLICE_USEC;
+        }
+
+        /* We are done here! */
+        usec -= usec_tmp;
+        if (usec <= 0) {
+            break;
+        }
+    }
+}
void
@@ -272,4 +411,6 @@ sched_init(void)
pr_trace("prepared %d queues (policy=0x%x)\n",
SCHED_NQUEUE, policy);
+
+ sched_accnt_init();
}
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 58bd52d..044de7b 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -58,6 +58,12 @@ static struct sigaction sa_tab[] = {
.sa_flags = 0,
.sa_sigaction = NULL
},
+ [SIGTERM] = {
+ .sa_handler = sigterm_default,
+ .sa_mask = 0,
+ .sa_flags = 0,
+ .sa_sigaction = NULL
+ }
};
/*
diff --git a/sys/kern/kern_socket.c b/sys/kern/kern_socket.c
new file mode 100644
index 0000000..d0fbe19
--- /dev/null
+++ b/sys/kern/kern_socket.c
@@ -0,0 +1,1009 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/socket.h>
+#include <sys/sio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/namei.h>
+#include <sys/sched.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+#include <sys/filedesc.h>
+#include <sys/fcntl.h>
+#include <sys/vnode.h>
+#include <vm/dynalloc.h>
+#include <string.h>
+
+#define pr_trace(fmt, ...) kprintf("socket: " fmt, ##__VA_ARGS__)
+#define pr_error(...) pr_trace(__VA_ARGS__)
+
+static struct vops socket_vops;
+
+/*
+ * This table maps socket option names to
+ * lengths of their underlying structure.
+ *
+ * This is used for bounds/length checking within
+ * setsockopt()
+ */
+static size_t sockopt_lentab[_SO_MAX] = {
+ [ SO_RCVTIMEO ] = sizeof(struct timeval)
+};
+
+/*
+ * Get a kernel socket structure from a
+ * file descriptor.
+ *
+ * @sockfd: File descriptor to lookup
+ * @res: Result pointer
+ *
+ * Returns zero on success, otherwise a less
+ * than zero errno.
+ */
+static int
+get_ksock(int sockfd, struct ksocket **res)
+{
+    struct filedesc *fdesc;
+    struct vnode *vp;
+    struct ksocket *ksock;
+
+    /* Caller must give us somewhere to put the result */
+    if (res == NULL)
+        return -EINVAL;
+
+    /* Resolve the descriptor within the current process */
+    if ((fdesc = fd_get(NULL, sockfd)) == NULL)
+        return -EBADF;
+
+    /* The backing vnode must exist and be of socket type */
+    vp = fdesc->vp;
+    if (vp == NULL || vp->type != VSOCK)
+        return -ENOTSOCK;
+
+    /* The kernel socket state hangs off the vnode's private data */
+    ksock = vp->data;
+    if (__unlikely(ksock == NULL))
+        return -EIO;
+
+    *res = ksock;
+    return 0;
+}
+
+/*
+ * VFS reclaim callback for the socket
+ * layer
+ *
+ * Returns zero on success, otherwise a less
+ * than zero errno.
+ */
+static int
+socket_reclaim(struct vnode *vp)
+{
+    struct ksocket *ksock;
+    int i;
+
+    /* Only socket vnodes carry our state */
+    if (vp->type != VSOCK)
+        return -ENOTSOCK;
+
+    /* No private data means nothing to tear down */
+    if ((ksock = vp->data) == NULL)
+        return -EIO;
+
+    /* Release every socket option that was ever set */
+    for (i = 0; i < _SO_MAX; ++i) {
+        if (ksock->opt[i] == NULL)
+            continue;
+
+        dynfree(ksock->opt[i]);
+        ksock->opt[i] = NULL;
+    }
+
+    /* Drop the backing descriptor, lock and state */
+    fd_close(ksock->sockfd);
+    mutex_free(ksock->mtx);
+    dynfree(ksock);
+    return 0;
+}
+
+/*
+ * Create a socket file from the sockaddr
+ * structure
+ *
+ * @ksock: Socket to create a file for
+ * @sockaddr_un: domain sockaddr
+ */
+static int
+socket_mkfile(struct ksocket *ksock, struct sockaddr_un *un)
+{
+    struct filedesc *fdesc;
+    struct vnode *vp;
+    int fd;
+
+    /* Create/open the socket file at the sockaddr path */
+    fd = fd_open(un->sun_path, O_CREAT | O_RDONLY);
+    if (fd < 0)
+        return fd;
+
+    /* Grab the actual descriptor handle */
+    fdesc = fd_get(NULL, fd);
+    if (fdesc == NULL) {
+        fd_close(fd);
+        return -EIO;
+    }
+
+    /* Repurpose the backing vnode as a socket endpoint */
+    vp = fdesc->vp;
+    vp->type = VSOCK;
+    vp->vops = &socket_vops;
+    vp->data = ksock;
+    return fd;
+}
+
+/*
+ * Connect to a domain socket - used by connect()
+ *
+ * @sockfd: Socket file descriptor
+ * @ksock: Current ksock
+ * @un: Current sockaddr_un
+ */
+static int
+connect_domain(int sockfd, struct ksocket *ksock, struct sockaddr_un *un)
+{
+ int error;
+ struct nameidata ndp;
+ struct filedesc *filedesc;
+ struct vnode *vp;
+
+ /* Resolve the peer's socket file by its filesystem path */
+ ndp.path = un->sun_path;
+ ndp.flags = 0;
+ if ((error = namei(&ndp)) < 0) {
+ return error;
+ }
+
+ /* Look up our own descriptor so it can be redirected */
+ vp = ndp.vp;
+ filedesc = fd_get(NULL, sockfd);
+ if (filedesc == NULL) {
+ pr_error("connect: no filedesc for current\n");
+ return -EIO;
+ }
+
+ /*
+ * Point our descriptor at the peer's vnode.
+ *
+ * NOTE(review): the previous filedesc->vp is overwritten
+ * without being released - confirm this does not leak the
+ * original socket vnode created by socket().
+ */
+ filedesc->vp = vp;
+ return 0;
+}
+
+/*
+ * Wait until data is received for the
+ * recv() function.
+ *
+ * @sockfd: Socket we are waiting on
+ *
+ * Returns zero on success, otherwise a less
+ * than zero value is returned.
+ */
+static int
+socket_rx_wait(int sockfd)
+{
+    struct ksocket *ksock;
+    struct sockopt *opt;
+    struct timeval tv;
+    int error;
+
+    /*
+     * Look the socket up first; 'ksock' is only valid after
+     * get_ksock() succeeds (the old code tested it for NULL
+     * while it was still uninitialized).
+     */
+    error = get_ksock(sockfd, &ksock);
+    if (error < 0) {
+        return error;
+    }
+
+    /*
+     * If the socket does not have this option set,
+     * we will assume that there is no timeout value.
+     */
+    opt = ksock->opt[SO_RCVTIMEO];
+    if (opt == NULL) {
+        return 0;
+    }
+
+    /*
+     * Zero 'tv' first: setsockopt() may have stored fewer
+     * bytes than sizeof(tv), so any tail stays well-defined.
+     */
+    memset(&tv, 0, sizeof(tv));
+    memcpy(&tv, opt->data, opt->len);
+    sched_suspend(NULL, &tv);
+    return 0;
+}
+
+/*
+ * Send data to socket - POSIX send(2) core
+ *
+ * @sockfd: File descriptor that backs this socket
+ * @buf: Buffer containing data to transmit
+ * @size: Size of the buffer
+ * @flags: Optional flags
+ *
+ * Returns zero on success, otherwise a less
+ * than zero errno.
+ */
+ssize_t
+send(int sockfd, const void *buf, size_t size, int flags)
+{
+    struct ksocket *ksock;
+    struct sockbuf *sbuf;
+    struct netbuf *netbuf;
+    size_t tail;
+    int error;
+
+    /* Size cannot be zero */
+    if (size == 0) {
+        return -EINVAL;
+    }
+
+    if ((error = get_ksock(sockfd, &ksock)) < 0) {
+        return error;
+    }
+
+    sbuf = &ksock->buf;
+    netbuf = &sbuf->buf;
+    mutex_acquire(ksock->mtx, 0);
+
+    /* Make sure we dont overflow */
+    if (netbuf->len > sbuf->watermark) {
+        mutex_release(ksock->mtx);
+        return -ENOBUFS;
+    }
+
+    /* Reset the cursors once the buffer has fully drained */
+    if (netbuf->len == 0) {
+        sbuf->head = 0;
+        sbuf->tail = 0;
+    }
+
+    /* Clamp the size if needed */
+    if ((netbuf->len + size) > sbuf->watermark) {
+        size = sbuf->watermark - netbuf->len;
+    }
+    if (size == 0) {
+        /* Buffer is full - drop the lock before bailing out */
+        mutex_release(ksock->mtx);
+        return -ENOBUFS;
+    }
+
+    /* Copy the new data */
+    tail = sbuf->tail;
+    memcpy(&netbuf->data[tail], buf, size);
+
+    sbuf->tail += size;
+    netbuf->len += size;
+    mutex_release(ksock->mtx);
+    return size;
+}
+
+/*
+ * Recv data from socket - POSIX recv(2) core
+ *
+ * @sockfd: File descriptor that backs this socket
+ * @buf: RX buffer
+ * @size: Size of the buffer
+ * @flags: Optional flags
+ *
+ * Returns length on success, otherwise a less
+ * than zero errno.
+ */
+ssize_t
+recv(int sockfd, void *buf, size_t len, int flags)
+{
+    struct ksocket *ksock;
+    struct sockbuf *sbuf;
+    struct netbuf *netbuf;
+    size_t head;
+    ssize_t retval;
+    int error;
+
+    /* Length cannot be zero */
+    if (len == 0) {
+        return -EINVAL;
+    }
+
+    if ((error = get_ksock(sockfd, &ksock)) < 0) {
+        return error;
+    }
+
+    sbuf = &ksock->buf;
+    netbuf = &sbuf->buf;
+    mutex_acquire(ksock->mtx, 0);
+
+    /* Is it empty? */
+    if (netbuf->len == 0) {
+        sbuf->head = 0;
+        sbuf->tail = 0;
+        retval = -EAGAIN;
+        goto done;
+    }
+
+    /* Never read past what is actually buffered */
+    if (len > netbuf->len) {
+        len = netbuf->len;
+    }
+
+    head = sbuf->head;
+    memcpy(buf, &netbuf->data[head], len);
+    sbuf->head = (sbuf->head + len) % NETBUF_LEN;
+
+    /*
+     * Drain what we consumed (otherwise the buffer never
+     * empties) and report the byte count actually copied,
+     * not the caller's requested length.
+     */
+    netbuf->len -= len;
+    retval = len;
+done:
+    mutex_release(ksock->mtx);
+    return retval;
+}
+
+/*
+ * POSIX socket(7) core
+ *
+ * @domain: Address family (see AF_*)
+ * @type: Socket type
+ * @protocol: Socket protocol
+ *
+ * Returns zero on success, otherwise a less
+ * than zero errno.
+ */
+int
+socket(int domain, int type, int protocol)
+{
+    struct ksocket *ksock = NULL;
+    struct sockbuf *sbuf = NULL;
+    struct proc *td = this_td();
+    int fd;
+
+    ksock = dynalloc(sizeof(*ksock));
+    if (ksock == NULL) {
+        return -ENOMEM;
+    }
+
+    memset(ksock, 0, sizeof(*ksock));
+    sbuf = &ksock->buf;
+    sbuf->head = 0;
+    sbuf->tail = 0;
+
+    switch (domain) {
+    case AF_UNIX:
+    {
+        struct sockaddr_un *un;
+
+        un = &ksock->un;
+        sbuf->watermark = NETBUF_LEN;
+
+        /* Set up a path and create a socket file */
+        un->sun_family = domain;
+        snprintf(un->sun_path, sizeof(un->sun_path), "/tmp/%d-sock0", td->pid);
+        fd = socket_mkfile(ksock, un);
+        if (fd < 0) {
+            /* Vnode never took ownership, free the state */
+            dynfree(ksock);
+        }
+        return fd;
+    }
+    default:
+        /*
+         * Unknown address family. The old fail path also
+         * called fd_close() on an uninitialized 'fd' here.
+         */
+        dynfree(ksock);
+        return -EINVAL;
+    }
+}
+
+/*
+ * Bind address to socket - POSIX bind(2) core
+ *
+ * @sockfd: File descriptor
+ * @addr: Address to bind
+ * @len: Sockaddr len
+ *
+ * Returns zero on success, otherwise a less
+ * than zero errno.
+ */
+int
+bind(int sockfd, const struct sockaddr *addr, socklen_t len)
+{
+    struct proc *td;
+    struct ksocket *ksock;
+    struct cmsg_list *clp;
+    int error;
+
+    if ((error = get_ksock(sockfd, &ksock)) < 0) {
+        kprintf("error=%d\n", error);
+        return error;
+    }
+
+    /*
+     * Create the socket mutex, but only on the first bind -
+     * re-binding must not leak the previously allocated lock.
+     */
+    if (ksock->mtx == NULL) {
+        ksock->mtx = mutex_new("ksocket");
+        if (ksock->mtx == NULL) {
+            return -ENOMEM;
+        }
+    }
+
+    /* Mark ourselves as the owner */
+    td = this_td();
+    ksock->owner = td;
+
+    /*
+     * Initialize the cmsg list queue.
+     *
+     * NOTE(review): 'addr' and 'len' are currently unused;
+     * the socket path is fixed at socket() time - confirm
+     * this is intended.
+     */
+    clp = &ksock->cmsg_list;
+    TAILQ_INIT(&clp->list);
+    clp->is_init = 1;
+    return 0;
+}
+
+/*
+ * Set socket options - POSIX setsockopt(3) core
+ *
+ * @sockfd: File descriptor of socket
+ * @level: Protocol level
+ * @v: Options value
+ * @len: Length of data pointed to by 'v'
+ */
+int
+setsockopt(int sockfd, int level, int name, const void *v, socklen_t len)
+{
+    struct ksocket *ksock;
+    struct sockopt *opt;
+    size_t exp_len;
+    int error;
+
+    /* Must have a valid fd */
+    if (sockfd < 0) {
+        return -EBADF;
+    }
+
+    /* Ensure value and length are valid */
+    if (v == NULL || len == 0) {
+        return -EINVAL;
+    }
+
+    /* Verify the name (reject negative indices too) */
+    if (name < 0 || name >= _SO_MAX) {
+        return -EINVAL;
+    }
+
+    /* Grab a new socket */
+    if ((error = get_ksock(sockfd, &ksock)) < 0) {
+        return error;
+    }
+
+    /*
+     * Clamp the input length to the option's real structure
+     * size. A zero table entry means the option has no
+     * registered structure - refuse it.
+     */
+    exp_len = sockopt_lentab[name];
+    if (exp_len == 0) {
+        return -EINVAL;
+    }
+    if (len > exp_len) {
+        len = exp_len;
+    }
+
+    /*
+     * Grab the socket option, allocating it on first use.
+     * Size the buffer for the full structure (exp_len), not
+     * the caller's possibly shorter 'len': a later set with a
+     * larger length must not overflow this allocation.
+     */
+    if ((opt = ksock->opt[name]) == NULL) {
+        opt = dynalloc(sizeof(*opt) + exp_len);
+        if (opt == NULL) {
+            return -ENOMEM;
+        }
+
+        ksock->opt[name] = opt;
+    }
+
+    memcpy(opt->data, v, len);
+    opt->len = len;
+    return 0;
+}
+
+/*
+ * Connect to a socket
+ *
+ * @sockfd: File descriptor to connect
+ * @addr: Address to connect to
+ * @len: Length of address
+ */
+int
+connect(int sockfd, const struct sockaddr *addr, socklen_t len)
+{
+ struct ksocket *ksock;
+ int error = -1;
+
+ if ((error = get_ksock(sockfd, &ksock)) < 0) {
+ return error;
+ }
+
+ switch (addr->sa_family) {
+ case AF_UNIX:
+ {
+ struct sockaddr_un *un;
+
+ /* The peer socket path must be non-empty */
+ un = (struct sockaddr_un *)addr;
+ if (un->sun_path[0] == '\0') {
+ pr_error("connect: bad socket path\n");
+ return -1;
+ }
+
+ /* Wait for the connection to be established */
+ /*
+ * NOTE(review): this spins (yielding each pass) until
+ * the peer's socket file can be resolved - there is no
+ * timeout, so a peer that never binds blocks this
+ * caller forever. Confirm intended.
+ */
+ do {
+ error = connect_domain(sockfd, ksock, un);
+ if (error != 0) {
+ sched_yield();
+ }
+ } while (error != 0);
+
+ return 0;
+ }
+ }
+
+ /* Unsupported address family (NOTE(review): -1, not a -errno) */
+ return -1;
+}
+
+/*
+ * Send socket control message - POSIX.1-2008
+ *
+ * @socket: Socket to transmit on
+ * @msg: Further arguments
+ * @flags: Optional flags
+ *
+ * Returns zero on success, otherwise a less
+ * than zero errno.
+ */
+ssize_t
+sendmsg(int socket, const struct msghdr *msg, int flags)
+{
+    struct ksocket *ksock;
+    struct cmsg *cmsg;
+    struct sockaddr_un *un;
+    struct cmsg_list *clp;
+    size_t control_len = 0;
+    int error;
+
+    if ((error = get_ksock(socket, &ksock)) < 0) {
+        return error;
+    }
+
+    /* We cannot do sendmsg() non domain sockets */
+    un = &ksock->un;
+    if (un->sun_family != AF_UNIX) {
+        return -EBADF;
+    }
+
+    /* There must be a control buffer to take data from */
+    if (msg->msg_control == NULL || msg->msg_controllen == 0) {
+        return -EINVAL;
+    }
+
+    /* The cmsg queue is set up by bind() - refuse otherwise */
+    clp = &ksock->cmsg_list;
+    if (!clp->is_init) {
+        return -EINVAL;
+    }
+
+    control_len = MALIGN(msg->msg_controllen);
+
+    /* Allocate a new cmsg (-ENOMEM, not -EINVAL, on failure) */
+    cmsg = dynalloc(control_len + sizeof(struct cmsg));
+    if (cmsg == NULL) {
+        return -ENOMEM;
+    }
+
+    /*
+     * Zero the whole cmsg, then copy only msg_controllen bytes;
+     * copying the aligned length would over-read the source,
+     * which is only msg_controllen bytes long.
+     */
+    memset(cmsg, 0, control_len + sizeof(struct cmsg));
+    memcpy(cmsg->buf, msg->msg_control, msg->msg_controllen);
+    cmsg->control_len = control_len;
+    TAILQ_INSERT_TAIL(&clp->list, cmsg, link);
+    return 0;
+}
+
+/*
+ * Receive socket control message - POSIX.1‐2017
+ *
+ * @socket: Socket to receive on
+ * @msg: Further arguments
+ * @flags: Optional flags
+ *
+ * Returns zero on success, otherwise a less
+ * than zero errno.
+ */
+ssize_t
+recvmsg(int socket, struct msghdr *msg, int flags)
+{
+ struct ksocket *ksock;
+ struct sockaddr_un *un;
+ struct cmsg *cmsg, *tmp;
+ struct cmsghdr *cmsghdr;
+ struct cmsg_list *clp;
+ uint8_t *fds;
+ int error;
+
+ if (socket < 0) {
+ return -EINVAL;
+ }
+
+ /* Grab the socket descriptor */
+ if ((error = get_ksock(socket, &ksock)) < 0) {
+ return error;
+ }
+
+ /* Must be a unix domain socket */
+ un = &ksock->un;
+ if (un->sun_family != AF_UNIX) {
+ return -EBADF;
+ }
+
+ /* Grab the control message list */
+ clp = &ksock->cmsg_list;
+ cmsg = TAILQ_FIRST(&clp->list);
+
+ /* Empty? */
+ /*
+ * NOTE(review): busy-waits (yielding) with no timeout until a
+ * sender queues a control message; also assumes bind() already
+ * ran TAILQ_INIT on this list - confirm both.
+ */
+ while (cmsg == NULL) {
+ sched_yield();
+ cmsg = TAILQ_FIRST(&clp->list);
+ }
+
+ /* Drain and free every queued control message in one call */
+ while (cmsg != NULL) {
+ cmsghdr = &cmsg->hdr;
+
+ /* Check the control message type */
+ switch (cmsghdr->cmsg_type) {
+ case SCM_RIGHTS:
+ {
+ /*
+ * NOTE(review): the passed fd is only logged here; it
+ * is not installed into the receiver's descriptor
+ * table, and nothing is copied back into the caller's
+ * msg->msg_control - confirm this is still TODO.
+ */
+ fds = (uint8_t *)CMSG_DATA(cmsghdr);
+ pr_trace("SCM_RIGHTS -> fd %d (from pid %d)\n", fds[0],
+ ksock->owner->pid);
+
+ break;
+ }
+ }
+
+ /* Advance before unlinking/freeing the current entry */
+ tmp = cmsg;
+ cmsg = TAILQ_NEXT(cmsg, link);
+
+ TAILQ_REMOVE(&clp->list, tmp, link);
+ dynfree(tmp);
+ }
+
+ return 0;
+}
+
+/*
+ * socket(7) syscall
+ *
+ * arg0: domain
+ * arg1: type
+ * arg2: protocol
+ */
+scret_t
+sys_socket(struct syscall_args *scargs)
+{
+    /* Unpack the raw syscall arguments and hand off to socket() */
+    return socket((int)scargs->arg0, (int)scargs->arg1, (int)scargs->arg2);
+}
+
+/*
+ * bind(2) syscall
+ *
+ * arg0: sockfd
+ * arg1: addr
+ * arg2: len
+ */
+scret_t
+sys_bind(struct syscall_args *scargs)
+{
+    struct sockaddr kaddr;
+    int sockfd = scargs->arg0;
+    int len = scargs->arg2;
+    int error;
+
+    /* Snapshot the sockaddr from userland before using it */
+    error = copyin((const void *)scargs->arg1, &kaddr, sizeof(kaddr));
+    if (error < 0) {
+        return error;
+    }
+
+    return bind(sockfd, &kaddr, len);
+}
+
+/*
+ * recv(2) syscall
+ *
+ * arg0: sockfd
+ * arg1: buf
+ * arg2: size
+ * arg3: flags
+ */
+scret_t
+sys_recv(struct syscall_args *scargs)
+{
+    char buf[NETBUF_LEN];
+    void *u_buf = (void *)scargs->arg1;
+    int sockfd = scargs->arg0;
+    size_t len = scargs->arg2;
+    int error, flags = scargs->arg3;
+
+    if (len > sizeof(buf)) {
+        return -ENOBUFS;
+    }
+
+    /*
+     * Try to grab data. On success we are done - the old loop
+     * fell through on a positive return, re-read the (now
+     * drained) socket and usually handed back -EAGAIN even
+     * though data had arrived.
+     */
+    error = recv(sockfd, buf, len, flags);
+    if (error == -EAGAIN) {
+        /*
+         * Nothing buffered yet: wait for data (this obeys
+         * SO_RCVTIMEO if set) and then try exactly once more.
+         */
+        socket_rx_wait(sockfd);
+        error = recv(sockfd, buf, len, flags);
+        if (error == -EAGAIN) {
+            return error;
+        }
+    }
+
+    if (error < 0) {
+        pr_error("sys_recv: recv() fail (fd=%d)\n", sockfd);
+        return error;
+    }
+
+    /* Copy out only the bytes recv() actually returned */
+    len = (size_t)error;
+    error = copyout(buf, u_buf, len);
+    return (error == 0) ? (scret_t)len : error;
+}
+
+/*
+ * send(2) syscall
+ *
+ * arg0: sockfd
+ * arg1: buf
+ * arg2: size
+ * arg3: flags
+ */
+scret_t
+sys_send(struct syscall_args *scargs)
+{
+    char kbuf[NETBUF_LEN];
+    int sockfd = scargs->arg0;
+    size_t len = scargs->arg2;
+    int flags = scargs->arg3;
+    int error;
+
+    /* The payload must fit our bounce buffer */
+    if (len > sizeof(kbuf)) {
+        return -ENOBUFS;
+    }
+
+    /* Pull the payload in from userland */
+    error = copyin((const void *)scargs->arg1, kbuf, len);
+    if (error < 0) {
+        pr_error("sys_send: copyin() failure (fd=%d)\n", sockfd);
+        return error;
+    }
+
+    return send(sockfd, kbuf, len, flags);
+}
+
+/*
+ * recvmsg(3) syscall
+ *
+ * arg0: socket
+ * arg1: msg
+ * arg2: flags
+ */
+scret_t
+sys_recvmsg(struct syscall_args *scargs)
+{
+    struct msghdr *u_msg = (void *)scargs->arg1;
+    void *u_control, *control = NULL;
+    size_t controllen;
+    struct iovec msg_iov;
+    struct msghdr msg;
+    ssize_t retval;
+    int socket = scargs->arg0;
+    int flags = scargs->arg2;
+    int error;
+
+    /* Read the message header */
+    error = copyin(u_msg, &msg, sizeof(msg));
+    if (error < 0) {
+        pr_error("sys_recvmsg: bad msg\n");
+        return error;
+    }
+
+    /* Grab the message I/O vector */
+    error = uio_copyin(msg.msg_iov, &msg_iov, msg.msg_iovlen);
+    if (error < 0) {
+        return error;
+    }
+
+    /* Save control fields */
+    u_control = msg.msg_control;
+    controllen = msg.msg_controllen;
+
+    /*
+     * A zero-length control buffer gives us nothing to receive
+     * into (and avoids dynalloc(0)).
+     *
+     * NOTE(review): 'controllen' is user-controlled and
+     * otherwise unbounded here - consider capping it.
+     */
+    if (controllen == 0) {
+        uio_copyin_clean(&msg_iov, msg.msg_iovlen);
+        return -EINVAL;
+    }
+
+    /* Allocate a new control field to copy in */
+    control = dynalloc(controllen);
+    msg.msg_control = control;
+    if (msg.msg_control == NULL) {
+        uio_copyin_clean(&msg_iov, msg.msg_iovlen);
+        return -ENOMEM;
+    }
+
+    memset(msg.msg_control, 0, controllen);
+    error = copyin(u_control, msg.msg_control, controllen);
+    if (error < 0) {
+        retval = error;
+        goto done;
+    }
+
+    /*
+     * Receive the control message(s). recvmsg() returns zero
+     * on success or a negative errno; the old loop only broke
+     * on zero and spun forever on a persistent error.
+     */
+    msg.msg_iov = &msg_iov;
+    retval = recvmsg(socket, &msg, flags);
+done:
+    uio_copyin_clean(&msg_iov, msg.msg_iovlen);
+    dynfree(control);
+    return retval;
+}
+
+/*
+ * sendmsg(3) syscall
+ *
+ * arg0: socket
+ * arg1: msg
+ * arg2: flags
+ */
+scret_t
+sys_sendmsg(struct syscall_args *scargs)
+{
+    struct iovec msg_iov;
+    struct msghdr *u_msg = (void *)scargs->arg1;
+    struct msghdr msg;
+    ssize_t retval;
+    int socket = scargs->arg0;
+    int flags = scargs->arg2;
+    int error;
+
+    /* Read the message header */
+    error = copyin(u_msg, &msg, sizeof(msg));
+    if (error < 0) {
+        pr_error("sys_sendmsg: bad msg\n");
+        return error;
+    }
+
+    /* Grab the message I/O vector */
+    error = uio_copyin(msg.msg_iov, &msg_iov, msg.msg_iovlen);
+    if (error < 0) {
+        return error;
+    }
+
+    msg.msg_name = NULL;
+    msg.msg_namelen = 0;
+    msg.msg_iov = &msg_iov;
+
+    /*
+     * Push the message out. sendmsg() either succeeds (0) or
+     * fails with a negative errno - propagate failures instead
+     * of spinning forever on a permanent error, which is what
+     * the old `while (retval != 0)` loop did.
+     *
+     * NOTE(review): msg.msg_control still points at user memory
+     * here; sendmsg() reads it directly - confirm safe.
+     */
+    retval = sendmsg(socket, &msg, flags);
+    uio_copyin_clean(&msg_iov, msg.msg_iovlen);
+    return retval;
+}
+
+/*
+ * connect(3) syscall
+ *
+ * arg0: sockfd
+ * arg1: address
+ * arg2: len
+ */
+scret_t
+sys_connect(struct syscall_args *scargs)
+{
+    char abuf[256];
+    int sockfd = scargs->arg0;
+    socklen_t len = scargs->arg2;
+    int error;
+
+    /* The whole sockaddr must fit within our local buffer */
+    if (len >= sizeof(abuf)) {
+        pr_error("sys_connect: address too big\n");
+        return -E2BIG;
+    }
+
+    /* Pull the address in from userland */
+    error = copyin((void *)scargs->arg1, abuf, len);
+    if (error < 0) {
+        pr_error("sys_connect: bad 'address'\n");
+        return error;
+    }
+
+    return connect(sockfd, (struct sockaddr *)abuf, len);
+}
+
+/*
+ * POSIX setsockopt(3) syscall
+ *
+ * arg0: sockfd
+ * arg1: level
+ * arg2: name
+ * arg3: data
+ * arg4: len
+ */
+scret_t
+sys_setsockopt(struct syscall_args *scargs)
+{
+    int sockfd = scargs->arg0;
+    int level = scargs->arg1;
+    int name = scargs->arg2;
+    void *u_data = (void *)scargs->arg3;
+    socklen_t len = scargs->arg4;
+    void *data;
+    size_t exp_len;
+    int retval;
+
+    /* Verify the name (reject negative indices too) */
+    if (name < 0 || name >= _SO_MAX) {
+        return -EINVAL;
+    }
+
+    /*
+     * Clamp length as needed. A zero table entry means the
+     * option has no registered structure; a zero 'len' would
+     * lead to dynalloc(0) - refuse both.
+     */
+    exp_len = sockopt_lentab[name];
+    if (exp_len == 0 || len == 0) {
+        return -EINVAL;
+    }
+    if (len > exp_len) {
+        len = exp_len;
+    }
+
+    data = dynalloc(len);
+    if (data == NULL) {
+        return -ENOMEM;
+    }
+
+    /* Grab data from userland */
+    retval = copyin(u_data, data, len);
+    if (retval < 0) {
+        dynfree(data);
+        return retval;
+    }
+
+    retval = setsockopt(sockfd, level, name, data, len);
+    dynfree(data);
+    return retval;
+}
+
+/*
+ * VFS operations for socket-backed vnodes. Sockets are not
+ * read or written through the VFS layer (send/recv operate on
+ * the ksocket directly), so only reclaim is provided.
+ */
+static struct vops socket_vops = {
+ .read = NULL,
+ .write = NULL,
+ .reclaim = socket_reclaim,
+};
diff --git a/sys/kern/kern_spawn.c b/sys/kern/kern_spawn.c
index 2cddd84..7962ced 100644
--- a/sys/kern/kern_spawn.c
+++ b/sys/kern/kern_spawn.c
@@ -27,6 +27,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <sys/spawn.h>
+#include <sys/wait.h>
#include <sys/proc.h>
#include <sys/exec.h>
#include <sys/mman.h>
@@ -34,7 +36,6 @@
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/syscall.h>
-#include <sys/atomic.h>
#include <sys/signal.h>
#include <sys/limits.h>
#include <sys/sched.h>
@@ -44,10 +45,17 @@
#define pr_trace(fmt, ...) kprintf("spawn: " fmt, ##__VA_ARGS__)
#define pr_error(...) pr_trace(__VA_ARGS__)
-static volatile size_t nthreads = 0;
+#define ARGVP_MAX (ARG_MAX / sizeof(void *))
+static size_t next_pid = 1;
+
+/*
+ * TODO: envp
+ */
struct spawn_args {
char path[PATH_MAX];
+ char argv_blk[ARG_MAX];
+ char *argv[ARGVP_MAX];
};
static inline void
@@ -66,25 +74,55 @@ spawn_thunk(void)
struct proc *cur;
struct execve_args execve_args;
struct spawn_args *args;
- char *argv[] = { NULL, NULL };
char *envp[] = { NULL };
cur = this_td();
- args = cur->spawn_data;
+ args = cur->data;
path = args->path;
+ memset(pathbuf, 0, sizeof(pathbuf));
memcpy(pathbuf, path, strlen(path));
- argv[0] = (char *)pathbuf;
- execve_args.pathname = argv[0];
- execve_args.argv = argv;
+ execve_args.pathname = pathbuf;
+ execve_args.argv = (char **)&args->argv[0];
execve_args.envp = envp;
-
path = NULL;
- dynfree(args);
- execve(cur, &execve_args);
+
+ if (execve(cur, &execve_args) != 0) {
+ pr_error("execve failed, aborting\n");
+ exit1(this_td(), 0);
+ }
__builtin_unreachable();
}
+/*
+ * Wait for a child to terminate - POSIX waitpid() core.
+ *
+ * @pid: PID of the child to wait for
+ * @wstatus: Userspace pointer receiving the exit status (may be NULL)
+ * @options: Currently unused
+ *
+ * Returns the child's PID on success, or -1 if 'pid' is not a
+ * child of the calling process.
+ */
+pid_t
+waitpid(pid_t pid, int *wstatus, int options)
+{
+ struct proc *child, *td;
+ pid_t ret;
+
+ td = this_td();
+ child = get_child(td, pid);
+
+ if (child == NULL) {
+ return -1;
+ }
+
+ /* Wait for it to be done */
+ /* NOTE(review): busy-waits by yielding until the child is a zombie */
+ while (!ISSET(child->flags, PROC_ZOMB)) {
+ sched_yield();
+ }
+
+
+ /* Give back the status */
+ /* NOTE(review): copyout() failure is silently ignored here - confirm */
+ if (wstatus != NULL) {
+ copyout(&child->exit_status, wstatus, sizeof(*wstatus));
+ }
+
+ /* Save the PID before reaping frees the child's state */
+ ret = child->pid;
+ proc_reap(child);
+ return ret;
+}
+
/*
* Spawn a new process
*
@@ -104,8 +142,8 @@ pid_t
spawn(struct proc *cur, void(*func)(void), void *p, int flags, struct proc **newprocp)
{
struct proc *newproc;
- struct mmap_lgdr *mlgdr;
int error;
+ pid_t pid;
newproc = dynalloc(sizeof(*newproc));
if (newproc == NULL) {
@@ -114,19 +152,10 @@ spawn(struct proc *cur, void(*func)(void), void *p, int flags, struct proc **new
return -ENOMEM;
}
- mlgdr = dynalloc(sizeof(*mlgdr));
- if (mlgdr == NULL) {
- dynfree(newproc);
- try_free_data(p);
- pr_error("could not alloc proc mlgdr (-ENOMEM)\n");
- return -ENOMEM;
- }
-
memset(newproc, 0, sizeof(*newproc));
error = md_spawn(newproc, cur, (uintptr_t)func);
if (error < 0) {
dynfree(newproc);
- dynfree(mlgdr);
try_free_data(p);
pr_error("error initializing proc\n");
return error;
@@ -142,21 +171,19 @@ spawn(struct proc *cur, void(*func)(void), void *p, int flags, struct proc **new
cur->flags |= PROC_LEAFQ;
}
- /* Add to parent leafq */
- TAILQ_INSERT_TAIL(&cur->leafq, newproc, leaf_link);
- atomic_inc_int(&cur->nleaves);
- newproc->parent = cur;
- newproc->spawn_data = p;
-
- /* Initialize the mmap ledger */
- mlgdr->nbytes = 0;
- RBT_INIT(lgdr_entries, &mlgdr->hd);
- newproc->mlgdr = mlgdr;
+ error = proc_init(newproc, cur);
+ if (error < 0) {
+ dynfree(newproc);
+ try_free_data(p);
+ pr_error("error initializing proc\n");
+ return error;
+ }
- newproc->pid = ++nthreads;
- signals_init(newproc);
+ newproc->data = p;
+ newproc->pid = next_pid++;
sched_enqueue_td(newproc);
- return newproc->pid;
+ pid = newproc->pid;
+ return pid;
}
/*
@@ -173,6 +200,9 @@ get_child(struct proc *cur, pid_t pid)
struct proc *procp;
TAILQ_FOREACH(procp, &cur->leafq, leaf_link) {
+ if (procp == NULL) {
+ continue;
+ }
if (procp->pid == pid) {
return procp;
}
@@ -182,20 +212,48 @@ get_child(struct proc *cur, pid_t pid)
}
/*
+ * arg0: PID
+ * arg1: wstatus
+ * arg2: options
+ *
+ * Returns PID of terminated child, returns
+ * -1 on failure.
+ */
+scret_t
+sys_waitpid(struct syscall_args *scargs)
+{
+ pid_t pid;
+ int *u_wstatus;
+ int options;
+
+ pid = scargs->arg0;
+ u_wstatus = (void *)scargs->arg1;
+ options = scargs->arg2;
+ return waitpid(pid, u_wstatus, options);
+}
+
+/*
* arg0: The file /path/to/executable
- * arg1: Optional flags (`flags')
+ * arg1: Argv
+ * arg2: Envp (TODO)
+ * arg3: Optional flags (`flags')
*/
scret_t
sys_spawn(struct syscall_args *scargs)
{
struct spawn_args *args;
- const char *u_path;
+ char *path;
+ const char *u_path, **u_argv;
+ const char *u_p = NULL;
struct proc *td;
int flags, error;
+ size_t len, bytes_copied = 0;
+ size_t argv_i = 0;
td = this_td();
- flags = scargs->arg1;
u_path = (const char *)scargs->arg0;
+ u_argv = (const char **)scargs->arg1;
+ flags = scargs->arg3;
args = dynalloc(sizeof(*args));
if (args == NULL) {
@@ -208,5 +266,30 @@ sys_spawn(struct syscall_args *scargs)
return error;
}
+ memset(args->argv, 0, ARG_MAX);
+ for (size_t i = 0; i < ARG_MAX - 1; ++i) {
+ error = copyin(&u_argv[argv_i], &u_p, sizeof(u_p));
+ if (error < 0) {
+ dynfree(args);
+ return error;
+ }
+ if (u_p == NULL) {
+ args->argv[argv_i++] = NULL;
+ break;
+ }
+
+ path = &args->argv_blk[i];
+ error = copyinstr(u_p, path, ARG_MAX - bytes_copied);
+ if (error < 0) {
+ dynfree(args);
+ return error;
+ }
+
+ args->argv[argv_i++] = &args->argv_blk[i];
+ len = strlen(path);
+ bytes_copied += (len + 1);
+ i += len;
+ }
+
return spawn(td, spawn_thunk, args, flags, NULL);
}
diff --git a/sys/kern/kern_stub.c b/sys/kern/kern_stub.c
index 8603fd5..a9a56ac 100644
--- a/sys/kern/kern_stub.c
+++ b/sys/kern/kern_stub.c
@@ -40,8 +40,10 @@ sigfpe_default(int signo)
static struct proc *td;
td = this_td();
- kprintf("Floating point exception (pid=%d)\n", td->pid);
- exit1(td);
+ syslog_silence(false);
+ kprintf(OMIT_TIMESTAMP "Floating point exception (pid=%d)\n", td->pid);
+ syslog_silence(true);
+ exit1(td, 0);
}
void
@@ -50,8 +52,10 @@ sigkill_default(int signo)
static struct proc *td;
td = this_td();
- kprintf("Terminated (pid=%d)\n", td->pid);
- exit1(td);
+ syslog_silence(false);
+ kprintf(OMIT_TIMESTAMP "Terminated (pid=%d)\n", td->pid);
+ syslog_silence(true);
+ exit1(td, 0);
}
void
@@ -60,8 +64,22 @@ sigsegv_default(int signo)
static struct proc *td;
td = this_td();
- kprintf("Segmentation fault (pid=%d)\n", td->pid);
- exit1(td);
+ syslog_silence(false);
+ kprintf(OMIT_TIMESTAMP "Segmentation fault (pid=%d)\n", td->pid);
+ syslog_silence(true);
+ exit1(td, 0);
+}
+
+/*
+ * Default SIGTERM action: log a termination notice on the
+ * console and terminate the current process.
+ */
+void
+sigterm_default(int signo)
+{
+ /* NOTE(review): 'td' being static shares this slot globally - confirm intended */
+ static struct proc *td;
+
+ td = this_td();
+ /* Unsilence the console so the notice is actually visible */
+ syslog_silence(false);
+ kprintf(OMIT_TIMESTAMP "Terminated (pid=%d)\n", td->pid);
+ syslog_silence(true);
+ exit1(td, 0);
+}
int
@@ -75,3 +93,9 @@ dev_nowrite(void)
{
return -ENOTSUP;
}
+
+/*
+ * Stub for devices that implement no block-size query;
+ * always reports the operation as unsupported.
+ */
+int
+dev_nobsize(void)
+{
+ return -ENOTSUP;
+}
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
index f437ec7..8a08f33 100644
--- a/sys/kern/kern_subr.c
+++ b/sys/kern/kern_subr.c
@@ -29,9 +29,12 @@
#include <sys/proc.h>
#include <sys/types.h>
+#include <sys/param.h>
#include <sys/errno.h>
+#include <sys/mman.h>
#include <sys/exec.h>
#include <sys/systm.h>
+#include <vm/vm.h>
#include <string.h>
/*
@@ -45,6 +48,8 @@ static bool
check_uaddr(const void *uaddr)
{
vaddr_t stack_start, stack_end;
+ struct mmap_lgdr *lp;
+ struct mmap_entry find, *res;
struct exec_prog exec;
struct proc *td;
uintptr_t addr;
@@ -61,6 +66,22 @@ check_uaddr(const void *uaddr)
if (addr >= stack_start && addr <= stack_end)
return true;
+ /* Try to grab the mmap ledger */
+ if ((lp = td->mlgdr) == NULL) {
+ return false;
+ }
+
+ /*
+ * Now give an attempt at looking through the
+ * mmap ledger. Perhaps this memory was allocated
+ * in the user heap?
+ */
+ find.va_start = ALIGN_DOWN(addr, DEFAULT_PAGESIZE);
+ res = RBT_FIND(lgdr_entries, &lp->hd, &find);
+ if (res != NULL) {
+ return true;
+ }
+
return false;
}
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 57b27d0..7660f1f 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -28,19 +28,20 @@
*/
#include <sys/types.h>
+#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/errno.h>
+#include <sys/sched.h>
#include <sys/atomic.h>
#include <sys/syslog.h>
#include <sys/spinlock.h>
+#include <machine/cdefs.h>
#include <dev/timer.h>
+#include <string.h>
#define pr_trace(fmt, ...) kprintf("synch: " fmt, ##__VA_ARGS__)
#define pr_error(...) pr_trace(__VA_ARGS__)
-/* XXX: Be very careful with this */
-static struct spinlock __syslock;
-
/*
* Returns 0 on success, returns non-zero value
* on timeout/failure.
@@ -80,7 +81,10 @@ spinlock_usleep(struct spinlock *lock, size_t usec_max)
void
spinlock_acquire(struct spinlock *lock)
{
- while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE));
+ sched_preempt_set(false);
+ while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE)) {
+ md_pause();
+ }
}
/*
@@ -104,35 +108,66 @@ spinlock_try_acquire(struct spinlock *lock)
return 1;
}
- while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE));
- return 0;
+ return __atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE);
}
void
spinlock_release(struct spinlock *lock)
{
__atomic_clear(&lock->lock, __ATOMIC_RELEASE);
+ sched_preempt_set(true);
}
/*
- * Attempt to hold the system-wide lock, returns 1
- * if already held.
- *
- * XXX: Only use for CRITICAL code sections.
+ * Create a new mutex lock object
*/
-int
-syslock(void)
+struct mutex *
+mutex_new(const char *name)
{
- return spinlock_try_acquire(&__syslock);
+ struct mutex *mtx;
+ size_t namelen;
+
+ mtx = dynalloc(sizeof(*mtx));
+ if (mtx == NULL) {
+ return NULL;
+ }
+
+ mtx->lock = 0;
+ namelen = strlen(name);
+
+ /* Don't overflow the name buffer */
+ if (namelen >= MUTEX_NAME_LEN) {
+ namelen = MUTEX_NAME_LEN - 1;
+ }
+
+ memcpy(mtx->name, name, namelen);
+ return mtx;
}
/*
- * Release the system-wide lock
+ * Acquire a mutex
*
- * XXX: Only use for CRITICAL code sections.
+ * @mtx: Mutex to acquire
+ * @flags: Optional flags
*/
+int
+mutex_acquire(struct mutex *mtx, int flags)
+{
+ /*
+ * Spin on the lock byte, yielding the CPU between attempts so
+ * the current holder can make progress. 'flags' is currently
+ * unused.
+ */
+ while (__atomic_test_and_set(&mtx->lock, __ATOMIC_ACQUIRE)) {
+ sched_yield();
+ }
+
+ /* Always succeeds for now (no timeout/try semantics yet) */
+ return 0;
+}
+
+void
+mutex_release(struct mutex *mtx)
+{
+ /* Clear the lock byte with release ordering so prior writes are visible */
+ __atomic_clear(&mtx->lock, __ATOMIC_RELEASE);
+}
+
void
-sysrel(void)
+mutex_free(struct mutex *mtx)
{
- spinlock_release(&__syslock);
+ dynfree(mtx);
}
diff --git a/sys/kern/kern_syscall.c b/sys/kern/kern_syscall.c
index bc61fa6..c352b9c 100644
--- a/sys/kern/kern_syscall.c
+++ b/sys/kern/kern_syscall.c
@@ -29,9 +29,16 @@
#include <sys/syscall.h>
#include <sys/sysctl.h>
+#include <sys/socket.h>
+#include <sys/reboot.h>
#include <sys/types.h>
+#include <sys/ucred.h>
+#include <sys/disk.h>
+#include <sys/time.h>
+#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/vfs.h>
+#include <sys/krq.h>
scret_t(*g_sctab[])(struct syscall_args *) = {
NULL, /* SYS_none */
@@ -43,6 +50,27 @@ scret_t(*g_sctab[])(struct syscall_args *) = {
sys_sysctl, /* SYS_sysctl */
sys_write, /* SYS_write */
sys_spawn, /* SYS_spawn */
+ sys_reboot, /* SYS_reboot */
+ sys_mmap, /* SYS_mmap */
+ sys_munmap, /* SYS_munap */
+ sys_access, /* SYS_access */
+ sys_lseek, /* SYS_lseek */
+ sys_sleep, /* SYS_sleep */
+ sys_inject, /* SYS_inject */
+ sys_getpid, /* SYS_getpid */
+ sys_getppid, /* SYS_getppid */
+ sys_setuid, /* SYS_setuid */
+ sys_getuid, /* SYS_getuid */
+ sys_waitpid, /* SYS_waitpid */
+ sys_socket, /* SYS_socket */
+ sys_bind, /* SYS_bind */
+ sys_recv, /* SYS_recv */
+ sys_send, /* SYS_send */
+ sys_sendmsg, /* SYS_sendmsg */
+ sys_recvmsg, /* SYS_recvmsg */
+ sys_connect, /* SYS_connect */
+ sys_setsockopt, /* SYS_setsockopt */
+ sys_disk, /* SYS_disk */
};
const size_t MAX_SYSCALLS = NELEM(g_sctab);
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index 7679aa1..1f5e578 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -33,12 +33,16 @@
#include <sys/errno.h>
#include <sys/systm.h>
#include <vm/dynalloc.h>
+#include <vm/vm.h>
#include <string.h>
#define HYRA_RELEASE "Hyra/" HYRA_ARCH " " \
HYRA_VERSION " " \
HYRA_BUILDDATE
+extern size_t g_nthreads;
+static uint32_t pagesize = DEFAULT_PAGESIZE;
+static char machine[] = HYRA_ARCH;
static char hyra[] = "Hyra";
static char hyra_version[] = HYRA_VERSION;
static char osrelease[] = HYRA_RELEASE;
@@ -49,10 +53,20 @@ static char osrelease[] = HYRA_RELEASE;
* allocated through dynalloc(9).
*/
static struct sysctl_entry common_optab[] = {
+ /* 'kern.*' */
[KERN_OSTYPE] = { KERN_OSTYPE, SYSCTL_OPTYPE_STR_RO, hyra },
[KERN_OSRELEASE] = { KERN_OSRELEASE, SYSCTL_OPTYPE_STR_RO, &osrelease },
[KERN_VERSION] = { KERN_VERSION, SYSCTL_OPTYPE_STR_RO, &hyra_version },
- [KERN_VCACHE_TYPE] = { KERN_VCACHE_TYPE, SYSCTL_OPTYPE_STR, NULL }
+ [KERN_VCACHE_TYPE] = { KERN_VCACHE_TYPE, SYSCTL_OPTYPE_STR, NULL },
+ [KERN_HOSTNAME] = { KERN_HOSTNAME, SYSCTL_OPTYPE_STR, NULL },
+
+ /* 'hw.*' */
+ [HW_PAGESIZE] = { HW_PAGESIZE, SYSCTL_OPTYPE_INT_RO, &pagesize },
+ [HW_NCPU] = { HW_NCPU, SYSCTL_OPTYPE_INT, NULL },
+ [HW_MACHINE] = {HW_MACHINE, SYSCTL_OPTYPE_STR_RO, &machine },
+
+ /* 'proc.*' */
+ [PROC_COUNT] = { PROC_COUNT, SYSCTL_OPTYPE_INT_RO, &g_nthreads }
};
static int
@@ -91,19 +105,18 @@ static int
do_sysctl(struct sysctl_args *args)
{
struct sysctl_args new_args;
- size_t name_len, oldlenp;
+ size_t name_len = 1, oldlenp = 0;
int *name = NULL;
void *oldp = NULL, *newp = NULL;
- int retval = 0;
-
- if (args->oldlenp == NULL) {
- return -EINVAL;
- }
-
- name_len = args->nlen;
- retval = copyin(args->oldlenp, &oldlenp, sizeof(oldlenp));
- if (retval != 0) {
- goto done;
+ int retval = 0, have_oldlen = 0;
+
+ if (args->oldlenp != NULL) {
+ have_oldlen = 1;
+ name_len = args->nlen;
+ retval = copyin(args->oldlenp, &oldlenp, sizeof(oldlenp));
+ if (retval != 0) {
+ goto done;
+ }
}
/* Copy in newp if it is set */
@@ -124,25 +137,30 @@ do_sysctl(struct sysctl_args *args)
return retval;
}
- oldp = dynalloc(oldlenp);
- retval = copyin(args->oldp, oldp, oldlenp);
- if (retval != 0) {
- return retval;
+ if (oldlenp != 0) {
+ oldp = dynalloc(oldlenp);
+ retval = copyin(args->oldp, oldp, oldlenp);
+ if (retval != 0) {
+ return retval;
+ }
}
/* Prepare the arguments for the sysctl call */
new_args.name = name;
new_args.nlen = name_len;
new_args.oldp = oldp;
- new_args.oldlenp = &oldlenp;
+ new_args.oldlenp = (have_oldlen) ? &oldlenp : NULL;
new_args.newp = newp;
+ new_args.newlen = args->newlen;
retval = sysctl(&new_args);
if (retval != 0) {
goto done;
}
- copyout(oldp, args->oldp, oldlenp);
+ if (oldlenp != 0) {
+ copyout(oldp, args->oldp, oldlenp);
+ }
done:
if (name != NULL)
dynfree(name);
@@ -154,6 +172,33 @@ done:
return retval;
}
+/*
+ * Clear a writable sysctl string variable to the
+ * value of "(undef)"
+ *
+ * @name: Name to clear
+ */
+int
+sysctl_clearstr(int name)
+{
+ struct sysctl_args args;
+ char val[] = "(undef)";
+ int error;
+
+ args.name = &name;
+ args.nlen = 1;
+ args.oldlenp = 0;
+ args.oldp = NULL;
+ args.newp = val;
+ args.newlen = sizeof(val);
+
+ if ((error = sysctl(&args)) != 0) {
+ return error;
+ }
+
+ return 0;
+}
+
int
sysctl(struct sysctl_args *args)
{
diff --git a/sys/kern/kern_syslog.c b/sys/kern/kern_syslog.c
index 10bf348..c7f51f7 100644
--- a/sys/kern/kern_syslog.c
+++ b/sys/kern/kern_syslog.c
@@ -28,9 +28,14 @@
*/
#include <sys/syslog.h>
+#include <sys/cdefs.h>
+#include <sys/sio.h>
#include <sys/spinlock.h>
+#include <sys/device.h>
+#include <sys/errno.h>
#include <dev/cons/cons.h>
#include <dev/timer.h>
+#include <fs/devfs.h>
#include <stdarg.h>
#include <string.h>
@@ -40,21 +45,105 @@
#define SERIAL_DEBUG 0
#endif
+#if defined(__USER_KMSG)
+#define USER_KMSG __USER_KMSG
+#else
+#define USER_KMSG 0
+#endif
+
+#define KBUF_SIZE (1 << 16)
+
+/* Sanity check */
+__static_assert(KBUF_SIZE <= (1 << 16), "KBUF_SIZE too high!");
+
/* Global logger lock */
-static struct spinlock lock = {0};
+static struct spinlock kmsg_lock = {0};
+static bool no_cons_log = false;
+
+/* Kernel message buffer */
+static char kmsg[KBUF_SIZE];
+static size_t kmsg_i = 0;
+static struct cdevsw kmsg_cdevw;
+
+static void
+kmsg_append(const char *s, size_t len)
+{
+ spinlock_acquire(&kmsg_lock);
+ if ((kmsg_i + len) >= KBUF_SIZE) {
+ kmsg_i = 0;
+ }
+
+ for (size_t i = 0; i < len; ++i) {
+ kmsg[kmsg_i + i] = s[i];
+ }
+ kmsg_i += len;
+ spinlock_release(&kmsg_lock);
+}
+
+/*
+ * Character device function.
+ */
+static int
+kmsg_read(dev_t dev, struct sio_txn *sio, int flags)
+{
+ size_t len, offset, j;
+ size_t bytes_read = 0;
+ char *p = sio->buf;
+
+ spinlock_acquire(&kmsg_lock);
+ len = sio->len;
+ offset = sio->offset;
+
+ if (len == 0) {
+ spinlock_release(&kmsg_lock);
+ return -EINVAL;
+ }
+ if (offset >= kmsg_i) {
+ spinlock_release(&kmsg_lock);
+ return 0;
+ }
+
+ for (size_t i = 0; i < len; ++i) {
+ j = offset + i;
+ if (j > kmsg_i) {
+ break;
+ }
+
+ p[i] = kmsg[j];
+ ++bytes_read;
+ }
+
+ spinlock_release(&kmsg_lock);
+ return bytes_read;
+}
static void
syslog_write(const char *s, size_t len)
{
- const char *p = s;
+ const char *p;
+ size_t l;
- while (len--) {
- cons_putch(&g_root_scr, *p);
- if (SERIAL_DEBUG) {
+ if (SERIAL_DEBUG) {
+ p = s;
+ l = len;
+ while (l--) {
serial_putc(*p);
+ ++p;
}
- ++p;
}
+
+ kmsg_append(s, len);
+
+ /*
+ * If the USER_KMSG option is disabled in kconf,
+ * do not log to the console if everything else
+ * has already started.
+ */
+ if (!USER_KMSG && no_cons_log) {
+ return;
+ }
+
+ cons_putstr(&g_root_scr, s, len);
}
/*
@@ -105,10 +194,42 @@ kprintf(const char *fmt, ...)
syslog_write(timestamp, strlen(timestamp));
}
- spinlock_acquire(&lock);
va_start(ap, fmt);
vkprintf(fmt_p, &ap);
va_end(ap);
- spinlock_release(&lock);
}
+
+/*
+ * Silence kernel messages if the system
+ * is already operating in a user context.
+ *
+ * XXX: This is ignored if the kconf USER_KMSG
+ * option is set to "no". A kmsg device file
+ * is also created on the first call.
+ */
+void
+syslog_silence(bool option)
+{
+ static bool once = false;
+ static char devname[] = "kmsg";
+ devmajor_t major;
+ dev_t dev;
+
+ if (!once) {
+ once = true;
+ major = dev_alloc_major();
+ dev = dev_alloc(major);
+
+ dev_register(major, dev, &kmsg_cdevw);
+ devfs_create_entry(devname, major, dev, 0444);
+
+ }
+
+ no_cons_log = option;
+}
+
+static struct cdevsw kmsg_cdevw = {
+ .read = kmsg_read,
+ .write = nowrite
+};
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
new file mode 100644
index 0000000..e741157
--- /dev/null
+++ b/sys/kern/kern_time.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/cdefs.h>
+#include <dev/timer.h>
+#include <machine/cdefs.h>
+
+/*
+ * arg0: Timespec
+ * arg1: Remaining timeval
+ */
+scret_t
+sys_sleep(struct syscall_args *scargs)
+{
+ struct timespec ts;
+ struct timer tmr;
+ size_t timeout_msec;
+ tmrr_status_t status;
+ int error;
+
+ error = copyin((void *)scargs->arg0, &ts, sizeof(ts));
+ if (error < 0) {
+ return error;
+ }
+
+ if (ts.tv_nsec >= 1000000000) {
+ return -EINVAL;
+ }
+
+ status = req_timer(TIMER_GP, &tmr);
+ if (__unlikely(status != TMRR_SUCCESS)) {
+ return -ENOTSUP;
+ }
+ if (__unlikely(tmr.msleep == NULL)) {
+ return -ENOTSUP;
+ }
+
+ timeout_msec = ts.tv_nsec / 1000000;
+ timeout_msec += ts.tv_sec * 1000;
+ tmr.msleep(timeout_msec);
+ return 0;
+}
diff --git a/sys/kern/kern_uio.c b/sys/kern/kern_uio.c
new file mode 100644
index 0000000..2ec1532
--- /dev/null
+++ b/sys/kern/kern_uio.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/limits.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/filedesc.h>
+
+/*
+ * Clean up after a UIO copyin() operation
+ *
+ * @iov: iovec copy to clean up
+ * @iovcnt: Number of iovec entries
+ */
+void
+uio_copyin_clean(struct iovec *iov, int iovcnt)
+{
+ for (int i = 0; i < iovcnt; ++i) {
+ if (iov[i].iov_base == NULL) {
+ continue;
+ }
+
+ dynfree(iov[i].iov_base);
+ iov[i].iov_base = NULL;
+ }
+}
+
+/*
+ * Read data into a POSIX.1-2017 iovec
+ *
+ * @filedes: File descriptor number
+ * @iov: I/O vector to read file into
+ * @iovcnt: Number of I/O vectors
+ */
+ssize_t
+readv(int filedes, const struct iovec *iov, int iovcnt)
+{
+ void *base;
+ size_t len;
+ ssize_t tmp, bytes_read = 0;
+
+ if (filedes < 0) {
+ return -EINVAL;
+ }
+
+ /*
+ * Make sure that this conforms to our max
+ * iovec limit.
+ */
+ if (iovcnt > IOVEC_MAX) {
+ return -EINVAL;
+ }
+
+ /*
+ * Go through each I/O vector and read a chunk
+ * of data into one.
+ */
+ for (int i = 0; i < iovcnt; ++i) {
+ base = iov[i].iov_base;
+ len = iov[i].iov_len;
+
+        /*
+         * A base of NULL or a length of zero
+         * marks the end of the caller's iovec
+         * list, so stop processing any further
+         * entries at this point.
+         */
+ if (base == NULL || len == 0) {
+ break;
+ }
+
+ /* Read the file into this base */
+ tmp = fd_read(filedes, base, len);
+
+ /* Did anything go wrong? */
+ if (tmp < 0) {
+ return tmp;
+ }
+
+ /* No more data */
+ if (tmp == 0) {
+ break;
+ }
+
+ /* Read more bytes */
+ bytes_read += tmp;
+ }
+
+ return bytes_read;
+}
+
+/*
+ * Write data from a POSIX.1-2017 iovec
+ *
+ * @filedes: File descriptor number
+ * @iov: I/O vector to write to file
+ * @iovcnt: Number of I/O vectors
+ */
+ssize_t
+writev(int filedes, const struct iovec *iov, int iovcnt)
+{
+ void *base;
+ size_t len;
+ ssize_t bytes_written = 0;
+ ssize_t tmp;
+
+ if (filedes < 0) {
+ return -EINVAL;
+ }
+
+ /*
+ * Are we within the limits? Return an
+ * error if not.
+ */
+ if (iovcnt > IOVEC_MAX) {
+ return -EINVAL;
+ }
+
+ for (int i = 0; i < iovcnt; ++i) {
+ base = iov[i].iov_base;
+ len = iov[i].iov_len;
+
+ /*
+ * These are invalid, whatever these are,
+ * terminate our walk through.
+ */
+ if (base == NULL || len == 0) {
+ break;
+ }
+
+ /* Write the data from the iovec */
+ tmp = fd_write(filedes, base, len);
+
+ /* Was there an error? */
+ if (tmp < 0) {
+ return tmp;
+ }
+
+ /* No more data to read? */
+ if (tmp == 0) {
+ break;
+ }
+
+ bytes_written += tmp;
+ }
+
+ return bytes_written;
+}
+
+/*
+ * Validate iovecs coming in from userland
+ * and copy it to a kernel buffer.
+ *
+ * XXX: A new buffer is allocated in k_iov[i]->iov_base
+ * and must be freed with dynfree() after use.
+ *
+ * @u_iov: Userspace source iovecs
+ * @k_iov: Kernel destination iovec
+ * @iovcnt: Number of iovecs to copy
+ */
+int
+uio_copyin(const struct iovec *u_iov, struct iovec *k_iov, int iovcnt)
+{
+ struct iovec *iov_dest;
+ const struct iovec *iov_src;
+ size_t len;
+ void *old_base;
+ int error;
+
+ if (u_iov == NULL || k_iov == NULL) {
+ return -EINVAL;
+ }
+
+ for (int i = 0; i < iovcnt; ++i) {
+ iov_dest = &k_iov[i];
+ iov_src = &u_iov[i];
+ error = copyin(iov_src, iov_dest, sizeof(*iov_dest));
+
+ if (error < 0) {
+ uio_copyin_clean(iov_dest, i + 1);
+ return error;
+ }
+
+ /*
+ * Save the old base so that we may copy the data to
+ * the new kernel buffer. First we'd need to allocate
+ * one of course.
+ */
+ old_base = iov_dest->iov_base;
+ len = iov_dest->iov_len;
+ iov_dest->iov_base = dynalloc(len);
+
+ /* Did it fail? */
+ if (iov_dest->iov_base == NULL) {
+ uio_copyin_clean(iov_dest, i + 1);
+ return -ENOMEM;
+ }
+
+ /* Copy actual data in */
+ error = copyin(old_base, iov_dest->iov_base, len);
+ if (error < 0) {
+ uio_copyin_clean(iov_dest, i + 1);
+ return error;
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+ * Validate iovecs going out from kernel space (us)
+ * before actually sending it out.
+ *
+ * @k_iov: Kernel iovec to copyout
+ * @u_iov: Userspace destination
+ * @iovcnt: Number of iovecs
+ */
+int
+uio_copyout(const struct iovec *k_iov, struct iovec *u_iov, int iovcnt)
+{
+ struct iovec iov_shadow, *iov_dest;
+ const struct iovec *iov_src;
+ int error;
+
+ for (int i = 0; i < iovcnt; ++i) {
+ iov_dest = &u_iov[i];
+ iov_src = &k_iov[i];
+
+ /* Grab a shadow copy */
+ error = copyin(iov_src, &iov_shadow, sizeof(iov_shadow));
+ if (error < 0) {
+ return error;
+ }
+
+ /* Copy out actual data */
+ error = copyout(iov_src->iov_base, iov_dest->iov_base, iov_dest->iov_len);
+ if (error < 0) {
+ return error;
+ }
+ }
+
+ return 0;
+}
diff --git a/sys/kern/kern_vsr.c b/sys/kern/kern_vsr.c
new file mode 100644
index 0000000..c59be1e
--- /dev/null
+++ b/sys/kern/kern_vsr.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/vsr.h>
+#include <sys/proc.h>
+#include <sys/param.h>
+#include <sys/limits.h>
+#include <sys/syslog.h>
+#include <vm/dynalloc.h>
+#include <string.h>
+
+#define pr_trace(fmt, ...) kprintf("vsr: " fmt, ##__VA_ARGS__)
+#define pr_error(...) pr_trace(__VA_ARGS__)
+
+static uint32_t
+fnv1_hash(const char *s)
+{
+ uint32_t hash = 2166136261UL;
+ const uint8_t *p = (uint8_t *)s;
+
+ while (*p != '\0') {
+ hash ^= *p;
+ hash = hash * 0x01000193;
+ ++p;
+ }
+
+ return hash;
+}
+
+/*
+ * Add a VSR capsule to a domain.
+ */
+static void
+vsr_domain_add(struct vsr_domain *vsp, struct vsr_capsule *cap)
+{
+ struct vsr_table *tab;
+ struct vsr_capsule **slot;
+ uint32_t hash;
+
+ if (vsp == NULL || cap == NULL) {
+ return;
+ }
+
+ if (cap->name == NULL) {
+ pr_error("vsr_domain_add: cap->name == NULL\n");
+ return;
+ }
+
+ tab = &vsp->table;
+ hash = fnv1_hash(cap->name);
+ slot = &tab->capsules[hash % VSR_MAX_CAPSULE];
+
+ /* If this slot is free, set it */
+ if (*slot == NULL) {
+ *slot = cap;
+ return;
+ }
+
+ /* Handle collision */
+ TAILQ_INSERT_TAIL(&(*slot)->buckets, cap, link);
+}
+
+/*
+ * Handle VSR domain hashmap collisions.
+ *
+ * @slot: Slot that we have collided with
+ * @name: Name to lookup
+ *
+ * Returns the pointer to the actual capsule if the
+ * collision has been resolved, otherwise, NULL if the
+ * entry to look up was not found.
+ */
+static struct vsr_capsule *
+vsr_domain_clash(struct vsr_capsule *slot, const char *name)
+{
+ struct vsr_capsule *cap_ent;
+
+ TAILQ_FOREACH(cap_ent, &slot->buckets, link) {
+ if (cap_ent == NULL) {
+ continue;
+ }
+
+ if (strcmp(cap_ent->name, name) == 0) {
+ return cap_ent;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Lookup a capsule within a VSR domain
+ * by name.
+ *
+ * @vsp: Domain to lookup within
+ * @name: Name to use as lookup key
+ *
+ * Returns NULL if no entry was found.
+ */
+static struct vsr_capsule *
+vfs_domain_lookup(struct vsr_domain *vsp, const char *name)
+{
+ uint32_t hash;
+ struct vsr_table *tab;
+ struct vsr_capsule **slot;
+
+ if (vsp == NULL || name == NULL) {
+ return NULL;
+ }
+
+ tab = &vsp->table;
+ hash = fnv1_hash(name);
+ slot = &tab->capsules[hash % VSR_MAX_CAPSULE];
+
+ if (*slot == NULL) {
+ return NULL;
+ }
+
+ if (strcmp((*slot)->name, name) != 0) {
+ return vsr_domain_clash(*slot, name);
+ }
+
+ return *slot;
+}
+
+/*
+ * Destroy a VSR capsule
+ *
+ * @capsule: Capsule to destroy
+ */
+static void
+vsr_destroy_capsule(struct vsr_capsule *capsule)
+{
+ struct vsr_capsule *bucket;
+ struct capsule_ops *ops;
+
+ if (capsule->name != NULL) {
+ dynfree(capsule->name);
+ capsule->name = NULL;
+ }
+
+ ops = &capsule->ops;
+ if (ops->reclaim != NULL) {
+ ops->reclaim(capsule, 0);
+ }
+
+ TAILQ_FOREACH(bucket, &capsule->buckets, link) {
+ if (bucket == NULL) {
+ continue;
+ }
+ vsr_destroy_capsule(bucket);
+ }
+
+ /* Release any held locks */
+ mutex_release(&capsule->lock);
+}
+
+/*
+ * Destroy a VSR table
+ *
+ * @tab: Table to destroy.
+ */
+static void
+vsr_destroy_table(struct vsr_table *tab)
+{
+ struct vsr_capsule *capsule;
+
+ if (tab == NULL) {
+ pr_error("vsr_destroy_table: tab is NULL\n");
+ return;
+ }
+
+ for (int i = 0; i < VSR_MAX_CAPSULE; ++i) {
+ if ((capsule = tab->capsules[i]) == NULL) {
+ continue;
+ }
+
+ vsr_destroy_capsule(capsule);
+ }
+}
+
+/*
+ * Allocate a new VSR capsule and add it to
+ * VSR domain.
+ *
+ * @type: Domain type (e.g., VSR_FILE)
+ * @name: Capsule name (e.g., "mod0.data")
+ * @td: Process to attach the capsule to
+ */
+struct vsr_capsule *
+vsr_new_capsule(struct proc *td, vsr_domain_t type, const char *name)
+{
+ struct vsr_capsule *capsule;
+ struct vsr_domain *domain;
+
+ /* Valid args? */
+ if (type >= VSR_MAX_DOMAIN || td == NULL) {
+ return NULL;
+ }
+
+ /*
+ * The VSR domain must be registered for
+ * us to add any capsules to it.
+ */
+ if ((domain = td->vsr_tab[type]) == NULL) {
+ pr_error("VSR domain %d not registered\n", type);
+ return NULL;
+ }
+
+ /* Allocate a new capsule */
+ capsule = dynalloc(sizeof(*capsule));
+ if (capsule == NULL) {
+ return NULL;
+ }
+
+ memset(capsule, 0, sizeof(*capsule));
+ capsule->name = strdup(name);
+
+ TAILQ_INIT(&capsule->buckets);
+ vsr_domain_add(domain, capsule);
+ return capsule;
+}
+
+/*
+ * Allocate a new VSR domain and add it to
+ * a specific process.
+ *
+ * @type: VSR type (e.g., VSR_FILE)
+ */
+struct vsr_domain *
+vsr_new_domain(struct proc *td, vsr_domain_t type)
+{
+ struct vsr_domain *domain;
+
+ /* Valid args? */
+ if (type >= VSR_MAX_DOMAIN || td == NULL) {
+ return NULL;
+ }
+
+ /*
+ * Do not overwrite the entry if it is
+ * already allocated and log this anomalous
+ * activity.
+ */
+ if (td->vsr_tab[type] != NULL) {
+ pr_error("[security]: type %d already allocated\n", type);
+ return NULL;
+ }
+
+ domain = dynalloc(sizeof(*domain));
+ if (domain == NULL) {
+ return NULL;
+ }
+
+ /* Initialize the domain */
+ memset(domain, 0, sizeof(*domain));
+ domain->type = type;
+
+ td->vsr_tab[type] = domain;
+ return domain;
+}
+
+/*
+ * Lookup a capsule by name for the current
+ * process.
+ */
+struct vsr_capsule *
+vsr_lookup_capsule(struct proc *td, vsr_domain_t type, const char *name)
+{
+ struct vsr_domain *domain;
+
+ if (td == NULL) {
+ return NULL;
+ }
+
+ /*
+ * The VSR domain must be registered for
+ * us to lookup any capsules from it.
+ */
+ if ((domain = td->vsr_tab[type]) == NULL) {
+ pr_error("VSR domain %d not registered\n", type);
+ return NULL;
+ }
+
+ return vfs_domain_lookup(domain, name);
+}
+
+/*
+ * Initialize per-process domains
+ */
+void
+vsr_init_domains(struct proc *td)
+{
+ if (vsr_new_domain(td, VSR_FILE) == NULL) {
+ pr_error("failed to initialize VSR file domain\n");
+ }
+}
+
+/*
+ * Destroy per-process domains
+ */
+void
+vsr_destroy_domains(struct proc *td)
+{
+ struct vsr_domain *domain;
+
+ if (td == NULL) {
+ return;
+ }
+
+ for (int i = 0; i < VSR_MAX_DOMAIN; ++i) {
+ if ((domain = td->vsr_tab[i]) == NULL) {
+ continue;
+ }
+
+ vsr_destroy_table(&domain->table);
+ }
+}
diff --git a/sys/kern/kern_work.c b/sys/kern/kern_work.c
new file mode 100644
index 0000000..918af89
--- /dev/null
+++ b/sys/kern/kern_work.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/panic.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/syslog.h>
+#include <sys/workqueue.h>
+#include <vm/dynalloc.h>
+#include <string.h>
+
+#define pr_trace(fmt, ...) kprintf("workq: " fmt, ##__VA_ARGS__)
+#define pr_error(...) pr_trace(__VA_ARGS__)
+
+extern struct proc g_proc0;
+
+/*
+ * The workqueue cookie value that is used for
+ * verifying if a workqueue object is properly
+ * set up or not.
+ */
+#define WQ_COOKIE 0xFC0B
+
+/*
+ * A worker services work in the queue
+ * and there is one per workqueue.
+ */
+static void
+workqueue_worker(void)
+{
+ struct proc *td;
+ struct workqueue *wqp;
+ struct work *wp;
+
+ td = this_td();
+ if ((wqp = td->data) == NULL) {
+ panic("no workqueue in thread\n");
+ }
+
+ /*
+ * Weird things can happen, just be careful
+ * here...
+ */
+ if (wqp->cookie != WQ_COOKIE) {
+ panic("bad WQ_COOKIE in worker\n");
+ }
+
+ for (;;) {
+ mutex_acquire(wqp->lock, 0);
+ wp = TAILQ_FIRST(&wqp->work);
+
+ /* Try again later if empty */
+ if (wp == NULL) {
+ mutex_release(wqp->lock);
+ sched_yield();
+ continue;
+ }
+
+ wp->func(wqp, wp);
+ TAILQ_REMOVE(&wqp->work, wp, link);
+
+ /*
+ * Decrement the amount of work that is
+ * left to get done. Check for underflows
+ * which should not happen unless something
+ * clobbers the fields.
+ */
+ if ((--wqp->nwork) < 0) {
+ panic("wqp nwork underflow\n");
+ }
+
+ mutex_release(wqp->lock);
+ sched_yield();
+ }
+}
+
+/*
+ * Allocates a new work queue that may be used
+ * to hold queued up tasks.
+ *
+ * @name: Name to give the workqueue
+ * @max_work: Maximum number of jobs to be added
+ * @ipl: IPL that the work must operate in
+ *
+ * Returns a pointer to the new workqueue on success,
+ * otherwise a value of NULL is returned.
+ */
+struct workqueue *
+workqueue_new(const char *name, size_t max_work, int ipl)
+{
+ struct workqueue *wqp;
+ struct proc *td;
+
+ td = this_td();
+ if (__unlikely(td == NULL)) {
+ pr_error("no thread in workqueue_new()\n");
+ return NULL;
+ }
+
+ wqp = dynalloc(sizeof(*wqp));
+ if (wqp == NULL) {
+ return NULL;
+ }
+
+ wqp->name = strdup(name);
+ TAILQ_INIT(&wqp->work);
+ wqp->ipl = ipl;
+ wqp->max_work = max_work;
+ wqp->nwork = 0;
+ wqp->cookie = WQ_COOKIE;
+ wqp->lock = mutex_new(wqp->name);
+
+ /*
+ * We need to spawn the work thread which
+ * is behind the management of this specific
+ * workqueue. It typically does something like
+ * dequeuing at the head of the workqueue, performing
+ * the work, cleaning up as needed and dequeuing the
+ * next and waiting if there are none yet.
+ */
+ spawn(
+ &g_proc0, workqueue_worker,
+ wqp, 0,
+ &wqp->worktd
+ );
+
+ return wqp;
+}
+
+/*
+ * Enqueue a work item onto a specific
+ * workqueue.
+ *
+ * @wqp: Pointer to specific workqueue
+ * @name: Name to set for work unit
+ * @wp: Pointer to work that should be enqueued
+ *
+ * Returns zero on success, otherwise a less than
+ * zero value is returned.
+ */
+int
+workqueue_enq(struct workqueue *wqp, const char *name, struct work *wp)
+{
+ if (wqp == NULL || wp == NULL) {
+ return -EINVAL;
+ }
+
+ if (name == NULL) {
+ return -EINVAL;
+ }
+
+ /* Verify that we have a valid workqueue */
+ if (__unlikely(wqp->cookie != WQ_COOKIE)) {
+ panic("workq: bad cookie on work enqueue\n");
+ }
+
+ wp->name = strdup(name);
+ mutex_acquire(wqp->lock, 0);
+
+ /*
+ * If we have reached the max amount of jobs
+ * that we can enqueue here, just log it and
+ * bail.
+ */
+ if (wqp->nwork >= wqp->max_work) {
+ pr_error("max jobs reached for '%s'\n", wqp->name);
+ mutex_release(wqp->lock);
+ return -EAGAIN;
+ }
+
+ TAILQ_INSERT_TAIL(&wqp->work, wp, link);
+ ++wqp->nwork;
+ mutex_release(wqp->lock);
+ return 0;
+}
+
+/*
+ * Destroy a workqueue and free resources
+ * associated with it.
+ *
+ * @wqp: Pointer to workqueue to destroy
+ *
+ * Returns zero on success, otherwise a less
+ * than zero value is returned.
+ */
+int
+workqueue_destroy(struct workqueue *wqp)
+{
+ if (wqp == NULL) {
+ return -EINVAL;
+ }
+
+ /* Should not happen but just make sure */
+ if (__unlikely(wqp->cookie != WQ_COOKIE)) {
+ panic("workq: bad cookie on destroy\n");
+ }
+
+ /* Free the name if we have it */
+ if (wqp->name != NULL) {
+ dynfree(wqp->name);
+ }
+
+ if (wqp->lock != NULL) {
+ mutex_free(wqp->lock);
+ }
+
+ /* Brutally murder any workthreads */
+ if (wqp->worktd != NULL) {
+ exit1(wqp->worktd, 0);
+ wqp->worktd = NULL;
+ }
+
+ /*
+ * Zero before we free for security reasons, we
+ * don't really know what will be queued up but
+ * for certain things, it is best if we make it
+ * as if it never existed in the first place.
+ *
+ * XXX: There is no need to free the workqueue here as
+ * we had to pass it to spawn() to run the worker.
+ *
+ * During an exit, spawn() will free the thread data
+ * meaning this is already cleaned up.
+ */
+ memset(wqp, 0, sizeof(*wqp));
+ return 0;
+}
+
+/*
+ * Cleanup after work
+ *
+ * @wp: Work to clean up
+ */
+int
+work_destroy(struct work *wp)
+{
+ if (wp == NULL) {
+ return -EINVAL;
+ }
+
+ if (wp->name != NULL) {
+ dynfree(wp->name);
+ }
+
+ return 0;
+}
diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c
index 8c1bc74..bc7f8b0 100644
--- a/sys/kern/vfs_init.c
+++ b/sys/kern/vfs_init.c
@@ -37,7 +37,8 @@ struct vnode *g_root_vnode = NULL;
static struct fs_info fs_list[] = {
{MOUNT_RAMFS, &g_initramfs_vfsops, 0, 0},
{MOUNT_DEVFS, &g_devfs_vfsops, 0, 0},
- {MOUNT_CTLFS, &g_ctlfs_vfsops, 0, 0}
+ {MOUNT_CTLFS, &g_ctlfs_vfsops, 0, 0},
+ {MOUNT_TMPFS, &g_tmpfs_vfsops, 0, 0}
};
void
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index d04b812..7320102 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -29,6 +29,7 @@
#include <sys/namei.h>
#include <sys/vnode.h>
+#include <sys/param.h>
#include <sys/mount.h>
#include <sys/errno.h>
#include <vm/dynalloc.h>
@@ -118,20 +119,60 @@ vfs_get_fname_at(const char *path, size_t idx)
}
/*
+ * Count the number of components that exist within
+ * a path minus the delimiter as well as any redundant
+ * delimiters.
+ *
+ * @path: Path to count
+ */
+static uint8_t
+namei_num_cnp(const char *path)
+{
+ const char *p = path;
+ uint8_t count = 0;
+
+ while (*p != '\0') {
+ /* Skip redundant delimiters */
+ if (p[0] == '/' && p[1] == '/') {
+ ++p;
+ continue;
+ }
+
+ if (*p == '/') {
+ ++count;
+ }
+ ++p;
+ }
+
+ /* Don't count leading slash */
+ if (*(p - 1) == '/') {
+ --count;
+ }
+
+ return count;
+}
+
+/*
* Search for a path within a mountpoint.
*
* @mp: Mountpoint to search in.
* @path: Path to search for.
+ * @ndp: Namei data pointer
*/
static struct vnode *
-namei_mp_search(struct mount *mp, const char *path)
+namei_mp_search(struct mount *mp, const char *path, struct nameidata *ndp)
{
struct vop_lookup_args lookup_args;
struct vnode *vp = mp->vp;
+ uint8_t n_cnp = 0;
char *name;
int status;
- for (size_t i = 1;; ++i) {
+ n_cnp = namei_num_cnp(path);
+ if (ISSET(ndp->flags, NAMEI_WANTPARENT)) {
+ --n_cnp;
+ }
+ for (size_t i = 1; i < n_cnp; ++i) {
name = vfs_get_fname_at(path, i);
if (name == NULL)
break;
@@ -140,7 +181,7 @@ namei_mp_search(struct mount *mp, const char *path)
lookup_args.dirvp = vp;
lookup_args.vpp = &vp;
- status = vfs_vop_lookup(vp, &lookup_args);
+ status = vfs_vop_lookup(&lookup_args);
dynfree(name);
if (status != 0) {
@@ -193,7 +234,7 @@ namei(struct nameidata *ndp)
lookup_args.name = path;
lookup_args.dirvp = g_root_vnode;
lookup_args.vpp = &vp;
- status = vfs_vop_lookup(lookup_args.dirvp, &lookup_args);
+ status = vfs_vop_lookup(&lookup_args);
/* Did we find it in the root */
if (status == 0) {
@@ -212,7 +253,7 @@ namei(struct nameidata *ndp)
/* If the name matches, search within */
if (strcmp(mp->name, name) == 0)
- vp = namei_mp_search(mp, path);
+ vp = namei_mp_search(mp, path, ndp);
/* Did we find it at this mountpoint? */
if (vp != NULL) {
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index da0a4f9..69417d0 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -141,8 +141,9 @@ vfs_release_vnode(struct vnode *vp)
}
int
-vfs_vop_lookup(struct vnode *vp, struct vop_lookup_args *args)
+vfs_vop_lookup(struct vop_lookup_args *args)
{
+ const struct vnode *vp = args->dirvp;
const struct vops *vops = vp->vops;
if (vops == NULL)
@@ -180,8 +181,9 @@ vfs_vop_write(struct vnode *vp, struct sio_txn *sio)
}
int
-vfs_vop_getattr(struct vnode *vp, struct vop_getattr_args *args)
+vfs_vop_getattr(struct vop_getattr_args *args)
{
+ const struct vnode *vp = args->vp;
const struct vops *vops = vp->vops;
if (vops == NULL)
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 990d722..d15ecf1 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -43,7 +43,7 @@ static int
vfs_dostat(const char *path, struct stat *sbuf)
{
char pathbuf[PATH_MAX];
- struct vattr *attr;
+ struct vattr attr;
struct stat st;
struct vnode *vp;
struct vop_getattr_args gattr;
@@ -58,7 +58,7 @@ vfs_dostat(const char *path, struct stat *sbuf)
return -EFAULT;
}
- nd.path = path;
+ nd.path = pathbuf;
nd.flags = 0;
if ((error = namei(&nd)) != 0) {
@@ -67,19 +67,42 @@ vfs_dostat(const char *path, struct stat *sbuf)
vp = nd.vp;
gattr.vp = vp;
- error = vfs_vop_getattr(vp, &gattr);
+ gattr.res = &attr;
+ error = vfs_vop_getattr(&gattr);
if (error != 0) {
return error;
}
- attr = gattr.res;
memset(&st, VNOVAL, sizeof(st));
/* Copy stat data to userspace statbuf */
- st.st_mode = attr->mode;
- st.st_size = attr->size;
+ st.st_mode = attr.mode;
+ st.st_size = attr.size;
copyout(&st, sbuf, sizeof(*sbuf));
+ vfs_release_vnode(vp);
+ return 0;
+}
+
+static int
+vfs_doaccess(const char *path)
+{
+ struct nameidata nd;
+ char pathbuf[PATH_MAX];
+ int error;
+
+ if ((copyinstr(path, pathbuf, sizeof(pathbuf))) < 0) {
+ return -EFAULT;
+ }
+
+ nd.path = pathbuf;
+ nd.flags = 0;
+
+ if ((error = namei(&nd)) != 0) {
+ return error;
+ }
+
+ vfs_release_vnode(nd.vp);
return 0;
}
@@ -149,3 +172,14 @@ sys_stat(struct syscall_args *scargs)
{
return vfs_dostat((const char *)scargs->arg0, (void *)scargs->arg1);
}
+
+/*
+ * Check if a file can be accessed.
+ *
+ * @arg0: path
+ */
+scret_t
+sys_access(struct syscall_args *scargs)
+{
+ return vfs_doaccess((const char *)scargs->arg0);
+}
diff --git a/sys/kern/vfs_vcache.c b/sys/kern/vfs_vcache.c
index 25e244c..6c08caf 100644
--- a/sys/kern/vfs_vcache.c
+++ b/sys/kern/vfs_vcache.c
@@ -161,7 +161,7 @@ vfs_vcache_migrate(int newtype)
args.oldp = NULL;
args.oldlenp = NULL;
args.newp = sysctl_val;
- args.newlen = strlen(sysctl_val);
+ args.newlen = strlen(sysctl_val) + 1;
if ((retval = sysctl(&args)) != 0) {
return retval;