14 files changed, 1090 insertions, 120 deletions
diff --git a/sys/arch/amd64/amd64/intr.c b/sys/arch/amd64/amd64/intr.c
index a545788..c44c88e 100644
--- a/sys/arch/amd64/amd64/intr.c
+++ b/sys/arch/amd64/amd64/intr.c
@@ -98,12 +98,12 @@ intr_register(const char *name, const struct intr_hand *ih)
      * Try to allocate an interrupt vector. An IPL is made up
      * of 4 bits so there can be 16 vectors per IPL.
      *
-     * XXX: Vector 0x20 is reserved for the Hyra scheduler,
-     *      vector 0x21 is reserved for the CPU halt IPI,
-     *      and vector 0x22 is reserved for TLB shootdowns.
+     * XXX: Vector 0x20 is reserved for the Hyra scheduler and
+     *      the N_IPIVEC vectors 0x21 through 0x24 are reserved
+     *      for inter-processor interrupts.
      */
     for (int i = vec; i < vec + 16; ++i) {
-        if (g_intrs[i] != NULL || i < 0x23) {
+        if (g_intrs[i] != NULL || i < 0x25) {
             continue;
         }
 
diff --git a/sys/arch/amd64/amd64/ipi.c b/sys/arch/amd64/amd64/ipi.c
new file mode 100644
index 0000000..ffa291f
--- /dev/null
+++ b/sys/arch/amd64/amd64/ipi.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+#include <sys/param.h>
+#include <sys/panic.h>
+#include <sys/spinlock.h>
+#include <machine/cpu.h>
+#include <machine/idt.h>
+#include <machine/ipi.h>
+#include <machine/lapic.h>
+#include <string.h>
+
+void ipi_isr0(void);
+void ipi_isr1(void);
+void ipi_isr2(void);
+void ipi_isr3(void);
+
+void __ipi_handle_common(void);
+
+#define pr_trace(fmt, ...) kprintf("ipi: " fmt, ##__VA_ARGS__)
+#define pr_error(...) pr_trace(__VA_ARGS__)
+
+#define BASE_VECTOR 0x21
+#define COOKIE 0x7E0A
+
+/* For the global state of the subsystem */
+static uint32_t cookie = 0;
+
+/*
+ * The next vector that will be used for an IPI to
+ * be allocated. It starts at 0x21 because interrupt
+ * vector 0x20 is used for the Hyra scheduler and `N_IPIVEC'
+ * vectors up are reserved for inter-processor interrupts.
+ *
+ * XXX: This must stay below BASE_VECTOR + N_IPIVEC !!
+ */
+static uint8_t next_vec = BASE_VECTOR;
+static uint8_t vec_entries = 0;
+
+/*
+ * In order to get an index into the 'vectors' array,
+ * one can pass an `ipi_bitmap' bit index into the
+ * ipi_vector() function. The index into the `ipi`
+ * field within may be acquired with the ipi_index()
+ * function.
+ */
+static uint64_t ipi_bitmap = 0;
+static struct ipi_vector vectors[N_IPIVEC];
+static struct spinlock lock;
+
+/*
+ * Allocate a bit from the `ipi_bitmap' and
+ * return the index.
+ *
+ * Returns a less than zero value upon error.
+ */
+static ssize_t
+alloc_ipi_bit(void)
+{
+    const size_t nbits = sizeof(ipi_bitmap) * 8;
+    off_t bit = 0;
+
+    /* Claim the lowest bit that is still clear */
+    for (; bit < nbits; ++bit) {
+        if (!ISSET(ipi_bitmap, BIT(bit))) {
+            ipi_bitmap |= BIT(bit);
+            return bit;
+        }
+    }
+    return -1;      /* Every bit is taken */
+}
+
+/*
+ * Allocate an IPI that can be sent to other
+ * cores on the CPU. This is the core logic
+ * and contains *no* locks. One should be
+ * using the md_ipi_alloc() function instead.
+ *
+ * Returns the allocated IPI identifier on success,
+ * otherwise a less than zero value is returned.
+ */
+static int
+__ipi_alloc(struct cpu_ipi **res)
+{
+    struct ipi_vector *vp;
+    struct cpu_ipi *ipip;
+    ssize_t bit;
+    uint8_t idx;
+
+    if (res == NULL) {
+        return -EINVAL;
+    }
+
+    if (next_vec >= BASE_VECTOR + N_IPIVEC) {
+        return -EAGAIN;
+    }
+
+    /* Attempt to allocate a bit index from the bitmap */
+    if ((bit = alloc_ipi_bit()) < 0) {
+        return -EAGAIN;
+    }
+
+    /*
+     * Never index past the vector table; hand
+     * the bit back if it has no backing vector.
+     */
+    if ((idx = ipi_vector(bit)) >= N_IPIVEC) {
+        ipi_bitmap &= ~BIT(bit);
+        return -EAGAIN;
+    }
+
+    /* Initialize the vector if not already */
+    vp = &vectors[idx];
+    if (vp->cookie != COOKIE) {
+        vp->cookie = COOKIE;
+        vp->nipi = 0;
+        vp->vec = BASE_VECTOR + idx;
+        memset(vp->ipi, 0, sizeof(vp->ipi));
+    }
+
+    /*
+     * Sanity check: a vector holding more IPIs than
+     * the maximum means the table cannot be trusted.
+     */
+    if (__unlikely(vp->nipi >= IPI_PER_VEC)) {
+        panic("too many IPIs in vector %x\n", vp->vec);
+    }
+
+    /* We are allocating, not clobbering */
+    ipip = &vp->ipi[ipi_index(bit)];
+    if (ipip->cookie == COOKIE) {
+        panic("ipi table corruption\n");
+    }
+
+    if ((++vec_entries) >= IPI_PER_VEC) {
+        vec_entries = 0;
+        ++next_vec;
+    }
+
+    /* Set up the initial state */
+    ++vp->nipi;
+    ipip->cookie = COOKIE;
+    ipip->handler = NULL;
+    ipip->id = bit;
+    *res = ipip;
+    return bit;
+}
+
+/*
+ * Dispatch pending IPIs for the current
+ * processor.
+ *
+ * @vec: Backing interrupt vector
+ * @ci: Current processor
+ *
+ * All vectors are drained since senders skip the
+ * interrupt while `ci->ipi_dispatch' is set.
+ */
+static void
+ipi_dispatch_pending(struct ipi_vector *vec, struct cpu_info *ci)
+{
+    const uint8_t n_bit = sizeof(ipi_pend_t) * 8;
+    struct cpu_ipi *ipip;
+    uint8_t vec_i, bit_i;
+
+    if (vec == NULL || ci == NULL) {
+        return;
+    }
+
+    for (vec_i = 0; vec_i < N_IPIVEC; ++vec_i) {
+        for (bit_i = 0; bit_i < n_bit && bit_i < IPI_PER_VEC; ++bit_i) {
+            /* Is this pending or not? */
+            if (!ISSET(ci->ipi_pending[vec_i], BIT(bit_i))) {
+                continue;
+            }
+
+            /* No longer pending; words are indexed like `vectors' */
+            ci->ipi_pending[vec_i] &= ~BIT(bit_i);
+
+            /* Only call into slots that have a live handler */
+            ipip = &vectors[vec_i].ipi[bit_i];
+            if (ipip->cookie == COOKIE && ipip->handler != NULL) {
+                ipip->handler(ipip);
+            }
+        }
+    }
+}
+
+/*
+ * Check an IPI pending bitmap for a
+ * vector and send IPIs as needed
+ *
+ * @ci: Target processor
+ * @pending: Pending IPIs
+ */
+static void
+ipi_send_vector(struct cpu_info *ci, ipi_pend_t pending)
+{
+    struct ipi_vector *vp;
+    struct cpu_ipi *ipip;
+    uint8_t n_bits = sizeof(pending) * 8;
+    uint8_t bit_i;
+    uint8_t vector, index;
+
+    /*
+     * Nothing to do without a target. If we are already
+     * dispatching IPIs, we don't want to find ourselves
+     * in interrupt hell either.
+     */
+    if (ci == NULL || ci->ipi_dispatch) {
+        return;
+    }
+
+    for (bit_i = 0; bit_i < n_bits; ++bit_i) {
+        if (!ISSET(pending, BIT(bit_i))) {
+            continue;
+        }
+
+        vector = ipi_vector(bit_i);
+        index = ipi_index(bit_i);
+        vp = &vectors[vector];
+        ipip = &vp->ipi[index];
+
+        /* Ignore if cookie does not match */
+        if (ipip->cookie != COOKIE)
+            continue;
+
+        /* Ignore if there is no handler */
+        if (ipip->handler == NULL)
+            continue;
+
+        /*
+         * Only flag a dispatch once an interrupt really
+         * goes out, the receiver is what clears it again
+         * so setting it with nothing sent would leave the
+         * target unable to receive further IPIs.
+         */
+        ci->ipi_dispatch = 1;
+        ci->ipi_id = bit_i;
+        lapic_send_ipi(
+            ci->apicid,
+            IPI_SHORTHAND_NONE,
+            BASE_VECTOR + vector
+        );
+    }
+}
+
+/*
+ * Common IPI routine, called from vector.S
+ *
+ * XXX: Internal usage only
+ */
+void
+__ipi_handle_common(void)
+{
+    struct cpu_info *ci;
+    uint8_t vector;
+
+    if (cookie != COOKIE) {
+        pr_trace("[warn]: got spurious ipi\n");
+        return;
+    }
+
+    if ((ci = this_cpu()) == NULL) {
+        pr_error("could not get current CPU\n");
+        return;
+    }
+
+    /* Grab the vector, `ci' is known to be valid by now */
+    vector = ipi_vector(ci->ipi_id);
+    if (vector < N_IPIVEC && vectors[vector].cookie == COOKIE) {
+        ipi_dispatch_pending(&vectors[vector], ci);
+    } else {
+        pr_error("got IPI for uninitialized vector\n");
+    }
+
+    /*
+     * Done dispatching. Reset on the error path too, senders
+     * skip this processor while `ipi_dispatch' stays set.
+     */
+    ci->ipi_dispatch = 0;
+    ci->ipi_id = 0;
+}
+
+/*
+ * Send one or more IPIs to a specific
+ * processor after caller sets bits in
+ * the `ci->ipi_pending' field
+ *
+ * @ci: Processor to send IPI(s) to
+ */
+int
+md_ipi_send(struct cpu_info *ci)
+{
+    int vec_i = 0;
+
+    if (ci == NULL)
+        return -EINVAL;
+
+    spinlock_acquire(&lock);
+    while (vec_i < N_IPIVEC) {      /* One pending word per vector */
+        ipi_send_vector(ci, ci->ipi_pending[vec_i++]);
+    }
+    spinlock_release(&lock);
+    return 0;
+}
+
+
+/*
+ * IPI allocation interface with
+ * locking.
+ */
+int
+md_ipi_alloc(struct cpu_ipi **res)
+{
+    int ipi_id;
+
+    spinlock_acquire(&lock);        /* __ipi_alloc() takes no locks */
+    ipi_id = __ipi_alloc(res);
+    spinlock_release(&lock);
+    return ipi_id;
+}
+
+/*
+ * Initialize the IPI thunks
+ */
+void
+md_ipi_init(void)
+{
+    /* One IDT gate per reserved IPI vector */
+    idt_set_desc(BASE_VECTOR + 0, IDT_INT_GATE, ISR(ipi_isr0), 0);
+    idt_set_desc(BASE_VECTOR + 1, IDT_INT_GATE, ISR(ipi_isr1), 0);
+    idt_set_desc(BASE_VECTOR + 2, IDT_INT_GATE, ISR(ipi_isr2), 0);
+    idt_set_desc(BASE_VECTOR + 3, IDT_INT_GATE, ISR(ipi_isr3), 0);
+    cookie = COOKIE;    /* IPIs seen before this are reported as spurious */
+}
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
index 9ff96e1..3f8580a 100644
--- a/sys/arch/amd64/amd64/machdep.c
+++ b/sys/arch/amd64/amd64/machdep.c
@@ -42,6 +42,7 @@
 #include <machine/uart.h>
 #include <machine/sync.h>
 #include <machine/intr.h>
+#include <machine/ipi.h>
 #include <machine/cdefs.h>
 #include <machine/isa/i8042var.h>
 #include <dev/cons/cons.h>
@@ -61,9 +62,6 @@
         pr_trace(__VA_ARGS__);  \
     }
 
-#define HALT_VECTOR 0x21
-#define TLB_VECTOR  0x22
-
 #if defined(__SPECTRE_IBRS)
 #define SPECTRE_IBRS  __SPECTRE_IBRS
 #else
@@ -82,19 +80,20 @@ void syscall_isr(void);
 void pin_isr_load(void);
 
 struct cpu_info g_bsp_ci = {0};
+static struct cpu_ipi *halt_ipi;
+static struct cpu_ipi *tlb_ipi;
+static struct spinlock ipi_lock = {0};
 static bool bsp_init = false;
 
-__attribute__((__interrupt__))
-static void
-cpu_halt_isr(void *p)
+static int
+cpu_halt_handler(struct cpu_ipi *ipi)
 {
     __ASMV("cli; hlt");
     __builtin_unreachable();
 }
 
-__attribute__((__interrupt__))
-static void
-tlb_shootdown_isr(void *p)
+static int
+tlb_shootdown_handler(struct cpu_ipi *ipi)
 {
     struct cpu_info *ci;
     int ipl;
@@ -106,7 +105,7 @@ tlb_shootdown_isr(void *p)
      */
     ci = this_cpu();
     if (!ci->tlb_shootdown) {
-        return;
+        return -1;
     }
 
     ipl = splraise(IPL_HIGH);
@@ -115,6 +114,7 @@ tlb_shootdown_isr(void *p)
     ci->shootdown_va = 0;
     ci->tlb_shootdown = 0;
     splx(ipl);
+    return 0;
 }
 
 static void
@@ -141,8 +141,6 @@ setup_vectors(struct cpu_info *ci)
     idt_set_desc(0xD, IDT_TRAP_GATE, ISR(general_prot), 0);
     idt_set_desc(0xE, IDT_TRAP_GATE, ISR(page_fault), 0);
     idt_set_desc(0x80, IDT_USER_INT_GATE, ISR(syscall_isr), IST_SYSCALL);
-    idt_set_desc(HALT_VECTOR, IDT_INT_GATE, ISR(cpu_halt_isr), 0);
-    idt_set_desc(TLB_VECTOR, IDT_INT_GATE, ISR(tlb_shootdown_isr), 0);
     pin_isr_load();
 }
 
@@ -202,6 +200,44 @@ enable_simd(void)
 }
 
 static void
+init_ipis(void)         /* Allocate the halt and TLB shootdown IPIs */
+{
+    int error;
+
+    if (bsp_init) {     /* Nothing is allocated once `bsp_init' is set */
+        return;
+    }
+
+    spinlock_acquire(&ipi_lock);
+    error = md_ipi_alloc(&halt_ipi);
+    if (error < 0) {
+        pr_error("md_ipi_alloc: returned %d\n", error);
+        panic("failed to init halt IPI\n");
+    }
+
+    halt_ipi->handler = cpu_halt_handler;
+    error = md_ipi_alloc(&tlb_ipi);
+    if (error < 0) {
+        pr_error("md_ipi_alloc: returned %d\n", error);
+        panic("failed to init TLB IPI\n");
+    }
+
+    tlb_ipi->handler = tlb_shootdown_handler;
+
+    /*
+     * Some IPIs must have very specific IDs
+     * so that they are standard and usable
+     * throughout the rest of the system.
+     */
+    if (halt_ipi->id != IPI_HALT)
+        panic("expected IPI_HALT for halt IPI\n");
+    if (tlb_ipi->id != IPI_TLB)
+        panic("expected IPI_TLB for TLB IPI\n");
+
+    spinlock_release(&ipi_lock);
+}
+
+static void
 cpu_get_info(struct cpu_info *ci)
 {
     uint32_t eax, ebx, unused;
@@ -257,7 +293,7 @@ cpu_shootdown_tlb(vaddr_t va)
         spinlock_acquire(&cip->lock);
         cip->shootdown_va = va;
         cip->tlb_shootdown = 1;
-        lapic_send_ipi(cip->apicid, IPI_SHORTHAND_NONE, TLB_VECTOR);
+        cpu_ipi_send(cip, IPI_TLB);
         spinlock_release(&cip->lock);
     }
 }
@@ -309,6 +345,9 @@ md_backtrace(void)
 void
 cpu_halt_all(void)
 {
+    struct cpu_info *ci;
+    uint32_t ncpu;
+
     /*
      * If we have no current 'cpu_info' structure set,
      * we can't send IPIs, so just assume only the current
@@ -319,8 +358,15 @@ cpu_halt_all(void)
         __ASMV("cli; hlt");
     }
 
-    /* Send IPI to all cores */
-    lapic_send_ipi(0, IPI_SHORTHAND_ALL, HALT_VECTOR);
+    /* Send the halt IPI to each processor cpu_get() knows of */
+    ncpu = cpu_count();
+    for (uint32_t i = 0; i < ncpu; ++i) {
+        ci = cpu_get(i);
+        if (ci != NULL) {
+            cpu_ipi_send(ci, IPI_HALT);
+        }
+    }
+
     for (;;);
 }
 
@@ -331,12 +377,24 @@ cpu_halt_all(void)
 void
 cpu_halt_others(void)
 {
+    struct cpu_info *curcpu, *ci;
+    uint32_t ncpu;
+
     if (rdmsr(IA32_GS_BASE) == 0) {
         __ASMV("cli; hlt");
     }
 
-    /* Send IPI to all cores */
-    lapic_send_ipi(0, IPI_SHORTHAND_OTHERS, HALT_VECTOR);
+    curcpu = this_cpu();        /* Used to recognise the calling processor */
+    ncpu = cpu_count();
+
+    for (int i = 0; i < ncpu; ++i) {
+        if ((ci = cpu_get(i)) == NULL)
+            continue;
+        if (ci->id == curcpu->id)   /* Skip the calling processor */
+            continue;
+
+        cpu_ipi_send(ci, IPI_HALT);
+    }
 }
 
 void
@@ -441,7 +499,10 @@ cpu_startup(struct cpu_info *ci)
 
     wrmsr(IA32_GS_BASE, (uintptr_t)ci);
     init_tss(ci);
+
     setup_vectors(ci);
+    md_ipi_init();
+    init_ipis();
 
     try_mitigate_spectre();
     ci->online = 1;
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 890b314..19c68d5 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -73,72 +73,101 @@ done:
 
     .globl pin_isr_load
 pin_isr_load:
-    IDT_SET_VEC 35, ioapic_edge_0
-    IDT_SET_VEC 36, ioapic_edge_1
-    IDT_SET_VEC 37, ioapic_edge_2
-    IDT_SET_VEC 38, ioapic_edge_3
-    IDT_SET_VEC 39, ioapic_edge_4
-    IDT_SET_VEC 40, ioapic_edge_5
-    IDT_SET_VEC 41, ioapic_edge_6
-    IDT_SET_VEC 42, ioapic_edge_7
-    IDT_SET_VEC 43, ioapic_edge_8
-    IDT_SET_VEC 44, ioapic_edge_9
-    IDT_SET_VEC 45, ioapic_edge_10
-    IDT_SET_VEC 46, ioapic_edge_11
-    IDT_SET_VEC 47, ioapic_edge_12
-    IDT_SET_VEC 48, ioapic_edge_13
-    IDT_SET_VEC 49, ioapic_edge_14
-    IDT_SET_VEC 50, ioapic_edge_15
-    IDT_SET_VEC 51, ioapic_edge_16
-    IDT_SET_VEC 52, ioapic_edge_17
-    IDT_SET_VEC 53, ioapic_edge_18
-    IDT_SET_VEC 54, ioapic_edge_19
-    IDT_SET_VEC 55, ioapic_edge_20
-    IDT_SET_VEC 56, ioapic_edge_21
-    IDT_SET_VEC 57, ioapic_edge_22
-    IDT_SET_VEC 58, ioapic_edge_23
-    IDT_SET_VEC 59, ioapic_edge_24
-    IDT_SET_VEC 60, ioapic_edge_25
-    IDT_SET_VEC 61, ioapic_edge_26
-    IDT_SET_VEC 62, ioapic_edge_27
-    IDT_SET_VEC 63, ioapic_edge_28
-    IDT_SET_VEC 64, ioapic_edge_29
-    IDT_SET_VEC 65, ioapic_edge_30
-    IDT_SET_VEC 66, ioapic_edge_31
-    IDT_SET_VEC 67, ioapic_edge_32
-    IDT_SET_VEC 68, ioapic_edge_33
-    IDT_SET_VEC 69, ioapic_edge_34
-    IDT_SET_VEC 70, ioapic_edge_35
-    IDT_SET_VEC 71, ioapic_edge_36
-    IDT_SET_VEC 72, ioapic_edge_37
-    IDT_SET_VEC 73, ioapic_edge_38
-    IDT_SET_VEC 74, ioapic_edge_39
-    IDT_SET_VEC 75, ioapic_edge_40
-    IDT_SET_VEC 76, ioapic_edge_41
-    IDT_SET_VEC 77, ioapic_edge_42
-    IDT_SET_VEC 78, ioapic_edge_43
-    IDT_SET_VEC 79, ioapic_edge_44
-    IDT_SET_VEC 80, ioapic_edge_45
-    IDT_SET_VEC 81, ioapic_edge_46
-    IDT_SET_VEC 82, ioapic_edge_47
-    IDT_SET_VEC 83, ioapic_edge_48
-    IDT_SET_VEC 84, ioapic_edge_49
-    IDT_SET_VEC 85, ioapic_edge_50
-    IDT_SET_VEC 86, ioapic_edge_51
-    IDT_SET_VEC 87, ioapic_edge_52
-    IDT_SET_VEC 88, ioapic_edge_53
-    IDT_SET_VEC 89, ioapic_edge_54
-    IDT_SET_VEC 90, ioapic_edge_55
-    IDT_SET_VEC 91, ioapic_edge_56
-    IDT_SET_VEC 92, ioapic_edge_57
-    IDT_SET_VEC 93, ioapic_edge_58
-    IDT_SET_VEC 94, ioapic_edge_59
-    IDT_SET_VEC 95, ioapic_edge_60
-    IDT_SET_VEC 96, ioapic_edge_61
-    IDT_SET_VEC 97, ioapic_edge_62
-    IDT_SET_VEC 97, ioapic_edge_63
+    IDT_SET_VEC 37, ioapic_edge_0
+    IDT_SET_VEC 38, ioapic_edge_1
+    IDT_SET_VEC 39, ioapic_edge_2
+    IDT_SET_VEC 40, ioapic_edge_3
+    IDT_SET_VEC 41, ioapic_edge_4
+    IDT_SET_VEC 42, ioapic_edge_5
+    IDT_SET_VEC 43, ioapic_edge_6
+    IDT_SET_VEC 44, ioapic_edge_7
+    IDT_SET_VEC 45, ioapic_edge_8
+    IDT_SET_VEC 46, ioapic_edge_9
+    IDT_SET_VEC 47, ioapic_edge_10
+    IDT_SET_VEC 48, ioapic_edge_11
+    IDT_SET_VEC 49, ioapic_edge_12
+    IDT_SET_VEC 50, ioapic_edge_13
+    IDT_SET_VEC 51, ioapic_edge_14
+    IDT_SET_VEC 52, ioapic_edge_15
+    IDT_SET_VEC 53, ioapic_edge_16
+    IDT_SET_VEC 54, ioapic_edge_17
+    IDT_SET_VEC 55, ioapic_edge_18
+    IDT_SET_VEC 56, ioapic_edge_19
+    IDT_SET_VEC 57, ioapic_edge_20
+    IDT_SET_VEC 58, ioapic_edge_21
+    IDT_SET_VEC 59, ioapic_edge_22
+    IDT_SET_VEC 60, ioapic_edge_23
+    IDT_SET_VEC 61, ioapic_edge_24
+    IDT_SET_VEC 62, ioapic_edge_25
+    IDT_SET_VEC 63, ioapic_edge_26
+    IDT_SET_VEC 64, ioapic_edge_27
+    IDT_SET_VEC 65, ioapic_edge_28
+    IDT_SET_VEC 66, ioapic_edge_29
+    IDT_SET_VEC 67, ioapic_edge_30
+    IDT_SET_VEC 68, ioapic_edge_31
+    IDT_SET_VEC 69, ioapic_edge_32
+    IDT_SET_VEC 70, ioapic_edge_33
+    IDT_SET_VEC 71, ioapic_edge_34
+    IDT_SET_VEC 72, ioapic_edge_35
+    IDT_SET_VEC 73, ioapic_edge_36
+    IDT_SET_VEC 74, ioapic_edge_37
+    IDT_SET_VEC 75, ioapic_edge_38
+    IDT_SET_VEC 76, ioapic_edge_39
+    IDT_SET_VEC 77, ioapic_edge_40
+    IDT_SET_VEC 78, ioapic_edge_41
+    IDT_SET_VEC 79, ioapic_edge_42
+    IDT_SET_VEC 80, ioapic_edge_43
+    IDT_SET_VEC 81, ioapic_edge_44
+    IDT_SET_VEC 82, ioapic_edge_45
+    IDT_SET_VEC 83, ioapic_edge_46
+    IDT_SET_VEC 84, ioapic_edge_47
+    IDT_SET_VEC 85, ioapic_edge_48
+    IDT_SET_VEC 86, ioapic_edge_49
+    IDT_SET_VEC 87, ioapic_edge_50
+    IDT_SET_VEC 88, ioapic_edge_51
+    IDT_SET_VEC 89, ioapic_edge_52
+    IDT_SET_VEC 90, ioapic_edge_53
+    IDT_SET_VEC 91, ioapic_edge_54
+    IDT_SET_VEC 92, ioapic_edge_55
+    IDT_SET_VEC 93, ioapic_edge_56
+    IDT_SET_VEC 94, ioapic_edge_57
+    IDT_SET_VEC 95, ioapic_edge_58
+    IDT_SET_VEC 96, ioapic_edge_59
+    IDT_SET_VEC 97, ioapic_edge_60
+    IDT_SET_VEC 98, ioapic_edge_61
+    IDT_SET_VEC 99, ioapic_edge_62
+    IDT_SET_VEC 100, ioapic_edge_63
     ret
 
+    .globl ipi_isr0
+INTRENTRY(ipi_isr0, ipi_trampoline)
+    call ipi_trampoline
+    retq
+
+    .globl ipi_isr1
+INTRENTRY(ipi_isr1, ipi_trampoline)
+    call ipi_trampoline
+    retq
+
+    .globl ipi_isr2
+INTRENTRY(ipi_isr2, ipi_trampoline)
+    call ipi_trampoline
+    retq
+
+    .globl ipi_isr3
+INTRENTRY(ipi_isr3, ipi_trampoline)
+    call ipi_trampoline
+    retq
+
+/*
+ * Hyra supports 16 IPI handlers on each of the
+ * 4 reserved IDT vectors. That allows for a
+ * maximum of 64 IPIs.
+ */
+ipi_trampoline:
+    call __ipi_handle_common
+    retq
+
 /* I/O APIC edge ISRs */
 INTRENTRY(ioapic_edge_0, ioapic_common_func)
 INTRENTRY(ioapic_edge_1, ioapic_common_func)
diff --git a/sys/dev/acpi/uacpi.c b/sys/dev/acpi/uacpi.c
index ffec436..6c2bf50 100644
--- a/sys/dev/acpi/uacpi.c
+++ b/sys/dev/acpi/uacpi.c
@@ -55,6 +55,9 @@
 #include <vm/vm.h>
 #include <string.h>
 
+#define pr_trace(fmt, ...) kprintf("acpi: " fmt, ##__VA_ARGS__)
+#define pr_error(...) pr_trace(__VA_ARGS__)
+
 typedef struct {
     uacpi_io_addr base;
     uacpi_size length;
@@ -633,25 +636,25 @@ uacpi_init(void)
 
     ret = uacpi_initialize(0);
     if (uacpi_unlikely_error(ret)) {
-        kprintf("uacpi init error: %s\n", uacpi_status_to_string(ret));
+        pr_error("uacpi init error: %s\n", uacpi_status_to_string(ret));
         return -1;
     }
 
     ret = uacpi_namespace_load();
     if (uacpi_unlikely_error(ret)) {
-        kprintf("uacpi namespace load error: %s\n", uacpi_status_to_string(ret));
+        pr_error("uacpi namespace load error: %s\n", uacpi_status_to_string(ret));
         return -1;
     }
 
     ret = uacpi_namespace_initialize();
     if (uacpi_unlikely_error(ret)) {
-        kprintf("uacpi namespace init error: %s\n", uacpi_status_to_string(ret));
+        pr_error("uacpi namespace init error: %s\n", uacpi_status_to_string(ret));
         return -1;
     }
 
     ret = uacpi_finalize_gpe_initialization();
     if (uacpi_unlikely_error(ret)) {
-        kprintf("uacpi GPE init error: %s\n", uacpi_status_to_string(ret));
+        pr_error("uacpi GPE init error: %s\n", uacpi_status_to_string(ret));
         return -1;
     }
 
@@ -661,7 +664,7 @@ uacpi_init(void)
     );
 
     if (uacpi_unlikely_error(ret)) {
-        kprintf("failed to install power button event: %s\n",
+        pr_error("failed to install power button event: %s\n",
             uacpi_status_to_string(ret)
         );
         return -1;
diff --git a/sys/include/arch/amd64/cpu.h b/sys/include/arch/amd64/cpu.h
index 116661b..4586163 100644
--- a/sys/include/arch/amd64/cpu.h
+++ b/sys/include/arch/amd64/cpu.h
@@ -37,6 +37,7 @@
 #include <sys/spinlock.h>
 #include <machine/tss.h>
 #include <machine/cdefs.h>
+#include <machine/intr.h>
 
 #define CPU_IRQ(IRQ_N) (BIT((IRQ_N)) & 0xFF)
 
@@ -44,9 +45,14 @@
 #define CPU_FEAT_SMAP  BIT(0)
 #define CPU_FEAT_SMEP  BIT(1)
 
+typedef uint16_t ipi_pend_t;
+
 struct cpu_info {
     uint32_t apicid;
     uint32_t feat;
+    uint8_t ipi_dispatch : 1;   /* 1: IPIs being dispatched */
+    uint8_t ipi_id;
+    ipi_pend_t ipi_pending[N_IPIVEC];
     uint8_t id;                 /* MI Logical ID */
     uint8_t model : 4;          /* CPU model number */
     uint8_t family : 4;         /* CPU family ID */
diff --git a/sys/include/arch/amd64/intr.h b/sys/include/arch/amd64/intr.h
index 3870f18..6d9bb09 100644
--- a/sys/include/arch/amd64/intr.h
+++ b/sys/include/arch/amd64/intr.h
@@ -48,6 +48,9 @@
 #define IPL_CLOCK   2   /* Clock */
 #define IPL_HIGH    3   /* Defer everything */
 
+#define N_IPIVEC 4      /* Number of vectors reserved for IPIs */
+#define IPI_PER_VEC 16  /* Max IPIs per vector */
+
 struct intr_hand;
 
 /*
diff --git a/sys/include/arch/amd64/ipi.h b/sys/include/arch/amd64/ipi.h
new file mode 100644
index 0000000..1a3b51c
--- /dev/null
+++ b/sys/include/arch/amd64/ipi.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MACHINE_IPI_H_
+#define _MACHINE_IPI_H_
+
+#include <sys/types.h>
+#include <machine/cpu.h>
+#include <machine/lapic.h>
+
+/* Fixed IPI IDs */
+#define IPI_HALT   0
+#define IPI_TLB    1
+
+/*
+ * Represents an interprocessor interrupt
+ * handler.
+ *
+ * @cookie: Used for verifying an instance
+ * @id: IPI ID (identifies the IPI)
+ * @handler: Handler routine, it is passed
+ *           the descriptor it belongs to
+ */
+struct cpu_ipi {
+    uint16_t cookie;
+    uint8_t id;
+    int(*handler)(struct cpu_ipi *ipi);
+};
+
+/*
+ * Represents an interrupt vector shared
+ * by a group of IPIs
+ *
+ * @ipi: IPIs associated with this vector
+ * @cookie: Used to verify an instance
+ * @nipi: Number of IPIs associated
+ * @vec: System interrupt vector number
+ */
+struct ipi_vector {
+    struct cpu_ipi ipi[IPI_PER_VEC];
+    uint16_t cookie;
+    uint8_t nipi;
+    uint8_t vec;
+};
+
+int md_ipi_alloc(struct cpu_ipi **res);
+int md_ipi_send(struct cpu_info *ci);
+void md_ipi_init(void);
+
+/*
+ * Get the index of the vector an IPI belongs to
+ *
+ * @ipi: IPI to check (IPI_PER_VEC of them share a vector)
+ */
+__always_inline static inline uint8_t
+ipi_vector(uint8_t ipi)
+{
+    return ipi / IPI_PER_VEC;
+}
+
+/*
+ * Get the handler index an IPI belongs
+ * to, i.e. its bit in an `ipi_pend_t'
+ *
+ * @ipi: IPI to check
+ */
+__always_inline static inline uint8_t
+ipi_index(uint8_t ipi)
+{
+    return ipi % (sizeof(ipi_pend_t) * 8);
+}
+
+__always_inline static inline int
+cpu_ipi_send(struct cpu_info *ci, uint8_t ipi)
+{
+    uint8_t vec = ipi_vector(ipi);
+    if (ci == NULL || vec >= N_IPIVEC)
+        return -1;  /* Bad target or ID; never index past `ipi_pending' */
+    ci->ipi_pending[vec] |= BIT(ipi_index(ipi));
+    return md_ipi_send(ci);
+}
+
+#endif  /* !_MACHINE_IPI_H_ */
diff --git a/sys/include/sys/sysctl.h b/sys/include/sys/sysctl.h
index 3b8d3c7..ce7510d 100644
--- a/sys/include/sys/sysctl.h
+++ b/sys/include/sys/sysctl.h
@@ -56,6 +56,11 @@
 #define HW_MACHINE    7
 
 /*
+ * List of 'proc.*' identifiers
+ */
+#define PROC_COUNT    8
+
+/*
  * Option types (i.e., int, string, etc) for
  * sysctl entries.
  *
diff --git a/sys/include/sys/workqueue.h b/sys/include/sys/workqueue.h
new file mode 100644
index 0000000..9925f79
--- /dev/null
+++ b/sys/include/sys/workqueue.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_WORKQUEUE_H_
+#define _SYS_WORKQUEUE_H_
+
+#if defined(_KERNEL)
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+
+struct workqueue;
+struct work;
+
+/*
+ * A work function can refer to either a work thread
+ * entry or the actual work to be done
+ */
+typedef void(*workfunc_t)(struct workqueue *wqp, struct work *wp);
+
+/*
+ * Represents work that may be added to a
+ * workqueue.
+ *
+ * @name: Name of this work/task [i]
+ * @data: Optional data to be passed with work [p]
+ * @func: Function with work to be done [p]
+ * @link: Queue linkage for this work [i]
+ *
+ * Field attributes:
+ * - [i]: Used internally
+ * - [p]: Used as parameter
+ */
+struct work {
+    char *name;
+    void *data;
+    workfunc_t func;
+    TAILQ_ENTRY(work) link;
+};
+
+/*
+ * A workqueue contains tasks that are
+ * queued up to be completed in their own
+ * thread context.
+ *
+ * @name: Name of workqueue.
+ * @work: Head of the queued work
+ * @ipl: IPL that work here must run with
+ * @max_work: Max number of jobs that can be queued
+ * @nwork: Number of tasks to be done
+ * @cookie: For validating workqueues
+ * @worktd: Thread associated with the workqueue
+ * @lock: Protects the workqueue
+ */
+struct workqueue {
+    char *name;
+    TAILQ_HEAD(, work) work;
+    uint8_t ipl;
+    size_t max_work;
+    ssize_t nwork;
+    uint16_t cookie;
+    struct proc *worktd;
+    struct mutex *lock;
+};
+
+struct workqueue *workqueue_new(const char *name, size_t max_work, int ipl);
+
+int workqueue_enq(struct workqueue *wqp, const char *name, struct work *wp);
+int workqueue_destroy(struct workqueue *wqp);
+int work_destroy(struct work *wp);
+
+#endif  /* !_KERNEL */
+#endif  /* !_SYS_WORKQUEUE_H_ */
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 87dcc74..8bc5680 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -29,14 +29,18 @@
 
 #include <sys/types.h>
 #include <sys/proc.h>
+#include <sys/errno.h>
 #include <sys/cdefs.h>
 #include <sys/vnode.h>
+#include <sys/tree.h>
 #include <sys/syscall.h>
 #include <sys/filedesc.h>
 #include <sys/fcntl.h>
 #include <string.h>
 #include <crc32.h>
 
+extern volatile size_t g_nthreads;
+
 pid_t
 getpid(void)
 {
@@ -50,7 +54,6 @@ getpid(void)
     return td->pid;
 }
 
-
 pid_t
 getppid(void)
 {
@@ -100,6 +103,33 @@ proc_coredump(struct proc *td, uintptr_t fault_addr)
     fd_close(fd);
 }
 
+/*
+ * Attach `td' to `parent' and give it an empty mmap ledger.
+ * Returns 0 on success or -ENOMEM (nothing modified) on failure.
+ */
+int
+proc_init(struct proc *td, struct proc *parent)
+{
+    struct mmap_lgdr *ledger = dynalloc(sizeof(*ledger));
+
+    if (ledger == NULL) {
+        return -ENOMEM;
+    }
+    TAILQ_INSERT_TAIL(&parent->leafq, td, leaf_link);
+    atomic_inc_int(&parent->nleaves);
+    atomic_inc_64(&g_nthreads);
+    td->parent = parent;
+    td->exit_status = -1;
+    td->cred = parent->cred;
+    /* The ledger starts out empty */
+    ledger->nbytes = 0;
+    RBT_INIT(lgdr_entries, &ledger->hd);
+    td->mlgdr = ledger;
+    td->flags |= PROC_WAITED;
+    signals_init(td);
+    return 0;
+}
+
 scret_t
 sys_getpid(struct syscall_args *scargs)
 {
diff --git a/sys/kern/kern_spawn.c b/sys/kern/kern_spawn.c
index b9551f3..7962ced 100644
--- a/sys/kern/kern_spawn.c
+++ b/sys/kern/kern_spawn.c
@@ -34,10 +34,8 @@
 #include <sys/mman.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
-#include <sys/atomic.h>
 #include <sys/syslog.h>
 #include <sys/syscall.h>
-#include <sys/atomic.h>
 #include <sys/signal.h>
 #include <sys/limits.h>
 #include <sys/sched.h>
@@ -50,7 +48,6 @@
 #define ARGVP_MAX (ARG_MAX / sizeof(void *))
 
 static size_t next_pid = 1;
-extern volatile size_t g_nthreads;
 
 /*
  * TODO: envp
@@ -145,7 +142,6 @@ pid_t
 spawn(struct proc *cur, void(*func)(void), void *p, int flags, struct proc **newprocp)
 {
     struct proc *newproc;
-    struct mmap_lgdr *mlgdr;
     int error;
     pid_t pid;
 
@@ -156,19 +152,10 @@ spawn(struct proc *cur, void(*func)(void), void *p, int flags, struct proc **new
         return -ENOMEM;
     }
 
-    mlgdr = dynalloc(sizeof(*mlgdr));
-    if (mlgdr == NULL) {
-        dynfree(newproc);
-        try_free_data(p);
-        pr_error("could not alloc proc mlgdr (-ENOMEM)\n");
-        return -ENOMEM;
-    }
-
     memset(newproc, 0, sizeof(*newproc));
     error = md_spawn(newproc, cur, (uintptr_t)func);
     if (error < 0) {
         dynfree(newproc);
-        dynfree(mlgdr);
         try_free_data(p);
         pr_error("error initializing proc\n");
         return error;
@@ -184,23 +171,16 @@ spawn(struct proc *cur, void(*func)(void), void *p, int flags, struct proc **new
         cur->flags |= PROC_LEAFQ;
     }
 
-    /* Add to parent leafq */
-    TAILQ_INSERT_TAIL(&cur->leafq, newproc, leaf_link);
-    atomic_inc_int(&cur->nleaves);
-    newproc->parent = cur;
-    newproc->data = p;
-    newproc->exit_status = -1;
-    newproc->cred = cur->cred;
-
-    /* Initialize the mmap ledger */
-    mlgdr->nbytes = 0;
-    RBT_INIT(lgdr_entries, &mlgdr->hd);
-    newproc->mlgdr = mlgdr;
-    newproc->flags |= PROC_WAITED;
+    error = proc_init(newproc, cur);
+    if (error < 0) {
+        dynfree(newproc);
+        try_free_data(p);
+        pr_error("error initializing proc\n");
+        return error;
+    }
 
-    atomic_inc_64(&g_nthreads);
+    newproc->data = p;
     newproc->pid = next_pid++;
-    signals_init(newproc);
     sched_enqueue_td(newproc);
     pid = newproc->pid;
     return pid;
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index a4c16bb..1f5e578 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -40,6 +40,7 @@
         HYRA_VERSION " "              \
         HYRA_BUILDDATE
 
+extern size_t g_nthreads;
 static uint32_t pagesize = DEFAULT_PAGESIZE;
 static char machine[] = HYRA_ARCH;
 static char hyra[] = "Hyra";
@@ -62,7 +63,10 @@ static struct sysctl_entry common_optab[] = {
     /* 'hw.*' */
     [HW_PAGESIZE] = { HW_PAGESIZE, SYSCTL_OPTYPE_INT_RO, &pagesize },
     [HW_NCPU] = { HW_NCPU, SYSCTL_OPTYPE_INT, NULL },
-    [HW_MACHINE] = {HW_MACHINE, SYSCTL_OPTYPE_STR_RO, &machine }
+    [HW_MACHINE] = {HW_MACHINE, SYSCTL_OPTYPE_STR_RO, &machine },
+
+    /* 'proc.*' */
+    [PROC_COUNT] = { PROC_COUNT, SYSCTL_OPTYPE_INT_RO, &g_nthreads }
 };
 
 static int
diff --git a/sys/kern/kern_work.c b/sys/kern/kern_work.c
new file mode 100644
index 0000000..918af89
--- /dev/null
+++ b/sys/kern/kern_work.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/panic.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/syslog.h>
+#include <sys/workqueue.h>
+#include <vm/dynalloc.h>
+#include <string.h>
+
+#define pr_trace(fmt, ...) kprintf("workq: " fmt, ##__VA_ARGS__)
+#define pr_error(...) pr_trace(__VA_ARGS__)
+
+extern struct proc g_proc0;     /* passed to spawn() for every worker */
+
+/*
+ * The workqueue cookie value that is stored in
+ * every workqueue by workqueue_new() and checked
+ * before a workqueue object is used.
+ */
+#define WQ_COOKIE 0xFC0B
+
+/*
+ * A worker services work in the queue
+ * and there is one per workqueue.
+ *
+ * Each item is unlinked and accounted for under the
+ * queue lock, then run with the lock dropped so that
+ * a work function may enqueue more work or destroy
+ * its own work item. Never returns.
+ */
+static void
+workqueue_worker(void)
+{
+    struct proc *td;
+    struct workqueue *wqp;
+    struct work *wp;
+
+    td = this_td();
+    if ((wqp = td->data) == NULL) {
+        panic("no workqueue in thread\n");
+    }
+
+    /* The thread data must be a properly set up workqueue */
+    if (wqp->cookie != WQ_COOKIE) {
+        panic("bad WQ_COOKIE in worker\n");
+    }
+
+    for (;;) {
+        mutex_acquire(wqp->lock, 0);
+        wp = TAILQ_FIRST(&wqp->work);
+
+        /* Try again later if empty */
+        if (wp == NULL) {
+            mutex_release(wqp->lock);
+            sched_yield();
+            continue;
+        }
+
+        /*
+         * Take the item off the queue first; nwork
+         * going negative means something clobbered
+         * the fields.
+         */
+        TAILQ_REMOVE(&wqp->work, wp, link);
+        if ((--wqp->nwork) < 0) {
+            panic("wqp nwork underflow\n");
+        }
+        mutex_release(wqp->lock);
+
+        wp->func(wqp, wp);
+        sched_yield();
+    }
+}
+
+/*
+ * Allocates a new work queue and spawns the
+ * worker thread that services it.
+ *
+ * @name: Name to give the workqueue (copied)
+ * @max_work: Maximum number of jobs to be added
+ * @ipl: IPL that the work must operate in
+ *
+ * Returns a pointer to the new workqueue on success, or NULL
+ * on a NULL name, allocation failure or worker spawn failure.
+ */
+struct workqueue *
+workqueue_new(const char *name, size_t max_work, int ipl)
+{
+    struct workqueue *wqp;
+    struct mutex *lock;
+    char *wqname;
+
+    if (__unlikely(this_td() == NULL)) {
+        pr_error("no thread in workqueue_new()\n");
+        return NULL;
+    }
+    if (name == NULL || (wqp = dynalloc(sizeof(*wqp))) == NULL) {
+        return NULL;
+    }
+    if ((wqname = strdup(name)) == NULL) {
+        dynfree(wqp);
+        return NULL;
+    }
+    if ((lock = mutex_new(wqname)) == NULL) {
+        dynfree(wqname);
+        dynfree(wqp);
+        return NULL;
+    }
+
+    wqp->name = wqname;
+    TAILQ_INIT(&wqp->work);
+    wqp->ipl = ipl;
+    wqp->max_work = max_work;
+    wqp->nwork = 0;
+    wqp->cookie = WQ_COOKIE;
+    wqp->worktd = NULL;
+    wqp->lock = lock;
+
+    /* spawn() runs try_free_data() on `wqp' if it fails; don't touch it */
+    if (spawn(&g_proc0, workqueue_worker, wqp, 0, &wqp->worktd) < 0) {
+        mutex_free(lock);
+        dynfree(wqname);
+        return NULL;
+    }
+    return wqp;
+}
+
+/*
+ * Enqueue a work item onto a specific workqueue.
+ *
+ * @wqp: Pointer to specific workqueue
+ * @name: Name to set for work unit (copied into wp->name)
+ * @wp: Pointer to work that should be enqueued
+ *
+ * Returns zero on success, -EINVAL on a NULL argument,
+ * -ENOMEM if the name cannot be copied or -EAGAIN if the
+ * queue is full; wp->name is NULL after the latter two.
+ */
+int
+workqueue_enq(struct workqueue *wqp, const char *name, struct work *wp)
+{
+    if (wqp == NULL || wp == NULL || name == NULL) {
+        return -EINVAL;
+    }
+
+    /* Verify that we have a valid workqueue */
+    if (__unlikely(wqp->cookie != WQ_COOKIE)) {
+        panic("workq: bad cookie on work enqueue\n");
+    }
+
+    wp->name = strdup(name);
+    if (wp->name == NULL) {
+        return -ENOMEM;
+    }
+
+    /*
+     * If we have reached the max amount of jobs, log it,
+     * give the name copy back and bail.
+     */
+    mutex_acquire(wqp->lock, 0);
+    if ((size_t)wqp->nwork >= wqp->max_work) {
+        pr_error("max jobs reached for '%s'\n", wqp->name);
+        mutex_release(wqp->lock);
+        dynfree(wp->name);
+        wp->name = NULL;
+        return -EAGAIN;
+    }
+
+    TAILQ_INSERT_TAIL(&wqp->work, wp, link);
+    ++wqp->nwork;
+    mutex_release(wqp->lock);
+    return 0;
+}
+
+/*
+ * Destroy a workqueue and free resources
+ * associated with it. Work that is still queued
+ * is neither run nor destroyed.
+ *
+ * @wqp: Pointer to workqueue to destroy
+ *
+ * Returns zero on success, -EINVAL if `wqp' is NULL.
+ */
+int
+workqueue_destroy(struct workqueue *wqp)
+{
+    if (wqp == NULL) {
+        return -EINVAL;
+    }
+
+    /* Should not happen but just make sure */
+    if (__unlikely(wqp->cookie != WQ_COOKIE)) {
+        panic("workq: bad cookie on destroy\n");
+    }
+
+    /* Free the name if we have it */
+    if (wqp->name != NULL) {
+        dynfree(wqp->name);
+    }
+    /* NOTE(review): the worker killed below may still be using this lock */
+    if (wqp->lock != NULL) {
+        mutex_free(wqp->lock);
+    }
+
+    /* Brutally murder any workthreads */
+    if (wqp->worktd != NULL) {
+        exit1(wqp->worktd, 0);
+        wqp->worktd = NULL;
+    }
+
+    /*
+     * Zero it for security reasons: we don't really know
+     * what was queued up, and for certain things it is best
+     * to make it as if it never existed in the first place.
+     *
+     * XXX: The workqueue is not freed here since it was
+     *      passed to spawn() as the worker's thread data,
+     *      which is expected to be freed when it exits.
+     *
+     * NOTE(review): if exit1() has already freed it, this
+     *      memset() writes to freed memory -- confirm.
+     */
+    memset(wqp, 0, sizeof(*wqp));
+    return 0;
+}
+
+/*
+ * Cleanup after work. Only the name copy is released and
+ * cleared; the work structure itself is left to the caller.
+ * @wp: Work to clean up
+ * Returns zero on success, -EINVAL if `wp' is NULL.
+ */
+int
+work_destroy(struct work *wp)
+{
+    if (wp == NULL) {
+        return -EINVAL;
+    }
+    if (wp->name != NULL) {
+        dynfree(wp->name);
+        wp->name = NULL;
+    }
+    return 0;
+}