Diffstat (limited to 'sys/arch/amd64')
-rw-r--r-- | sys/arch/amd64/amd64/gdt.c | 76
-rw-r--r-- | sys/arch/amd64/amd64/hpet.c | 17
-rw-r--r-- | sys/arch/amd64/amd64/intr.c | 72
-rw-r--r-- | sys/arch/amd64/amd64/ipi.c | 191
-rw-r--r-- | sys/arch/amd64/amd64/lapic.c | 3
-rw-r--r-- | sys/arch/amd64/amd64/lapic_intr.S | 3
-rw-r--r-- | sys/arch/amd64/amd64/machdep.c | 433
-rw-r--r-- | sys/arch/amd64/amd64/mp.c | 71
-rw-r--r-- | sys/arch/amd64/amd64/pmap.c | 75
-rw-r--r-- | sys/arch/amd64/amd64/proc_machdep.c | 134
-rw-r--r-- | sys/arch/amd64/amd64/reboot.c | 62
-rw-r--r-- | sys/arch/amd64/amd64/simd.S (renamed from sys/arch/amd64/isa/i8042.S) | 51
-rw-r--r-- | sys/arch/amd64/amd64/trap.c | 71
-rw-r--r-- | sys/arch/amd64/amd64/tsc.c | 109
-rw-r--r-- | sys/arch/amd64/amd64/vector.S | 221
-rw-r--r-- | sys/arch/amd64/conf/GENERIC | 22
-rw-r--r-- | sys/arch/amd64/conf/link.ld | 6
-rw-r--r-- | sys/arch/amd64/isa/i8042.c | 363
-rw-r--r-- | sys/arch/amd64/isa/mc1468.c | 281
-rw-r--r-- | sys/arch/amd64/isa/spkr.c | 53
-rw-r--r-- | sys/arch/amd64/pci/pci_machdep.c | 21 |
21 files changed, 2050 insertions, 285 deletions
diff --git a/sys/arch/amd64/amd64/gdt.c b/sys/arch/amd64/amd64/gdt.c index a8fe54d..40d8f48 100644 --- a/sys/arch/amd64/amd64/gdt.c +++ b/sys/arch/amd64/amd64/gdt.c @@ -29,50 +29,70 @@ #include <machine/gdt.h> -struct gdt_entry g_gdt_data[256] = { +/* + * The GDT should be cache line aligned, since it is accessed every time a + * segment selector is reloaded + */ +__cacheline_aligned struct gdt_entry g_gdt_data[GDT_ENTRY_COUNT] = { /* Null */ {0}, - /* Kernel code (0x8) */ + /* Kernel code (0x08) */ { - .limit = 0x0000, - .base_low = 0x0000, - .base_mid = 0x00, - .access = 0x9A, - .granularity = 0x20, - .base_hi = 0x00 + .limit = 0x0000, + .base_low = 0x0000, + .base_mid = 0x00, + .attributes = GDT_ATTRIBUTE_64BIT_CODE | GDT_ATTRIBUTE_PRESENT | + GDT_ATTRIBUTE_DPL0 | GDT_ATTRIBUTE_NONSYSTEM | + GDT_ATTRIBUTE_EXECUTABLE | GDT_ATTRIBUTE_READABLE, + .base_hi = 0x00 }, /* Kernel data (0x10) */ { - .limit = 0x0000, - .base_low = 0x0000, - .base_mid = 0x00, - .access = 0x92, - .granularity = 0x00, - .base_hi = 0x00 + .limit = 0x0000, + .base_low = 0x0000, + .base_mid = 0x00, + .attributes = GDT_ATTRIBUTE_PRESENT | GDT_ATTRIBUTE_DPL0 | + GDT_ATTRIBUTE_NONSYSTEM | GDT_ATTRIBUTE_WRITABLE, + .base_hi = 0x00 }, /* User code (0x18) */ { - .limit = 0x0000, - .base_low = 0x0000, - .base_mid = 0x00, - .access = 0xFA, - .granularity = 0xAF, - .base_hi = 0x00 + .limit = 0x0000, + .base_low = 0x0000, + .base_mid = 0x00, + .attributes = GDT_ATTRIBUTE_64BIT_CODE | GDT_ATTRIBUTE_PRESENT | + GDT_ATTRIBUTE_DPL3 | GDT_ATTRIBUTE_NONSYSTEM | + GDT_ATTRIBUTE_EXECUTABLE | GDT_ATTRIBUTE_READABLE, + .base_hi = 0x00 }, /* User data (0x20) */ { - .limit = 0x0000, - .base_low = 0x0000, - .base_mid = 0x00, - .access = 0xF2, - .granularity = 0x00, - .base_hi = 0x00 + .limit = 0x0000, + .base_low = 0x0000, + .base_mid = 0x00, + .attributes = GDT_ATTRIBUTE_PRESENT | GDT_ATTRIBUTE_DPL3 | + GDT_ATTRIBUTE_NONSYSTEM | GDT_ATTRIBUTE_WRITABLE, + .base_hi = 0x00 }, - /* TSS segment (0x28) */ - {0} + /* + * TSS segment (0x28) + * + * NOTE: 64-bit TSS descriptors are 16 bytes, equivalent to the size of two + * regular descriptor entries. + * See Intel SPG 3/25 Section 9.2.3 - TSS Descriptor in 64-bit mode. 
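To see how the named attribute flags relate to the old magic numbers, here is a minimal sketch; the GDT_ATTRIBUTE_* values below are assumed bit positions for illustration (access byte in bits 0-7, flags nibble in bits 12-15 of a 16-bit attributes field), not Hyra's actual header definitions:

    #define GDT_ATTRIBUTE_PRESENT    (1 << 7)   /* P: segment present */
    #define GDT_ATTRIBUTE_DPL0       (0 << 5)   /* DPL: ring 0 */
    #define GDT_ATTRIBUTE_NONSYSTEM  (1 << 4)   /* S: code/data segment */
    #define GDT_ATTRIBUTE_EXECUTABLE (1 << 3)   /* E: code segment */
    #define GDT_ATTRIBUTE_READABLE   (1 << 1)   /* R: readable code */
    #define GDT_ATTRIBUTE_64BIT_CODE (1 << 13)  /* L: 64-bit code */

    /* Kernel code: the same bits the old access=0x9A/granularity=0x20 pair encoded */
    _Static_assert((GDT_ATTRIBUTE_64BIT_CODE | GDT_ATTRIBUTE_PRESENT |
        GDT_ATTRIBUTE_DPL0 | GDT_ATTRIBUTE_NONSYSTEM |
        GDT_ATTRIBUTE_EXECUTABLE | GDT_ATTRIBUTE_READABLE) == 0x209A,
        "kernel code attributes");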
+ */ + {0}, {0} +}; + +/* Verify that the GDT is of the correct size */ +__static_assert(sizeof(g_gdt_data) == (8 * GDT_ENTRY_COUNT)); + +const struct gdtr g_gdtr = { + .limit = sizeof(g_gdt_data) - 1, + .offset = (uintptr_t)&g_gdt_data[0] }; diff --git a/sys/arch/amd64/amd64/hpet.c b/sys/arch/amd64/amd64/hpet.c index 1670546..9191bee 100644 --- a/sys/arch/amd64/amd64/hpet.c +++ b/sys/arch/amd64/amd64/hpet.c @@ -47,6 +47,7 @@ #define CAP_CLK_PERIOD(caps) (caps >> 32) #define FSEC_PER_SECOND 1000000000000000ULL +#define NSEC_PER_SECOND 1000000000ULL #define USEC_PER_SECOND 1000000ULL static void *hpet_base = NULL; @@ -135,6 +136,20 @@ hpet_time_usec(void) } static size_t +hpet_time_nsec(void) +{ + uint64_t period, freq, caps; + uint64_t counter; + + caps = hpet_read(HPET_REG_CAPS); + period = CAP_CLK_PERIOD(caps); + freq = FSEC_PER_SECOND / period; + + counter = hpet_read(HPET_REG_MAIN_COUNTER); + return (counter * NSEC_PER_SECOND) / freq; +} + +static size_t hpet_time_sec(void) { return hpet_time_usec() / USEC_PER_SECOND; @@ -180,7 +195,9 @@ hpet_init(void) timer.usleep = hpet_usleep; timer.nsleep = hpet_nsleep; timer.get_time_usec = hpet_time_usec; + timer.get_time_nsec = hpet_time_nsec; timer.get_time_sec = hpet_time_sec; + timer.flags = TIMER_MONOTONIC; register_timer(TIMER_GP, &timer); return 0; } diff --git a/sys/arch/amd64/amd64/intr.c b/sys/arch/amd64/amd64/intr.c index c31ee3c..c44c88e 100644 --- a/sys/arch/amd64/amd64/intr.c +++ b/sys/arch/amd64/amd64/intr.c @@ -31,12 +31,19 @@ #include <sys/param.h> #include <sys/errno.h> #include <sys/panic.h> +#include <sys/cdefs.h> +#include <sys/syslog.h> #include <machine/intr.h> #include <machine/cpu.h> #include <machine/asm.h> +#include <machine/ioapic.h> #include <vm/dynalloc.h> +#include <string.h> -static struct intr_entry *intrs[256] = {0}; +#define pr_trace(fmt, ...) kprintf("intr: " fmt, ##__VA_ARGS__) +#define pr_error(...) pr_trace(__VA_ARGS__) + +struct intr_hand *g_intrs[256] = {0}; int splraise(uint8_t s) @@ -67,35 +74,70 @@ splx(uint8_t s) ci->ipl = s; } -int -intr_alloc_vector(const char *name, uint8_t priority) +void * +intr_register(const char *name, const struct intr_hand *ih) { - size_t vec = MAX(priority << IPL_SHIFT, 0x20); - struct intr_entry *intr; + uint32_t vec = MAX(ih->priority << IPL_SHIFT, 0x20); + struct intr_hand *ih_new; + struct intr_data *idp_new; + const struct intr_data *idp; + size_t name_len; /* Sanity check */ - if (vec > NELEM(intrs)) { - return -1; + if (vec > NELEM(g_intrs) || name == NULL) { + return NULL; + } + + ih_new = dynalloc(sizeof(*ih_new)); + if (ih_new == NULL) { + pr_error("could not allocate new interrupt handler\n"); + return NULL; } /* * Try to allocate an interrupt vector. An IPL is made up * of 4 bits so there can be 16 vectors per IPL. + * + * XXX: Vector 0x20 is reserved for the Hyra scheduler and + * vectors 0x21 to 0x21 + N_IPIVEC are reserved for + * inter-processor interrupts. 
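To make the vector arithmetic above concrete, a small sketch (the comment gives an IPL as 4 bits, so IPL_SHIFT is taken as 4 here; the helper below is illustrative, not part of the patch):

    static inline int
    ipl_vector_base(uint8_t ipl)
    {
        int vec = ipl << 4;             /* IPL_SHIFT == 4: 16 vectors per IPL */
        return (vec < 0x20) ? 0x20 : vec;
    }

    /*
     * ipl_vector_base(0) == 0x20, but the allocator skips every slot
     * below 0x24 (0x20 scheduler, 0x21..0x23 IPIs), so the first free
     * window is 0x24..0x2F; IPL 3 maps to 0x30..0x3F, IPL 6 to 0x60..0x6F.
     */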
*/ for (int i = vec; i < vec + 16; ++i) { - if (intrs[i] != NULL) { + if (g_intrs[i] != NULL || i < 0x24) { continue; } - intr = dynalloc(sizeof(*intr)); - if (intr == NULL) { - return -ENOMEM; + /* Allocate memory for the name */ + name_len = strlen(name) + 1; + ih_new->name = dynalloc(name_len); + if (ih_new->name == NULL) { + dynfree(ih_new); + pr_trace("could not allocate interrupt name\n"); + return NULL; } - intr->priority = priority; - intrs[i] = intr; - return i; + memcpy(ih_new->name, name, name_len); + idp_new = &ih_new->data; + idp = &ih->data; + + /* Pass the interrupt data */ + idp_new->ihp = ih_new; + idp_new->data_u64 = idp->data_u64; + + /* Setup the new intr_hand */ + ih_new->func = ih->func; + ih_new->priority = ih->priority; + ih_new->irq = ih->irq; + ih_new->vector = i; + ih_new->nintr = 0; + g_intrs[i] = ih_new; + + if (ih->irq >= 0) { + ioapic_set_vec(ih->irq, i); + ioapic_irq_unmask(ih->irq); + } + return ih_new; } - return -1; + return NULL; } diff --git a/sys/arch/amd64/amd64/ipi.c b/sys/arch/amd64/amd64/ipi.c new file mode 100644 index 0000000..bf263d3 --- /dev/null +++ b/sys/arch/amd64/amd64/ipi.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/syslog.h> +#include <sys/param.h> +#include <sys/panic.h> +#include <sys/spinlock.h> +#include <machine/cpu.h> +#include <machine/idt.h> +#include <machine/ipi.h> +#include <machine/lapic.h> +#include <vm/dynalloc.h> +#include <string.h> + +void ipi_isr(void); +void halt_isr(void); + +void __ipi_handle_common(void); + +#define pr_trace(fmt, ...) kprintf("ipi: " fmt, ##__VA_ARGS__) +#define pr_error(...) 
pr_trace(__VA_ARGS__) + +#define COOKIE 0x7E0A +#define MAX_IPI 32 + +/* For the global state of the subsystem */ +static uint32_t cookie = 0; + +static struct cpu_ipi ipi_list[MAX_IPI]; +static uint8_t ipi_count = 0; +static struct spinlock lock; + +/* + * Allocate an IPI that can be sent to other + * cores on the CPU. This is the core logic + * and contains *no* locks. One should be + * using the md_ipi_alloc() function instead. + * + * Returns the allocated IPI identifier on success, + * otherwise a less than zero value is returned. + */ +static int +__ipi_alloc(struct cpu_ipi **res) +{ + struct cpu_ipi *ipip; + + if (ipi_count >= MAX_IPI) { + return -EAGAIN; + } + + ipip = &ipi_list[ipi_count]; + ipip->cookie = COOKIE; + ipip->id = ipi_count++; + ipip->handler = NULL; + *res = ipip; + return ipip->id; +} + +/* + * Common IPI routine, called from vector.S + * + * XXX: Internal usage only + */ +void +__ipi_handle_common(void) +{ + struct cpu_ipi *ipip; + struct cpu_info *ci = this_cpu(); + ipi_pend_t pending = 0; + + if (cookie != COOKIE) { + pr_trace("[warn]: got spurious ipi\n"); + return; + } + + if (ci == NULL) { + pr_error("could not get current CPU\n"); + return; + } + + if (ipi_count == 0) { + pr_error("no registered IPIs\n"); + return; + } + + /* Attempt to find a handler */ + pending = ci->ipi_pending; + for (int i = 0; i < ipi_count; ++i) { + ipip = &ipi_list[i]; + if (ISSET(pending, BIT(i))) { + ipip->handler(ipip); + ci->ipi_pending &= ~BIT(i); + } + } + + /* We are done dispatching IPIs */ + ci->ipi_dispatch = 0; +} + +/* + * Send one or more IPIs to a specific + * processor by setting the corresponding + * bits in the `ci->ipi_pending' field + * + * @ci: Processor to send IPI(s) to + * @ipi: IPIs to send + */ +int +md_ipi_send(struct cpu_info *ci, ipi_pend_t ipi) +{ + uint32_t apic_id = 0; + + if (ci != NULL) { + /* + * We are already dispatching IPIs, we don't + * want to find ourselves in interrupt hell. + */ + if (ci->ipi_dispatch) { + return -EAGAIN; + } + + apic_id = ci->apicid; + } + + ci->ipi_dispatch = 1; + ci->ipi_pending |= BIT(ipi); + + /* Send it through on the bus */ + lapic_send_ipi( + apic_id, + IPI_SHORTHAND_NONE, + IPI_VECTOR + ); + return 0; +} + + +/* + * IPI allocation interface with + * locking.
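Putting the pieces above together, a hedged usage sketch (the demo_* names are hypothetical, not part of the patch):

    static struct cpu_ipi *demo_ipi;

    /* Runs on the target CPU, dispatched from __ipi_handle_common() */
    static int
    demo_ipi_handler(struct cpu_ipi *ipi)
    {
        return 0;
    }

    static void
    demo_ipi_setup(void)
    {
        if (md_ipi_alloc(&demo_ipi) < 0)
            return;

        demo_ipi->handler = demo_ipi_handler;
    }

    /* Later, to poke a remote processor: md_ipi_send(ci, demo_ipi->id); */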
+ */ +int +md_ipi_alloc(struct cpu_ipi **res) +{ + int retval; + + spinlock_acquire(&lock); + retval = __ipi_alloc(res); + spinlock_release(&lock); + return retval; +} + +/* + * Initialize the IPI thunks + */ +void +md_ipi_init(void) +{ + /* Initialize the IPI vectors */ + idt_set_desc(IPI_VECTOR, IDT_INT_GATE, ISR(ipi_isr), 0); + idt_set_desc(HALT_VECTOR, IDT_INT_GATE, ISR(halt_isr), 0); + cookie = COOKIE; +} diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c index 70d36a5..ceb5428 100644 --- a/sys/arch/amd64/amd64/lapic.c +++ b/sys/arch/amd64/amd64/lapic.c @@ -340,7 +340,7 @@ lapic_init(void) /* Allocate a vector if needed */ if (lapic_timer_vec == 0) { - lapic_timer_vec = intr_alloc_vector("lapictmr", IPL_CLOCK); + lapic_timer_vec = (IPL_CLOCK << IPL_SHIFT) | 0x20; idt_set_desc(lapic_timer_vec, IDT_INT_GATE, ISR(lapic_tmr_isr), IST_SCHED); } @@ -364,5 +364,6 @@ lapic_init(void) lapic_timer.name = "LAPIC_INTEGRATED_TIMER"; lapic_timer.stop = lapic_timer_stop; lapic_timer.oneshot_us = lapic_timer_oneshot_us; + lapic_timer.flags = 0; register_timer(TIMER_SCHED, &lapic_timer); } diff --git a/sys/arch/amd64/amd64/lapic_intr.S b/sys/arch/amd64/amd64/lapic_intr.S index e22cbca..1413660 100644 --- a/sys/arch/amd64/amd64/lapic_intr.S +++ b/sys/arch/amd64/amd64/lapic_intr.S @@ -33,7 +33,6 @@ .globl lapic_tmr_isr INTRENTRY(lapic_tmr_isr, handle_lapic_tmr) handle_lapic_tmr: - call sched_switch // Context switch per every timer IRQ - call i8042_sync // Sometimes needed depending on i8042 quirks + call md_sched_switch // Context switch per every timer IRQ call lapic_eoi // Done! Signal that we finished to the Local APIC retq diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c index 07d6cdd..60c37bf 100644 --- a/sys/arch/amd64/amd64/machdep.c +++ b/sys/arch/amd64/amd64/machdep.c @@ -42,7 +42,25 @@ #include <machine/uart.h> #include <machine/sync.h> #include <machine/intr.h> +#include <machine/ipi.h> +#include <machine/cdefs.h> #include <machine/isa/i8042var.h> +#include <dev/cons/cons.h> +#include <string.h> + +/* + * This defines the max number of frames + * we will pass while walking the callstack + * in md_backtrace() + */ +#define MAX_FRAME_DEPTH 16 + +#define pr_trace(fmt, ...) kprintf("cpu: " fmt, ##__VA_ARGS__) +#define pr_error(...) pr_trace(__VA_ARGS__) +#define pr_trace_bsp(...) \ + if (!bsp_init) { \ + pr_trace(__VA_ARGS__); \ + } #if defined(__SPECTRE_IBRS) #define SPECTRE_IBRS __SPECTRE_IBRS @@ -50,46 +68,86 @@ #define SPECTRE_IBRS 0 #endif -static uint8_t halt_vector = 0; +#if defined(__CPU_SMEP) +#define CPU_SMEP __CPU_SMEP +#else +#define CPU_SMEP 0 +#endif + +#if defined(__CPU_UMIP) +#define CPU_UMIP __CPU_UMIP +#else +#define CPU_UMIP 0 +#endif int ibrs_enable(void); +int simd_init(void); void syscall_isr(void); +void pin_isr_load(void); struct cpu_info g_bsp_ci = {0}; -static struct gdtr bsp_gdtr = { - .limit = sizeof(struct gdt_entry) * 256 - 1, - .offset = (uintptr_t)&g_gdt_data[0] -}; +static struct cpu_ipi *tlb_ipi; +static struct spinlock ipi_lock = {0}; +static bool bsp_init = false; -__attribute__((__interrupt__)) -static void -cpu_halt_isr(void *p) +static int +tlb_shootdown_handler(struct cpu_ipi *ipi) { - __ASMV("cli; hlt"); - __builtin_unreachable(); + struct cpu_info *ci; + int ipl; + + /* + * Get the current CPU and check if we even + * need a shootdown. If `tlb_shootdown' is + * unset, this is not for us. 
+ */ + ci = this_cpu(); + if (!ci->tlb_shootdown) { + return -1; + } + + ipl = splraise(IPL_HIGH); + __invlpg(ci->shootdown_va); + + ci->shootdown_va = 0; + ci->tlb_shootdown = 0; + splx(ipl); + return 0; } static void -setup_vectors(void) +setup_vectors(struct cpu_info *ci) { - if (halt_vector == 0) { - halt_vector = intr_alloc_vector("cpu-halt", IPL_HIGH); + union tss_stack scstack; + union tss_stack dfstack; + + /* Try to allocate a syscall stack */ + if (tss_alloc_stack(&scstack, DEFAULT_PAGESIZE) != 0) { + panic("failed to allocate syscall stack\n"); + } + + /* Try to allocate a double fault stack */ + if (tss_alloc_stack(&dfstack, DEFAULT_PAGESIZE) != 0) { + panic("failed to allocate double fault stack\n"); } + tss_update_ist(ci, scstack, IST_SYSCALL); + tss_update_ist(ci, dfstack, IST_DBFLT); + idt_set_desc(0x0, IDT_TRAP_GATE, ISR(arith_err), 0); idt_set_desc(0x2, IDT_TRAP_GATE, ISR(nmi), 0); idt_set_desc(0x3, IDT_TRAP_GATE, ISR(breakpoint_handler), 0); idt_set_desc(0x4, IDT_TRAP_GATE, ISR(overflow), 0); idt_set_desc(0x5, IDT_TRAP_GATE, ISR(bound_range), 0); idt_set_desc(0x6, IDT_TRAP_GATE, ISR(invl_op), 0); - idt_set_desc(0x8, IDT_TRAP_GATE, ISR(double_fault), 0); + idt_set_desc(0x8, IDT_TRAP_GATE, ISR(double_fault), IST_DBFLT); idt_set_desc(0xA, IDT_TRAP_GATE, ISR(invl_tss), 0); idt_set_desc(0xB, IDT_TRAP_GATE, ISR(segnp), 0); idt_set_desc(0xC, IDT_TRAP_GATE, ISR(ss_fault), 0); idt_set_desc(0xD, IDT_TRAP_GATE, ISR(general_prot), 0); idt_set_desc(0xE, IDT_TRAP_GATE, ISR(page_fault), 0); - idt_set_desc(0x80, IDT_USER_INT_GATE, ISR(syscall_isr), 0); - idt_set_desc(halt_vector, IDT_INT_GATE, ISR(cpu_halt_isr), 0); + idt_set_desc(0x80, IDT_USER_INT_GATE, ISR(syscall_isr), IST_SYSCALL); + pin_isr_load(); } static inline void @@ -97,7 +155,7 @@ init_tss(struct cpu_info *ci) { struct tss_desc *desc; - desc = (struct tss_desc *)&g_gdt_data[GDT_TSS]; + desc = (struct tss_desc *)&g_gdt_data[GDT_TSS_INDEX]; write_tss(ci, desc); tss_load(); } @@ -133,45 +191,281 @@ backtrace_addr_to_name(uintptr_t addr, off_t *off) return NULL; } +static void +enable_simd(void) +{ + int retval; + + if ((retval = simd_init()) < 0) { + pr_trace_bsp("SIMD not supported\n"); + } + + if (retval == 1) { + pr_trace_bsp("SSE enabled but not AVX\n"); + } +} + +static void +init_ipis(void) +{ + int error; + + if (bsp_init) { + return; + } + + spinlock_acquire(&ipi_lock); + error = md_ipi_alloc(&tlb_ipi); + if (error < 0) { + pr_error("md_ipi_alloc: returned %d\n", error); + panic("failed to init TLB IPI\n"); + } + + tlb_ipi->handler = tlb_shootdown_handler; + + /* + * Some IPIs must have very specific IDs + * so that they are standard and usable + * throughout the rest of the system. + */ + if (tlb_ipi->id != IPI_TLB) + panic("expected IPI_TLB for TLB IPI\n"); + + spinlock_release(&ipi_lock); +} + +static void +cpu_get_vendor(struct cpu_info *ci) +{ + uint32_t unused, ebx, ecx, edx; + char vendor_str[13]; + + /* + * This CPUID returns a 12 byte CPU vendor string + * that we'll put together and use to detect the vendor.
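Since x86 is little-endian, the byte-by-byte assembly below is equivalent to copying the three registers in EBX/EDX/ECX order; a compact sketch of the same idea:

    uint32_t regs[3] = { ebx, edx, ecx };
    char vendor_str[13];

    memcpy(vendor_str, regs, 12);   /* the 12-byte vendor string */
    vendor_str[12] = '\0';          /* e.g. "AuthenticAMD" */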
+ */ + CPUID(0, unused, ebx, ecx, edx); + + /* Dword 0 */ + vendor_str[0] = ebx & 0xFF; + vendor_str[1] = (ebx >> 8) & 0xFF; + vendor_str[2] = (ebx >> 16) & 0xFF; + vendor_str[3] = (ebx >> 24) & 0xFF; + + /* Dword 1 */ + vendor_str[4] = edx & 0xFF; + vendor_str[5] = (edx >> 8) & 0xFF; + vendor_str[6] = (edx >> 16) & 0xFF; + vendor_str[7] = (edx >> 24) & 0xFF; + + /* Dword 2 */ + vendor_str[8] = ecx & 0xFF; + vendor_str[9] = (ecx >> 8) & 0xFF; + vendor_str[10] = (ecx >> 16) & 0xFF; + vendor_str[11] = (ecx >> 24) & 0xFF; + vendor_str[12] = '\0'; + + /* Is this an AMD CPU? */ + if (strcmp(vendor_str, "AuthenticAMD") == 0) { + ci->vendor = CPU_VENDOR_AMD; + return; + } + + /* Is this an Intel CPU? */ + if (strcmp(vendor_str, "GenuineIntel") == 0) { + ci->vendor = CPU_VENDOR_INTEL; + return; + } + + /* + * Some buggy Intel CPUs report the string "GenuineIotel" + * instead of "GenuineIntel". This is rare but we should + * still handle it as it can happen. Probably a good idea + * to log it so the user can know about their rare CPU + * quirk and brag to their friends :~) + */ + if (strcmp(vendor_str, "GenuineIotel") == 0) { + pr_trace_bsp("vendor_str=%s\n", vendor_str); + pr_trace_bsp("detected vendor string quirk\n"); + ci->vendor = CPU_VENDOR_INTEL; + return; + } + + ci->vendor = CPU_VENDOR_OTHER; +} + +static void +cpu_get_info(struct cpu_info *ci) +{ + uint32_t unused, eax, ebx, ecx, edx; + uint8_t ext_model, ext_family; + + /* Get the vendor information */ + cpu_get_vendor(ci); + + /* Extended features */ + CPUID(0x07, unused, ebx, ecx, unused); + if (ISSET(ebx, BIT(7))) + ci->feat |= CPU_FEAT_SMEP; + if (ISSET(ebx, BIT(20))) + ci->feat |= CPU_FEAT_SMAP; + if (ISSET(ecx, BIT(2))) + ci->feat |= CPU_FEAT_UMIP; + + /* + * Processor power management information bits as well + * as bits describing RAS capabilities + */ + CPUID(0x80000007, unused, unused, unused, edx); + if (ISSET(edx, BIT(8))) + ci->feat |= CPU_FEAT_TSCINV; + + /* + * Processor info and feature bits + */ + CPUID(0x01, eax, unused, unused, unused); + ci->model = (eax >> 4) & 0xF; + ci->family = (eax >> 8) & 0xF; + + /* + * If the family ID is 15 then the actual family + * ID is the sum of the extended family and the + * family ID fields. + */ + if (ci->family == 0xF) { + ext_family = (eax >> 20) & 0xFF; + ci->family += ext_family; + } + + /* + * If the family has the value of either 6 or 15, + * then the extended model number would be used. + * Slap them together if this is the case. + */ + if (ci->family == 6 || ci->family == 15) { + ext_model = (eax >> 16) & 0xF; + ci->model |= (ext_model << 4); + } +} + +/* + * The CR4.UMIP bit prevents user programs from + * executing instructions related to accessing + * system memory structures. This should be enabled + * by default if supported. 
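A worked example of the family/model extension rules above, using a hypothetical leaf-1 EAX value:

    /*
     * eax       = 0x000306A9 (hypothetical)
     * model     = (eax >> 4) & 0xF   -> 0xA
     * family    = (eax >> 8) & 0xF   -> 6
     * ext_model = (eax >> 16) & 0xF  -> 3 (used, since family == 6)
     * model    |= ext_model << 4     -> 0x3A, family stays 6
     */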
+ */ +static void +cpu_enable_umip(void) +{ + struct cpu_info *ci = this_cpu(); + uint64_t cr4; + + if (!CPU_UMIP) { + pr_trace_bsp("UMIP not configured\n"); + return; + } + + if (ISSET(ci->feat, CPU_FEAT_UMIP)) { + cr4 = amd64_read_cr4(); + cr4 |= CR4_UMIP; + amd64_write_cr4(cr4); + } +} + +void +cpu_shootdown_tlb(vaddr_t va) +{ + uint32_t ncpu = cpu_count(); + struct cpu_info *cip; + + for (uint32_t i = 0; i < ncpu; ++i) { + cip = cpu_get(i); + if (cip == NULL) { + break; + } + + spinlock_acquire(&cip->lock); + cip->shootdown_va = va; + cip->tlb_shootdown = 1; + md_ipi_send(cip, IPI_TLB); + spinlock_release(&cip->lock); + } +} + void md_backtrace(void) { - uintptr_t *rbp; - uintptr_t rip; + uintptr_t *rbp = NULL; + uintptr_t rip, tmp; off_t off; const char *name; + char line[256]; + uint8_t n = 0; __ASMV("mov %%rbp, %0" : "=r" (rbp) :: "memory"); while (1) { + if (n >= MAX_FRAME_DEPTH) { + break; + } + + /* End of callstack */ + if (rbp == NULL) { + break; + } + rip = rbp[1]; rbp = (uintptr_t *)rbp[0]; - name = backtrace_addr_to_name(rip, &off); - if (rbp == NULL) + /* + * RBP should be aligned on an 8-byte + * boundary... Don't trust this state + * anymore if it is not. + */ + tmp = (uintptr_t)rbp; + if ((tmp & (8 - 1)) != 0) { break; - if (name == NULL) - name = "???"; + } - kprintf(OMIT_TIMESTAMP "%p @ <%s+0x%x>\n", rip, name, off); + /* + * This is not a valid value, get out + * of this loop!! + */ + if (rip == 0) { + break; + } + + name = backtrace_addr_to_name(rip, &off); + snprintf(line, sizeof(line), "%p @ <%s+0x%x>\n", rip, name, off); + cons_putstr(&g_root_scr, line, strlen(line)); + ++n; } } void cpu_halt_all(void) { - /* - * If we have no current 'cpu_info' structure set, - * we can't send IPIs, so just assume only the current - * processor is the only one active, clear interrupts - * then halt it. - */ - if (rdmsr(IA32_GS_BASE) == 0) { - __ASMV("cli; hlt"); - } + lapic_send_ipi( + 0, + IPI_SHORTHAND_ALL, + HALT_VECTOR + ); - /* Send IPI to all cores */ - lapic_send_ipi(0, IPI_SHORTHAND_ALL, halt_vector); - for (;;); + __ASMV("cli; hlt"); + __builtin_unreachable(); +} + +/* + * Same as cpu_halt_all() but for all other + * cores but ourselves. 
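The rbp[0]/rbp[1] indexing in md_backtrace() above relies on the standard AMD64 frame layout; a sketch of that assumption:

    /*
     * Built by the usual 'push %rbp; mov %rsp, %rbp' prologue;
     * a NULL or misaligned next pointer terminates the walk.
     */
    struct stack_frame {
        struct stack_frame *next;   /* rbp[0]: caller's RBP */
        uintptr_t rip;              /* rbp[1]: return address */
    };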
+ */ +void +cpu_halt_others(void) +{ + lapic_send_ipi( + 0, + IPI_SHORTHAND_OTHERS, + HALT_VECTOR + ); } void @@ -195,6 +489,10 @@ this_cpu(void) { struct cpu_info *ci; + if (rdmsr(IA32_GS_BASE) == 0) { + return NULL; + } + /* * This might look crazy but we are just leveraging the "m" * constraint to add the offset of the self field within @@ -221,17 +519,74 @@ md_sync_all(void) } void +cpu_enable_smep(void) +{ + struct cpu_info *ci; + uint64_t cr4; + + /* Don't bother if not enabled */ + if (!CPU_SMEP) { + return; + } + + ci = this_cpu(); + if (!ISSET(ci->feat, CPU_FEAT_SMEP)) { + pr_trace_bsp("SMEP not supported\n"); + return; + } + + cr4 = amd64_read_cr4(); + cr4 |= BIT(20); /* CR4.SMEP */ + amd64_write_cr4(cr4); +} + +void +cpu_disable_smep(void) +{ + struct cpu_info *ci; + uint64_t cr4; + + if (!CPU_SMEP) { + return; + } + + ci = this_cpu(); + if (!ISSET(ci->feat, CPU_FEAT_SMEP)) { + return; + } + + cr4 = amd64_read_cr4(); + cr4 &= ~BIT(20); /* CR4.SMEP */ + amd64_write_cr4(cr4); +} + +void cpu_startup(struct cpu_info *ci) { ci->self = ci; - gdt_load(&bsp_gdtr); + ci->feat = 0; + gdt_load(); idt_load(); - setup_vectors(); wrmsr(IA32_GS_BASE, (uintptr_t)ci); - init_tss(ci); + + setup_vectors(ci); + md_ipi_init(); + init_ipis(); + try_mitigate_spectre(); + ci->online = 1; + ci->preempt = 1; + + cpu_get_info(ci); + cpu_enable_smep(); + cpu_enable_umip(); + enable_simd(); lapic_init(); + + if (!bsp_init) { + bsp_init = true; + } } diff --git a/sys/arch/amd64/amd64/mp.c b/sys/arch/amd64/amd64/mp.c index a8a36c7..43830ba 100644 --- a/sys/arch/amd64/amd64/mp.c +++ b/sys/arch/amd64/amd64/mp.c @@ -29,9 +29,13 @@ #include <sys/types.h> #include <sys/limine.h> +#include <sys/limits.h> +#include <sys/systm.h> #include <sys/syslog.h> +#include <sys/proc.h> #include <sys/spinlock.h> #include <sys/sched.h> +#include <sys/atomic.h> #include <machine/cpu.h> #include <vm/dynalloc.h> #include <assert.h> @@ -39,42 +43,95 @@ #define pr_trace(fmt, ...) 
kprintf("cpu_mp: " fmt, ##__VA_ARGS__) +extern struct proc g_proc0; static volatile struct limine_smp_request g_smp_req = { .id = LIMINE_SMP_REQUEST, .revision = 0 }; +static volatile uint32_t ncpu_up = 1; +static struct cpu_info *ci_list[CPU_MAX]; +static struct spinlock ci_list_lock = {0}; + static void ap_trampoline(struct limine_smp_info *si) { - static struct spinlock lock = {0}; struct cpu_info *ci; + struct proc *idle; ci = dynalloc(sizeof(*ci)); __assert(ci != NULL); memset(ci, 0, sizeof(*ci)); - spinlock_acquire(&lock); cpu_startup(ci); + spinlock_acquire(&ci_list_lock); + ci_list[ncpu_up] = ci; - spinlock_release(&lock); - sched_enter(); + ci->id = ncpu_up; + spawn(&g_proc0, sched_enter, NULL, 0, &idle); + proc_pin(idle, ci->id); + + spinlock_release(&ci_list_lock); + atomic_inc_int(&ncpu_up); + sched_enter(); while (1); } +struct cpu_info * +cpu_get(uint32_t index) +{ + if (index >= ncpu_up) { + return NULL; + } + + return ci_list[index]; +} + +/* + * Grab the CPU stat structured of a specified + * processor + * + * @cpu_index: CPU index number + */ +struct sched_cpu * +cpu_get_stat(uint32_t cpu_index) +{ + struct cpu_info *ci; + + if ((ci = cpu_get(cpu_index)) == NULL) { + return NULL; + } + + return &ci->stat; +} + +uint32_t +cpu_count(void) +{ + return ncpu_up; +} + void mp_bootstrap_aps(struct cpu_info *ci) { struct limine_smp_response *resp = g_smp_req.response; struct limine_smp_info **cpus; + struct proc *idle; size_t cpu_init_counter; + uint32_t ncpu; /* Should not happen */ __assert(resp != NULL); cpus = resp->cpus; - cpu_init_counter = resp->cpu_count - 1; + ncpu = resp->cpu_count; + cpu_init_counter = ncpu - 1; + ci_list[0] = ci; + + /* Pin an idle thread to the BSP */ + spawn(&g_proc0, sched_enter, NULL, 0, &idle); + proc_pin(idle, 0); if (resp->cpu_count == 1) { pr_trace("CPU has 1 core, no APs to bootstrap...\n"); @@ -90,4 +147,8 @@ mp_bootstrap_aps(struct cpu_info *ci) cpus[i]->goto_address = ap_trampoline; } + + /* Wait for all cores to be ready */ + while ((ncpu_up - 1) < cpu_init_counter); + cpu_report_count(ncpu_up); } diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c index 2e62a4b..6c6bfcd 100644 --- a/sys/arch/amd64/amd64/pmap.c +++ b/sys/arch/amd64/amd64/pmap.c @@ -33,6 +33,8 @@ #include <sys/errno.h> #include <machine/tlb.h> #include <machine/vas.h> +#include <machine/cpu.h> +#include <machine/cdefs.h> #include <vm/pmap.h> #include <vm/physmem.h> #include <vm/vm.h> @@ -52,7 +54,7 @@ #define PTE_PCD BIT(4) /* Page-level cache disable */ #define PTE_ACC BIT(5) /* Accessed */ #define PTE_DIRTY BIT(6) /* Dirty (written-to page) */ -#define PTE_PAT BIT(7) +#define PTE_PS BIT(7) /* Page size */ #define PTE_GLOBAL BIT(8) #define PTE_NX BIT(63) /* Execute-disable */ @@ -112,6 +114,16 @@ pmap_extract(uint8_t level, vaddr_t va, vaddr_t *pmap, bool alloc) return NULL; } + /* + * TODO: Support huge pages... For now, don't let the + * bootloader fuck us up with their pre-kernel + * mappings and tell huge pages to get the fuck. + * + */ + if (ISSET(pmap[idx], PTE_PS)) { + pmap[idx] = 0; + } + if (ISSET(pmap[idx], PTE_P)) { next = (pmap[idx] & PTE_ADDR_MASK); return PHYS_TO_VIRT(next); @@ -176,14 +188,15 @@ done: * @vas: Virtual address space. * @va: Target virtual address. * @val: Value to write. + * @alloc: True to alloc new paging entries. 
*/ static int -pmap_update_tbl(struct vas vas, vaddr_t va, uint64_t val) +pmap_update_tbl(struct vas vas, vaddr_t va, uint64_t val, bool alloc) { uintptr_t *tbl; int status; - if ((status = pmap_get_tbl(vas, va, true, &tbl)) != 0) { + if ((status = pmap_get_tbl(vas, va, alloc, &tbl)) != 0) { return status; } @@ -266,19 +279,21 @@ pmap_map(struct vas vas, vaddr_t va, paddr_t pa, vm_prot_t prot) { uint32_t flags = pmap_prot_to_pte(prot); - return pmap_update_tbl(vas, va, (pa | flags)); + return pmap_update_tbl(vas, va, (pa | flags), true); } int pmap_unmap(struct vas vas, vaddr_t va) { - return pmap_update_tbl(vas, va, 0); + return pmap_update_tbl(vas, va, 0, false); } int pmap_set_cache(struct vas vas, vaddr_t va, int type) { uintptr_t *tbl; + uint32_t flags; + paddr_t pa; int status; size_t idx; @@ -286,20 +301,62 @@ pmap_set_cache(struct vas vas, vaddr_t va, int type) return status; idx = pmap_get_level_index(1, va); + pa = tbl[idx] & PTE_ADDR_MASK; + flags = tbl[idx] & ~PTE_ADDR_MASK; /* Set the caching policy */ switch (type) { case VM_CACHE_UC: - tbl[idx] |= PTE_PCD; - tbl[idx] &= ~PTE_PWT; + flags |= PTE_PCD; + flags &= ~PTE_PWT; break; case VM_CACHE_WT: - tbl[idx] &= ~PTE_PCD; - tbl[idx] |= PTE_PWT; + flags &= ~PTE_PCD; + flags |= PTE_PWT; break; default: return -EINVAL; } + return pmap_update_tbl(vas, va, (pa | flags), false); +} + +bool +pmap_is_clean(struct vas vas, vaddr_t va) +{ + uintptr_t *tbl; + int status; + size_t idx; + + if ((status = pmap_get_tbl(vas, va, false, &tbl)) != 0) + return status; + + idx = pmap_get_level_index(1, va); + return ISSET(tbl[idx], PTE_DIRTY) == 0; +} + +void +pmap_mark_clean(struct vas vas, vaddr_t va) +{ + uintptr_t *tbl; + int status; + size_t idx; + + if ((status = pmap_get_tbl(vas, va, false, &tbl)) != 0) + return; + + idx = pmap_get_level_index(1, va); + tbl[idx] &= ~PTE_DIRTY; + + if (cpu_count() > 1) { + cpu_shootdown_tlb(va); + } else { + __invlpg(va); + } +} + +int +pmap_init(void) +{ return 0; } diff --git a/sys/arch/amd64/amd64/proc_machdep.c b/sys/arch/amd64/amd64/proc_machdep.c index 0be85fd..82b4e4f 100644 --- a/sys/arch/amd64/amd64/proc_machdep.c +++ b/sys/arch/amd64/amd64/proc_machdep.c @@ -32,6 +32,8 @@ #include <sys/param.h> #include <sys/errno.h> #include <sys/exec.h> +#include <sys/sched.h> +#include <sys/schedvar.h> #include <machine/frame.h> #include <machine/gdt.h> #include <machine/cpu.h> @@ -40,7 +42,7 @@ #include <vm/map.h> #include <string.h> -void +uintptr_t md_td_stackinit(struct proc *td, void *stack_top, struct exec_prog *prog) { uintptr_t *sp = stack_top; @@ -97,6 +99,7 @@ md_td_stackinit(struct proc *td, void *stack_top, struct exec_prog *prog) STACK_PUSH(sp, argc); tfp = &td->tf; tfp->rsp = (uintptr_t)sp - VM_HIGHER_HALF; + return tfp->rsp; } void @@ -123,24 +126,31 @@ md_td_kick(struct proc *td) { struct trapframe *tfp; struct cpu_info *ci; + uint16_t ds = USER_DS | 3; tfp = &td->tf; ci = this_cpu(); ci->curtd = td; + td->flags &= ~PROC_KTD; __ASMV( - "push %0\n" + "mov %0, %%rax\n" "push %1\n" - "pushf\n" "push %2\n" "push %3\n" + "push %%rax\n" + "push %4\n" + "test $3, %%ax\n" + "jz 1f\n" "lfence\n" "swapgs\n" - "iretq" + "1:\n" + " iretq" : - : "i" (USER_DS | 3), + : "r" (tfp->cs), + "r" (ds), "r" (tfp->rsp), - "i" (USER_CS | 3), + "m" (tfp->rflags), "r" (tfp->rip) ); @@ -155,13 +165,14 @@ md_td_kick(struct proc *td) * @ip: Instruction pointer. 
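The dirty-bit helpers above pair naturally in a writeback loop; a hedged sketch (flush_page() is a hypothetical callback):

    if (!pmap_is_clean(vas, va)) {
        flush_page(va);             /* hypothetical: write the page back */
        pmap_mark_clean(vas, va);   /* clears PTE_DIRTY, shoots down stale TLBs */
    }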
*/ int -md_fork(struct proc *p, struct proc *parent, uintptr_t ip) +md_spawn(struct proc *p, struct proc *parent, uintptr_t ip) { uintptr_t stack_base; struct trapframe *tfp; struct pcb *pcbp; uint8_t rpl = 0; int error; + vm_prot_t prot = PROT_READ | PROT_WRITE; tfp = &p->tf; @@ -201,12 +212,117 @@ md_fork(struct proc *p, struct proc *parent, uintptr_t ip) */ if (rpl == 0) { stack_base += VM_HIGHER_HALF; + p->flags |= PROC_KTD; } else { - vm_map(pcbp->addrsp, stack_base, stack_base, - PROT_READ | PROT_WRITE | PROT_USER, PROC_STACK_PAGES); + prot |= PROT_USER; + vm_map(pcbp->addrsp, stack_base, stack_base, prot, PROC_STACK_PAGES); } p->stack_base = stack_base; tfp->rsp = ALIGN_DOWN((stack_base + PROC_STACK_SIZE) - 1, 16); return 0; } + +/* + * Save thread state and enqueue it back into one + * of the ready queues. + */ +static void +sched_save_td(struct proc *td, struct trapframe *tf) +{ + /* + * Save trapframe to process structure only + * if PROC_EXEC is not set. + */ + if (!ISSET(td->flags, PROC_EXEC)) { + memcpy(&td->tf, tf, sizeof(td->tf)); + } + + sched_enqueue_td(td); +} + +static void +sched_switch_to(struct trapframe *tf, struct proc *td) +{ + struct cpu_info *ci; + struct sched_cpu *cpustat; + struct pcb *pcbp; + + ci = this_cpu(); + + if (tf != NULL) { + memcpy(tf, &td->tf, sizeof(*tf)); + } + + /* Update stats */ + cpustat = &ci->stat; + atomic_inc_64(&cpustat->nswitch); + + ci->curtd = td; + pcbp = &td->pcb; + pmap_switch_vas(pcbp->addrsp); +} + +/* + * Enable or disable preemption on the current + * processor + * + * @enable: Enable preemption if true + */ +void +sched_preempt_set(bool enable) +{ + struct cpu_info *ci = this_cpu(); + + if (ci == NULL) { + return; + } + + ci->preempt = enable; +} + +bool +sched_preemptable(void) +{ + struct cpu_info *ci = this_cpu(); + + if (ci == NULL) { + return false; + } + + return ci->preempt; +} + +/* + * Perform a context switch. + */ +void +md_sched_switch(struct trapframe *tf) +{ + struct proc *next_td, *td; + struct cpu_info *ci; + + ci = this_cpu(); + if (!ci->preempt) { + sched_oneshot(false); + return; + } + + td = ci->curtd; + mi_sched_switch(td); + + if (td != NULL) { + if (td->pid == 0) + return; + + sched_save_td(td, tf); + } + + if ((next_td = sched_dequeue_td()) == NULL) { + sched_oneshot(false); + return; + } + + sched_switch_to(tf, next_td); + sched_oneshot(false); +} diff --git a/sys/arch/amd64/amd64/reboot.c b/sys/arch/amd64/amd64/reboot.c index b9df1c0..8ebe15e 100644 --- a/sys/arch/amd64/amd64/reboot.c +++ b/sys/arch/amd64/amd64/reboot.c @@ -32,16 +32,70 @@ #include <sys/cdefs.h> #include <machine/pio.h> #include <machine/cpu.h> +#include <dev/acpi/acpi.h> + +static void +cpu_reset_intel(struct cpu_info *ci) +{ + /* + * Ivy bridge processors and their panther point chipsets + * (family 6) can be reset through special PCH reset control + * registers + */ + if (ci->family == 6) { + outb(0xCF9, 3 << 1); + } +} + +/* + * Attempt to reboot the system, we do this in many + * stages of escalation. If a reset via the i8042 + * controller fails and we are on an Intel processor, + * attempt a chipset specific reset. 
If that somehow fails + * as well, just smack the cpu with a NULL IDTR as well + * as an INT $0x0 + */ +static void +__cpu_reset(struct cpu_info *ci) +{ + /* Try via the i8042 */ + outb(0x64, 0xFE); + + /* Something went wrong if we are here */ + if (ci == NULL) { + return; + } + + if (ci->vendor == CPU_VENDOR_INTEL) { + cpu_reset_intel(ci); + } +} void cpu_reboot(int method) { + struct cpu_info *ci = this_cpu(); + uint32_t *__dmmy = NULL; + + if (ISSET(method, REBOOT_POWEROFF)) { + acpi_sleep(ACPI_SLEEP_S5); + } + if (ISSET(method, REBOOT_HALT)) { cpu_halt_all(); } - /* Pulse the reset line until the machine goes down */ - for (;;) { - outb(0x64, 0xFE); - } + __cpu_reset(ci); + asm volatile("lgdt %0; int $0x0" :: "m" (__dmmy)); + __builtin_unreachable(); +} + +/* + * arg0: Method bits + */ +scret_t +sys_reboot(struct syscall_args *scargs) +{ + cpu_reboot(scargs->arg0); + __builtin_unreachable(); } diff --git a/sys/arch/amd64/isa/i8042.S b/sys/arch/amd64/amd64/simd.S index 123d3a5..23fe461 100644 --- a/sys/arch/amd64/isa/i8042.S +++ b/sys/arch/amd64/amd64/simd.S @@ -27,11 +27,50 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <machine/frameasm.h> - .text - .globl i8042_kb_isr -INTRENTRY(i8042_kb_isr, handle_kb) -handle_kb: - call i8042_kb_event + .globl simd_init +simd_init: + /* + * Enable SIMD, if SSE and AVX is supported, + * a value of zero is returned. If SSE is + * supported yet AVX is not, a value of one + * is returned. However, if none are supported, + * this routine returns -1. + */ + + // Do we support SSE? + mov $1, %eax + cpuid + bt $25, %edx + jnc .sse_not_sup + + mov %cr0, %rax // Old CR0 -> EAX + and $0xFFFB, %ax // Disable co-processor emulation + or $0x02, %ax // Enable co-processor monitoring + mov %rax, %cr0 // Update CR0 with new flags + + mov %cr4, %rax // Old CR4 -> EAX + or $0x200, %ax // Enable FXSAVE/FXRSTOR + or $0x400, %ax // Enable SIMD FP exceptions + mov %rax, %cr4 // Update CR4 with new flags + + mov $1, %eax // LEAF 1 + cpuid // Bit 28 of ECX indicates AVX support + mov $3, %eax // We need to check two bits + shl $27, %eax // Which are ECX.OSXSAVE and ECX.AVX + test %eax, %ecx // Are XSAVE and AVX supported? 
+ jnc .avx_not_sup // Nope, just continue + + // Enable AVX + xor %rcx, %rcx // Select XCR0 + xgetbv // Load extended control register + or $0x07, %eax // Set AVX + SSE bits + xsetbv // Store new flags + xor %rax, %rax // Everything is good + retq // Return back to caller (RETURN) +.sse_not_sup: + mov $-1, %rax + retq +.avx_not_sup: + mov $1, %rax retq diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c index 9a3a7ba..68d7f89 100644 --- a/sys/arch/amd64/amd64/trap.c +++ b/sys/arch/amd64/amd64/trap.c @@ -60,6 +60,17 @@ static const char *trap_type[] = { [TRAP_SS] = "stack-segment fault" }; +/* Page-fault flags */ +static const char pf_flags[] = { + 'p', /* Present */ + 'w', /* Write */ + 'u', /* User */ + 'r', /* Reserved write */ + 'x', /* Instruction fetch */ + 'k', /* Protection key violation */ + 's' /* Shadow stack access */ +}; + static inline uintptr_t pf_faultaddr(void) { @@ -69,7 +80,24 @@ pf_faultaddr(void) } static void -regdump(struct trapframe *tf) +pf_code(uint64_t error_code) +{ + char tab[8] = { + '-', '-', '-', + '-', '-', '-', + '-', '\0' + }; + + for (int i = 0; i < 7; ++i) { + if (ISSET(error_code, BIT(i))) { + tab[i] = pf_flags[i]; + } + } + kprintf("code=[%s]\n", tab); +} + +__dead static void +trap_fatal(struct trapframe *tf) { uintptr_t cr3, cr2 = pf_faultaddr(); @@ -79,11 +107,17 @@ regdump(struct trapframe *tf) : "memory" ); - kprintf(OMIT_TIMESTAMP + if (tf->trapno == TRAP_PAGEFLT) { + pf_code(tf->error_code); + } + + panic("got fatal trap (%s)\n\n" + "-- DUMPING PROCESSOR STATE --\n" "RAX=%p RCX=%p RDX=%p\n" "RBX=%p RSI=%p RDI=%p\n" "RFL=%p CR2=%p CR3=%p\n" - "RBP=%p RSP=%p RIP=%p\n", + "RBP=%p RSP=%p RIP=%p\n\n", + trap_type[tf->trapno], tf->rax, tf->rcx, tf->rdx, tf->rbx, tf->rsi, tf->rdi, tf->rflags, cr2, cr3, @@ -94,6 +128,7 @@ static void trap_user(struct trapframe *tf) { struct proc *td = this_td(); + uintptr_t fault_addr; sigset_t sigset; sigemptyset(&sigset); @@ -101,6 +136,9 @@ trap_user(struct trapframe *tf) switch (tf->trapno) { case TRAP_PROTFLT: case TRAP_PAGEFLT: + if (tf->trapno == TRAP_PAGEFLT) { + pf_code(tf->error_code); + } sigaddset(&sigset, SIGSEGV); break; case TRAP_ARITH_ERR: @@ -112,6 +150,9 @@ trap_user(struct trapframe *tf) break; } + fault_addr = pf_faultaddr(); + proc_coredump(td, fault_addr); + /* * Send the signal then flush the signal queue right * away as these types of events are critical. 
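For example, a user-mode write to an unmapped page carries error code 0x6 (write | user), which pf_code() above renders as:

    pf_code(0x6);   /* prints: code=[-wu----] */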
@@ -120,20 +161,6 @@ trap_user(struct trapframe *tf) dispatch_signals(td); } -static void -trap_quirks(struct cpu_info *ci) -{ - static uint8_t count; - - if (ISSET(ci->irq_mask, CPU_IRQ(1)) && count < 1) { - ++count; - pr_error("detected buggy i8042\n"); - pr_error("applying I8042_HOSTILE quirk\n"); - i8042_quirk(I8042_HOSTILE); - return; - } -} - void trap_syscall(struct trapframe *tf) { @@ -155,17 +182,11 @@ trap_syscall(struct trapframe *tf) void trap_handler(struct trapframe *tf) { - struct cpu_info *ci; - - splraise(IPL_HIGH); - if (tf->trapno >= NELEM(trap_type)) { panic("got unknown trap %d\n", tf->trapno); } pr_error("got %s\n", trap_type[tf->trapno]); - ci = this_cpu(); - trap_quirks(ci); /* Handle traps from userland */ if (ISSET(tf->cs, 3)) { @@ -173,6 +194,6 @@ trap_handler(struct trapframe *tf) return; } - regdump(tf); - panic("fatal trap - halting\n"); + trap_fatal(tf); + __builtin_unreachable(); } diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c new file mode 100644 index 0000000..2111cd0 --- /dev/null +++ b/sys/arch/amd64/amd64/tsc.c @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/cdefs.h> +#include <sys/driver.h> +#include <sys/syslog.h> +#include <machine/tsc.h> +#include <machine/asm.h> +#include <machine/cpuid.h> + +/* See kconf(9) */ +#if defined(__USER_TSC) +#define USER_TSC __USER_TSC +#else +#define USER_TSC 0 +#endif /* __USER_TSC */ + +#define pr_trace(fmt, ...) kprintf("tsc: " fmt, ##__VA_ARGS__) +#define pr_error(...) pr_trace(__VA_ARGS__) + +static uint64_t tsc_i = 0; + +uint64_t +rdtsc_rel(void) +{ + return rdtsc() - tsc_i; +} + +/* + * Check if the TSC and RDTSC instruction is + * supported on the current CPU. + * + * Returns zero if supported, otherwise a less + * than zero value is returned. 
+ */ +static int +tsc_check(void) +{ + uint32_t edx, unused; + + CPUID(1, unused, unused, unused, edx); + if (ISSET(edx, BIT(4))) { + return 0; + } + + return -ENOTSUP; +} + +static int +tsc_init(void) +{ + uint64_t cr4; + int error; + + /* Is the TSC even supported? */ + if ((error = tsc_check()) != 0) { + pr_error("TSC not supported by machine\n"); + return error; + } + + cr4 = amd64_read_cr4(); + tsc_i = rdtsc(); + pr_trace("initial count @ %d\n", rdtsc_rel()); + + /* + * If USER_TSC is configured to "yes", we'll + * need to enable the 'rdtsc' instruction in + * user mode by clearing CR4.TSD. + */ + if (USER_TSC) { + cr4 &= ~CR4_TSD; + } else { + cr4 |= CR4_TSD; + } + + amd64_write_cr4(cr4); + return 0; +} + +DRIVER_EXPORT(tsc_init, "x86-tsc"); diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S new file mode 100644 index 0000000..62bed1b --- /dev/null +++ b/sys/arch/amd64/amd64/vector.S @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <machine/frameasm.h> + +#define IDT_INT_GATE 0x8E + +.macro IDT_SET_VEC vec, sym + mov $\vec, %rdi + mov $IDT_INT_GATE, %rsi + lea \sym(%rip), %rdx + xor %rcx, %rcx + call idt_set_desc +.endm + + .text + ALIGN_TEXT +ioapic_common_func: + xor %rcx, %rcx // Clear counter +.walk: // Walk the handlers + lea g_intrs(%rip), %rbx // Grab table to RBX + lea (%rbx, %rcx, 8), %rbx // g_intrs + (8 * rcx) + mov (%rbx), %rdx // Grab the intr_hand + or %rdx, %rdx // No more? + jz 1f // Nope, return + + mov (%rdx), %rbx // intr_hand.func + add $16, %rdx // Get interrupt data + mov %rdx, %rdi // Pass the interrupt data + push %rcx // Save our counter + push %rdx + call *%rbx // Call the handler + pop %rdx + pop %rcx // Restore our counter + or %rax, %rax // Was it theirs? (RET >= 1) + jnz handled // Yes, we are done. +1: inc %rcx // Next + cmp $256, %rcx // Did we reach the end?
+ jl .walk // Nope, keep going + jmp done // Out of entries +handled: + sub $8, %rdx + addq $1, (%rdx) +done: + call lapic_eoi + retq + + .globl pin_isr_load +pin_isr_load: + IDT_SET_VEC 37, ioapic_edge_0 + IDT_SET_VEC 38, ioapic_edge_1 + IDT_SET_VEC 39, ioapic_edge_2 + IDT_SET_VEC 40, ioapic_edge_3 + IDT_SET_VEC 41, ioapic_edge_4 + IDT_SET_VEC 42, ioapic_edge_5 + IDT_SET_VEC 43, ioapic_edge_6 + IDT_SET_VEC 44, ioapic_edge_7 + IDT_SET_VEC 45, ioapic_edge_8 + IDT_SET_VEC 46, ioapic_edge_9 + IDT_SET_VEC 47, ioapic_edge_10 + IDT_SET_VEC 48, ioapic_edge_11 + IDT_SET_VEC 49, ioapic_edge_12 + IDT_SET_VEC 50, ioapic_edge_13 + IDT_SET_VEC 51, ioapic_edge_14 + IDT_SET_VEC 52, ioapic_edge_15 + IDT_SET_VEC 53, ioapic_edge_16 + IDT_SET_VEC 54, ioapic_edge_17 + IDT_SET_VEC 55, ioapic_edge_18 + IDT_SET_VEC 56, ioapic_edge_19 + IDT_SET_VEC 57, ioapic_edge_20 + IDT_SET_VEC 58, ioapic_edge_21 + IDT_SET_VEC 59, ioapic_edge_22 + IDT_SET_VEC 60, ioapic_edge_23 + IDT_SET_VEC 61, ioapic_edge_24 + IDT_SET_VEC 62, ioapic_edge_25 + IDT_SET_VEC 63, ioapic_edge_26 + IDT_SET_VEC 64, ioapic_edge_27 + IDT_SET_VEC 65, ioapic_edge_28 + IDT_SET_VEC 66, ioapic_edge_29 + IDT_SET_VEC 67, ioapic_edge_30 + IDT_SET_VEC 68, ioapic_edge_31 + IDT_SET_VEC 69, ioapic_edge_32 + IDT_SET_VEC 70, ioapic_edge_33 + IDT_SET_VEC 71, ioapic_edge_34 + IDT_SET_VEC 72, ioapic_edge_35 + IDT_SET_VEC 73, ioapic_edge_36 + IDT_SET_VEC 74, ioapic_edge_37 + IDT_SET_VEC 75, ioapic_edge_38 + IDT_SET_VEC 76, ioapic_edge_39 + IDT_SET_VEC 77, ioapic_edge_40 + IDT_SET_VEC 78, ioapic_edge_41 + IDT_SET_VEC 79, ioapic_edge_42 + IDT_SET_VEC 80, ioapic_edge_43 + IDT_SET_VEC 81, ioapic_edge_44 + IDT_SET_VEC 82, ioapic_edge_45 + IDT_SET_VEC 83, ioapic_edge_46 + IDT_SET_VEC 84, ioapic_edge_47 + IDT_SET_VEC 85, ioapic_edge_48 + IDT_SET_VEC 86, ioapic_edge_49 + IDT_SET_VEC 87, ioapic_edge_50 + IDT_SET_VEC 88, ioapic_edge_51 + IDT_SET_VEC 89, ioapic_edge_52 + IDT_SET_VEC 90, ioapic_edge_53 + IDT_SET_VEC 91, ioapic_edge_54 + IDT_SET_VEC 92, ioapic_edge_55 + IDT_SET_VEC 93, ioapic_edge_56 + IDT_SET_VEC 94, ioapic_edge_57 + IDT_SET_VEC 95, ioapic_edge_58 + IDT_SET_VEC 96, ioapic_edge_59 + IDT_SET_VEC 97, ioapic_edge_60 + IDT_SET_VEC 98, ioapic_edge_61 + IDT_SET_VEC 99, ioapic_edge_62 + IDT_SET_VEC 100, ioapic_edge_63 + ret + + .globl ipi_isr +INTRENTRY(ipi_isr, ipi_trampoline) + call ipi_trampoline + retq + + .globl halt_isr +INTRENTRY(halt_isr, halt_trampoline) +halt_trampoline: + cli + hlt + +ipi_trampoline: + call __ipi_handle_common + retq + +/* I/O APIC edge ISRs */ +INTRENTRY(ioapic_edge_0, ioapic_common_func) +INTRENTRY(ioapic_edge_1, ioapic_common_func) +INTRENTRY(ioapic_edge_2, ioapic_common_func) +INTRENTRY(ioapic_edge_3, ioapic_common_func) +INTRENTRY(ioapic_edge_4, ioapic_common_func) +INTRENTRY(ioapic_edge_5, ioapic_common_func) +INTRENTRY(ioapic_edge_6, ioapic_common_func) +INTRENTRY(ioapic_edge_7, ioapic_common_func) +INTRENTRY(ioapic_edge_8, ioapic_common_func) +INTRENTRY(ioapic_edge_9, ioapic_common_func) +INTRENTRY(ioapic_edge_10, ioapic_common_func) +INTRENTRY(ioapic_edge_11, ioapic_common_func) +INTRENTRY(ioapic_edge_12, ioapic_common_func) +INTRENTRY(ioapic_edge_13, ioapic_common_func) +INTRENTRY(ioapic_edge_14, ioapic_common_func) +INTRENTRY(ioapic_edge_15, ioapic_common_func) +INTRENTRY(ioapic_edge_16, ioapic_common_func) +INTRENTRY(ioapic_edge_17, ioapic_common_func) +INTRENTRY(ioapic_edge_18, ioapic_common_func) +INTRENTRY(ioapic_edge_19, ioapic_common_func) +INTRENTRY(ioapic_edge_20, ioapic_common_func) +INTRENTRY(ioapic_edge_21, 
ioapic_common_func) +INTRENTRY(ioapic_edge_22, ioapic_common_func) +INTRENTRY(ioapic_edge_23, ioapic_common_func) +INTRENTRY(ioapic_edge_24, ioapic_common_func) +INTRENTRY(ioapic_edge_25, ioapic_common_func) +INTRENTRY(ioapic_edge_26, ioapic_common_func) +INTRENTRY(ioapic_edge_27, ioapic_common_func) +INTRENTRY(ioapic_edge_28, ioapic_common_func) +INTRENTRY(ioapic_edge_29, ioapic_common_func) +INTRENTRY(ioapic_edge_30, ioapic_common_func) +INTRENTRY(ioapic_edge_31, ioapic_common_func) +INTRENTRY(ioapic_edge_32, ioapic_common_func) +INTRENTRY(ioapic_edge_33, ioapic_common_func) +INTRENTRY(ioapic_edge_34, ioapic_common_func) +INTRENTRY(ioapic_edge_35, ioapic_common_func) +INTRENTRY(ioapic_edge_36, ioapic_common_func) +INTRENTRY(ioapic_edge_37, ioapic_common_func) +INTRENTRY(ioapic_edge_38, ioapic_common_func) +INTRENTRY(ioapic_edge_39, ioapic_common_func) +INTRENTRY(ioapic_edge_40, ioapic_common_func) +INTRENTRY(ioapic_edge_41, ioapic_common_func) +INTRENTRY(ioapic_edge_42, ioapic_common_func) +INTRENTRY(ioapic_edge_43, ioapic_common_func) +INTRENTRY(ioapic_edge_44, ioapic_common_func) +INTRENTRY(ioapic_edge_45, ioapic_common_func) +INTRENTRY(ioapic_edge_46, ioapic_common_func) +INTRENTRY(ioapic_edge_47, ioapic_common_func) +INTRENTRY(ioapic_edge_48, ioapic_common_func) +INTRENTRY(ioapic_edge_49, ioapic_common_func) +INTRENTRY(ioapic_edge_50, ioapic_common_func) +INTRENTRY(ioapic_edge_51, ioapic_common_func) +INTRENTRY(ioapic_edge_52, ioapic_common_func) +INTRENTRY(ioapic_edge_53, ioapic_common_func) +INTRENTRY(ioapic_edge_54, ioapic_common_func) +INTRENTRY(ioapic_edge_55, ioapic_common_func) +INTRENTRY(ioapic_edge_56, ioapic_common_func) +INTRENTRY(ioapic_edge_57, ioapic_common_func) +INTRENTRY(ioapic_edge_58, ioapic_common_func) +INTRENTRY(ioapic_edge_59, ioapic_common_func) +INTRENTRY(ioapic_edge_60, ioapic_common_func) +INTRENTRY(ioapic_edge_61, ioapic_common_func) +INTRENTRY(ioapic_edge_62, ioapic_common_func) +INTRENTRY(ioapic_edge_63, ioapic_common_func) diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC index d3a4368..6bf3af5 100644 --- a/sys/arch/amd64/conf/GENERIC +++ b/sys/arch/amd64/conf/GENERIC @@ -1,10 +1,14 @@ +// // Kernel options -option SPECTRE_IBRS no -option SERIAL_DEBUG yes - -// Kernel constants -setval SCHED_NQUEUE 4 - -// Console attributes -setval CONSOLE_BG 0x000000 -setval CONSOLE_FG 0XB57614 +// +// XXX: Indirect branch restricted speculation (SPECTRE_IBRS) +// is disabled by default as it can lead to significant +// performance degradation. +// +option SPECTRE_IBRS no // Enable the IBRS CPU feature +option SERIAL_DEBUG yes // Enable kmsg serial logging +option CPU_UMIP yes // Enable User-mode Instruction Prevention +option USER_KMSG no // Show kmsg in user consoles +option USER_TSC no // Enable 'rdtsc' in user mode +option CPU_SMEP yes // Supervisor Memory Exec Protection +option I8042_POLL yes // Use polling for the i8042 diff --git a/sys/arch/amd64/conf/link.ld b/sys/arch/amd64/conf/link.ld index 9c47a81..a43824f 100644 --- a/sys/arch/amd64/conf/link.ld +++ b/sys/arch/amd64/conf/link.ld @@ -29,6 +29,12 @@ SECTIONS __drivers_init_end = .; } :rodata + .drivers.defer : { + __driversd_init_start = .; + *(.drivers.defer .drivers.defer) + __driversd_init_end = .; + } :rodata + . 
+= CONSTANT(MAXPAGESIZE); .data : { diff --git a/sys/arch/amd64/isa/i8042.c b/sys/arch/amd64/isa/i8042.c index 89bebc5..095f1f4 100644 --- a/sys/arch/amd64/isa/i8042.c +++ b/sys/arch/amd64/isa/i8042.c @@ -33,12 +33,14 @@ #include <sys/syslog.h> #include <sys/spinlock.h> #include <sys/param.h> +#include <sys/ascii.h> #include <sys/proc.h> #include <sys/reboot.h> #include <sys/queue.h> #include <dev/acpi/acpi.h> #include <dev/timer.h> #include <dev/cons/cons.h> +#include <dev/dmi/dmi.h> #include <machine/cpu.h> #include <machine/pio.h> #include <machine/isa/i8042var.h> @@ -51,6 +53,13 @@ #include <string.h> #include <assert.h> +/* From kconf(9) */ +#if !defined(__I8042_POLL) +#define I8042_POLL 0 +#else +#define I8042_POLL __I8042_POLL +#endif + #define KEY_REP_MAX 2 #define pr_trace(fmt, ...) kprintf("i8042: " fmt, ##__VA_ARGS__) @@ -58,7 +67,28 @@ #define IO_NOP() inb(0x80) -static struct spinlock data_lock; +struct i8042_databuf { + uint8_t data[8]; + size_t len; +}; + +/* + * This table allows the lookup of extended + * scancode bytes. + * + * XXX: Excludes the 0xE0 byte + */ +static struct i8042_databuf i8042_etab[] = { + [ I8042_XSC_ENDPR] = { + .data = { 0x4F }, + .len = 1 + }, + [I8042_XSC_ENDRL] = { + .data = { 0xCF }, + .len = 1 + } +}; + static struct spinlock isr_lock; static bool shift_key = false; static bool capslock = false; @@ -68,12 +98,13 @@ static struct proc polltd; static struct timer tmr; static bool is_init = false; +static void i8042_ibuf_wait(void); static int dev_send(bool aux, uint8_t data); static int i8042_kb_getc(uint8_t sc, char *chr); -static void i8042_drain(void); +static void i8042_drain(struct i8042_databuf *res); static char keytab[] = { - '\0', '\0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', + '\0', '\x1B', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '-', '=', '\b', '\t', 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', 'o', 'p', '[', ']', '\n', '\0', 'a', 's', 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', '\'', '`', '\0', '\\', 'z', 'x', 'c', 'v', @@ -103,54 +134,56 @@ kbd_set_leds(uint8_t mask) dev_send(false, mask); } -/* - * Poll the i8042 status register - * - * @bits: Status bits. - * @pollset: True to poll if set - */ -static int -i8042_statpoll(uint8_t bits, bool pollset) +static void +i8042_obuf_wait(void) { - size_t usec_start, usec; - size_t elapsed_msec; - uint8_t val; - bool tmp; + uint8_t status; - usec_start = tmr.get_time_usec(); for (;;) { - val = inb(I8042_STATUS); - tmp = (pollset) ? ISSET(val, bits) : !ISSET(val, bits); - usec = tmr.get_time_usec(); - elapsed_msec = (usec - usec_start) / 1000; - - IO_NOP(); - - /* If tmp is set, the register updated in time */ - if (tmp) { - break; + status = inb(I8042_STATUS); + if (ISSET(status, I8042_OBUFF)) { + return; } + } +} - /* Exit with an error if we timeout */ - if (elapsed_msec > I8042_DELAY) { - return -ETIME; +static void +i8042_ibuf_wait(void) +{ + uint8_t status; + + for (;;) { + status = inb(I8042_STATUS); + if (!ISSET(status, I8042_IBUFF)) { + return; } } - - return val; } /* * Drain i8042 internal data registers. 
+ * + * @res: Pointer for read data to be buffered to + * + * XXX: The 'res' argument is NULLable */ static void -i8042_drain(void) +i8042_drain(struct i8042_databuf *res) { - spinlock_acquire(&data_lock); while (ISSET(inb(I8042_STATUS), I8042_OBUFF)) { - inb(I8042_DATA); + if (res == NULL) { + inb(I8042_DATA); + continue; + } + + if (res->len >= sizeof(res->data)) { + pr_error("data received from i8042 is too big\n"); + break; + } + + res->data[res->len++] = inb(I8042_DATA); + tmr.msleep(10); } - spinlock_release(&data_lock); } /* @@ -162,34 +195,47 @@ static void i8042_write(uint16_t port, uint8_t val) { - i8042_statpoll(I8042_IBUFF, false); + i8042_ibuf_wait(); outb(port, val); } /* - * Read the i8042 config register + * Read from an i8042 register. + * + * @port: I/O port + */ +static uint8_t +i8042_read(uint16_t port) +{ + i8042_obuf_wait(); + return inb(port); +} + +/* + * Read the i8042 controller configuration + * byte. */ static uint8_t i8042_read_conf(void) { - i8042_drain(); + uint8_t conf; + i8042_write(I8042_CMD, I8042_GET_CONFB); - i8042_statpoll(I8042_OBUFF, true); - return inb(I8042_DATA); + i8042_obuf_wait(); + conf = i8042_read(I8042_DATA); + return conf; } /* - * Write the i8042 config register + * Write a new value to the i8042 controller + * configuration byte. */ static void -i8042_write_conf(uint8_t value) +i8042_write_conf(uint8_t conf) { - i8042_drain(); - i8042_statpoll(I8042_IBUFF, false); i8042_write(I8042_CMD, I8042_SET_CONFB); - i8042_statpoll(I8042_IBUFF, false); - i8042_write(I8042_DATA, value); - i8042_drain(); + i8042_ibuf_wait(); + i8042_write(I8042_DATA, conf); } /* @@ -205,14 +251,13 @@ dev_send(bool aux, uint8_t data) i8042_write(I8042_CMD, I8042_PORT1_SEND); } - i8042_statpoll(I8042_IBUFF, false); i8042_write(I8042_DATA, data); - i8042_statpoll(I8042_OBUFF, true); + i8042_obuf_wait(); return inb(I8042_DATA); } -void -i8042_kb_event(void) +static int +i8042_kb_event(void *sp) { struct cpu_info *ci; struct cons_input input; @@ -232,50 +277,103 @@ input.chr = c; cons_ibuf_push(&g_root_scr, input); done: - ci->irq_mask &= CPU_IRQ(1); + ci->irq_mask &= ~CPU_IRQ(1); spinlock_release(&isr_lock); - lapic_eoi(); + return 1; /* handled */ } static void i8042_en_intr(void) { + struct intr_hand ih; uint8_t conf; - int vec; - - pr_trace("ENTER -> i8042_en_intr\n"); - i8042_write(I8042_CMD, I8042_DISABLE_PORT0); - pr_trace("port 0 disabled\n"); - vec = intr_alloc_vector("i8042-kb", IPL_BIO); - idt_set_desc(vec, IDT_INT_GATE, ISR(i8042_kb_isr), IST_HW_IRQ); - ioapic_set_vec(KB_IRQ, vec); - ioapic_irq_unmask(KB_IRQ); - pr_trace("irq 1 -> vec[%x]\n", vec); + ih.func = i8042_kb_event; + ih.priority = IPL_BIO; + ih.irq = KB_IRQ; + intr_register("i8042-kb", &ih); - /* Setup config bits */ + /* + * Enable the clock of PS/2 port 0 and tell + * the controller that we are accepting + * interrupts. + */ conf = i8042_read_conf(); + conf &= ~I8042_PORT0_CLK; conf |= I8042_PORT0_INTR; - conf &= ~I8042_PORT1_INTR; i8042_write_conf(conf); - pr_trace("conf written\n"); +} - i8042_write(I8042_CMD, I8042_ENABLE_PORT0); - pr_trace("port 0 enabled\n"); +/* + * Toggle the capslock and LED + */ +static void +capslock_toggle(void) +{ + /* + * In case we are holding the caps lock button down, + * we don't want it to be spam toggled as that would + * be pretty strange looking and probably annoying.
+     */
+    if (!capslock_released) {
+        return;
+    }
+
+    capslock_released = false;
+    capslock = !capslock;
+
+    if (!capslock) {
+        kbd_set_leds(0);
+    } else {
+        kbd_set_leds(I8042_LED_CAPS);
+    }
 }

+/*
+ * Dump extended data buffer
+ *
+ * @buf: Data
+ */
 static void
-esckey_reboot(void)
+i8042_ext_dump(struct i8042_databuf *buf)
 {
-    syslock();
-    kprintf(OMIT_TIMESTAMP "** Machine going down for a reboot\f");
+    if (buf == NULL) {
+        return;
+    }
+
+    for (size_t i = 0; i < buf->len; ++i) {
+        kprintf(OMIT_TIMESTAMP "%x", buf->data[i]);
+    }

-    for (size_t i = 0; i < 3; ++i) {
-        kprintf(OMIT_TIMESTAMP ".\f");
-        tmr.msleep(1000);
+    kprintf(OMIT_TIMESTAMP "\n");
+}
+
+/*
+ * Used internally by i8042_kb_getc() to acquire
+ * a key from an extended scancode
+ *
+ * @buf: Scancode buf
+ * @chr: Char res
+ *
+ * Returns the extended scancode type on success,
+ * otherwise a value less than zero (see I8042_XSC_*)
+ */
+static int
+i8042_kb_getxc(struct i8042_databuf *buf, char *chr)
+{
+    size_t nelem = NELEM(i8042_etab);
+    struct i8042_databuf *buf_tmp;
+    size_t len;
+
+    for (size_t i = 0; i < nelem; ++i) {
+        buf_tmp = &i8042_etab[i];
+        len = buf_tmp->len;
+        if (memcmp(buf->data, buf_tmp->data, len) == 0) {
+            return i;
+        }
     }

-    cpu_reboot(0);
+    return -1;
 }

 /*
@@ -290,31 +388,16 @@
 static int
 i8042_kb_getc(uint8_t sc, char *chr)
 {
     bool release = ISSET(sc, BIT(7));
+    struct i8042_databuf buf = {0};
+    int x_type;

     switch (sc) {
-    /* Left alt [press] */
-    case 0x38:
-        esckey_reboot();
-        break;
+    /* Escape */
+    case 0x76:
+        *chr = ASCII_ESC;
+        return 0;
     /* Caps lock [press] */
     case 0x3A:
-        /*
-         * In case we are holding the caps lock button down,
-         * we don't want it to be spam toggled as that would
-         * be pretty strange looking and probably annoying.
-         */
-        if (!capslock_released) {
-            return -EAGAIN;
-        }
-
-        capslock_released = false;
-        capslock = !capslock;
-
-        if (!capslock) {
-            kbd_set_leds(0);
-        } else {
-            kbd_set_leds(I8042_LED_CAPS);
-        }
+        capslock_toggle();
         return -EAGAIN;
     /* Caps lock [release] */
     case 0xBA:
@@ -331,6 +414,26 @@ i8042_kb_getc(uint8_t sc, char *chr)
         shift_key = false;
     }
     return -EAGAIN;
+    /* Extended byte */
+    case 0xE0:
+        /*
+         * Most keyboards have extended scancodes which
+         * consist of multiple bytes to represent certain
+         * special keys. We'll need to give the controller
+         * about 10 ms to refill its buffer.
+         */
+        tmr.msleep(10);
+        i8042_drain(&buf);
+        x_type = i8042_kb_getxc(&buf, chr);
+
+        /* Did we implement it? */
+        if (x_type < 0) {
+            pr_error("unknown xsc: ");
+            i8042_ext_dump(&buf);
+            return -EAGAIN;
+        }
+
+        return -1;
     }

     if (release) {
@@ -351,43 +454,30 @@ i8042_kb_getc(uint8_t sc, char *chr)
     return 0;
 }

-static void
-i8042_sync_loop(void)
-{
-    /* Wake up the bus */
-    outb(I8042_DATA, 0x00);
-    i8042_drain();
-
-    for (;;) {
-        i8042_sync();
-        md_pause();
-    }
-}
-
 /*
  * Grabs a key from the keyboard, used typically
  * for syncing the machine however can be used
- * to bypass IRQs in case of buggy EC.
+ * to bypass IRQs to prevent lost bytes.
  */
 void
 i8042_sync(void)
 {
     static struct spinlock lock;
     struct cons_input input;
-    uint8_t data;
+    uint8_t data, status;
     char c;

     if (spinlock_try_acquire(&lock)) {
         return;
     }

-    if (ISSET(quirks, I8042_HOSTILE) && is_init) {
-        if (i8042_statpoll(I8042_OBUFF, true) < 0) {
-            /* No data ready */
+    if (is_init) {
+        status = inb(I8042_STATUS);
+        if (!ISSET(status, I8042_OBUFF)) {
             goto done;
         }
-        data = inb(I8042_DATA);

+        data = inb(I8042_DATA);
         if (i8042_kb_getc(data, &c) == 0) {
             input.scancode = data;
             input.chr = c;
@@ -404,9 +494,20 @@ i8042_quirk(int mask)
     quirks |= mask;
 }

+static void
+i8042_sync_loop(void)
+{
+    for (;;) {
+        i8042_obuf_wait();
+        i8042_sync();
+    }
+}
+
 static int
 i8042_init(void)
 {
+    const char *prodver = NULL;
+
     /* Try to request a general purpose timer */
     if (req_timer(TIMER_GP, &tmr) != TMRR_SUCCESS) {
         pr_error("failed to fetch general purpose timer\n");
@@ -425,6 +526,9 @@ i8042_init(void)
         return -ENODEV;
     }

+    i8042_write(I8042_CMD, I8042_DISABLE_PORT0);
+    i8042_write(I8042_CMD, I8042_DISABLE_PORT1);
+
     /*
      * On some thinkpads, e.g., the T420s, the EC implementing
      * the i8042 logic likes to play cop and throw NMIs at us
@@ -432,26 +536,33 @@ i8042_init(void)
      * etc... As of now, treat the i8042 like a fucking bomb
      * if this bit is set.
      */
-    if (strcmp(acpi_oemid(), "LENOVO") == 0) {
+    if ((prodver = dmi_prodver()) == NULL) {
+        prodver = "None";
+    }
+    if (strcmp(prodver, "ThinkPad T420s") == 0) {
         quirks |= I8042_HOSTILE;
-        pr_trace("lenovo device, assuming hostile\n");
+        pr_trace("ThinkPad T420s detected, assuming hostile\n");
         pr_trace("disabling irq 1, polling as fallback\n");
-        fork1(&polltd, 0, i8042_sync_loop, NULL);
     }

-    if (!ISSET(quirks, I8042_HOSTILE)) {
+    /*
+     * Enable interrupts unless the i8042 has the hostile
+     * quirk or we are configured to poll for events; in
+     * that case, spawn the polling thread instead.
+     */
+    if (!ISSET(quirks, I8042_HOSTILE) && !I8042_POLL) {
         /* Enable interrupts */
-        i8042_drain();
+        i8042_drain(NULL);
         i8042_en_intr();
+    } else if (ISSET(quirks, I8042_HOSTILE) || I8042_POLL) {
+        spawn(&polltd, i8042_sync_loop, NULL, 0, NULL);
+        pr_trace("polling events\n");
     }

-    if (dev_send(false, 0xFF) == 0xFC) {
-        pr_error("kbd self test failure\n");
-        return -EIO;
-    }
-
+    i8042_write(I8042_CMD, I8042_ENABLE_PORT0);
+    i8042_drain(NULL);
     is_init = true;
     return 0;
 }

-DRIVER_EXPORT(i8042_init);
+DRIVER_EXPORT(i8042_init, "i8042");
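The keyboard hookup above is a user of the new intr_hand registration interface: a driver fills in func, priority, and irq, then calls intr_register(), which allocates a vector internally (pci_enable_msix() further below does the same for MSI-X with irq = -1). A minimal sketch of how another driver would hook an ISA IRQ under this scheme; the "mydev" name, IRQ number, and handler body are illustrative only, not part of this change:

    static int
    mydev_intr(void *sp)
    {
        /* ... service the hypothetical device here ... */
        return 1;   /* nonzero tells the common path the IRQ was handled */
    }

    static void
    mydev_en_intr(void)
    {
        struct intr_hand ih;

        ih.func = mydev_intr;
        ih.priority = IPL_BIO;
        ih.irq = 5;     /* hypothetical ISA IRQ line */
        if (intr_register("mydev", &ih) == NULL) {
            /* vector allocation failed */
        }
    }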
diff --git a/sys/arch/amd64/isa/mc1468.c b/sys/arch/amd64/isa/mc1468.c
new file mode 100644
index 0000000..1f3ae1d
--- /dev/null
+++ b/sys/arch/amd64/isa/mc1468.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/driver.h>
+#include <sys/device.h>
+#include <sys/syslog.h>
+#include <fs/devfs.h>
+#include <machine/pio.h>
+#include <machine/cdefs.h>
+#include <string.h>
+
+#define MC1468_REGSEL 0x70
+#define MC1468_DATA 0x71
+
+/* Register A flags */
+#define MC1468_UPDATING BIT(7)
+
+/* Register B flags */
+#define MC1468_DAYSAVE BIT(0)
+#define MC1468_CLOCK24 BIT(1)
+#define MC1468_BINDATE BIT(2)
+
+static struct cdevsw mc1468_cdevsw;
+
+/*
+ * Convert a binary value to binary-coded-decimal
+ * using the double dabble algorithm.
+ */
+static uint8_t
+bin_dabble(uint8_t bin)
+{
+    uint8_t retval = 0;
+    uint8_t nibble;
+
+    for (int i = 7; i >= 0; --i) {
+        /* Add 3 to any BCD nibble that is >= 5 before shifting */
+        for (int j = 0; j < 2; ++j) {
+            nibble = (retval >> (4 * j)) & 0x0F;
+            if (nibble >= 5) {
+                retval += 0x03 << (4 * j);
+            }
+        }
+
+        retval <<= 1;
+        if (bin & (1 << i)) {
+            retval |= 1;
+        }
+    }
+
+    return retval;
+}
+
+/*
+ * Read a byte from an MC1468XX register.
+ */
+static uint8_t
+mc1468_read(uint8_t reg)
+{
+    outb(MC1468_REGSEL, reg);
+    return inb(MC1468_DATA);
+}
+
+/*
+ * Write a byte to the MC1468XX register.
+ */
+static void
+mc1468_write(uint8_t reg, uint8_t val)
+{
+    outb(MC1468_REGSEL, reg);
+    outb(MC1468_DATA, val);
+}
+
+/*
+ * Returns true if the MC1468XX is updating
+ * its time registers. The update-in-progress
+ * bit lives in register A.
+ */
+static bool
+mc1468_updating(void)
+{
+    uint8_t reg_a;
+
+    reg_a = mc1468_read(0x0A);
+    return ISSET(reg_a, MC1468_UPDATING) != 0;
+}
+
+/*
+ * Check if date `a' and date `b' are synced.
+ * Used to make sure a bogus date caused by a
+ * read right before an MC1468XX register
+ * update doesn't occur.
+ */
+static bool
+mc1468_date_synced(struct date *a, struct date *b)
+{
+    if (a->year != b->year)
+        return false;
+    if (a->month != b->month)
+        return false;
+    if (a->day != b->day)
+        return false;
+    if (a->sec != b->sec)
+        return false;
+    if (a->min != b->min)
+        return false;
+    if (a->hour != b->hour)
+        return false;
+
+    return true;
+}
+
+/*
+ * Sometimes the clock chip may encode the
+ * date in binary-coded-decimal. This function
+ * converts a date in BCD format to plain binary.
+ */
+static void
+mc1468_bcd_conv(struct date *dp)
+{
+    uint8_t pm_bit = dp->hour & 0x80;
+
+    dp->year = (dp->year & 0x0F) + ((dp->year / 16) * 10);
+    dp->month = (dp->month & 0x0F) + ((dp->month / 16) * 10);
+    dp->day = (dp->day & 0x0F) + ((dp->day / 16) * 10);
+    dp->sec = (dp->sec & 0x0F) + ((dp->sec / 16) * 10);
+    dp->min = (dp->min & 0x0F) + ((dp->min / 16) * 10);
+    dp->hour = (dp->hour & 0x0F) + (((dp->hour & 0x70) / 16) * 10);
+
+    /* Preserve the PM bit (bit 7) from the original hour */
+    dp->hour |= pm_bit;
+}
+
+/*
+ * Read the time from the clock without syncing
+ * it up.
+ *
+ * XXX: Please use mc1468_get_date() instead as
+ *      this function may return inconsistent
+ *      values if not used correctly.
+ */
+static void
+__mc1468_get_time(struct date *dp)
+{
+    dp->year = mc1468_read(0x09);
+    dp->month = mc1468_read(0x08);
+    dp->day = mc1468_read(0x07);
+    dp->sec = mc1468_read(0x00);
+    dp->min = mc1468_read(0x02);
+    dp->hour = mc1468_read(0x04);
+}
+
+/*
+ * Write a new time/date to the chip.
+ */
+static void
+mc1468_set_date(const struct date *dp)
+{
+    while (mc1468_updating()) {
+        md_pause();
+    }
+
+    /* Year is kept as two digits (offset from 2000) */
+    mc1468_write(0x09, bin_dabble(dp->year % 100));
+    mc1468_write(0x08, bin_dabble(dp->month));
+    mc1468_write(0x07, bin_dabble(dp->day));
+    mc1468_write(0x04, bin_dabble(dp->hour));
+    mc1468_write(0x02, bin_dabble(dp->min));
+    mc1468_write(0x00, bin_dabble(dp->sec));
+}
+
+static int
+mc1468_get_date(struct date *dp)
+{
+    struct date date_cur, date_last;
+    uint8_t reg_b = mc1468_read(0x0B);
+
+    /*
+     * Get the current date and time.
+     *
+     * XXX: The date and time returned by __mc1468_get_time()
+     *      may at times be out of sync, read it twice to
+     *      make sure everything is synced up.
+     */
+    do {
+        while (mc1468_updating()) {
+            md_pause();
+        }
+        __mc1468_get_time(&date_cur);
+
+        while (mc1468_updating()) {
+            md_pause();
+        }
+        __mc1468_get_time(&date_last);
+    } while (!mc1468_date_synced(&date_cur, &date_last));
+
+    /* Is this in BCD? */
+    if (!ISSET(reg_b, MC1468_BINDATE)) {
+        mc1468_bcd_conv(&date_cur);
+    }
+
+    /* Convert from 12-hour format if the PM bit is set */
+    if (!ISSET(reg_b, MC1468_CLOCK24) && ISSET(date_cur.hour, 0x80)) {
+        date_cur.hour = ((date_cur.hour & 0x7F) + 12) % 24;
+    }
+
+    date_cur.year += 2000;
+    *dp = date_cur;
+    return 0;
+}
+
+static int
+mc1468_dev_read(dev_t dev, struct sio_txn *sio, int flags)
+{
+    struct date d;
+    size_t len = sizeof(d);
+
+    if (sio->len > len) {
+        sio->len = len;
+    }
+
+    mc1468_get_date(&d);
+    memcpy(sio->buf, &d, sio->len);
+    return sio->len;
+}
+
+static int
+mc1468_dev_write(dev_t dev, struct sio_txn *sio, int flags)
+{
+    struct date d;
+    size_t len = sizeof(d);
+
+    if (sio->len > len) {
+        sio->len = len;
+    }
+
+    memcpy(&d, sio->buf, sio->len);
+    mc1468_set_date(&d);
+    return sio->len;
+}
+
+static int
+mc1468_init(void)
+{
+    char devname[] = "rtc";
+    devmajor_t major;
+    dev_t dev;
+
+    major = dev_alloc_major();
+    dev = dev_alloc(major);
+    dev_register(major, dev, &mc1468_cdevsw);
+    devfs_create_entry(devname, major, dev, 0444);
+    return 0;
+}
+
+static struct cdevsw mc1468_cdevsw = {
+    .read = mc1468_dev_read,
+    .write = mc1468_dev_write,
+};
+
+DRIVER_EXPORT(mc1468_init, "mc1468");
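The rtc node registered by mc1468_init() hands a struct date back on read. A minimal userland sketch, assuming devfs is mounted at /dev and that struct date (with the year/month/day/hour/min/sec fields used above) is reachable from a userland header such as sys/time.h; both of those are assumptions here:

    #include <sys/time.h>   /* assumed home of struct date */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(void)
    {
        struct date d;
        int fd;

        if ((fd = open("/dev/rtc", O_RDONLY)) < 0)
            return 1;

        /* mc1468_dev_read() copies out at most sizeof(d) bytes */
        if (read(fd, &d, sizeof(d)) < 0) {
            close(fd);
            return 1;
        }

        printf("%04d-%02d-%02d %02d:%02d:%02d\n",
            d.year, d.month, d.day, d.hour, d.min, d.sec);
        close(fd);
        return 0;
    }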
diff --git a/sys/arch/amd64/isa/spkr.c b/sys/arch/amd64/isa/spkr.c
index b1bd2a2..c96e5f9 100644
--- a/sys/arch/amd64/isa/spkr.c
+++ b/sys/arch/amd64/isa/spkr.c
@@ -30,14 +30,60 @@
 #include <sys/cdefs.h>
 #include <sys/errno.h>
 #include <sys/param.h>
+#include <sys/device.h>
+#include <sys/driver.h>
+#include <fs/devfs.h>
 #include <dev/timer.h>
 #include <machine/isa/spkr.h>
 #include <machine/isa/i8254.h>
 #include <machine/pio.h>
+#include <string.h>

 #define DIVIDEND 1193180
 #define CTRL_PORT 0x61

+static struct cdevsw beep_cdevsw;
+
+/*
+ * Write to the pcspkr
+ *
+ * Bits 15:0  - frequency (hz)
+ * Bits 31:16 - duration (msec)
+ */
+static int
+dev_write(dev_t dev, struct sio_txn *sio, int flags)
+{
+    uint32_t payload = 0;
+    uint16_t hz;
+    uint16_t duration;
+    size_t len = sizeof(payload);
+
+    if (sio->len < len) {
+        return -EINVAL;
+    }
+
+    memcpy(&payload, sio->buf, len);
+    hz = payload & 0xFFFF;
+    duration = (payload >> 16) & 0xFFFF;
+    pcspkr_tone(hz, duration);
+    return sio->len;
+}
+
+static int
+beep_init(void)
+{
+    char devname[] = "beep";
+    devmajor_t major;
+    dev_t dev;
+
+    /* Register the device and expose it through devfs */
+    major = dev_alloc_major();
+    dev = dev_alloc(major);
+    dev_register(major, dev, &beep_cdevsw);
+    devfs_create_entry(devname, major, dev, 0666);
+    return 0;
+}
+
 int
 pcspkr_tone(uint16_t freq, uint32_t msec)
 {
@@ -67,3 +113,10 @@ pcspkr_tone(uint16_t freq, uint32_t msec)
     outb(CTRL_PORT, tmp & ~3);
     return 0;
 }
+
+static struct cdevsw beep_cdevsw = {
+    .read = noread,
+    .write = dev_write
+};
+
+DRIVER_EXPORT(beep_init, "pcspkr");
diff --git a/sys/arch/amd64/pci/pci_machdep.c b/sys/arch/amd64/pci/pci_machdep.c
index 43065b0..5b49a78 100644
--- a/sys/arch/amd64/pci/pci_machdep.c
+++ b/sys/arch/amd64/pci/pci_machdep.c
@@ -33,6 +33,7 @@
 #include <sys/mmio.h>
 #include <dev/pci/pci.h>
 #include <dev/pci/pciregs.h>
+#include <machine/pci/pci.h>
 #include <machine/pio.h>
 #include <machine/bus.h>
 #include <machine/cpu.h>
@@ -73,8 +74,8 @@ pci_get_barreg(struct pci_device *dev, uint8_t bar)
     }
 }

-pcireg_t
-pci_readl(struct pci_device *dev, uint32_t offset)
+__weak pcireg_t
+md_pci_readl(struct pci_device *dev, uint32_t offset)
 {
     uint32_t address;

@@ -83,8 +84,8 @@ pci_readl(struct pci_device *dev, uint32_t offset)
     return inl(0xCFC) >> ((offset & 3) * 8);
 }

-void
-pci_writel(struct pci_device *dev, uint32_t offset, pcireg_t val)
+__weak void
+md_pci_writel(struct pci_device *dev, uint32_t offset, pcireg_t val)
 {
     uint32_t address;

@@ -163,6 +164,7 @@ pci_enable_msix(struct pci_device *dev, const struct msi_intr *intr)
 {
     volatile uint64_t *tbl;
     struct cpu_info *ci;
+    struct intr_hand ih, *ih_res;
     uint32_t data, msg_ctl;
     uint64_t msg_addr, tmp;
     uint16_t tbl_off;
@@ -184,9 +186,14 @@ pci_enable_msix(struct pci_device *dev, const struct msi_intr *intr)
     tbl = (void *)((dev->bar[bir] & PCI_BAR_MEMMASK) + MMIO_OFFSET);
     tbl = (void *)((char *)tbl + tbl_off);

-    /* Get the vector and setup handler */
-    vector = intr_alloc_vector(intr->name, IPL_BIO);
-    idt_set_desc(vector, IDT_INT_GATE, ISR(intr->handler), 0);
+    ih.func = intr->handler;
+    ih.priority = IPL_BIO;
+    ih.irq = -1;
+    ih_res = intr_register(intr->name, &ih);
+    if (ih_res == NULL) {
+        return -EIO;
+    }
+    vector = ih_res->vector;

     /*
      * Setup the message data at bits 95:64 of the message
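For reference, the payload layout documented above for dev_write() packs one 32-bit word per write: bits 15:0 carry the tone frequency in Hz and bits 31:16 the duration in milliseconds. A minimal sketch of a userland beep, assuming devfs is mounted at /dev (the path is an assumption):

    #include <fcntl.h>
    #include <stdint.h>
    #include <unistd.h>

    int
    main(void)
    {
        /* 440 Hz in bits 15:0, 500 msec in bits 31:16 */
        uint32_t payload = 440 | (500 << 16);
        int fd;

        if ((fd = open("/dev/beep", O_WRONLY)) < 0)
            return 1;

        write(fd, &payload, sizeof(payload));
        close(fd);
        return 0;
    }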