Diffstat (limited to 'sys/arch/amd64')
-rw-r--r-- | sys/arch/amd64/amd64/gdt.c | 76
-rw-r--r-- | sys/arch/amd64/amd64/hpet.c | 16
-rw-r--r-- | sys/arch/amd64/amd64/intr.c | 71
-rw-r--r-- | sys/arch/amd64/amd64/lapic.c | 2
-rw-r--r-- | sys/arch/amd64/amd64/lapic_intr.S | 1
-rw-r--r-- | sys/arch/amd64/amd64/machdep.c | 111
-rw-r--r-- | sys/arch/amd64/amd64/mp.c | 46
-rw-r--r-- | sys/arch/amd64/amd64/pmap.c | 75
-rw-r--r-- | sys/arch/amd64/amd64/proc_machdep.c | 28
-rw-r--r-- | sys/arch/amd64/amd64/reboot.c | 15
-rw-r--r-- | sys/arch/amd64/amd64/simd.S (renamed from sys/arch/amd64/isa/i8042.S) | 51
-rw-r--r-- | sys/arch/amd64/amd64/trap.c | 53
-rw-r--r-- | sys/arch/amd64/amd64/vector.S | 200
-rw-r--r-- | sys/arch/amd64/conf/GENERIC | 15
-rw-r--r-- | sys/arch/amd64/conf/link.ld | 6
-rw-r--r-- | sys/arch/amd64/isa/i8042.c | 195
-rw-r--r-- | sys/arch/amd64/isa/mc1468.c | 281
-rw-r--r-- | sys/arch/amd64/isa/spkr.c | 53
-rw-r--r-- | sys/arch/amd64/pci/pci_machdep.c | 21
19 files changed, 1093 insertions, 223 deletions
diff --git a/sys/arch/amd64/amd64/gdt.c b/sys/arch/amd64/amd64/gdt.c index a8fe54d..40d8f48 100644 --- a/sys/arch/amd64/amd64/gdt.c +++ b/sys/arch/amd64/amd64/gdt.c @@ -29,50 +29,70 @@ #include <machine/gdt.h> -struct gdt_entry g_gdt_data[256] = { +/* + * The GDT should be cache line aligned, since it is accessed every time a + * segment selector is reloaded + */ +__cacheline_aligned struct gdt_entry g_gdt_data[GDT_ENTRY_COUNT] = { /* Null */ {0}, - /* Kernel code (0x8) */ + /* Kernel code (0x08) */ { - .limit = 0x0000, - .base_low = 0x0000, - .base_mid = 0x00, - .access = 0x9A, - .granularity = 0x20, - .base_hi = 0x00 + .limit = 0x0000, + .base_low = 0x0000, + .base_mid = 0x00, + .attributes = GDT_ATTRIBUTE_64BIT_CODE | GDT_ATTRIBUTE_PRESENT | + GDT_ATTRIBUTE_DPL0 | GDT_ATTRIBUTE_NONSYSTEM | + GDT_ATTRIBUTE_EXECUTABLE | GDT_ATTRIBUTE_READABLE, + .base_hi = 0x00 }, /* Kernel data (0x10) */ { - .limit = 0x0000, - .base_low = 0x0000, - .base_mid = 0x00, - .access = 0x92, - .granularity = 0x00, - .base_hi = 0x00 + .limit = 0x0000, + .base_low = 0x0000, + .base_mid = 0x00, + .attributes = GDT_ATTRIBUTE_PRESENT | GDT_ATTRIBUTE_DPL0 | + GDT_ATTRIBUTE_NONSYSTEM | GDT_ATTRIBUTE_WRITABLE, + .base_hi = 0x00 }, /* User code (0x18) */ { - .limit = 0x0000, - .base_low = 0x0000, - .base_mid = 0x00, - .access = 0xFA, - .granularity = 0xAF, - .base_hi = 0x00 + .limit = 0x0000, + .base_low = 0x0000, + .base_mid = 0x00, + .attributes = GDT_ATTRIBUTE_64BIT_CODE | GDT_ATTRIBUTE_PRESENT | + GDT_ATTRIBUTE_DPL3 | GDT_ATTRIBUTE_NONSYSTEM | + GDT_ATTRIBUTE_EXECUTABLE | GDT_ATTRIBUTE_READABLE, + .base_hi = 0x00 }, /* User data (0x20) */ { - .limit = 0x0000, - .base_low = 0x0000, - .base_mid = 0x00, - .access = 0xF2, - .granularity = 0x00, - .base_hi = 0x00 + .limit = 0x0000, + .base_low = 0x0000, + .base_mid = 0x00, + .attributes = GDT_ATTRIBUTE_PRESENT | GDT_ATTRIBUTE_DPL3 | + GDT_ATTRIBUTE_NONSYSTEM | GDT_ATTRIBUTE_WRITABLE, + .base_hi = 0x00 }, - /* TSS segment (0x28) */ - {0} + /* + * TSS segment (0x28) + * + * NOTE: 64-bit TSS descriptors are 16 bytes, equivalent to the size of two + * regular descriptor entries. + * See Intel SPG 3/25 Section 9.2.3 - TSS Descriptor in 64-bit mode. 
+ */ + {0}, {0} +}; + +/* Verify that the GDT is of the correct size */ +__static_assert(sizeof(g_gdt_data) == (8 * GDT_ENTRY_COUNT)); + +const struct gdtr g_gdtr = { + .limit = sizeof(g_gdt_data) - 1, + .offset = (uintptr_t)&g_gdt_data[0] }; diff --git a/sys/arch/amd64/amd64/hpet.c b/sys/arch/amd64/amd64/hpet.c index 1670546..3b0ca46 100644 --- a/sys/arch/amd64/amd64/hpet.c +++ b/sys/arch/amd64/amd64/hpet.c @@ -47,6 +47,7 @@ #define CAP_CLK_PERIOD(caps) (caps >> 32) #define FSEC_PER_SECOND 1000000000000000ULL +#define NSEC_PER_SECOND 1000000000ULL #define USEC_PER_SECOND 1000000ULL static void *hpet_base = NULL; @@ -135,6 +136,20 @@ hpet_time_usec(void) } static size_t +hpet_time_nsec(void) +{ + uint64_t period, freq, caps; + uint64_t counter; + + caps = hpet_read(HPET_REG_CAPS); + period = CAP_CLK_PERIOD(caps); + freq = FSEC_PER_SECOND / period; + + counter = hpet_read(HPET_REG_MAIN_COUNTER); + return (counter * NSEC_PER_SECOND) / freq; +} + +static size_t hpet_time_sec(void) { return hpet_time_usec() / USEC_PER_SECOND; @@ -180,6 +195,7 @@ hpet_init(void) timer.usleep = hpet_usleep; timer.nsleep = hpet_nsleep; timer.get_time_usec = hpet_time_usec; + timer.get_time_nsec = hpet_time_nsec; timer.get_time_sec = hpet_time_sec; register_timer(TIMER_GP, &timer); return 0; diff --git a/sys/arch/amd64/amd64/intr.c b/sys/arch/amd64/amd64/intr.c index c31ee3c..685a16d 100644 --- a/sys/arch/amd64/amd64/intr.c +++ b/sys/arch/amd64/amd64/intr.c @@ -31,12 +31,19 @@ #include <sys/param.h> #include <sys/errno.h> #include <sys/panic.h> +#include <sys/cdefs.h> +#include <sys/syslog.h> #include <machine/intr.h> #include <machine/cpu.h> #include <machine/asm.h> +#include <machine/ioapic.h> #include <vm/dynalloc.h> +#include <string.h> -static struct intr_entry *intrs[256] = {0}; +#define pr_trace(fmt, ...) kprintf("intr: " fmt, ##__VA_ARGS__) +#define pr_error(...) pr_trace(__VA_ARGS__) + +struct intr_hand *g_intrs[256] = {0}; int splraise(uint8_t s) @@ -67,35 +74,69 @@ splx(uint8_t s) ci->ipl = s; } -int -intr_alloc_vector(const char *name, uint8_t priority) +void * +intr_register(const char *name, const struct intr_hand *ih) { - size_t vec = MAX(priority << IPL_SHIFT, 0x20); - struct intr_entry *intr; + uint32_t vec = MAX(ih->priority << IPL_SHIFT, 0x20); + struct intr_hand *ih_new; + struct intr_data *idp_new; + const struct intr_data *idp; + size_t name_len; /* Sanity check */ - if (vec > NELEM(intrs)) { - return -1; + if (vec > NELEM(g_intrs) || name == NULL) { + return NULL; + } + + ih_new = dynalloc(sizeof(*ih_new)); + if (ih_new == NULL) { + pr_error("could not allocate new interrupt handler\n"); + return NULL; } /* * Try to allocate an interrupt vector. An IPL is made up * of 4 bits so there can be 16 vectors per IPL. + * + * XXX: Vector 0x20 is reserved for the Hyra scheduler, + * vector 0x21 is reserved for the CPU halt IPI, + * and vector 0x22 is reserved for TLB shootdowns. 
*/ for (int i = vec; i < vec + 16; ++i) { - if (intrs[i] != NULL) { + if (g_intrs[i] != NULL || i < 0x23) { continue; } - intr = dynalloc(sizeof(*intr)); - if (intr == NULL) { - return -ENOMEM; + /* Allocate memory for the name */ + name_len = strlen(name) + 1; + ih_new->name = dynalloc(name_len); + if (ih_new->name == NULL) { + dynfree(ih_new); + pr_trace("could not allocate interrupt name\n"); + return NULL; } - intr->priority = priority; - intrs[i] = intr; - return i; + memcpy(ih_new->name, name, name_len); + idp_new = &ih_new->data; + idp = &ih->data; + + /* Pass the interrupt data */ + idp_new->ihp = ih_new; + idp_new->data_u64 = idp->data_u64; + + /* Setup the new intr_hand */ + ih_new->func = ih->func; + ih_new->priority = ih->priority; + ih_new->irq = ih->irq; + ih_new->vector = i; + g_intrs[i] = ih_new; + + if (ih->irq >= 0) { + ioapic_set_vec(ih->irq, i); + ioapic_irq_unmask(ih->irq); + } + return ih_new; } - return -1; + return NULL; } diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c index 70d36a5..022592c 100644 --- a/sys/arch/amd64/amd64/lapic.c +++ b/sys/arch/amd64/amd64/lapic.c @@ -340,7 +340,7 @@ lapic_init(void) /* Allocate a vector if needed */ if (lapic_timer_vec == 0) { - lapic_timer_vec = intr_alloc_vector("lapictmr", IPL_CLOCK); + lapic_timer_vec = (IPL_CLOCK << IPL_SHIFT) | 0x20; idt_set_desc(lapic_timer_vec, IDT_INT_GATE, ISR(lapic_tmr_isr), IST_SCHED); } diff --git a/sys/arch/amd64/amd64/lapic_intr.S b/sys/arch/amd64/amd64/lapic_intr.S index e22cbca..5ae8f39 100644 --- a/sys/arch/amd64/amd64/lapic_intr.S +++ b/sys/arch/amd64/amd64/lapic_intr.S @@ -34,6 +34,5 @@ INTRENTRY(lapic_tmr_isr, handle_lapic_tmr) handle_lapic_tmr: call sched_switch // Context switch per every timer IRQ - call i8042_sync // Sometimes needed depending on i8042 quirks call lapic_eoi // Done! Signal that we finished to the Local APIC retq diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c index 07d6cdd..8258f8e 100644 --- a/sys/arch/amd64/amd64/machdep.c +++ b/sys/arch/amd64/amd64/machdep.c @@ -42,24 +42,32 @@ #include <machine/uart.h> #include <machine/sync.h> #include <machine/intr.h> +#include <machine/cdefs.h> #include <machine/isa/i8042var.h> +#define pr_trace(fmt, ...) kprintf("cpu: " fmt, ##__VA_ARGS__) +#define pr_error(...) pr_trace(__VA_ARGS__) +#define pr_trace_bsp(...) \ + if (!bsp_init) { \ + pr_trace(__VA_ARGS__); \ + } + +#define HALT_VECTOR 0x21 +#define TLB_VECTOR 0x22 + #if defined(__SPECTRE_IBRS) #define SPECTRE_IBRS __SPECTRE_IBRS #else #define SPECTRE_IBRS 0 #endif -static uint8_t halt_vector = 0; - int ibrs_enable(void); +int simd_init(void); void syscall_isr(void); +void pin_isr_load(void); struct cpu_info g_bsp_ci = {0}; -static struct gdtr bsp_gdtr = { - .limit = sizeof(struct gdt_entry) * 256 - 1, - .offset = (uintptr_t)&g_gdt_data[0] -}; +static bool bsp_init = false; __attribute__((__interrupt__)) static void @@ -69,13 +77,34 @@ cpu_halt_isr(void *p) __builtin_unreachable(); } +__attribute__((__interrupt__)) static void -setup_vectors(void) +tlb_shootdown_isr(void *p) { - if (halt_vector == 0) { - halt_vector = intr_alloc_vector("cpu-halt", IPL_HIGH); + struct cpu_info *ci; + int ipl; + + /* + * Get the current CPU and check if we even + * need a shootdown. If `tlb_shootdown' is + * unset, this is not for us. 
+ */ + ci = this_cpu(); + if (!ci->tlb_shootdown) { + return; } + ipl = splraise(IPL_HIGH); + __invlpg(ci->shootdown_va); + + ci->shootdown_va = 0; + ci->tlb_shootdown = 0; + splx(ipl); +} + +static void +setup_vectors(void) +{ idt_set_desc(0x0, IDT_TRAP_GATE, ISR(arith_err), 0); idt_set_desc(0x2, IDT_TRAP_GATE, ISR(nmi), 0); idt_set_desc(0x3, IDT_TRAP_GATE, ISR(breakpoint_handler), 0); @@ -89,7 +118,9 @@ setup_vectors(void) idt_set_desc(0xD, IDT_TRAP_GATE, ISR(general_prot), 0); idt_set_desc(0xE, IDT_TRAP_GATE, ISR(page_fault), 0); idt_set_desc(0x80, IDT_USER_INT_GATE, ISR(syscall_isr), 0); - idt_set_desc(halt_vector, IDT_INT_GATE, ISR(cpu_halt_isr), 0); + idt_set_desc(HALT_VECTOR, IDT_INT_GATE, ISR(cpu_halt_isr), 0); + idt_set_desc(TLB_VECTOR, IDT_INT_GATE, ISR(tlb_shootdown_isr), 0); + pin_isr_load(); } static inline void @@ -97,7 +128,7 @@ init_tss(struct cpu_info *ci) { struct tss_desc *desc; - desc = (struct tss_desc *)&g_gdt_data[GDT_TSS]; + desc = (struct tss_desc *)&g_gdt_data[GDT_TSS_INDEX]; write_tss(ci, desc); tss_load(); } @@ -133,6 +164,40 @@ backtrace_addr_to_name(uintptr_t addr, off_t *off) return NULL; } +static void +enable_simd(void) +{ + int retval; + + if ((retval = simd_init()) < 0) { + pr_trace_bsp("SIMD not supported\n"); + } + + if (retval == 1) { + pr_trace_bsp("SSE enabled but not AVX\n"); + } +} + +void +cpu_shootdown_tlb(vaddr_t va) +{ + uint32_t ncpu = cpu_count(); + struct cpu_info *cip; + + for (uint32_t i = 0; i < ncpu; ++i) { + cip = cpu_get(i); + if (cip == NULL) { + break; + } + + spinlock_acquire(&cip->lock); + cip->shootdown_va = va; + cip->tlb_shootdown = 1; + lapic_send_ipi(cip->apicid, IPI_SHORTHAND_NONE, TLB_VECTOR); + spinlock_release(&cip->lock); + } +} + void md_backtrace(void) { @@ -170,10 +235,25 @@ cpu_halt_all(void) } /* Send IPI to all cores */ - lapic_send_ipi(0, IPI_SHORTHAND_ALL, halt_vector); + lapic_send_ipi(0, IPI_SHORTHAND_ALL, HALT_VECTOR); for (;;); } +/* + * Same as cpu_halt_all() but for all other + * cores but ourselves. + */ +void +cpu_halt_others(void) +{ + if (rdmsr(IA32_GS_BASE) == 0) { + __ASMV("cli; hlt"); + } + + /* Send IPI to all cores */ + lapic_send_ipi(0, IPI_SHORTHAND_OTHERS, HALT_VECTOR); +} + void serial_init(void) { @@ -224,7 +304,7 @@ void cpu_startup(struct cpu_info *ci) { ci->self = ci; - gdt_load(&bsp_gdtr); + gdt_load(); idt_load(); setup_vectors(); @@ -233,5 +313,10 @@ cpu_startup(struct cpu_info *ci) init_tss(ci); try_mitigate_spectre(); + enable_simd(); lapic_init(); + + if (!bsp_init) { + bsp_init = true; + } } diff --git a/sys/arch/amd64/amd64/mp.c b/sys/arch/amd64/amd64/mp.c index a8a36c7..dbee32c 100644 --- a/sys/arch/amd64/amd64/mp.c +++ b/sys/arch/amd64/amd64/mp.c @@ -29,9 +29,12 @@ #include <sys/types.h> #include <sys/limine.h> +#include <sys/limits.h> #include <sys/syslog.h> +#include <sys/proc.h> #include <sys/spinlock.h> #include <sys/sched.h> +#include <sys/atomic.h> #include <machine/cpu.h> #include <vm/dynalloc.h> #include <assert.h> @@ -39,42 +42,66 @@ #define pr_trace(fmt, ...) 
kprintf("cpu_mp: " fmt, ##__VA_ARGS__) +extern struct proc g_proc0; static volatile struct limine_smp_request g_smp_req = { .id = LIMINE_SMP_REQUEST, .revision = 0 }; +static volatile uint32_t ncpu_up = 1; +static struct cpu_info *ci_list[CPU_MAX]; +static struct spinlock ci_list_lock = {0}; + static void ap_trampoline(struct limine_smp_info *si) { - static struct spinlock lock = {0}; struct cpu_info *ci; ci = dynalloc(sizeof(*ci)); __assert(ci != NULL); memset(ci, 0, sizeof(*ci)); - spinlock_acquire(&lock); cpu_startup(ci); + spinlock_acquire(&ci_list_lock); + ci_list[ncpu_up] = ci; + spinlock_release(&ci_list_lock); - spinlock_release(&lock); + atomic_inc_int(&ncpu_up); sched_enter(); - while (1); } +struct cpu_info * +cpu_get(uint32_t index) +{ + if (index >= ncpu_up) { + return NULL; + } + + return ci_list[index]; +} + +uint32_t +cpu_count(void) +{ + return ncpu_up; +} + void mp_bootstrap_aps(struct cpu_info *ci) { struct limine_smp_response *resp = g_smp_req.response; struct limine_smp_info **cpus; size_t cpu_init_counter; + uint32_t ncpu; /* Should not happen */ __assert(resp != NULL); cpus = resp->cpus; - cpu_init_counter = resp->cpu_count - 1; + ncpu = resp->cpu_count; + cpu_init_counter = ncpu - 1; + ci_list[0] = ci; if (resp->cpu_count == 1) { pr_trace("CPU has 1 core, no APs to bootstrap...\n"); @@ -90,4 +117,13 @@ mp_bootstrap_aps(struct cpu_info *ci) cpus[i]->goto_address = ap_trampoline; } + + /* Start up idle threads */ + pr_trace("kicking %d idle threads...\n", ncpu); + for (uint32_t i = 0; i < ncpu; ++i) { + spawn(&g_proc0, sched_enter, NULL, 0, NULL); + } + + /* Wait for all cores to be ready */ + while ((ncpu_up - 1) < cpu_init_counter); } diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c index 2e62a4b..6c6bfcd 100644 --- a/sys/arch/amd64/amd64/pmap.c +++ b/sys/arch/amd64/amd64/pmap.c @@ -33,6 +33,8 @@ #include <sys/errno.h> #include <machine/tlb.h> #include <machine/vas.h> +#include <machine/cpu.h> +#include <machine/cdefs.h> #include <vm/pmap.h> #include <vm/physmem.h> #include <vm/vm.h> @@ -52,7 +54,7 @@ #define PTE_PCD BIT(4) /* Page-level cache disable */ #define PTE_ACC BIT(5) /* Accessed */ #define PTE_DIRTY BIT(6) /* Dirty (written-to page) */ -#define PTE_PAT BIT(7) +#define PTE_PS BIT(7) /* Page size */ #define PTE_GLOBAL BIT(8) #define PTE_NX BIT(63) /* Execute-disable */ @@ -112,6 +114,16 @@ pmap_extract(uint8_t level, vaddr_t va, vaddr_t *pmap, bool alloc) return NULL; } + /* + * TODO: Support huge pages... For now, don't let the + * bootloader fuck us up with their pre-kernel + * mappings and tell huge pages to get the fuck. + * + */ + if (ISSET(pmap[idx], PTE_PS)) { + pmap[idx] = 0; + } + if (ISSET(pmap[idx], PTE_P)) { next = (pmap[idx] & PTE_ADDR_MASK); return PHYS_TO_VIRT(next); @@ -176,14 +188,15 @@ done: * @vas: Virtual address space. * @va: Target virtual address. * @val: Value to write. + * @alloc: True to alloc new paging entries. 
*/ static int -pmap_update_tbl(struct vas vas, vaddr_t va, uint64_t val) +pmap_update_tbl(struct vas vas, vaddr_t va, uint64_t val, bool alloc) { uintptr_t *tbl; int status; - if ((status = pmap_get_tbl(vas, va, true, &tbl)) != 0) { + if ((status = pmap_get_tbl(vas, va, alloc, &tbl)) != 0) { return status; } @@ -266,19 +279,21 @@ pmap_map(struct vas vas, vaddr_t va, paddr_t pa, vm_prot_t prot) { uint32_t flags = pmap_prot_to_pte(prot); - return pmap_update_tbl(vas, va, (pa | flags)); + return pmap_update_tbl(vas, va, (pa | flags), true); } int pmap_unmap(struct vas vas, vaddr_t va) { - return pmap_update_tbl(vas, va, 0); + return pmap_update_tbl(vas, va, 0, false); } int pmap_set_cache(struct vas vas, vaddr_t va, int type) { uintptr_t *tbl; + uint32_t flags; + paddr_t pa; int status; size_t idx; @@ -286,20 +301,62 @@ pmap_set_cache(struct vas vas, vaddr_t va, int type) return status; idx = pmap_get_level_index(1, va); + pa = tbl[idx] & PTE_ADDR_MASK; + flags = tbl[idx] & ~PTE_ADDR_MASK; /* Set the caching policy */ switch (type) { case VM_CACHE_UC: - tbl[idx] |= PTE_PCD; - tbl[idx] &= ~PTE_PWT; + flags |= PTE_PCD; + flags &= ~PTE_PWT; break; case VM_CACHE_WT: - tbl[idx] &= ~PTE_PCD; - tbl[idx] |= PTE_PWT; + flags &= ~PTE_PCD; + flags |= PTE_PWT; break; default: return -EINVAL; } + return pmap_update_tbl(vas, va, (pa | flags), false); +} + +bool +pmap_is_clean(struct vas vas, vaddr_t va) +{ + uintptr_t *tbl; + int status; + size_t idx; + + if ((status = pmap_get_tbl(vas, va, false, &tbl)) != 0) + return status; + + idx = pmap_get_level_index(1, va); + return ISSET(tbl[idx], PTE_DIRTY) == 0; +} + +void +pmap_mark_clean(struct vas vas, vaddr_t va) +{ + uintptr_t *tbl; + int status; + size_t idx; + + if ((status = pmap_get_tbl(vas, va, false, &tbl)) != 0) + return; + + idx = pmap_get_level_index(1, va); + tbl[idx] &= ~PTE_DIRTY; + + if (cpu_count() > 1) { + cpu_shootdown_tlb(va); + } else { + __invlpg(va); + } +} + +int +pmap_init(void) +{ return 0; } diff --git a/sys/arch/amd64/amd64/proc_machdep.c b/sys/arch/amd64/amd64/proc_machdep.c index 0be85fd..63604a4 100644 --- a/sys/arch/amd64/amd64/proc_machdep.c +++ b/sys/arch/amd64/amd64/proc_machdep.c @@ -40,7 +40,7 @@ #include <vm/map.h> #include <string.h> -void +uintptr_t md_td_stackinit(struct proc *td, void *stack_top, struct exec_prog *prog) { uintptr_t *sp = stack_top; @@ -97,6 +97,7 @@ md_td_stackinit(struct proc *td, void *stack_top, struct exec_prog *prog) STACK_PUSH(sp, argc); tfp = &td->tf; tfp->rsp = (uintptr_t)sp - VM_HIGHER_HALF; + return tfp->rsp; } void @@ -123,24 +124,31 @@ md_td_kick(struct proc *td) { struct trapframe *tfp; struct cpu_info *ci; + uint16_t ds = USER_DS | 3; tfp = &td->tf; ci = this_cpu(); ci->curtd = td; + td->flags &= ~PROC_KTD; __ASMV( - "push %0\n" + "mov %0, %%rax\n" "push %1\n" - "pushf\n" "push %2\n" "push %3\n" + "push %%rax\n" + "push %4\n" + "test $3, %%ax\n" + "jz 1f\n" "lfence\n" "swapgs\n" - "iretq" + "1:\n" + " iretq" : - : "i" (USER_DS | 3), + : "r" (tfp->cs), + "r" (ds), "r" (tfp->rsp), - "i" (USER_CS | 3), + "m" (tfp->rflags), "r" (tfp->rip) ); @@ -155,13 +163,14 @@ md_td_kick(struct proc *td) * @ip: Instruction pointer. 
*/ int -md_fork(struct proc *p, struct proc *parent, uintptr_t ip) +md_spawn(struct proc *p, struct proc *parent, uintptr_t ip) { uintptr_t stack_base; struct trapframe *tfp; struct pcb *pcbp; uint8_t rpl = 0; int error; + vm_prot_t prot = PROT_READ | PROT_WRITE; tfp = &p->tf; @@ -201,9 +210,10 @@ md_fork(struct proc *p, struct proc *parent, uintptr_t ip) */ if (rpl == 0) { stack_base += VM_HIGHER_HALF; + p->flags |= PROC_KTD; } else { - vm_map(pcbp->addrsp, stack_base, stack_base, - PROT_READ | PROT_WRITE | PROT_USER, PROC_STACK_PAGES); + prot |= PROT_USER; + vm_map(pcbp->addrsp, stack_base, stack_base, prot, PROC_STACK_PAGES); } p->stack_base = stack_base; diff --git a/sys/arch/amd64/amd64/reboot.c b/sys/arch/amd64/amd64/reboot.c index b9df1c0..d47a352 100644 --- a/sys/arch/amd64/amd64/reboot.c +++ b/sys/arch/amd64/amd64/reboot.c @@ -32,10 +32,15 @@ #include <sys/cdefs.h> #include <machine/pio.h> #include <machine/cpu.h> +#include <dev/acpi/acpi.h> void cpu_reboot(int method) { + if (ISSET(method, REBOOT_POWEROFF)) { + acpi_sleep(ACPI_SLEEP_S5); + } + if (ISSET(method, REBOOT_HALT)) { cpu_halt_all(); } @@ -45,3 +50,13 @@ cpu_reboot(int method) outb(0x64, 0xFE); } } + +/* + * arg0: Method bits + */ +scret_t +sys_reboot(struct syscall_args *scargs) +{ + cpu_reboot(scargs->arg0); + __builtin_unreachable(); +} diff --git a/sys/arch/amd64/isa/i8042.S b/sys/arch/amd64/amd64/simd.S index 123d3a5..23fe461 100644 --- a/sys/arch/amd64/isa/i8042.S +++ b/sys/arch/amd64/amd64/simd.S @@ -27,11 +27,50 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <machine/frameasm.h> - .text - .globl i8042_kb_isr -INTRENTRY(i8042_kb_isr, handle_kb) -handle_kb: - call i8042_kb_event + .globl simd_init +simd_init: + /* + * Enable SIMD, if SSE and AVX is supported, + * a value of zero is returned. If SSE is + * supported yet AVX is not, a value of one + * is returned. However, if none are supported, + * this routine returns -1. + */ + + // Do we support SSE? + mov $1, %eax + cpuid + bt $25, %edx + jnc .sse_not_sup + + mov %cr0, %rax // Old CR0 -> EAX + and $0xFFFB, %ax // Disable co-processor emulation + or $0x02, %ax // Enable co-processor monitoring + mov %rax, %cr0 // Update CR0 with new flags + + mov %cr4, %rax // Old CR4 -> EAX + or $0x200, %ax // Enable FXSAVE/FXRSTOR + or $0x400, %ax // Enable SIMD FP exceptions + mov %rax, %cr4 // Update CR4 with new flags + + mov $1, %eax // LEAF 1 + cpuid // Bit 28 of ECX indicates AVX support + mov $3, %eax // We need to check two bits + shl $27, %eax // Which are ECX.OSXSAVE and ECX.AVX + test %eax, %ecx // Are XSAVE and AVX supported? 
+ jnc .avx_not_sup // Nope, just continue + + // Enable AVX + xor %rcx, %rcx // Select XCR0 + xgetbv // Load extended control register + or $0x07, %eax // Set AVX + SSE bits + xsetbv // Store new flags + xor %rax, %rax // Everything is good + retq // Return back to caller (RETURN) +.sse_not_sup: + mov $-1, %rax + retq +.avx_not_sup: + mov $1, %rax retq diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c index 9a3a7ba..84a6a77 100644 --- a/sys/arch/amd64/amd64/trap.c +++ b/sys/arch/amd64/amd64/trap.c @@ -60,6 +60,17 @@ static const char *trap_type[] = { [TRAP_SS] = "stack-segment fault" }; +/* Page-fault flags */ +static const char pf_flags[] = { + 'p', /* Present */ + 'w', /* Write */ + 'u', /* User */ + 'r', /* Reserved write */ + 'x', /* Instruction fetch */ + 'k', /* Protection key violation */ + 's' /* Shadow stack access */ +}; + static inline uintptr_t pf_faultaddr(void) { @@ -69,6 +80,23 @@ pf_faultaddr(void) } static void +pf_code(uint64_t error_code) +{ + char tab[8] = { + '-', '-', '-', + '-', '-', '-', + '-', '\0' + }; + + for (int i = 0; i < 7; ++i) { + if (ISSET(error_code, BIT(i))) { + tab[i] = pf_flags[i]; + } + } + kprintf("code=[%s]\n", tab); +} + +static void regdump(struct trapframe *tf) { uintptr_t cr3, cr2 = pf_faultaddr(); @@ -79,6 +107,10 @@ regdump(struct trapframe *tf) : "memory" ); + if (tf->trapno == TRAP_PAGEFLT) { + pf_code(tf->error_code); + } + kprintf(OMIT_TIMESTAMP "RAX=%p RCX=%p RDX=%p\n" "RBX=%p RSI=%p RDI=%p\n" @@ -101,6 +133,9 @@ trap_user(struct trapframe *tf) switch (tf->trapno) { case TRAP_PROTFLT: case TRAP_PAGEFLT: + if (tf->trapno == TRAP_PAGEFLT) { + pf_code(tf->error_code); + } sigaddset(&sigset, SIGSEGV); break; case TRAP_ARITH_ERR: @@ -120,20 +155,6 @@ trap_user(struct trapframe *tf) dispatch_signals(td); } -static void -trap_quirks(struct cpu_info *ci) -{ - static uint8_t count; - - if (ISSET(ci->irq_mask, CPU_IRQ(1)) && count < 1) { - ++count; - pr_error("detected buggy i8042\n"); - pr_error("applying I8042_HOSTILE quirk\n"); - i8042_quirk(I8042_HOSTILE); - return; - } -} - void trap_syscall(struct trapframe *tf) { @@ -155,8 +176,6 @@ trap_syscall(struct trapframe *tf) void trap_handler(struct trapframe *tf) { - struct cpu_info *ci; - splraise(IPL_HIGH); if (tf->trapno >= NELEM(trap_type)) { @@ -164,8 +183,6 @@ trap_handler(struct trapframe *tf) } pr_error("got %s\n", trap_type[tf->trapno]); - ci = this_cpu(); - trap_quirks(ci); /* Handle traps from userland */ if (ISSET(tf->cs, 3)) { diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S new file mode 100644 index 0000000..c820a41 --- /dev/null +++ b/sys/arch/amd64/amd64/vector.S @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <machine/frameasm.h> + +#define IDT_INT_GATE 0x8E + +.macro IDT_SET_VEC vec, sym + mov $\vec, %rdi + mov $IDT_INT_GATE, %rsi + lea \sym(%rip), %rdx + xor %rcx, %rcx + call idt_set_desc +.endm + + .text + ALIGN_TEXT +ioapic_common_func: + xor %rcx, %rcx // Clear counter +.walk: // Walk the handlers + lea g_intrs(%rip), %rbx // Grab table to RBX + lea (%rbx, %rcx, 8), %rbx // g_intrs + (8 * rcx) + mov (%rbx), %rdx // Grab the intr_hand + or %rdx, %rdx // No more? + jz 1f // Nope, return + + mov (%rdx), %rbx // intr_hand.func + add $8, %rdx // Get interrupt data + mov %rdx, %rdi // Pass the interrupt data + push %rcx // Save our counter + call *%rbx // Call the handler + pop %rcx // Restore our counter + or %rax, %rax // Was it theirs? (RET >= 1) + jnz done // Yes, we are done. +1: inc %rcx // Next + cmp $256, %rcx // Did we reach the end? + jl .walk // Nope, keep going +done: + call lapic_eoi + retq + + .globl pin_isr_load +pin_isr_load: + IDT_SET_VEC 35, ioapic_edge_0 + IDT_SET_VEC 36, ioapic_edge_1 + IDT_SET_VEC 37, ioapic_edge_2 + IDT_SET_VEC 38, ioapic_edge_3 + IDT_SET_VEC 39, ioapic_edge_4 + IDT_SET_VEC 40, ioapic_edge_5 + IDT_SET_VEC 41, ioapic_edge_6 + IDT_SET_VEC 42, ioapic_edge_7 + IDT_SET_VEC 43, ioapic_edge_8 + IDT_SET_VEC 44, ioapic_edge_9 + IDT_SET_VEC 45, ioapic_edge_10 + IDT_SET_VEC 46, ioapic_edge_11 + IDT_SET_VEC 47, ioapic_edge_12 + IDT_SET_VEC 48, ioapic_edge_13 + IDT_SET_VEC 49, ioapic_edge_14 + IDT_SET_VEC 50, ioapic_edge_15 + IDT_SET_VEC 51, ioapic_edge_16 + IDT_SET_VEC 52, ioapic_edge_17 + IDT_SET_VEC 53, ioapic_edge_18 + IDT_SET_VEC 54, ioapic_edge_19 + IDT_SET_VEC 55, ioapic_edge_20 + IDT_SET_VEC 56, ioapic_edge_21 + IDT_SET_VEC 57, ioapic_edge_22 + IDT_SET_VEC 58, ioapic_edge_23 + IDT_SET_VEC 59, ioapic_edge_24 + IDT_SET_VEC 60, ioapic_edge_25 + IDT_SET_VEC 61, ioapic_edge_26 + IDT_SET_VEC 62, ioapic_edge_27 + IDT_SET_VEC 63, ioapic_edge_28 + IDT_SET_VEC 64, ioapic_edge_29 + IDT_SET_VEC 65, ioapic_edge_30 + IDT_SET_VEC 66, ioapic_edge_31 + IDT_SET_VEC 67, ioapic_edge_32 + IDT_SET_VEC 68, ioapic_edge_33 + IDT_SET_VEC 69, ioapic_edge_34 + IDT_SET_VEC 70, ioapic_edge_35 + IDT_SET_VEC 71, ioapic_edge_36 + IDT_SET_VEC 72, ioapic_edge_37 + IDT_SET_VEC 73, ioapic_edge_38 + IDT_SET_VEC 74, ioapic_edge_39 + IDT_SET_VEC 75, ioapic_edge_40 + IDT_SET_VEC 76, ioapic_edge_41 + IDT_SET_VEC 77, ioapic_edge_42 + IDT_SET_VEC 78, ioapic_edge_43 + IDT_SET_VEC 79, ioapic_edge_44 + IDT_SET_VEC 80, ioapic_edge_45 + IDT_SET_VEC 81, ioapic_edge_46 + IDT_SET_VEC 82, ioapic_edge_47 + IDT_SET_VEC 83, ioapic_edge_48 + IDT_SET_VEC 84, ioapic_edge_49 + IDT_SET_VEC 85, ioapic_edge_50 + IDT_SET_VEC 86, ioapic_edge_51 + IDT_SET_VEC 87, ioapic_edge_52 + IDT_SET_VEC 88, ioapic_edge_53 + 
IDT_SET_VEC 89, ioapic_edge_54 + IDT_SET_VEC 90, ioapic_edge_55 + IDT_SET_VEC 91, ioapic_edge_56 + IDT_SET_VEC 92, ioapic_edge_57 + IDT_SET_VEC 93, ioapic_edge_58 + IDT_SET_VEC 94, ioapic_edge_59 + IDT_SET_VEC 95, ioapic_edge_60 + IDT_SET_VEC 96, ioapic_edge_61 + IDT_SET_VEC 97, ioapic_edge_62 + IDT_SET_VEC 97, ioapic_edge_63 + ret + +/* I/O APIC edge ISRs */ +INTRENTRY(ioapic_edge_0, ioapic_common_func) +INTRENTRY(ioapic_edge_1, ioapic_common_func) +INTRENTRY(ioapic_edge_2, ioapic_common_func) +INTRENTRY(ioapic_edge_3, ioapic_common_func) +INTRENTRY(ioapic_edge_4, ioapic_common_func) +INTRENTRY(ioapic_edge_5, ioapic_common_func) +INTRENTRY(ioapic_edge_6, ioapic_common_func) +INTRENTRY(ioapic_edge_7, ioapic_common_func) +INTRENTRY(ioapic_edge_8, ioapic_common_func) +INTRENTRY(ioapic_edge_9, ioapic_common_func) +INTRENTRY(ioapic_edge_10, ioapic_common_func) +INTRENTRY(ioapic_edge_11, ioapic_common_func) +INTRENTRY(ioapic_edge_12, ioapic_common_func) +INTRENTRY(ioapic_edge_13, ioapic_common_func) +INTRENTRY(ioapic_edge_14, ioapic_common_func) +INTRENTRY(ioapic_edge_15, ioapic_common_func) +INTRENTRY(ioapic_edge_16, ioapic_common_func) +INTRENTRY(ioapic_edge_17, ioapic_common_func) +INTRENTRY(ioapic_edge_18, ioapic_common_func) +INTRENTRY(ioapic_edge_19, ioapic_common_func) +INTRENTRY(ioapic_edge_20, ioapic_common_func) +INTRENTRY(ioapic_edge_21, ioapic_common_func) +INTRENTRY(ioapic_edge_22, ioapic_common_func) +INTRENTRY(ioapic_edge_23, ioapic_common_func) +INTRENTRY(ioapic_edge_24, ioapic_common_func) +INTRENTRY(ioapic_edge_25, ioapic_common_func) +INTRENTRY(ioapic_edge_26, ioapic_common_func) +INTRENTRY(ioapic_edge_27, ioapic_common_func) +INTRENTRY(ioapic_edge_28, ioapic_common_func) +INTRENTRY(ioapic_edge_29, ioapic_common_func) +INTRENTRY(ioapic_edge_30, ioapic_common_func) +INTRENTRY(ioapic_edge_31, ioapic_common_func) +INTRENTRY(ioapic_edge_32, ioapic_common_func) +INTRENTRY(ioapic_edge_33, ioapic_common_func) +INTRENTRY(ioapic_edge_34, ioapic_common_func) +INTRENTRY(ioapic_edge_35, ioapic_common_func) +INTRENTRY(ioapic_edge_36, ioapic_common_func) +INTRENTRY(ioapic_edge_37, ioapic_common_func) +INTRENTRY(ioapic_edge_38, ioapic_common_func) +INTRENTRY(ioapic_edge_39, ioapic_common_func) +INTRENTRY(ioapic_edge_40, ioapic_common_func) +INTRENTRY(ioapic_edge_41, ioapic_common_func) +INTRENTRY(ioapic_edge_42, ioapic_common_func) +INTRENTRY(ioapic_edge_43, ioapic_common_func) +INTRENTRY(ioapic_edge_44, ioapic_common_func) +INTRENTRY(ioapic_edge_45, ioapic_common_func) +INTRENTRY(ioapic_edge_46, ioapic_common_func) +INTRENTRY(ioapic_edge_47, ioapic_common_func) +INTRENTRY(ioapic_edge_48, ioapic_common_func) +INTRENTRY(ioapic_edge_49, ioapic_common_func) +INTRENTRY(ioapic_edge_50, ioapic_common_func) +INTRENTRY(ioapic_edge_51, ioapic_common_func) +INTRENTRY(ioapic_edge_52, ioapic_common_func) +INTRENTRY(ioapic_edge_53, ioapic_common_func) +INTRENTRY(ioapic_edge_54, ioapic_common_func) +INTRENTRY(ioapic_edge_55, ioapic_common_func) +INTRENTRY(ioapic_edge_56, ioapic_common_func) +INTRENTRY(ioapic_edge_57, ioapic_common_func) +INTRENTRY(ioapic_edge_58, ioapic_common_func) +INTRENTRY(ioapic_edge_59, ioapic_common_func) +INTRENTRY(ioapic_edge_60, ioapic_common_func) +INTRENTRY(ioapic_edge_61, ioapic_common_func) +INTRENTRY(ioapic_edge_62, ioapic_common_func) +INTRENTRY(ioapic_edge_63, ioapic_common_func) diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC index d3a4368..69e071a 100644 --- a/sys/arch/amd64/conf/GENERIC +++ b/sys/arch/amd64/conf/GENERIC @@ -1,10 +1,17 @@ +// // 
Kernel options -option SPECTRE_IBRS no -option SERIAL_DEBUG yes +// +// XXX: Indirect branch restricted speculation (SPECTRE_IBRS) +// is disabled by default as it can lead to significant +// performance degradation. +// +option SPECTRE_IBRS no // Enable the IBRS CPU feature +option SERIAL_DEBUG yes // Enable kmsg serial logging +option USER_KMSG no // Show kmsg in user consoles // Kernel constants -setval SCHED_NQUEUE 4 +setval SCHED_NQUEUE 4 // Number of scheduler queues (for MLFQ) // Console attributes setval CONSOLE_BG 0x000000 -setval CONSOLE_FG 0XB57614 +setval CONSOLE_FG 0xB57614 diff --git a/sys/arch/amd64/conf/link.ld b/sys/arch/amd64/conf/link.ld index 9c47a81..a43824f 100644 --- a/sys/arch/amd64/conf/link.ld +++ b/sys/arch/amd64/conf/link.ld @@ -29,6 +29,12 @@ SECTIONS __drivers_init_end = .; } :rodata + .drivers.defer : { + __driversd_init_start = .; + *(.drivers.defer .drivers.defer) + __driversd_init_end = .; + } :rodata + . += CONSTANT(MAXPAGESIZE); .data : { diff --git a/sys/arch/amd64/isa/i8042.c b/sys/arch/amd64/isa/i8042.c index 2582d8f..eb8960c 100644 --- a/sys/arch/amd64/isa/i8042.c +++ b/sys/arch/amd64/isa/i8042.c @@ -39,6 +39,7 @@ #include <dev/acpi/acpi.h> #include <dev/timer.h> #include <dev/cons/cons.h> +#include <dev/dmi/dmi.h> #include <machine/cpu.h> #include <machine/pio.h> #include <machine/isa/i8042var.h> @@ -68,6 +69,7 @@ static struct proc polltd; static struct timer tmr; static bool is_init = false; +static void i8042_ibuf_wait(void); static int dev_send(bool aux, uint8_t data); static int i8042_kb_getc(uint8_t sc, char *chr); static void i8042_drain(void); @@ -103,41 +105,30 @@ kbd_set_leds(uint8_t mask) dev_send(false, mask); } -/* - * Poll the i8042 status register - * - * @bits: Status bits. - * @pollset: True to poll if set - */ -static int -i8042_statpoll(uint8_t bits, bool pollset) +static void +i8042_obuf_wait(void) { - size_t usec_start, usec; - size_t elapsed_msec; - uint8_t val; - bool tmp; + uint8_t status; - usec_start = tmr.get_time_usec(); for (;;) { - val = inb(I8042_STATUS); - tmp = (pollset) ? ISSET(val, bits) : !ISSET(val, bits); - usec = tmr.get_time_usec(); - elapsed_msec = (usec - usec_start) / 1000; - - IO_NOP(); - - /* If tmp is set, the register updated in time */ - if (tmp) { - break; + status = inb(I8042_STATUS); + if (ISSET(status, I8042_OBUFF)) { + return; } + } +} - /* Exit with an error if we timeout */ - if (elapsed_msec > I8042_DELAY) { - return -ETIME; +static void +i8042_ibuf_wait(void) +{ + uint8_t status; + + for (;;) { + status = inb(I8042_STATUS); + if (!ISSET(status, I8042_IBUFF)) { + return; } } - - return val; } /* @@ -162,34 +153,47 @@ i8042_drain(void) static void i8042_write(uint16_t port, uint8_t val) { - i8042_statpoll(I8042_IBUFF, false); + i8042_ibuf_wait(); outb(port, val); } /* - * Read the i8042 config register + * Read from an i8042 register. + * + * @port: I/O port + */ +static uint8_t +i8042_read(uint16_t port) +{ + i8042_obuf_wait(); + return inb(port); +} + +/* + * Read the i8042 controller configuration + * byte. */ static uint8_t i8042_read_conf(void) { - i8042_drain(); + uint8_t conf; + i8042_write(I8042_CMD, I8042_GET_CONFB); - i8042_statpoll(I8042_OBUFF, true); - return inb(I8042_DATA); + i8042_obuf_wait(); + conf = i8042_read(I8042_DATA); + return conf; } /* - * Write the i8042 config register + * Write a new value to the i8042 controller + * configuration byte. 
*/ static void -i8042_write_conf(uint8_t value) +i8042_write_conf(uint8_t conf) { - i8042_drain(); - i8042_statpoll(I8042_IBUFF, false); i8042_write(I8042_CMD, I8042_SET_CONFB); - i8042_statpoll(I8042_IBUFF, false); - i8042_write(I8042_DATA, value); - i8042_drain(); + i8042_ibuf_wait(); + i8042_write(I8042_DATA, conf); } /* @@ -205,14 +209,13 @@ dev_send(bool aux, uint8_t data) i8042_write(I8042_CMD, I8042_PORT1_SEND); } - i8042_statpoll(I8042_IBUFF, false); i8042_write(I8042_DATA, data); - i8042_statpoll(I8042_OBUFF, true); + i8042_obuf_wait(); return inb(I8042_DATA); } -void -i8042_kb_event(void) +static int +i8042_kb_event(void *sp) { struct cpu_info *ci; struct cons_input input; @@ -232,50 +235,31 @@ i8042_kb_event(void) input.chr = c; cons_ibuf_push(&g_root_scr, input); done: - ci->irq_mask &= CPU_IRQ(1); + ci->irq_mask &= ~CPU_IRQ(1); spinlock_release(&isr_lock); - lapic_eoi(); + return 1; /* handled */ } static void i8042_en_intr(void) { + struct intr_hand ih; uint8_t conf; - int vec; - - pr_trace("ENTER -> i8042_en_intr\n"); - i8042_write(I8042_CMD, I8042_DISABLE_PORT0); - pr_trace("port 0 disabled\n"); - vec = intr_alloc_vector("i8042-kb", IPL_BIO); - idt_set_desc(vec, IDT_INT_GATE, ISR(i8042_kb_isr), IST_HW_IRQ); - ioapic_set_vec(KB_IRQ, vec); - ioapic_irq_unmask(KB_IRQ); - pr_trace("irq 1 -> vec[%x]\n", vec); + ih.func = i8042_kb_event; + ih.priority = IPL_BIO; + ih.irq = KB_IRQ; + intr_register("i8042-kb", &ih); - /* Setup config bits */ + /* + * Enable the clock of PS/2 port 0 and tell + * the controller that we are accepting + * interrupts. + */ conf = i8042_read_conf(); + conf &= ~I8042_PORT0_CLK; conf |= I8042_PORT0_INTR; - conf &= ~I8042_PORT1_INTR; i8042_write_conf(conf); - pr_trace("conf written\n"); - - i8042_write(I8042_CMD, I8042_ENABLE_PORT0); - pr_trace("port 0 enabled\n"); -} - -static void -esckey_reboot(void) -{ - syslock(); - kprintf("** Machine going down for a reboot"); - - for (size_t i = 0; i < 3; ++i) { - kprintf(OMIT_TIMESTAMP "."); - tmr.msleep(1000); - } - - cpu_reboot(0); } /* @@ -292,10 +276,6 @@ i8042_kb_getc(uint8_t sc, char *chr) bool release = ISSET(sc, BIT(7)); switch (sc) { - /* Left alt [press] */ - case 0x38: - esckey_reboot(); - break; /* Caps lock [press] */ case 0x3A: /* @@ -351,43 +331,30 @@ i8042_kb_getc(uint8_t sc, char *chr) return 0; } -static void -i8042_sync_loop(void) -{ - /* Wake up the bus */ - outb(I8042_DATA, 0x00); - i8042_drain(); - - for (;;) { - i8042_sync(); - md_pause(); - } -} - /* * Grabs a key from the keyboard, used typically * for syncing the machine however can be used - * to bypass IRQs in case of buggy EC. + * to bypass IRQs to prevent lost bytes. 
*/ void i8042_sync(void) { static struct spinlock lock; struct cons_input input; - uint8_t data; + uint8_t data, status; char c; if (spinlock_try_acquire(&lock)) { return; } - if (ISSET(quirks, I8042_HOSTILE) && is_init) { - if (i8042_statpoll(I8042_OBUFF, true) < 0) { - /* No data ready */ + if (is_init) { + status = inb(I8042_STATUS); + if (!ISSET(status, I8042_OBUFF)) { goto done; } - data = inb(I8042_DATA); + data = inb(I8042_DATA); if (i8042_kb_getc(data, &c) == 0) { input.scancode = data; input.chr = c; @@ -404,9 +371,20 @@ i8042_quirk(int mask) quirks |= mask; } +static void +i8042_sync_loop(void) +{ + for (;;) { + i8042_obuf_wait(); + i8042_sync(); + } +} + static int i8042_init(void) { + const char *prodver = NULL; + /* Try to request a general purpose timer */ if (req_timer(TIMER_GP, &tmr) != TMRR_SUCCESS) { pr_error("failed to fetch general purpose timer\n"); @@ -425,6 +403,9 @@ i8042_init(void) return -ENODEV; } + i8042_write(I8042_CMD, I8042_DISABLE_PORT0); + i8042_write(I8042_CMD, I8042_DISABLE_PORT1); + /* * On some thinkpads, e.g., the T420s, the EC implementing * the i8042 logic likes to play cop and throw NMIs at us @@ -432,11 +413,14 @@ i8042_init(void) * etc... As of now, treat the i8042 like a fucking bomb * if this bit is set. */ - if (strcmp(acpi_oemid(), "LENOVO") == 0) { + if ((prodver = dmi_prodver()) == NULL) { + prodver = "None"; + } + if (strcmp(prodver, "ThinkPad T420s") == 0) { quirks |= I8042_HOSTILE; - pr_trace("lenovo device, assuming hostile\n"); + pr_trace("ThinkPad T420s detected, assuming hostile\n"); pr_trace("disabling irq 1, polling as fallback\n"); - fork1(&polltd, 0, i8042_sync_loop, NULL); + spawn(&polltd, i8042_sync_loop, NULL, 0, NULL); } if (!ISSET(quirks, I8042_HOSTILE)) { @@ -445,11 +429,8 @@ i8042_init(void) i8042_en_intr(); } - if (dev_send(false, 0xFF) == 0xFC) { - pr_error("kbd self test failure\n"); - return -EIO; - } - + i8042_write(I8042_CMD, I8042_ENABLE_PORT0); + i8042_drain(); is_init = true; return 0; } diff --git a/sys/arch/amd64/isa/mc1468.c b/sys/arch/amd64/isa/mc1468.c new file mode 100644 index 0000000..bbaa3d1 --- /dev/null +++ b/sys/arch/amd64/isa/mc1468.c @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2023-2025 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/driver.h> +#include <sys/device.h> +#include <sys/syslog.h> +#include <fs/devfs.h> +#include <machine/pio.h> +#include <machine/cdefs.h> +#include <string.h> + +#define MC1468_REGSEL 0x70 +#define MC1468_DATA 0x71 + +/* Register A flags */ +#define MC1468_UPDATING BIT(7) + +/* Register B flags */ +#define MC1468_DAYSAVE BIT(1) +#define MC1468_CLOCK24 BIT(2) + +static struct cdevsw mc1468_cdevsw; + +static uint8_t +bin_dabble(uint8_t bin) +{ + uint8_t retval = 0; + uint8_t nibble; + + for (int i = 7; i >= 0; --i) { + retval <<= 1; + if (bin & (1 << i)) { + retval |= 1; + } + + for (int j = 0; j < 2; ++j) { + nibble = retval & (retval >> (4 * nibble)) & 0x0F; + if (nibble >= 5) { + retval += 0x03 << (4 * nibble); + } + } + } + + return retval; +} + +/* + * Read a byte from an MC1468XX register. + */ +static uint8_t +mc1468_read(uint8_t reg) +{ + outb(MC1468_REGSEL, reg); + return inb(MC1468_DATA); +} + +/* + * Write a byte to the MC1468XX register. + */ +static void +mc1468_write(uint8_t reg, uint8_t val) +{ + outb(MC1468_REGSEL, reg); + outb(MC1468_DATA, val); +} + +/* + * Returns true if the MC1468XX is updating + * its time registers. + */ +static bool +mc1468_updating(void) +{ + uint8_t reg_b; + + reg_b = mc1468_read(0xB); + return ISSET(reg_b, MC1468_UPDATING) != 0; +} + +/* + * Check if date `a' and date `b' are synced. + * Used to make sure a bogus date caused by a + * read right before an MC1468XX register + * update doesn't occur. + */ +static bool +mc1468_date_synced(struct date *a, struct date *b) +{ + if (a->year != b->year) + return false; + if (a->month != b->month) + return false; + if (a->day != b->day) + return false; + if (a->sec != b->sec) + return false; + if (a->min != b->min) + return false; + if (a->hour != b->hour) + return false; + + return true; +} + +/* + * Sometimes the clock chip may encode the + * date in binary-coded-decimal. This function + * converts a date in BCD format to plain binary. + */ +static void +mc1468_bcd_conv(struct date *dp) +{ + dp->year = (dp->year & 0x0F) + ((dp->year / 16) * 10); + dp->month = (dp->month & 0x0F) + ((dp->month / 16) * 10); + dp->day = (dp->day & 0x0F) + ((dp->day / 16) * 10); + dp->sec = (dp->sec & 0x0F) + ((dp->sec / 16) * 10); + dp->min = (dp->min & 0x0F) + ((dp->min / 16) * 10); + dp->hour = (dp->hour & 0x0F) + (((dp->hour & 0x70) / 16) * 10); + dp->hour |= dp->hour & 0x80; +} + +/* + * Read the time for the clock without syncing + * it up. + * + * XXX: Please use mc1468_get_date() instead as + * this function may return inconsistent + * values if not used correctly. + */ +static void +__mc1468_get_time(struct date *dp) +{ + dp->year = mc1468_read(0x09); + dp->month = mc1468_read(0x08); + dp->day = mc1468_read(0x07); + dp->sec = mc1468_read(0x00); + dp->min = mc1468_read(0x02); + dp->hour = mc1468_read(0x04); +} + +/* + * Write a new time/date to the chip. 
+ */ +static void +mc1468_set_date(const struct date *dp) +{ + while (mc1468_updating()) { + md_pause(); + } + + mc1468_write(0x08, bin_dabble(dp->month)); + mc1468_write(0x07, bin_dabble(dp->day)); + mc1468_write(0x04, bin_dabble(dp->hour)); + mc1468_write(0x02, bin_dabble(dp->min)); + mc1468_write(0x00, bin_dabble(dp->sec)); +} + +static int +mc1468_get_date(struct date *dp) +{ + struct date date_cur, date_last; + uint8_t reg_b = mc1468_read(0x0B); + + while (mc1468_updating()) { + __mc1468_get_time(&date_last); + } + + /* + * Get the current date and time. + * + * XXX: The date and time returned by __mc1468_get_time() + * may at times be out of sync, read it twice to + * make sure everything is synced up. + */ + do { + while (mc1468_updating()) { + md_pause(); + } + __mc1468_get_time(&date_last); + date_cur.year = date_last.year; + date_cur.month = date_last.month; + date_cur.day = date_last.day; + date_cur.sec = date_last.sec; + date_cur.min = date_last.min; + date_cur.hour = date_last.hour; + } while (!mc1468_date_synced(&date_cur, &date_last)); + + /* Is this in BCD? */ + if (!ISSET(reg_b, 0x04)) { + mc1468_bcd_conv(&date_cur); + } + + /* 24-hour mode? */ + if (ISSET(reg_b, MC1468_CLOCK24)) { + date_cur.hour = ((date_cur.hour & 0x7F) + 12) % 24; + } + + date_cur.year += 2000; + *dp = date_cur; + return 0; +} + +static int +mc1468_dev_read(dev_t dev, struct sio_txn *sio, int flags) +{ + struct date d; + size_t len = sizeof(d); + + if (sio->len > len) { + sio->len = len; + } + + mc1468_get_date(&d); + memcpy(sio->buf, &d, sio->len); + return sio->len; +} + +static int +mc1468_dev_write(dev_t dev, struct sio_txn *sio, int flags) +{ + struct date d; + size_t len = sizeof(d); + + if (sio->len > len) { + sio->len = len; + } + + memcpy(&d, sio->buf, sio->len); + mc1468_set_date(&d); + return sio->len; +} + +static int +mc1468_init(void) +{ + char devname[] = "rtc"; + devmajor_t major; + dev_t dev; + + major = dev_alloc_major(); + dev = dev_alloc(major); + dev_register(major, dev, &mc1468_cdevsw); + devfs_create_entry(devname, major, dev, 0444); + return 0; +} + +static struct cdevsw mc1468_cdevsw = { + .read = mc1468_dev_read, + .write = mc1468_dev_write, +}; + +DRIVER_EXPORT(mc1468_init); diff --git a/sys/arch/amd64/isa/spkr.c b/sys/arch/amd64/isa/spkr.c index b1bd2a2..b2f63b0 100644 --- a/sys/arch/amd64/isa/spkr.c +++ b/sys/arch/amd64/isa/spkr.c @@ -30,14 +30,60 @@ #include <sys/cdefs.h> #include <sys/errno.h> #include <sys/param.h> +#include <sys/device.h> +#include <sys/driver.h> +#include <fs/devfs.h> #include <dev/timer.h> #include <machine/isa/spkr.h> #include <machine/isa/i8254.h> #include <machine/pio.h> +#include <string.h> #define DIVIDEND 1193180 #define CTRL_PORT 0x61 +static struct cdevsw beep_cdevsw; + +/* + * Write to the pcspkr + * + * Bits 15:0 - frequency (hz) + * Bits 31:16 - duration (msec) + */ +static int +dev_write(dev_t dev, struct sio_txn *sio, int flags) +{ + uint32_t payload = 0; + uint16_t hz; + uint16_t duration; + size_t len = sizeof(payload); + + if (sio->len < len) { + return -EINVAL; + } + + memcpy(&payload, sio->buf, len); + hz = payload & 0xFFFF; + duration = (payload >> 16) & 0xFFFF; + pcspkr_tone(hz, duration); + return sio->len; +} + +static int +beep_init(void) +{ + char devname[] = "beep"; + devmajor_t major; + dev_t dev; + + /* Register the device here */ + major = dev_alloc_major(); + dev = dev_alloc(major); + dev_register(major, dev, &beep_cdevsw); + devfs_create_entry(devname, major, dev, 0666); + return 0; +} + int pcspkr_tone(uint16_t freq, 
uint32_t msec) { @@ -67,3 +113,10 @@ pcspkr_tone(uint16_t freq, uint32_t msec) outb(CTRL_PORT, tmp & ~3); return 0; } + +static struct cdevsw beep_cdevsw = { + .read = noread, + .write = dev_write +}; + +DRIVER_EXPORT(beep_init); diff --git a/sys/arch/amd64/pci/pci_machdep.c b/sys/arch/amd64/pci/pci_machdep.c index 43065b0..5b49a78 100644 --- a/sys/arch/amd64/pci/pci_machdep.c +++ b/sys/arch/amd64/pci/pci_machdep.c @@ -33,6 +33,7 @@ #include <sys/mmio.h> #include <dev/pci/pci.h> #include <dev/pci/pciregs.h> +#include <machine/pci/pci.h> #include <machine/pio.h> #include <machine/bus.h> #include <machine/cpu.h> @@ -73,8 +74,8 @@ pci_get_barreg(struct pci_device *dev, uint8_t bar) } } -pcireg_t -pci_readl(struct pci_device *dev, uint32_t offset) +__weak pcireg_t +md_pci_readl(struct pci_device *dev, uint32_t offset) { uint32_t address; @@ -83,8 +84,8 @@ pci_readl(struct pci_device *dev, uint32_t offset) return inl(0xCFC) >> ((offset & 3) * 8); } -void -pci_writel(struct pci_device *dev, uint32_t offset, pcireg_t val) +__weak void +md_pci_writel(struct pci_device *dev, uint32_t offset, pcireg_t val) { uint32_t address; @@ -163,6 +164,7 @@ pci_enable_msix(struct pci_device *dev, const struct msi_intr *intr) { volatile uint64_t *tbl; struct cpu_info *ci; + struct intr_hand ih, *ih_res; uint32_t data, msg_ctl; uint64_t msg_addr, tmp; uint16_t tbl_off; @@ -184,9 +186,14 @@ pci_enable_msix(struct pci_device *dev, const struct msi_intr *intr) tbl = (void *)((dev->bar[bir] & PCI_BAR_MEMMASK) + MMIO_OFFSET); tbl = (void *)((char *)tbl + tbl_off); - /* Get the vector and setup handler */ - vector = intr_alloc_vector(intr->name, IPL_BIO); - idt_set_desc(vector, IDT_INT_GATE, ISR(intr->handler), 0); + ih.func = intr->handler; + ih.priority = IPL_BIO; + ih.irq = -1; + ih_res = intr_register(intr->name, &ih); + if (ih_res == NULL) { + return -EIO; + } + vector = ih_res->vector; /* * Setup the message data at bits 95:64 of the message |
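
The intr.c hunk above replaces intr_alloc_vector() with intr_register(), and the i8042.c and pci_machdep.c hunks show the new calling convention: a driver fills in a struct intr_hand, the kernel copies it, allocates a free vector inside the handler's IPL range, and programs the I/O APIC when irq >= 0 (MSI/MSI-X users pass -1 and read the chosen vector back from the returned handler). A minimal sketch of a driver using this path, assuming the field names shown in the hunks; mydev_isr, MYDEV_IRQ and mydev_attach are hypothetical:

#include <sys/errno.h>
#include <sys/syslog.h>
#include <machine/intr.h>

#define MYDEV_IRQ 5             /* hypothetical legacy IRQ line */

/*
 * Handlers dispatched from ioapic_common_func() receive a pointer to
 * their struct intr_data and return a non-zero value once the
 * interrupt has been claimed, so the table walk can stop early.
 */
static int
mydev_isr(void *arg)
{
    kprintf("mydev: interrupt\n");
    return 1;                   /* handled */
}

static int
mydev_attach(void)
{
    struct intr_hand ih, *ih_res;

    ih.func = mydev_isr;
    ih.priority = IPL_BIO;      /* vector is picked from this IPL's range */
    ih.irq = MYDEV_IRQ;         /* -1 for MSI/MSI-X; then use ih_res->vector */

    ih_res = intr_register("mydev", &ih);
    if (ih_res == NULL)
        return -EIO;
    return 0;
}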
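
The spkr.c hunk also turns the PC speaker into a character device: dev_write() expects a packed 32-bit payload with the tone frequency in Hz in bits 15:0 and the duration in milliseconds in bits 31:16, and the node is registered as "beep" with mode 0666. A hypothetical userland sketch, assuming the node shows up as /dev/beep and the usual open/write/close wrappers are available:

/* Beep at 440 Hz for 250 ms. */
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
    uint32_t payload = (250u << 16) | 440u;   /* bits 31:16 = msec, 15:0 = Hz */
    int fd;

    if ((fd = open("/dev/beep", O_WRONLY)) < 0)
        return 1;

    write(fd, &payload, sizeof(payload));
    close(fd);
    return 0;
}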
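
In the same spirit, the new mc1468.c driver exposes the RTC as a read-only "rtc" node (mode 0444) whose read returns a struct date. A hypothetical read-side sketch; the field names follow mc1468_date_synced() in the hunk, while the header path and the /dev/rtc location are assumptions:

#include <sys/time.h>   /* struct date -- assumed location */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
    struct date d;
    int fd;

    if ((fd = open("/dev/rtc", O_RDONLY)) < 0)
        return 1;

    if (read(fd, &d, sizeof(d)) == (ssize_t)sizeof(d)) {
        printf("%04d-%02d-%02d %02d:%02d:%02d\n",
            d.year, d.month, d.day, d.hour, d.min, d.sec);
    }
    close(fd);
    return 0;
}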