Diffstat (limited to 'sys/arch/amd64/amd64/machdep.c')
-rw-r--r--  sys/arch/amd64/amd64/machdep.c  459
1 files changed, 430 insertions, 29 deletions
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
index 07d6cdd..2976a51 100644
--- a/sys/arch/amd64/amd64/machdep.c
+++ b/sys/arch/amd64/amd64/machdep.c
@@ -42,7 +42,25 @@
 #include <machine/uart.h>
 #include <machine/sync.h>
 #include <machine/intr.h>
+#include <machine/ipi.h>
+#include <machine/cdefs.h>
 #include <machine/isa/i8042var.h>
+#include <dev/cons/cons.h>
+#include <string.h>
+
+/*
+ * This defines the max number of frames we
+ * will walk through when backtracing the
+ * call stack in md_backtrace().
+ */
+#define MAX_FRAME_DEPTH 16
+
+#define pr_trace(fmt, ...) kprintf("cpu: " fmt, ##__VA_ARGS__)
+#define pr_error(...) pr_trace(__VA_ARGS__)
+#define pr_trace_bsp(...)           \
+    if (!bsp_init) {                \
+        pr_trace(__VA_ARGS__);      \
+    }
 
 #if defined(__SPECTRE_IBRS)
 #define SPECTRE_IBRS __SPECTRE_IBRS
@@ -50,46 +68,94 @@
 #define SPECTRE_IBRS 0
 #endif
 
-static uint8_t halt_vector = 0;
+#if defined(__CPU_SMEP)
+#define CPU_SMEP __CPU_SMEP
+#else
+#define CPU_SMEP 0
+#endif
+
+#if defined(__CPU_UMIP)
+#define CPU_UMIP __CPU_UMIP
+#else
+#define CPU_UMIP 0
+#endif
 
 int ibrs_enable(void);
+int simd_init(void);
 void syscall_isr(void);
+void pin_isr_load(void);
 
 struct cpu_info g_bsp_ci = {0};
-static struct gdtr bsp_gdtr = {
-    .limit = sizeof(struct gdt_entry) * 256 - 1,
-    .offset = (uintptr_t)&g_gdt_data[0]
-};
+static struct cpu_ipi *halt_ipi;
+static struct cpu_ipi *tlb_ipi;
+static struct spinlock ipi_lock = {0};
+static bool bsp_init = false;
 
-__attribute__((__interrupt__))
-static void
-cpu_halt_isr(void *p)
+static int
+cpu_halt_handler(struct cpu_ipi *ipi)
 {
     __ASMV("cli; hlt");
     __builtin_unreachable();
 }
 
+static int
+tlb_shootdown_handler(struct cpu_ipi *ipi)
+{
+    struct cpu_info *ci;
+    int ipl;
+
+    /*
+     * Get the current CPU and check if we even
+     * need a shootdown. If `tlb_shootdown' is
+     * unset, this is not for us.
+     */
+    ci = this_cpu();
+    if (!ci->tlb_shootdown) {
+        return -1;
+    }
+
+    ipl = splraise(IPL_HIGH);
+    __invlpg(ci->shootdown_va);
+
+    ci->shootdown_va = 0;
+    ci->tlb_shootdown = 0;
+    splx(ipl);
+    return 0;
+}
+
 static void
-setup_vectors(void)
+setup_vectors(struct cpu_info *ci)
 {
-    if (halt_vector == 0) {
-        halt_vector = intr_alloc_vector("cpu-halt", IPL_HIGH);
+    union tss_stack scstack;
+    union tss_stack dfstack;
+
+    /* Try to allocate a syscall stack */
+    if (tss_alloc_stack(&scstack, DEFAULT_PAGESIZE) != 0) {
+        panic("failed to allocate syscall stack\n");
+    }
+
+    /* Try to allocate a double fault stack */
+    if (tss_alloc_stack(&dfstack, DEFAULT_PAGESIZE) != 0) {
+        panic("failed to allocate double fault stack\n");
     }
 
+    tss_update_ist(ci, scstack, IST_SYSCALL);
+    tss_update_ist(ci, dfstack, IST_DBFLT);
+
     idt_set_desc(0x0, IDT_TRAP_GATE, ISR(arith_err), 0);
     idt_set_desc(0x2, IDT_TRAP_GATE, ISR(nmi), 0);
     idt_set_desc(0x3, IDT_TRAP_GATE, ISR(breakpoint_handler), 0);
     idt_set_desc(0x4, IDT_TRAP_GATE, ISR(overflow), 0);
     idt_set_desc(0x5, IDT_TRAP_GATE, ISR(bound_range), 0);
     idt_set_desc(0x6, IDT_TRAP_GATE, ISR(invl_op), 0);
-    idt_set_desc(0x8, IDT_TRAP_GATE, ISR(double_fault), 0);
+    idt_set_desc(0x8, IDT_TRAP_GATE, ISR(double_fault), IST_DBFLT);
     idt_set_desc(0xA, IDT_TRAP_GATE, ISR(invl_tss), 0);
     idt_set_desc(0xB, IDT_TRAP_GATE, ISR(segnp), 0);
     idt_set_desc(0xC, IDT_TRAP_GATE, ISR(ss_fault), 0);
     idt_set_desc(0xD, IDT_TRAP_GATE, ISR(general_prot), 0);
     idt_set_desc(0xE, IDT_TRAP_GATE, ISR(page_fault), 0);
-    idt_set_desc(0x80, IDT_USER_INT_GATE, ISR(syscall_isr), 0);
-    idt_set_desc(halt_vector, IDT_INT_GATE, ISR(cpu_halt_isr), 0);
+    idt_set_desc(0x80, IDT_USER_INT_GATE, ISR(syscall_isr), IST_SYSCALL);
+    pin_isr_load();
 }
 
 static inline void
@@ -97,7 +163,7 @@ init_tss(struct cpu_info *ci)
 {
     struct tss_desc *desc;
 
-    desc = (struct tss_desc *)&g_gdt_data[GDT_TSS];
+    desc = (struct tss_desc *)&g_gdt_data[GDT_TSS_INDEX];
     write_tss(ci, desc);
     tss_load();
 }
@@ -133,47 +199,321 @@ backtrace_addr_to_name(uintptr_t addr, off_t *off)
     return NULL;
 }
 
+static void
+enable_simd(void)
+{
+    int retval;
+
+    if ((retval = simd_init()) < 0) {
+        pr_trace_bsp("SIMD not supported\n");
+    }
+
+    if (retval == 1) {
+        pr_trace_bsp("SSE enabled but not AVX\n");
+    }
+}
+
+static void
+init_ipis(void)
+{
+    int error;
+
+    if (bsp_init) {
+        return;
+    }
+
+    spinlock_acquire(&ipi_lock);
+    error = md_ipi_alloc(&halt_ipi);
+    if (error < 0) {
+        pr_error("md_ipi_alloc: returned %d\n", error);
+        panic("failed to init halt IPI\n");
+    }
+
+    halt_ipi->handler = cpu_halt_handler;
+    error = md_ipi_alloc(&tlb_ipi);
+    if (error < 0) {
+        pr_error("md_ipi_alloc: returned %d\n", error);
+        panic("failed to init TLB IPI\n");
+    }
+
+    tlb_ipi->handler = tlb_shootdown_handler;
+
+    /*
+     * Some IPIs must have very specific IDs
+     * so that they are standard and usable
+     * throughout the rest of the system.
+     */
+    if (halt_ipi->id != IPI_HALT)
+        panic("expected IPI_HALT for halt IPI\n");
+    if (tlb_ipi->id != IPI_TLB)
+        panic("expected IPI_TLB for TLB IPI\n");
+
+    spinlock_release(&ipi_lock);
+}
+
+static void
+cpu_get_vendor(struct cpu_info *ci)
+{
+    uint32_t unused, ebx, ecx, edx;
+    char vendor_str[13];
+
+    /*
+     * This CPUID returns a 12 byte CPU vendor string
+     * that we'll put together and use to detect the vendor.
+     */
+    CPUID(0, unused, ebx, ecx, edx);
+
+    /* Dword 0 */
+    vendor_str[0] = ebx & 0xFF;
+    vendor_str[1] = (ebx >> 8) & 0xFF;
+    vendor_str[2] = (ebx >> 16) & 0xFF;
+    vendor_str[3] = (ebx >> 24) & 0xFF;
+
+    /* Dword 1 */
+    vendor_str[4] = edx & 0xFF;
+    vendor_str[5] = (edx >> 8) & 0xFF;
+    vendor_str[6] = (edx >> 16) & 0xFF;
+    vendor_str[7] = (edx >> 24) & 0xFF;
+
+    /* Dword 2 */
+    vendor_str[8] = ecx & 0xFF;
+    vendor_str[9] = (ecx >> 8) & 0xFF;
+    vendor_str[10] = (ecx >> 16) & 0xFF;
+    vendor_str[11] = (ecx >> 24) & 0xFF;
+    vendor_str[12] = '\0';
+
+    /* Is this an AMD CPU? */
+    if (strcmp(vendor_str, "AuthenticAMD") == 0) {
+        ci->vendor = CPU_VENDOR_AMD;
+        return;
+    }
+
+    /* Is this an Intel CPU? */
+    if (strcmp(vendor_str, "GenuineIntel") == 0) {
+        ci->vendor = CPU_VENDOR_INTEL;
+        return;
+    }
+
+    /*
+     * Some buggy Intel CPUs report the string "GenuineIotel"
+     * instead of "GenuineIntel". This is rare but we should
+     * still handle it as it can happen. Probably a good idea
+     * to log it so the user can know about their rare CPU
+     * quirk and brag to their friends :~)
+     */
+    if (strcmp(vendor_str, "GenuineIotel") == 0) {
+        pr_trace_bsp("vendor_str=%s\n", vendor_str);
+        pr_trace_bsp("detected vendor string quirk\n");
+        ci->vendor = CPU_VENDOR_INTEL;
+        return;
+    }
+
+    ci->vendor = CPU_VENDOR_OTHER;
+}
+
+static void
+cpu_get_info(struct cpu_info *ci)
+{
+    uint32_t unused, eax, ebx, ecx, edx;
+    uint8_t ext_model, ext_family;
+
+    /* Get the vendor information */
+    cpu_get_vendor(ci);
+
+    /* Extended features */
+    CPUID(0x07, unused, ebx, ecx, unused);
+    if (ISSET(ebx, BIT(7)))
+        ci->feat |= CPU_FEAT_SMEP;
+    if (ISSET(ebx, BIT(20)))
+        ci->feat |= CPU_FEAT_SMAP;
+    if (ISSET(ecx, BIT(2)))
+        ci->feat |= CPU_FEAT_UMIP;
+
+    /*
+     * Processor power management information bits as well
+     * as bits describing RAS capabilities
+     */
+    CPUID(0x80000007, unused, unused, unused, edx);
+    if (ISSET(edx, BIT(8)))
+        ci->feat |= CPU_FEAT_TSCINV;
+
+    /*
+     * Processor info and feature bits
+     */
+    CPUID(0x01, eax, unused, unused, unused);
+    ci->model = (eax >> 4) & 0xF;
+    ci->family = (eax >> 8) & 0xF;
+
+    /*
+     * If the family ID is 15 then the actual family
+     * ID is the sum of the extended family and the
+     * family ID fields.
+     */
+    if (ci->family == 0xF) {
+        ext_family = (eax >> 20) & 0xFF;
+        ci->family += ext_family;
+    }
+
+    /*
+     * If the family ID is either 6 or 15, then the
+     * extended model number is used. Slap them
+     * together if this is the case.
+     */
+    if (ci->family == 6 || ci->family == 15) {
+        ext_model = (eax >> 16) & 0xF;
+        ci->model |= (ext_model << 4);
+    }
+}
+
+/*
+ * The CR4.UMIP bit prevents user programs from
+ * executing instructions related to accessing
+ * system memory structures. This should be enabled
+ * by default if supported.
+ */
+static void
+cpu_enable_umip(void)
+{
+    struct cpu_info *ci = this_cpu();
+    uint64_t cr4;
+
+    if (!CPU_UMIP) {
+        pr_trace_bsp("UMIP not configured\n");
+        return;
+    }
+
+    if (ISSET(ci->feat, CPU_FEAT_UMIP)) {
+        cr4 = amd64_read_cr4();
+        cr4 |= CR4_UMIP;
+        amd64_write_cr4(cr4);
+    }
+}
+
+void
+cpu_shootdown_tlb(vaddr_t va)
+{
+    uint32_t ncpu = cpu_count();
+    struct cpu_info *cip;
+
+    for (uint32_t i = 0; i < ncpu; ++i) {
+        cip = cpu_get(i);
+        if (cip == NULL) {
+            break;
+        }
+
+        spinlock_acquire(&cip->lock);
+        cip->shootdown_va = va;
+        cip->tlb_shootdown = 1;
+        md_ipi_send(cip, IPI_TLB);
+        spinlock_release(&cip->lock);
+    }
+}
+
 void
 md_backtrace(void)
 {
-    uintptr_t *rbp;
-    uintptr_t rip;
+    uintptr_t *rbp = NULL;
+    uintptr_t rip, tmp;
     off_t off;
     const char *name;
+    char line[256];
+    uint8_t n = 0;
 
     __ASMV("mov %%rbp, %0" : "=r" (rbp) :: "memory");
     while (1) {
+        if (n >= MAX_FRAME_DEPTH) {
+            break;
+        }
+
+        /* End of callstack */
+        if (rbp == NULL) {
+            break;
+        }
+
         rip = rbp[1];
         rbp = (uintptr_t *)rbp[0];
-        name = backtrace_addr_to_name(rip, &off);
 
-        if (rbp == NULL)
+        /*
+         * RBP should be aligned on an 8-byte
+         * boundary... Don't trust this state
+         * anymore if it is not.
+         */
+        tmp = (uintptr_t)rbp;
+        if ((tmp & (8 - 1)) != 0) {
             break;
-        if (name == NULL)
-            name = "???";
+        }
+
+        /*
+         * This is not a valid value, get out
+         * of this loop!!
+         */
+        if (rip == 0) {
+            break;
+        }
 
-        kprintf(OMIT_TIMESTAMP "%p @ <%s+0x%x>\n", rip, name, off);
+        name = backtrace_addr_to_name(rip, &off);
+        snprintf(line, sizeof(line), "%p @ <%s+0x%x>\n", rip, name, off);
+        cons_putstr(&g_root_scr, line, strlen(line));
+        ++n;
     }
 }
 
 void
 cpu_halt_all(void)
 {
+    struct cpu_info *ci, *curcpu;
+    uint32_t ncpu = cpu_count();
+
     /*
      * If we have no current 'cpu_info' structure set,
      * we can't send IPIs, so just assume only the current
      * processor is the only one active, clear interrupts
      * then halt it.
      */
-    if (rdmsr(IA32_GS_BASE) == 0) {
-        __ASMV("cli; hlt");
+    __ASMV("cli");
+    if ((curcpu = this_cpu()) == NULL) {
+        __ASMV("hlt");
+    }
+
+    for (int i = 0; i < ncpu; ++i) {
+        ci = cpu_get(i);
+        if (ci->id == curcpu->id) {
+            continue;
+        }
+
+        md_ipi_send(ci, IPI_HALT);
     }
 
-    /* Send IPI to all cores */
-    lapic_send_ipi(0, IPI_SHORTHAND_ALL, halt_vector);
+    __ASMV("hlt");
     for (;;);
 }
 
+/*
+ * Same as cpu_halt_all(), but only halts the
+ * other cores and leaves the calling core running.
+ */
+void
+cpu_halt_others(void)
+{
+    struct cpu_info *curcpu, *ci;
+    uint32_t ncpu = cpu_count();
+
+    if (rdmsr(IA32_GS_BASE) == 0 || ncpu <= 1) {
+        return;
+    }
+
+    curcpu = this_cpu();
+
+    for (int i = 0; i < ncpu; ++i) {
+        if ((ci = cpu_get(i)) == NULL)
+            continue;
+        if (ci->id == curcpu->id)
+            continue;
+
+        md_ipi_send(ci, IPI_HALT);
+    }
+}
+
 void
 serial_init(void)
 {
@@ -195,6 +535,10 @@ this_cpu(void)
 {
     struct cpu_info *ci;
 
+    if (rdmsr(IA32_GS_BASE) == 0) {
+        return NULL;
+    }
+
     /*
      * This might look crazy but we are just leveraging the "m"
      * constraint to add the offset of the self field within
@@ -221,17 +565,74 @@ md_sync_all(void)
 }
 
 void
+cpu_enable_smep(void)
+{
+    struct cpu_info *ci;
+    uint64_t cr4;
+
+    /* Don't bother if not enabled */
+    if (!CPU_SMEP) {
+        return;
+    }
+
+    ci = this_cpu();
+    if (!ISSET(ci->feat, CPU_FEAT_SMEP)) {
+        pr_trace_bsp("SMEP not supported\n");
+        return;
+    }
+
+    cr4 = amd64_read_cr4();
+    cr4 |= BIT(20);     /* CR4.SMEP */
+    amd64_write_cr4(cr4);
+}
+
+void
+cpu_disable_smep(void)
+{
+    struct cpu_info *ci;
+    uint64_t cr4;
+
+    if (!CPU_SMEP) {
+        return;
+    }
+
+    ci = this_cpu();
+    if (!ISSET(ci->feat, CPU_FEAT_SMEP)) {
+        return;
+    }
+
+    cr4 = amd64_read_cr4();
+    cr4 &= ~BIT(20);    /* CR4.SMEP */
+    amd64_write_cr4(cr4);
+}
+
+void
 cpu_startup(struct cpu_info *ci)
 {
     ci->self = ci;
-    gdt_load(&bsp_gdtr);
+    ci->feat = 0;
+    gdt_load();
     idt_load();
 
-    setup_vectors();
     wrmsr(IA32_GS_BASE, (uintptr_t)ci);
-    init_tss(ci);
+
+    setup_vectors(ci);
+    md_ipi_init();
+    init_ipis();
 
     try_mitigate_spectre();
+    ci->online = 1;
+    ci->preempt = 1;
+
+    cpu_get_info(ci);
+    cpu_enable_smep();
+    cpu_enable_umip();
+    enable_simd();
     lapic_init();
+
+    if (!bsp_init) {
+        bsp_init = true;
+    }
 }
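The CPUID decoding performed by the new cpu_get_vendor() and cpu_get_info() routines can be tried outside the kernel. The sketch below is not part of the patch; it is a minimal userspace approximation that assumes a GCC or Clang toolchain on x86, using __get_cpuid() from <cpuid.h> in place of the kernel's CPUID() macro and printf() in place of pr_trace().

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
    unsigned int eax, ebx, ecx, edx;
    unsigned int family, model;
    char vendor[13];

    /* Leaf 0: the vendor string is packed into EBX, EDX, ECX (in that order) */
    if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
        return 1;
    memcpy(&vendor[0], &ebx, 4);
    memcpy(&vendor[4], &edx, 4);
    memcpy(&vendor[8], &ecx, 4);
    vendor[12] = '\0';

    /* Leaf 1: base model and family live in EAX bits 7:4 and 11:8 */
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 1;
    model = (eax >> 4) & 0xF;
    family = (eax >> 8) & 0xF;

    /* Family 0xF: add in the extended family field (bits 27:20) */
    if (family == 0xF)
        family += (eax >> 20) & 0xFF;

    /* Family 6 or 15: the extended model (bits 19:16) forms the high nibble */
    if (family == 6 || family == 15)
        model |= ((eax >> 16) & 0xF) << 4;

    printf("vendor=%s family=%u model=%u\n", vendor, family, model);
    return 0;
}

On an AMD part this prints "AuthenticAMD" together with the same family and model values the kernel stashes in ci->family and ci->model.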
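The reworked md_backtrace() walks the saved RBP/return-address chain with a frame cap, a NULL check and an alignment check rather than trusting the chain blindly. A rough userspace analogue of that walk is sketched below; it is illustrative only and assumes GCC or Clang on x86-64 with frame pointers kept (-fno-omit-frame-pointer), with printf() standing in for backtrace_addr_to_name() and cons_putstr().

#include <stdint.h>
#include <stdio.h>

#define MAX_FRAME_DEPTH 16

/* Walk saved frame pointers: each frame stores [saved rbp][return rip] */
__attribute__((noinline)) static void
backtrace(void)
{
    uintptr_t *rbp = __builtin_frame_address(0);
    uintptr_t rip;

    for (int n = 0; n < MAX_FRAME_DEPTH; ++n) {
        /* End of the call stack */
        if (rbp == NULL)
            break;

        rip = rbp[1];                   /* return address for this frame */
        rbp = (uintptr_t *)rbp[0];      /* caller's saved frame pointer */

        /* Stop on unaligned or zero values rather than chasing garbage */
        if (((uintptr_t)rbp & (8 - 1)) != 0 || rip == 0)
            break;

        printf("frame %d: %p\n", n, (void *)rip);
    }
}

__attribute__((noinline)) static void
leaf(void)
{
    backtrace();
}

int
main(void)
{
    leaf();     /* expect return addresses in leaf(), main() and the libc startup code */
    return 0;
}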