| author    | Ian Moffett <ian@osmora.org>             | 2025-09-14 20:45:19 -0400 |
|-----------|------------------------------------------|---------------------------|
| committer | Ian Moffett <ian@osmora.org>             | 2025-09-14 20:45:19 -0400 |
| commit    | 819e76156b9b955074a58bc177965a3d65d837ae |                           |
| tree      | 33f4d69124905bfa97c17ee5a7cc848cfc468159 |                           |
| parent    | 977a102db2f47e097f7389313223d36ff7d47333 |                           |
kern: vm: Implement page mapping and VASes
This commit implements an interface to allow other parts of Lunos to
talk to the MMU:
- Added the 'vas' structure as the virtual address space
- Added MMU specific prot flags
- Added mmu_map_single() to map pages
- Added mmu_this_vas() to grab the current VAS
...
Signed-off-by: Ian Moffett <ian@osmora.org>
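
Below is a minimal usage sketch of the interface introduced by this commit; it is not part of the commit itself. The wrapper `map_one_page()` and its error handling are hypothetical, while `mmu_this_vas()`, `mmu_map_single()`, `struct vm_vas`, `struct mmu_map`, the `MMU_PROT_*` flags, and `vm_alloc_frame()` are taken from the diff below.

```c
/*
 * Hypothetical sketch (not from this commit): back one virtual page
 * with a freshly allocated physical frame in the current VAS.
 */
#include <sys/errno.h>
#include <machine/mmu.h>
#include <vm/physseg.h>

static int
map_one_page(vaddr_t va)
{
    struct vm_vas vas;
    struct mmu_map spec;
    int error;

    /* Grab the VAS currently loaded in CR3 */
    error = mmu_this_vas(&vas);
    if (error < 0)
        return error;

    /* Pick a physical frame to back the page */
    spec.va = va;
    spec.pa = vm_alloc_frame(1);
    if (spec.pa == 0)
        return -ENOMEM;

    /* Map it readable and writable, no execute */
    return mmu_map_single(&vas, &spec, MMU_PROT_READ | MMU_PROT_WRITE);
}
```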
| mode       | file                             | changes |
|------------|----------------------------------|---------|
| -rw-r--r-- | src/sys/arch/amd64/cpu/mmu.c     | 267     |
| -rw-r--r-- | src/sys/include/arch/amd64/mmu.h | 60      |
| -rw-r--r-- | src/sys/vm/vm_init.c             | 4       |
3 files changed, 328 insertions, 3 deletions
diff --git a/src/sys/arch/amd64/cpu/mmu.c b/src/sys/arch/amd64/cpu/mmu.c
index 9c86c69..e61e0b0 100644
--- a/src/sys/arch/amd64/cpu/mmu.c
+++ b/src/sys/arch/amd64/cpu/mmu.c
@@ -28,10 +28,32 @@
  */
 
 #include <sys/types.h>
+#include <sys/errno.h>
 #include <sys/param.h>
 #include <sys/cdefs.h>
 #include <sys/panic.h>
 #include <machine/mmu.h>
+#include <vm/vm.h>
+#include <vm/physseg.h>
+#include <string.h>
+#include <stdbool.h>
+
+/*
+ * Page-Table Entry (PTE) flags
+ *
+ * See Intel SDM Vol 3A, Section 4.5, Table 4-19
+ */
+#define PTE_ADDR_MASK 0x000FFFFFFFFFF000
+#define PTE_P         BIT(0)   /* Present */
+#define PTE_RW        BIT(1)   /* Writable */
+#define PTE_US        BIT(2)   /* User r/w allowed */
+#define PTE_PWT       BIT(3)   /* Page-level write-through */
+#define PTE_PCD       BIT(4)   /* Page-level cache disable */
+#define PTE_ACC       BIT(5)   /* Accessed */
+#define PTE_DIRTY     BIT(6)   /* Dirty (written-to page) */
+#define PTE_PS        BIT(7)   /* Page size */
+#define PTE_GLOBAL    BIT(8)   /* Global / sticky map */
+#define PTE_NX        BIT(63)  /* Execute-disable */
 
 /*
  * Used to enable/disable 57-bit paging which expands
@@ -42,11 +64,13 @@
 /*
  * Describes each paging level
  *
- * @MMU_PAGE: Page table
+ * @MMU_OFF: Offset
+ * @MMU_TBL: Page table
  * @MMU_L<n>: Page table level n
  */
 typedef enum {
-    MMU_PAGE,
+    MMU_OFF,
+    MMU_TBL,
     MMU_L2,
     MMU_L3,
     MMU_L4,
@@ -54,6 +78,68 @@ typedef enum {
 } pglvl_t;
 
 /*
+ * Convert machine independent protection flags
+ * to machine dependent flags.
+ *
+ * @prot: MI flags
+ */
+static uint64_t
+prot_to_pte(int prot)
+{
+    uint64_t pte_flags = PTE_NX;
+
+    if (ISSET(prot, MMU_PROT_READ))
+        pte_flags |= PTE_P;
+    if (ISSET(prot, MMU_PROT_WRITE))
+        pte_flags |= PTE_RW;
+    if (ISSET(prot, MMU_PROT_EXEC))
+        pte_flags &= ~PTE_NX;
+
+    return pte_flags;
+}
+
+/*
+ * Invalidate a page in the TLB. We use this to prevent
+ * stale entries when remapping or changing attributes
+ * of pages.
+ *
+ * @ptr: Page base to invalidate from the TLB
+ */
+static inline void
+__invlpg(void *ptr)
+{
+    uintptr_t v = (uintptr_t)ptr;
+
+    v = ALIGN_UP(v, DEFAULT_PAGESIZE);
+    __ASMV(
+        "invlpg (%0)"
+        :
+        : "r" (v)
+        : "memory"
+    );
+}
+
+/*
+ * Get the current value of CR3 which holds
+ * the physical address of the current virtual
+ * address space.
+ */
+static inline uint64_t
+__mmu_read_cr3(void)
+{
+    uint64_t cr3;
+
+    __ASMV(
+        "mov %%cr3, %0"
+        : "=r" (cr3)
+        :
+        : "memory"
+    );
+
+    return cr3;
+}
+
+/*
  * Acquire the paging level used by the
  * current processing element (pcore)
  */
@@ -64,8 +150,8 @@ mmu_pg_level(void)
 
     __ASMV(
         "mov %%cr0, %0"
+        : "=r" (cr0)
         :
-        : "r" (cr0)
         : "memory"
     );
 
@@ -77,11 +163,183 @@ mmu_pg_level(void)
 }
 
 /*
+ * Get the table index of a specific level by
+ * using a specific virtual address as a key.
+ *
+ * @vaddr: Virtual address to use in lookup
+ * @level: The index of the desired level
+ */
+static inline size_t
+mmu_get_level(vaddr_t vaddr, pglvl_t level)
+{
+    switch (level) {
+    case MMU_L5:
+        return (vaddr >> 48) & 0x1FF;
+    case MMU_L4:
+        return (vaddr >> 39) & 0x1FF;
+    case MMU_L3:
+        return (vaddr >> 30) & 0x1FF;
+    case MMU_L2:
+        return (vaddr >> 21) & 0x1FF;
+    case MMU_TBL:
+        return (vaddr >> 12) & 0x1FF;
+    case MMU_OFF:
+        return vaddr & 0x1FF;
+    }
+
+    panic("mmu_get_level: bad level index\n");
+    __builtin_unreachable();
+}
+
+/*
+ * Get the table at the desired level
+ *
+ * @vas: Virtual address space
+ * @va: Virtual address to use as key
+ * @lvl: Desired level
+ * @res: Virtual address result is written here
+ * @a: If true, allocate memory for unmapped entries
+ *
+ * Returns zero on success, otherwise a less than
+ * zero value on failure.
+ */
+static int
+mmu_read_level(struct vm_vas *vas, vaddr_t va, pglvl_t lvl, vaddr_t **res, bool a)
+{
+    uintptr_t *cur, tmp_va, addr;
+    size_t index;
+    pglvl_t cur_level = MMU_L4;
+
+    if (vas == NULL || lvl > MMU_L5) {
+        return -EINVAL;
+    }
+
+    if (res == NULL) {
+        return -EINVAL;
+    }
+
+    /*
+     * We'll do a recursive descent style algorithm
+     * to get the page table that we want. Keep going
+     * down levels [lvl, MMU_TBL) until we hit the
+     * bottom.
+     */
+    cur = PHYS_TO_VIRT(vas->cr3 & PTE_ADDR_MASK);
+    while (cur_level > lvl) {
+        index = mmu_get_level(va, cur_level);
+        addr = cur[index];
+
+        /* Is this present? */
+        if (ISSET(addr, PTE_P)) {
+            addr = cur[index] & PTE_ADDR_MASK;
+            cur = PHYS_TO_VIRT(addr);
+            --cur_level;
+            continue;
+        }
+
+        /* If we can't alloc, bail */
+        if (!a) {
+            return -EPIPE;
+        }
+
+        /* Allocate new frame */
+        addr = vm_alloc_frame(1);
+        if (__unlikely(addr == 0)) {
+            panic("mmu_read_level: out of memory\n");
+        }
+
+        /* Write the new entry */
+        addr |= (PTE_P | PTE_RW | PTE_US);
+        addr = (uintptr_t)PHYS_TO_VIRT(addr);
+        cur[index] = addr;
+
+        /*
+         * To be certain that we will see every change
+         * per every level, we must invalidate its
+         * corresponding entry.
+         */
+        __invlpg(cur);
+        --cur_level;
+    }
+
+    *res = cur;
+    return 0;
+}
+
+/*
+ * Read the current VAS into 'vasres_p'
+ */
+int
+mmu_this_vas(struct vm_vas *vasres_p)
+{
+    if (vasres_p == NULL) {
+        return -EINVAL;
+    }
+
+    vasres_p->cr3 = __mmu_read_cr3();
+    return 0;
+}
+
+/*
+ * Create a virtual to physical mapping
+ */
+int
+mmu_map_single(struct vm_vas *vas, struct mmu_map *spec, int prot)
+{
+    int error;
+    size_t index;
+    uint64_t pte_flags;
+    vaddr_t *pte;
+
+    if (spec == NULL) {
+        return -EINVAL;
+    }
+
+    /*
+     * First things first, we need to translate these
+     * architecture abstracting protection flags to
+     * something these Intel MMUs will understand. We
+     * just need to start at the PML4, hit the bottom
+     * and plop em there.
+     */
+    pte_flags = prot_to_pte(prot);
+    error = mmu_read_level(
+        vas, spec->va, MMU_TBL,
+        &pte, true
+    );
+
+    /* Did this fail? */
+    if (error < 0) {
+        return error;
+    }
+
+    /*
+     * Now using the virtual address within the map spec,
+     * we'll acquire the index at which we put the physical
+     * address, along with its flags. Then of course, flush the
+     * TLB entry.
+     */
+    index = mmu_get_level(spec->va, MMU_TBL);
+    pte[index] = pte_flags | spec->pa;
+    __invlpg((void *)spec->va);
+    return 0;
+}
+
+/*
  * Verify that we are in a known state
  */
 int
 mmu_init(void)
 {
+    struct pcore *self = this_core();
+    struct mdcore *core;
+
+    if (self == NULL) {
+        panic("mmu_init: could not get core\n");
+    }
+
+    core = &self->md;
+
     /*
      * It would be foolish to assume the state of the
      * processor we are handed over with. Check first,
@@ -90,5 +348,8 @@ mmu_init(void)
     if (mmu_pg_level() != MMU_L4) {
         panic("mmu_init: processor not using L4 paging\n");
     }
+
+    core->cr3 = __mmu_read_cr3();
+    g_kvas.cr3 = core->cr3;
     return 0;
 }
diff --git a/src/sys/include/arch/amd64/mmu.h b/src/sys/include/arch/amd64/mmu.h
index 09298b6..0cecd68 100644
--- a/src/sys/include/arch/amd64/mmu.h
+++ b/src/sys/include/arch/amd64/mmu.h
@@ -35,6 +35,46 @@
 #ifndef _MACHINE_MMU_H_
 #define _MACHINE_MMU_H_
 
+#include <sys/cpuvar.h>
+#include <sys/param.h>
+#include <vm/vm.h>
+
+/*
+ * Standard memory protection flags
+ */
+#define MMU_PROT_NONE  0x0     /* Nothing */
+#define MMU_PROT_READ  BIT(0)  /* Readable */
+#define MMU_PROT_WRITE BIT(1)  /* Writable */
+#define MMU_PROT_EXEC  BIT(2)  /* Executable */
+
+
+/*
+ * This will represent a virtual to
+ * physical address mapping.
+ *
+ * @va: Virtual address
+ * @pa: Physical address
+ */
+struct mmu_map {
+    vaddr_t va;
+    paddr_t pa;
+};
+
+/*
+ * Represents the current virtual address
+ *
+ * @cr3: The value of CR3 for this VAS
+ */
+struct vm_vas {
+    paddr_t cr3;
+};
+
+/*
+ * Global early kernel VAS structure used in the
+ * creation of new virtual address spaces.
+ */
+extern struct vm_vas g_kvas;
+
 /*
  * Initialize arch-specific MMU state such as
  * page tables, initial mappings and sanity checks.
@@ -44,4 +84,24 @@
  */
 int mmu_init(void);
 
+/*
+ * Map a single virtual page into physical address
+ * space.
+ *
+ * @spec: Mapping specifier (virtual/physical address)
+ * @prot: Protection flags for the mapping (see MMU_PROT_*)
+ */
+int mmu_map_single(struct vm_vas *vas, struct mmu_map *spec, int prot);
+
+/*
+ * Get a pointer to the current virtual address
+ * space.
+ *
+ * @vasres_p: Resulting VAS is written here
+ *
+ * Returns zero on success, otherwise a less than zero
+ * value on failure.
+ */
+int mmu_this_vas(struct vm_vas *vasres_p);
+
 #endif /* !_MACHINE_MMU_H_ */
diff --git a/src/sys/vm/vm_init.c b/src/sys/vm/vm_init.c
index c8137a9..77b0a21 100644
--- a/src/sys/vm/vm_init.c
+++ b/src/sys/vm/vm_init.c
@@ -28,15 +28,19 @@
  */
 #include <sys/panic.h>
+#include <sys/cpuvar.h>
 #include <vm/vm.h>
 #include <vm/physseg.h>
 #include <machine/mmu.h>
 
 /* standard */
 static struct physmem_stat stat;
+struct vm_vas g_kvas;
 
 void
 vm_init(void)
 {
+    struct pcore *pcore = this_core();
+
     if (vm_seg_init(&stat) < 0) {
         panic("vm_init: vm_seg_init() failed\n");
     }
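
For reference, the shifts in `mmu_get_level()` follow the standard x86-64 long-mode address decomposition: each paging structure holds 512 entries, so every level consumes 9 bits of the virtual address (hence the `0x1FF` mask), with bits 47:39 indexing the L4 (PML4) table, 38:30 the L3, 29:21 the L2, 20:12 the bottom-level page table, and bits 11:0 giving the byte offset within a 4 KiB page (bits 56:48 select the L5 table when 57-bit paging is enabled). The sketch below is a self-contained, hosted-C illustration of that split, not kernel code from this commit.

```c
#include <stdint.h>
#include <stdio.h>

/* Break a canonical virtual address into its 4-level paging indices. */
int
main(void)
{
    uint64_t va = 0xFFFFFFFF80201234ULL; /* arbitrary example address */

    printf("L4 index:    %lu\n", (unsigned long)((va >> 39) & 0x1FF));
    printf("L3 index:    %lu\n", (unsigned long)((va >> 30) & 0x1FF));
    printf("L2 index:    %lu\n", (unsigned long)((va >> 21) & 0x1FF));
    printf("PT index:    %lu\n", (unsigned long)((va >> 12) & 0x1FF));
    printf("page offset: 0x%lx\n", (unsigned long)(va & 0xFFF));
    return 0;
}
```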