From 600403a8666da95c8a9e99497b93825cce63c750 Mon Sep 17 00:00:00 2001
From: Ian Moffett
Date: Tue, 21 May 2024 11:40:32 -0400
Subject: kernel: Initial sched rewrite

- Introduce dynamic scheduling policy
- Introduce MLFQ scheduling
- Clean up a lot of code

Signed-off-by: Ian Moffett
---
 sys/arch/amd64/amd64/cpu_mp.c |   5 +-
 sys/arch/amd64/conf/GENERIC   |   1 +
 sys/include/sys/proc.h        |   2 +
 sys/include/sys/sched.h       |   4 +-
 sys/include/sys/schedvar.h    |  25 +++
 sys/kern/init_main.c          |   2 +-
 sys/kern/kern_sched.c         | 462 +++++++++++++++++++++++-------------------
 7 files changed, 284 insertions(+), 217 deletions(-)

diff --git a/sys/arch/amd64/amd64/cpu_mp.c b/sys/arch/amd64/amd64/cpu_mp.c
index 7b95196..2afdd43 100644
--- a/sys/arch/amd64/amd64/cpu_mp.c
+++ b/sys/arch/amd64/amd64/cpu_mp.c
@@ -51,18 +51,15 @@ static bool is_mp_supported = false;
 static void
 ap_trampoline(struct limine_smp_info *si)
 {
-        struct cpu_info *ci;
         static struct spinlock lock = {0};
 
         spinlock_acquire(&lock);
-        ci = (void *)si->extra_argument;
 
         pre_init();
         processor_init();
 
         spinlock_release(&lock);
-
-        sched_init_processor(ci);
+        sched_enter();
 
         /* Should not be reached */
         __assert(0);
diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
index 0baff12..b4fb29d 100644
--- a/sys/arch/amd64/conf/GENERIC
+++ b/sys/arch/amd64/conf/GENERIC
@@ -5,3 +5,4 @@ option PANIC_BEEP yes
 
 // Kernel constants
 setval PANIC_BEEP_HZ 1050
+setval SCHED_NQUEUE 4
diff --git a/sys/include/sys/proc.h b/sys/include/sys/proc.h
index ec7d25e..e3416f9 100644
--- a/sys/include/sys/proc.h
+++ b/sys/include/sys/proc.h
@@ -79,7 +79,9 @@ struct proc {
         struct vm_range addr_range[PROC_MAX_ADDR_RANGE];
         struct spinlock lock;
         uint8_t is_user;
+        uint8_t rested;
         uint32_t signal;
+        uint32_t priority;
         struct filedesc *fds[PROC_MAX_FDS];
         struct spinlock mapspace_lock;
         struct vm_mapspace mapspace;
diff --git a/sys/include/sys/sched.h b/sys/include/sys/sched.h
index 8245120..06cf860 100644
--- a/sys/include/sys/sched.h
+++ b/sys/include/sys/sched.h
@@ -45,9 +45,9 @@ void sched_context_switch(struct trapframe *tf);
 void sched_rest(void);
 
 __noreturn
-uint64_t sys_exit(struct syscall_args *args);
+void sched_enter(void);
 
 __noreturn
-void sched_init_processor(struct cpu_info *ci);
+uint64_t sys_exit(struct syscall_args *args);
 
 #endif  /* !_SYS_SCHED_H_ */
diff --git a/sys/include/sys/schedvar.h b/sys/include/sys/schedvar.h
index 431a93e..2a0a1fc 100644
--- a/sys/include/sys/schedvar.h
+++ b/sys/include/sys/schedvar.h
@@ -30,7 +30,32 @@
 #ifndef _SYS_SCHEDVAR_H_
 #define _SYS_SCHEDVAR_H_
 
+#include
+#include
+#include
+
 #define DEFAULT_TIMESLICE_USEC 3000
 #define SHORT_TIMESLICE_USEC 10
 
+#define SCHED_POLICY_MLFQ 0x0000U /* Multilevel feedback queue */
+#define SCHED_POLICY_RR 0x0001U /* Round robin */
+
+typedef uint8_t schedpolicy_t;
+
+/* Might be set by kconf(1) */
+#if defined(__SCHED_NQUEUE)
+#define SCHED_NQUEUE __SCHED_NQUEUE
+#else
+#define SCHED_NQUEUE 4
+#endif
+
+/* Ensure SCHED_NQUEUE is an acceptable value */
+__STATIC_ASSERT(SCHED_NQUEUE <= 8, "SCHED_NQUEUE exceeds max");
+__STATIC_ASSERT(SCHED_NQUEUE > 0, "SCHED_NQUEUE cannot be zero");
+
+struct sched_queue {
+        TAILQ_HEAD(, proc) q;
+        size_t nthread;
+};
+
 #endif  /* !_SYS_SCHEDVAR_H_ */
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 4b7f62e..10cd90e 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -103,7 +103,7 @@ main(void)
         ci = this_cpu();
         __TRY_CALL(ap_bootstrap, ci);
 
-        sched_init_processor(ci);
+        sched_enter();
 
         while (1);
         __builtin_unreachable();
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
index b7e6af5..c85d9bf 100644
--- a/sys/kern/kern_sched.c
+++ b/sys/kern/kern_sched.c
@@ -29,38 +29,40 @@
 #include
 #include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
+#include
+#include
+#include
 #include
+#include
+#include
 #include
 #include
 #include
 #include
-#include
 #include
-#include
-#include
+#include
 #include
+#include
+
+/*
+ * Thread ready queues - all threads ready to be
+ * scheduled should be added to the toplevel queue.
+ */
+static struct sched_queue qlist[SCHED_NQUEUE];
 
 /*
- * Thread ready queue - all threads ready to be
- * scheduled should be added to this queue.
+ * Global scheduler state.
  */
-static TAILQ_HEAD(, proc) td_queue;
 static size_t nthread = 0;
+static schedpolicy_t policy = SCHED_POLICY_MLFQ;
 
 /*
- * Thread queue lock - all operations to `td_queue'
+ * Thread queue lock - all operations to `qlist'
  * must be done with this lock acquired.
  *
  * This lock is aligned on a cache line boundary to ensure
- * it has its own cacheline to reduce contention. This is
+ * it has its own cache line to reduce contention. This is
  * because it is constantly acquired and released on every
  * processor.
  */
@@ -68,188 +70,293 @@ __cacheline_aligned
 static struct spinlock tdq_lock = {0};
 
 /*
- * Perform timer oneshot
- *
- * @now: True for shortest timeslice.
+ * Lower thread priority.
  */
 static inline void
-sched_oneshot(bool now)
+td_pri_lower(struct proc *td)
 {
-        struct timer timer;
-        size_t usec = (now) ? SHORT_TIMESLICE_USEC : DEFAULT_TIMESLICE_USEC;
-        tmrr_status_t tmr_status;
+        if (td->priority < (SCHED_NQUEUE - 1))
+                ++td->priority;
+}
 
-        tmr_status = req_timer(TIMER_SCHED, &timer);
-        __assert(tmr_status == TMRR_SUCCESS);
+/*
+ * Raise thread priority.
+ */
+static inline void
+td_pri_raise(struct proc *td)
+{
+        if (td->priority > 0)
+                --td->priority;
+}
 
-        timer.oneshot_us(usec);
+/*
+ * Called during preemption. We raise the priority
+ * if the thread has been rested. If the thread has not
+ * been rested, we lower its priority.
+ */
+static void
+td_pri_update(struct proc *td)
+{
+        if (td->rested) {
+                td->rested = 0;
+                td_pri_raise(td);
+                return;
+        }
+
+        td_pri_lower(td);
 }
 
 /*
- * Push a thread into the thread ready queue
- * allowing it to be eventually dequeued
- * and ran.
+ * Enqueue a thread into the scheduler.
  */
 static void
 sched_enqueue_td(struct proc *td)
 {
-        /* Sanity check */
-        if (td == NULL)
-                return;
+        struct sched_queue *queue;
 
         spinlock_acquire(&tdq_lock);
+        queue = &qlist[td->priority];
 
-        td->pid = nthread++;
-        TAILQ_INSERT_TAIL(&td_queue, td, link);
-
+        TAILQ_INSERT_TAIL(&queue->q, td, link);
         spinlock_release(&tdq_lock);
 }
 
 /*
- * Dequeue the first thread in the thread ready
- * queue.
+ * Dequeue a thread from a queue.
  */
 static struct proc *
 sched_dequeue_td(void)
 {
+        struct sched_queue *queue;
         struct proc *td = NULL;
 
         spinlock_acquire(&tdq_lock);
 
-        if (!TAILQ_EMPTY(&td_queue)) {
-                td = TAILQ_FIRST(&td_queue);
-                TAILQ_REMOVE(&td_queue, td, link);
+        /*
+         * Try to pop a thread from a queue. We start
+         * at the highest-priority queue, which is index 0.
+         */
+        for (size_t i = 0; i < SCHED_NQUEUE; ++i) {
+                queue = &qlist[i];
+
+                if (!TAILQ_EMPTY(&queue->q)) {
+                        td = TAILQ_FIRST(&queue->q);
+                        TAILQ_REMOVE(&queue->q, td, link);
+                        break;
+                }
         }
 
         spinlock_release(&tdq_lock);
         return td;
 }
 
-__noreturn static void
-sched_idle(void)
-{
-        for (;;) {
-                hint_spinwait();
-        }
-}
-
 /*
- * Processor awaiting tasks to be assigned will be here spinning.
+ * Create a new thread stack.
+ * sched_new_td() helper.
 */
-__noreturn static void
-sched_enter(void)
-{
-        sched_oneshot(false);
-        sched_idle();
-        __builtin_unreachable();
-}
-
 static uintptr_t
-sched_create_stack(struct vas vas, bool user, struct exec_args args,
-    struct proc *td)
+sched_create_stack(bool is_user, struct exec_args args, struct proc *td)
 {
         int status;
         uintptr_t stack;
-        const vm_prot_t USER_STACK_PROT = PROT_WRITE | PROT_USER;
-        struct vm_range *stack_range = &td->addr_range[ADDR_RANGE_STACK];
+        struct vm_range *stack_range;
+        const vm_prot_t USERSTACK_PROT = PROT_WRITE | PROT_USER;
+
+        stack_range = &td->addr_range[ADDR_RANGE_STACK];
 
-        if (!user) {
+        /*
+         * Kernel stacks can be allocated with dynalloc() as they
+         * are on the higher half.
+         */
+        if (!is_user) {
                 stack = (uintptr_t)dynalloc(PROC_STACK_SIZE);
-                stack_range->start = (uintptr_t)stack;
-                stack_range->end = (uintptr_t)stack + PROC_STACK_SIZE;
+                stack_range->start = stack;
+                stack_range->end = stack + PROC_STACK_SIZE;
                 return loader_init_stack((void *)(stack + PROC_STACK_SIZE), args);
         }
 
         stack = vm_alloc_pageframe(PROC_STACK_PAGES);
+        if (stack == 0) {
+                return 0;
+        }
 
-        stack_range->start = stack;
-        stack_range->end = stack + PROC_STACK_SIZE;
-        status = vm_map_create(vas, stack, stack, USER_STACK_PROT, PROC_STACK_SIZE);
-
+        status = vm_map_create(args.vas, stack, stack, USERSTACK_PROT, PROC_STACK_SIZE);
         if (status != 0) {
+                vm_free_pageframe(stack, PROC_STACK_PAGES);
                 return 0;
         }
 
+        stack_range->start = stack;
+        stack_range->end = stack + PROC_STACK_SIZE;
+
         memset(USER_TO_KERN(stack), 0, PROC_STACK_SIZE);
         stack = loader_init_stack((void *)USER_TO_KERN(stack + PROC_STACK_SIZE), args);
         return stack;
 }
 
-static struct proc *
-sched_create_td(uintptr_t rip, struct exec_args args, bool is_user,
-    struct vm_range *prog_range)
+/*
+ * Create a new thread.
+ *
+ * @ip: Instruction pointer to start at.
+ * @is_user: True for user program.
+ * @args: Common exec args.
+ */
+static int
+sched_new_td(uintptr_t ip, bool is_user, struct exec_args args, struct vm_range *prog_range,
+    struct proc **res)
 {
-        struct proc *td;
         struct vm_range *exec_range;
-        uintptr_t stack;
-        struct trapframe *tf;
+        struct proc *td;
+        struct trapframe *tf;
+        uintptr_t stack;
+        int retval = 0;
 
+        td = dynalloc(sizeof(struct proc));
         tf = dynalloc(sizeof(struct trapframe));
-        if (tf == NULL) {
-                return NULL;
+        if (td == NULL || tf == NULL) {
+                retval = -ENOMEM;
+                goto done;
         }
 
-        td = dynalloc(sizeof(struct proc));
-        if (td == NULL) {
-                /* TODO: Free stack */
-                dynfree(tf);
-                return NULL;
-        }
+        /* Keep them in a known state */
+        memset(td, 0, sizeof(*td));
+        memset(tf, 0, sizeof(*tf));
 
-        memset(td, 0, sizeof(struct proc));
-        stack = sched_create_stack(args.vas, is_user, args, td);
+        /* Try to create a stack */
+        stack = sched_create_stack(is_user, args, td);
         if (stack == 0) {
-                dynfree(tf);
-                dynfree(td);
-                return NULL;
+                retval = -ENOMEM;
+                goto done;
         }
 
-        memset(tf, 0, sizeof(struct trapframe));
-
-        /* Setup process itself */
-        exec_range = &td->addr_range[ADDR_RANGE_EXEC];
-        td->pid = 0; /* Don't assign PID until enqueued */
-        td->cpu = NULL; /* Not yet assigned a core */
+        /* Setup initial thread state */
+        td->pid = nthread++;
         td->tf = tf;
         td->addrsp = args.vas;
         td->is_user = is_user;
+        processor_init_pcb(td);
+
+        /* Setup each mapping table */
         for (size_t i = 0; i < MTAB_ENTRIES; ++i) {
-                /* Init the memory mapping table */
                 TAILQ_INIT(&td->mapspace.mtab[i]);
         }
 
-        if (prog_range != NULL) {
-                memcpy(exec_range, prog_range, sizeof(struct vm_range));
-        }
-        processor_init_pcb(td);
-
-        /* Allocate standard file descriptors */
-        __assert(fd_alloc(td, NULL) == 0); /* STDIN */
-        __assert(fd_alloc(td, NULL) == 0); /* STDOUT */
-        __assert(fd_alloc(td, NULL) == 0); /* STDERR */
+        /* Setup standard file descriptors */
+        __assert(fd_alloc(td, NULL) == 0); /* STDIN */
+        __assert(fd_alloc(td, NULL) == 0); /* STDOUT */
+        __assert(fd_alloc(td, NULL) == 0); /* STDERR */
+
+        exec_range = &td->addr_range[ADDR_RANGE_EXEC];
+        memcpy(exec_range, prog_range, sizeof(struct vm_range));
 
-        /* Setup trapframe */
+        /* Init the trapframe */
         if (!is_user) {
-                init_frame(tf, rip, (uintptr_t)stack);
+                init_frame(tf, ip, stack);
         } else {
-                init_frame_user(tf, rip, KERN_TO_USER(stack));
+                init_frame_user(tf, ip, KERN_TO_USER(stack));
+        }
+done:
+        if (retval != 0 && td != NULL)
+                dynfree(td);
+        if (retval != 0 && tf != NULL)
+                dynfree(tf);
+        if (retval == 0 && res != NULL)
+                *res = td;
+
+        return retval;
+}
+
+/*
+ * Perform timer oneshot
+ *
+ * @now: True for shortest timeslice.
+ */
+static inline void
+sched_oneshot(bool now)
+{
+        struct timer timer;
+        size_t usec = (now) ? SHORT_TIMESLICE_USEC : DEFAULT_TIMESLICE_USEC;
+        tmrr_status_t tmr_status;
+
+        tmr_status = req_timer(TIMER_SCHED, &timer);
+        __assert(tmr_status == TMRR_SUCCESS);
+
+        timer.oneshot_us(usec);
+}
+
+/*
+ * Enter the scheduler and wait until
+ * preemption.
+ */
+void
+sched_enter(void)
+{
+        sched_oneshot(false);
+
+        for (;;) {
+                hint_spinwait();
         }
-        return td;
+}
+
+/*
+ * Initialize all of the queues.
+ */
+static void
+sched_init_queues(void)
+{
+        for (size_t i = 0; i < SCHED_NQUEUE; ++i) {
+                TAILQ_INIT(&qlist[i].q);
+        }
+}
+
+/*
+ * Load the first thread (init)
+ */
+static void
+sched_load_init(void)
+{
+        struct exec_args args;
+        struct proc *init;
+        struct auxval *auxvp = &args.auxv;
+        struct vm_range init_range;
+        int tmp;
+
+        char *argv[] = {"/usr/sbin/init", NULL};
+        char *envp[] = {NULL};
+        const char *init_bin;
+
+        if ((init_bin = initramfs_open("/usr/sbin/init")) == NULL)
+                panic("Could not open /usr/sbin/init\n");
+
+        args.vas = pmap_create_vas(vm_get_ctx());
+        args.argp = argv;
+        args.envp = envp;
+
+        tmp = loader_load(args.vas, init_bin, auxvp, 0, NULL, &init_range);
+        if (tmp != 0)
+                panic("Failed to load init\n");
+
+        if (sched_new_td(auxvp->at_entry, true, args, &init_range, &init) != 0)
+                panic("Failed to create init thread\n");
+
+        sched_enqueue_td(init);
 }
 
 static void
 sched_destroy_td(struct proc *td)
 {
-        const struct vm_range *stack_range = &td->addr_range[ADDR_RANGE_STACK];
-        struct vm_range *exec_range = &td->addr_range[ADDR_RANGE_EXEC];
+        struct vm_range *stack_range;
+        struct vm_range *exec_range;
         vm_mapq_t *mapq;
 
         processor_free_pcb(td);
+        stack_range = &td->addr_range[ADDR_RANGE_STACK];
+        exec_range = &td->addr_range[ADDR_RANGE_EXEC];
 
         /*
-         * User stacks are allocated with vm_alloc_pageframe(),
-         * while kernel stacks are allocated with dynalloc().
-         * We want to check if we are a user program or kernel
-         * program to perform the proper deallocation method.
+         * User threads have their stacks allocated
+         * with vm_alloc_pageframe() and kernel threads
+         * have their stacks allocated with dynalloc().
          */
         if (td->is_user) {
                 vm_free_pageframe(stack_range->start, PROC_STACK_PAGES);
@@ -257,43 +364,20 @@ sched_destroy_td(struct proc *td)
                 dynfree((void *)stack_range->start);
         }
 
-        /* Close all of the file descriptors */
-        for (size_t i = 0; i < PROC_MAX_FDS; ++i) {
-                fd_close_fdnum(td, i);
-        }
-
         for (size_t i = 0; i < MTAB_ENTRIES; ++i) {
                 mapq = &td->mapspace.mtab[i];
                 vm_free_mapq(mapq);
         }
 
+        for (size_t i = 0; i < PROC_MAX_FDS; ++i) {
+                fd_close_fdnum(td, i);
+        }
+
         loader_unload(td->addrsp, exec_range);
         pmap_free_vas(vm_get_ctx(), td->addrsp);
         dynfree(td);
 }
 
-/*
- * Create the idle thread.
- */
-static void
-sched_make_idletd(void)
-{
-        struct exec_args exec_args = {0};
-        struct vm_range range;
-        struct proc *td;
-
-        char *argv[] = {NULL};
-        char *envp[] = {NULL};
-
-        exec_args.argp = argv;
-        exec_args.envp = envp;
-        exec_args.vas = pmap_read_vas();
-
-        td = sched_create_td((uintptr_t)sched_idle, exec_args, false, &range);
-
-        sched_enqueue_td(td);
-}
-
 /*
  * Cause an early preemption and lets
  * the next thread run.
  */
@@ -301,24 +385,29 @@ sched_make_idletd(void)
 void
 sched_rest(void)
 {
+        struct proc *td = this_td();
+
+        if (td == NULL)
+                return;
+
+        td->rested = 1;
         sched_oneshot(true);
 }
 
 void
 sched_exit(void)
 {
-        struct proc *td;
+        struct proc *td = this_td();
         struct vas kvas = vm_get_kvas();
 
+        spinlock_acquire(&tdq_lock);
         intr_mask();
 
-        td = this_td();
-        spinlock_acquire(&td->lock); /* Never release */
-
-        /* Switch back to the kernel address space and destroy ourself */
+        /* Switch to kernel VAS and destroy td */
         pmap_switch_vas(vm_get_ctx(), kvas);
-        TAILQ_REMOVE(&td_queue, td, link);
         sched_destroy_td(td);
+        spinlock_release(&tdq_lock);
 
         intr_unmask();
         sched_enter();
 }
@@ -339,52 +428,49 @@ this_td(void)
 
 /*
  * Thread context switch routine
+ *
+ * Handles the transition from the currently running
+ * thread to the next thread.
 */
 void
 sched_context_switch(struct trapframe *tf)
 {
         struct cpu_info *ci = this_cpu();
         struct sched_state *state = &ci->sched_state;
-        struct proc *td, *next_td;
+        struct proc *next_td, *td = state->td;
 
-        if (state->td != NULL) {
+        if (td != NULL) {
                 signal_handle(state->td);
         }
 
         /*
-         * If we have no threads, we should not
-         * preempt at all.
+         * If a thread is currently running and our policy is
+         * MLFQ, then we should update the thread's priority.
          */
-        if (nthread == 0 || (next_td = sched_dequeue_td()) == NULL) {
+        if (td != NULL && policy == SCHED_POLICY_MLFQ) {
+                td_pri_update(td);
+        }
+
+        /* Don't preempt if we have no threads */
+        if ((next_td = sched_dequeue_td()) == NULL) {
                 sched_oneshot(false);
                 return;
         }
 
         /*
-         * If we have a thread currently running and we are switching
-         * to another, we shall save our current register state
-         * by copying the trapframe.
+         * If we have a thread currently running, then we should save
+         * its current register state and re-enqueue it.
          */
-        if (state->td != NULL) {
-                td = state->td;
-                memcpy(td->tf, tf, sizeof(struct trapframe));
-        }
-
-        /* Copy over the next thread's register state to us */
-        memcpy(tf, next_td->tf, sizeof(struct trapframe));
-
-        td = state->td;
-        state->td = next_td;
-
-        /* Re-enqueue the previous thread if it exists */
         if (td != NULL) {
+                memcpy(td->tf, tf, sizeof(struct trapframe));
                 sched_enqueue_td(td);
         }
 
-        /* Do architecture specific context switch logic */
+        /* Perform the switch */
+        memcpy(tf, next_td->tf, sizeof(struct trapframe));
         processor_switch_to(td, next_td);
 
-        /* Done, switch out our vas and oneshot */
+        state->td = next_td;
         pmap_switch_vas(vm_get_ctx(), next_td->addrsp);
         sched_oneshot(false);
 }
@@ -399,50 +485,6 @@ sys_exit(struct syscall_args *args)
 void
 sched_init(void)
 {
-        struct proc *init;
-        struct vm_range init_range;
-        const char *init_bin;
-
-        struct exec_args exec_args = {0};
-        struct auxval *auxvp = &exec_args.auxv;
-        struct vas vas;
-
-        char *argv[] = {"/usr/sbin/init", NULL};
-        char *envp[] = {NULL};
-
-        vas = pmap_create_vas(vm_get_ctx());
-        exec_args.vas = vas;
-        exec_args.argp = argv;
-        exec_args.envp = envp;
-
-        TAILQ_INIT(&td_queue);
-        sched_make_idletd();
-
-        if ((init_bin = initramfs_open("/usr/sbin/init")) == NULL) {
-                panic("Could not open /usr/sbin/init\n");
-        }
-        if (loader_load(vas, init_bin, auxvp, 0, NULL, &init_range) != 0) {
-                panic("Could not load init\n");
-        }
-
-        init = sched_create_td((uintptr_t)auxvp->at_entry, exec_args, true, &init_range);
-        if (init == NULL) {
-                panic("Failed to create thread for init\n");
-        }
-
-        sched_enqueue_td(init);
-}
-
-/*
- * Setup scheduler related things and enqueue AP.
- */
-void
-sched_init_processor(struct cpu_info *ci)
-{
-        struct sched_state *sched_state = &ci->sched_state;
-        (void)sched_state; /* TODO */
-
-        sched_enter();
-
-        __builtin_unreachable();
+        sched_init_queues();
+        sched_load_init();
 }
--
cgit v1.2.3
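
The core of the MLFQ policy this patch introduces can be summarized in a few lines: a thread that burns its whole timeslice sinks one queue, a thread that rested (yielded early via sched_rest()) rises one queue, and the dispatcher always scans the queues starting at index 0, which is the highest-priority queue. The sketch below is a minimal, self-contained user-space model of that behavior, not Hyra code: the toy_thread type, NQUEUE, and the pri_* helpers are invented for illustration and only mirror struct proc's priority/rested fields and the td_pri_update()/sched_dequeue_td() logic above by analogy.

#include <stdio.h>

#define NQUEUE 4

/* Toy stand-ins for struct proc's `priority' and `rested' fields. */
struct toy_thread {
        int priority;   /* 0 = highest-priority queue */
        int rested;     /* 1 if the thread yielded before its slice expired */
};

/* Lower priority: move one queue down (numerically higher index). */
static void
pri_lower(struct toy_thread *t)
{
        if (t->priority < NQUEUE - 1)
                ++t->priority;
}

/* Raise priority: move one queue up (numerically lower index). */
static void
pri_raise(struct toy_thread *t)
{
        if (t->priority > 0)
                --t->priority;
}

/*
 * Preemption-time update: rested threads rise,
 * threads that used their whole slice sink.
 */
static void
pri_update(struct toy_thread *t)
{
        if (t->rested) {
                t->rested = 0;
                pri_raise(t);
                return;
        }
        pri_lower(t);
}

int
main(void)
{
        struct toy_thread cpu_hog = {0, 0};   /* never yields */
        struct toy_thread io_bound = {3, 0};  /* yields every slice */

        for (int tick = 0; tick < 4; ++tick) {
                io_bound.rested = 1;          /* as if it had called sched_rest() */
                pri_update(&cpu_hog);
                pri_update(&io_bound);
                printf("tick %d: cpu_hog -> queue %d, io_bound -> queue %d\n",
                    tick, cpu_hog.priority, io_bound.priority);
        }
        return 0;
}

Running the model shows the CPU-bound thread drifting toward queue NQUEUE - 1 while the yielding thread climbs to queue 0; starting the dequeue scan at index 0 is what then gives the interactive thread preference on the next context switch.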