Diffstat (limited to 'sys/kern/kern_sched.c')
-rw-r--r-- | sys/kern/kern_sched.c | 285
1 file changed, 213 insertions(+), 72 deletions(-)
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
index 4bbe5a0..9c5e215 100644
--- a/sys/kern/kern_sched.c
+++ b/sys/kern/kern_sched.c
@@ -34,6 +34,7 @@
 #include <sys/param.h>
 #include <sys/syslog.h>
 #include <sys/atomic.h>
+#include <dev/cons/cons.h>
 #include <machine/frame.h>
 #include <machine/cpu.h>
 #include <machine/cdefs.h>
@@ -44,7 +45,8 @@
 #define pr_trace(fmt, ...) kprintf("ksched: " fmt, ##__VA_ARGS__)
 
-void sched_switch(struct trapframe *tf);
+void md_sched_switch(struct trapframe *tf);
+void sched_accnt_init(void);
 
 static sched_policy_t policy = SCHED_POLICY_MLFQ;
@@ -63,7 +65,7 @@ __cacheline_aligned static struct spinlock tdq_lock = {0};
 /*
  * Perform timer oneshot
  */
-static inline void
+void
 sched_oneshot(bool now)
 {
         struct timer timer;
@@ -77,39 +79,75 @@ sched_oneshot(bool now)
 }
 
 /*
- * Save thread state and enqueue it back into one
- * of the ready queues.
+ * Returns true if a processor is associated
+ * with a specific thread
+ *
+ * @ci: CPU that wants to take 'td'
+ * @td: Thread to check against
  */
-static void
-sched_save_td(struct proc *td, struct trapframe *tf)
+static bool
+cpu_is_assoc(struct cpu_info *ci, struct proc *td)
 {
         /*
-         * Save trapframe to process structure only
-         * if PROC_EXEC is not set.
+         * If we are not pinned, any processor is
+         * associated.
          */
-        if (!ISSET(td->flags, PROC_EXEC)) {
-                memcpy(&td->tf, tf, sizeof(td->tf));
+        if (!ISSET(td->flags, PROC_PINNED)) {
+                return true;
         }
 
-        sched_enqueue_td(td);
+        return ci->id == td->affinity;
 }
 
-static struct proc *
+struct proc *
 sched_dequeue_td(void)
 {
         struct sched_queue *queue;
         struct proc *td = NULL;
+        struct cpu_info *ci;
+        uint32_t ncpu = 0;
 
         spinlock_acquire(&tdq_lock);
+        ci = this_cpu();
 
         for (size_t i = 0; i < SCHED_NQUEUE; ++i) {
                 queue = &qlist[i];
-                if (!TAILQ_EMPTY(&queue->q)) {
-                        td = TAILQ_FIRST(&queue->q);
-                        TAILQ_REMOVE(&queue->q, td, link);
-                        spinlock_release(&tdq_lock);
-                        return td;
+                if (TAILQ_EMPTY(&queue->q)) {
+                        continue;
                 }
+
+                td = TAILQ_FIRST(&queue->q);
+                if (td == NULL) {
+                        continue;
+                }
+
+                while (ISSET(td->flags, PROC_SLEEP)) {
+                        td = TAILQ_NEXT(td, link);
+                        if (td == NULL) {
+                                break;
+                        }
+                }
+
+                /*
+                 * If we are on a multicore system and this isn't
+                 * our process, don't take it. Some threads might
+                 * be pinned to a specific processor.
+                 */
+                ncpu = cpu_count();
+                while (!cpu_is_assoc(ci, td) && ncpu > 1) {
+                        td = TAILQ_NEXT(td, link);
+                        if (td == NULL) {
+                                break;
+                        }
+                }
+
+                if (td == NULL) {
+                        continue;
+                }
+
+                TAILQ_REMOVE(&queue->q, td, link);
+                spinlock_release(&tdq_lock);
+                return td;
         }
 
         /* We got nothing */
@@ -141,6 +179,9 @@ this_td(void)
         struct cpu_info *ci;
 
         ci = this_cpu();
+        if (ci == NULL) {
+                return NULL;
+        }
         return ci->curtd;
 }
 
@@ -177,62 +218,21 @@ td_pri_update(struct proc *td)
 }
 
 /*
- * Perform a context switch.
+ * MI work to be done during a context
+ * switch. Called by md_sched_switch()
  */
 void
-sched_switch(struct trapframe *tf)
+mi_sched_switch(struct proc *from)
 {
-        struct cpu_info *ci;
-        struct pcb *pcbp;
-        struct proc *next_td, *td;
-        bool use_current = true;
-
-        ci = this_cpu();
-        td = ci->curtd;
-
-        if (td != NULL) {
-                dispatch_signals(td);
-                td_pri_update(td);
-        }
-
-        /*
-         * Get the next thread and use it only if it isn't
-         * in the middle of an exit, exec, or whatever.
-         */
-        do {
-                if ((next_td = sched_dequeue_td()) == NULL) {
-                        sched_oneshot(false);
+        if (from != NULL) {
+                if (from->pid == 0)
                         return;
-                }
 
-                /*
-                 * If we are in the middle of an exec, don't use this
-                 * thread.
-                 */
-                if (ISSET(next_td->flags, PROC_EXEC)) {
-                        use_current = false;
-                }
-
-                /*
-                 * Don't use this thread if we are currently
-                 * exiting.
-                 */
-                if (ISSET(next_td->flags, PROC_EXITING)) {
-                        use_current = false;
-                }
-        } while (!use_current);
-
-        /* Save the previous thread */
-        if (td != NULL) {
-                sched_save_td(td, tf);
+                dispatch_signals(from);
+                td_pri_update(from);
         }
 
-        memcpy(tf, &next_td->tf, sizeof(*tf));
-        ci->curtd = next_td;
-        pcbp = &next_td->pcb;
-
-        pmap_switch_vas(pcbp->addrsp);
-        sched_oneshot(false);
+        cons_detach();
 }
 
 /*
@@ -242,9 +242,8 @@
 void
 sched_enter(void)
 {
         md_inton();
-        md_sync_all();
+        sched_oneshot(false);
         for (;;) {
-                sched_oneshot(false);
                 md_pause();
         }
 }
@@ -252,14 +251,154 @@ sched_enter(void)
 
 void
 sched_yield(void)
 {
-        struct proc *td = this_td();
+        struct proc *td;
+        struct cpu_info *ci = this_cpu();
 
-        if (td != NULL) {
-                td->rested = true;
+        if ((td = ci->curtd) == NULL) {
+                return;
         }
 
+        td->rested = true;
+
+        /* FIXME: Hang yielding when waited on */
+        if (ISSET(td->flags, PROC_WAITED)) {
+                return;
+        }
+
+        ci->curtd = NULL;
+        md_inton();
         sched_oneshot(false);
-        while (td->rested);
+
+        md_hlt();
+        md_intoff();
+        ci->curtd = td;
+}
+
+void
+sched_detach(struct proc *td)
+{
+        struct sched_queue *queue;
+
+        spinlock_acquire(&tdq_lock);
+        queue = &qlist[td->priority];
+
+        TAILQ_REMOVE(&queue->q, td, link);
+        spinlock_release(&tdq_lock);
+}
+
+/*
+ * Pin a process to a specific processor
+ *
+ * @td: Process to pin
+ * @cpu: Logical processor ID to pin `td' to.
+ *
+ * XXX: 'cpu' is a machine independent value, representing
+ *      CPU<n>
+ */
+void
+proc_pin(struct proc *td, affinity_t cpu)
+{
+        td->affinity = cpu;
+        td->flags |= PROC_PINNED;
+}
+
+/*
+ * Unpin a pinned process, allowing it to be
+ * picked up by any processor
+ *
+ * @td: Process to unpin
+ */
+void
+proc_unpin(struct proc *td)
+{
+        td->affinity = 0;
+        td->flags &= ~PROC_PINNED;
+}
+
+/*
+ * Suspend a process for a specified amount
+ * of time. This calling process will yield for
+ * the amount of time specified in 'tv'
+ *
+ * @td: Process to suspend (NULL for current)
+ * @tv: Time value to use
+ *
+ * XXX: 'tv' being NULL is equivalent to calling
+ *      sched_detach()
+ */
+void
+sched_suspend(struct proc *td, const struct timeval *tv)
+{
+        struct timer tmr;
+        const time_t USEC_PER_SEC = 1000000;
+        ssize_t usec;
+        time_t usec_cur, usec_tmp;
+        bool have_timer = true;
+        tmrr_status_t tmr_status;
+
+        if (td == NULL)
+                td = this_td();
+        if (__unlikely(td == NULL))
+                return;
+
+        if (tv == NULL) {
+                sched_detach(td);
+                return;
+        }
+
+        /*
+         * Now, we need a generic timer so that we can compute
+         * how much time has elapsed since this process has
+         * requested to be suspended. However, we cannot assume
+         * that it would be present. If the lookup fails, all we
+         * can do is try to estimate how much time went by which
+         * works fine too, just not as accurate.
+         */
+        tmr_status = req_timer(TIMER_GP, &tmr);
+        if (tmr_status != TMRR_SUCCESS) {
+                have_timer = false;
+        }
+
+        /* We need microsecond precision */
+        if (tmr.get_time_sec == NULL) {
+                have_timer = false;
+        }
+
+        /*
+         * Compute the max time in microseconds that
+         * we will wait. We are using both tv->tv_sec
+         * and tv->tv_usec
+         */
+        usec = tv->tv_usec;
+        usec += tv->tv_sec * USEC_PER_SEC;
+        usec_cur = (have_timer) ? tmr.get_time_usec() : 0;
+
+        for (;;) {
+                sched_yield();
+
+                /*
+                 * If we have a timer in our paws, compute how much
+                 * time went by. Otherwise we estimate by subtracting
+                 * the scheduler quantum.
+                 *
+                 * XXX: The timing here works decently as intended. However,
+                 *      it would be nice to smoothen out any jitter. Such can
+                 *      probably be done by subtracting 'usec' by the exponential
+                 *      moving average of 'usec_tmp' rather than the raw original
+                 *      value.
+                 */
+                if (have_timer) {
+                        usec_tmp = (tmr.get_time_usec() - usec_cur);
+                } else {
+                        usec_tmp = DEFAULT_TIMESLICE_USEC;
+                }
 
+                /* We are done here! */
+                usec -= usec_tmp;
+                if (usec <= 0) {
+                        break;
+                }
+        }
 }
 
 void
@@ -272,4 +411,6 @@ sched_init(void)
 
         pr_trace("prepared %d queues (policy=0x%x)\n",
             SCHED_NQUEUE, policy);
+
+        sched_accnt_init();
 }
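The proc_pin()/proc_unpin() pair is what the new affinity walk in
sched_dequeue_td() builds on: once PROC_PINNED is set, cpu_is_assoc()
only matches the processor whose ci->id equals td->affinity. A minimal
usage sketch of the interface as landed here; the caller, its name, and
the include paths are hypothetical, not part of this commit:

#include <sys/proc.h>      /* assumed home of struct proc / proc_pin() */
#include <sys/sched.h>     /* assumed home of this_td() */

static void
run_on_cpu0(void)
{
        struct proc *td;

        /* this_td() may now return NULL (see the this_td() hunk) */
        if ((td = this_td()) == NULL)
                return;

        /*
         * Pin to logical CPU 0: sched_dequeue_td() on every other
         * processor will now skip this thread.
         */
        proc_pin(td, 0);

        /* ... CPU-local work ... */

        /* Clear PROC_PINNED; any processor may take the thread again */
        proc_unpin(td);
}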
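sched_suspend() folds tv_sec and tv_usec into a single microsecond
budget and treats a NULL 'td' as the calling process, so a fixed delay
only needs a filled-in timeval. Another hypothetical caller, assuming
nothing beyond what the diff above declares:

static void
nap_quarter_second(void)
{
        struct timeval tv;

        tv.tv_sec = 0;
        tv.tv_usec = 250000;    /* ~250 ms; both fields are summed */

        /* NULL 'td' means "suspend the calling process" */
        sched_suspend(NULL, &tv);
}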
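The XXX note in sched_suspend() floats replacing the raw per-wakeup
elapsed time with an exponential moving average before it is subtracted
from the budget. A fixed-point sketch of that idea; the helper, its
name, and the 1/8 smoothing factor are all invented here, not the
commit's code:

#define EMA_SHIFT 3     /* alpha = 1/8 in fixed point; a tuning knob */

/*
 * Fold a new elapsed-time sample into a running average:
 * ema += (sample - ema) * alpha, kept in integer arithmetic.
 * Seeding with the first raw sample avoids a slow ramp-up
 * from zero.
 */
static time_t
usec_ema_update(time_t *ema, time_t sample)
{
        if (*ema == 0)
                *ema = sample;
        else
                *ema += (sample - *ema) / (1 << EMA_SHIFT);

        return *ema;
}

Inside the wait loop, 'usec -= usec_ema_update(&ema, usec_tmp);' would
then stand in for the raw 'usec -= usec_tmp;'.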