Diffstat (limited to 'sys/kern/kern_sched.c')
-rw-r--r--  sys/kern/kern_sched.c  285
1 file changed, 213 insertions(+), 72 deletions(-)
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
index 4bbe5a0..9c5e215 100644
--- a/sys/kern/kern_sched.c
+++ b/sys/kern/kern_sched.c
@@ -34,6 +34,7 @@
#include <sys/param.h>
#include <sys/syslog.h>
#include <sys/atomic.h>
+#include <dev/cons/cons.h>
#include <machine/frame.h>
#include <machine/cpu.h>
#include <machine/cdefs.h>
@@ -44,7 +45,8 @@
#define pr_trace(fmt, ...) kprintf("ksched: " fmt, ##__VA_ARGS__)
-void sched_switch(struct trapframe *tf);
+void md_sched_switch(struct trapframe *tf);
+void sched_accnt_init(void);
static sched_policy_t policy = SCHED_POLICY_MLFQ;
@@ -63,7 +65,7 @@ __cacheline_aligned static struct spinlock tdq_lock = {0};
/*
* Perform timer oneshot
*/
-static inline void
+void
sched_oneshot(bool now)
{
struct timer timer;
@@ -77,39 +79,75 @@ sched_oneshot(bool now)
}
/*
- * Save thread state and enqueue it back into one
- * of the ready queues.
+ * Returns true if a processor is associated
+ * with a specific thread
+ *
+ * @ci: CPU that wants to take 'td'
+ * @td: Thread to check against
*/
-static void
-sched_save_td(struct proc *td, struct trapframe *tf)
+static bool
+cpu_is_assoc(struct cpu_info *ci, struct proc *td)
{
/*
- * Save trapframe to process structure only
- * if PROC_EXEC is not set.
+ * If we are not pinned, any processor is
+ * associated.
*/
- if (!ISSET(td->flags, PROC_EXEC)) {
- memcpy(&td->tf, tf, sizeof(td->tf));
+ if (!ISSET(td->flags, PROC_PINNED)) {
+ return true;
}
- sched_enqueue_td(td);
+ return ci->id == td->affinity;
}
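
The association rule reduces to: an unpinned thread may run anywhere, while a
pinned thread may run only on the CPU whose id matches its affinity. A
standalone sketch of the same predicate (the toy_* names and F_PINNED flag are
illustrative stand-ins, not the kernel's types):

	#include <stdbool.h>
	#include <stdint.h>

	#define F_PINNED 0x01		/* stand-in for PROC_PINNED */

	struct toy_cpu { uint32_t id; };
	struct toy_td  { uint32_t flags; uint32_t affinity; };

	/* Mirrors cpu_is_assoc(): unpinned threads match any CPU */
	static bool
	toy_is_assoc(const struct toy_cpu *ci, const struct toy_td *td)
	{
		if ((td->flags & F_PINNED) == 0)
			return true;
		return ci->id == td->affinity;
	}
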
-static struct proc *
+struct proc *
sched_dequeue_td(void)
{
struct sched_queue *queue;
struct proc *td = NULL;
+ struct cpu_info *ci;
+ uint32_t ncpu = 0;
spinlock_acquire(&tdq_lock);
+ ci = this_cpu();
for (size_t i = 0; i < SCHED_NQUEUE; ++i) {
queue = &qlist[i];
- if (!TAILQ_EMPTY(&queue->q)) {
- td = TAILQ_FIRST(&queue->q);
- TAILQ_REMOVE(&queue->q, td, link);
- spinlock_release(&tdq_lock);
- return td;
+ if (TAILQ_EMPTY(&queue->q)) {
+ continue;
}
+
+ td = TAILQ_FIRST(&queue->q);
+ if (td == NULL) {
+ continue;
+ }
+
+ while (ISSET(td->flags, PROC_SLEEP)) {
+ td = TAILQ_NEXT(td, link);
+ if (td == NULL) {
+ break;
+ }
+		}
+
+		/* The sleep scan may have run off the end of the queue */
+		if (td == NULL) {
+			continue;
+		}
+
+ /*
+ * If we are on a multicore system and this isn't
+ * our process, don't take it. Some threads might
+ * be pinned to a specific processor.
+ */
+ ncpu = cpu_count();
+ while (!cpu_is_assoc(ci, td) && ncpu > 1) {
+ td = TAILQ_NEXT(td, link);
+ if (td == NULL) {
+ break;
+ }
+ }
+
+ if (td == NULL) {
+ continue;
+ }
+
+ TAILQ_REMOVE(&queue->q, td, link);
+ spinlock_release(&tdq_lock);
+ return td;
}
/* We got nothing */
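
Taken together, the dequeue path is an eligibility scan: walk each queue from
highest priority down, skip anything sleeping, skip anything pinned to another
CPU, and take the first survivor. A minimal user-space model of one queue's
scan using <sys/queue.h>, folding both checks into a single pass (the toy_*
names and flag values are illustrative):

	#include <stddef.h>
	#include <sys/queue.h>

	#define F_SLEEP  0x01	/* stand-in for PROC_SLEEP  */
	#define F_PINNED 0x02	/* stand-in for PROC_PINNED */

	struct toy_td {
		unsigned flags;
		unsigned affinity;
		TAILQ_ENTRY(toy_td) link;
	};
	TAILQ_HEAD(toy_q, toy_td);

	/* First entry this CPU may take, or NULL if none qualify */
	static struct toy_td *
	pick_eligible(struct toy_q *q, unsigned cpu_id, unsigned ncpu)
	{
		struct toy_td *td;

		TAILQ_FOREACH(td, q, link) {
			if (td->flags & F_SLEEP)
				continue;
			if (ncpu > 1 && (td->flags & F_PINNED) &&
			    td->affinity != cpu_id)
				continue;
			return td;
		}
		return NULL;
	}

Folding the checks into one loop would also close the gap left by the two
separate scans above, where the affinity walk can stop on a thread that is
still marked PROC_SLEEP.
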
@@ -141,6 +179,9 @@ this_td(void)
struct cpu_info *ci;
ci = this_cpu();
+ if (ci == NULL) {
+ return NULL;
+ }
return ci->curtd;
}
@@ -177,62 +218,21 @@ td_pri_update(struct proc *td)
}
/*
- * Perform a context switch.
+ * MI work to be done during a context
+ * switch. Called by md_sched_switch()
*/
void
-sched_switch(struct trapframe *tf)
+mi_sched_switch(struct proc *from)
{
- struct cpu_info *ci;
- struct pcb *pcbp;
- struct proc *next_td, *td;
- bool use_current = true;
-
- ci = this_cpu();
- td = ci->curtd;
-
- if (td != NULL) {
- dispatch_signals(td);
- td_pri_update(td);
- }
-
- /*
- * Get the next thread and use it only if it isn't
- * in the middle of an exit, exec, or whatever.
- */
- do {
- if ((next_td = sched_dequeue_td()) == NULL) {
- sched_oneshot(false);
+ if (from != NULL) {
+ if (from->pid == 0)
return;
- }
- /*
- * If we are in the middle of an exec, don't use this
- * thread.
- */
- if (ISSET(next_td->flags, PROC_EXEC)) {
- use_current = false;
- }
-
- /*
- * Don't use this thread if we are currently
- * exiting.
- */
- if (ISSET(next_td->flags, PROC_EXITING)) {
- use_current = false;
- }
- } while (!use_current);
-
- /* Save the previous thread */
- if (td != NULL) {
- sched_save_td(td, tf);
+ dispatch_signals(from);
+ td_pri_update(from);
}
- memcpy(tf, &next_td->tf, sizeof(*tf));
- ci->curtd = next_td;
- pcbp = &next_td->pcb;
-
- pmap_switch_vas(pcbp->addrsp);
- sched_oneshot(false);
+ cons_detach();
}
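
The switch logic removed here did not disappear; it moved to the
machine-dependent md_sched_switch(), which is expected to call
mi_sched_switch() before swapping register state. A hedged reconstruction of
that shape, pieced together from the removed sched_switch() body (the actual
MD implementation may differ):

	void
	md_sched_switch(struct trapframe *tf)
	{
		struct cpu_info *ci = this_cpu();
		struct proc *from = ci->curtd, *next;

		/* MI bookkeeping: signals, priority update, console detach */
		mi_sched_switch(from);

		/* Nothing runnable: re-arm the timer and return to 'from' */
		if ((next = sched_dequeue_td()) == NULL) {
			sched_oneshot(false);
			return;
		}

		/*
		 * Save the outgoing register state and re-enqueue the old
		 * thread (the removed code skipped the save when PROC_EXEC
		 * was set), then load the incoming state.
		 */
		if (from != NULL) {
			memcpy(&from->tf, tf, sizeof(from->tf));
			sched_enqueue_td(from);
		}
		memcpy(tf, &next->tf, sizeof(*tf));

		ci->curtd = next;
		pmap_switch_vas(next->pcb.addrsp);
		sched_oneshot(false);
	}
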
/*
@@ -242,9 +242,8 @@ void
sched_enter(void)
{
md_inton();
- md_sync_all();
+ sched_oneshot(false);
for (;;) {
- sched_oneshot(false);
md_pause();
}
}
@@ -252,14 +251,154 @@ sched_enter(void)
void
sched_yield(void)
{
- struct proc *td = this_td();
+ struct proc *td;
+ struct cpu_info *ci = this_cpu();
- if (td != NULL) {
- td->rested = true;
+ if ((td = ci->curtd) == NULL) {
+ return;
}
+ td->rested = true;
+
+	/* FIXME: Yielding hangs while we are being waited on */
+ if (ISSET(td->flags, PROC_WAITED)) {
+ return;
+ }
+
+ ci->curtd = NULL;
+ md_inton();
sched_oneshot(false);
- while (td->rested);
+
+ md_hlt();
+ md_intoff();
+ ci->curtd = td;
+}
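
sched_yield() now genuinely parks the CPU (md_hlt() until the next timer tick)
instead of spinning on td->rested. A hypothetical caller polls a condition and
yields between checks, much as sched_suspend() below does:

	/* Hypothetical polling helper: sleep-poll a completion flag */
	static void
	wait_for_done(volatile int *done)
	{
		while (*done == 0)
			sched_yield();
	}
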
+
+void
+sched_detach(struct proc *td)
+{
+ struct sched_queue *queue;
+
+ spinlock_acquire(&tdq_lock);
+ queue = &qlist[td->priority];
+
+ TAILQ_REMOVE(&queue->q, td, link);
+ spinlock_release(&tdq_lock);
+}
+
+/*
+ * Pin a process to a specific processor
+ *
+ * @td: Process to pin
+ * @cpu: Logical processor ID to pin `td' to.
+ *
+ * XXX: 'cpu' is a machine-independent value representing
+ * CPU<n>
+ */
+void
+proc_pin(struct proc *td, affinity_t cpu)
+{
+ td->affinity = cpu;
+ td->flags |= PROC_PINNED;
+}
+
+/*
+ * Unpin a pinned process, allowing it to be
+ * picked up by any processor
+ *
+ * @td: Process to unpin
+ */
+void
+proc_unpin(struct proc *td)
+{
+ td->affinity = 0;
+ td->flags &= ~PROC_PINNED;
+}
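
Intended usage of the pin interface is bracket-style around a CPU-sensitive
section (hypothetical example; the choice of CPU 1 and the workload are
arbitrary):

	struct proc *td = this_td();

	proc_pin(td, 1);	/* only CPU 1 may dequeue us now */
	do_cache_hot_work();	/* hypothetical workload */
	proc_unpin(td);		/* any CPU may pick us up again */

Note that proc_unpin() resets the affinity to 0 unconditionally, so pins do
not nest; clearing PROC_PINNED is what makes the thread schedulable anywhere,
since an affinity of 0 would otherwise name CPU 0.
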
+
+/*
+ * Suspend a process for a specified amount
+ * of time. The calling process will yield for
+ * the amount of time specified in 'tv'.
+ *
+ * @td: Process to suspend (NULL for current)
+ * @tv: Time value to use
+ *
+ * XXX: 'tv' being NULL is equivalent to calling
+ * sched_detach()
+ */
+void
+sched_suspend(struct proc *td, const struct timeval *tv)
+{
+ struct timer tmr;
+ const time_t USEC_PER_SEC = 1000000;
+ ssize_t usec;
+ time_t usec_cur, usec_tmp;
+ bool have_timer = true;
+ tmrr_status_t tmr_status;
+
+ if (td == NULL)
+ td = this_td();
+ if (__unlikely(td == NULL))
+ return;
+
+ if (tv == NULL) {
+ sched_detach(td);
+ return;
+ }
+
+	/*
+	 * We want a generic timer so that we can compute how
+	 * much time has elapsed since this process requested
+	 * to be suspended. However, we cannot assume that one
+	 * is present. If the lookup fails, all we can do is
+	 * estimate how much time went by, which works too,
+	 * just less accurately.
+	 */
+	tmr_status = req_timer(TIMER_GP, &tmr);
+	if (tmr_status != TMRR_SUCCESS) {
+		have_timer = false;
+	}
+
+	/* We need microsecond precision */
+	if (have_timer && tmr.get_time_usec == NULL) {
+		have_timer = false;
+	}
+
+ /*
+ * Compute the max time in microseconds that
+ * we will wait. We are using both tv->tv_sec
+ * and tv->tv_usec
+ */
+ usec = tv->tv_usec;
+ usec += tv->tv_sec * USEC_PER_SEC;
+ usec_cur = (have_timer) ? tmr.get_time_usec() : 0;
+
+ for (;;) {
+ sched_yield();
+
+		/*
+		 * If we have a timer in our paws, compute how much
+		 * time went by since the last check. Otherwise we
+		 * estimate by assuming one scheduler quantum has
+		 * elapsed.
+		 *
+		 * XXX: The timing here works decently as intended.
+		 * However, it would be nice to smooth out any jitter.
+		 * That could probably be done by subtracting the
+		 * exponential moving average of 'usec_tmp' from
+		 * 'usec' rather than the raw value.
+		 */
+		if (have_timer) {
+			usec_tmp = (tmr.get_time_usec() - usec_cur);
+			usec_cur += usec_tmp;
+		} else {
+			usec_tmp = DEFAULT_TIMESLICE_USEC;
+		}
+
+		/* Stop once the requested time has elapsed */
+ usec -= usec_tmp;
+ if (usec <= 0) {
+ break;
+ }
+ }
}
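
The XXX above suggests smoothing jitter with an exponential moving average of
the per-iteration delta. A sketch of what the end of sched_suspend() could
look like in that variant (the 1/8 smoothing factor and the 'ema' variable are
assumptions, not part of this change):

	time_t ema = 0;

	for (;;) {
		sched_yield();

		if (have_timer) {
			usec_tmp = tmr.get_time_usec() - usec_cur;
			usec_cur += usec_tmp;
		} else {
			usec_tmp = DEFAULT_TIMESLICE_USEC;
		}

		/* ema += alpha * (sample - ema), with alpha = 1/8 */
		if (ema == 0)
			ema = usec_tmp;		/* seed on first sample */
		else
			ema += (usec_tmp - ema) / 8;

		usec -= ema;
		if (usec <= 0)
			break;
	}
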
void
@@ -272,4 +411,6 @@ sched_init(void)
pr_trace("prepared %d queues (policy=0x%x)\n",
SCHED_NQUEUE, policy);
+
+ sched_accnt_init();
}