diff options
-rw-r--r-- | kernel/sched.c | 8 | ||||
-rw-r--r-- | kernel/sched_rt.c | 225 |
2 files changed, 231 insertions, 2 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 6185fa080ec..97cab609fc3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1952,6 +1952,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) prev_state = prev->state; finish_arch_switch(prev); finish_lock_switch(rq, prev); + schedule_tail_balance_rt(rq); + fire_sched_in_preempt_notifiers(current); if (mm) mmdrop(mm); @@ -2185,11 +2187,13 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) /* * double_lock_balance - lock the busiest runqueue, this_rq is locked already. */ -static void double_lock_balance(struct rq *this_rq, struct rq *busiest) +static int double_lock_balance(struct rq *this_rq, struct rq *busiest) __releases(this_rq->lock) __acquires(busiest->lock) __acquires(this_rq->lock) { + int ret = 0; + if (unlikely(!irqs_disabled())) { /* printk() doesn't work good under rq->lock */ spin_unlock(&this_rq->lock); @@ -2200,9 +2204,11 @@ static void double_lock_balance(struct rq *this_rq, struct rq *busiest) spin_unlock(&this_rq->lock); spin_lock(&busiest->lock); spin_lock(&this_rq->lock); + ret = 1; } else spin_lock(&busiest->lock); } + return ret; } /* diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 136c2857a04..7815e90b114 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -137,6 +137,227 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) } #ifdef CONFIG_SMP +/* Only try algorithms three times */ +#define RT_MAX_TRIES 3 + +static int double_lock_balance(struct rq *this_rq, struct rq *busiest); +static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); + +/* Return the second highest RT task, NULL otherwise */ +static struct task_struct *pick_next_highest_task_rt(struct rq *rq) +{ + struct rt_prio_array *array = &rq->rt.active; + struct task_struct *next; + struct list_head *queue; + int idx; + + assert_spin_locked(&rq->lock); + + if (likely(rq->rt.rt_nr_running < 2)) + return NULL; + + idx = sched_find_first_bit(array->bitmap); + if (unlikely(idx >= MAX_RT_PRIO)) { + WARN_ON(1); /* rt_nr_running is bad */ + return NULL; + } + + queue = array->queue + idx; + next = list_entry(queue->next, struct task_struct, run_list); + if (unlikely(next != rq->curr)) + return next; + + if (queue->next->next != queue) { + /* same prio task */ + next = list_entry(queue->next->next, struct task_struct, run_list); + return next; + } + + /* slower, but more flexible */ + idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1); + if (unlikely(idx >= MAX_RT_PRIO)) { + WARN_ON(1); /* rt_nr_running was 2 and above! */ + return NULL; + } + + queue = array->queue + idx; + next = list_entry(queue->next, struct task_struct, run_list); + + return next; +} + +static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); + +/* Will lock the rq it finds */ +static struct rq *find_lock_lowest_rq(struct task_struct *task, + struct rq *this_rq) +{ + struct rq *lowest_rq = NULL; + int cpu; + int tries; + cpumask_t *cpu_mask = &__get_cpu_var(local_cpu_mask); + + cpus_and(*cpu_mask, cpu_online_map, task->cpus_allowed); + + for (tries = 0; tries < RT_MAX_TRIES; tries++) { + /* + * Scan each rq for the lowest prio. + */ + for_each_cpu_mask(cpu, *cpu_mask) { + struct rq *rq = &per_cpu(runqueues, cpu); + + if (cpu == this_rq->cpu) + continue; + + /* We look for lowest RT prio or non-rt CPU */ + if (rq->rt.highest_prio >= MAX_RT_PRIO) { + lowest_rq = rq; + break; + } + + /* no locking for now */ + if (rq->rt.highest_prio > task->prio && + (!lowest_rq || rq->rt.highest_prio > lowest_rq->rt.highest_prio)) { + lowest_rq = rq; + } + } + + if (!lowest_rq) + break; + + /* if the prio of this runqueue changed, try again */ + if (double_lock_balance(this_rq, lowest_rq)) { + /* + * We had to unlock the run queue. In + * the mean time, task could have + * migrated already or had its affinity changed. + * Also make sure that it wasn't scheduled on its rq. + */ + if (unlikely(task_rq(task) != this_rq || + !cpu_isset(lowest_rq->cpu, task->cpus_allowed) || + task_running(this_rq, task) || + !task->se.on_rq)) { + spin_unlock(&lowest_rq->lock); + lowest_rq = NULL; + break; + } + } + + /* If this rq is still suitable use it. */ + if (lowest_rq->rt.highest_prio > task->prio) + break; + + /* try again */ + spin_unlock(&lowest_rq->lock); + lowest_rq = NULL; + } + + return lowest_rq; +} + +/* + * If the current CPU has more than one RT task, see if the non + * running task can migrate over to a CPU that is running a task + * of lesser priority. + */ +static int push_rt_task(struct rq *this_rq) +{ + struct task_struct *next_task; + struct rq *lowest_rq; + int ret = 0; + int paranoid = RT_MAX_TRIES; + + assert_spin_locked(&this_rq->lock); + + next_task = pick_next_highest_task_rt(this_rq); + if (!next_task) + return 0; + + retry: + if (unlikely(next_task == this_rq->curr)) + return 0; + + /* + * It's possible that the next_task slipped in of + * higher priority than current. If that's the case + * just reschedule current. + */ + if (unlikely(next_task->prio < this_rq->curr->prio)) { + resched_task(this_rq->curr); + return 0; + } + + /* We might release this_rq lock */ + get_task_struct(next_task); + + /* find_lock_lowest_rq locks the rq if found */ + lowest_rq = find_lock_lowest_rq(next_task, this_rq); + if (!lowest_rq) { + struct task_struct *task; + /* + * find lock_lowest_rq releases this_rq->lock + * so it is possible that next_task has changed. + * If it has, then try again. + */ + task = pick_next_highest_task_rt(this_rq); + if (unlikely(task != next_task) && task && paranoid--) { + put_task_struct(next_task); + next_task = task; + goto retry; + } + goto out; + } + + assert_spin_locked(&lowest_rq->lock); + + deactivate_task(this_rq, next_task, 0); + set_task_cpu(next_task, lowest_rq->cpu); + activate_task(lowest_rq, next_task, 0); + + resched_task(lowest_rq->curr); + + spin_unlock(&lowest_rq->lock); + + ret = 1; +out: + put_task_struct(next_task); + + return ret; +} + +/* + * TODO: Currently we just use the second highest prio task on + * the queue, and stop when it can't migrate (or there's + * no more RT tasks). There may be a case where a lower + * priority RT task has a different affinity than the + * higher RT task. In this case the lower RT task could + * possibly be able to migrate where as the higher priority + * RT task could not. We currently ignore this issue. + * Enhancements are welcome! + */ +static void push_rt_tasks(struct rq *rq) +{ + /* push_rt_task will return true if it moved an RT */ + while (push_rt_task(rq)) + ; +} + +static void schedule_tail_balance_rt(struct rq *rq) +{ + /* + * If we have more than one rt_task queued, then + * see if we can push the other rt_tasks off to other CPUS. + * Note we may release the rq lock, and since + * the lock was owned by prev, we need to release it + * first via finish_lock_switch and then reaquire it here. + */ + if (unlikely(rq->rt.rt_nr_running > 1)) { + spin_lock_irq(&rq->lock); + push_rt_tasks(rq); + spin_unlock_irq(&rq->lock); + } +} + /* * Load-balancing iterator. Note: while the runqueue stays locked * during the whole iteration, the current task might be @@ -241,7 +462,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle, &rt_rq_iterator); } -#endif +#else /* CONFIG_SMP */ +# define schedule_tail_balance_rt(rq) do { } while (0) +#endif /* CONFIG_SMP */ static void task_tick_rt(struct rq *rq, struct task_struct *p) { |