diff options
Diffstat (limited to 'kernel/sched/rt.c')
| -rw-r--r-- | kernel/sched/rt.c | 554 |
1 files changed, 322 insertions, 232 deletions
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f42ae7fb5ec..a49083192c6 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -7,6 +7,8 @@ #include <linux/slab.h> +int sched_rr_timeslice = RR_TIMESLICE; + static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); struct rt_bandwidth def_rt_bandwidth; @@ -77,6 +79,8 @@ void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) rt_rq->overloaded = 0; plist_head_init(&rt_rq->pushable_tasks); #endif + /* We start is dequeued state, because no RT tasks are queued */ + rt_rq->rt_queued = 0; rt_rq->rt_time = 0; rt_rq->rt_throttled = 0; @@ -110,6 +114,13 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) return rt_se->rt_rq; } +static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se) +{ + struct rt_rq *rt_rq = rt_se->rt_rq; + + return rt_rq->rq; +} + void free_rt_sched_group(struct task_group *tg) { int i; @@ -209,10 +220,16 @@ static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) return container_of(rt_rq, struct rq, rt); } -static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) +static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se) { struct task_struct *p = rt_task_of(rt_se); - struct rq *rq = task_rq(p); + + return task_rq(p); +} + +static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) +{ + struct rq *rq = rq_of_rt_se(rt_se); return &rq->rt; } @@ -227,6 +244,14 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) #ifdef CONFIG_SMP +static int pull_rt_task(struct rq *this_rq); + +static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev) +{ + /* Try to pull RT tasks here if we lower this rq's prio */ + return rq->rt.highest_prio.curr > prev->prio; +} + static inline int rt_overloaded(struct rq *rq) { return atomic_read(&rq->rd->rto_count); @@ -244,8 +269,10 @@ static inline void rt_set_overload(struct rq *rq) * if we should look at the mask. It would be a shame * if we looked at the mask, but the mask was not * updated yet. + * + * Matched by the barrier in pull_rt_task(). */ - wmb(); + smp_wmb(); atomic_inc(&rq->rd->rto_count); } @@ -274,13 +301,16 @@ static void update_rt_migration(struct rt_rq *rt_rq) static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { + struct task_struct *p; + if (!rt_entity_is_task(rt_se)) return; + p = rt_task_of(rt_se); rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total++; - if (rt_se->nr_cpus_allowed > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory++; update_rt_migration(rt_rq); @@ -288,13 +318,16 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { + struct task_struct *p; + if (!rt_entity_is_task(rt_se)) return; + p = rt_task_of(rt_se); rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total--; - if (rt_se->nr_cpus_allowed > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory--; update_rt_migration(rt_rq); @@ -305,6 +338,15 @@ static inline int has_pushable_tasks(struct rq *rq) return !plist_head_empty(&rq->rt.pushable_tasks); } +static inline void set_post_schedule(struct rq *rq) +{ + /* + * We detect this state here so that we can avoid taking the RQ + * lock again later if there is no need to push + */ + rq->post_schedule = has_pushable_tasks(rq); +} + static void enqueue_pushable_task(struct rq *rq, struct task_struct *p) { plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); @@ -349,8 +391,24 @@ void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { } +static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev) +{ + return false; +} + +static inline int pull_rt_task(struct rq *this_rq) +{ + return 0; +} + +static inline void set_post_schedule(struct rq *rq) +{ +} #endif /* CONFIG_SMP */ +static void enqueue_top_rt_rq(struct rt_rq *rt_rq); +static void dequeue_top_rt_rq(struct rt_rq *rt_rq); + static inline int on_rt_rq(struct sched_rt_entity *rt_se) { return !list_empty(&rt_se->run_list); @@ -391,20 +449,6 @@ static inline struct task_group *next_task_group(struct task_group *tg) (iter = next_task_group(iter)) && \ (rt_rq = iter->rt_rq[cpu_of(rq)]);) -static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) -{ - list_add_rcu(&rt_rq->leaf_rt_rq_list, - &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list); -} - -static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) -{ - list_del_rcu(&rt_rq->leaf_rt_rq_list); -} - -#define for_each_leaf_rt_rq(rt_rq, rq) \ - list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) - #define for_each_sched_rt_entity(rt_se) \ for (; rt_se; rt_se = rt_se->parent) @@ -426,8 +470,11 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) rt_se = rt_rq->tg->rt_se[cpu]; if (rt_rq->rt_nr_running) { - if (rt_se && !on_rt_rq(rt_se)) + if (!rt_se) + enqueue_top_rt_rq(rt_rq); + else if (!on_rt_rq(rt_se)) enqueue_rt_entity(rt_se, false); + if (rt_rq->highest_prio.curr < curr->prio) resched_task(curr); } @@ -440,7 +487,9 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) rt_se = rt_rq->tg->rt_se[cpu]; - if (rt_se && on_rt_rq(rt_se)) + if (!rt_se) + dequeue_top_rt_rq(rt_rq); + else if (on_rt_rq(rt_se)) dequeue_rt_entity(rt_se); } @@ -464,7 +513,7 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se) #ifdef CONFIG_SMP static inline const struct cpumask *sched_rt_period_mask(void) { - return cpu_rq(smp_processor_id())->rd->span; + return this_rq()->rd->span; } #else static inline const struct cpumask *sched_rt_period_mask(void) @@ -501,17 +550,6 @@ typedef struct rt_rq *rt_rq_iter_t; #define for_each_rt_rq(rt_rq, iter, rq) \ for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL) -static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) -{ -} - -static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) -{ -} - -#define for_each_leaf_rt_rq(rt_rq, rq) \ - for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) - #define for_each_sched_rt_entity(rt_se) \ for (; rt_se; rt_se = NULL) @@ -522,12 +560,18 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { - if (rt_rq->rt_nr_running) - resched_task(rq_of_rt_rq(rt_rq)->curr); + struct rq *rq = rq_of_rt_rq(rt_rq); + + if (!rt_rq->rt_nr_running) + return; + + enqueue_top_rt_rq(rt_rq); + resched_task(rq->curr); } static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) { + dequeue_top_rt_rq(rt_rq); } static inline int rt_rq_throttled(struct rt_rq *rt_rq) @@ -553,6 +597,14 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) #endif /* CONFIG_RT_GROUP_SCHED */ +bool sched_rt_bandwidth_account(struct rt_rq *rt_rq) +{ + struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); + + return (hrtimer_active(&rt_b->rt_period_timer) || + rt_rq->rt_time < rt_b->rt_runtime); +} + #ifdef CONFIG_SMP /* * We ran out of runtime, see if we can borrow some from our neighbours. @@ -560,7 +612,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) static int do_balance_runtime(struct rt_rq *rt_rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); - struct root_domain *rd = cpu_rq(smp_processor_id())->rd; + struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd; int i, weight, more = 0; u64 rt_period; @@ -685,20 +737,12 @@ balanced: * runtime - in which case borrowing doesn't make sense. */ rt_rq->rt_runtime = RUNTIME_INF; + rt_rq->rt_throttled = 0; raw_spin_unlock(&rt_rq->rt_runtime_lock); raw_spin_unlock(&rt_b->rt_runtime_lock); } } -static void disable_runtime(struct rq *rq) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&rq->lock, flags); - __disable_runtime(rq); - raw_spin_unlock_irqrestore(&rq->lock, flags); -} - static void __enable_runtime(struct rq *rq) { rt_rq_iter_t iter; @@ -723,37 +767,6 @@ static void __enable_runtime(struct rq *rq) } } -static void enable_runtime(struct rq *rq) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&rq->lock, flags); - __enable_runtime(rq); - raw_spin_unlock_irqrestore(&rq->lock, flags); -} - -int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - int cpu = (int)(long)hcpu; - - switch (action) { - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - disable_runtime(cpu_rq(cpu)); - return NOTIFY_OK; - - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - enable_runtime(cpu_rq(cpu)); - return NOTIFY_OK; - - default: - return NOTIFY_DONE; - } -} - static int balance_runtime(struct rt_rq *rt_rq) { int more = 0; @@ -778,13 +791,23 @@ static inline int balance_runtime(struct rt_rq *rt_rq) static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) { - int i, idle = 1; + int i, idle = 1, throttled = 0; const struct cpumask *span; - if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) - return 1; - span = sched_rt_period_mask(); +#ifdef CONFIG_RT_GROUP_SCHED + /* + * FIXME: isolated CPUs should really leave the root task group, + * whether they are isolcpus or were isolated via cpusets, lest + * the timer run on a CPU which does not service all runqueues, + * potentially leaving other CPUs indefinitely throttled. If + * isolation is really required, the user will turn the throttle + * off to kill the perturbations it causes anyway. Meanwhile, + * this maintains functionality for boot and/or troubleshooting. + */ + if (rt_b == &root_task_group.rt_bandwidth) + span = cpu_online_mask; +#endif for_each_cpu(i, span) { int enqueue = 0; struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); @@ -818,12 +841,17 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) if (!rt_rq_throttled(rt_rq)) enqueue = 1; } + if (rt_rq->rt_throttled) + throttled = 1; if (enqueue) sched_rt_rq_enqueue(rt_rq); raw_spin_unlock(&rq->lock); } + if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)) + return 1; + return idle; } @@ -855,8 +883,24 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) return 0; if (rt_rq->rt_time > runtime) { - rt_rq->rt_throttled = 1; - printk_once(KERN_WARNING "sched: RT throttling activated\n"); + struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); + + /* + * Don't actually throttle groups that have no runtime assigned + * but accrue some time due to boosting. + */ + if (likely(rt_b->rt_runtime)) { + rt_rq->rt_throttled = 1; + printk_deferred_once("sched: RT throttling activated\n"); + } else { + /* + * In case we did anyway, make it go away, + * replenishment is a joke, since it will replenish us + * with exactly 0 ns. + */ + rt_rq->rt_time = 0; + } + if (rt_rq_throttled(rt_rq)) { sched_rt_rq_dequeue(rt_rq); return 1; @@ -874,22 +918,22 @@ static void update_curr_rt(struct rq *rq) { struct task_struct *curr = rq->curr; struct sched_rt_entity *rt_se = &curr->rt; - struct rt_rq *rt_rq = rt_rq_of_se(rt_se); u64 delta_exec; if (curr->sched_class != &rt_sched_class) return; - delta_exec = rq->clock_task - curr->se.exec_start; - if (unlikely((s64)delta_exec < 0)) - delta_exec = 0; + delta_exec = rq_clock_task(rq) - curr->se.exec_start; + if (unlikely((s64)delta_exec <= 0)) + return; - schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); + schedstat_set(curr->se.statistics.exec_max, + max(curr->se.statistics.exec_max, delta_exec)); curr->se.sum_exec_runtime += delta_exec; account_group_exec_runtime(curr, delta_exec); - curr->se.exec_start = rq->clock_task; + curr->se.exec_start = rq_clock_task(rq); cpuacct_charge(curr, delta_exec); sched_rt_avg_update(rq, delta_exec); @@ -898,7 +942,7 @@ static void update_curr_rt(struct rq *rq) return; for_each_sched_rt_entity(rt_se) { - rt_rq = rt_rq_of_se(rt_se); + struct rt_rq *rt_rq = rt_rq_of_se(rt_se); if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { raw_spin_lock(&rt_rq->rt_runtime_lock); @@ -910,6 +954,38 @@ static void update_curr_rt(struct rq *rq) } } +static void +dequeue_top_rt_rq(struct rt_rq *rt_rq) +{ + struct rq *rq = rq_of_rt_rq(rt_rq); + + BUG_ON(&rq->rt != rt_rq); + + if (!rt_rq->rt_queued) + return; + + BUG_ON(!rq->nr_running); + + sub_nr_running(rq, rt_rq->rt_nr_running); + rt_rq->rt_queued = 0; +} + +static void +enqueue_top_rt_rq(struct rt_rq *rt_rq) +{ + struct rq *rq = rq_of_rt_rq(rt_rq); + + BUG_ON(&rq->rt != rt_rq); + + if (rt_rq->rt_queued) + return; + if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running) + return; + + add_nr_running(rq, rt_rq->rt_nr_running); + rt_rq->rt_queued = 1; +} + #if defined CONFIG_SMP static void @@ -917,6 +993,13 @@ inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) { struct rq *rq = rq_of_rt_rq(rt_rq); +#ifdef CONFIG_RT_GROUP_SCHED + /* + * Change rq's cpupri only if rt_rq is the top queue. + */ + if (&rq->rt != rt_rq) + return; +#endif if (rq->online && prio < prev_prio) cpupri_set(&rq->rd->cpupri, rq->cpu, prio); } @@ -926,6 +1009,13 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) { struct rq *rq = rq_of_rt_rq(rt_rq); +#ifdef CONFIG_RT_GROUP_SCHED + /* + * Change rq's cpupri only if rt_rq is the top queue. + */ + if (&rq->rt != rt_rq) + return; +#endif if (rq->online && rt_rq->highest_prio.curr != prev_prio) cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); } @@ -1019,12 +1109,23 @@ void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {} #endif /* CONFIG_RT_GROUP_SCHED */ static inline +unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se) +{ + struct rt_rq *group_rq = group_rt_rq(rt_se); + + if (group_rq) + return group_rq->rt_nr_running; + else + return 1; +} + +static inline void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { int prio = rt_se_prio(rt_se); WARN_ON(!rt_prio(prio)); - rt_rq->rt_nr_running++; + rt_rq->rt_nr_running += rt_se_nr_running(rt_se); inc_rt_prio(rt_rq, prio); inc_rt_migration(rt_se, rt_rq); @@ -1036,7 +1137,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { WARN_ON(!rt_prio(rt_se_prio(rt_se))); WARN_ON(!rt_rq->rt_nr_running); - rt_rq->rt_nr_running--; + rt_rq->rt_nr_running -= rt_se_nr_running(rt_se); dec_rt_prio(rt_rq, rt_se_prio(rt_se)); dec_rt_migration(rt_se, rt_rq); @@ -1059,9 +1160,6 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) return; - if (!rt_rq->rt_nr_running) - list_add_leaf_rt_rq(rt_rq); - if (head) list_add(&rt_se->run_list, queue); else @@ -1081,8 +1179,6 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) __clear_bit(rt_se_prio(rt_se), array->bitmap); dec_rt_tasks(rt_se, rt_rq); - if (!rt_rq->rt_nr_running) - list_del_leaf_rt_rq(rt_rq); } /* @@ -1098,6 +1194,8 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se) back = rt_se; } + dequeue_top_rt_rq(rt_rq_of_se(back)); + for (rt_se = back; rt_se; rt_se = rt_se->back) { if (on_rt_rq(rt_se)) __dequeue_rt_entity(rt_se); @@ -1106,13 +1204,18 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se) static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) { + struct rq *rq = rq_of_rt_se(rt_se); + dequeue_rt_stack(rt_se); for_each_sched_rt_entity(rt_se) __enqueue_rt_entity(rt_se, head); + enqueue_top_rt_rq(&rq->rt); } static void dequeue_rt_entity(struct sched_rt_entity *rt_se) { + struct rq *rq = rq_of_rt_se(rt_se); + dequeue_rt_stack(rt_se); for_each_sched_rt_entity(rt_se) { @@ -1121,6 +1224,7 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se) if (rt_rq && rt_rq->rt_nr_running) __enqueue_rt_entity(rt_se, false); } + enqueue_top_rt_rq(&rq->rt); } /* @@ -1136,10 +1240,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD); - if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); - - inc_nr_running(rq); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) @@ -1150,8 +1252,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) dequeue_rt_entity(rt_se); dequeue_pushable_task(rq, p); - - dec_nr_running(rq); } /* @@ -1192,15 +1292,12 @@ static void yield_task_rt(struct rq *rq) static int find_lowest_rq(struct task_struct *task); static int -select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) +select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) { struct task_struct *curr; struct rq *rq; - int cpu; - - cpu = task_cpu(p); - if (p->rt.nr_cpus_allowed == 1) + if (p->nr_cpus_allowed == 1) goto out; /* For anything but wake ups, just return the task_cpu */ @@ -1235,9 +1332,8 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) * will have to sort it out. */ if (curr && unlikely(rt_task(curr)) && - (curr->rt.nr_cpus_allowed < 2 || - curr->prio <= p->prio) && - (p->rt.nr_cpus_allowed > 1)) { + (curr->nr_cpus_allowed < 2 || + curr->prio <= p->prio)) { int target = find_lowest_rq(p); if (target != -1) @@ -1251,10 +1347,10 @@ out: static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { - if (rq->curr->rt.nr_cpus_allowed == 1) + if (rq->curr->nr_cpus_allowed == 1) return; - if (p->rt.nr_cpus_allowed != 1 + if (p->nr_cpus_allowed != 1 && cpupri_find(&rq->rd->cpupri, p, NULL)) return; @@ -1321,15 +1417,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) { struct sched_rt_entity *rt_se; struct task_struct *p; - struct rt_rq *rt_rq; - - rt_rq = &rq->rt; - - if (!rt_rq->rt_nr_running) - return NULL; - - if (rt_rq_throttled(rt_rq)) - return NULL; + struct rt_rq *rt_rq = &rq->rt; do { rt_se = pick_next_rt_entity(rq, rt_rq); @@ -1338,26 +1426,48 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) } while (rt_rq); p = rt_task_of(rt_se); - p->se.exec_start = rq->clock_task; + p->se.exec_start = rq_clock_task(rq); return p; } -static struct task_struct *pick_next_task_rt(struct rq *rq) +static struct task_struct * +pick_next_task_rt(struct rq *rq, struct task_struct *prev) { - struct task_struct *p = _pick_next_task_rt(rq); + struct task_struct *p; + struct rt_rq *rt_rq = &rq->rt; + + if (need_pull_rt_task(rq, prev)) { + pull_rt_task(rq); + /* + * pull_rt_task() can drop (and re-acquire) rq->lock; this + * means a dl or stop task can slip in, in which case we need + * to re-start task selection. + */ + if (unlikely((rq->stop && rq->stop->on_rq) || + rq->dl.dl_nr_running)) + return RETRY_TASK; + } + + /* + * We may dequeue prev's rt_rq in put_prev_task(). + * So, we update time before rt_nr_running check. + */ + if (prev->sched_class == &rt_sched_class) + update_curr_rt(rq); + + if (!rt_rq->rt_queued) + return NULL; + + put_prev_task(rq, prev); + + p = _pick_next_task_rt(rq); /* The running task is never eligible for pushing */ if (p) dequeue_pushable_task(rq, p); -#ifdef CONFIG_SMP - /* - * We detect this state here so that we can avoid taking the RQ - * lock again later if there is no need to push - */ - rq->post_schedule = has_pushable_tasks(rq); -#endif + set_post_schedule(rq); return p; } @@ -1370,7 +1480,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) * The previous task needs to be made eligible for pushing * if it is still active */ - if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) + if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1382,48 +1492,29 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && - (p->rt.nr_cpus_allowed > 1)) + cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) return 1; return 0; } -/* Return the second highest RT task, NULL otherwise */ -static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) +/* + * Return the highest pushable rq's task, which is suitable to be executed + * on the cpu, NULL otherwise + */ +static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) { - struct task_struct *next = NULL; - struct sched_rt_entity *rt_se; - struct rt_prio_array *array; - struct rt_rq *rt_rq; - int idx; - - for_each_leaf_rt_rq(rt_rq, rq) { - array = &rt_rq->active; - idx = sched_find_first_bit(array->bitmap); -next_idx: - if (idx >= MAX_RT_PRIO) - continue; - if (next && next->prio < idx) - continue; - list_for_each_entry(rt_se, array->queue + idx, run_list) { - struct task_struct *p; + struct plist_head *head = &rq->rt.pushable_tasks; + struct task_struct *p; - if (!rt_entity_is_task(rt_se)) - continue; + if (!has_pushable_tasks(rq)) + return NULL; - p = rt_task_of(rt_se); - if (pick_rt_task(rq, p, cpu)) { - next = p; - break; - } - } - if (!next) { - idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1); - goto next_idx; - } + plist_for_each_entry(p, head, pushable_tasks) { + if (pick_rt_task(rq, p, cpu)) + return p; } - return next; + return NULL; } static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); @@ -1439,7 +1530,7 @@ static int find_lowest_rq(struct task_struct *task) if (unlikely(!lowest_mask)) return -1; - if (task->rt.nr_cpus_allowed == 1) + if (task->nr_cpus_allowed == 1) return -1; /* No other targets possible */ if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) @@ -1531,7 +1622,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) task_running(rq, task) || !task->on_rq)) { - raw_spin_unlock(&lowest_rq->lock); + double_unlock_balance(rq, lowest_rq); lowest_rq = NULL; break; } @@ -1561,7 +1652,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(p->rt.nr_cpus_allowed <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!p->on_rq); BUG_ON(!rt_task(p)); @@ -1587,11 +1678,6 @@ static int push_rt_task(struct rq *rq) if (!next_task) return 0; -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - if (unlikely(task_running(rq, next_task))) - return 0; -#endif - retry: if (unlikely(next_task == rq->curr)) { WARN_ON(1); @@ -1677,6 +1763,12 @@ static int pull_rt_task(struct rq *this_rq) if (likely(!rt_overloaded(this_rq))) return 0; + /* + * Match the barrier from rt_set_overloaded; this guarantees that if we + * see overloaded we must also see the rto_mask bit. + */ + smp_rmb(); + for_each_cpu(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue; @@ -1702,12 +1794,10 @@ static int pull_rt_task(struct rq *this_rq) double_lock_balance(this_rq, src_rq); /* - * Are there still pullable RT tasks? + * We can pull only a task, which is pushable + * on its rq, and no others. */ - if (src_rq->rt.rt_nr_running <= 1) - goto skip; - - p = pick_next_highest_task_rt(src_rq, this_cpu); + p = pick_highest_pushable_task(src_rq, this_cpu); /* * Do we have an RT task that preempts @@ -1747,13 +1837,6 @@ skip: return ret; } -static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) -{ - /* Try to pull RT tasks here if we lower this rq's prio */ - if (rq->rt.highest_prio.curr > prev->prio) - pull_rt_task(rq); -} - static void post_schedule_rt(struct rq *rq) { push_rt_tasks(rq); @@ -1768,9 +1851,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && has_pushable_tasks(rq) && - p->rt.nr_cpus_allowed > 1 && - rt_task(rq->curr) && - (rq->curr->rt.nr_cpus_allowed < 2 || + p->nr_cpus_allowed > 1 && + (dl_task(rq->curr) || rt_task(rq->curr)) && + (rq->curr->nr_cpus_allowed < 2 || rq->curr->prio <= p->prio)) push_rt_tasks(rq); } @@ -1778,44 +1861,40 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) static void set_cpus_allowed_rt(struct task_struct *p, const struct cpumask *new_mask) { - int weight = cpumask_weight(new_mask); + struct rq *rq; + int weight; BUG_ON(!rt_task(p)); - /* - * Update the migration status of the RQ if we have an RT task - * which is running AND changing its weight value. - */ - if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) { - struct rq *rq = task_rq(p); - - if (!task_current(rq, p)) { - /* - * Make sure we dequeue this task from the pushable list - * before going further. It will either remain off of - * the list because we are no longer pushable, or it - * will be requeued. - */ - if (p->rt.nr_cpus_allowed > 1) - dequeue_pushable_task(rq, p); + if (!p->on_rq) + return; - /* - * Requeue if our weight is changing and still > 1 - */ - if (weight > 1) - enqueue_pushable_task(rq, p); + weight = cpumask_weight(new_mask); - } + /* + * Only update if the process changes its state from whether it + * can migrate or not. + */ + if ((p->nr_cpus_allowed > 1) == (weight > 1)) + return; - if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) { - rq->rt.rt_nr_migratory++; - } else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) { - BUG_ON(!rq->rt.rt_nr_migratory); - rq->rt.rt_nr_migratory--; - } + rq = task_rq(p); - update_rt_migration(&rq->rt); + /* + * The process used to be able to migrate OR it can now migrate + */ + if (weight <= 1) { + if (!task_current(rq, p)) + dequeue_pushable_task(rq, p); + BUG_ON(!rq->rt.rt_nr_migratory); + rq->rt.rt_nr_migratory--; + } else { + if (!task_current(rq, p)) + enqueue_pushable_task(rq, p); + rq->rt.rt_nr_migratory++; } + + update_rt_migration(&rq->rt); } /* Assumes rq->lock is held */ @@ -1853,11 +1932,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) * we may need to handle the pulling of RT tasks * now. */ - if (p->on_rq && !rq->rt.rt_nr_running) - pull_rt_task(rq); + if (!p->on_rq || rq->rt.rt_nr_running) + return; + + if (pull_rt_task(rq)) + resched_task(rq->curr); } -void init_sched_rt_class(void) +void __init init_sched_rt_class(void) { unsigned int i; @@ -1886,9 +1968,9 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) */ if (p->on_rq && rq->curr != p) { #ifdef CONFIG_SMP - if (rq->rt.overloaded && push_rt_task(rq) && + if (p->nr_cpus_allowed > 1 && rq->rt.overloaded && /* Don't resched if we changed runqueues */ - rq != task_rq(p)) + push_rt_task(rq) && rq != task_rq(p)) check_resched = 0; #endif /* CONFIG_SMP */ if (check_resched && p->prio < rq->curr->prio) @@ -1949,7 +2031,11 @@ static void watchdog(struct rq *rq, struct task_struct *p) if (soft != RLIM_INFINITY) { unsigned long next; - p->rt.timeout++; + if (p->rt.watchdog_stamp != jiffies) { + p->rt.timeout++; + p->rt.watchdog_stamp = jiffies; + } + next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); if (p->rt.timeout > next) p->cputime_expires.sched_exp = p->se.sum_exec_runtime; @@ -1958,6 +2044,8 @@ static void watchdog(struct rq *rq, struct task_struct *p) static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) { + struct sched_rt_entity *rt_se = &p->rt; + update_curr_rt(rq); watchdog(rq, p); @@ -1972,15 +2060,18 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) if (--p->rt.time_slice) return; - p->rt.time_slice = DEF_TIMESLICE; + p->rt.time_slice = sched_rr_timeslice; /* - * Requeue to the end of queue if we are not the only element - * on the queue: + * Requeue to the end of queue if we (and all of our ancestors) are not + * the only element on the queue */ - if (p->rt.run_list.prev != p->rt.run_list.next) { - requeue_task_rt(rq, p, 0); - set_tsk_need_resched(p); + for_each_sched_rt_entity(rt_se) { + if (rt_se->run_list.prev != rt_se->run_list.next) { + requeue_task_rt(rq, p, 0); + set_tsk_need_resched(p); + return; + } } } @@ -1988,7 +2079,7 @@ static void set_curr_task_rt(struct rq *rq) { struct task_struct *p = rq->curr; - p->se.exec_start = rq->clock_task; + p->se.exec_start = rq_clock_task(rq); /* The running task is never eligible for pushing */ dequeue_pushable_task(rq, p); @@ -2000,7 +2091,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) * Time slice is 0 for SCHED_FIFO tasks */ if (task->policy == SCHED_RR) - return DEF_TIMESLICE; + return sched_rr_timeslice; else return 0; } @@ -2022,7 +2113,6 @@ const struct sched_class rt_sched_class = { .set_cpus_allowed = set_cpus_allowed_rt, .rq_online = rq_online_rt, .rq_offline = rq_offline_rt, - .pre_schedule = pre_schedule_rt, .post_schedule = post_schedule_rt, .task_woken = task_woken_rt, .switched_from = switched_from_rt, |
