From 424c93fe4cbe719e7fd7169248d2b648c493b68d Mon Sep 17 00:00:00 2001 From: Nathan Zimmer Date: Thu, 9 May 2013 11:24:03 -0500 Subject: sched: Use this_rq() helper It is a few instructions more efficent to and slightly more readable to use this_rq()-> instead of cpu_rq(smp_processor_id())-> . Size comparison of kernel/sched/fair.o: text data bss dec hex filename 27972 122 26 28120 6dd8 fair.o.before 27956 122 26 28104 6dc8 fair.o.after Signed-off-by: Nathan Zimmer Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1368116643-87971-1-git-send-email-nzimmer@sgi.com Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched/rt.c') diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 127a2c4cf4a..7aced2e3b08 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -472,7 +472,7 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se) #ifdef CONFIG_SMP static inline const struct cpumask *sched_rt_period_mask(void) { - return cpu_rq(smp_processor_id())->rd->span; + return this_rq()->rd->span; } #else static inline const struct cpumask *sched_rt_period_mask(void) -- cgit v1.2.3-70-g09d2 From c5405a495e88d93cf9b4f4cc91507c7f4afcb901 Mon Sep 17 00:00:00 2001 From: Neil Zhang Date: Thu, 11 Apr 2013 21:04:59 +0800 Subject: sched: Remove redundant update_runtime notifier migration_call() will do all the things that update_runtime() does. So let's remove it. Furthermore, there is potential risk that the current code will catch BUG_ON at line 689 of rt.c when do cpu hotplug while there are realtime threads running because of enabling runtime twice while the rt_runtime may already changed. Signed-off-by: Neil Zhang Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1365685499-26515-1-git-send-email-zhangwm@marvell.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 3 --- kernel/sched/rt.c | 40 ---------------------------------------- kernel/sched/sched.h | 1 - 3 files changed, 44 deletions(-) (limited to 'kernel/sched/rt.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bfa7e77e0b5..79e48e6a938 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6285,9 +6285,6 @@ void __init sched_init_smp(void) hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); - /* RT runtime code needs to handle some hotplug events */ - hotcpu_notifier(update_runtime, 0); - init_hrtick(); /* Move init over to a non-isolated CPU */ diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7aced2e3b08..8853ab17b75 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -699,15 +699,6 @@ balanced: } } -static void disable_runtime(struct rq *rq) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&rq->lock, flags); - __disable_runtime(rq); - raw_spin_unlock_irqrestore(&rq->lock, flags); -} - static void __enable_runtime(struct rq *rq) { rt_rq_iter_t iter; @@ -732,37 +723,6 @@ static void __enable_runtime(struct rq *rq) } } -static void enable_runtime(struct rq *rq) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&rq->lock, flags); - __enable_runtime(rq); - raw_spin_unlock_irqrestore(&rq->lock, flags); -} - -int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - int cpu = (int)(long)hcpu; - - switch (action) { - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - disable_runtime(cpu_rq(cpu)); - return NOTIFY_OK; - - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - enable_runtime(cpu_rq(cpu)); - return NOTIFY_OK; - - default: - return NOTIFY_DONE; - } -} - static int balance_runtime(struct rt_rq *rt_rq) { int more = 0; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f1f6256c122..c806c61a126 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1041,7 +1041,6 @@ static inline void idle_balance(int cpu, struct rq *rq) extern void sysrq_sched_debug_show(void); extern void sched_init_granularity(void); extern void update_max_interval(void); -extern int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu); extern void init_sched_rt_class(void); extern void init_sched_fair_class(void); -- cgit v1.2.3-70-g09d2 From 78becc27097585c6aec7043834cadde950ae79f2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Apr 2013 01:51:02 +0200 Subject: sched: Use an accessor to read the rq clock Read the runqueue clock through an accessor. This prepares for adding a debugging infrastructure to detect missing or redundant calls to update_rq_clock() between a scheduler's entry and exit point. Signed-off-by: Frederic Weisbecker Cc: Li Zhong Cc: Steven Rostedt Cc: Paul Turner Cc: Mike Galbraith Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1365724262-20142-6-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 6 +++--- kernel/sched/fair.c | 44 ++++++++++++++++++++++---------------------- kernel/sched/rt.c | 8 ++++---- kernel/sched/sched.h | 10 ++++++++++ kernel/sched/stats.h | 8 ++++---- kernel/sched/stop_task.c | 8 ++++---- 6 files changed, 47 insertions(+), 37 deletions(-) (limited to 'kernel/sched/rt.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 46d00172ae4..36f85be2932 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -667,7 +667,7 @@ void sched_avg_update(struct rq *rq) { s64 period = sched_avg_period(); - while ((s64)(rq->clock - rq->age_stamp) > period) { + while ((s64)(rq_clock(rq) - rq->age_stamp) > period) { /* * Inline assembly required to prevent the compiler * optimising this loop into a divmod call. @@ -1328,7 +1328,7 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) p->sched_class->task_woken(rq, p); if (rq->idle_stamp) { - u64 delta = rq->clock - rq->idle_stamp; + u64 delta = rq_clock(rq) - rq->idle_stamp; u64 max = 2*sysctl_sched_migration_cost; if (delta > max) @@ -2106,7 +2106,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) if (task_current(rq, p)) { update_rq_clock(rq); - ns = rq->clock_task - p->se.exec_start; + ns = rq_clock_task(rq) - p->se.exec_start; if ((s64)ns < 0) ns = 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1c8762a5370..3ee1c2e4ae6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -704,7 +704,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, static void update_curr(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; - u64 now = rq_of(cfs_rq)->clock_task; + u64 now = rq_clock_task(rq_of(cfs_rq)); unsigned long delta_exec; if (unlikely(!curr)) @@ -736,7 +736,7 @@ static void update_curr(struct cfs_rq *cfs_rq) static inline void update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) { - schedstat_set(se->statistics.wait_start, rq_of(cfs_rq)->clock); + schedstat_set(se->statistics.wait_start, rq_clock(rq_of(cfs_rq))); } /* @@ -756,14 +756,14 @@ static void update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) { schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max, - rq_of(cfs_rq)->clock - se->statistics.wait_start)); + rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start)); schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1); schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum + - rq_of(cfs_rq)->clock - se->statistics.wait_start); + rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start); #ifdef CONFIG_SCHEDSTATS if (entity_is_task(se)) { trace_sched_stat_wait(task_of(se), - rq_of(cfs_rq)->clock - se->statistics.wait_start); + rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start); } #endif schedstat_set(se->statistics.wait_start, 0); @@ -789,7 +789,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) /* * We are starting a new run period: */ - se->exec_start = rq_of(cfs_rq)->clock_task; + se->exec_start = rq_clock_task(rq_of(cfs_rq)); } /************************************************** @@ -1515,7 +1515,7 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) static inline void update_rq_runnable_avg(struct rq *rq, int runnable) { - __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable); + __update_entity_runnable_avg(rq_clock_task(rq), &rq->avg, runnable); __update_tg_runnable_avg(&rq->avg, &rq->cfs); } @@ -1530,7 +1530,7 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, * accumulated while sleeping. */ if (unlikely(se->avg.decay_count <= 0)) { - se->avg.last_runnable_update = rq_of(cfs_rq)->clock_task; + se->avg.last_runnable_update = rq_clock_task(rq_of(cfs_rq)); if (se->avg.decay_count) { /* * In a wake-up migration we have to approximate the @@ -1625,7 +1625,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) tsk = task_of(se); if (se->statistics.sleep_start) { - u64 delta = rq_of(cfs_rq)->clock - se->statistics.sleep_start; + u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.sleep_start; if ((s64)delta < 0) delta = 0; @@ -1642,7 +1642,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) } } if (se->statistics.block_start) { - u64 delta = rq_of(cfs_rq)->clock - se->statistics.block_start; + u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.block_start; if ((s64)delta < 0) delta = 0; @@ -1823,9 +1823,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) struct task_struct *tsk = task_of(se); if (tsk->state & TASK_INTERRUPTIBLE) - se->statistics.sleep_start = rq_of(cfs_rq)->clock; + se->statistics.sleep_start = rq_clock(rq_of(cfs_rq)); if (tsk->state & TASK_UNINTERRUPTIBLE) - se->statistics.block_start = rq_of(cfs_rq)->clock; + se->statistics.block_start = rq_clock(rq_of(cfs_rq)); } #endif } @@ -2100,7 +2100,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq) if (unlikely(cfs_rq->throttle_count)) return cfs_rq->throttled_clock_task; - return rq_of(cfs_rq)->clock_task - cfs_rq->throttled_clock_task_time; + return rq_clock_task(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time; } /* returns 0 on failure to allocate runtime */ @@ -2159,7 +2159,7 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) struct rq *rq = rq_of(cfs_rq); /* if the deadline is ahead of our clock, nothing to do */ - if (likely((s64)(rq->clock - cfs_rq->runtime_expires) < 0)) + if (likely((s64)(rq_clock(rq_of(cfs_rq)) - cfs_rq->runtime_expires) < 0)) return; if (cfs_rq->runtime_remaining < 0) @@ -2248,7 +2248,7 @@ static int tg_unthrottle_up(struct task_group *tg, void *data) #ifdef CONFIG_SMP if (!cfs_rq->throttle_count) { /* adjust cfs_rq_clock_task() */ - cfs_rq->throttled_clock_task_time += rq->clock_task - + cfs_rq->throttled_clock_task_time += rq_clock_task(rq) - cfs_rq->throttled_clock_task; } #endif @@ -2263,7 +2263,7 @@ static int tg_throttle_down(struct task_group *tg, void *data) /* group is entering throttled state, stop time */ if (!cfs_rq->throttle_count) - cfs_rq->throttled_clock_task = rq->clock_task; + cfs_rq->throttled_clock_task = rq_clock_task(rq); cfs_rq->throttle_count++; return 0; @@ -2302,7 +2302,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) rq->nr_running -= task_delta; cfs_rq->throttled = 1; - cfs_rq->throttled_clock = rq->clock; + cfs_rq->throttled_clock = rq_clock(rq); raw_spin_lock(&cfs_b->lock); list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); raw_spin_unlock(&cfs_b->lock); @@ -2323,7 +2323,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) update_rq_clock(rq); raw_spin_lock(&cfs_b->lock); - cfs_b->throttled_time += rq->clock - cfs_rq->throttled_clock; + cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock; list_del_rcu(&cfs_rq->throttled_list); raw_spin_unlock(&cfs_b->lock); @@ -2726,7 +2726,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) #else /* CONFIG_CFS_BANDWIDTH */ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq) { - return rq_of(cfs_rq)->clock_task; + return rq_clock_task(rq_of(cfs_rq)); } static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, @@ -3966,7 +3966,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) * 2) too many balance attempts have failed. */ - tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd); + tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq), env->sd); if (!tsk_cache_hot || env->sd->nr_balance_failed > env->sd->cache_nice_tries) { @@ -4322,7 +4322,7 @@ static unsigned long scale_rt_power(int cpu) age_stamp = ACCESS_ONCE(rq->age_stamp); avg = ACCESS_ONCE(rq->rt_avg); - total = sched_avg_period() + (rq->clock - age_stamp); + total = sched_avg_period() + (rq_clock(rq) - age_stamp); if (unlikely(total < avg)) { /* Ensures that power won't end up being negative */ @@ -5261,7 +5261,7 @@ void idle_balance(int this_cpu, struct rq *this_rq) int pulled_task = 0; unsigned long next_balance = jiffies + HZ; - this_rq->idle_stamp = this_rq->clock; + this_rq->idle_stamp = rq_clock(this_rq); if (this_rq->avg_idle < sysctl_sched_migration_cost) return; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8853ab17b75..8d85f9ac426 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -886,7 +886,7 @@ static void update_curr_rt(struct rq *rq) if (curr->sched_class != &rt_sched_class) return; - delta_exec = rq->clock_task - curr->se.exec_start; + delta_exec = rq_clock_task(rq) - curr->se.exec_start; if (unlikely((s64)delta_exec <= 0)) return; @@ -896,7 +896,7 @@ static void update_curr_rt(struct rq *rq) curr->se.sum_exec_runtime += delta_exec; account_group_exec_runtime(curr, delta_exec); - curr->se.exec_start = rq->clock_task; + curr->se.exec_start = rq_clock_task(rq); cpuacct_charge(curr, delta_exec); sched_rt_avg_update(rq, delta_exec); @@ -1345,7 +1345,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) } while (rt_rq); p = rt_task_of(rt_se); - p->se.exec_start = rq->clock_task; + p->se.exec_start = rq_clock_task(rq); return p; } @@ -1997,7 +1997,7 @@ static void set_curr_task_rt(struct rq *rq) { struct task_struct *p = rq->curr; - p->se.exec_start = rq->clock_task; + p->se.exec_start = rq_clock_task(rq); /* The running task is never eligible for pushing */ dequeue_pushable_task(rq, p); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c806c61a126..74ff659e964 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -548,6 +548,16 @@ DECLARE_PER_CPU(struct rq, runqueues); #define cpu_curr(cpu) (cpu_rq(cpu)->curr) #define raw_rq() (&__raw_get_cpu_var(runqueues)) +static inline u64 rq_clock(struct rq *rq) +{ + return rq->clock; +} + +static inline u64 rq_clock_task(struct rq *rq) +{ + return rq->clock_task; +} + #ifdef CONFIG_SMP #define rcu_dereference_check_sched_domain(p) \ diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 2ef90a51ec5..17d7065c387 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -61,7 +61,7 @@ static inline void sched_info_reset_dequeued(struct task_struct *t) */ static inline void sched_info_dequeued(struct task_struct *t) { - unsigned long long now = task_rq(t)->clock, delta = 0; + unsigned long long now = rq_clock(task_rq(t)), delta = 0; if (unlikely(sched_info_on())) if (t->sched_info.last_queued) @@ -79,7 +79,7 @@ static inline void sched_info_dequeued(struct task_struct *t) */ static void sched_info_arrive(struct task_struct *t) { - unsigned long long now = task_rq(t)->clock, delta = 0; + unsigned long long now = rq_clock(task_rq(t)), delta = 0; if (t->sched_info.last_queued) delta = now - t->sched_info.last_queued; @@ -100,7 +100,7 @@ static inline void sched_info_queued(struct task_struct *t) { if (unlikely(sched_info_on())) if (!t->sched_info.last_queued) - t->sched_info.last_queued = task_rq(t)->clock; + t->sched_info.last_queued = rq_clock(task_rq(t)); } /* @@ -112,7 +112,7 @@ static inline void sched_info_queued(struct task_struct *t) */ static inline void sched_info_depart(struct task_struct *t) { - unsigned long long delta = task_rq(t)->clock - + unsigned long long delta = rq_clock(task_rq(t)) - t->sched_info.last_arrival; rq_sched_info_depart(task_rq(t), delta); diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index da5eb5bed84..e08fbeeb54b 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -28,7 +28,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) struct task_struct *stop = rq->stop; if (stop && stop->on_rq) { - stop->se.exec_start = rq->clock_task; + stop->se.exec_start = rq_clock_task(rq); return stop; } @@ -57,7 +57,7 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev) struct task_struct *curr = rq->curr; u64 delta_exec; - delta_exec = rq->clock_task - curr->se.exec_start; + delta_exec = rq_clock_task(rq) - curr->se.exec_start; if (unlikely((s64)delta_exec < 0)) delta_exec = 0; @@ -67,7 +67,7 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev) curr->se.sum_exec_runtime += delta_exec; account_group_exec_runtime(curr, delta_exec); - curr->se.exec_start = rq->clock_task; + curr->se.exec_start = rq_clock_task(rq); cpuacct_charge(curr, delta_exec); } @@ -79,7 +79,7 @@ static void set_curr_task_stop(struct rq *rq) { struct task_struct *stop = rq->stop; - stop->se.exec_start = rq->clock_task; + stop->se.exec_start = rq_clock_task(rq); } static void switched_to_stop(struct rq *rq, struct task_struct *p) -- cgit v1.2.3-70-g09d2 From e23ee74777f389369431d77390c4b09332ce026a Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 7 Jun 2013 15:37:43 -0400 Subject: sched/rt: Simplify pull_rt_task() logic and remove .leaf_rt_rq_list [ Peter, this is based off of some of my work, I ran it though a few tests and it passed. I also reviewed it, and added my SOB as I am somewhat a co-author to it. ] Based on the patch by Steven Rostedt from previous year: https://lkml.org/lkml/2012/4/18/517 1)Simplify pull_rt_task() logic: search in pushable tasks of dest runqueue. The only pullable tasks are the tasks which are pushable in their local rq, and no others. 2)Remove .leaf_rt_rq_list member of struct rt_rq and functions connected with it: nobody uses it since now. Signed-off-by: Kirill Tkhai Signed-off-by: Steven Rostedt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/287571370557898@web7d.yandex.ru Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 82 ++++++++++------------------------------------------ kernel/sched/sched.h | 1 - 2 files changed, 16 insertions(+), 67 deletions(-) (limited to 'kernel/sched/rt.c') diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8d85f9ac426..01970c8e64d 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -399,20 +399,6 @@ static inline struct task_group *next_task_group(struct task_group *tg) (iter = next_task_group(iter)) && \ (rt_rq = iter->rt_rq[cpu_of(rq)]);) -static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) -{ - list_add_rcu(&rt_rq->leaf_rt_rq_list, - &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list); -} - -static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) -{ - list_del_rcu(&rt_rq->leaf_rt_rq_list); -} - -#define for_each_leaf_rt_rq(rt_rq, rq) \ - list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) - #define for_each_sched_rt_entity(rt_se) \ for (; rt_se; rt_se = rt_se->parent) @@ -509,17 +495,6 @@ typedef struct rt_rq *rt_rq_iter_t; #define for_each_rt_rq(rt_rq, iter, rq) \ for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL) -static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) -{ -} - -static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) -{ -} - -#define for_each_leaf_rt_rq(rt_rq, rq) \ - for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) - #define for_each_sched_rt_entity(rt_se) \ for (; rt_se; rt_se = NULL) @@ -1066,9 +1041,6 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) return; - if (!rt_rq->rt_nr_running) - list_add_leaf_rt_rq(rt_rq); - if (head) list_add(&rt_se->run_list, queue); else @@ -1088,8 +1060,6 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) __clear_bit(rt_se_prio(rt_se), array->bitmap); dec_rt_tasks(rt_se, rt_rq); - if (!rt_rq->rt_nr_running) - list_del_leaf_rt_rq(rt_rq); } /* @@ -1394,42 +1364,24 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) return 0; } -/* Return the second highest RT task, NULL otherwise */ -static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) +/* + * Return the highest pushable rq's task, which is suitable to be executed + * on the cpu, NULL otherwise + */ +static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) { - struct task_struct *next = NULL; - struct sched_rt_entity *rt_se; - struct rt_prio_array *array; - struct rt_rq *rt_rq; - int idx; - - for_each_leaf_rt_rq(rt_rq, rq) { - array = &rt_rq->active; - idx = sched_find_first_bit(array->bitmap); -next_idx: - if (idx >= MAX_RT_PRIO) - continue; - if (next && next->prio <= idx) - continue; - list_for_each_entry(rt_se, array->queue + idx, run_list) { - struct task_struct *p; + struct plist_head *head = &rq->rt.pushable_tasks; + struct task_struct *p; - if (!rt_entity_is_task(rt_se)) - continue; + if (!has_pushable_tasks(rq)) + return NULL; - p = rt_task_of(rt_se); - if (pick_rt_task(rq, p, cpu)) { - next = p; - break; - } - } - if (!next) { - idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1); - goto next_idx; - } + plist_for_each_entry(p, head, pushable_tasks) { + if (pick_rt_task(rq, p, cpu)) + return p; } - return next; + return NULL; } static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); @@ -1703,12 +1655,10 @@ static int pull_rt_task(struct rq *this_rq) double_lock_balance(this_rq, src_rq); /* - * Are there still pullable RT tasks? + * We can pull only a task, which is pushable + * on its rq, and no others. */ - if (src_rq->rt.rt_nr_running <= 1) - goto skip; - - p = pick_next_highest_task_rt(src_rq, this_cpu); + p = pick_highest_pushable_task(src_rq, this_cpu); /* * Do we have an RT task that preempts diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 74ff659e964..029601a6158 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -361,7 +361,6 @@ struct rt_rq { unsigned long rt_nr_boosted; struct rq *rq; - struct list_head leaf_rt_rq_list; struct task_group *tg; #endif }; -- cgit v1.2.3-70-g09d2