From a1ba4d8ba9f06a397e97cbd67a93ee306860b40a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Apr 2009 18:40:15 +0200 Subject: sched_rt: Fix overload bug on rt group scheduling Fixes an easily triggerable BUG() when setting process affinities. Make sure to count the number of migratable tasks in the same place: the root rt_rq. Otherwise the number doesn't make sense and we'll hit the BUG in set_cpus_allowed_rt(). Also, make sure we only count tasks, not groups (this is probably already taken care of by the fact that rt_se->nr_cpus_allowed will be 0 for groups, but be more explicit) Tested-by: Thomas Gleixner CC: stable@kernel.org Signed-off-by: Peter Zijlstra Acked-by: Gregory Haskins LKML-Reference: <1247067476.9777.57.camel@twins> Signed-off-by: Ingo Molnar --- kernel/sched.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 7c9098d186e..a17f3d9a8bf 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -493,6 +493,7 @@ struct rt_rq { #endif #ifdef CONFIG_SMP unsigned long rt_nr_migratory; + unsigned long rt_nr_total; int overloaded; struct plist_head pushable_tasks; #endif -- cgit v1.2.3-18-g5258 From 7793527b90d9418211f4fe8464cc1dcb1631ea1b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 9 Jul 2009 13:57:20 +0200 Subject: sched: Reset sched stats on fork() The sched_stat fields are currently not reset upon fork. Ingo's recent commit 6c594c21fcb02c662f11c97be4d7d2b73060a205 did reset nr_migrations, but it didn't reset any of the others. This patch resets all sched_stat fields on fork. Signed-off-by: Lucas De Marchi Signed-off-by: Peter Zijlstra LKML-Reference: <193b0f820907090457s7a3662f4gcdecdc22fcae857b@mail.gmail.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index a17f3d9a8bf..c4549bd7e17 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2572,15 +2572,37 @@ static void __sched_fork(struct task_struct *p) p->se.avg_wakeup = sysctl_sched_wakeup_granularity; #ifdef CONFIG_SCHEDSTATS - p->se.wait_start = 0; - p->se.sum_sleep_runtime = 0; - p->se.sleep_start = 0; - p->se.block_start = 0; - p->se.sleep_max = 0; - p->se.block_max = 0; - p->se.exec_max = 0; - p->se.slice_max = 0; - p->se.wait_max = 0; + p->se.wait_start = 0; + p->se.wait_max = 0; + p->se.wait_count = 0; + p->se.wait_sum = 0; + + p->se.sleep_start = 0; + p->se.sleep_max = 0; + p->se.sum_sleep_runtime = 0; + + p->se.block_start = 0; + p->se.block_max = 0; + p->se.exec_max = 0; + p->se.slice_max = 0; + + p->se.nr_migrations_cold = 0; + p->se.nr_failed_migrations_affine = 0; + p->se.nr_failed_migrations_running = 0; + p->se.nr_failed_migrations_hot = 0; + p->se.nr_forced_migrations = 0; + p->se.nr_forced2_migrations = 0; + + p->se.nr_wakeups = 0; + p->se.nr_wakeups_sync = 0; + p->se.nr_wakeups_migrate = 0; + p->se.nr_wakeups_local = 0; + p->se.nr_wakeups_remote = 0; + p->se.nr_wakeups_affine = 0; + p->se.nr_wakeups_affine_attempts = 0; + p->se.nr_wakeups_passive = 0; + p->se.nr_wakeups_idle = 0; + #endif INIT_LIST_HEAD(&p->rt.run_list); -- cgit v1.2.3-18-g5258 From c20b08e3986c2dbfa6df1e880bf4f7159994d199 Mon Sep 17 00:00:00 2001 From: Fabio Checconi Date: Mon, 15 Jun 2009 20:56:38 +0200 Subject: sched: Fix rt_rq->pushable_tasks initialization in init_rt_rq() init_rt_rq() initializes only rq->rt.pushable_tasks, and not the pushable_tasks field of the passed rt_rq. The plist is not used uninitialized since the only pushable_tasks plists used are the ones of root rt_rqs; anyway reinitializing the list on every group creation corrupts the root plist, losing its previous contents. Signed-off-by: Fabio Checconi Signed-off-by: Peter Zijlstra LKML-Reference: <20090615185638.GK21741@gandalf.sssup.it> CC: Gregory Haskins Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index c4549bd7e17..efecfdad1b5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9093,7 +9093,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) #ifdef CONFIG_SMP rt_rq->rt_nr_migratory = 0; rt_rq->overloaded = 0; - plist_head_init(&rq->rt.pushable_tasks, &rq->lock); + plist_head_init(&rt_rq->pushable_tasks, &rq->lock); #endif rt_rq->rt_time = 0; -- cgit v1.2.3-18-g5258 From d86ee4809d0329d4aa0d0f2c76c2295a16862799 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 10 Jul 2009 14:57:57 +0200 Subject: sched: optimize cond_resched() Optimize cond_resched() by removing one conditional. Currently cond_resched() checks system_state == SYSTEM_RUNNING in order to avoid scheduling before the scheduler is running. We can however, as per suggestion of Matt, use PREEMPT_ACTIVE to accomplish that very same. Suggested-by: Matt Mackall Signed-off-by: Peter Zijlstra Acked-by: Matt Mackall Signed-off-by: Linus Torvalds --- kernel/sched.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 7c9098d186e..01f55ada359 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6541,6 +6541,11 @@ SYSCALL_DEFINE0(sched_yield) return 0; } +static inline int should_resched(void) +{ + return need_resched() && !(preempt_count() & PREEMPT_ACTIVE); +} + static void __cond_resched(void) { #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP @@ -6560,8 +6565,7 @@ static void __cond_resched(void) int __sched _cond_resched(void) { - if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) && - system_state == SYSTEM_RUNNING) { + if (should_resched()) { __cond_resched(); return 1; } @@ -6579,12 +6583,12 @@ EXPORT_SYMBOL(_cond_resched); */ int cond_resched_lock(spinlock_t *lock) { - int resched = need_resched() && system_state == SYSTEM_RUNNING; + int resched = should_resched(); int ret = 0; if (spin_needbreak(lock) || resched) { spin_unlock(lock); - if (resched && need_resched()) + if (resched) __cond_resched(); else cpu_relax(); @@ -6599,7 +6603,7 @@ int __sched cond_resched_softirq(void) { BUG_ON(!in_softirq()); - if (need_resched() && system_state == SYSTEM_RUNNING) { + if (should_resched()) { local_bh_enable(); __cond_resched(); local_bh_disable(); -- cgit v1.2.3-18-g5258 From a468d389349a7560249b355cdb6d2097ea1616c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 17 Jul 2009 14:15:46 +0200 Subject: sched: fix load average accounting vs. cpu hotplug The new load average code clears rq->calc_load_active on CPU_ONLINE. That's wrong as the new onlined CPU might have got a scheduler tick already and accounted the delta to the stale value of the time we offlined the CPU. Clear the value when we cleanup the dead CPU instead. Also move the update of the calc_load_update time for the newly online CPU to CPU_UP_PREPARE to avoid that the CPU plays catch up with the stale update time value. Signed-off-by: Thomas Gleixner --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 98972d366fd..1b59e265273 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7289,6 +7289,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu) static void calc_global_load_remove(struct rq *rq) { atomic_long_sub(rq->calc_load_active, &calc_load_tasks); + rq->calc_load_active = 0; } #endif /* CONFIG_HOTPLUG_CPU */ @@ -7515,6 +7516,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) task_rq_unlock(rq, &flags); get_task_struct(p); cpu_rq(cpu)->migration_thread = p; + rq->calc_load_update = calc_load_update; break; case CPU_ONLINE: @@ -7525,8 +7527,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) /* Update our root-domain */ rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); - rq->calc_load_update = calc_load_update; - rq->calc_load_active = 0; if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -- cgit v1.2.3-18-g5258