aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@sgi.com>2007-06-19 21:39:03 +0200
committerGreg Kroah-Hartman <gregkh@suse.de>2007-08-04 09:10:24 -0700
commitad2aa2d5cd8012db90cdbf32ec11e7fefab57cee (patch)
treef7b4347c64e44125a5544e2c563b3f2cdc47c97b
parentc1228d7339dff95df5ea093f174372be5c1268b7 (diff)
sched: fix next_interval determination in idle_balance()
Fix massive SMP imbalance on NUMA nodes observed on 2.6.21.5 with CFS. (and later on reproduced without CFS as well). The intervals of domains that do not have SD_BALANCE_NEWIDLE must be considered for the calculation of the time of the next balance. Otherwise we may defer rebalancing forever and nodes might stay idle for very long times. Siddha also spotted that the conversion of the balance interval to jiffies is missing. Fix that to. From: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com> also continue the loop if !(sd->flags & SD_LOAD_BALANCE). Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> It did in fact trigger under all three of mainline, CFS, and -rt including CFS -- see below for a couple of emails from last Friday giving results for these three on the AMD box (where it happened) and on a single-quad NUMA-Q system (where it did not, at least not with such severity). Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Chris Wright <chrisw@sous-sol.org> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r--kernel/sched.c22
1 files changed, 13 insertions, 9 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index a3993b9e750..f745a443713 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2831,17 +2831,21 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
unsigned long next_balance = jiffies + 60 * HZ;
for_each_domain(this_cpu, sd) {
- if (sd->flags & SD_BALANCE_NEWIDLE) {
+ unsigned long interval;
+
+ if (!(sd->flags & SD_LOAD_BALANCE))
+ continue;
+
+ if (sd->flags & SD_BALANCE_NEWIDLE)
/* If we've pulled tasks over stop searching: */
pulled_task = load_balance_newidle(this_cpu,
- this_rq, sd);
- if (time_after(next_balance,
- sd->last_balance + sd->balance_interval))
- next_balance = sd->last_balance
- + sd->balance_interval;
- if (pulled_task)
- break;
- }
+ this_rq, sd);
+
+ interval = msecs_to_jiffies(sd->balance_interval);
+ if (time_after(next_balance, sd->last_balance + interval))
+ next_balance = sd->last_balance + interval;
+ if (pulled_task)
+ break;
}
if (!pulled_task)
/*