aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2012-05-11 17:31:26 +0200
committerBen Hutchings <ben@decadent.org.uk>2012-08-02 14:37:38 +0100
commit1217fd87cfd53eab752837f19386e0a5594719d9 (patch)
tree50c6a3748c32de07750c9a2180516192b2562260
parent2b2c3e47f0a98261d5d6ab7f61420fb4e479b472 (diff)
sched/nohz: Fix rq->cpu_load[] calculations
commit 556061b00c9f2fd6a5524b6bde823ef12f299ecf upstream. While investigating why the load-balancer did funny I found that the rq->cpu_load[] tables were completely screwy.. a bit more digging revealed that the updates that got through were missing ticks followed by a catchup of 2 ticks. The catchup assumes the cpu was idle during that time (since only nohz can cause missed ticks and the machine is idle etc..) this means that esp. the higher indices were significantly lower than they ought to be. The reason for this is that its not correct to compare against jiffies on every jiffy on any other cpu than the cpu that updates jiffies. This patch cludges around it by only doing the catch-up stuff from nohz_idle_balance() and doing the regular stuff unconditionally from the tick. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: pjt@google.com Cc: Venkatesh Pallipadi <venki@google.com> Link: http://lkml.kernel.org/n/tip-tp4kj18xdd5aj4vvj0qg55s2@git.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org> [bwh: Backported to 3.2: adjust filenames and context; keep functions static] Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
-rw-r--r--kernel/sched.c53
-rw-r--r--kernel/sched_fair.c2
2 files changed, 41 insertions, 14 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 52ac69b6d4c..a409d81b778 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1887,7 +1887,7 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
static void update_sysctl(void);
static int get_update_sysctl_factor(void);
-static void update_cpu_load(struct rq *this_rq);
+static void update_idle_cpu_load(struct rq *this_rq);
static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
{
@@ -3855,22 +3855,13 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
* scheduler tick (TICK_NSEC). With tickless idle this will not be called
* every tick. We fix it up based on jiffies.
*/
-static void update_cpu_load(struct rq *this_rq)
+static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
+ unsigned long pending_updates)
{
- unsigned long this_load = this_rq->load.weight;
- unsigned long curr_jiffies = jiffies;
- unsigned long pending_updates;
int i, scale;
this_rq->nr_load_updates++;
- /* Avoid repeated calls on same jiffy, when moving in and out of idle */
- if (curr_jiffies == this_rq->last_load_update_tick)
- return;
-
- pending_updates = curr_jiffies - this_rq->last_load_update_tick;
- this_rq->last_load_update_tick = curr_jiffies;
-
/* Update our load: */
this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
@@ -3895,9 +3886,45 @@ static void update_cpu_load(struct rq *this_rq)
sched_avg_update(this_rq);
}
+/*
+ * Called from nohz_idle_balance() to update the load ratings before doing the
+ * idle balance.
+ */
+static void update_idle_cpu_load(struct rq *this_rq)
+{
+ unsigned long curr_jiffies = jiffies;
+ unsigned long load = this_rq->load.weight;
+ unsigned long pending_updates;
+
+ /*
+ * Bloody broken means of dealing with nohz, but better than nothing..
+ * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
+ * update and see 0 difference the one time and 2 the next, even though
+ * we ticked at roughtly the same rate.
+ *
+ * Hence we only use this from nohz_idle_balance() and skip this
+ * nonsense when called from the scheduler_tick() since that's
+ * guaranteed a stable rate.
+ */
+ if (load || curr_jiffies == this_rq->last_load_update_tick)
+ return;
+
+ pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+ this_rq->last_load_update_tick = curr_jiffies;
+
+ __update_cpu_load(this_rq, load, pending_updates);
+}
+
+/*
+ * Called from scheduler_tick()
+ */
static void update_cpu_load_active(struct rq *this_rq)
{
- update_cpu_load(this_rq);
+ /*
+ * See the mess in update_idle_cpu_load().
+ */
+ this_rq->last_load_update_tick = jiffies;
+ __update_cpu_load(this_rq, this_rq->load.weight, 1);
calc_load_account_active(this_rq);
}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 8a39fa3e3c6..66e4576e8be 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -4735,7 +4735,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
raw_spin_lock_irq(&this_rq->lock);
update_rq_clock(this_rq);
- update_cpu_load(this_rq);
+ update_idle_cpu_load(this_rq);
raw_spin_unlock_irq(&this_rq->lock);
rebalance_domains(balance_cpu, CPU_IDLE);