diff options
Diffstat (limited to 'kernel/time/tick-sched.c')
| -rw-r--r-- | kernel/time/tick-sched.c | 166 |
1 files changed, 108 insertions, 58 deletions
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d5097c44b40..7656642e4b8 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -139,7 +139,6 @@ static void tick_nohz_update_jiffies(ktime_t now) struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long flags; - cpumask_clear_cpu(cpu, nohz_cpu_mask); ts->idle_waketime = now; local_irq_save(flags); @@ -159,9 +158,10 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda if (ts->idle_active) { delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); if (nr_iowait_cpu(cpu) > 0) ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); + else + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; } @@ -197,11 +197,11 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) /** * get_cpu_idle_time_us - get the total idle time of a cpu * @cpu: CPU number to query - * @last_update_time: variable to store update time in + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. * * Return the cummulative idle time (since boot) for a given - * CPU, in microseconds. The idle time returned includes - * the iowait time (unlike what "top" and co report). + * CPU, in microseconds. * * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. @@ -211,20 +211,35 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, idle; if (!tick_nohz_enabled) return -1; - update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); + now = ktime_get(); + if (last_update_time) { + update_ts_time_stats(cpu, ts, now, last_update_time); + idle = ts->idle_sleeptime; + } else { + if (ts->idle_active && !nr_iowait_cpu(cpu)) { + ktime_t delta = ktime_sub(now, ts->idle_entrytime); + + idle = ktime_add(ts->idle_sleeptime, delta); + } else { + idle = ts->idle_sleeptime; + } + } + + return ktime_to_us(idle); - return ktime_to_us(ts->idle_sleeptime); } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); -/* +/** * get_cpu_iowait_time_us - get the total iowait time of a cpu * @cpu: CPU number to query - * @last_update_time: variable to store update time in + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. * * Return the cummulative iowait time (since boot) for a given * CPU, in microseconds. @@ -237,52 +252,40 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, iowait; if (!tick_nohz_enabled) return -1; - update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); + now = ktime_get(); + if (last_update_time) { + update_ts_time_stats(cpu, ts, now, last_update_time); + iowait = ts->iowait_sleeptime; + } else { + if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { + ktime_t delta = ktime_sub(now, ts->idle_entrytime); - return ktime_to_us(ts->iowait_sleeptime); + iowait = ktime_add(ts->iowait_sleeptime, delta); + } else { + iowait = ts->iowait_sleeptime; + } + } + + return ktime_to_us(iowait); } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -/** - * tick_nohz_stop_sched_tick - stop the idle tick from the idle task - * - * When the next event is more than a tick into the future, stop the idle tick - * Called either from the idle loop or from irq_exit() when an idle period was - * just interrupted by an interrupt which did not cause a reschedule. - */ -void tick_nohz_stop_sched_tick(int inidle) +static void tick_nohz_stop_sched_tick(struct tick_sched *ts) { - unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; - struct tick_sched *ts; + unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; int cpu; - local_irq_save(flags); - cpu = smp_processor_id(); ts = &per_cpu(tick_cpu_sched, cpu); - /* - * Call to tick_nohz_start_idle stops the last_update_time from being - * updated. Thus, it must not be called in the event we are called from - * irq_exit() with the prior state different than idle. - */ - if (!inidle && !ts->inidle) - goto end; - - /* - * Set ts->inidle unconditionally. Even if the system did not - * switch to NOHZ mode the cpu frequency governers rely on the - * update of the idle time accounting in tick_nohz_start_idle(). - */ - ts->inidle = 1; - now = tick_nohz_start_idle(cpu, ts); /* @@ -298,10 +301,10 @@ void tick_nohz_stop_sched_tick(int inidle) } if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) - goto end; + return; if (need_resched()) - goto end; + return; if (unlikely(local_softirq_pending() && cpu_online(cpu))) { static int ratelimit; @@ -311,7 +314,7 @@ void tick_nohz_stop_sched_tick(int inidle) (unsigned int) local_softirq_pending()); ratelimit++; } - goto end; + return; } ts->idle_calls++; @@ -389,9 +392,6 @@ void tick_nohz_stop_sched_tick(int inidle) else expires.tv64 = KTIME_MAX; - if (delta_jiffies > 1) - cpumask_set_cpu(cpu, nohz_cpu_mask); - /* Skip reprogram of event if its not changed */ if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) goto out; @@ -409,7 +409,6 @@ void tick_nohz_stop_sched_tick(int inidle) ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; ts->idle_jiffies = last_jiffies; - rcu_enter_nohz(); } ts->idle_sleeps++; @@ -441,15 +440,70 @@ void tick_nohz_stop_sched_tick(int inidle) * softirq. */ tick_do_update_jiffies64(ktime_get()); - cpumask_clear_cpu(cpu, nohz_cpu_mask); } raise_softirq_irqoff(TIMER_SOFTIRQ); out: ts->next_jiffies = next_jiffies; ts->last_jiffies = last_jiffies; ts->sleep_length = ktime_sub(dev->next_event, now); -end: - local_irq_restore(flags); +} + +/** + * tick_nohz_idle_enter - stop the idle tick from the idle task + * + * When the next event is more than a tick into the future, stop the idle tick + * Called when we start the idle loop. + * + * The arch is responsible of calling: + * + * - rcu_idle_enter() after its last use of RCU before the CPU is put + * to sleep. + * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. + */ +void tick_nohz_idle_enter(void) +{ + struct tick_sched *ts; + + WARN_ON_ONCE(irqs_disabled()); + + /* + * Update the idle state in the scheduler domain hierarchy + * when tick_nohz_stop_sched_tick() is called from the idle loop. + * State will be updated to busy during the first busy tick after + * exiting idle. + */ + set_cpu_sd_state_idle(); + + local_irq_disable(); + + ts = &__get_cpu_var(tick_cpu_sched); + /* + * set ts->inidle unconditionally. even if the system did not + * switch to nohz mode the cpu frequency governers rely on the + * update of the idle time accounting in tick_nohz_start_idle(). + */ + ts->inidle = 1; + tick_nohz_stop_sched_tick(ts); + + local_irq_enable(); +} + +/** + * tick_nohz_irq_exit - update next tick event from interrupt exit + * + * When an interrupt fires while we are idle and it doesn't cause + * a reschedule, it may still add, modify or delete a timer, enqueue + * an RCU callback, etc... + * So we need to re-calculate and reprogram the next tick event. + */ +void tick_nohz_irq_exit(void) +{ + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + + if (!ts->inidle) + return; + + tick_nohz_stop_sched_tick(ts); } /** @@ -491,11 +545,13 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) } /** - * tick_nohz_restart_sched_tick - restart the idle tick from the idle task + * tick_nohz_idle_exit - restart the idle tick from the idle task * * Restart the idle tick when the CPU is woken up from idle + * This also exit the RCU extended quiescent state. The CPU + * can use RCU again after this function is called. */ -void tick_nohz_restart_sched_tick(void) +void tick_nohz_idle_exit(void) { int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); @@ -505,6 +561,7 @@ void tick_nohz_restart_sched_tick(void) ktime_t now; local_irq_disable(); + if (ts->idle_active || (ts->inidle && ts->tick_stopped)) now = ktime_get(); @@ -519,12 +576,9 @@ void tick_nohz_restart_sched_tick(void) ts->inidle = 0; - rcu_exit_nohz(); - /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); - cpumask_clear_cpu(cpu, nohz_cpu_mask); #ifndef CONFIG_VIRT_CPU_ACCOUNTING /* @@ -640,8 +694,6 @@ static void tick_nohz_switch_to_nohz(void) next = ktime_add(next, tick_period); } local_irq_enable(); - - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); } /* @@ -793,10 +845,8 @@ void tick_setup_sched_timer(void) } #ifdef CONFIG_NO_HZ - if (tick_nohz_enabled) { + if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); - } #endif } #endif /* HIGH_RES_TIMERS */ |
