diff options
Diffstat (limited to 'kernel/time/tick-sched.c')
| -rw-r--r-- | kernel/time/tick-sched.c | 709 |
1 files changed, 500 insertions, 209 deletions
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7656642e4b8..6558b7ac112 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -20,18 +20,24 @@ #include <linux/profile.h> #include <linux/sched.h> #include <linux/module.h> +#include <linux/irq_work.h> +#include <linux/posix-timers.h> +#include <linux/perf_event.h> +#include <linux/context_tracking.h> #include <asm/irq_regs.h> #include "tick-internal.h" +#include <trace/events/timer.h> + /* * Per cpu nohz control structure */ -static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); +DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); /* - * The time, when the last jiffy update happened. Protected by xtime_lock. + * The time, when the last jiffy update happened. Protected by jiffies_lock. */ static ktime_t last_jiffies_update; @@ -49,14 +55,14 @@ static void tick_do_update_jiffies64(ktime_t now) ktime_t delta; /* - * Do a quick check without holding xtime_lock: + * Do a quick check without holding jiffies_lock: */ delta = ktime_sub(now, last_jiffies_update); if (delta.tv64 < tick_period.tv64) return; - /* Reevalute with xtime_lock held */ - write_seqlock(&xtime_lock); + /* Reevalute with jiffies_lock held */ + write_seqlock(&jiffies_lock); delta = ktime_sub(now, last_jiffies_update); if (delta.tv64 >= tick_period.tv64) { @@ -78,8 +84,12 @@ static void tick_do_update_jiffies64(ktime_t now) /* Keep the tick_next_period variable up to date */ tick_next_period = ktime_add(last_jiffies_update, tick_period); + } else { + write_sequnlock(&jiffies_lock); + return; } - write_sequnlock(&xtime_lock); + write_sequnlock(&jiffies_lock); + update_wall_time(); } /* @@ -89,24 +99,274 @@ static ktime_t tick_init_jiffy_update(void) { ktime_t period; - write_seqlock(&xtime_lock); + write_seqlock(&jiffies_lock); /* Did we start the jiffies update yet ? */ if (last_jiffies_update.tv64 == 0) last_jiffies_update = tick_next_period; period = last_jiffies_update; - write_sequnlock(&xtime_lock); + write_sequnlock(&jiffies_lock); return period; } + +static void tick_sched_do_timer(ktime_t now) +{ + int cpu = smp_processor_id(); + +#ifdef CONFIG_NO_HZ_COMMON + /* + * Check if the do_timer duty was dropped. We don't care about + * concurrency: This happens only when the cpu in charge went + * into a long sleep. If two cpus happen to assign themself to + * this duty, then the jiffies update is still serialized by + * jiffies_lock. + */ + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) + && !tick_nohz_full_cpu(cpu)) + tick_do_timer_cpu = cpu; +#endif + + /* Check, if the jiffies need an update */ + if (tick_do_timer_cpu == cpu) + tick_do_update_jiffies64(now); +} + +static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) +{ +#ifdef CONFIG_NO_HZ_COMMON + /* + * When we are idle and the tick is stopped, we have to touch + * the watchdog as we might not schedule for a really long + * time. This happens on complete idle SMP systems while + * waiting on the login prompt. We also increment the "start of + * idle" jiffy stamp so the idle accounting adjustment we do + * when we go busy again does not account too much ticks. + */ + if (ts->tick_stopped) { + touch_softlockup_watchdog(); + if (is_idle_task(current)) + ts->idle_jiffies++; + } +#endif + update_process_times(user_mode(regs)); + profile_tick(CPU_PROFILING); +} + +#ifdef CONFIG_NO_HZ_FULL +cpumask_var_t tick_nohz_full_mask; +bool tick_nohz_full_running; + +static bool can_stop_full_tick(void) +{ + WARN_ON_ONCE(!irqs_disabled()); + + if (!sched_can_stop_tick()) { + trace_tick_stop(0, "more than 1 task in runqueue\n"); + return false; + } + + if (!posix_cpu_timers_can_stop_tick(current)) { + trace_tick_stop(0, "posix timers running\n"); + return false; + } + + if (!perf_event_can_stop_tick()) { + trace_tick_stop(0, "perf events running\n"); + return false; + } + + /* sched_clock_tick() needs us? */ +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK + /* + * TODO: kick full dynticks CPUs when + * sched_clock_stable is set. + */ + if (!sched_clock_stable()) { + trace_tick_stop(0, "unstable sched clock\n"); + /* + * Don't allow the user to think they can get + * full NO_HZ with this machine. + */ + WARN_ONCE(tick_nohz_full_running, + "NO_HZ FULL will not work with unstable sched clock"); + return false; + } +#endif + + return true; +} + +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); + +/* + * Re-evaluate the need for the tick on the current CPU + * and restart it if necessary. + */ +void __tick_nohz_full_check(void) +{ + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + + if (tick_nohz_full_cpu(smp_processor_id())) { + if (ts->tick_stopped && !is_idle_task(current)) { + if (!can_stop_full_tick()) + tick_nohz_restart_sched_tick(ts, ktime_get()); + } + } +} + +static void nohz_full_kick_work_func(struct irq_work *work) +{ + __tick_nohz_full_check(); +} + +static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { + .func = nohz_full_kick_work_func, +}; + +/* + * Kick the current CPU if it's full dynticks in order to force it to + * re-evaluate its dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick(void) +{ + if (tick_nohz_full_cpu(smp_processor_id())) + irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); +} + +static void nohz_full_kick_ipi(void *info) +{ + __tick_nohz_full_check(); +} + +/* + * Kick all full dynticks CPUs in order to force these to re-evaluate + * their dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick_all(void) +{ + if (!tick_nohz_full_running) + return; + + preempt_disable(); + smp_call_function_many(tick_nohz_full_mask, + nohz_full_kick_ipi, NULL, false); + tick_nohz_full_kick(); + preempt_enable(); +} + +/* + * Re-evaluate the need for the tick as we switch the current task. + * It might need the tick due to per task/process properties: + * perf events, posix cpu timers, ... + */ +void __tick_nohz_task_switch(struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + + if (!tick_nohz_full_cpu(smp_processor_id())) + goto out; + + if (tick_nohz_tick_stopped() && !can_stop_full_tick()) + tick_nohz_full_kick(); + +out: + local_irq_restore(flags); +} + +/* Parse the boot-time nohz CPU list from the kernel parameters. */ +static int __init tick_nohz_full_setup(char *str) +{ + int cpu; + + alloc_bootmem_cpumask_var(&tick_nohz_full_mask); + if (cpulist_parse(str, tick_nohz_full_mask) < 0) { + pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); + return 1; + } + + cpu = smp_processor_id(); + if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { + pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); + cpumask_clear_cpu(cpu, tick_nohz_full_mask); + } + tick_nohz_full_running = true; + + return 1; +} +__setup("nohz_full=", tick_nohz_full_setup); + +static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_PREPARE: + /* + * If we handle the timekeeping duty for full dynticks CPUs, + * we can't safely shutdown that CPU. + */ + if (tick_nohz_full_running && tick_do_timer_cpu == cpu) + return NOTIFY_BAD; + break; + } + return NOTIFY_OK; +} + +/* + * Worst case string length in chunks of CPU range seems 2 steps + * separations: 0,2,4,6,... + * This is NR_CPUS + sizeof('\0') + */ +static char __initdata nohz_full_buf[NR_CPUS + 1]; + +static int tick_nohz_init_all(void) +{ + int err = -1; + +#ifdef CONFIG_NO_HZ_FULL_ALL + if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) { + pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); + return err; + } + err = 0; + cpumask_setall(tick_nohz_full_mask); + cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask); + tick_nohz_full_running = true; +#endif + return err; +} + +void __init tick_nohz_init(void) +{ + int cpu; + + if (!tick_nohz_full_running) { + if (tick_nohz_init_all() < 0) + return; + } + + for_each_cpu(cpu, tick_nohz_full_mask) + context_tracking_cpu_set(cpu); + + cpu_notifier(tick_nohz_cpu_down_callback, 0); + cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask); + pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); +} +#endif + /* * NOHZ - aka dynamic tick functionality */ -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * NO HZ enabled ? */ static int tick_nohz_enabled __read_mostly = 1; - +int tick_nohz_active __read_mostly; /* * Enable / Disable tickless mode */ @@ -135,11 +395,9 @@ __setup("nohz=", setup_tick_nohz); */ static void tick_nohz_update_jiffies(ktime_t now) { - int cpu = smp_processor_id(); - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long flags; - ts->idle_waketime = now; + __this_cpu_write(tick_cpu_sched.idle_waketime, now); local_irq_save(flags); tick_do_update_jiffies64(now); @@ -170,23 +428,17 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda } -static void tick_nohz_stop_idle(int cpu, ktime_t now) +static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now) { - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - - update_ts_time_stats(cpu, ts, now, NULL); + update_ts_time_stats(smp_processor_id(), ts, now, NULL); ts->idle_active = 0; sched_clock_idle_wakeup_event(0); } -static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) +static ktime_t tick_nohz_start_idle(struct tick_sched *ts) { - ktime_t now; - - now = ktime_get(); - - update_ts_time_stats(cpu, ts, now, NULL); + ktime_t now = ktime_get(); ts->idle_entrytime = now; ts->idle_active = 1; @@ -213,7 +465,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t now, idle; - if (!tick_nohz_enabled) + if (!tick_nohz_active) return -1; now = ktime_get(); @@ -254,7 +506,7 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t now, iowait; - if (!tick_nohz_enabled) + if (!tick_nohz_active) return -1; now = ktime_get(); @@ -275,71 +527,43 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -static void tick_nohz_stop_sched_tick(struct tick_sched *ts) +static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, + ktime_t now, int cpu) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; - ktime_t last_update, expires, now; + ktime_t last_update, expires, ret = { .tv64 = 0 }; + unsigned long rcu_delta_jiffies; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; - int cpu; - - cpu = smp_processor_id(); - ts = &per_cpu(tick_cpu_sched, cpu); - - now = tick_nohz_start_idle(cpu, ts); - - /* - * If this cpu is offline and it is the one which updates - * jiffies, then give up the assignment and let it be taken by - * the cpu which runs the tick timer next. If we don't drop - * this here the jiffies might be stale and do_timer() never - * invoked. - */ - if (unlikely(!cpu_online(cpu))) { - if (cpu == tick_do_timer_cpu) - tick_do_timer_cpu = TICK_DO_TIMER_NONE; - } - - if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) - return; - - if (need_resched()) - return; - if (unlikely(local_softirq_pending() && cpu_online(cpu))) { - static int ratelimit; + time_delta = timekeeping_max_deferment(); - if (ratelimit < 10) { - printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", - (unsigned int) local_softirq_pending()); - ratelimit++; - } - return; - } - - ts->idle_calls++; /* Read jiffies and the time when jiffies were updated last */ do { - seq = read_seqbegin(&xtime_lock); + seq = read_seqbegin(&jiffies_lock); last_update = last_jiffies_update; last_jiffies = jiffies; - time_delta = timekeeping_max_deferment(); - } while (read_seqretry(&xtime_lock, seq)); + } while (read_seqretry(&jiffies_lock, seq)); - if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || - arch_needs_cpu(cpu)) { + if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || + arch_needs_cpu(cpu) || irq_work_needs_cpu()) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; } else { /* Get the next timer wheel timer */ next_jiffies = get_next_timer_interrupt(last_jiffies); delta_jiffies = next_jiffies - last_jiffies; + if (rcu_delta_jiffies < delta_jiffies) { + next_jiffies = last_jiffies + rcu_delta_jiffies; + delta_jiffies = rcu_delta_jiffies; + } } + /* - * Do not stop the tick, if we are only one off - * or if the cpu is required for rcu + * Do not stop the tick, if we are only one off (or less) + * or if the cpu is required for RCU: */ - if (!ts->tick_stopped && delta_jiffies == 1) + if (!ts->tick_stopped && delta_jiffies <= 1) goto out; /* Schedule the tick, if we are at least one jiffie off */ @@ -368,6 +592,13 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) time_delta = KTIME_MAX; } +#ifdef CONFIG_NO_HZ_FULL + if (!ts->inidle) { + time_delta = min(time_delta, + scheduler_tick_max_deferment()); + } +#endif + /* * calculate the expiry time for the next timer wheel * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals @@ -396,6 +627,8 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) goto out; + ret = expires; + /* * nohz_stop_sched_tick can be called several times before * the nohz_restart_sched_tick is called. This happens when @@ -404,18 +637,14 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) * the scheduler tick in nohz_restart_sched_tick. */ if (!ts->tick_stopped) { - select_nohz_load_balancer(1); + nohz_balance_enter_idle(cpu); + calc_load_enter_idle(); - ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); + ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; - ts->idle_jiffies = last_jiffies; + trace_tick_stop(1, " "); } - ts->idle_sleeps++; - - /* Mark expires */ - ts->idle_expires = expires; - /* * If the expiration time == KTIME_MAX, then * in this case we simply stop the tick timer. @@ -446,6 +675,102 @@ out: ts->next_jiffies = next_jiffies; ts->last_jiffies = last_jiffies; ts->sleep_length = ktime_sub(dev->next_event, now); + + return ret; +} + +static void tick_nohz_full_stop_tick(struct tick_sched *ts) +{ +#ifdef CONFIG_NO_HZ_FULL + int cpu = smp_processor_id(); + + if (!tick_nohz_full_cpu(cpu) || is_idle_task(current)) + return; + + if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) + return; + + if (!can_stop_full_tick()) + return; + + tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); +#endif +} + +static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) +{ + /* + * If this cpu is offline and it is the one which updates + * jiffies, then give up the assignment and let it be taken by + * the cpu which runs the tick timer next. If we don't drop + * this here the jiffies might be stale and do_timer() never + * invoked. + */ + if (unlikely(!cpu_online(cpu))) { + if (cpu == tick_do_timer_cpu) + tick_do_timer_cpu = TICK_DO_TIMER_NONE; + return false; + } + + if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) { + ts->sleep_length = (ktime_t) { .tv64 = NSEC_PER_SEC/HZ }; + return false; + } + + if (need_resched()) + return false; + + if (unlikely(local_softirq_pending() && cpu_online(cpu))) { + static int ratelimit; + + if (ratelimit < 10 && + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { + pr_warn("NOHZ: local_softirq_pending %02x\n", + (unsigned int) local_softirq_pending()); + ratelimit++; + } + return false; + } + + if (tick_nohz_full_enabled()) { + /* + * Keep the tick alive to guarantee timekeeping progression + * if there are full dynticks CPUs around + */ + if (tick_do_timer_cpu == cpu) + return false; + /* + * Boot safety: make sure the timekeeping duty has been + * assigned before entering dyntick-idle mode, + */ + if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) + return false; + } + + return true; +} + +static void __tick_nohz_idle_enter(struct tick_sched *ts) +{ + ktime_t now, expires; + int cpu = smp_processor_id(); + + now = tick_nohz_start_idle(ts); + + if (can_stop_idle_tick(cpu, ts)) { + int was_stopped = ts->tick_stopped; + + ts->idle_calls++; + + expires = tick_nohz_stop_sched_tick(ts, now, cpu); + if (expires.tv64 > 0LL) { + ts->idle_sleeps++; + ts->idle_expires = expires; + } + + if (!was_stopped && ts->tick_stopped) + ts->idle_jiffies = ts->last_jiffies; + } } /** @@ -477,16 +802,12 @@ void tick_nohz_idle_enter(void) local_irq_disable(); ts = &__get_cpu_var(tick_cpu_sched); - /* - * set ts->inidle unconditionally. even if the system did not - * switch to nohz mode the cpu frequency governers rely on the - * update of the idle time accounting in tick_nohz_start_idle(). - */ ts->inidle = 1; - tick_nohz_stop_sched_tick(ts); + __tick_nohz_idle_enter(ts); local_irq_enable(); } +EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); /** * tick_nohz_irq_exit - update next tick event from interrupt exit @@ -500,10 +821,10 @@ void tick_nohz_irq_exit(void) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); - if (!ts->inidle) - return; - - tick_nohz_stop_sched_tick(ts); + if (ts->inidle) + __tick_nohz_idle_enter(ts); + else + tick_nohz_full_stop_tick(ts); } /** @@ -521,7 +842,7 @@ ktime_t tick_nohz_get_sleep_length(void) static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) { hrtimer_cancel(&ts->sched_timer); - hrtimer_set_expires(&ts->sched_timer, ts->idle_tick); + hrtimer_set_expires(&ts->sched_timer, ts->last_tick); while (1) { /* Forward the time to expire in the future */ @@ -538,49 +859,36 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) hrtimer_get_expires(&ts->sched_timer), 0)) break; } - /* Update jiffies and reread time */ - tick_do_update_jiffies64(now); + /* Reread time and update jiffies */ now = ktime_get(); + tick_do_update_jiffies64(now); } } -/** - * tick_nohz_idle_exit - restart the idle tick from the idle task - * - * Restart the idle tick when the CPU is woken up from idle - * This also exit the RCU extended quiescent state. The CPU - * can use RCU again after this function is called. - */ -void tick_nohz_idle_exit(void) +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { - int cpu = smp_processor_id(); - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); -#ifndef CONFIG_VIRT_CPU_ACCOUNTING - unsigned long ticks; -#endif - ktime_t now; + /* Update jiffies first */ + tick_do_update_jiffies64(now); + update_cpu_load_nohz(); - local_irq_disable(); + calc_load_exit_idle(); + touch_softlockup_watchdog(); + /* + * Cancel the scheduled timer and restore the tick + */ + ts->tick_stopped = 0; + ts->idle_exittime = now; - if (ts->idle_active || (ts->inidle && ts->tick_stopped)) - now = ktime_get(); + tick_nohz_restart(ts, now); +} - if (ts->idle_active) - tick_nohz_stop_idle(cpu, now); +static void tick_nohz_account_idle_ticks(struct tick_sched *ts) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + unsigned long ticks; - if (!ts->inidle || !ts->tick_stopped) { - ts->inidle = 0; - local_irq_enable(); + if (vtime_accounting_enabled()) return; - } - - ts->inidle = 0; - - /* Update jiffies first */ - select_nohz_load_balancer(0); - tick_do_update_jiffies64(now); - -#ifndef CONFIG_VIRT_CPU_ACCOUNTING /* * We stopped the tick in idle. Update process times would miss the * time we slept as update_process_times does only a 1 tick @@ -593,18 +901,40 @@ void tick_nohz_idle_exit(void) if (ticks && ticks < LONG_MAX) account_idle_ticks(ticks); #endif +} - touch_softlockup_watchdog(); - /* - * Cancel the scheduled timer and restore the tick - */ - ts->tick_stopped = 0; - ts->idle_exittime = now; +/** + * tick_nohz_idle_exit - restart the idle tick from the idle task + * + * Restart the idle tick when the CPU is woken up from idle + * This also exit the RCU extended quiescent state. The CPU + * can use RCU again after this function is called. + */ +void tick_nohz_idle_exit(void) +{ + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + ktime_t now; - tick_nohz_restart(ts, now); + local_irq_disable(); + + WARN_ON_ONCE(!ts->inidle); + + ts->inidle = 0; + + if (ts->idle_active || ts->tick_stopped) + now = ktime_get(); + + if (ts->idle_active) + tick_nohz_stop_idle(ts, now); + + if (ts->tick_stopped) { + tick_nohz_restart_sched_tick(ts, now); + tick_nohz_account_idle_ticks(ts); + } local_irq_enable(); } +EXPORT_SYMBOL_GPL(tick_nohz_idle_exit); static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) { @@ -619,40 +949,12 @@ static void tick_nohz_handler(struct clock_event_device *dev) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); struct pt_regs *regs = get_irq_regs(); - int cpu = smp_processor_id(); ktime_t now = ktime_get(); dev->next_event.tv64 = KTIME_MAX; - /* - * Check if the do_timer duty was dropped. We don't care about - * concurrency: This happens only when the cpu in charge went - * into a long sleep. If two cpus happen to assign themself to - * this duty, then the jiffies update is still serialized by - * xtime_lock. - */ - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) - tick_do_timer_cpu = cpu; - - /* Check, if the jiffies need an update */ - if (tick_do_timer_cpu == cpu) - tick_do_update_jiffies64(now); - - /* - * When we are idle and the tick is stopped, we have to touch - * the watchdog as we might not schedule for a really long - * time. This happens on complete idle SMP systems while - * waiting on the login prompt. We also increment the "start - * of idle" jiffy stamp so the idle accounting adjustment we - * do when we go busy again does not account too much ticks. - */ - if (ts->tick_stopped) { - touch_softlockup_watchdog(); - ts->idle_jiffies++; - } - - update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING); + tick_sched_do_timer(now); + tick_sched_handle(ts, regs); while (tick_nohz_reprogram(ts, now)) { now = ktime_get(); @@ -676,7 +978,7 @@ static void tick_nohz_switch_to_nohz(void) local_irq_enable(); return; } - + tick_nohz_active = 1; ts->nohz_mode = NOHZ_MODE_LOWRES; /* @@ -707,12 +1009,10 @@ static void tick_nohz_switch_to_nohz(void) * timer and do not touch the other magic bits which need to be done * when idle is left. */ -static void tick_nohz_kick_tick(int cpu, ktime_t now) +static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now) { #if 0 /* Switch back to 2.6.27 behaviour */ - - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t delta; /* @@ -727,36 +1027,36 @@ static void tick_nohz_kick_tick(int cpu, ktime_t now) #endif } -static inline void tick_check_nohz(int cpu) +static inline void tick_nohz_irq_enter(void) { - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); ktime_t now; if (!ts->idle_active && !ts->tick_stopped) return; now = ktime_get(); if (ts->idle_active) - tick_nohz_stop_idle(cpu, now); + tick_nohz_stop_idle(ts, now); if (ts->tick_stopped) { tick_nohz_update_jiffies(now); - tick_nohz_kick_tick(cpu, now); + tick_nohz_kick_tick(ts, now); } } #else static inline void tick_nohz_switch_to_nohz(void) { } -static inline void tick_check_nohz(int cpu) { } +static inline void tick_nohz_irq_enter(void) { } -#endif /* NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from irq_enter to notify about the possible interruption of idle() */ -void tick_check_idle(int cpu) +void tick_irq_enter(void) { - tick_check_oneshot_broadcast(cpu); - tick_check_nohz(cpu); + tick_check_oneshot_broadcast_this_cpu(); + tick_nohz_irq_enter(); } /* @@ -765,7 +1065,7 @@ void tick_check_idle(int cpu) #ifdef CONFIG_HIGH_RES_TIMERS /* * We rearm the timer until we get disabled by the idle code. - * Called with interrupts disabled and timer->base->cpu_base->lock held. + * Called with interrupts disabled. */ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) { @@ -773,50 +1073,31 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) container_of(timer, struct tick_sched, sched_timer); struct pt_regs *regs = get_irq_regs(); ktime_t now = ktime_get(); - int cpu = smp_processor_id(); -#ifdef CONFIG_NO_HZ - /* - * Check if the do_timer duty was dropped. We don't care about - * concurrency: This happens only when the cpu in charge went - * into a long sleep. If two cpus happen to assign themself to - * this duty, then the jiffies update is still serialized by - * xtime_lock. - */ - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) - tick_do_timer_cpu = cpu; -#endif - - /* Check, if the jiffies need an update */ - if (tick_do_timer_cpu == cpu) - tick_do_update_jiffies64(now); + tick_sched_do_timer(now); /* * Do not call, when we are not in irq context and have * no valid regs pointer */ - if (regs) { - /* - * When we are idle and the tick is stopped, we have to touch - * the watchdog as we might not schedule for a really long - * time. This happens on complete idle SMP systems while - * waiting on the login prompt. We also increment the "start of - * idle" jiffy stamp so the idle accounting adjustment we do - * when we go busy again does not account too much ticks. - */ - if (ts->tick_stopped) { - touch_softlockup_watchdog(); - ts->idle_jiffies++; - } - update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING); - } + if (regs) + tick_sched_handle(ts, regs); hrtimer_forward(timer, now, tick_period); return HRTIMER_RESTART; } +static int sched_skew_tick; + +static int __init skew_tick(char *str) +{ + get_option(&str, &sched_skew_tick); + + return 0; +} +early_param("skew_tick", skew_tick); + /** * tick_setup_sched_timer - setup the tick emulation timer */ @@ -834,6 +1115,14 @@ void tick_setup_sched_timer(void) /* Get the next period (per cpu) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); + /* Offset the tick to avert jiffies_lock contention. */ + if (sched_skew_tick) { + u64 offset = ktime_to_ns(tick_period) >> 1; + do_div(offset, num_possible_cpus()); + offset *= smp_processor_id(); + hrtimer_add_expires_ns(&ts->sched_timer, offset); + } + for (;;) { hrtimer_forward(&ts->sched_timer, now, tick_period); hrtimer_start_expires(&ts->sched_timer, @@ -844,14 +1133,16 @@ void tick_setup_sched_timer(void) now = ktime_get(); } -#ifdef CONFIG_NO_HZ - if (tick_nohz_enabled) +#ifdef CONFIG_NO_HZ_COMMON + if (tick_nohz_enabled) { ts->nohz_mode = NOHZ_MODE_HIGHRES; + tick_nohz_active = 1; + } #endif } #endif /* HIGH_RES_TIMERS */ -#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); @@ -861,7 +1152,7 @@ void tick_cancel_sched_timer(int cpu) hrtimer_cancel(&ts->sched_timer); # endif - ts->nohz_mode = NOHZ_MODE_INACTIVE; + memset(ts, 0, sizeof(*ts)); } #endif |
