Diffstat (limited to 'kernel/time/tick-sched.c')
 -rw-r--r--  kernel/time/tick-sched.c | 300
 1 file changed, 282 insertions(+), 18 deletions(-)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a19a39952c1..bc67d4245e1 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -21,11 +21,15 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/irq_work.h>
+#include <linux/posix-timers.h>
+#include <linux/perf_event.h>
 
 #include <asm/irq_regs.h>
 
 #include "tick-internal.h"
 
+#include <trace/events/timer.h>
+
 /*
  * Per cpu nohz control structure
  */
@@ -104,7 +108,7 @@ static void tick_sched_do_timer(ktime_t now)
 {
 	int cpu = smp_processor_id();
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	/*
 	 * Check if the do_timer duty was dropped. We don't care about
 	 * concurrency: This happens only when the cpu in charge went
@@ -112,7 +116,8 @@ static void tick_sched_do_timer(ktime_t now)
 	 * this duty, then the jiffies update is still serialized by
 	 * jiffies_lock.
 	 */
-	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+	    && !tick_nohz_full_cpu(cpu))
 		tick_do_timer_cpu = cpu;
 #endif
 
@@ -123,7 +128,7 @@ static void tick_sched_do_timer(ktime_t now)
 
 static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 {
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	/*
 	 * When we are idle and the tick is stopped, we have to touch
 	 * the watchdog as we might not schedule for a really long
@@ -142,10 +147,226 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 	profile_tick(CPU_PROFILING);
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static cpumask_var_t nohz_full_mask;
+bool have_nohz_full_mask;
+
+static bool can_stop_full_tick(void)
+{
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (!sched_can_stop_tick()) {
+		trace_tick_stop(0, "more than 1 task in runqueue\n");
+		return false;
+	}
+
+	if (!posix_cpu_timers_can_stop_tick(current)) {
+		trace_tick_stop(0, "posix timers running\n");
+		return false;
+	}
+
+	if (!perf_event_can_stop_tick()) {
+		trace_tick_stop(0, "perf events running\n");
+		return false;
+	}
+
+	/* sched_clock_tick() needs us? */
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+	/*
+	 * TODO: kick full dynticks CPUs when
+	 * sched_clock_stable is set.
+	 */
+	if (!sched_clock_stable) {
+		trace_tick_stop(0, "unstable sched clock\n");
+		return false;
+	}
+#endif
+
+	return true;
+}
+
+static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
+
+/*
+ * Re-evaluate the need for the tick on the current CPU
+ * and restart it if necessary.
+ */
+void tick_nohz_full_check(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+	if (tick_nohz_full_cpu(smp_processor_id())) {
+		if (ts->tick_stopped && !is_idle_task(current)) {
+			if (!can_stop_full_tick())
+				tick_nohz_restart_sched_tick(ts, ktime_get());
+		}
+	}
+}
+
+static void nohz_full_kick_work_func(struct irq_work *work)
+{
+	tick_nohz_full_check();
+}
+
+static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
+	.func = nohz_full_kick_work_func,
+};
+
+/*
+ * Kick the current CPU if it's full dynticks in order to force it to
+ * re-evaluate its dependency on the tick and restart it if necessary.
+ */
+void tick_nohz_full_kick(void)
+{
+	if (tick_nohz_full_cpu(smp_processor_id()))
+		irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
+}
+
+static void nohz_full_kick_ipi(void *info)
+{
+	tick_nohz_full_check();
+}
+
+/*
+ * Kick all full dynticks CPUs in order to force these to re-evaluate
+ * their dependency on the tick and restart it if necessary.
+ */
+void tick_nohz_full_kick_all(void)
+{
+	if (!have_nohz_full_mask)
+		return;
+
+	preempt_disable();
+	smp_call_function_many(nohz_full_mask,
+			       nohz_full_kick_ipi, NULL, false);
+	preempt_enable();
+}
+
+/*
+ * Re-evaluate the need for the tick as we switch the current task.
+ * It might need the tick due to per task/process properties:
+ * perf events, posix cpu timers, ...
+ */
+void tick_nohz_task_switch(struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (!tick_nohz_full_cpu(smp_processor_id()))
+		goto out;
+
+	if (tick_nohz_tick_stopped() && !can_stop_full_tick())
+		tick_nohz_full_kick();
+
+out:
+	local_irq_restore(flags);
+}
+
+int tick_nohz_full_cpu(int cpu)
+{
+	if (!have_nohz_full_mask)
+		return 0;
+
+	return cpumask_test_cpu(cpu, nohz_full_mask);
+}
+
+/* Parse the boot-time nohz CPU list from the kernel parameters. */
+static int __init tick_nohz_full_setup(char *str)
+{
+	int cpu;
+
+	alloc_bootmem_cpumask_var(&nohz_full_mask);
+	if (cpulist_parse(str, nohz_full_mask) < 0) {
+		pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
+		return 1;
+	}
+
+	cpu = smp_processor_id();
+	if (cpumask_test_cpu(cpu, nohz_full_mask)) {
+		pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
+		cpumask_clear_cpu(cpu, nohz_full_mask);
+	}
+	have_nohz_full_mask = true;
+
+	return 1;
+}
+__setup("nohz_full=", tick_nohz_full_setup);
+
+static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
+						 unsigned long action,
+						 void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_PREPARE:
+		/*
+		 * If we handle the timekeeping duty for full dynticks CPUs,
+		 * we can't safely shutdown that CPU.
+		 */
+		if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
+			return -EINVAL;
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+/*
+ * Worst case string length in chunks of CPU range seems 2 steps
+ * separations: 0,2,4,6,...
+ * This is NR_CPUS + sizeof('\0')
+ */
+static char __initdata nohz_full_buf[NR_CPUS + 1];
+
+static int tick_nohz_init_all(void)
+{
+	int err = -1;
+
+#ifdef CONFIG_NO_HZ_FULL_ALL
+	if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
+		pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
+		return err;
+	}
+	err = 0;
+	cpumask_setall(nohz_full_mask);
+	cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
+	have_nohz_full_mask = true;
+#endif
+	return err;
+}
+
+void __init tick_nohz_init(void)
+{
+	int cpu;
+
+	if (!have_nohz_full_mask) {
+		if (tick_nohz_init_all() < 0)
+			return;
+	}
+
+	cpu_notifier(tick_nohz_cpu_down_callback, 0);
+
+	/* Make sure full dynticks CPU are also RCU nocbs */
+	for_each_cpu(cpu, nohz_full_mask) {
+		if (!rcu_is_nocb_cpu(cpu)) {
+			pr_warning("NO_HZ: CPU %d is not RCU nocb: "
+				   "cleared from nohz_full range", cpu);
+			cpumask_clear_cpu(cpu, nohz_full_mask);
+		}
+	}
+
+	cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
+	pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
+}
+#else
+#define have_nohz_full_mask (0)
+#endif
+
 /*
  * NOHZ - aka dynamic tick functionality
  */
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * NO HZ enabled ?
  */
@@ -345,11 +566,12 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 			delta_jiffies = rcu_delta_jiffies;
 		}
 	}
+
 	/*
-	 * Do not stop the tick, if we are only one off
-	 * or if the cpu is required for rcu
+	 * Do not stop the tick, if we are only one off (or less)
+	 * or if the cpu is required for RCU:
 	 */
-	if (!ts->tick_stopped && delta_jiffies == 1)
+	if (!ts->tick_stopped && delta_jiffies <= 1)
 		goto out;
 
 	/* Schedule the tick, if we are at least one jiffie off */
@@ -378,6 +600,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 			time_delta = KTIME_MAX;
 		}
 
+#ifdef CONFIG_NO_HZ_FULL
+		if (!ts->inidle) {
+			time_delta = min(time_delta,
+					 scheduler_tick_max_deferment());
+		}
+#endif
+
 		/*
 		 * calculate the expiry time for the next timer wheel
 		 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
@@ -421,6 +650,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 
 			ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 			ts->tick_stopped = 1;
+			trace_tick_stop(1, " ");
 		}
 
 		/*
@@ -457,6 +687,24 @@ out:
 	return ret;
 }
 
+static void tick_nohz_full_stop_tick(struct tick_sched *ts)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	int cpu = smp_processor_id();
+
+	if (!tick_nohz_full_cpu(cpu) || is_idle_task(current))
+		return;
+
+	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
+		return;
+
+	if (!can_stop_full_tick())
+		return;
+
+	tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
+#endif
+}
+
 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 {
 	/*
@@ -482,13 +730,28 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 
 		if (ratelimit < 10 &&
 		    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
-			printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
-			       (unsigned int) local_softirq_pending());
+			pr_warn("NOHZ: local_softirq_pending %02x\n",
+				(unsigned int) local_softirq_pending());
 			ratelimit++;
 		}
 		return false;
 	}
 
+	if (have_nohz_full_mask) {
+		/*
+		 * Keep the tick alive to guarantee timekeeping progression
+		 * if there are full dynticks CPUs around
+		 */
+		if (tick_do_timer_cpu == cpu)
+			return false;
+		/*
+		 * Boot safety: make sure the timekeeping duty has been
+		 * assigned before entering dyntick-idle mode,
+		 */
+		if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+			return false;
+	}
+
 	return true;
 }
 
@@ -568,12 +831,13 @@ void tick_nohz_irq_exit(void)
 {
 	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 
-	if (!ts->inidle)
-		return;
-
-	/* Cancel the timer because CPU already waken up from the C-states*/
-	menu_hrtimer_cancel();
-	__tick_nohz_idle_enter(ts);
+	if (ts->inidle) {
+		/* Cancel the timer because CPU already waken up from the C-states*/
+		menu_hrtimer_cancel();
+		__tick_nohz_idle_enter(ts);
+	} else {
+		tick_nohz_full_stop_tick(ts);
+	}
 }
 
 /**
@@ -802,7 +1066,7 @@ static inline void tick_check_nohz(int cpu)
 static inline void tick_nohz_switch_to_nohz(void) { }
 static inline void tick_check_nohz(int cpu) { }
 
-#endif /* NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
 
 /*
  * Called from irq_enter to notify about the possible interruption of idle()
@@ -887,14 +1151,14 @@ void tick_setup_sched_timer(void)
 		now = ktime_get();
 	}
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 	if (tick_nohz_enabled)
 		ts->nohz_mode = NOHZ_MODE_HIGHRES;
 #endif
 }
 #endif /* HIGH_RES_TIMERS */
 
-#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS
+#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
 void tick_cancel_sched_timer(int cpu)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
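
A note on trying this out: as the hunks above enforce, the boot CPU is cleared from the nohz_full range so it can keep the timekeeping duty, and any CPU in the range that is not also an RCU no-callbacks CPU is dropped again in tick_nohz_init(). A kernel built with CONFIG_NO_HZ_FULL would therefore typically be booted with matching ranges, e.g. nohz_full=1-7 rcu_nocbs=1-7 on an 8-way machine, leaving CPU 0 to handle the jiffies update.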
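
For readers wiring a subsystem into this, here is a minimal caller-side sketch of the kick interfaces introduced above. Only tick_nohz_full_kick() and tick_nohz_full_kick_all() come from this patch; the surrounding function names are hypothetical, and the declarations are assumed to be available from <linux/tick.h>:

/*
 * Hypothetical illustration, not part of the patch: a subsystem that
 * just gained a tick dependency (the way perf events or posix cpu
 * timers do) forces the affected CPUs to re-run can_stop_full_tick()
 * and restart their tick if it may no longer be stopped.
 */
#include <linux/tick.h>

static void my_arm_local_tick_dependency(void)
{
	/* Re-evaluate the current CPU only, from irq_work context. */
	tick_nohz_full_kick();
}

static void my_arm_global_tick_dependency(void)
{
	/* Re-evaluate every nohz_full CPU via IPI; no-op without the mask. */
	tick_nohz_full_kick_all();
}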
