Diffstat (limited to 'kernel/watchdog.c')
 -rw-r--r--  kernel/watchdog.c | 118
 1 file changed, 104 insertions(+), 14 deletions(-)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 51c4f34d258..c3319bd1b04 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,6 +31,12 @@
 int watchdog_user_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
+#ifdef CONFIG_SMP
+int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+#else
+#define sysctl_softlockup_all_cpu_backtrace 0
+#endif
+
 static int __read_mostly watchdog_running;
 static u64 __read_mostly sample_period;
@@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
+static unsigned long soft_lockup_nmi_warn;
 /* boot commands */
 /*
@@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str)
 }
 __setup("nosoftlockup", nosoftlockup_setup);
 /*  */
+#ifdef CONFIG_SMP
+static int __init softlockup_all_cpu_backtrace_setup(char *str)
+{
+	sysctl_softlockup_all_cpu_backtrace =
+		!!simple_strtol(str, NULL, 0);
+	return 1;
+}
+__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#endif
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -138,7 +154,11 @@ static void __touch_watchdog(void)
 void touch_softlockup_watchdog(void)
 {
-	__this_cpu_write(watchdog_touch_ts, 0);
+	/*
+	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
+	 * gets zeroed here, so use the raw_ operation.
+	 */
+	raw_cpu_write(watchdog_touch_ts, 0);
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
@@ -158,14 +178,14 @@ void touch_all_softlockup_watchdogs(void)
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 void touch_nmi_watchdog(void)
 {
-	if (watchdog_user_enabled) {
-		unsigned cpu;
-
-		for_each_present_cpu(cpu) {
-			if (per_cpu(watchdog_nmi_touch, cpu) != true)
-				per_cpu(watchdog_nmi_touch, cpu) = true;
-		}
-	}
+	/*
+	 * Using __raw here because some code paths have
+	 * preemption enabled.  If preemption is enabled
+	 * then interrupts should be enabled too, in which
+	 * case we shouldn't have to worry about the watchdog
+	 * going off.
+	 */
+	__raw_get_cpu_var(watchdog_nmi_touch) = true;
 	touch_softlockup_watchdog();
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
@@ -267,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
+	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
 	/* kick the hardlockup detector */
 	watchdog_interrupt_count();
@@ -313,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		if (__this_cpu_read(soft_watchdog_warn) == true)
 			return HRTIMER_RESTART;
+		if (softlockup_all_cpu_backtrace) {
+			/* Prevent multiple soft-lockup reports if one cpu is already
+			 * engaged in dumping cpu back traces
+			 */
+			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
+				/* Someone else will report us. Let's give up */
+				__this_cpu_write(soft_watchdog_warn, true);
+				return HRTIMER_RESTART;
+			}
+		}
+
 		printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 			smp_processor_id(), duration,
 			current->comm, task_pid_nr(current));
@@ -323,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		else
 			dump_stack();
+		if (softlockup_all_cpu_backtrace) {
+			/* Avoid generating two back traces for current
+			 * given that one is already made above
+			 */
+			trigger_allbutself_cpu_backtrace();
+
+			clear_bit(0, &soft_lockup_nmi_warn);
+			/* Barrier to sync with other cpus */
+			smp_mb__after_atomic();
+		}
+
 		if (softlockup_panic)
 			panic("softlockup: hung tasks");
 		__this_cpu_write(soft_watchdog_warn, true);
@@ -486,7 +529,49 @@ static struct smp_hotplug_thread watchdog_threads = {
 	.unpark			= watchdog_enable,
 };
-static int watchdog_enable_all_cpus(void)
+static void restart_watchdog_hrtimer(void *info)
+{
+	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
+	int ret;
+
+	/*
+	 * No need to cancel and restart hrtimer if it is currently executing
+	 * because it will reprogram itself with the new period now.
+	 * We should never see it unqueued here because we are running per-cpu
+	 * with interrupts disabled.
+	 */
+	ret = hrtimer_try_to_cancel(hrtimer);
+	if (ret == 1)
+		hrtimer_start(hrtimer, ns_to_ktime(sample_period),
+				HRTIMER_MODE_REL_PINNED);
+}
+
+static void update_timers(int cpu)
+{
+	/*
+	 * Make sure that perf event counter will adopt to a new
+	 * sampling period. Updating the sampling period directly would
+	 * be much nicer but we do not have an API for that now so
+	 * let's use a big hammer.
+	 * Hrtimer will adopt the new period on the next tick but this
+	 * might be late already so we have to restart the timer as well.
+	 */
+	watchdog_nmi_disable(cpu);
+	smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1);
+	watchdog_nmi_enable(cpu);
+}
+
+static void update_timers_all_cpus(void)
+{
+	int cpu;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		update_timers(cpu);
+	put_online_cpus();
+}
+
+static int watchdog_enable_all_cpus(bool sample_period_changed)
 {
 	int err = 0;
@@ -496,6 +581,8 @@ static int watchdog_enable_all_cpus(void)
 			pr_err("Failed to create watchdog threads, disabled\n");
 		else
 			watchdog_running = 1;
+	} else if (sample_period_changed) {
+		update_timers_all_cpus();
 	}
 	return err;
@@ -520,13 +607,15 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int err, old_thresh, old_enabled;
+	static DEFINE_MUTEX(watchdog_proc_mutex);
+	mutex_lock(&watchdog_proc_mutex);
 	old_thresh = ACCESS_ONCE(watchdog_thresh);
 	old_enabled = ACCESS_ONCE(watchdog_user_enabled);
 	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (err || !write)
-		return err;
+		goto out;
 	set_sample_period();
 	/*
@@ -535,7 +624,7 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 	 * watchdog_*_all_cpus() function takes care of this.
 	 */
 	if (watchdog_user_enabled && watchdog_thresh)
-		err = watchdog_enable_all_cpus();
+		err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
 	else
 		watchdog_disable_all_cpus();
@@ -544,7 +633,8 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 		watchdog_thresh = old_thresh;
 		watchdog_user_enabled = old_enabled;
 	}
-
+out:
+	mutex_unlock(&watchdog_proc_mutex);
 	return err;
 }
 #endif /* CONFIG_SYSCTL */
@@ -554,5 +644,5 @@ void __init lockup_detector_init(void)
 	set_sample_period();
 	if (watchdog_user_enabled)
-		watchdog_enable_all_cpus();
+		watchdog_enable_all_cpus(false);
 }
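The core of the new softlockup_all_cpu_backtrace path is a small serialization trick: the first CPU that detects a lockup claims bit 0 of soft_lockup_nmi_warn with test_and_set_bit(), prints the report and the backtraces of the other CPUs, then releases the bit; any CPU that loses the race marks itself as already warned and returns from the timer callback. Below is a minimal userspace sketch of that pattern, not the kernel code: C11 atomic_flag and pthreads stand in for test_and_set_bit()/clear_bit() and the per-CPU hrtimer callbacks, and the printf() lines stand in for the soft-lockup report.

/*
 * Userspace illustration only: the first "CPU" to claim the flag prints
 * the full report, the rest back off, mirroring the patch's use of
 * test_and_set_bit(0, &soft_lockup_nmi_warn) / clear_bit().
 */
#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>

static atomic_flag backtrace_in_progress = ATOMIC_FLAG_INIT;

static void *soft_lockup_report(void *arg)
{
	long cpu = (long)arg;

	/* stands in for test_and_set_bit(0, &soft_lockup_nmi_warn) */
	if (atomic_flag_test_and_set(&backtrace_in_progress)) {
		/* someone else is already dumping; give up, as the patch does */
		printf("cpu%ld: another cpu is reporting, skipping\n", cpu);
		return NULL;
	}

	printf("cpu%ld: BUG: soft lockup, dumping all-cpu backtraces\n", cpu);

	/* stands in for clear_bit() followed by smp_mb__after_atomic() */
	atomic_flag_clear(&backtrace_in_progress);
	return NULL;
}

int main(void)
{
	pthread_t threads[4];
	long cpu;

	for (cpu = 0; cpu < 4; cpu++)
		pthread_create(&threads[cpu], NULL, soft_lockup_report, (void *)cpu);
	for (cpu = 0; cpu < 4; cpu++)
		pthread_join(threads[cpu], NULL);
	return 0;
}

Built with cc -pthread, at most one thread at a time prints the full report. The patch additionally sets soft_watchdog_warn on the CPUs that back off, so they do not try to report again on the next timer tick.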
