Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r--  kernel/watchdog.c  66
1 file changed, 53 insertions(+), 13 deletions(-)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 4431610f049..c3319bd1b04 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,6 +31,12 @@
 
 int watchdog_user_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
+#ifdef CONFIG_SMP
+int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+#else
+#define sysctl_softlockup_all_cpu_backtrace 0
+#endif
+
 static int __read_mostly watchdog_running;
 static u64 __read_mostly sample_period;
 
@@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
+static unsigned long soft_lockup_nmi_warn;
 
 /* boot commands */
 /*
@@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str)
 }
 __setup("nosoftlockup", nosoftlockup_setup);
 /*  */
+#ifdef CONFIG_SMP
+static int __init softlockup_all_cpu_backtrace_setup(char *str)
+{
+	sysctl_softlockup_all_cpu_backtrace =
+		!!simple_strtol(str, NULL, 0);
+	return 1;
+}
+__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#endif
 
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -138,7 +154,11 @@ static void __touch_watchdog(void)
 
 void touch_softlockup_watchdog(void)
 {
-	__this_cpu_write(watchdog_touch_ts, 0);
+	/*
+	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
+	 * gets zeroed here, so use the raw_ operation.
+	 */
+	raw_cpu_write(watchdog_touch_ts, 0);
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
@@ -158,14 +178,14 @@
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 void touch_nmi_watchdog(void)
 {
-	if (watchdog_user_enabled) {
-		unsigned cpu;
-
-		for_each_present_cpu(cpu) {
-			if (per_cpu(watchdog_nmi_touch, cpu) != true)
-				per_cpu(watchdog_nmi_touch, cpu) = true;
-		}
-	}
+	/*
+	 * Using __raw here because some code paths have
+	 * preemption enabled.  If preemption is enabled
+	 * then interrupts should be enabled too, in which
+	 * case we shouldn't have to worry about the watchdog
+	 * going off.
+	 */
+	__raw_get_cpu_var(watchdog_nmi_touch) = true;
 	touch_softlockup_watchdog();
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
@@ -267,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
+	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
 
 	/* kick the hardlockup detector */
 	watchdog_interrupt_count();
@@ -313,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		if (__this_cpu_read(soft_watchdog_warn) == true)
 			return HRTIMER_RESTART;
 
+		if (softlockup_all_cpu_backtrace) {
+			/* Prevent multiple soft-lockup reports if one cpu is already
+			 * engaged in dumping cpu back traces
+			 */
+			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
+				/* Someone else will report us. Let's give up */
+				__this_cpu_write(soft_watchdog_warn, true);
+				return HRTIMER_RESTART;
+			}
+		}
+
 		printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 			smp_processor_id(), duration,
 			current->comm, task_pid_nr(current));
@@ -323,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		else
 			dump_stack();
 
+		if (softlockup_all_cpu_backtrace) {
+			/* Avoid generating two back traces for current
+			 * given that one is already made above
+			 */
+			trigger_allbutself_cpu_backtrace();
+
+			clear_bit(0, &soft_lockup_nmi_warn);
+			/* Barrier to sync with other cpus */
+			smp_mb__after_atomic();
+		}
+
 		if (softlockup_panic)
 			panic("softlockup: hung tasks");
 		__this_cpu_write(soft_watchdog_warn, true);
@@ -505,7 +548,6 @@ static void restart_watchdog_hrtimer(void *info)
 
 static void update_timers(int cpu)
 {
-	struct call_single_data data = {.func = restart_watchdog_hrtimer};
 	/*
 	 * Make sure that perf event counter will adopt to a new
 	 * sampling period. Updating the sampling period directly would
@@ -515,7 +557,7 @@ static void update_timers(int cpu)
 	 * might be late already so we have to restart the timer as well.
 	 */
 	watchdog_nmi_disable(cpu);
-	__smp_call_function_single(cpu, &data, 1);
+	smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1);
 	watchdog_nmi_enable(cpu);
 }
 
@@ -524,10 +566,8 @@ static void update_timers_all_cpus(void)
 	int cpu;
 
 	get_online_cpus();
-	preempt_disable();
 	for_each_online_cpu(cpu)
 		update_timers(cpu);
-	preempt_enable();
 	put_online_cpus();
 }
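
A note on the serialization in the watchdog_timer_fn() hunks above: test_and_set_bit(0, &soft_lockup_nmi_warn) elects exactly one CPU to print the lockup report and dump everyone else's backtraces; the losing CPUs mark soft_watchdog_warn and bail out. The following is a minimal userspace sketch of that "first caller wins" pattern, not kernel code: C11 atomic_flag stands in for test_and_set_bit()/clear_bit(), threads stand in for CPUs, and the names (report_lock, detector) are made up for illustration.

/* Sketch only: C11 atomics + pthreads emulating the kernel primitives. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Stands in for bit 0 of soft_lockup_nmi_warn. */
static atomic_flag report_lock = ATOMIC_FLAG_INIT;

static void *detector(void *arg)	/* one "CPU" per thread */
{
	int cpu = (int)(long)arg;

	/* test_and_set_bit() analogue: returns the previous value, so
	 * exactly one caller sees it clear and wins the right to report. */
	if (atomic_flag_test_and_set(&report_lock)) {
		/* Someone else will report us.  Let's give up. */
		printf("cpu%d: backing off, another cpu is reporting\n", cpu);
		return NULL;
	}

	printf("cpu%d: BUG: soft lockup - dumping all backtraces\n", cpu);
	/* ...trigger_allbutself_cpu_backtrace() would run here... */

	/* clear_bit() + barrier analogue: release ordering publishes the
	 * report before the flag becomes winnable again. */
	atomic_flag_clear_explicit(&report_lock, memory_order_release);
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (long i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, detector, (void *)i);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}

The release ordering on the clear mirrors the clear_bit() + smp_mb__after_atomic() pairing in the patch: the report must be visible to other CPUs before the flag can be won again.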

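Usage note (an inference from the __setup() handler above, not stated elsewhere on this page): on a CONFIG_SMP kernel the new behaviour is switched on from the boot command line, e.g.

	softlockup_all_cpu_backtrace=1

Since the handler runs the argument through !!simple_strtol(), any value that parses as non-zero enables it; on !CONFIG_SMP builds the #define pins the knob to 0, so the new branches compile away.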