aboutsummaryrefslogtreecommitdiff
path: root/kernel/watchdog.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r--kernel/watchdog.c532
1 files changed, 307 insertions, 225 deletions
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e3c41a4024..c3319bd1b04 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -3,15 +3,14 @@
*
* started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
*
- * this code detects hard lockups: incidents in where on a CPU
- * the kernel does not respond to anything except NMI.
- *
- * Note: Most of this code is borrowed heavily from softlockup.c,
- * so thanks to Ingo for the initial implementation.
- * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
+ * Note: Most of this code is borrowed heavily from the original softlockup
+ * detector, so thanks to Ingo for the initial implementation.
+ * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
* to those contributors as well.
*/
+#define pr_fmt(fmt) "NMI watchdog: " fmt
+
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
@@ -23,40 +22,55 @@
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/sysctl.h>
+#include <linux/smpboot.h>
+#include <linux/sched/rt.h>
#include <asm/irq_regs.h>
+#include <linux/kvm_para.h>
#include <linux/perf_event.h>
-int watchdog_enabled;
-int __read_mostly softlockup_thresh = 60;
+int watchdog_user_enabled = 1;
+int __read_mostly watchdog_thresh = 10;
+#ifdef CONFIG_SMP
+int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+#else
+#define sysctl_softlockup_all_cpu_backtrace 0
+#endif
+
+static int __read_mostly watchdog_running;
+static u64 __read_mostly sample_period;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
+static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
-
-static int no_watchdog;
-
+static unsigned long soft_lockup_nmi_warn;
/* boot commands */
/*
* Should we panic when a soft-lockup or hard-lockup occurs:
*/
#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static int hardlockup_panic;
+static int hardlockup_panic =
+ CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
static int __init hardlockup_panic_setup(char *str)
{
if (!strncmp(str, "panic", 5))
hardlockup_panic = 1;
+ else if (!strncmp(str, "nopanic", 7))
+ hardlockup_panic = 0;
+ else if (!strncmp(str, "0", 1))
+ watchdog_user_enabled = 0;
return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -75,7 +89,7 @@ __setup("softlockup_panic=", softlockup_panic_setup);
static int __init nowatchdog_setup(char *str)
{
- no_watchdog = 1;
+ watchdog_user_enabled = 0;
return 1;
}
__setup("nowatchdog", nowatchdog_setup);
@@ -83,45 +97,68 @@ __setup("nowatchdog", nowatchdog_setup);
/* deprecated */
static int __init nosoftlockup_setup(char *str)
{
- no_watchdog = 1;
+ watchdog_user_enabled = 0;
return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
/* */
+#ifdef CONFIG_SMP
+static int __init softlockup_all_cpu_backtrace_setup(char *str)
+{
+ sysctl_softlockup_all_cpu_backtrace =
+ !!simple_strtol(str, NULL, 0);
+ return 1;
+}
+__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#endif
+/*
+ * Hard-lockup warnings should be triggered after just a few seconds. Soft-
+ * lockups can have false positives under extreme conditions. So we generally
+ * want a higher threshold for soft lockups than for hard lockups. So we couple
+ * the thresholds with a factor: we make the soft threshold twice the amount of
+ * time the hard threshold is.
+ */
+static int get_softlockup_thresh(void)
+{
+ return watchdog_thresh * 2;
+}
/*
* Returns seconds, approximately. We don't need nanosecond
* resolution, and we don't need to waste time with a big divide when
* 2^30ns == 1.074s.
*/
-static unsigned long get_timestamp(int this_cpu)
+static unsigned long get_timestamp(void)
{
- return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
+ return local_clock() >> 30LL; /* 2^30 ~= 10^9 */
}
-static unsigned long get_sample_period(void)
+static void set_sample_period(void)
{
/*
- * convert softlockup_thresh from seconds to ns
- * the divide by 5 is to give hrtimer 5 chances to
- * increment before the hardlockup detector generates
- * a warning
+ * convert watchdog_thresh from seconds to ns
+ * the divide by 5 is to give hrtimer several chances (two
+ * or three with the current relation between the soft
+ * and hard thresholds) to increment before the
+ * hardlockup detector generates a warning
*/
- return softlockup_thresh / 5 * NSEC_PER_SEC;
+ sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
}
/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
- int this_cpu = smp_processor_id();
-
- __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
+ __this_cpu_write(watchdog_touch_ts, get_timestamp());
}
void touch_softlockup_watchdog(void)
{
- __raw_get_cpu_var(watchdog_touch_ts) = 0;
+ /*
+ * Preemption can be enabled. It doesn't matter which CPU's timestamp
+ * gets zeroed here, so use the raw_ operation.
+ */
+ raw_cpu_write(watchdog_touch_ts, 0);
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
@@ -141,14 +178,14 @@ void touch_all_softlockup_watchdogs(void)
#ifdef CONFIG_HARDLOCKUP_DETECTOR
void touch_nmi_watchdog(void)
{
- if (watchdog_enabled) {
- unsigned cpu;
-
- for_each_present_cpu(cpu) {
- if (per_cpu(watchdog_nmi_touch, cpu) != true)
- per_cpu(watchdog_nmi_touch, cpu) = true;
- }
- }
+ /*
+ * Using __raw here because some code paths have
+ * preemption enabled. If preemption is enabled
+ * then interrupts should be enabled too, in which
+ * case we shouldn't have to worry about the watchdog
+ * going off.
+ */
+ __raw_get_cpu_var(watchdog_nmi_touch) = true;
touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
@@ -165,28 +202,29 @@ void touch_softlockup_watchdog_sync(void)
/* watchdog detector functions */
static int is_hardlockup(void)
{
- unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
+ unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
- if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
+ if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
return 1;
- __get_cpu_var(hrtimer_interrupts_saved) = hrint;
+ __this_cpu_write(hrtimer_interrupts_saved, hrint);
return 0;
}
#endif
static int is_softlockup(unsigned long touch_ts)
{
- unsigned long now = get_timestamp(smp_processor_id());
+ unsigned long now = get_timestamp();
/* Warn about unreasonable delays: */
- if (time_after(now, touch_ts + softlockup_thresh))
+ if (time_after(now, touch_ts + get_softlockup_thresh()))
return now - touch_ts;
return 0;
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
+
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
@@ -196,15 +234,15 @@ static struct perf_event_attr wd_hw_attr = {
};
/* Callback function for perf event subsystem */
-static void watchdog_overflow_callback(struct perf_event *event, int nmi,
+static void watchdog_overflow_callback(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
/* Ensure the watchdog never gets throttled */
event->hw.interrupts = 0;
- if (__get_cpu_var(watchdog_nmi_touch) == true) {
- __get_cpu_var(watchdog_nmi_touch) = false;
+ if (__this_cpu_read(watchdog_nmi_touch) == true) {
+ __this_cpu_write(watchdog_nmi_touch, false);
return;
}
@@ -218,7 +256,7 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
int this_cpu = smp_processor_id();
/* only print hardlockups once */
- if (__get_cpu_var(hard_watchdog_warn) == true)
+ if (__this_cpu_read(hard_watchdog_warn) == true)
return;
if (hardlockup_panic)
@@ -226,46 +264,52 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
else
WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
- __get_cpu_var(hard_watchdog_warn) = true;
+ __this_cpu_write(hard_watchdog_warn, true);
return;
}
- __get_cpu_var(hard_watchdog_warn) = false;
+ __this_cpu_write(hard_watchdog_warn, false);
return;
}
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
static void watchdog_interrupt_count(void)
{
- __get_cpu_var(hrtimer_interrupts)++;
+ __this_cpu_inc(hrtimer_interrupts);
}
-#else
-static inline void watchdog_interrupt_count(void) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
+static int watchdog_nmi_enable(unsigned int cpu);
+static void watchdog_nmi_disable(unsigned int cpu);
/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
- unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
+ unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
struct pt_regs *regs = get_irq_regs();
int duration;
+ int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
/* kick the hardlockup detector */
watchdog_interrupt_count();
/* kick the softlockup detector */
- wake_up_process(__get_cpu_var(softlockup_watchdog));
+ wake_up_process(__this_cpu_read(softlockup_watchdog));
/* .. and repeat */
- hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
+ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
if (touch_ts == 0) {
- if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
+ if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
/*
* If the time stamp was touched atomically
* make sure the scheduler tick is up to date.
*/
- __get_cpu_var(softlockup_touch_sync) = false;
+ __this_cpu_write(softlockup_touch_sync, false);
sched_clock_tick();
}
+
+ /* Clear the guest paused flag on watchdog reset */
+ kvm_check_and_clear_guest_paused();
__touch_watchdog();
return HRTIMER_RESTART;
}
@@ -278,11 +322,30 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
*/
duration = is_softlockup(touch_ts);
if (unlikely(duration)) {
+ /*
+ * If a virtual machine is stopped by the host it can look to
+ * the watchdog like a soft lockup, check to see if the host
+ * stopped the vm before we issue the warning
+ */
+ if (kvm_check_and_clear_guest_paused())
+ return HRTIMER_RESTART;
+
/* only warn once */
- if (__get_cpu_var(soft_watchdog_warn) == true)
+ if (__this_cpu_read(soft_watchdog_warn) == true)
return HRTIMER_RESTART;
- printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+ if (softlockup_all_cpu_backtrace) {
+ /* Prevent multiple soft-lockup reports if one cpu is already
+ * engaged in dumping cpu back traces
+ */
+ if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
+ /* Someone else will report us. Let's give up */
+ __this_cpu_write(soft_watchdog_warn, true);
+ return HRTIMER_RESTART;
+ }
+ }
+
+ printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
print_modules();
@@ -292,57 +355,98 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
else
dump_stack();
+ if (softlockup_all_cpu_backtrace) {
+ /* Avoid generating two back traces for current
+ * given that one is already made above
+ */
+ trigger_allbutself_cpu_backtrace();
+
+ clear_bit(0, &soft_lockup_nmi_warn);
+ /* Barrier to sync with other cpus */
+ smp_mb__after_atomic();
+ }
+
if (softlockup_panic)
panic("softlockup: hung tasks");
- __get_cpu_var(soft_watchdog_warn) = true;
+ __this_cpu_write(soft_watchdog_warn, true);
} else
- __get_cpu_var(soft_watchdog_warn) = false;
+ __this_cpu_write(soft_watchdog_warn, false);
return HRTIMER_RESTART;
}
+static void watchdog_set_prio(unsigned int policy, unsigned int prio)
+{
+ struct sched_param param = { .sched_priority = prio };
-/*
- * The watchdog thread - touches the timestamp.
- */
-static int watchdog(void *unused)
+ sched_setscheduler(current, policy, &param);
+}
+
+static void watchdog_enable(unsigned int cpu)
{
- struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
- sched_setscheduler(current, SCHED_FIFO, &param);
+ /* kick off the timer for the hardlockup detector */
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer->function = watchdog_timer_fn;
- /* initialize timestamp */
- __touch_watchdog();
+ /* Enable the perf event */
+ watchdog_nmi_enable(cpu);
- /* kick off the timer for the hardlockup detector */
/* done here because hrtimer_start can only pin to smp_processor_id() */
- hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
+ hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED);
- set_current_state(TASK_INTERRUPTIBLE);
- /*
- * Run briefly once per second to reset the softlockup timestamp.
- * If this gets delayed for more than 60 seconds then the
- * debug-printout triggers in watchdog_timer_fn().
- */
- while (!kthread_should_stop()) {
- __touch_watchdog();
- schedule();
+ /* initialize timestamp */
+ watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
+ __touch_watchdog();
+}
- if (kthread_should_stop())
- break;
+static void watchdog_disable(unsigned int cpu)
+{
+ struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
- set_current_state(TASK_INTERRUPTIBLE);
- }
- __set_current_state(TASK_RUNNING);
+ watchdog_set_prio(SCHED_NORMAL, 0);
+ hrtimer_cancel(hrtimer);
+ /* disable the perf event */
+ watchdog_nmi_disable(cpu);
+}
- return 0;
+static void watchdog_cleanup(unsigned int cpu, bool online)
+{
+ watchdog_disable(cpu);
}
+static int watchdog_should_run(unsigned int cpu)
+{
+ return __this_cpu_read(hrtimer_interrupts) !=
+ __this_cpu_read(soft_lockup_hrtimer_cnt);
+}
+
+/*
+ * The watchdog thread function - touches the timestamp.
+ *
+ * It only runs once every sample_period seconds (4 seconds by
+ * default) to reset the softlockup timestamp. If this gets delayed
+ * for more than 2*watchdog_thresh seconds then the debug-printout
+ * triggers in watchdog_timer_fn().
+ */
+static void watchdog(unsigned int cpu)
+{
+ __this_cpu_write(soft_lockup_hrtimer_cnt,
+ __this_cpu_read(hrtimer_interrupts));
+ __touch_watchdog();
+}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static int watchdog_nmi_enable(int cpu)
+/*
+ * People like the simple clean cpu node info on boot.
+ * Reduce the watchdog noise by only printing messages
+ * that are different from what cpu0 displayed.
+ */
+static unsigned long cpu0_err;
+
+static int watchdog_nmi_enable(unsigned int cpu)
{
struct perf_event_attr *wd_attr;
struct perf_event *event = per_cpu(watchdog_ev, cpu);
@@ -355,16 +459,36 @@ static int watchdog_nmi_enable(int cpu)
if (event != NULL)
goto out_enable;
- /* Try to register using hardware perf events */
wd_attr = &wd_hw_attr;
- wd_attr->sample_period = hw_nmi_get_sample_period();
- event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback);
+ wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
+
+ /* Try to register using hardware perf events */
+ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
+
+ /* save cpu0 error for future comparision */
+ if (cpu == 0 && IS_ERR(event))
+ cpu0_err = PTR_ERR(event);
+
if (!IS_ERR(event)) {
- printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
+ /* only print for cpu0 or different than cpu0 */
+ if (cpu == 0 || cpu0_err)
+ pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
goto out_save;
}
- printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
+ /* skip displaying the same error again */
+ if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
+ return PTR_ERR(event);
+
+ /* vary the KERN level based on the returned errno */
+ if (PTR_ERR(event) == -EOPNOTSUPP)
+ pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
+ else if (PTR_ERR(event) == -ENOENT)
+ pr_warning("disabled (cpu%i): hardware events not enabled\n",
+ cpu);
+ else
+ pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
+ cpu, PTR_ERR(event));
return PTR_ERR(event);
/* success path */
@@ -376,7 +500,7 @@ out:
return 0;
}
-static void watchdog_nmi_disable(int cpu)
+static void watchdog_nmi_disable(unsigned int cpu)
{
struct perf_event *event = per_cpu(watchdog_ev, cpu);
@@ -390,177 +514,135 @@ static void watchdog_nmi_disable(int cpu)
return;
}
#else
-static int watchdog_nmi_enable(int cpu) { return 0; }
-static void watchdog_nmi_disable(int cpu) { return; }
+static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
+static void watchdog_nmi_disable(unsigned int cpu) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-/* prepare/enable/disable routines */
-static int watchdog_prepare_cpu(int cpu)
-{
- struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
-
- WARN_ON(per_cpu(softlockup_watchdog, cpu));
- hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hrtimer->function = watchdog_timer_fn;
-
- return 0;
-}
+static struct smp_hotplug_thread watchdog_threads = {
+ .store = &softlockup_watchdog,
+ .thread_should_run = watchdog_should_run,
+ .thread_fn = watchdog,
+ .thread_comm = "watchdog/%u",
+ .setup = watchdog_enable,
+ .cleanup = watchdog_cleanup,
+ .park = watchdog_disable,
+ .unpark = watchdog_enable,
+};
-static int watchdog_enable(int cpu)
+static void restart_watchdog_hrtimer(void *info)
{
- struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
- int err;
-
- /* enable the perf event */
- err = watchdog_nmi_enable(cpu);
- if (err)
- return err;
-
- /* create the watchdog thread */
- if (!p) {
- p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
- if (IS_ERR(p)) {
- printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
- return PTR_ERR(p);
- }
- kthread_bind(p, cpu);
- per_cpu(watchdog_touch_ts, cpu) = 0;
- per_cpu(softlockup_watchdog, cpu) = p;
- wake_up_process(p);
- }
-
- /* if any cpu succeeds, watchdog is considered enabled for the system */
- watchdog_enabled = 1;
+ struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
+ int ret;
- return 0;
+ /*
+ * No need to cancel and restart hrtimer if it is currently executing
+ * because it will reprogram itself with the new period now.
+ * We should never see it unqueued here because we are running per-cpu
+ * with interrupts disabled.
+ */
+ ret = hrtimer_try_to_cancel(hrtimer);
+ if (ret == 1)
+ hrtimer_start(hrtimer, ns_to_ktime(sample_period),
+ HRTIMER_MODE_REL_PINNED);
}
-static void watchdog_disable(int cpu)
+static void update_timers(int cpu)
{
- struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
- struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
-
/*
- * cancel the timer first to stop incrementing the stats
- * and waking up the kthread
+ * Make sure that perf event counter will adopt to a new
+ * sampling period. Updating the sampling period directly would
+ * be much nicer but we do not have an API for that now so
+ * let's use a big hammer.
+ * Hrtimer will adopt the new period on the next tick but this
+ * might be late already so we have to restart the timer as well.
*/
- hrtimer_cancel(hrtimer);
-
- /* disable the perf event */
watchdog_nmi_disable(cpu);
-
- /* stop the watchdog thread */
- if (p) {
- per_cpu(softlockup_watchdog, cpu) = NULL;
- kthread_stop(p);
- }
+ smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1);
+ watchdog_nmi_enable(cpu);
}
-static void watchdog_enable_all_cpus(void)
+static void update_timers_all_cpus(void)
{
int cpu;
- int result = 0;
+ get_online_cpus();
for_each_online_cpu(cpu)
- result += watchdog_enable(cpu);
-
- if (result)
- printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
-
+ update_timers(cpu);
+ put_online_cpus();
}
-static void watchdog_disable_all_cpus(void)
+static int watchdog_enable_all_cpus(bool sample_period_changed)
{
- int cpu;
-
- if (no_watchdog)
- return;
+ int err = 0;
- for_each_online_cpu(cpu)
- watchdog_disable(cpu);
+ if (!watchdog_running) {
+ err = smpboot_register_percpu_thread(&watchdog_threads);
+ if (err)
+ pr_err("Failed to create watchdog threads, disabled\n");
+ else
+ watchdog_running = 1;
+ } else if (sample_period_changed) {
+ update_timers_all_cpus();
+ }
- /* if all watchdogs are disabled, then they are disabled for the system */
- watchdog_enabled = 0;
+ return err;
}
-
+/* prepare/enable/disable routines */
/* sysctl functions */
#ifdef CONFIG_SYSCTL
+static void watchdog_disable_all_cpus(void)
+{
+ if (watchdog_running) {
+ watchdog_running = 0;
+ smpboot_unregister_percpu_thread(&watchdog_threads);
+ }
+}
+
/*
- * proc handler for /proc/sys/kernel/nmi_watchdog
+ * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
*/
-int proc_dowatchdog_enabled(struct ctl_table *table, int write,
- void __user *buffer, size_t *length, loff_t *ppos)
+int proc_dowatchdog(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
- proc_dointvec(table, write, buffer, length, ppos);
+ int err, old_thresh, old_enabled;
+ static DEFINE_MUTEX(watchdog_proc_mutex);
- if (watchdog_enabled)
- watchdog_enable_all_cpus();
- else
- watchdog_disable_all_cpus();
- return 0;
-}
-
-int proc_dowatchdog_thresh(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
-{
- return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-}
-#endif /* CONFIG_SYSCTL */
+ mutex_lock(&watchdog_proc_mutex);
+ old_thresh = ACCESS_ONCE(watchdog_thresh);
+ old_enabled = ACCESS_ONCE(watchdog_user_enabled);
+ err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (err || !write)
+ goto out;
-/*
- * Create/destroy watchdog threads as CPUs come and go:
- */
-static int __cpuinit
-cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
- int hotcpu = (unsigned long)hcpu;
- int err = 0;
+ set_sample_period();
+ /*
+ * Watchdog threads shouldn't be enabled if they are
+ * disabled. The 'watchdog_running' variable check in
+ * watchdog_*_all_cpus() function takes care of this.
+ */
+ if (watchdog_user_enabled && watchdog_thresh)
+ err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
+ else
+ watchdog_disable_all_cpus();
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- err = watchdog_prepare_cpu(hotcpu);
- break;
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- err = watchdog_enable(hotcpu);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- watchdog_disable(hotcpu);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- watchdog_disable(hotcpu);
- break;
-#endif /* CONFIG_HOTPLUG_CPU */
+ /* Restore old values on failure */
+ if (err) {
+ watchdog_thresh = old_thresh;
+ watchdog_user_enabled = old_enabled;
}
- return notifier_from_errno(err);
+out:
+ mutex_unlock(&watchdog_proc_mutex);
+ return err;
}
+#endif /* CONFIG_SYSCTL */
-static struct notifier_block __cpuinitdata cpu_nfb = {
- .notifier_call = cpu_callback
-};
-
-static int __init spawn_watchdog_task(void)
+void __init lockup_detector_init(void)
{
- void *cpu = (void *)(long)smp_processor_id();
- int err;
+ set_sample_period();
- if (no_watchdog)
- return 0;
-
- err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
- WARN_ON(notifier_to_errno(err));
-
- cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
- register_cpu_notifier(&cpu_nfb);
-
- return 0;
+ if (watchdog_user_enabled)
+ watchdog_enable_all_cpus(false);
}
-early_initcall(spawn_watchdog_task);