Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r--   kernel/watchdog.c   532
1 file changed, 307 insertions, 225 deletions
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index bafba687a6d..c3319bd1b04 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -3,15 +3,14 @@ * * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. * - * this code detects hard lockups: incidents in where on a CPU - * the kernel does not respond to anything except NMI. - * - * Note: Most of this code is borrowed heavily from softlockup.c, - * so thanks to Ingo for the initial implementation. - * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks + * Note: Most of this code is borrowed heavily from the original softlockup + * detector, so thanks to Ingo for the initial implementation. + * Some chunks also taken from the old x86-specific nmi watchdog code, thanks * to those contributors as well. */ +#define pr_fmt(fmt) "NMI watchdog: " fmt + #include <linux/mm.h> #include <linux/cpu.h> #include <linux/nmi.h> @@ -23,40 +22,55 @@ #include <linux/notifier.h> #include <linux/module.h> #include <linux/sysctl.h> +#include <linux/smpboot.h> +#include <linux/sched/rt.h> #include <asm/irq_regs.h> +#include <linux/kvm_para.h> #include <linux/perf_event.h> -int watchdog_enabled; -int __read_mostly softlockup_thresh = 60; +int watchdog_user_enabled = 1; +int __read_mostly watchdog_thresh = 10; +#ifdef CONFIG_SMP +int __read_mostly sysctl_softlockup_all_cpu_backtrace; +#else +#define sysctl_softlockup_all_cpu_backtrace 0 +#endif + +static int __read_mostly watchdog_running; +static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(bool, softlockup_touch_sync); static DEFINE_PER_CPU(bool, soft_watchdog_warn); +static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); +static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); #ifdef CONFIG_HARDLOCKUP_DETECTOR static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif - -static int __initdata no_watchdog; - +static unsigned long soft_lockup_nmi_warn; /* boot commands */ /* * Should we panic when a soft-lockup or hard-lockup occurs: */ #ifdef CONFIG_HARDLOCKUP_DETECTOR -static int hardlockup_panic; +static int hardlockup_panic = + CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; static int __init hardlockup_panic_setup(char *str) { if (!strncmp(str, "panic", 5)) hardlockup_panic = 1; + else if (!strncmp(str, "nopanic", 7)) + hardlockup_panic = 0; + else if (!strncmp(str, "0", 1)) + watchdog_user_enabled = 0; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); @@ -75,7 +89,7 @@ __setup("softlockup_panic=", softlockup_panic_setup); static int __init nowatchdog_setup(char *str) { - no_watchdog = 1; + watchdog_user_enabled = 0; return 1; } __setup("nowatchdog", nowatchdog_setup); @@ -83,45 +97,68 @@ __setup("nowatchdog", nowatchdog_setup); /* deprecated */ static int __init nosoftlockup_setup(char *str) { - no_watchdog = 1; + watchdog_user_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); /* */ +#ifdef CONFIG_SMP +static int __init softlockup_all_cpu_backtrace_setup(char *str) +{ + sysctl_softlockup_all_cpu_backtrace = + !!simple_strtol(str, NULL, 0); + return 1; +} +__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); +#endif +/* 
+ * Hard-lockup warnings should be triggered after just a few seconds. Soft- + * lockups can have false positives under extreme conditions. So we generally + * want a higher threshold for soft lockups than for hard lockups. So we couple + * the thresholds with a factor: we make the soft threshold twice the amount of + * time the hard threshold is. + */ +static int get_softlockup_thresh(void) +{ + return watchdog_thresh * 2; +} /* * Returns seconds, approximately. We don't need nanosecond * resolution, and we don't need to waste time with a big divide when * 2^30ns == 1.074s. */ -static unsigned long get_timestamp(int this_cpu) +static unsigned long get_timestamp(void) { - return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ + return local_clock() >> 30LL; /* 2^30 ~= 10^9 */ } -static unsigned long get_sample_period(void) +static void set_sample_period(void) { /* - * convert softlockup_thresh from seconds to ns - * the divide by 5 is to give hrtimer 5 chances to - * increment before the hardlockup detector generates - * a warning + * convert watchdog_thresh from seconds to ns + * the divide by 5 is to give hrtimer several chances (two + * or three with the current relation between the soft + * and hard thresholds) to increment before the + * hardlockup detector generates a warning */ - return softlockup_thresh / 5 * NSEC_PER_SEC; + sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); } /* Commands for resetting the watchdog */ static void __touch_watchdog(void) { - int this_cpu = smp_processor_id(); - - __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu); + __this_cpu_write(watchdog_touch_ts, get_timestamp()); } void touch_softlockup_watchdog(void) { - __raw_get_cpu_var(watchdog_touch_ts) = 0; + /* + * Preemption can be enabled. It doesn't matter which CPU's timestamp + * gets zeroed here, so use the raw_ operation. + */ + raw_cpu_write(watchdog_touch_ts, 0); } EXPORT_SYMBOL(touch_softlockup_watchdog); @@ -141,14 +178,14 @@ void touch_all_softlockup_watchdogs(void) #ifdef CONFIG_HARDLOCKUP_DETECTOR void touch_nmi_watchdog(void) { - if (watchdog_enabled) { - unsigned cpu; - - for_each_present_cpu(cpu) { - if (per_cpu(watchdog_nmi_touch, cpu) != true) - per_cpu(watchdog_nmi_touch, cpu) = true; - } - } + /* + * Using __raw here because some code paths have + * preemption enabled. If preemption is enabled + * then interrupts should be enabled too, in which + * case we shouldn't have to worry about the watchdog + * going off. 
+ */ + __raw_get_cpu_var(watchdog_nmi_touch) = true; touch_softlockup_watchdog(); } EXPORT_SYMBOL(touch_nmi_watchdog); @@ -165,28 +202,29 @@ void touch_softlockup_watchdog_sync(void) /* watchdog detector functions */ static int is_hardlockup(void) { - unsigned long hrint = __get_cpu_var(hrtimer_interrupts); + unsigned long hrint = __this_cpu_read(hrtimer_interrupts); - if (__get_cpu_var(hrtimer_interrupts_saved) == hrint) + if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) return 1; - __get_cpu_var(hrtimer_interrupts_saved) = hrint; + __this_cpu_write(hrtimer_interrupts_saved, hrint); return 0; } #endif static int is_softlockup(unsigned long touch_ts) { - unsigned long now = get_timestamp(smp_processor_id()); + unsigned long now = get_timestamp(); /* Warn about unreasonable delays: */ - if (time_after(now, touch_ts + softlockup_thresh)) + if (time_after(now, touch_ts + get_softlockup_thresh())) return now - touch_ts; return 0; } #ifdef CONFIG_HARDLOCKUP_DETECTOR + static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -196,15 +234,15 @@ static struct perf_event_attr wd_hw_attr = { }; /* Callback function for perf event subsystem */ -static void watchdog_overflow_callback(struct perf_event *event, int nmi, +static void watchdog_overflow_callback(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { /* Ensure the watchdog never gets throttled */ event->hw.interrupts = 0; - if (__get_cpu_var(watchdog_nmi_touch) == true) { - __get_cpu_var(watchdog_nmi_touch) = false; + if (__this_cpu_read(watchdog_nmi_touch) == true) { + __this_cpu_write(watchdog_nmi_touch, false); return; } @@ -218,7 +256,7 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi, int this_cpu = smp_processor_id(); /* only print hardlockups once */ - if (__get_cpu_var(hard_watchdog_warn) == true) + if (__this_cpu_read(hard_watchdog_warn) == true) return; if (hardlockup_panic) @@ -226,46 +264,52 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi, else WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); - __get_cpu_var(hard_watchdog_warn) = true; + __this_cpu_write(hard_watchdog_warn, true); return; } - __get_cpu_var(hard_watchdog_warn) = false; + __this_cpu_write(hard_watchdog_warn, false); return; } +#endif /* CONFIG_HARDLOCKUP_DETECTOR */ + static void watchdog_interrupt_count(void) { - __get_cpu_var(hrtimer_interrupts)++; + __this_cpu_inc(hrtimer_interrupts); } -#else -static inline void watchdog_interrupt_count(void) { return; } -#endif /* CONFIG_HARDLOCKUP_DETECTOR */ + +static int watchdog_nmi_enable(unsigned int cpu); +static void watchdog_nmi_disable(unsigned int cpu); /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { - unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts); + unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); struct pt_regs *regs = get_irq_regs(); int duration; + int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; /* kick the hardlockup detector */ watchdog_interrupt_count(); /* kick the softlockup detector */ - wake_up_process(__get_cpu_var(softlockup_watchdog)); + wake_up_process(__this_cpu_read(softlockup_watchdog)); /* .. 
and repeat */ - hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period())); + hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); if (touch_ts == 0) { - if (unlikely(__get_cpu_var(softlockup_touch_sync))) { + if (unlikely(__this_cpu_read(softlockup_touch_sync))) { /* * If the time stamp was touched atomically * make sure the scheduler tick is up to date. */ - __get_cpu_var(softlockup_touch_sync) = false; + __this_cpu_write(softlockup_touch_sync, false); sched_clock_tick(); } + + /* Clear the guest paused flag on watchdog reset */ + kvm_check_and_clear_guest_paused(); __touch_watchdog(); return HRTIMER_RESTART; } @@ -278,11 +322,30 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) */ duration = is_softlockup(touch_ts); if (unlikely(duration)) { + /* + * If a virtual machine is stopped by the host it can look to + * the watchdog like a soft lockup, check to see if the host + * stopped the vm before we issue the warning + */ + if (kvm_check_and_clear_guest_paused()) + return HRTIMER_RESTART; + /* only warn once */ - if (__get_cpu_var(soft_watchdog_warn) == true) + if (__this_cpu_read(soft_watchdog_warn) == true) return HRTIMER_RESTART; - printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", + if (softlockup_all_cpu_backtrace) { + /* Prevent multiple soft-lockup reports if one cpu is already + * engaged in dumping cpu back traces + */ + if (test_and_set_bit(0, &soft_lockup_nmi_warn)) { + /* Someone else will report us. Let's give up */ + __this_cpu_write(soft_watchdog_warn, true); + return HRTIMER_RESTART; + } + } + + printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); print_modules(); @@ -292,57 +355,98 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) else dump_stack(); + if (softlockup_all_cpu_backtrace) { + /* Avoid generating two back traces for current + * given that one is already made above + */ + trigger_allbutself_cpu_backtrace(); + + clear_bit(0, &soft_lockup_nmi_warn); + /* Barrier to sync with other cpus */ + smp_mb__after_atomic(); + } + if (softlockup_panic) panic("softlockup: hung tasks"); - __get_cpu_var(soft_watchdog_warn) = true; + __this_cpu_write(soft_watchdog_warn, true); } else - __get_cpu_var(soft_watchdog_warn) = false; + __this_cpu_write(soft_watchdog_warn, false); return HRTIMER_RESTART; } +static void watchdog_set_prio(unsigned int policy, unsigned int prio) +{ + struct sched_param param = { .sched_priority = prio }; -/* - * The watchdog thread - touches the timestamp. 
- */ -static int watchdog(void *unused) + sched_setscheduler(current, policy, ¶m); +} + +static void watchdog_enable(unsigned int cpu) { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); - sched_setscheduler(current, SCHED_FIFO, ¶m); + /* kick off the timer for the hardlockup detector */ + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = watchdog_timer_fn; - /* initialize timestamp */ - __touch_watchdog(); + /* Enable the perf event */ + watchdog_nmi_enable(cpu); - /* kick off the timer for the hardlockup detector */ /* done here because hrtimer_start can only pin to smp_processor_id() */ - hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()), + hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL_PINNED); - set_current_state(TASK_INTERRUPTIBLE); - /* - * Run briefly once per second to reset the softlockup timestamp. - * If this gets delayed for more than 60 seconds then the - * debug-printout triggers in watchdog_timer_fn(). - */ - while (!kthread_should_stop()) { - __touch_watchdog(); - schedule(); + /* initialize timestamp */ + watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); + __touch_watchdog(); +} - if (kthread_should_stop()) - break; +static void watchdog_disable(unsigned int cpu) +{ + struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); - set_current_state(TASK_INTERRUPTIBLE); - } - __set_current_state(TASK_RUNNING); + watchdog_set_prio(SCHED_NORMAL, 0); + hrtimer_cancel(hrtimer); + /* disable the perf event */ + watchdog_nmi_disable(cpu); +} - return 0; +static void watchdog_cleanup(unsigned int cpu, bool online) +{ + watchdog_disable(cpu); } +static int watchdog_should_run(unsigned int cpu) +{ + return __this_cpu_read(hrtimer_interrupts) != + __this_cpu_read(soft_lockup_hrtimer_cnt); +} + +/* + * The watchdog thread function - touches the timestamp. + * + * It only runs once every sample_period seconds (4 seconds by + * default) to reset the softlockup timestamp. If this gets delayed + * for more than 2*watchdog_thresh seconds then the debug-printout + * triggers in watchdog_timer_fn(). + */ +static void watchdog(unsigned int cpu) +{ + __this_cpu_write(soft_lockup_hrtimer_cnt, + __this_cpu_read(hrtimer_interrupts)); + __touch_watchdog(); +} #ifdef CONFIG_HARDLOCKUP_DETECTOR -static int watchdog_nmi_enable(int cpu) +/* + * People like the simple clean cpu node info on boot. + * Reduce the watchdog noise by only printing messages + * that are different from what cpu0 displayed. 
+ */ +static unsigned long cpu0_err; + +static int watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -355,16 +459,36 @@ static int watchdog_nmi_enable(int cpu) if (event != NULL) goto out_enable; - /* Try to register using hardware perf events */ wd_attr = &wd_hw_attr; - wd_attr->sample_period = hw_nmi_get_sample_period(); - event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); + wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); + + /* Try to register using hardware perf events */ + event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); + + /* save cpu0 error for future comparision */ + if (cpu == 0 && IS_ERR(event)) + cpu0_err = PTR_ERR(event); + if (!IS_ERR(event)) { - printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); + /* only print for cpu0 or different than cpu0 */ + if (cpu == 0 || cpu0_err) + pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n"); goto out_save; } - printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); + /* skip displaying the same error again */ + if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) + return PTR_ERR(event); + + /* vary the KERN level based on the returned errno */ + if (PTR_ERR(event) == -EOPNOTSUPP) + pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu); + else if (PTR_ERR(event) == -ENOENT) + pr_warning("disabled (cpu%i): hardware events not enabled\n", + cpu); + else + pr_err("disabled (cpu%i): unable to create perf event: %ld\n", + cpu, PTR_ERR(event)); return PTR_ERR(event); /* success path */ @@ -376,7 +500,7 @@ out: return 0; } -static void watchdog_nmi_disable(int cpu) +static void watchdog_nmi_disable(unsigned int cpu) { struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -390,177 +514,135 @@ static void watchdog_nmi_disable(int cpu) return; } #else -static int watchdog_nmi_enable(int cpu) { return 0; } -static void watchdog_nmi_disable(int cpu) { return; } +static int watchdog_nmi_enable(unsigned int cpu) { return 0; } +static void watchdog_nmi_disable(unsigned int cpu) { return; } #endif /* CONFIG_HARDLOCKUP_DETECTOR */ -/* prepare/enable/disable routines */ -static int watchdog_prepare_cpu(int cpu) -{ - struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); - - WARN_ON(per_cpu(softlockup_watchdog, cpu)); - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer->function = watchdog_timer_fn; - - return 0; -} +static struct smp_hotplug_thread watchdog_threads = { + .store = &softlockup_watchdog, + .thread_should_run = watchdog_should_run, + .thread_fn = watchdog, + .thread_comm = "watchdog/%u", + .setup = watchdog_enable, + .cleanup = watchdog_cleanup, + .park = watchdog_disable, + .unpark = watchdog_enable, +}; -static int watchdog_enable(int cpu) +static void restart_watchdog_hrtimer(void *info) { - struct task_struct *p = per_cpu(softlockup_watchdog, cpu); - int err; - - /* enable the perf event */ - err = watchdog_nmi_enable(cpu); - if (err) - return err; - - /* create the watchdog thread */ - if (!p) { - p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); - if (IS_ERR(p)) { - printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); - return PTR_ERR(p); - } - kthread_bind(p, cpu); - per_cpu(watchdog_touch_ts, cpu) = 0; - per_cpu(softlockup_watchdog, cpu) = p; - wake_up_process(p); - } - - /* if any cpu succeeds, watchdog is considered 
enabled for the system */ - watchdog_enabled = 1; + struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); + int ret; - return 0; + /* + * No need to cancel and restart hrtimer if it is currently executing + * because it will reprogram itself with the new period now. + * We should never see it unqueued here because we are running per-cpu + * with interrupts disabled. + */ + ret = hrtimer_try_to_cancel(hrtimer); + if (ret == 1) + hrtimer_start(hrtimer, ns_to_ktime(sample_period), + HRTIMER_MODE_REL_PINNED); } -static void watchdog_disable(int cpu) +static void update_timers(int cpu) { - struct task_struct *p = per_cpu(softlockup_watchdog, cpu); - struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); - /* - * cancel the timer first to stop incrementing the stats - * and waking up the kthread + * Make sure that perf event counter will adopt to a new + * sampling period. Updating the sampling period directly would + * be much nicer but we do not have an API for that now so + * let's use a big hammer. + * Hrtimer will adopt the new period on the next tick but this + * might be late already so we have to restart the timer as well. */ - hrtimer_cancel(hrtimer); - - /* disable the perf event */ watchdog_nmi_disable(cpu); - - /* stop the watchdog thread */ - if (p) { - per_cpu(softlockup_watchdog, cpu) = NULL; - kthread_stop(p); - } + smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1); + watchdog_nmi_enable(cpu); } -static void watchdog_enable_all_cpus(void) +static void update_timers_all_cpus(void) { int cpu; - int result = 0; + get_online_cpus(); for_each_online_cpu(cpu) - result += watchdog_enable(cpu); - - if (result) - printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); - + update_timers(cpu); + put_online_cpus(); } -static void watchdog_disable_all_cpus(void) +static int watchdog_enable_all_cpus(bool sample_period_changed) { - int cpu; - - if (no_watchdog) - return; + int err = 0; - for_each_online_cpu(cpu) - watchdog_disable(cpu); + if (!watchdog_running) { + err = smpboot_register_percpu_thread(&watchdog_threads); + if (err) + pr_err("Failed to create watchdog threads, disabled\n"); + else + watchdog_running = 1; + } else if (sample_period_changed) { + update_timers_all_cpus(); + } - /* if all watchdogs are disabled, then they are disabled for the system */ - watchdog_enabled = 0; + return err; } - +/* prepare/enable/disable routines */ /* sysctl functions */ #ifdef CONFIG_SYSCTL +static void watchdog_disable_all_cpus(void) +{ + if (watchdog_running) { + watchdog_running = 0; + smpboot_unregister_percpu_thread(&watchdog_threads); + } +} + /* - * proc handler for /proc/sys/kernel/nmi_watchdog + * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh */ -int proc_dowatchdog_enabled(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) +int proc_dowatchdog(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { - proc_dointvec(table, write, buffer, length, ppos); + int err, old_thresh, old_enabled; + static DEFINE_MUTEX(watchdog_proc_mutex); - if (watchdog_enabled) - watchdog_enable_all_cpus(); - else - watchdog_disable_all_cpus(); - return 0; -} - -int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - return proc_dointvec_minmax(table, write, buffer, lenp, ppos); -} -#endif /* CONFIG_SYSCTL */ + mutex_lock(&watchdog_proc_mutex); + old_thresh = ACCESS_ONCE(watchdog_thresh); + old_enabled = 
ACCESS_ONCE(watchdog_user_enabled); + err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (err || !write) + goto out; -/* - * Create/destroy watchdog threads as CPUs come and go: - */ -static int __cpuinit -cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - int hotcpu = (unsigned long)hcpu; - int err = 0; + set_sample_period(); + /* + * Watchdog threads shouldn't be enabled if they are + * disabled. The 'watchdog_running' variable check in + * watchdog_*_all_cpus() function takes care of this. + */ + if (watchdog_user_enabled && watchdog_thresh) + err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh); + else + watchdog_disable_all_cpus(); - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - err = watchdog_prepare_cpu(hotcpu); - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - err = watchdog_enable(hotcpu); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - watchdog_disable(hotcpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - watchdog_disable(hotcpu); - break; -#endif /* CONFIG_HOTPLUG_CPU */ + /* Restore old values on failure */ + if (err) { + watchdog_thresh = old_thresh; + watchdog_user_enabled = old_enabled; } - return notifier_from_errno(err); +out: + mutex_unlock(&watchdog_proc_mutex); + return err; } +#endif /* CONFIG_SYSCTL */ -static struct notifier_block __cpuinitdata cpu_nfb = { - .notifier_call = cpu_callback -}; - -static int __init spawn_watchdog_task(void) +void __init lockup_detector_init(void) { - void *cpu = (void *)(long)smp_processor_id(); - int err; + set_sample_period(); - if (no_watchdog) - return 0; - - err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); - WARN_ON(notifier_to_errno(err)); - - cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); - register_cpu_notifier(&cpu_nfb); - - return 0; + if (watchdog_user_enabled) + watchdog_enable_all_cpus(false); } -early_initcall(spawn_watchdog_task); |
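Two aspects of this conversion are easy to misread in the raw diff above, so a couple of illustrative sketches follow; neither is part of the patch itself.

First, the timing arithmetic. The old 60-second softlockup_thresh is gone: watchdog_thresh (10 seconds by default) now drives the hard-lockup detector, get_softlockup_thresh() derives the soft-lockup threshold as twice that value, and set_sample_period() arms the per-CPU hrtimer at one fifth of the soft threshold, i.e. every 4 seconds by default. Timestamps are compared in approximate seconds obtained by shifting the nanosecond-resolution local_clock() right by 30 bits (2^30 ns is about 1.07 s). The stand-alone userspace program below simply replays that arithmetic so the default numbers are visible; the helper names mirror the kernel functions added by the patch, but the program itself is purely illustrative.

/*
 * Illustrative userspace sketch (not kernel code) of the arithmetic the
 * patch introduces: the soft-lockup threshold is twice watchdog_thresh,
 * the hrtimer sample period is one fifth of the soft threshold, and
 * timestamps are approximated in seconds by shifting a nanosecond clock
 * right by 30 bits (2^30 ns ~= 1.074 s).
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

static int watchdog_thresh = 10;	/* default hard-lockup threshold, seconds */

static int get_softlockup_thresh(void)
{
	return watchdog_thresh * 2;	/* soft threshold is twice the hard one */
}

static uint64_t sample_period_ns(void)
{
	/* five hrtimer ticks per soft-lockup window */
	return (uint64_t)get_softlockup_thresh() * (NSEC_PER_SEC / 5);
}

static uint64_t approx_seconds(uint64_t ns)
{
	return ns >> 30;		/* 2^30 ns ~= 10^9 ns, i.e. roughly one second */
}

int main(void)
{
	printf("soft-lockup threshold : %d s\n", get_softlockup_thresh());
	printf("hrtimer sample period : %.1f s\n", sample_period_ns() / 1e9);
	printf("2^34 ns is treated as about %llu seconds\n",
	       (unsigned long long)approx_seconds(1ULL << 34));
	return 0;
}

With the defaults this prints a 20-second soft threshold and a 4.0-second sample period, which matches the "4 seconds by default" comment added to the watchdog thread function.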

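Second, the thread management. The open-coded kthread_create()/kthread_bind() loop, watchdog_prepare_cpu() and the cpu_callback()/register_cpu_notifier() hotplug notifier are all deleted in favour of the generic smpboot infrastructure: the patch fills in a struct smp_hotplug_thread and calls smpboot_register_percpu_thread(), after which the core creates one "watchdog/%u" thread per CPU, parks and unparks it across CPU hotplug, and invokes the setup/park/unpark/cleanup hooks at the appropriate points. The fragment below is a hypothetical minimal module (the demo_* names are invented for illustration) that uses the same API in the same shape: a producer running on the target CPU would set the per-CPU flag and call wake_up_process() on the stored task, exactly as the watchdog hrtimer does with softlockup_watchdog.

/*
 * Hypothetical minimal module (not part of the patch; demo_* names are
 * invented) using the same smpboot API the watchdog is converted to.
 * smpboot_register_percpu_thread() makes the core spawn one "demo/%u"
 * thread per CPU and park/unpark it across CPU hotplug, so no private
 * cpu notifier or kthread_create()/kthread_bind() loop is needed.
 */
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/smpboot.h>

static DEFINE_PER_CPU(struct task_struct *, demo_task);
static DEFINE_PER_CPU(unsigned long, demo_pending);

/* the thread is woken with wake_up_process() and runs only when this says so */
static int demo_should_run(unsigned int cpu)
{
	return __this_cpu_read(demo_pending);
}

/* per-CPU work; a producer (timer, IRQ, ...) sets demo_pending and wakes us */
static void demo_thread_fn(unsigned int cpu)
{
	__this_cpu_write(demo_pending, 0);
	pr_info("demo: ran on cpu %u\n", cpu);
}

static struct smp_hotplug_thread demo_threads = {
	.store			= &demo_task,
	.thread_should_run	= demo_should_run,
	.thread_fn		= demo_thread_fn,
	.thread_comm		= "demo/%u",
};

static int __init demo_init(void)
{
	return smpboot_register_percpu_thread(&demo_threads);
}

static void __exit demo_exit(void)
{
	smpboot_unregister_percpu_thread(&demo_threads);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Unlike this minimal sketch, the watchdog also supplies the optional .setup, .park, .unpark and .cleanup callbacks so that its hrtimer and the perf NMI event are started and stopped whenever a CPU's thread is created, parked or torn down.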