diff options
Diffstat (limited to 'drivers/char/ipmi/ipmi_watchdog.c')
| -rw-r--r-- | drivers/char/ipmi/ipmi_watchdog.c | 77 |
1 files changed, 51 insertions, 26 deletions
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index c2917ffad2c..37b8be7cba9 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -139,19 +139,10 @@ #define IPMI_WDOG_SET_TIMER 0x24 #define IPMI_WDOG_GET_TIMER 0x25 -/* These are here until the real ones get into the watchdog.h interface. */ -#ifndef WDIOC_GETTIMEOUT -#define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int) -#endif -#ifndef WDIOC_SET_PRETIMEOUT -#define WDIOC_SET_PRETIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 21, int) -#endif -#ifndef WDIOC_GET_PRETIMEOUT -#define WDIOC_GET_PRETIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 22, int) -#endif +#define IPMI_WDOG_TIMER_NOT_INIT_RESP 0x80 static DEFINE_MUTEX(ipmi_watchdog_mutex); -static int nowayout = WATCHDOG_NOWAYOUT; +static bool nowayout = WATCHDOG_NOWAYOUT; static ipmi_user_t watchdog_user; static int watchdog_ifnum; @@ -318,7 +309,7 @@ module_param(start_now, int, 0444); MODULE_PARM_DESC(start_now, "Set to 1 to start the watchdog as" "soon as the driver is loaded."); -module_param(nowayout, int, 0644); +module_param(nowayout, bool, 0644); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " "(default=CONFIG_WATCHDOG_NOWAYOUT)"); @@ -518,6 +509,7 @@ static void panic_halt_ipmi_heartbeat(void) msg.cmd = IPMI_WDOG_RESET_TIMER; msg.data = NULL; msg.data_len = 0; + atomic_add(2, &panic_done_count); rv = ipmi_request_supply_msgs(watchdog_user, (struct ipmi_addr *) &addr, 0, @@ -526,8 +518,8 @@ static void panic_halt_ipmi_heartbeat(void) &panic_halt_heartbeat_smi_msg, &panic_halt_heartbeat_recv_msg, 1); - if (!rv) - atomic_add(2, &panic_done_count); + if (rv) + atomic_sub(2, &panic_done_count); } static struct ipmi_smi_msg panic_halt_smi_msg = { @@ -551,16 +543,18 @@ static void panic_halt_ipmi_set_timeout(void) /* Wait for the messages to be free. */ while (atomic_read(&panic_done_count) != 0) ipmi_poll_interface(watchdog_user); + atomic_add(2, &panic_done_count); rv = i_ipmi_set_timeout(&panic_halt_smi_msg, &panic_halt_recv_msg, &send_heartbeat_now); - if (!rv) { - atomic_add(2, &panic_done_count); - if (send_heartbeat_now) - panic_halt_ipmi_heartbeat(); - } else + if (rv) { + atomic_sub(2, &panic_done_count); printk(KERN_WARNING PFX "Unable to extend the watchdog timeout."); + } else { + if (send_heartbeat_now) + panic_halt_ipmi_heartbeat(); + } while (atomic_read(&panic_done_count) != 0) ipmi_poll_interface(watchdog_user); } @@ -596,6 +590,7 @@ static int ipmi_heartbeat(void) struct kernel_ipmi_msg msg; int rv; struct ipmi_system_interface_addr addr; + int timeout_retries = 0; if (ipmi_ignore_heartbeat) return 0; @@ -616,6 +611,7 @@ static int ipmi_heartbeat(void) mutex_lock(&heartbeat_lock); +restart: atomic_set(&heartbeat_tofree, 2); /* @@ -653,7 +649,33 @@ static int ipmi_heartbeat(void) /* Wait for the heartbeat to be sent. */ wait_for_completion(&heartbeat_wait); - if (heartbeat_recv_msg.msg.data[0] != 0) { + if (heartbeat_recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) { + timeout_retries++; + if (timeout_retries > 3) { + printk(KERN_ERR PFX ": Unable to restore the IPMI" + " watchdog's settings, giving up.\n"); + rv = -EIO; + goto out_unlock; + } + + /* + * The timer was not initialized, that means the BMC was + * probably reset and lost the watchdog information. Attempt + * to restore the timer's info. Note that we still hold + * the heartbeat lock, to keep a heartbeat from happening + * in this process, so must say no heartbeat to avoid a + * deadlock on this mutex. + */ + rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); + if (rv) { + printk(KERN_ERR PFX ": Unable to send the command to" + " set the watchdog's settings, giving up.\n"); + goto out_unlock; + } + + /* We might need a new heartbeat, so do it now */ + goto restart; + } else if (heartbeat_recv_msg.msg.data[0] != 0) { /* * Got an error in the heartbeat response. It was already * reported in ipmi_wdog_msg_handler, but we should return @@ -662,6 +684,7 @@ static int ipmi_heartbeat(void) rv = -EINVAL; } +out_unlock: mutex_unlock(&heartbeat_lock); return rv; @@ -698,7 +721,6 @@ static int ipmi_ioctl(struct file *file, return -EFAULT; return 0; - case WDIOC_SET_PRETIMEOUT: case WDIOC_SETPRETIMEOUT: i = copy_from_user(&val, argp, sizeof(int)); if (i) @@ -706,7 +728,6 @@ static int ipmi_ioctl(struct file *file, pretimeout = val; return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); - case WDIOC_GET_PRETIMEOUT: case WDIOC_GETPRETIMEOUT: i = copy_to_user(argp, &pretimeout, sizeof(pretimeout)); if (i) @@ -922,11 +943,15 @@ static struct miscdevice ipmi_wdog_miscdev = { static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg, void *handler_data) { - if (msg->msg.data[0] != 0) { + if (msg->msg.cmd == IPMI_WDOG_RESET_TIMER && + msg->msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) + printk(KERN_INFO PFX "response: The IPMI controller appears" + " to have been reset, will attempt to reinitialize" + " the watchdog timer\n"); + else if (msg->msg.data[0] != 0) printk(KERN_ERR PFX "response: Error %x on cmd %x\n", msg->msg.data[0], msg->msg.cmd); - } ipmi_free_recv_msg(msg); } @@ -1129,7 +1154,7 @@ static int wdog_reboot_handler(struct notifier_block *this, if (code == SYS_POWER_OFF || code == SYS_HALT) { /* Disable the WDT if we are shutting down. */ ipmi_watchdog_state = WDOG_TIMEOUT_NONE; - panic_halt_ipmi_set_timeout(); + ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) { /* Set a long timer to let the reboot happens, but reboot if it hangs, but only if the watchdog @@ -1137,7 +1162,7 @@ static int wdog_reboot_handler(struct notifier_block *this, timeout = 120; pretimeout = 0; ipmi_watchdog_state = WDOG_TIMEOUT_RESET; - panic_halt_ipmi_set_timeout(); + ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); } } return NOTIFY_OK; |
