diff options
Diffstat (limited to 'arch/powerpc/kernel/crash.c')
| -rw-r--r-- | arch/powerpc/kernel/crash.c | 376 |
1 files changed, 273 insertions, 103 deletions
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 8c21d378f5d..51dbace3269 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -10,171 +10,303 @@ * */ -#undef DEBUG - #include <linux/kernel.h> #include <linux/smp.h> #include <linux/reboot.h> #include <linux/kexec.h> -#include <linux/bootmem.h> +#include <linux/export.h> #include <linux/crash_dump.h> #include <linux/delay.h> -#include <linux/elf.h> -#include <linux/elfcore.h> -#include <linux/init.h> +#include <linux/irq.h> #include <linux/types.h> #include <asm/processor.h> #include <asm/machdep.h> +#include <asm/kexec.h> #include <asm/kdump.h> -#include <asm/lmb.h> -#include <asm/firmware.h> +#include <asm/prom.h> #include <asm/smp.h> +#include <asm/setjmp.h> +#include <asm/debug.h> -#ifdef DEBUG -#include <asm/udbg.h> -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif +/* + * The primary CPU waits a while for all secondary CPUs to enter. This is to + * avoid sending an IPI if the secondary CPUs are entering + * crash_kexec_secondary on their own (eg via a system reset). + * + * The secondary timeout has to be longer than the primary. Both timeouts are + * in milliseconds. + */ +#define PRIMARY_TIMEOUT 500 +#define SECONDARY_TIMEOUT 1000 -/* This keeps a track of which one is crashing cpu. */ -int crashing_cpu = -1; +#define IPI_TIMEOUT 10000 +#define REAL_MODE_TIMEOUT 10000 -static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, - size_t data_len) -{ - struct elf_note note; - - note.n_namesz = strlen(name) + 1; - note.n_descsz = data_len; - note.n_type = type; - memcpy(buf, ¬e, sizeof(note)); - buf += (sizeof(note) +3)/4; - memcpy(buf, name, note.n_namesz); - buf += (note.n_namesz + 3)/4; - memcpy(buf, data, note.n_descsz); - buf += (note.n_descsz + 3)/4; - - return buf; -} +/* This keeps a track of which one is the crashing cpu. */ +int crashing_cpu = -1; +static int time_to_dump; -static void final_note(u32 *buf) -{ - struct elf_note note; +#define CRASH_HANDLER_MAX 3 +/* NULL terminated list of shutdown handles */ +static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; +static DEFINE_SPINLOCK(crash_handlers_lock); - note.n_namesz = 0; - note.n_descsz = 0; - note.n_type = 0; - memcpy(buf, ¬e, sizeof(note)); -} +static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; -static void crash_save_this_cpu(struct pt_regs *regs, int cpu) +static int handle_fault(struct pt_regs *regs) { - struct elf_prstatus prstatus; - u32 *buf; - - if ((cpu < 0) || (cpu >= NR_CPUS)) - return; - - /* Using ELF notes here is opportunistic. - * I need a well defined structure format - * for the data I pass, and I need tags - * on the data to indicate what information I have - * squirrelled away. ELF notes happen to provide - * all of that that no need to invent something new. - */ - buf = (u32*)per_cpu_ptr(crash_notes, cpu); - if (!buf) - return; - - memset(&prstatus, 0, sizeof(prstatus)); - prstatus.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, - sizeof(prstatus)); - final_note(buf); + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); + return 0; } #ifdef CONFIG_SMP -static atomic_t waiting_for_crash_ipi; +static atomic_t cpus_in_crash; void crash_ipi_callback(struct pt_regs *regs) { - int cpu = smp_processor_id(); + static cpumask_t cpus_state_saved = CPU_MASK_NONE; - if (cpu == crashing_cpu) - return; + int cpu = smp_processor_id(); if (!cpu_online(cpu)) return; + hard_irq_disable(); + if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { + crash_save_cpu(regs, cpu); + cpumask_set_cpu(cpu, &cpus_state_saved); + } + + atomic_inc(&cpus_in_crash); + smp_mb__after_atomic(); + + /* + * Starting the kdump boot. + * This barrier is needed to make sure that all CPUs are stopped. + */ + while (!time_to_dump) + cpu_relax(); + if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 1); - local_irq_disable(); - - crash_save_this_cpu(regs, cpu); - atomic_dec(&waiting_for_crash_ipi); +#ifdef CONFIG_PPC64 kexec_smp_wait(); +#else + for (;;); /* FIXME */ +#endif + /* NOTREACHED */ } -static void crash_kexec_prepare_cpus(void) +static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; + unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + int tries = 0; + int (*old_handler)(struct pt_regs *regs); - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + printk(KERN_EMERG "Sending IPI to other CPUs\n"); crash_send_ipi(crash_ipi_callback); smp_wmb(); +again: /* - * FIXME: Until we will have the way to stop other CPUSs reliabally, + * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to - * respond. If not, proceed the kexec boot even though we failed to - * capture other CPU states. + * respond. */ - msecs = 1000000; - while ((atomic_read(&waiting_for_crash_ipi) > 0) && (--msecs > 0)) { - barrier(); + msecs = IPI_TIMEOUT; + while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) mdelay(1); - } /* Would it be better to replace the trap vector here? */ + if (atomic_read(&cpus_in_crash) >= ncpus) { + printk(KERN_EMERG "IPI complete\n"); + return; + } + + printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", + ncpus - atomic_read(&cpus_in_crash)); + /* - * FIXME: In case if we do not get all CPUs, one possibility: ask the - * user to do soft reset such that we get all. - * IPI handler is already set by the panic cpu initially. Therefore, - * all cpus could invoke this handler from die() and the panic CPU - * will call machine_kexec() directly from this handler to do - * kexec boot. + * If we have a panic timeout set then we can't wait indefinitely + * for someone to activate system reset. We also give up on the + * second time through if system reset fail to work. */ - if (atomic_read(&waiting_for_crash_ipi)) - printk(KERN_ALERT "done waiting: %d cpus not responding\n", - atomic_read(&waiting_for_crash_ipi)); - /* Leave the IPI callback set */ + if ((panic_timeout > 0) || (tries > 0)) + return; + + /* + * A system reset will cause all CPUs to take an 0x100 exception. + * The primary CPU returns here via setjmp, and the secondary + * CPUs reexecute the crash_kexec_secondary path. + */ + old_handler = __debugger; + __debugger = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + + if (setjmp(crash_shutdown_buf) == 0) { + printk(KERN_EMERG "Activate system reset (dumprestart) " + "to stop other cpu(s)\n"); + + /* + * A system reset will force all CPUs to execute the + * crash code again. We need to reset cpus_in_crash so we + * wait for everyone to do this. + */ + atomic_set(&cpus_in_crash, 0); + smp_mb(); + + while (atomic_read(&cpus_in_crash) < ncpus) + cpu_relax(); + } + + crash_shutdown_cpu = -1; + __debugger = old_handler; + + tries++; + goto again; } -#else -static void crash_kexec_prepare_cpus(void) + +/* + * This function will be called by secondary cpus. + */ +void crash_kexec_secondary(struct pt_regs *regs) +{ + unsigned long flags; + int msecs = SECONDARY_TIMEOUT; + + local_irq_save(flags); + + /* Wait for the primary crash CPU to signal its progress */ + while (crashing_cpu < 0) { + if (--msecs < 0) { + /* No response, kdump image may not have been loaded */ + local_irq_restore(flags); + return; + } + + mdelay(1); + } + + crash_ipi_callback(regs); +} + +#else /* ! CONFIG_SMP */ + +static void crash_kexec_prepare_cpus(int cpu) { /* - * move the secondarys to us so that we can copy + * move the secondaries to us so that we can copy * the new kernel 0-0x100 safely * * do this if kexec in setup.c ? */ +#ifdef CONFIG_PPC64 smp_release_cpus(); +#else + /* FIXME */ +#endif } -#endif +void crash_kexec_secondary(struct pt_regs *regs) +{ +} +#endif /* CONFIG_SMP */ + +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64) +static void crash_kexec_wait_realmode(int cpu) +{ + unsigned int msecs; + int i; + + msecs = REAL_MODE_TIMEOUT; + for (i=0; i < nr_cpu_ids && msecs > 0; i++) { + if (i == cpu) + continue; + + while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0)) + break; + msecs--; + mdelay(1); + } + } + mb(); +} +#else +static inline void crash_kexec_wait_realmode(int cpu) {} +#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */ + +/* + * Register a function to be called on shutdown. Only use this if you + * can't reset your device in the second kernel. + */ +int crash_shutdown_register(crash_shutdown_t handler) +{ + unsigned int i, rc; + + spin_lock(&crash_handlers_lock); + for (i = 0 ; i < CRASH_HANDLER_MAX; i++) + if (!crash_shutdown_handles[i]) { + /* Insert handle at first empty entry */ + crash_shutdown_handles[i] = handler; + rc = 0; + break; + } + + if (i == CRASH_HANDLER_MAX) { + printk(KERN_ERR "Crash shutdown handles full, " + "not registered.\n"); + rc = 1; + } + + spin_unlock(&crash_handlers_lock); + return rc; +} +EXPORT_SYMBOL(crash_shutdown_register); + +int crash_shutdown_unregister(crash_shutdown_t handler) +{ + unsigned int i, rc; + + spin_lock(&crash_handlers_lock); + for (i = 0 ; i < CRASH_HANDLER_MAX; i++) + if (crash_shutdown_handles[i] == handler) + break; + + if (i == CRASH_HANDLER_MAX) { + printk(KERN_ERR "Crash shutdown handle not found\n"); + rc = 1; + } else { + /* Shift handles down */ + for (; crash_shutdown_handles[i]; i++) + crash_shutdown_handles[i] = + crash_shutdown_handles[i+1]; + rc = 0; + } + + spin_unlock(&crash_handlers_lock); + return rc; +} +EXPORT_SYMBOL(crash_shutdown_unregister); void default_machine_crash_shutdown(struct pt_regs *regs) { + unsigned int i; + int (*old_handler)(struct pt_regs *regs); + /* * This function is only called after the system - * has paniced or is otherwise in a critical state. + * has panicked or is otherwise in a critical state. * The minimum amount of code to allow a kexec'd kernel * to run successfully needs to happen here. * @@ -182,16 +314,54 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * an SMP system. * The kernel is broken so disable interrupts. */ - local_irq_disable(); - - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 0); + hard_irq_disable(); /* * Make a note of crashing cpu. Will be used in machine_kexec * such that another IPI will not be sent. */ crashing_cpu = smp_processor_id(); - crash_kexec_prepare_cpus(); - crash_save_this_cpu(regs, crashing_cpu); + + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(regs) == 0x100) + mdelay(PRIMARY_TIMEOUT); + + crash_kexec_prepare_cpus(crashing_cpu); + + crash_save_cpu(regs, crashing_cpu); + + time_to_dump = 1; + + crash_kexec_wait_realmode(crashing_cpu); + + machine_kexec_mask_interrupts(); + + /* + * Call registered shutdown routines safely. Swap out + * __debugger_fault_handler, and replace on exit. + */ + old_handler = __debugger_fault_handler; + __debugger_fault_handler = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + for (i = 0; crash_shutdown_handles[i]; i++) { + if (setjmp(crash_shutdown_buf) == 0) { + /* + * Insert syncs and delay to ensure + * instructions in the dangerous region don't + * leak away from this protected region. + */ + asm volatile("sync; isync"); + /* dangerous region */ + crash_shutdown_handles[i](); + asm volatile("sync; isync"); + } + } + crash_shutdown_cpu = -1; + __debugger_fault_handler = old_handler; + + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(1, 0); } |
