diff options
Diffstat (limited to 'arch/powerpc/kernel/crash.c')
| -rw-r--r-- | arch/powerpc/kernel/crash.c | 326 |
1 files changed, 138 insertions, 188 deletions
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 6f4613dd05e..51dbace3269 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -10,84 +10,84 @@ * */ -#undef DEBUG - #include <linux/kernel.h> #include <linux/smp.h> #include <linux/reboot.h> #include <linux/kexec.h> -#include <linux/bootmem.h> +#include <linux/export.h> #include <linux/crash_dump.h> #include <linux/delay.h> -#include <linux/elf.h> -#include <linux/elfcore.h> -#include <linux/init.h> #include <linux/irq.h> #include <linux/types.h> -#include <linux/lmb.h> #include <asm/processor.h> #include <asm/machdep.h> #include <asm/kexec.h> #include <asm/kdump.h> #include <asm/prom.h> -#include <asm/firmware.h> #include <asm/smp.h> -#include <asm/system.h> #include <asm/setjmp.h> +#include <asm/debug.h> -#ifdef DEBUG -#include <asm/udbg.h> -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif +/* + * The primary CPU waits a while for all secondary CPUs to enter. This is to + * avoid sending an IPI if the secondary CPUs are entering + * crash_kexec_secondary on their own (eg via a system reset). + * + * The secondary timeout has to be longer than the primary. Both timeouts are + * in milliseconds. + */ +#define PRIMARY_TIMEOUT 500 +#define SECONDARY_TIMEOUT 1000 + +#define IPI_TIMEOUT 10000 +#define REAL_MODE_TIMEOUT 10000 -/* This keeps a track of which one is crashing cpu. */ +/* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; -static cpumask_t cpus_in_crash = CPU_MASK_NONE; -cpumask_t cpus_in_sr = CPU_MASK_NONE; +static int time_to_dump; -#define CRASH_HANDLER_MAX 2 +#define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); +static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; + +static int handle_fault(struct pt_regs *regs) +{ + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); + return 0; +} + #ifdef CONFIG_SMP -static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); +static atomic_t cpus_in_crash; void crash_ipi_callback(struct pt_regs *regs) { + static cpumask_t cpus_state_saved = CPU_MASK_NONE; + int cpu = smp_processor_id(); if (!cpu_online(cpu)) return; hard_irq_disable(); - if (!cpu_isset(cpu, cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { crash_save_cpu(regs, cpu); - cpu_set(cpu, cpus_in_crash); - - /* - * Entered via soft-reset - could be the kdump - * process is invoked using soft-reset or user activated - * it if some CPU did not respond to an IPI. - * For soft-reset, the secondary CPU can enter this func - * twice. 1 - using IPI, and 2. soft-reset. - * Tell the kexec CPU that entered via soft-reset and ready - * to go down. - */ - if (cpu_isset(cpu, cpus_in_sr)) { - cpu_clear(cpu, cpus_in_sr); - atomic_inc(&enter_on_soft_reset); + cpumask_set_cpu(cpu, &cpus_state_saved); } + atomic_inc(&cpus_in_crash); + smp_mb__after_atomic(); + /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. - * If not, soft-reset will be invoked to bring other CPUs. */ - while (!cpu_isset(crashing_cpu, cpus_in_crash)) + while (!time_to_dump) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -102,114 +102,108 @@ void crash_ipi_callback(struct pt_regs *regs) /* NOTREACHED */ } -/* - * Wait until all CPUs are entered via soft-reset. - */ -static void crash_soft_reset_check(int cpu) -{ - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - - cpu_clear(cpu, cpus_in_sr); - while (atomic_read(&enter_on_soft_reset) != ncpus) - cpu_relax(); -} - - static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + int tries = 0; + int (*old_handler)(struct pt_regs *regs); + + printk(KERN_EMERG "Sending IPI to other CPUs\n"); crash_send_ipi(crash_ipi_callback); smp_wmb(); +again: /* - * FIXME: Until we will have the way to stop other CPUSs reliabally, + * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to * respond. - * Delay of at least 10 seconds. */ - printk(KERN_EMERG "Sending IPI to other cpus...\n"); - msecs = 10000; - while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) { - cpu_relax(); + msecs = IPI_TIMEOUT; + while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) mdelay(1); - } /* Would it be better to replace the trap vector here? */ + if (atomic_read(&cpus_in_crash) >= ncpus) { + printk(KERN_EMERG "IPI complete\n"); + return; + } + + printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", + ncpus - atomic_read(&cpus_in_crash)); + /* - * FIXME: In case if we do not get all CPUs, one possibility: ask the - * user to do soft reset such that we get all. - * Soft-reset will be used until better mechanism is implemented. + * If we have a panic timeout set then we can't wait indefinitely + * for someone to activate system reset. We also give up on the + * second time through if system reset fail to work. */ - if (cpus_weight(cpus_in_crash) < ncpus) { - printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", - ncpus - cpus_weight(cpus_in_crash)); - printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); - cpus_in_sr = CPU_MASK_NONE; - atomic_set(&enter_on_soft_reset, 0); - while (cpus_weight(cpus_in_crash) < ncpus) - cpu_relax(); - } + if ((panic_timeout > 0) || (tries > 0)) + return; + /* - * Make sure all CPUs are entered via soft-reset if the kdump is - * invoked using soft-reset. + * A system reset will cause all CPUs to take an 0x100 exception. + * The primary CPU returns here via setjmp, and the secondary + * CPUs reexecute the crash_kexec_secondary path. */ - if (cpu_isset(cpu, cpus_in_sr)) - crash_soft_reset_check(cpu); - /* Leave the IPI callback set */ + old_handler = __debugger; + __debugger = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + + if (setjmp(crash_shutdown_buf) == 0) { + printk(KERN_EMERG "Activate system reset (dumprestart) " + "to stop other cpu(s)\n"); + + /* + * A system reset will force all CPUs to execute the + * crash code again. We need to reset cpus_in_crash so we + * wait for everyone to do this. + */ + atomic_set(&cpus_in_crash, 0); + smp_mb(); + + while (atomic_read(&cpus_in_crash) < ncpus) + cpu_relax(); + } + + crash_shutdown_cpu = -1; + __debugger = old_handler; + + tries++; + goto again; } /* - * This function will be called by secondary cpus or by kexec cpu - * if soft-reset is activated to stop some CPUs. + * This function will be called by secondary cpus. */ void crash_kexec_secondary(struct pt_regs *regs) { - int cpu = smp_processor_id(); unsigned long flags; - int msecs = 5; + int msecs = SECONDARY_TIMEOUT; local_irq_save(flags); - /* Wait 5ms if the kexec CPU is not entered yet. */ + + /* Wait for the primary crash CPU to signal its progress */ while (crashing_cpu < 0) { if (--msecs < 0) { - /* - * Either kdump image is not loaded or - * kdump process is not started - Probably xmon - * exited using 'x'(exit and recover) or - * kexec_should_crash() failed for all running tasks. - */ - cpu_clear(cpu, cpus_in_sr); + /* No response, kdump image may not have been loaded */ local_irq_restore(flags); return; } + mdelay(1); - cpu_relax(); - } - if (cpu == crashing_cpu) { - /* - * Panic CPU will enter this func only via soft-reset. - * Wait until all secondary CPUs entered and - * then start kexec boot. - */ - crash_soft_reset_check(cpu); - cpu_set(crashing_cpu, cpus_in_crash); - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 0); - machine_kexec(kexec_crash_image); - /* NOTREACHED */ } + crash_ipi_callback(regs); } -#else +#else /* ! CONFIG_SMP */ + static void crash_kexec_prepare_cpus(int cpu) { /* - * move the secondarys to us so that we can copy + * move the secondaries to us so that we can copy * the new kernel 0-0x100 safely * * do this if kexec in setup.c ? @@ -223,75 +217,34 @@ static void crash_kexec_prepare_cpus(int cpu) void crash_kexec_secondary(struct pt_regs *regs) { - cpus_in_sr = CPU_MASK_NONE; } -#endif -#ifdef CONFIG_SPU_BASE +#endif /* CONFIG_SMP */ -#include <asm/spu.h> -#include <asm/spu_priv1.h> - -struct crash_spu_info { - struct spu *spu; - u32 saved_spu_runcntl_RW; - u32 saved_spu_status_R; - u32 saved_spu_npc_RW; - u64 saved_mfc_sr1_RW; - u64 saved_mfc_dar; - u64 saved_mfc_dsisr; -}; - -#define CRASH_NUM_SPUS 16 /* Enough for current hardware */ -static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS]; - -static void crash_kexec_stop_spus(void) +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64) +static void crash_kexec_wait_realmode(int cpu) { - struct spu *spu; + unsigned int msecs; int i; - u64 tmp; - - for (i = 0; i < CRASH_NUM_SPUS; i++) { - if (!crash_spu_info[i].spu) - continue; - - spu = crash_spu_info[i].spu; - crash_spu_info[i].saved_spu_runcntl_RW = - in_be32(&spu->problem->spu_runcntl_RW); - crash_spu_info[i].saved_spu_status_R = - in_be32(&spu->problem->spu_status_R); - crash_spu_info[i].saved_spu_npc_RW = - in_be32(&spu->problem->spu_npc_RW); - - crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu); - crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu); - tmp = spu_mfc_sr1_get(spu); - crash_spu_info[i].saved_mfc_sr1_RW = tmp; - - tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; - spu_mfc_sr1_set(spu, tmp); - - __delay(200); - } -} - -void crash_register_spus(struct list_head *list) -{ - struct spu *spu; - - list_for_each_entry(spu, list, full_list) { - if (WARN_ON(spu->number >= CRASH_NUM_SPUS)) + msecs = REAL_MODE_TIMEOUT; + for (i=0; i < nr_cpu_ids && msecs > 0; i++) { + if (i == cpu) continue; - crash_spu_info[spu->number].spu = spu; + while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0)) + break; + msecs--; + mdelay(1); + } } + mb(); } - #else -static inline void crash_kexec_stop_spus(void) -{ -} -#endif /* CONFIG_SPU_BASE */ +static inline void crash_kexec_wait_realmode(int cpu) {} +#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */ /* * Register a function to be called on shutdown. Only use this if you @@ -346,20 +299,11 @@ int crash_shutdown_unregister(crash_shutdown_t handler) } EXPORT_SYMBOL(crash_shutdown_unregister); -static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; - -static int handle_fault(struct pt_regs *regs) -{ - longjmp(crash_shutdown_buf, 1); - return 0; -} - void default_machine_crash_shutdown(struct pt_regs *regs) { unsigned int i; int (*old_handler)(struct pt_regs *regs); - /* * This function is only called after the system * has panicked or is otherwise in a critical state. @@ -372,22 +316,36 @@ void default_machine_crash_shutdown(struct pt_regs *regs) */ hard_irq_disable(); - for_each_irq(i) { - struct irq_desc *desc = irq_to_desc(i); + /* + * Make a note of crashing cpu. Will be used in machine_kexec + * such that another IPI will not be sent. + */ + crashing_cpu = smp_processor_id(); - if (desc->status & IRQ_INPROGRESS) - desc->chip->eoi(i); + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(regs) == 0x100) + mdelay(PRIMARY_TIMEOUT); - if (!(desc->status & IRQ_DISABLED)) - desc->chip->disable(i); - } + crash_kexec_prepare_cpus(crashing_cpu); + + crash_save_cpu(regs, crashing_cpu); + + time_to_dump = 1; + + crash_kexec_wait_realmode(crashing_cpu); + + machine_kexec_mask_interrupts(); /* - * Call registered shutdown routines savely. Swap out + * Call registered shutdown routines safely. Swap out * __debugger_fault_handler, and replace on exit. */ old_handler = __debugger_fault_handler; __debugger_fault_handler = handle_fault; + crash_shutdown_cpu = smp_processor_id(); for (i = 0; crash_shutdown_handles[i]; i++) { if (setjmp(crash_shutdown_buf) == 0) { /* @@ -401,17 +359,9 @@ void default_machine_crash_shutdown(struct pt_regs *regs) asm volatile("sync; isync"); } } + crash_shutdown_cpu = -1; __debugger_fault_handler = old_handler; - /* - * Make a note of crashing cpu. Will be used in machine_kexec - * such that another IPI will not be sent. - */ - crashing_cpu = smp_processor_id(); - crash_save_cpu(regs, crashing_cpu); - crash_kexec_prepare_cpus(crashing_cpu); - cpu_set(crashing_cpu, cpus_in_crash); - crash_kexec_stop_spus(); if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); } |
