diff options
Diffstat (limited to 'arch/powerpc/kernel/crash.c')
| -rw-r--r-- | arch/powerpc/kernel/crash.c | 329 | 
1 file changed, 125 insertions, 204 deletions
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 832c8c4db25..51dbace3269 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -10,84 +10,84 @@   *   */ -#undef DEBUG -  #include <linux/kernel.h>  #include <linux/smp.h>  #include <linux/reboot.h>  #include <linux/kexec.h> -#include <linux/bootmem.h> +#include <linux/export.h>  #include <linux/crash_dump.h>  #include <linux/delay.h> -#include <linux/elf.h> -#include <linux/elfcore.h> -#include <linux/init.h>  #include <linux/irq.h>  #include <linux/types.h> -#include <linux/memblock.h>  #include <asm/processor.h>  #include <asm/machdep.h>  #include <asm/kexec.h>  #include <asm/kdump.h>  #include <asm/prom.h> -#include <asm/firmware.h>  #include <asm/smp.h> -#include <asm/system.h>  #include <asm/setjmp.h> +#include <asm/debug.h> -#ifdef DEBUG -#include <asm/udbg.h> -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif +/* + * The primary CPU waits a while for all secondary CPUs to enter. This is to + * avoid sending an IPI if the secondary CPUs are entering + * crash_kexec_secondary on their own (eg via a system reset). + * + * The secondary timeout has to be longer than the primary. Both timeouts are + * in milliseconds. + */ +#define PRIMARY_TIMEOUT		500 +#define SECONDARY_TIMEOUT	1000 + +#define IPI_TIMEOUT		10000 +#define REAL_MODE_TIMEOUT	10000 -/* This keeps a track of which one is crashing cpu. */ +/* This keeps a track of which one is the crashing cpu. 
*/  int crashing_cpu = -1; -static cpumask_t cpus_in_crash = CPU_MASK_NONE; -cpumask_t cpus_in_sr = CPU_MASK_NONE; +static int time_to_dump; -#define CRASH_HANDLER_MAX 2 +#define CRASH_HANDLER_MAX 3  /* NULL terminated list of shutdown handles */  static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1];  static DEFINE_SPINLOCK(crash_handlers_lock); +static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; + +static int handle_fault(struct pt_regs *regs) +{ +	if (crash_shutdown_cpu == smp_processor_id()) +		longjmp(crash_shutdown_buf, 1); +	return 0; +} +  #ifdef CONFIG_SMP -static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); +static atomic_t cpus_in_crash;  void crash_ipi_callback(struct pt_regs *regs)  { +	static cpumask_t cpus_state_saved = CPU_MASK_NONE; +  	int cpu = smp_processor_id();  	if (!cpu_online(cpu))  		return;  	hard_irq_disable(); -	if (!cpu_isset(cpu, cpus_in_crash)) +	if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {  		crash_save_cpu(regs, cpu); -	cpu_set(cpu, cpus_in_crash); - -	/* -	 * Entered via soft-reset - could be the kdump -	 * process is invoked using soft-reset or user activated -	 * it if some CPU did not respond to an IPI. -	 * For soft-reset, the secondary CPU can enter this func -	 * twice. 1 - using IPI, and 2. soft-reset. -	 * Tell the kexec CPU that entered via soft-reset and ready -	 * to go down. -	 */ -	if (cpu_isset(cpu, cpus_in_sr)) { -		cpu_clear(cpu, cpus_in_sr); -		atomic_inc(&enter_on_soft_reset); +		cpumask_set_cpu(cpu, &cpus_state_saved);  	} +	atomic_inc(&cpus_in_crash); +	smp_mb__after_atomic(); +  	/*  	 * Starting the kdump boot.  	 * This barrier is needed to make sure that all CPUs are stopped. -	 * If not, soft-reset will be invoked to bring other CPUs.  	 
*/ -	while (!cpu_isset(crashing_cpu, cpus_in_crash)) +	while (!time_to_dump)  		cpu_relax();  	if (ppc_md.kexec_cpu_down) @@ -102,142 +102,108 @@ void crash_ipi_callback(struct pt_regs *regs)  	/* NOTREACHED */  } -/* - * Wait until all CPUs are entered via soft-reset. - */ -static void crash_soft_reset_check(int cpu) -{ -	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - -	cpu_clear(cpu, cpus_in_sr); -	while (atomic_read(&enter_on_soft_reset) != ncpus) -		cpu_relax(); -} - -  static void crash_kexec_prepare_cpus(int cpu)  {  	unsigned int msecs; -  	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ +	int tries = 0; +	int (*old_handler)(struct pt_regs *regs); + +	printk(KERN_EMERG "Sending IPI to other CPUs\n");  	crash_send_ipi(crash_ipi_callback);  	smp_wmb(); +again:  	/* -	 * FIXME: Until we will have the way to stop other CPUSs reliabally, +	 * FIXME: Until we will have the way to stop other CPUs reliably,  	 * the crash CPU will send an IPI and wait for other CPUs to  	 * respond. -	 * Delay of at least 10 seconds.  	 */ -	printk(KERN_EMERG "Sending IPI to other cpus...\n"); -	msecs = 10000; -	while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) { -		cpu_relax(); +	msecs = IPI_TIMEOUT; +	while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))  		mdelay(1); -	}  	/* Would it be better to replace the trap vector here? */ +	if (atomic_read(&cpus_in_crash) >= ncpus) { +		printk(KERN_EMERG "IPI complete\n"); +		return; +	} + +	printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", +		ncpus - atomic_read(&cpus_in_crash)); +  	/* -	 * FIXME: In case if we do not get all CPUs, one possibility: ask the -	 * user to do soft reset such that we get all. -	 * Soft-reset will be used until better mechanism is implemented. +	 * If we have a panic timeout set then we can't wait indefinitely +	 * for someone to activate system reset. We also give up on the +	 * second time through if system reset fail to work.  	 
*/ -	if (cpus_weight(cpus_in_crash) < ncpus) { -		printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", -			ncpus - cpus_weight(cpus_in_crash)); -		printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); -		cpus_in_sr = CPU_MASK_NONE; -		atomic_set(&enter_on_soft_reset, 0); -		while (cpus_weight(cpus_in_crash) < ncpus) -			cpu_relax(); -	} +	if ((panic_timeout > 0) || (tries > 0)) +		return; +  	/* -	 * Make sure all CPUs are entered via soft-reset if the kdump is -	 * invoked using soft-reset. +	 * A system reset will cause all CPUs to take an 0x100 exception. +	 * The primary CPU returns here via setjmp, and the secondary +	 * CPUs reexecute the crash_kexec_secondary path.  	 */ -	if (cpu_isset(cpu, cpus_in_sr)) -		crash_soft_reset_check(cpu); -	/* Leave the IPI callback set */ -} +	old_handler = __debugger; +	__debugger = handle_fault; +	crash_shutdown_cpu = smp_processor_id(); -/* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#ifdef CONFIG_PPC_STD_MMU_64 -static void crash_kexec_wait_realmode(int cpu) -{ -	unsigned int msecs; -	int i; +	if (setjmp(crash_shutdown_buf) == 0) { +		printk(KERN_EMERG "Activate system reset (dumprestart) " +				  "to stop other cpu(s)\n"); -	msecs = 10000; -	for (i=0; i < NR_CPUS && msecs > 0; i++) { -		if (i == cpu) -			continue; +		/* +		 * A system reset will force all CPUs to execute the +		 * crash code again. We need to reset cpus_in_crash so we +		 * wait for everyone to do this. 
+		 */ +		atomic_set(&cpus_in_crash, 0); +		smp_mb(); -		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { -			barrier(); -			if (!cpu_possible(i)) { -				break; -			} -			if (!cpu_online(i)) { -				break; -			} -			msecs--; -			mdelay(1); -		} +		while (atomic_read(&cpus_in_crash) < ncpus) +			cpu_relax();  	} -	mb(); + +	crash_shutdown_cpu = -1; +	__debugger = old_handler; + +	tries++; +	goto again;  } -#endif  /* - * This function will be called by secondary cpus or by kexec cpu - * if soft-reset is activated to stop some CPUs. + * This function will be called by secondary cpus.   */  void crash_kexec_secondary(struct pt_regs *regs)  { -	int cpu = smp_processor_id();  	unsigned long flags; -	int msecs = 5; +	int msecs = SECONDARY_TIMEOUT;  	local_irq_save(flags); -	/* Wait 5ms if the kexec CPU is not entered yet. */ + +	/* Wait for the primary crash CPU to signal its progress */  	while (crashing_cpu < 0) {  		if (--msecs < 0) { -			/* -			 * Either kdump image is not loaded or -			 * kdump process is not started - Probably xmon -			 * exited using 'x'(exit and recover) or -			 * kexec_should_crash() failed for all running tasks. -			 */ -			cpu_clear(cpu, cpus_in_sr); +			/* No response, kdump image may not have been loaded */  			local_irq_restore(flags);  			return;  		} +  		mdelay(1); -		cpu_relax(); -	} -	if (cpu == crashing_cpu) { -		/* -		 * Panic CPU will enter this func only via soft-reset. -		 * Wait until all secondary CPUs entered and -		 * then start kexec boot. -		 */ -		crash_soft_reset_check(cpu); -		cpu_set(crashing_cpu, cpus_in_crash); -		if (ppc_md.kexec_cpu_down) -			ppc_md.kexec_cpu_down(1, 0); -		machine_kexec(kexec_crash_image); -		/* NOTREACHED */  	} +  	crash_ipi_callback(regs);  } -#else +#else	/* ! 
CONFIG_SMP */ +  static void crash_kexec_prepare_cpus(int cpu)  {  	/* -	 * move the secondarys to us so that we can copy +	 * move the secondaries to us so that we can copy  	 * the new kernel 0-0x100 safely  	 *  	 * do this if kexec in setup.c ? @@ -251,75 +217,34 @@ static void crash_kexec_prepare_cpus(int cpu)  void crash_kexec_secondary(struct pt_regs *regs)  { -	cpus_in_sr = CPU_MASK_NONE;  } -#endif -#ifdef CONFIG_SPU_BASE - -#include <asm/spu.h> -#include <asm/spu_priv1.h> - -struct crash_spu_info { -	struct spu *spu; -	u32 saved_spu_runcntl_RW; -	u32 saved_spu_status_R; -	u32 saved_spu_npc_RW; -	u64 saved_mfc_sr1_RW; -	u64 saved_mfc_dar; -	u64 saved_mfc_dsisr; -}; +#endif	/* CONFIG_SMP */ -#define CRASH_NUM_SPUS	16	/* Enough for current hardware */ -static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS]; - -static void crash_kexec_stop_spus(void) +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64) +static void crash_kexec_wait_realmode(int cpu)  { -	struct spu *spu; +	unsigned int msecs;  	int i; -	u64 tmp; - -	for (i = 0; i < CRASH_NUM_SPUS; i++) { -		if (!crash_spu_info[i].spu) -			continue; - -		spu = crash_spu_info[i].spu; - -		crash_spu_info[i].saved_spu_runcntl_RW = -			in_be32(&spu->problem->spu_runcntl_RW); -		crash_spu_info[i].saved_spu_status_R = -			in_be32(&spu->problem->spu_status_R); -		crash_spu_info[i].saved_spu_npc_RW = -			in_be32(&spu->problem->spu_npc_RW); -		crash_spu_info[i].saved_mfc_dar    = spu_mfc_dar_get(spu); -		crash_spu_info[i].saved_mfc_dsisr  = spu_mfc_dsisr_get(spu); -		tmp = spu_mfc_sr1_get(spu); -		crash_spu_info[i].saved_mfc_sr1_RW = tmp; - -		tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; -		spu_mfc_sr1_set(spu, tmp); - -		__delay(200); -	} -} - -void crash_register_spus(struct list_head *list) -{ -	struct spu *spu; - -	list_for_each_entry(spu, list, full_list) { -		if (WARN_ON(spu->number >= CRASH_NUM_SPUS)) +	msecs = 
REAL_MODE_TIMEOUT; +	for (i=0; i < nr_cpu_ids && msecs > 0; i++) { +		if (i == cpu)  			continue; -		crash_spu_info[spu->number].spu = spu; +		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { +			barrier(); +			if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0)) +				break; +			msecs--; +			mdelay(1); +		}  	} +	mb();  } -  #else -static inline void crash_kexec_stop_spus(void) -{ -} -#endif /* CONFIG_SPU_BASE */ +static inline void crash_kexec_wait_realmode(int cpu) {} +#endif	/* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */  /*   * Register a function to be called on shutdown.  Only use this if you @@ -374,22 +299,11 @@ int crash_shutdown_unregister(crash_shutdown_t handler)  }  EXPORT_SYMBOL(crash_shutdown_unregister); -static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; -static int crash_shutdown_cpu = -1; - -static int handle_fault(struct pt_regs *regs) -{ -	if (crash_shutdown_cpu == smp_processor_id()) -		longjmp(crash_shutdown_buf, 1); -	return 0; -} -  void default_machine_crash_shutdown(struct pt_regs *regs)  {  	unsigned int i;  	int (*old_handler)(struct pt_regs *regs); -  	/*  	 * This function is only called after the system  	 * has panicked or is otherwise in a critical state. @@ -407,17 +321,26 @@ void default_machine_crash_shutdown(struct pt_regs *regs)  	 * such that another IPI will not be sent.  	 */  	crashing_cpu = smp_processor_id(); -	crash_save_cpu(regs, crashing_cpu); + +	/* +	 * If we came in via system reset, wait a while for the secondary +	 * CPUs to enter. +	 */ +	if (TRAP(regs) == 0x100) +		mdelay(PRIMARY_TIMEOUT); +  	crash_kexec_prepare_cpus(crashing_cpu); -	cpu_set(crashing_cpu, cpus_in_crash); -#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP) + +	crash_save_cpu(regs, crashing_cpu); + +	time_to_dump = 1; +  	crash_kexec_wait_realmode(crashing_cpu); -#endif  	machine_kexec_mask_interrupts();  	/* -	 * Call registered shutdown routines savely.  Swap out +	 * Call registered shutdown routines safely.  
Swap out  	 * __debugger_fault_handler, and replace on exit.  	 */  	old_handler = __debugger_fault_handler; @@ -439,8 +362,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs)  	crash_shutdown_cpu = -1;  	__debugger_fault_handler = old_handler; -	crash_kexec_stop_spus(); -  	if (ppc_md.kexec_cpu_down)  		ppc_md.kexec_cpu_down(1, 0);  }  | 
