1 files changed, 138 insertions, 188 deletions
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 6f4613dd05e..51dbace3269 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -10,84 +10,84 @@
  *
  */
 
-#undef DEBUG
-
 #include <linux/kernel.h>
 #include <linux/smp.h>
 #include <linux/reboot.h>
 #include <linux/kexec.h>
-#include <linux/bootmem.h>
+#include <linux/export.h>
 #include <linux/crash_dump.h>
 #include <linux/delay.h>
-#include <linux/elf.h>
-#include <linux/elfcore.h>
-#include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/types.h>
-#include <linux/lmb.h>
 
 #include <asm/processor.h>
 #include <asm/machdep.h>
 #include <asm/kexec.h>
 #include <asm/kdump.h>
 #include <asm/prom.h>
-#include <asm/firmware.h>
 #include <asm/smp.h>
-#include <asm/system.h>
 #include <asm/setjmp.h>
+#include <asm/debug.h>
 
-#ifdef DEBUG
-#include <asm/udbg.h>
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
+/*
+ * The primary CPU waits a while for all secondary CPUs to enter. This is to
+ * avoid sending an IPI if the secondary CPUs are entering
+ * crash_kexec_secondary on their own (eg via a system reset).
+ *
+ * The secondary timeout has to be longer than the primary. Both timeouts are
+ * in milliseconds.
+ */
+#define PRIMARY_TIMEOUT		500
+#define SECONDARY_TIMEOUT	1000
+
+#define IPI_TIMEOUT		10000
+#define REAL_MODE_TIMEOUT	10000
 
-/* This keeps a track of which one is crashing cpu. */
+/* This keeps a track of which one is the crashing cpu. */
 int crashing_cpu = -1;
-static cpumask_t cpus_in_crash = CPU_MASK_NONE;
-cpumask_t cpus_in_sr = CPU_MASK_NONE;
+static int time_to_dump;
 
-#define CRASH_HANDLER_MAX 2
+#define CRASH_HANDLER_MAX 3
 /* NULL terminated list of shutdown handles */
 static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1];
 static DEFINE_SPINLOCK(crash_handlers_lock);
 
+static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
+static int crash_shutdown_cpu = -1;
+
+static int handle_fault(struct pt_regs *regs)
+{
+	if (crash_shutdown_cpu == smp_processor_id())
+		longjmp(crash_shutdown_buf, 1);
+	return 0;
+}
+
 #ifdef CONFIG_SMP
-static atomic_t enter_on_soft_reset = ATOMIC_INIT(0);
 
+static atomic_t cpus_in_crash;
 void crash_ipi_callback(struct pt_regs *regs)
 {
+	static cpumask_t cpus_state_saved = CPU_MASK_NONE;
+
 	int cpu = smp_processor_id();
 
 	if (!cpu_online(cpu))
 		return;
 
 	hard_irq_disable();
-	if (!cpu_isset(cpu, cpus_in_crash))
+	if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
 		crash_save_cpu(regs, cpu);
-	cpu_set(cpu, cpus_in_crash);
-
-	/*
-	 * Entered via soft-reset - could be the kdump
-	 * process is invoked using soft-reset or user activated
-	 * it if some CPU did not respond to an IPI.
-	 * For soft-reset, the secondary CPU can enter this func
-	 * twice. 1 - using IPI, and 2. soft-reset.
-	 * Tell the kexec CPU that entered via soft-reset and ready
-	 * to go down.
-	 */
-	if (cpu_isset(cpu, cpus_in_sr)) {
-		cpu_clear(cpu, cpus_in_sr);
-		atomic_inc(&enter_on_soft_reset);
+		cpumask_set_cpu(cpu, &cpus_state_saved);
 	}
 
+	atomic_inc(&cpus_in_crash);
+	smp_mb__after_atomic();
+
 	/*
 	 * Starting the kdump boot.
 	 * This barrier is needed to make sure that all CPUs are stopped.
-	 * If not, soft-reset will be invoked to bring other CPUs.
 	 */
-	while (!cpu_isset(crashing_cpu, cpus_in_crash))
+	while (!time_to_dump)
 		cpu_relax();
 
 	if (ppc_md.kexec_cpu_down)
@@ -102,114 +102,108 @@ void crash_ipi_callback(struct pt_regs *regs)
 	/* NOTREACHED */
 }
 
-/*
- * Wait until all CPUs are entered via soft-reset.
- */
-static void crash_soft_reset_check(int cpu)
-{
-	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
-
-	cpu_clear(cpu, cpus_in_sr);
-	while (atomic_read(&enter_on_soft_reset) != ncpus)
-		cpu_relax();
-}
-
-
 static void crash_kexec_prepare_cpus(int cpu)
 {
 	unsigned int msecs;
-
 	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+	int tries = 0;
+	int (*old_handler)(struct pt_regs *regs);
+
+	printk(KERN_EMERG "Sending IPI to other CPUs\n");
 
 	crash_send_ipi(crash_ipi_callback);
 	smp_wmb();
 
+again:
 	/*
-	 * FIXME: Until we will have the way to stop other CPUSs reliabally,
+	 * FIXME: Until we will have the way to stop other CPUs reliably,
 	 * the crash CPU will send an IPI and wait for other CPUs to
 	 * respond.
-	 * Delay of at least 10 seconds.
 	 */
-	printk(KERN_EMERG "Sending IPI to other cpus...\n");
-	msecs = 10000;
-	while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) {
-		cpu_relax();
+	msecs = IPI_TIMEOUT;
+	while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
 		mdelay(1);
-	}
 
 	/* Would it be better to replace the trap vector here? */
 
+	if (atomic_read(&cpus_in_crash) >= ncpus) {
+		printk(KERN_EMERG "IPI complete\n");
+		return;
+	}
+
+	printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
+		ncpus - atomic_read(&cpus_in_crash));
+
 	/*
-	 * FIXME: In case if we do not get all CPUs, one possibility: ask the
-	 * user to do soft reset such that we get all.
-	 * Soft-reset will be used until better mechanism is implemented.
+	 * If we have a panic timeout set then we can't wait indefinitely
+	 * for someone to activate system reset. We also give up on the
+	 * second time through if system reset fail to work.
 	 */
-	if (cpus_weight(cpus_in_crash) < ncpus) {
-		printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n",
-			ncpus - cpus_weight(cpus_in_crash));
-		printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n");
-		cpus_in_sr = CPU_MASK_NONE;
-		atomic_set(&enter_on_soft_reset, 0);
-		while (cpus_weight(cpus_in_crash) < ncpus)
-			cpu_relax();
-	}
+	if ((panic_timeout > 0) || (tries > 0))
+		return;
+
 	/*
-	 * Make sure all CPUs are entered via soft-reset if the kdump is
-	 * invoked using soft-reset.
+	 * A system reset will cause all CPUs to take an 0x100 exception.
+	 * The primary CPU returns here via setjmp, and the secondary
+	 * CPUs reexecute the crash_kexec_secondary path.
 	 */
-	if (cpu_isset(cpu, cpus_in_sr))
-		crash_soft_reset_check(cpu);
-	/* Leave the IPI callback set */
+	old_handler = __debugger;
+	__debugger = handle_fault;
+	crash_shutdown_cpu = smp_processor_id();
+
+	if (setjmp(crash_shutdown_buf) == 0) {
+		printk(KERN_EMERG "Activate system reset (dumprestart) "
+				  "to stop other cpu(s)\n");
+
+		/*
+		 * A system reset will force all CPUs to execute the
+		 * crash code again. We need to reset cpus_in_crash so we
+		 * wait for everyone to do this.
+		 */
+		atomic_set(&cpus_in_crash, 0);
+		smp_mb();
+
+		while (atomic_read(&cpus_in_crash) < ncpus)
+			cpu_relax();
+	}
+
+	crash_shutdown_cpu = -1;
+	__debugger = old_handler;
+
+	tries++;
+	goto again;
 }
 
 /*
- * This function will be called by secondary cpus or by kexec cpu
- * if soft-reset is activated to stop some CPUs.
+ * This function will be called by secondary cpus.
  */
 void crash_kexec_secondary(struct pt_regs *regs)
 {
-	int cpu = smp_processor_id();
 	unsigned long flags;
-	int msecs = 5;
+	int msecs = SECONDARY_TIMEOUT;
 
 	local_irq_save(flags);
-	/* Wait 5ms if the kexec CPU is not entered yet. */
+
+	/* Wait for the primary crash CPU to signal its progress */
 	while (crashing_cpu < 0) {
 		if (--msecs < 0) {
-			/*
-			 * Either kdump image is not loaded or
-			 * kdump process is not started - Probably xmon
-			 * exited using 'x'(exit and recover) or
-			 * kexec_should_crash() failed for all running tasks.
-			 */
-			cpu_clear(cpu, cpus_in_sr);
+			/* No response, kdump image may not have been loaded */
 			local_irq_restore(flags);
 			return;
 		}
+
 		mdelay(1);
-		cpu_relax();
-	}
-	if (cpu == crashing_cpu) {
-		/*
-		 * Panic CPU will enter this func only via soft-reset.
-		 * Wait until all secondary CPUs entered and
-		 * then start kexec boot.
-		 */
-		crash_soft_reset_check(cpu);
-		cpu_set(crashing_cpu, cpus_in_crash);
-		if (ppc_md.kexec_cpu_down)
-			ppc_md.kexec_cpu_down(1, 0);
-		machine_kexec(kexec_crash_image);
-		/* NOTREACHED */
 	}
+
 	crash_ipi_callback(regs);
 }
 
-#else
+#else	/* ! CONFIG_SMP */
+
 static void crash_kexec_prepare_cpus(int cpu)
 {
 	/*
-	 * move the secondarys to us so that we can copy
+	 * move the secondaries to us so that we can copy
 	 * the new kernel 0-0x100 safely
 	 *
 	 * do this if kexec in setup.c ?
@@ -223,75 +217,34 @@ static void crash_kexec_prepare_cpus(int cpu)
 
 void crash_kexec_secondary(struct pt_regs *regs)
 {
-	cpus_in_sr = CPU_MASK_NONE;
 }
-#endif
-#ifdef CONFIG_SPU_BASE
+#endif	/* CONFIG_SMP */
 
-#include <asm/spu.h>
-#include <asm/spu_priv1.h>
-
-struct crash_spu_info {
-	struct spu *spu;
-	u32 saved_spu_runcntl_RW;
-	u32 saved_spu_status_R;
-	u32 saved_spu_npc_RW;
-	u64 saved_mfc_sr1_RW;
-	u64 saved_mfc_dar;
-	u64 saved_mfc_dsisr;
-};
-
-#define CRASH_NUM_SPUS	16	/* Enough for current hardware */
-static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
-
-static void crash_kexec_stop_spus(void)
+/* wait for all the CPUs to hit real mode but timeout if they don't come in */
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64)
+static void crash_kexec_wait_realmode(int cpu)
 {
-	struct spu *spu;
+	unsigned int msecs;
 	int i;
-	u64 tmp;
-
-	for (i = 0; i < CRASH_NUM_SPUS; i++) {
-		if (!crash_spu_info[i].spu)
-			continue;
-
-		spu = crash_spu_info[i].spu;
 
-		crash_spu_info[i].saved_spu_runcntl_RW =
-			in_be32(&spu->problem->spu_runcntl_RW);
-		crash_spu_info[i].saved_spu_status_R =
-			in_be32(&spu->problem->spu_status_R);
-		crash_spu_info[i].saved_spu_npc_RW =
-			in_be32(&spu->problem->spu_npc_RW);
-
-		crash_spu_info[i].saved_mfc_dar    = spu_mfc_dar_get(spu);
-		crash_spu_info[i].saved_mfc_dsisr  = spu_mfc_dsisr_get(spu);
-		tmp = spu_mfc_sr1_get(spu);
-		crash_spu_info[i].saved_mfc_sr1_RW = tmp;
-
-		tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
-		spu_mfc_sr1_set(spu, tmp);
-
-		__delay(200);
-	}
-}
-
-void crash_register_spus(struct list_head *list)
-{
-	struct spu *spu;
-
-	list_for_each_entry(spu, list, full_list) {
-		if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
+	msecs = REAL_MODE_TIMEOUT;
+	for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
+		if (i == cpu)
 			continue;
 
-		crash_spu_info[spu->number].spu = spu;
+		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+			barrier();
+			if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
+				break;
+			msecs--;
+			mdelay(1);
+		}
 	}
+	mb();
 }
-
 #else
-static inline void crash_kexec_stop_spus(void)
-{
-}
-#endif /* CONFIG_SPU_BASE */
+static inline void crash_kexec_wait_realmode(int cpu) {}
+#endif	/* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */
 
 /*
  * Register a function to be called on shutdown.  Only use this if you
@@ -346,20 +299,11 @@ int crash_shutdown_unregister(crash_shutdown_t handler)
 }
 EXPORT_SYMBOL(crash_shutdown_unregister);
 
-static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
-
-static int handle_fault(struct pt_regs *regs)
-{
-	longjmp(crash_shutdown_buf, 1);
-	return 0;
-}
-
 void default_machine_crash_shutdown(struct pt_regs *regs)
 {
 	unsigned int i;
 	int (*old_handler)(struct pt_regs *regs);
 
-
 	/*
 	 * This function is only called after the system
 	 * has panicked or is otherwise in a critical state.
@@ -372,22 +316,36 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	 */
 	hard_irq_disable();
 
-	for_each_irq(i) {
-		struct irq_desc *desc = irq_to_desc(i);
+	/*
+	 * Make a note of crashing cpu. Will be used in machine_kexec
+	 * such that another IPI will not be sent.
+	 */
+	crashing_cpu = smp_processor_id();
 
-		if (desc->status & IRQ_INPROGRESS)
-			desc->chip->eoi(i);
+	/*
+	 * If we came in via system reset, wait a while for the secondary
+	 * CPUs to enter.
+	 */
+	if (TRAP(regs) == 0x100)
+		mdelay(PRIMARY_TIMEOUT);
 
-		if (!(desc->status & IRQ_DISABLED))
-			desc->chip->disable(i);
-	}
+	crash_kexec_prepare_cpus(crashing_cpu);
+
+	crash_save_cpu(regs, crashing_cpu);
+
+	time_to_dump = 1;
+
+	crash_kexec_wait_realmode(crashing_cpu);
+
+	machine_kexec_mask_interrupts();
 
 	/*
-	 * Call registered shutdown routines savely.  Swap out
+	 * Call registered shutdown routines safely.  Swap out
 	 * __debugger_fault_handler, and replace on exit.
 	 */
 	old_handler = __debugger_fault_handler;
 	__debugger_fault_handler = handle_fault;
+	crash_shutdown_cpu = smp_processor_id();
 	for (i = 0; crash_shutdown_handles[i]; i++) {
 		if (setjmp(crash_shutdown_buf) == 0) {
 			/*
@@ -401,17 +359,9 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 			asm volatile("sync; isync");
 		}
 	}
+	crash_shutdown_cpu = -1;
 	__debugger_fault_handler = old_handler;
 
-	/*
-	 * Make a note of crashing cpu. Will be used in machine_kexec
-	 * such that another IPI will not be sent.
-	 */
-	crashing_cpu = smp_processor_id();
-	crash_save_cpu(regs, crashing_cpu);
-	crash_kexec_prepare_cpus(crashing_cpu);
-	cpu_set(crashing_cpu, cpus_in_crash);
-	crash_kexec_stop_spus();
 	if (ppc_md.kexec_cpu_down)
 		ppc_md.kexec_cpu_down(1, 0);
 }