aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZou Nan hai <nanhai.zou@intel.com>2006-12-07 09:51:35 -0800
committerTony Luck <tony.luck@intel.com>2006-12-07 09:51:35 -0800
commita79561134f38de12dce14ed72138f38e55ef53fc (patch)
treeabe109dbe85e5b0085ba9b9a7eed7cc623d67eec
parent620034c84d1d939717bdfbe02c51a3fee43541c3 (diff)
[IA64] IA64 Kexec/kdump
Changes and updates. 1. Remove fake rendz path and related code according to discuss with Khalid Aziz. 2. fc.i offset fix in relocate_kernel.S. 3. iospic shutdown code eoi and mask race fix from Fujitsu. 4. Warm boot hook in machine_kexec to SN SAL code from Jack Steiner. 5. Send slave to SAL slave loop patch from Jay Lan. 6. Kdump on non-recoverable MCA event patch from Jay Lan 7. Use CTL_UNNUMBERED in kdump_on_init sysctl. Signed-off-by: Zou Nan hai <nanhai.zou@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--arch/ia64/Kconfig23
-rw-r--r--arch/ia64/kernel/Makefile1
-rw-r--r--arch/ia64/kernel/crash.c245
-rw-r--r--arch/ia64/kernel/efi.c65
-rw-r--r--arch/ia64/kernel/entry.S2
-rw-r--r--arch/ia64/kernel/iosapic.c21
-rw-r--r--arch/ia64/kernel/machine_kexec.c133
-rw-r--r--arch/ia64/kernel/mca.c5
-rw-r--r--arch/ia64/kernel/relocate_kernel.S334
-rw-r--r--arch/ia64/kernel/setup.c38
-rw-r--r--arch/ia64/kernel/smp.c28
-rw-r--r--arch/ia64/sn/kernel/setup.c8
-rw-r--r--include/asm-ia64/kexec.h47
-rw-r--r--include/asm-ia64/machvec.h5
-rw-r--r--include/asm-ia64/machvec_sn2.h2
-rw-r--r--include/asm-ia64/meminit.h3
-rw-r--r--include/asm-ia64/sn/sn_sal.h9
-rw-r--r--include/linux/kexec.h5
-rw-r--r--kernel/kexec.c1
19 files changed, 969 insertions, 6 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 683b12c6f76..75d839715b2 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -434,6 +434,29 @@ config IA64_ESI
source "drivers/sn/Kconfig"
+config KEXEC
+ bool "kexec system call (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is indepedent of the system firmware. And like a reboot
+ you can start any kernel with it, not just Linux.
+
+ The name comes from the similiarity to the exec system call.
+
+ It is an ongoing process to be certain the hardware in a machine
+ is properly shutdown, so do not be surprised if this code does not
+ initially work for you. It may help to enable device hotplugging
+ support. As of this writing the exact hardware interface is
+ strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+ bool "kernel crash dumps (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+ help
+ Generate crash dump after being started by kexec.
+
source "drivers/firmware/Kconfig"
source "fs/Kconfig.binfmt"
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index cfa099b04cd..8ae384eb535 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_PCI_MSI) += msi_ia64.o
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
new file mode 100644
index 00000000000..0aabedf95da
--- /dev/null
+++ b/arch/ia64/kernel/crash.c
@@ -0,0 +1,245 @@
+/*
+ * arch/ia64/kernel/crash.c
+ *
+ * Architecture specific (ia64) functions for kexec based crash dumps.
+ *
+ * Created by: Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com>
+ *
+ */
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
+#include <linux/bootmem.h>
+#include <linux/kexec.h>
+#include <linux/elfcore.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+
+#include <asm/kdebug.h>
+#include <asm/mca.h>
+#include <asm/uaccess.h>
+
+int kdump_status[NR_CPUS];
+atomic_t kdump_cpu_freezed;
+atomic_t kdump_in_progress;
+int kdump_on_init = 1;
+ssize_t
+copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset, int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+ vaddr = __va(pfn<<PAGE_SHIFT);
+ if (userbuf) {
+ if (copy_to_user(buf, (vaddr + offset), csize)) {
+ return -EFAULT;
+ }
+ } else
+ memcpy(buf, (vaddr + offset), csize);
+ return csize;
+}
+
+static inline Elf64_Word
+*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
+ size_t data_len)
+{
+ struct elf_note *note = (struct elf_note *)buf;
+ note->n_namesz = strlen(name) + 1;
+ note->n_descsz = data_len;
+ note->n_type = type;
+ buf += (sizeof(*note) + 3)/4;
+ memcpy(buf, name, note->n_namesz);
+ buf += (note->n_namesz + 3)/4;
+ memcpy(buf, data, data_len);
+ buf += (data_len + 3)/4;
+ return buf;
+}
+
+static void
+final_note(void *buf)
+{
+ memset(buf, 0, sizeof(struct elf_note));
+}
+
+extern void ia64_dump_cpu_regs(void *);
+
+static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
+
+void
+crash_save_this_cpu()
+{
+ void *buf;
+ unsigned long cfm, sof, sol;
+
+ int cpu = smp_processor_id();
+ struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
+
+ elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
+ memset(prstatus, 0, sizeof(*prstatus));
+ prstatus->pr_pid = current->pid;
+
+ ia64_dump_cpu_regs(dst);
+ cfm = dst[43];
+ sol = (cfm >> 7) & 0x7f;
+ sof = cfm & 0x7f;
+ dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
+ sof - sol);
+
+ buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
+ if (!buf)
+ return;
+ buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus,
+ sizeof(*prstatus));
+ final_note(buf);
+}
+
+static int
+kdump_wait_cpu_freeze(void)
+{
+ int cpu_num = num_online_cpus() - 1;
+ int timeout = 1000;
+ while(timeout-- > 0) {
+ if (atomic_read(&kdump_cpu_freezed) == cpu_num)
+ return 0;
+ udelay(1000);
+ }
+ return 1;
+}
+
+void
+machine_crash_shutdown(struct pt_regs *pt)
+{
+ /* This function is only called after the system
+ * has paniced or is otherwise in a critical state.
+ * The minimum amount of code to allow a kexec'd kernel
+ * to run successfully needs to happen here.
+ *
+ * In practice this means shooting down the other cpus in
+ * an SMP system.
+ */
+ kexec_disable_iosapic();
+#ifdef CONFIG_SMP
+ kdump_smp_send_stop();
+ if (kdump_wait_cpu_freeze() && kdump_on_init) {
+ //not all cpu response to IPI, send INIT to freeze them
+ kdump_smp_send_init();
+ }
+#endif
+}
+
+static void
+machine_kdump_on_init(void)
+{
+ local_irq_disable();
+ kexec_disable_iosapic();
+ machine_kexec(ia64_kimage);
+}
+
+void
+kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
+{
+ int cpuid;
+ local_irq_disable();
+ cpuid = smp_processor_id();
+ crash_save_this_cpu();
+ current->thread.ksp = (__u64)info->sw - 16;
+ atomic_inc(&kdump_cpu_freezed);
+ kdump_status[cpuid] = 1;
+ mb();
+ if (cpuid == 0) {
+ for (;;)
+ cpu_relax();
+ } else
+ ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]);
+}
+
+static int
+kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
+{
+ struct ia64_mca_notify_die *nd;
+ struct die_args *args = data;
+
+ if (!kdump_on_init)
+ return NOTIFY_DONE;
+
+ if (val != DIE_INIT_MONARCH_ENTER &&
+ val != DIE_INIT_SLAVE_ENTER &&
+ val != DIE_MCA_RENDZVOUS_LEAVE &&
+ val != DIE_MCA_MONARCH_LEAVE)
+ return NOTIFY_DONE;
+
+ nd = (struct ia64_mca_notify_die *)args->err;
+ /* Reason code 1 means machine check rendezous*/
+ if ((val == DIE_INIT_MONARCH_ENTER || DIE_INIT_SLAVE_ENTER) &&
+ nd->sos->rv_rc == 1)
+ return NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_INIT_MONARCH_ENTER:
+ machine_kdump_on_init();
+ break;
+ case DIE_INIT_SLAVE_ENTER:
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ case DIE_MCA_RENDZVOUS_LEAVE:
+ if (atomic_read(&kdump_in_progress))
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ case DIE_MCA_MONARCH_LEAVE:
+ /* die_register->signr indicate if MCA is recoverable */
+ if (!args->signr)
+ machine_kdump_on_init();
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+#ifdef CONFIG_SYSCTL
+static ctl_table kdump_on_init_table[] = {
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "kdump_on_init",
+ .data = &kdump_on_init,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table sys_table[] = {
+ {
+ .ctl_name = CTL_KERN,
+ .procname = "kernel",
+ .mode = 0555,
+ .child = kdump_on_init_table,
+ },
+ { .ctl_name = 0 }
+};
+#endif
+
+static int
+machine_crash_setup(void)
+{
+ char *from = strstr(saved_command_line, "elfcorehdr=");
+ static struct notifier_block kdump_init_notifier_nb = {
+ .notifier_call = kdump_init_notifier,
+ };
+ int ret;
+ if (from)
+ elfcorehdr_addr = memparse(from+11, &from);
+ saved_max_pfn = (unsigned long)-1;
+ if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
+ return ret;
+#ifdef CONFIG_SYSCTL
+ register_sysctl_table(sys_table, 0);
+#endif
+ return 0;
+}
+
+__initcall(machine_crash_setup);
+
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index bb8770a177b..9b96e7dbaf6 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -26,6 +26,7 @@
#include <linux/types.h>
#include <linux/time.h>
#include <linux/efi.h>
+#include <linux/kexec.h>
#include <asm/io.h>
#include <asm/kregs.h>
@@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void *, ...);
struct efi efi;
EXPORT_SYMBOL(efi);
static efi_runtime_services_t *runtime;
-static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
+static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
#define efi_call_virt(f, args...) (*(f))(args)
@@ -421,6 +422,8 @@ efi_init (void)
mem_limit = memparse(cp + 4, &cp);
} else if (memcmp(cp, "max_addr=", 9) == 0) {
max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
+ } else if (memcmp(cp, "min_addr=", 9) == 0) {
+ min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
} else {
while (*cp != ' ' && *cp)
++cp;
@@ -428,6 +431,8 @@ efi_init (void)
++cp;
}
}
+ if (min_addr != 0UL)
+ printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
if (max_addr != ~0UL)
printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
@@ -894,7 +899,8 @@ find_memmap_space (void)
as = max(contig_low, md->phys_addr);
ae = min(contig_high, efi_md_end(md));
- /* keep within max_addr= command line arg */
+ /* keep within max_addr= and min_addr= command line arg */
+ as = max(as, min_addr);
ae = min(ae, max_addr);
if (ae <= as)
continue;
@@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
} else
ae = efi_md_end(md);
- /* keep within max_addr= command line arg */
+ /* keep within max_addr= and min_addr= command line arg */
+ as = max(as, min_addr);
ae = min(ae, max_addr);
if (ae <= as)
continue;
@@ -1116,6 +1123,58 @@ efi_initialize_iomem_resources(struct resource *code_resource,
*/
insert_resource(res, code_resource);
insert_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+ insert_resource(res, &efi_memmap_res);
+ insert_resource(res, &boot_param_res);
+ if (crashk_res.end > crashk_res.start)
+ insert_resource(res, &crashk_res);
+#endif
}
}
}
+
+#ifdef CONFIG_KEXEC
+/* find a block of memory aligned to 64M exclude reserved regions
+ rsvd_regions are sorted
+ */
+unsigned long
+kdump_find_rsvd_region (unsigned long size,
+ struct rsvd_region *r, int n)
+{
+ int i;
+ u64 start, end;
+ u64 alignment = 1UL << _PAGE_SIZE_64M;
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ if (!efi_wb(md))
+ continue;
+ start = ALIGN(md->phys_addr, alignment);
+ end = efi_md_end(md);
+ for (i = 0; i < n; i++) {
+ if (__pa(r[i].start) >= start && __pa(r[i].end) < end) {
+ if (__pa(r[i].start) > start + size)
+ return start;
+ start = ALIGN(__pa(r[i].end), alignment);
+ if (i < n-1 && __pa(r[i+1].start) < start + size)
+ continue;
+ else
+ break;
+ }
+ }
+ if (end > start + size)
+ return start;
+ }
+
+ printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n",
+ size);
+ return ~0UL;
+}
+#endif
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 3390b7c5a63..15234ed3a34 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1575,7 +1575,7 @@ sys_call_table:
data8 sys_mq_timedreceive // 1265
data8 sys_mq_notify
data8 sys_mq_getsetattr
- data8 sys_ni_syscall // reserved for kexec_load
+ data8 sys_kexec_load
data8 sys_ni_syscall // reserved for vserver
data8 sys_waitid // 1270
data8 sys_add_key
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 60d64950e3c..0fc5fb7865c 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -288,6 +288,27 @@ nop (unsigned int irq)
/* do nothing... */
}
+
+#ifdef CONFIG_KEXEC
+void
+kexec_disable_iosapic(void)
+{
+ struct iosapic_intr_info *info;
+ struct iosapic_rte_info *rte;
+ u8 vec = 0;
+ for (info = iosapic_intr_info; info <
+ iosapic_intr_info + IA64_NUM_VECTORS; ++info, ++vec) {
+ list_for_each_entry(rte, &info->rtes,
+ rte_list) {
+ iosapic_write(rte->addr,
+ IOSAPIC_RTE_LOW(rte->rte_index),
+ IOSAPIC_MASK|vec);
+ iosapic_eoi(rte->addr, vec);
+ }
+ }
+}
+#endif
+
static void
mask_irq (unsigned int irq)
{
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
new file mode 100644
index 00000000000..468233fa2ce
--- /dev/null
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -0,0 +1,133 @@
+/*
+ * arch/ia64/kernel/machine_kexec.c
+ *
+ * Handle transition of Linux booting another kernel
+ * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/delay.h>
+#include <asm/meminit.h>
+
+typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long,
+ struct ia64_boot_param *, unsigned long);
+
+struct kimage *ia64_kimage;
+
+struct resource efi_memmap_res = {
+ .name = "EFI Memory Map",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource boot_param_res = {
+ .name = "Boot parameter",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+
+/*
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ void *control_code_buffer;
+ const unsigned long *func;
+
+ func = (unsigned long *)&relocate_new_kernel;
+ /* Pre-load control code buffer to minimize work in kexec path */
+ control_code_buffer = page_address(image->control_code_page);
+ memcpy((void *)control_code_buffer, (const void *)func[0],
+ relocate_new_kernel_size);
+ flush_icache_range((unsigned long)control_code_buffer,
+ (unsigned long)control_code_buffer + relocate_new_kernel_size);
+ ia64_kimage = image;
+
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void machine_shutdown(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ if (cpu != smp_processor_id())
+ cpu_down(cpu);
+ }
+ kexec_disable_iosapic();
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+extern void *efi_get_pal_addr(void);
+static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
+{
+ struct kimage *image = arg;
+ relocate_new_kernel_t rnk;
+ void *pal_addr = efi_get_pal_addr();
+ unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
+ unsigned long vector;
+ int ii;
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ crash_save_this_cpu();
+ current->thread.ksp = (__u64)info->sw - 16;
+ }
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+ /* Mask CMC and Performance Monitor interrupts */
+ ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+ ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+ /* Mask ITV and Local Redirect Registers */
+ ia64_set_itv(1 << 16);
+ ia64_set_lrr0(1 << 16);
+ ia64_set_lrr1(1 << 16);
+
+ /* terminate possible nested in-service interrupts */
+ for (ii = 0; ii < 16; ii++)
+ ia64_eoi();
+
+ /* unmask TPR and clear any pending interrupts */
+ ia64_setreg(_IA64_REG_CR_TPR, 0);
+ ia64_srlz_d();
+ vector = ia64_get_ivr();
+ while (vector != IA64_SPURIOUS_INT_VECTOR) {
+ ia64_eoi();
+ vector = ia64_get_ivr();
+ }
+ platform_kernel_launch_event();
+ rnk = (relocate_new_kernel_t)&code_addr;
+ (*rnk)(image->head, image->start, ia64_boot_param,
+ GRANULEROUNDDOWN((unsigned long) pal_addr));
+ BUG();
+}
+
+void machine_kexec(struct kimage *image)
+{
+ unw_init_running(ia64_machine_kexec, image);
+ for(;;);
+}
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6bedd97570c..87c1c4f4287 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -82,6 +82,7 @@
#include <asm/system.h>
#include <asm/sal.h>
#include <asm/mca.h>
+#include <asm/kexec.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
@@ -1238,6 +1239,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
} else {
/* Dump buffered message to console */
ia64_mlogbuf_finish(1);
+#ifdef CONFIG_CRASH_DUMP
+ atomic_set(&kdump_in_progress, 1);
+ monarch_cpu = -1;
+#endif
}
if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
== NOTIFY_STOP)
diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S
new file mode 100644
index 00000000000..ae473e3f2a0
--- /dev/null
+++ b/arch/ia64/kernel/relocate_kernel.S
@@ -0,0 +1,334 @@
+/*
+ * arch/ia64/kernel/relocate_kernel.S
+ *
+ * Relocate kexec'able kernel and start it
+ *
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mca_asm.h>
+
+ /* Must be relocatable PIC code callable as a C function
+ */
+GLOBAL_ENTRY(relocate_new_kernel)
+ .prologue
+ alloc r31=ar.pfs,4,0,0,0
+ .body
+.reloc_entry:
+{
+ rsm psr.i| psr.ic
+ mov r2=ip
+}
+ ;;
+{
+ flushrs // must be first insn in group
+ srlz.i
+}
+ ;;
+ dep r2=0,r2,61,3 //to physical address
+ ;;
+ //first switch to physical mode
+ add r3=1f-.reloc_entry, r2
+ movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC
+ mov ar.rsc=0 // put RSE in enforced lazy mode
+ ;;
+ add sp=(memory_stack_end - 16 - .reloc_entry),r2
+ add r8=(register_stack - .reloc_entry),r2
+ ;;
+ mov r18=ar.rnat
+ mov ar.bspstore=r8
+ ;;
+ mov cr.ipsr=r16
+ mov cr.iip=r3
+ mov cr.ifs=r0
+ srlz.i
+ ;;
+ mov ar.rnat=r18
+ rfi
+ ;;
+1:
+ //physical mode code begin
+ mov b6=in1
+ dep r28=0,in2,61,3 //to physical address
+
+ // purge all TC entries
+#define O(member) IA64_CPUINFO_##member##_OFFSET
+ GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2
+ ;;
+ addl r17=O(PTCE_STRIDE),r2
+ addl r2=O(PTCE_BASE),r2
+ ;;
+ ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base
+ ld4 r19=[r2],4 // r19=ptce_count[0]
+ ld4 r21=[r17],4 // r21=ptce_stride[0]
+ ;;
+ ld4 r20=[r2] // r20=ptce_count[1]
+ ld4 r22=[r17] // r22=ptce_stride[1]
+ mov r24=r0
+ ;;
+ adds r20=-1,r20
+ ;;
+#undef O
+2:
+ cmp.ltu p6,p7=r24,r19
+(p7) br.cond.dpnt.few 4f
+ mov ar.lc=r20
+3:
+ ptc.e r18
+ ;;
+ add r18=r22,r18
+ br.cloop.sptk.few 3b
+ ;;
+ add r18=r21,r18
+ add r24=1,r24
+ ;;
+ br.sptk.few 2b
+4:
+ srlz.i
+ ;;
+ //purge TR entry for kernel text and data
+ movl r16=KERNEL_START
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ ;;
+ ptr.i r16, r18
+ ptr.d r16, r18
+ ;;
+ srlz.i
+ ;;
+
+ // purge TR entry for percpu data
+ movl r16=PERCPU_ADDR
+ mov r18=PERCPU_PAGE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.d
+ ;;
+
+ // purge TR entry for pal code
+ mov r16=in3
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.i r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ // purge TR entry for stack
+ mov r16=IA64_KR(CURRENT_STACK)
+ ;;
+ shl r16=r16,IA64_GRANULE_SHIFT
+ movl r19=PAGE_OFFSET
+ ;;
+ add r16=r19,r16
+ mov r18=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.d r16,r18
+ ;;
+ srlz.i
+ ;;
+
+ //copy segments
+ movl r16=PAGE_MASK
+ mov r30=in0 // in0 is page_list
+ br.sptk.few .dest_page
+ ;;
+.loop:
+ ld8 r30=[in0], 8;;
+.dest_page:
+ tbit.z p0, p6=r30, 0;; // 0x1 dest page
+(p6) and r17=r30, r16
+(p6) br.cond.sptk.few .loop;;
+
+ tbit.z p0, p6=r30, 1;; // 0x2 indirect page
+(p6) and in0=r30, r16
+(p6) br.cond.sptk.few .loop;;
+
+ tbit.z p0, p6=r30, 2;; // 0x4 end flag
+(p6) br.cond.sptk.few .end_loop;;
+
+ tbit.z p6, p0=r30, 3;; // 0x8 source page
+(p6) br.cond.sptk.few .loop
+
+ and r18=r30, r16
+
+ // simple copy page, may optimize later
+ movl r14=PAGE_SIZE/8 - 1;;
+ mov ar.lc=r14;;
+1:
+ ld8 r14=[r18], 8;;
+ st8 [r17]=r14;;
+ fc.i r17
+ add r17=8, r17
+ br.ctop.sptk.few 1b
+ br.sptk.few .loop
+ ;;
+
+.end_loop:
+ sync.i // for fc.i
+ ;;
+ srlz.i
+ ;;
+ srlz.d
+ ;;
+ br.call.sptk.many b0=b6;;
+
+.align 32
+memory_stack:
+ .fill 8192, 1, 0
+memory_stack_end:
+register_stack:
+ .fill 8192, 1, 0
+register_stack_end:
+relocate_new_kernel_end:
+END(relocate_new_kernel)
+
+.global relocate_new_kernel_size
+relocate_new_kernel_size:
+ data8 relocate_new_kernel_end - relocate_new_kernel
+
+GLOBAL_ENTRY(ia64_dump_cpu_regs)
+ .prologue
+ alloc loc0=ar.pfs,1,2,0,0
+ .body
+ mov ar.rsc=0 // put RSE in enforced lazy mode
+ add loc1=4*8, in0 // save r4 and r5 first
+ ;;
+{
+ flushrs // flush dirty regs to backing store
+ srlz.i
+}
+ st8 [loc1]=r4, 8
+ ;;
+ st8 [loc1]=r5, 8
+ ;;
+ add loc1=32*8, in0
+ mov r4=ar.rnat
+ ;;
+ st8 [in0]=r0, 8 // r0
+ st8 [loc1]=r4, 8 // rnat
+ mov r5=pr
+ ;;
+ st8 [in0]=r1, 8 // r1
+ st8 [loc1]=r5, 8 // pr
+ mov r4=b0
+ ;;
+ st8 [in0]=r2, 8 // r2
+ st8 [loc1]=r4, 8 // b0
+ mov r5=b1;
+ ;;
+ st8 [in0]=r3, 24 // r3
+ st8 [loc1]=r5, 8 // b1
+ mov r4=b2
+ ;;
+ st8 [in0]=r6, 8 // r6
+ st8 [loc1]=r4, 8 // b2
+ mov r5=b3
+ ;;
+ st8 [in0]=r7, 8 // r7
+ st8 [loc1]=r5, 8 // b3
+ mov r4=b4
+ ;;
+ st8 [in0]=r8, 8 // r8
+ st8 [loc1]=r4, 8 // b4
+ mov r5=b5
+ ;;
+ st8 [in0]=r9, 8 // r9
+ st8 [loc1]=r5, 8 // b5
+ mov r4=b6
+ ;;
+ st8 [in0]=r10, 8 // r10
+ st8 [loc1]=r5, 8 // b6
+ mov r5=b7
+ ;;
+ st8 [in0]=r11, 8 // r11
+ st8 [loc1]=r5, 8 // b7
+ mov r4=b0
+ ;;
+ st8 [in0]=r12, 8 // r12
+ st8 [loc1]=r4, 8 // ip
+ mov r5=loc0
+ ;;
+ st8 [in0]=r13, 8 // r13
+ extr.u r5=r5, 0, 38 // ar.pfs.pfm
+ mov r4=r0 // user mask
+ ;;
+ st8 [in0]=r14, 8 // r14
+ st8 [loc1]=r5, 8 // cfm
+ ;;
+ st8 [in0]=r15, 8 // r15
+ st8 [loc1]=r4, 8 // user mask
+ mov r5=ar.rsc
+ ;;
+ st8 [in0]=r16, 8 // r16
+ st8 [loc1]=r5, 8 // ar.rsc
+ mov r4=ar.bsp
+ ;;
+ st8 [in0]=r17, 8 // r17
+ st8 [loc1]=r4, 8 // ar.bsp
+ mov r5=ar.bspstore
+ ;;
+ st8 [in0]=r18, 8 // r18
+ st8 [loc1]=r5, 8 // ar.bspstore
+ mov r4=ar.rnat
+ ;;
+ st8 [in0]=r19, 8 // r19
+ st8 [loc1]=r4, 8 // ar.rnat
+ mov r5=ar.ccv
+ ;;
+ st8 [in0]=r20, 8 // r20
+ st8 [loc1]=r5, 8 // ar.ccv
+ mov r4=ar.unat
+ ;;
+ st8 [in0]=r21, 8 // r21
+ st8 [loc1]=r4, 8 // ar.unat
+ mov r5 = ar.fpsr
+ ;;
+ st8 [in0]=r22, 8 // r22
+ st8 [loc1]=r5, 8 // ar.fpsr
+ mov r4 = ar.unat
+ ;;
+ st8 [in0]=r23, 8 // r23
+ st8 [loc1]=r4, 8 // unat
+ mov r5 = ar.fpsr
+ ;;
+ st8 [in0]=r24, 8 // r24
+ st8 [loc1]=r5, 8 // fpsr
+ mov r4 = ar.pfs
+ ;;
+ st8 [in0]=r25, 8 // r25
+ st8 [loc1]=r4, 8 // ar.pfs
+ mov r5 = ar.lc
+ ;;
+ st8 [in0]=r26, 8 // r26
+ st8 [loc1]=r5, 8 // ar.lc
+ mov r4 = ar.ec
+ ;;
+ st8 [in0]=r27, 8 // r27
+ st8 [loc1]=r4, 8 // ar.ec
+ mov r5 = ar.csd
+ ;;
+ st8 [in0]=r28, 8 // r28
+ st8 [loc1]=r5, 8 // ar.csd
+ mov r4 = ar.ssd
+ ;;
+ st8 [in0]=r29, 8 // r29
+ st8 [loc1]=r4, 8 // ar.ssd
+ ;;
+ st8 [in0]=r30, 8 // r30
+ ;;
+ st8 [in0]=r31, 8 // r31
+ mov ar.pfs=loc0
+ ;;
+ br.ret.sptk.many rp
+END(ia64_dump_cpu_regs)
+
+
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index d10404a4175..14e1200376a 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -43,6 +43,8 @@
#include <linux/initrd.h>
#include <linux/pm.h>
#include <linux/cpufreq.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
#include <asm/ia32.h>
#include <asm/machvec.h>
@@ -252,6 +254,41 @@ reserve_memory (void)
efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
n++;
+#ifdef CONFIG_KEXEC
+ /* crashkernel=size@offset specifies the size to reserve for a crash
+ * kernel.(offset is ingored for keep compatibility with other archs)
+ * By reserving this memory we guarantee that linux never set's it
+ * up as a DMA target.Useful for holding code to do something
+ * appropriate after a kernel panic.
+ */
+ {
+ char *from = strstr(saved_command_line, "crashkernel=");
+ unsigned long base, size;
+ if (from) {
+ size = memparse(from + 12, &from);
+ if (size) {
+ sort_regions(rsvd_region, n);
+ base = kdump_find_rsvd_region(size,
+ rsvd_region, n);
+ if (base != ~0UL) {
+ rsvd_region[n].start =
+ (unsigned long)__va(base);
+ rsvd_region[n].end =
+ (unsigned long)__va(base + size);
+ n++;
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ }
+ }
+ }
+ efi_memmap_res.start = ia64_boot_param->efi_memmap;
+ efi_memmap_res.end = efi_memmap_res.start +
+ ia64_boot_param->efi_memmap_size;
+ boot_param_res.start = __pa(ia64_boot_param);
+ boot_param_res.end = boot_param_res.start +
+ sizeof(*ia64_boot_param);
+ }
+#endif
/* end of memory marker */
rsvd_region[n].start = ~0UL;
rsvd_region[n].end = ~0UL;
@@ -263,6 +300,7 @@ reserve_memory (void)
sort_regions(r