Merge branch 'master'

author: Steven Whitehouse <swhiteho@redhat.com> 2006-03-31 15:34:58 -0500
committer: Steven Whitehouse <swhiteho@redhat.com> 2006-03-31 15:34:58 -0500
commit: 86579dd06deecfa6ac88d5e84e4d63c397cd6f6d (patch)
tree: b4475d3ccde53015ad84a06e4e55e64591171b75 /arch/ia64
parent: 7ea9ea832212c4a755650f7c7cc1ff0b63292a41 (diff)
parent: a0f067802576d4eb4c65d40b8ee7d6ea3c81dd61 (diff)
61 files changed, 1407 insertions, 909 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index a85ea9d37f0..edffe25a477 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -34,6 +34,10 @@ config RWSEM_XCHGADD_ALGORITHM
 	bool
 	default y
 
+config GENERIC_FIND_NEXT_BIT
+	bool
+	default y
+
 config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
@@ -42,6 +46,10 @@ config TIME_INTERPOLATION
 	bool
 	default y
 
+config DMI
+	bool
+	default y
+
 config EFI
 	bool
 	default y
@@ -252,6 +260,15 @@ config NR_CPUS
 	  than 64 will cause the use of a CPU mask array, causing a small
 	  performance hit.
 
+config IA64_NR_NODES
+	int "Maximum number of NODEs (256-1024)" if (IA64_SGI_SN2 || IA64_GENERIC)
+	range 256 1024
+	depends on IA64_SGI_SN2 || IA64_GENERIC
+	default "256"
+	help
+	  This option specifies the maximum number of nodes in your SSI system.
+	  If in doubt, use the default.
+
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
 	depends on SMP && EXPERIMENTAL
@@ -271,6 +288,25 @@ config SCHED_SMT
 	  Intel IA64 chips with MultiThreading at a cost of slightly increased
 	  overhead in some places. If unsure say N here.
 
+config PERMIT_BSP_REMOVE
+	bool "Support removal of Bootstrap Processor"
+	depends on HOTPLUG_CPU
+	default n
+	---help---
+	Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU
+	support. 
+
+config FORCE_CPEI_RETARGET
+	bool "Force assumption that CPEI can be re-targetted"
+	depends on PERMIT_BSP_REMOVE
+	default n
+	---help---
+	Say Y if you need to force the assumption that CPEI can be re-targetted to
+	any cpu in the system. This hint is available via ACPI 3.0 specifications.
+	Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP.
+	This option it useful to enable this feature on older BIOS's as well.
+	You can also enable this by using boot command line option force_cpei=1.
+
 config PREEMPT
 	bool "Preemptible Kernel"
         help
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index f722e1a2594..80ea7506fa1 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -1,6 +1,9 @@
 #
 # ia64/Makefile
 #
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.
@@ -62,7 +65,7 @@ drivers-$(CONFIG_OPROFILE)	+= arch/ia64/oprofile/
 
 boot := arch/ia64/hp/sim/boot
 
-.PHONY: boot compressed check
+PHONY += boot compressed check
 
 all: compressed unwcheck
 
diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig
index 744fd2f79f6..0d29aa2066b 100644
--- a/arch/ia64/configs/gensparse_defconfig
+++ b/arch/ia64/configs/gensparse_defconfig
@@ -116,6 +116,7 @@ CONFIG_IOSAPIC=y
 CONFIG_FORCE_MAX_ZONEORDER=17
 CONFIG_SMP=y
 CONFIG_NR_CPUS=512
+CONFIG_IA64_NR_NODES=256
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_SCHED_SMT is not set
 # CONFIG_PREEMPT is not set
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
index 8206752161b..a718034d68d 100644
--- a/arch/ia64/configs/sn2_defconfig
+++ b/arch/ia64/configs/sn2_defconfig
@@ -116,6 +116,7 @@ CONFIG_IA64_SGI_SN_XP=m
 CONFIG_FORCE_MAX_ZONEORDER=17
 CONFIG_SMP=y
 CONFIG_NR_CPUS=1024
+CONFIG_IA64_NR_NODES=256
 # CONFIG_HOTPLUG_CPU is not set
 CONFIG_SCHED_SMT=y
 CONFIG_PREEMPT=y
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index 125568118b8..766bf495543 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -116,6 +116,8 @@ CONFIG_FORCE_MAX_ZONEORDER=17
 CONFIG_SMP=y
 CONFIG_NR_CPUS=4
 CONFIG_HOTPLUG_CPU=y
+CONFIG_PERMIT_BSP_REMOVE=y
+CONFIG_FORCE_CPEI_RETARGET=y
 # CONFIG_SCHED_SMT is not set
 # CONFIG_PREEMPT is not set
 CONFIG_SELECT_MEMORY_MODEL=y
diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig
index 3e767288a74..6cba55da572 100644
--- a/arch/ia64/defconfig
+++ b/arch/ia64/defconfig
@@ -116,6 +116,7 @@ CONFIG_IOSAPIC=y
 CONFIG_FORCE_MAX_ZONEORDER=17
 CONFIG_SMP=y
 CONFIG_NR_CPUS=512
+CONFIG_IA64_NR_NODES=256
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_SCHED_SMT is not set
 # CONFIG_PREEMPT is not set
diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c
index c9104bfff66..38aa9c10885 100644
--- a/arch/ia64/dig/setup.c
+++ b/arch/ia64/dig/setup.c
@@ -69,8 +69,3 @@ dig_setup (char **cmdline_p)
 	screen_info.orig_video_isVGA = 1;	/* XXX fake */
 	screen_info.orig_video_ega_bx = 3;	/* XXX fake */
 }
-
-void __init
-dig_irq_init (void)
-{
-}
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index 626cdc83668..0e5c6ae5022 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -46,11 +46,6 @@
 #define KEYBOARD_INTR	3	/* must match with simulator! */
 
 #define NR_PORTS	1	/* only one port for now */
-#define SERIAL_INLINE	1
-
-#ifdef SERIAL_INLINE
-#define _INLINE_ inline
-#endif
 
 #define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT)
 
@@ -237,7 +232,7 @@ static void rs_put_char(struct tty_struct *tty, unsigned char ch)
 	local_irq_restore(flags);
 }
 
-static _INLINE_ void transmit_chars(struct async_struct *info, int *intr_done)
+static void transmit_chars(struct async_struct *info, int *intr_done)
 {
 	int count;
 	unsigned long flags;
diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h
index 68ceb4e690c..ccb98ed48e5 100644
--- a/arch/ia64/ia32/ia32priv.h
+++ b/arch/ia64/ia32/ia32priv.h
@@ -29,9 +29,9 @@
 struct partial_page {
 	struct partial_page	*next; /* linked list, sorted by address */
 	struct rb_node		pp_rb;
-	/* 64K is the largest "normal" page supported by ia64 ABI. So 4K*32
+	/* 64K is the largest "normal" page supported by ia64 ABI. So 4K*64
 	 * should suffice.*/
-	unsigned int		bitmap;
+	unsigned long		bitmap;
 	unsigned int		base;
 };
 
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 70dba1f0e2e..5366b3b23d0 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -25,7 +25,6 @@
 #include <linux/resource.h>
 #include <linux/times.h>
 #include <linux/utsname.h>
-#include <linux/timex.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/sem.h>
@@ -1166,19 +1165,7 @@ put_tv32 (struct compat_timeval __user *o, struct timeval *i)
 asmlinkage unsigned long
 sys32_alarm (unsigned int seconds)
 {
-	struct itimerval it_new, it_old;
-	unsigned int oldalarm;
-
-	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
-	it_new.it_value.tv_sec = seconds;
-	it_new.it_value.tv_usec = 0;
-	do_setitimer(ITIMER_REAL, &it_new, &it_old);
-	oldalarm = it_old.it_value.tv_sec;
-	/* ehhh.. We can't return 0 if we have an alarm pending.. */
-	/* And we'd better return too much than too little anyway */
-	if (it_old.it_value.tv_usec)
-		oldalarm++;
-	return oldalarm;
+	return alarm_setitimer(seconds);
 }
 
 /* Translations due to time_t size differences.  Which affects all
@@ -2603,78 +2590,4 @@ sys32_setresgid(compat_gid_t rgid, compat_gid_t egid,
 	ssgid = (sgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)sgid);
 	return sys_setresgid(srgid, segid, ssgid);
 }
-
-/* Handle adjtimex compatibility. */
-
-struct timex32 {
-	u32 modes;
-	s32 offset, freq, maxerror, esterror;
-	s32 status, constant, precision, tolerance;
-	struct compat_timeval time;
-	s32 tick;
-	s32 ppsfreq, jitter, shift, stabil;
-	s32 jitcnt, calcnt, errcnt, stbcnt;
-	s32  :32; s32  :32; s32  :32; s32  :32;
-	s32  :32; s32  :32; s32  :32; s32  :32;
-	s32  :32; s32  :32; s32  :32; s32  :32;
-};
-
-extern int do_adjtimex(struct timex *);
-
-asmlinkage long
-sys32_adjtimex(struct timex32 *utp)
-{
-	struct timex txc;
-	int ret;
-
-	memset(&txc, 0, sizeof(struct timex));
-
-	if(get_user(txc.modes, &utp->modes) ||
-	   __get_user(txc.offset, &utp->offset) ||
-	   __get_user(txc.freq, &utp->freq) ||
-	   __get_user(txc.maxerror, &utp->maxerror) ||
-	   __get_user(txc.esterror, &utp->esterror) ||
-	   __get_user(txc.status, &utp->status) ||
-	   __get_user(txc.constant, &utp->constant) ||
-	   __get_user(txc.precision, &utp->precision) ||
-	   __get_user(txc.tolerance, &utp->tolerance) ||
-	   __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
-	   __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
-	   __get_user(txc.tick, &utp->tick) ||
-	   __get_user(txc.ppsfreq, &utp->ppsfreq) ||
-	   __get_user(txc.jitter, &utp->jitter) ||
-	   __get_user(txc.shift, &utp->shift) ||
-	   __get_user(txc.stabil, &utp->stabil) ||
-	   __get_user(txc.jitcnt, &utp->jitcnt) ||
-	   __get_user(txc.calcnt, &utp->calcnt) ||
-	   __get_user(txc.errcnt, &utp->errcnt) ||
-	   __get_user(txc.stbcnt, &utp->stbcnt))
-		return -EFAULT;
-
-	ret = do_adjtimex(&txc);
-
-	if(put_user(txc.modes, &utp->modes) ||
-	   __put_user(txc.offset, &utp->offset) ||
-	   __put_user(txc.freq, &utp->freq) ||
-	   __put_user(txc.maxerror, &utp->maxerror) ||
-	   __put_user(txc.esterror, &utp->esterror) ||
-	   __put_user(txc.status, &utp->status) ||
-	   __put_user(txc.constant, &utp->constant) ||
-	   __put_user(txc.precision, &utp->precision) ||
-	   __put_user(txc.tolerance, &utp->tolerance) ||
-	   __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
-	   __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
-	   __put_user(txc.tick, &utp->tick) ||
-	   __put_user(txc.ppsfreq, &utp->ppsfreq) ||
-	   __put_user(txc.jitter, &utp->jitter) ||
-	   __put_user(txc.shift, &utp->shift) ||
-	   __put_user(txc.stabil, &utp->stabil) ||
-	   __put_user(txc.jitcnt, &utp->jitcnt) ||
-	   __put_user(txc.calcnt, &utp->calcnt) ||
-	   __put_user(txc.errcnt, &utp->errcnt) ||
-	   __put_user(txc.stbcnt, &utp->stbcnt))
-		ret = -EFAULT;
-
-	return ret;
-}
 #endif /* NOTYET */
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 09a0dbc17fb..59e871dae74 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -7,7 +7,7 @@ extra-y	:= head.o init_task.o vmlinux.lds
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
 	 irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o		\
 	 salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
-	 unwind.o mca.o mca_asm.o topology.o
+	 unwind.o mca.o mca_asm.o topology.o dmi_scan.o
 
 obj-$(CONFIG_IA64_BRL_EMU)	+= brl_emu.o
 obj-$(CONFIG_IA64_GENERIC)	+= acpi-ext.o
@@ -30,6 +30,7 @@ obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
+dmi_scan-y			+= ../../i386/kernel/dmi_scan.o
 
 # The gate DSO image is built using a special linker script.
 targets += gate.so gate-syms.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index ecd44bdc839..58c93a30348 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header,
 	return 0;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 unsigned int can_cpei_retarget(void)
 {
 	extern int cpe_vector;
+	extern unsigned int force_cpei_retarget;
 
 	/*
 	 * Only if CPEI is supported and the override flag
 	 * is present, otherwise return that its re-targettable
 	 * if we are in polling mode.
 	 */
-	if (cpe_vector > 0 && !acpi_cpei_override)
-		return 0;
-	else
-		return 1;
+	if (cpe_vector > 0) {
+		if (acpi_cpei_override || force_cpei_retarget)
+			return 1;
+		else
+			return 0;
+	}
+	return 1;
 }
 
 unsigned int is_cpu_cpei_target(unsigned int cpu)
@@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cpu)
 {
 	acpi_cpei_phys_cpuid = cpu_physical_id(cpu);
 }
+#endif
 
 unsigned int get_cpei_target_cpu(void)
 {
@@ -414,6 +420,26 @@ int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
 int __initdata nid_to_pxm_map[MAX_NUMNODES];
 static struct acpi_table_slit __initdata *slit_table;
 
+static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa)
+{
+	int pxm;
+
+	pxm = pa->proximity_domain;
+	if (ia64_platform_is("sn2"))
+		pxm += pa->reserved[0] << 8;
+	return pxm;
+}
+
+static int get_memory_proximity_domain(struct acpi_table_memory_affinity *ma)
+{
+	int pxm;
+
+	pxm = ma->proximity_domain;
+	if (ia64_platform_is("sn2"))
+		pxm += ma->reserved1[0] << 8;
+	return pxm;
+}
+
 /*
  * ACPI 2.0 SLIT (System Locality Information Table)
  * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
@@ -437,13 +463,20 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 void __init
 acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
 {
+	int pxm;
+
+	if (!pa->flags.enabled)
+		return;
+
+	pxm = get_processor_proximity_domain(pa);
+
 	/* record this node in proximity bitmap */
-	pxm_bit_set(pa->proximity_domain);
+	pxm_bit_set(pxm);
 
 	node_cpuid[srat_num_cpus].phys_id =
 	    (pa->apic_id << 8) | (pa->lsapic_eid);
 	/* nid should be overridden as logical node id later */
-	node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
+	node_cpuid[srat_num_cpus].nid = pxm;
 	srat_num_cpus++;
 }
 
@@ -451,10 +484,10 @@ void __init
 acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
 {
 	unsigned long paddr, size;
-	u8 pxm;
+	int pxm;
 	struct node_memblk_s *p, *q, *pend;
 
-	pxm = ma->proximity_domain;
+	pxm = get_memory_proximity_domain(ma);
 
 	/* fill node memory chunk structure */
 	paddr = ma->base_addr_hi;
@@ -618,9 +651,9 @@ unsigned long __init acpi_find_rsdp(void)
 {
 	unsigned long rsdp_phys = 0;
 
-	if (efi.acpi20)
-		rsdp_phys = __pa(efi.acpi20);
-	else if (efi.acpi)
+	if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+		rsdp_phys = efi.acpi20;
+	else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
 		printk(KERN_WARNING PREFIX
 		       "v1.0/r0.71 tables no longer supported\n");
 	return rsdp_phys;
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 9990320b6f9..12cfedce73b 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -458,24 +458,33 @@ efi_init (void)
 	printk(KERN_INFO "EFI v%u.%.02u by %s:",
 	       efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
 
+	efi.mps        = EFI_INVALID_TABLE_ADDR;
+	efi.acpi       = EFI_INVALID_TABLE_ADDR;
+	efi.acpi20     = EFI_INVALID_TABLE_ADDR;
+	efi.smbios     = EFI_INVALID_TABLE_ADDR;
+	efi.sal_systab = EFI_INVALID_TABLE_ADDR;
+	efi.boot_info  = EFI_INVALID_TABLE_ADDR;
+	efi.hcdp       = EFI_INVALID_TABLE_ADDR;
+	efi.uga        = EFI_INVALID_TABLE_ADDR;
+
 	for (i = 0; i < (int) efi.systab->nr_tables; i++) {
 		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
-			efi.mps = __va(config_tables[i].table);
+			efi.mps = config_tables[i].table;
 			printk(" MPS=0x%lx", config_tables[i].table);
 		} else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
-			efi.acpi20 = __va(config_tables[i].table);
+			efi.acpi20 = config_tables[i].table;
 			printk(" ACPI 2.0=0x%lx", config_tables[i].table);
 		} else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
-			efi.acpi = __va(config_tables[i].table);
+			efi.acpi = config_tables[i].table;
 			printk(" ACPI=0x%lx", config_tables[i].table);
 		} else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
-			efi.smbios = __va(config_tables[i].table);
+			efi.smbios = config_tables[i].table;
 			printk(" SMBIOS=0x%lx", config_tables[i].table);
 		} else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
-			efi.sal_systab = __va(config_tables[i].table);
+			efi.sal_systab = config_tables[i].table;
 			printk(" SALsystab=0x%lx", config_tables[i].table);
 		} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
-			efi.hcdp = __va(config_tables[i].table);
+			efi.hcdp = config_tables[i].table;
 			printk(" HCDP=0x%lx", config_tables[i].table);
 		}
 	}
@@ -677,27 +686,34 @@ EXPORT_SYMBOL(efi_mem_attributes);
 /*
  * Determines whether the memory at phys_addr supports the desired
  * attribute (WB, UC, etc).  If this returns 1, the caller can safely
- * access *size bytes at phys_addr with the specified attribute.
+ * access size bytes at phys_addr with the specified attribute.
  */
-static int
-efi_mem_attribute_range (unsigned long phys_addr, unsigned long *size, u64 attr)
+int
+efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, u64 attr)
 {
+	unsigned long end = phys_addr + size;
 	efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
-	unsigned long md_end;
 
-	if (!md || (md->attribute & attr) != attr)
+	/*
+	 * Some firmware doesn't report MMIO regions in the EFI memory
+	 * map.  The Intel BigSur (a.k.a. HP i2000) has this problem.
+	 * On those platforms, we have to assume UC is valid everywhere.
+	 */
+	if (!md || (md->attribute & attr) != attr) {
+		if (attr == EFI_MEMORY_UC && !efi_memmap_has_mmio())
+			return 1;
 		return 0;
+	}
 
 	do {
-		md_end = efi_md_end(md);
-		if (phys_addr + *size <= md_end)
+		unsigned long md_end = efi_md_end(md);
+
+		if (end <= md_end)
 			return 1;
 
 		md = efi_memory_descriptor(md_end);
-		if (!md || (md->attribute & attr) != attr) {
-			*size = md_end - phys_addr;
-			return 1;
-		}
+		if (!md || (md->attribute & attr) != attr)
+			return 0;
 	} while (md);
 	return 0;
 }
@@ -708,7 +724,7 @@ efi_mem_attribute_range (unsigned long phys_addr, unsigned long *size, u64 attr)
  * control access size.
  */
 int
-valid_phys_addr_range (unsigned long phys_addr, unsigned long *size)
+valid_phys_addr_range (unsigned long phys_addr, unsigned long size)
 {
 	return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB);
 }
@@ -723,7 +739,7 @@ valid_phys_addr_range (unsigned long phys_addr, unsigned long *size)
  * because that doesn't appear in the boot-time EFI memory map.
  */
 int
-valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long *size)
+valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long size)
 {
 	if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB))
 		return 1;
@@ -731,14 +747,6 @@ valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long *size)
 	if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC))
 		return 1;
 
-	/*
-	 * Some firmware doesn't report MMIO regions in the EFI memory map.
-	 * The Intel BigSur (a.k.a. HP i2000) has this problem.  In this
-	 * case, we can't use the EFI memory map to validate mmap requests.
-	 */
-	if (!efi_memmap_has_mmio())
-		return 1;
-
 	return 0;
 }
 
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 930fdfca6dd..750e8e7fbdc 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1102,9 +1102,6 @@ skip_rbs_switch:
 	st8 [r2]=r8
 	st8 [r3]=r10
 .work_pending:
-	tbit.nz p6,p0=r31,TIF_SIGDELAYED		// signal delayed from  MCA/INIT/NMI/PMI context?
-(p6)	br.cond.sptk.few .sigdelayed
-	;;
 	tbit.z p6,p0=r31,TIF_NEED_RESCHED		// current_thread_info()->need_resched==0?
 (p6)	br.cond.sptk.few .notify
 #ifdef CONFIG_PREEMPT
@@ -1131,17 +1128,6 @@ skip_rbs_switch:
 (pLvSys)br.cond.sptk.few  .work_pending_syscall_end
 	br.cond.sptk.many .work_processed_kernel	// don't re-check
 
-// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
-// it could not be delivered.  Deliver it now.  The signal might be for us and
-// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
-// signal.
-
-.sigdelayed:
-	br.call.sptk.many rp=do_sigdelayed
-	cmp.eq p6,p0=r0,r0				// p6 <- 1, always re-check
-(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
-	br.cond.sptk.many .work_processed_kernel	// re-check
-
 .work_pending_syscall_end:
 	adds r2=PT(R8)+16,r12
 	adds r3=PT(R10)+16,r12
@@ -1619,5 +1605,6 @@ sys_call_table:
 	data8 sys_ni_syscall			// reserved for pselect
 	data8 sys_ni_syscall			// 1295 reserved for ppoll
 	data8 sys_unshare
+	data8 sys_splice
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S
index e1e4aba9ecd..7c99e6ec3da 100644
--- a/arch/ia64/kernel/gate.lds.S
+++ b/arch/ia64/kernel/gate.lds.S
@@ -59,6 +59,7 @@ SECTIONS
 	*(.dynbss)
 	*(.bss .bss.* .gnu.linkonce.b.*)
 	*(__ex_table)
+	*(__mca_table)
   }
 }
 
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 574084f343f..7956eb9058f 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -9,54 +9,65 @@
  * Copyright (C) 1999 VA Linux Systems
  * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
  *
- * 00/04/19	D. Mosberger	Rewritten to mirror more closely the x86 I/O APIC code.
- *				In particular, we now have separate handlers for edge
- *				and level triggered interrupts.
- * 00/10/27	Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation
- *				PCI to vector mapping, shared PCI interrupts.
- * 00/10/27	D. Mosberger	Document things a bit more to make them more understandable.
- *				Clean up much of the old IOSAPIC cruft.
- * 01/07/27	J.I. Lee	PCI irq routing, Platform/Legacy interrupts and fixes for
- *				ACPI S5(SoftOff) support.
+ * 00/04/19	D. Mosberger	Rewritten to mirror more closely the x86 I/O
+ *				APIC code.  In particular, we now have separate
+ *				handlers for edge and level triggered
+ *				interrupts.
+ * 00/10/27	Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
+ *				allocation PCI to vector mapping, shared PCI
+ *				interrupts.
+ * 00/10/27	D. Mosberger	Document things a bit more to make them more
+ *				understandable.  Clean up much of the old
+ *				IOSAPIC cruft.
+ * 01/07/27	J.I. Lee	PCI irq routing, Platform/Legacy interrupts
+ *				and fixes for ACPI S5(SoftOff) support.
  * 02/01/23	J.I. Lee	iosapic pgm fixes for PCI irq routing from _PRT
- * 02/01/07     E. Focht        <efocht@ess.nec.de> Redirectable interrupt vectors in
- *                              iosapic_set_affinity(), initializations for
- *                              /proc/irq/#/smp_affinity
+ * 02/01/07     E. Focht        <efocht@ess.nec.de> Redirectable interrupt
+ *				vectors in iosapic_set_affinity(),
+ *				initializations for /proc/irq/#/smp_affinity
  * 02/04/02	P. Diefenbaugh	Cleaned up ACPI PCI IRQ routing.
  * 02/04/18	J.I. Lee	bug fix in iosapic_init_pci_irq
- * 02/04/30	J.I. Lee	bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping
- *				error
+ * 02/04/30	J.I. Lee	bug fix in find_iosapic to fix ACPI PCI IRQ to
+ *				IOSAPIC mapping error
  * 02/07/29	T. Kochi	Allocate interrupt vectors dynamically
- * 02/08/04	T. Kochi	Cleaned up terminology (irq, global system interrupt, vector, etc.)
- * 02/09/20	D. Mosberger	Simplified by taking advantage of ACPI's pci_irq code.
+ * 02/08/04	T. Kochi	Cleaned up terminology (irq, global system
+ *				interrupt, vector, etc.)
+ * 02/09/20	D. Mosberger	Simplified by taking advantage of ACPI's
+ *				pci_irq code.
  * 03/02/19	B. Helgaas	Make pcat_compat system-wide, not per-IOSAPIC.
- *				Remove iosapic_address & gsi_base from external interfaces.
- *				Rationalize __init/__devinit attributes.
+ *				Remove iosapic_address & gsi_base from
+ *				external interfaces.  Rationalize
+ *				__init/__devinit attributes.
  * 04/12/04 Ashok Raj	<ashok.raj@intel.com> Intel Corporation 2004
- *				Updated to work with irq migration necessary for CPU Hotplug
+ *				Updated to work with irq migration necessary
+ *				for CPU Hotplug
  */
 /*
- * Here is what the interrupt logic between a PCI device and the kernel looks like:
+ * Here is what the interrupt logic between a PCI device and the kernel looks
+ * like:
  *
- * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD).  The
- *     device is uniquely identified by its bus--, and slot-number (the function
- *     number does not matter here because all functions share the same interrupt
- *     lines).
+ * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
+ *     INTD).  The device is uniquely identified by its bus-, and slot-number
+ *     (the function number does not matter here because all functions share
+ *     the same interrupt lines).
  *
- * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller.
- *     Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level
- *     triggered and use the same polarity).  Each interrupt line has a unique Global
- *     System Interrupt (GSI) number which can be calculated as the sum of the controller's
- *     base GSI number and the IOSAPIC pin number to which the line connects.
+ * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC
+ *     controller.  Multiple interrupt lines may have to share the same
+ *     IOSAPIC pin (if they're level triggered and use the same polarity).
+ *     Each interrupt line has a unique Global System Interrupt (GSI) number
+ *     which can be calculated as the sum of the controller's base GSI number
+ *     and the IOSAPIC pin number to which the line connects.
  *
- * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the IOSAPIC pin
- *     into the IA-64 interrupt vector.  This interrupt vector is then sent to the CPU.
+ * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the
+ * IOSAPIC pin into the IA-64 interrupt vector.  This interrupt vector is then
+ * sent to the CPU.
  *
- * (4) The kernel recognizes an interrupt as an IRQ.  The IRQ interface is used as
- *     architecture-independent interrupt handling mechanism in Linux.  As an
- *     IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number
- *     mapping.  On smaller systems, we use one-to-one mapping between IA-64 vector and
- *     IRQ.  A platform can implement platform_irq_to_vector(irq) and
+ * (4) The kernel recognizes an interrupt as an IRQ.  The IRQ interface is
+ *     used as architecture-independent interrupt handling mechanism in Linux.
+ *     As an IRQ is a number, we have to have
+ *     IA-64 interrupt vector number <-> IRQ number mapping.  On smaller
+ *     systems, we use one-to-one mapping between IA-64 vector and IRQ.  A
+ *     platform can implement platform_irq_to_vector(irq) and
  *     platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
  *     Please see also include/asm-ia64/hw_irq.h for those APIs.
  *
@@ -64,9 +75,9 @@
  *
  *	PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
  *
- * Note: The term "IRQ" is loosely used everywhere in Linux kernel to describe interrupts.
- * Now we use "IRQ" only for Linux IRQ's.  ISA IRQ (isa_irq) is the only exception in this
- * source code.
+ * Note: The term "IRQ" is loosely used everywhere in Linux kernel to
+ * describeinterrupts.  Now we use "IRQ" only for Linux IRQ's.  ISA IRQ
+ * (isa_irq) is the only exception in this source code.
  */
 #include <linux/config.h>
 
@@ -90,7 +101,6 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
-
 #undef DEBUG_INTERRUPT_ROUTING
 
 #ifdef DEBUG_INTERRUPT_ROUTING
@@ -99,36 +109,46 @@
 #define DBG(fmt...)
 #endif
 
-#define NR_PREALLOCATE_RTE_ENTRIES	(PAGE_SIZE / sizeof(struct iosapic_rte_info))
+#define NR_PREALLOCATE_RTE_ENTRIES \
+	(PAGE_SIZE / sizeof(struct iosapic_rte_info))
 #define RTE_PREALLOCATED	(1)
 
 static DEFINE_SPINLOCK(iosapic_lock);
 
-/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */
+/*
+ * These tables map IA-64 vectors to the IOSAPIC pin that generates this
+ * vector.
+ */
 
 struct iosapic_rte_info {
-	struct list_head rte_list;	/* node in list of RTEs sharing the same vector */
+	struct list_head rte_list;	/* node in list of RTEs sharing the
+					 * same vector */
 	char __iomem	*addr;		/* base address of IOSAPIC */
-	unsigned int	gsi_base;	/* first GSI assigned to this IOSAPIC */
+	unsigned int	gsi_base;	/* first GSI assigned to this
+					 * IOSAPIC */
 	char		rte_index;	/* IOSAPIC RTE index */
 	int		refcnt;		/* reference counter */
 	unsigned int	flags;		/* flags */
 } ____cacheline_aligned;
 
 static struct iosapic_intr_info {
-	struct list_head rtes;		/* RTEs using this vector (empty => not an IOSAPIC interrupt) */
+	struct list_head rtes;		/* RTEs using this vector (empty =>
+					 * not an IOSAPIC interrupt) */
 	int		count;		/* # of RTEs that shares this vector */
-	u32		low32;		/* current value of low word of Redirection table entry */
+	u32		low32;		/* current value of low word of
+					 * Redirection table entry */
 	unsigned int	dest;		/* destination CPU physical ID */
 	unsigned char	dmode	: 3;	/* delivery mode (see iosapic.h) */
-	unsigned char 	polarity: 1;	/* interrupt polarity (see iosapic.h) */
+	unsigned char 	polarity: 1;	/* interrupt polarity
+					 * (see iosapic.h) */
 	unsigned char	trigger	: 1;	/* trigger mode (see iosapic.h) */
 } iosapic_intr_info[IA64_NUM_VECTORS];
 
 static struct iosapic {
 	char __iomem	*addr;		/* base address of IOSAPIC */
-	unsigned int 	gsi_base;	/* first GSI assigned to this IOSAPIC */
-	unsigned short 	num_rte;	/* number of RTE in this IOSAPIC */
+	unsigned int 	gsi_base;	/* first GSI assigned to this
+					 * IOSAPIC */
+	unsigned short 	num_rte;	/* # of RTEs on this IOSAPIC */
 	int		rtes_inuse;	/* # of RTEs in use on this IOSAPIC */
 #ifdef CONFIG_NUMA
 	unsigned short	node;		/* numa node association via pxm */
@@ -149,7 +169,8 @@ find_iosapic (unsigned int gsi)
 	int i;
 
 	for (i = 0; i < NR_IOSAPICS; i++) {
-		if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte)
+		if ((unsigned) (gsi - iosapic_lists[i].gsi_base) <
+		    iosapic_lists[i].num_rte)
 			return i;
 	}
 
@@ -162,7 +183,8 @@ _gsi_to_vector (unsigned int gsi)
 	struct iosapic_intr_info *info;
 	struct iosapic_rte_info *rte;
 
-	for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info)
+	for (info = iosapic_intr_info; info <
+		     iosapic_intr_info + IA64_NUM_VECTORS; ++info)
 		list_for_each_entry(rte, &info->rtes, rte_list)
 			if (rte->gsi_base + rte->rte_index == gsi)
 				return info - iosapic_intr_info;
@@ -185,8 +207,8 @@ gsi_to_irq (unsigned int gsi)
 	unsigned long flags;
 	int irq;
 	/*
-	 * XXX fix me: this assumes an identity mapping vetween IA-64 vector and Linux irq
-	 * numbers...
+	 * XXX fix me: this assumes an identity mapping between IA-64 vector
+	 * and Linux irq numbers...
 	 */
 	spin_lock_irqsave(&iosapic_lock, flags);
 	{
@@ -197,7 +219,8 @@ gsi_to_irq (unsigned int gsi)
 	return irq;
 }
 
-static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, unsigned int vec)
+static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi,
+						  unsigned int vec)
 {
 	struct iosapic_rte_info *rte;
 
@@ -237,7 +260,9 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
 
 		for (irq = 0; irq < NR_IRQS; ++irq)
 			if (irq_to_vector(irq) == vector) {
-				set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
+				set_irq_affinity_info(irq,
+						      (int)(dest & 0xffff),
+						      redir);
 				break;
 			}
 	}
@@ -259,7 +284,7 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
 }
 
 static void
-nop (unsigned int vector)
+nop (unsigned int irq)
 {
 	/* do nothing... */
 }
@@ -281,7 +306,8 @@ mask_irq (unsigned int irq)
 	{
 		/* set only the mask bit */
 		low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
-		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+				    rte_list) {
 			addr = rte->addr;
 			rte_index = rte->rte_index;
 			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
@@ -306,7 +332,8 @@ unmask_irq (unsigned int irq)
 	spin_lock_irqsave(&iosapic_lock, flags);
 	{
 		low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
-		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+				    rte_list) {
 			addr = rte->addr;
 			rte_index = rte->rte_index;
 			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
@@ -346,21 +373,25 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
 
 	spin_lock_irqsave(&iosapic_lock, flags);
 	{
-		low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
+		low32 = iosapic_intr_info[vec].low32 &
+			~(7 << IOSAPIC_DELIVERY_SHIFT);
 
 		if (redir)
 		        /* change delivery mode to lowest priority */
-			low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
+			low32 |= (IOSAPIC_LOWEST_PRIORITY <<
+				  IOSAPIC_DELIVERY_SHIFT);
 		else
 		        /* change delivery mode to fixed */
 			low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
 
 		iosapic_intr_info[vec].low32 = low32;
 		iosapic_intr_info[vec].dest = dest;
-		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+				    rte_list) {
 			addr = rte->addr;
 			rte_index = rte->rte_index;
-			iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
+			iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index),
+				      high32);
 			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
 		}
 	}
@@ -433,7 +464,8 @@ iosapic_ack_edge_irq (unsigned int irq)
 	 * interrupt for real. This prevents IRQ storms from unhandled
 	 * devices.
 	 */
-	if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED))
+	if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) ==
+	    (IRQ_PENDING|IRQ_DISABLED))
 		mask_irq(irq);
 }
 
@@ -467,7 +499,8 @@ iosapic_version (char __iomem *addr)
 	return iosapic_read(addr, IOSAPIC_VERSION);
 }
 
-static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long pol)
+static int iosapic_find_sharable_vector (unsigned long trigger,
+					 unsigned long pol)
 {
 	int i, vector = -1, min_count = -1;
 	struct iosapic_intr_info *info;
@@ -482,7 +515,8 @@ static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long po
 	for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
 		info = &iosapic_intr_info[i];
 		if (info->trigger == trigger && info->polarity == pol &&
-		    (info->dmode == IOSAPIC_FIXED || info->dmode == IOSAPIC_LOWEST_PRIORITY)) {
+		    (info->dmode == IOSAPIC_FIXED || info->dmode ==
+		     IOSAPIC_LOWEST_PRIORITY)) {
 			if (min_count == -1 || info->count < min_count) {
 				vector = i;
 				min_count = info->count;
@@ -506,12 +540,15 @@ iosapic_reassign_vector (int vector)
 		new_vector = assign_irq_vector(AUTO_ASSIGN);
 		if (new_vector < 0)
 			panic("%s: out of interrupt vectors!\n", __FUNCTION__);
-		printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
+		printk(KERN_INFO "Reassigning vector %d to %d\n",
+		       vector, new_vector);
 		memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
 		       sizeof(struct iosapic_intr_info));
 		INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
-		list_move(iosapic_intr_info[vector].rtes.next, &iosapic_intr_info[new_vector].rtes);
-		memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
+		list_move(iosapic_intr_info[vector].rtes.next,
+			  &iosapic_intr_info[new_vector].rtes);
+		memset(&iosapic_intr_info[vector], 0,
+		       sizeof(struct iosapic_intr_info));
 		iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
 		INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
 	}
@@ -524,7 +561,8 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void)
 	int preallocated = 0;
 
 	if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
-		rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES);
+		rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
+				    NR_PREALLOCATE_RTE_ENTRIES);
 		if (!rte)
 			return NULL;
 		for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
@@ -532,7 +570,8 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void)
 	}
 
 	if (!list_empty(&free_rte_list)) {
-		rte = list_entry(free_rte_list.next, struct iosapic_rte_info, rte_list);
+		rte = list_entry(free_rte_list.next, struct iosapic_rte_info,
+				 rte_list);
 		list_del(&rte->rte_list);
 		preallocated++;
 	} else {
@@ -575,7 +614,8 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 
 	index = find_iosapic(gsi);
 	if (index < 0) {
-		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi);
+		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
+		       __FUNCTION__, gsi);
 		return -ENODEV;
 	}
 
@@ -586,7 +626,8 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 	if (!rte) {
 		rte = iosapic_alloc_rte();
 		if (!rte) {
-			printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__);
+			printk(KERN_WARNING "%s: cannot allocate memory\n",
+			       __FUNCTION__);
 			return -ENOMEM;
 		}
 
@@ -602,7 +643,9 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 	else if (vector_is_shared(vector)) {
 		struct iosapic_intr_info *info = &iosapic_intr_info[vector];
 		if (info->trigger != trigger || info->polarity != polarity) {
-			printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__);
+			printk (KERN_WARNING
+				"%s: cannot override the interrupt\n",
+				__FUNCTION__);
 			return -EINVAL;
 		}
 	}
@@ -619,8 +662,10 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
 	idesc = irq_descp(vector);
 	if (idesc->handler != irq_type) {
 		if (idesc->handler != &no_irq_type)
-			printk(KERN_WARNING "%s: changing vector %d from %s to %s\n",
-			       __FUNCTION__, vector, idesc->handler->typename, irq_type->typename);
+			printk(KERN_WARNING
+			       "%s: changing vector %d from %s to %s\n",
+			       __FUNCTION__, vector,
+			       idesc->handler->typename, irq_type->typename);
 		idesc->handler = irq_type;
 	}
 	return 0;
@@ -631,6 +676,7 @@ get_target_cpu (unsigned int gsi, int vector)
 {
 #ifdef CONFIG_SMP
 	static int cpu = -1;
+	extern int cpe_vector;
 
 	/*
 	 * In case of vector shared by multiple RTEs, all RTEs that
@@ -653,6 +699,11 @@ get_target_cpu (unsigned int gsi, int vector)
 	if (!cpu_online(smp_processor_id()))
 		return cpu_physical_id(smp_processor_id());
 
+#ifdef CONFIG_ACPI
+	if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
+		return get_cpei_target_cpu();
+#endif
+
 #ifdef CONFIG_NUMA
 	{
 		int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
@@ -675,7 +726,7 @@ get_target_cpu (unsigned int gsi, int vector)
 		if (!num_cpus)
 			goto skip_numa_setup;
 
-		/* Use vector assigment to distribute across cpus in node */
+		/* Use vector assignment to distribute across cpus in node */
 		cpu_index = vector % num_cpus;
 
 		for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
@@ -697,7 +748,7 @@ skip_numa_setup:
 	} while (!cpu_online(cpu));
 
 	return cpu_physical_id(cpu);
-#else
+#else  /* CONFIG_SMP */
 	return cpu_physical_id(smp_processor_id());
 #endif
 }
@@ -749,7 +800,8 @@ again:
 			if (list_empty(&iosapic_intr_info[vector].rtes))
 				free_irq_vector(vector);
 			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+			spin_unlock_irqrestore(&irq_descp(vector)->lock,
+					       flags);
 			goto again;
 		}
 
@@ -758,7 +810,8 @@ again:
 			      polarity, trigger);
 		if (err < 0) {
 			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+			spin_unlock_irqrestore(&irq_descp(vector)->lock,
+					       flags);
 			return err;
 		}
 
@@ -800,7 +853,8 @@ iosapic_unregister_intr (unsigned int gsi)
 	 */
 	irq = gsi_to_irq(gsi);
 	if (irq < 0) {
-		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
+		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
+		       gsi);
 		WARN_ON(1);
 		return;
 	}
@@ -811,7 +865,9 @@ iosapic_unregister_intr (unsigned int gsi)
 	spin_lock(&iosapic_lock);
 	{
 		if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
-			printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
+			printk(KERN_ERR
+			       "iosapic_unregister_intr(%u) unbalanced\n",
+			       gsi);
 			WARN_ON(1);
 			goto out;
 		}
@@ -821,7 +877,8 @@ iosapic_unregister_intr (unsigned int gsi)
 
 		/* Mask the interrupt */
 		low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
-		iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32);
+		iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
+			      low32);
 
 		/* Remove the rte entry from the list */
 		list_del(&rte->rte_list);
@@ -834,7 +891,9 @@ iosapic_unregister_intr (unsigned int gsi)
 		trigger	 = iosapic_intr_info[vector].trigger;
 		polarity = iosapic_intr_info[vector].polarity;
 		dest     = iosapic_intr_info[vector].dest;
-		printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
+		printk(KERN_INFO
+		       "GSI %u (%s, %s) -> CPU %d (0x%04x)"
+		       " vector %d unregistered\n",
 		       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
 		       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
 		       cpu_logical_id(dest), dest, vector);
@@ -847,12 +906,15 @@ iosapic_unregister_intr (unsigned int gsi)
 			idesc->handler = &no_irq_type;
 
 			/* Clear the interrupt information */
-			memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
+			memset(&iosapic_intr_info[vector], 0,
+			       sizeof(struct iosapic_intr_info));
 			iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
 			INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
 
 			if (idesc->action) {
-				printk(KERN_ERR "interrupt handlers still exist on IRQ %u\n", irq);
+				printk(KERN_ERR
+				       "interrupt handlers still exist on"
+				       "IRQ %u\n", irq);
 				WARN_ON(1);
 			}
 
@@ -867,7 +929,6 @@ iosapic_unregister_intr (unsigned int gsi)
 
 /*
  * ACPI calls this when it finds an entry for a platform interrupt.
- * Note that the irq_base and IOSAPIC address must be set in iosapic_init().
  */
 int __init
 iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
@@ -901,13 +962,16 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
 		mask = 1;
 		break;
 	      default:
-		printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type);
+		printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__,
+		       int_type);
 		return -1;
 	}
 
 	register_intr(gsi, vector, delivery, polarity, trigger);
 
-	printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
+	printk(KERN_INFO
+	       "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
+	       " vector %d\n",
 	       int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
 	       int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
 	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
@@ -917,10 +981,8 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
 	return vector;
 }
 
-
 /*
  * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
- * Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
  */
 void __init
 iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
@@ -949,16 +1011,19 @@ iosapic_system_init (int system_pcat_compat)
 
 	for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
 		iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
-		INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);	/* mark as unused */
+		/* mark as unused */
+		INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
 	}
 
 	pcat_compat = system_pcat_compat;
 	if (pcat_compat) {
 		/*
-		 * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support
-		 * enabled.
+		 * Disable the compatibility mode interrupts (8259 style),
+		 * needs IN/OUT support enabled.
 		 */
-		printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__);
+		printk(KERN_INFO
+		       "%s: Disabling PC-AT compatible 8259 interrupts\n",
+		       __FUNCTION__);
 		outb(0xff, 0xA1);
 		outb(0xff, 0x21);
 	}
@@ -998,10 +1063,7 @@ iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
 		base = iosapic_lists[index].gsi_base;
 		end  = base + iosapic_lists[index].num_rte - 1;
 
-		if (gsi_base < base && gsi_end < base)
-			continue;/* OK */
-
-		if (gsi_base > end && gsi_end > end)
+		if (gsi_end < base || end < gsi_base)
 			continue; /* OK */
 
 		return -EBUSY;
@@ -1047,12 +1109,14 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
 
 	if ((gsi_base == 0) && pcat_compat) {
 		/*
-		 * Map the legacy ISA devices into the IOSAPIC data.  Some of these may
-		 * get reprogrammed later on with data from the ACPI Interrupt Source
-		 * Override table.
+		 * Map the legacy ISA devices into the IOSAPIC data.  Some of
+		 * these may get reprogrammed later on with data from the ACPI
+		 * Interrupt Source Override table.
 		 */
 		for (isa_irq = 0; isa_irq < 16; ++isa_irq)
-			iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
+			iosapic_override_isa_irq(isa_irq, isa_irq,
+						 IOSAPIC_POL_HIGH,
+						 IOSAPIC_EDGE);
 	}
 	return 0;
 }
@@ -1075,7 +1139,8 @@ iosapic_remove (unsigned int gsi_base)
 
 		if (iosapic_lists[index].rtes_inuse) {
 			err = -EBUSY;
-			printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
+			printk(KERN_WARNING
+			       "%s: IOSAPIC for GSI base %u is busy\n",
 			       __FUNCTION__, gsi_base);
 			goto out;
 		}
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index d33244c3275..5ce908ef9c9 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -163,8 +163,19 @@ void fixup_irqs(void)
 {
 	unsigned int irq;
 	extern void ia64_process_pending_intr(void);
+	extern void ia64_disable_timer(void);
+	extern volatile int time_keeper_id;
+
+	ia64_disable_timer();
+
+	/*
+	 * Find a new timesync master
+	 */
+	if (smp_processor_id() == time_keeper_id) {
+		time_keeper_id = first_cpu(cpu_online_map);
+		printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
+	}
 
-	ia64_set_itv(1<<16);
 	/*
 	 * Phase 1: Locate irq's bound to this cpu and
 	 * relocate them for cpu removal.
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index dcd906fe574..829a43cab79 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -865,6 +865,7 @@ ENTRY(interrupt)
 	;;
 	SAVE_REST
 	;;
+	MCA_RECOVER_RANGE(interrupt)
 	alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
 	mov out0=cr.ivr		// pass cr.ivr as first arg
 	add out1=16,sp		// pass pointer to pt_regs as second arg
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 50ae8c7d453..789881ca83d 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -34,6 +34,7 @@
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
 #include <asm/sections.h>
+#include <asm/uaccess.h>
 
 extern void jprobe_inst_return(void);
 
@@ -722,13 +723,50 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
-	if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-		return 1;
 
-	if (kcb->kprobe_status & KPROBE_HIT_SS) {
-		resume_execution(cur, regs);
-		reset_current_kprobe();
+	switch(kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe and the instruction pointer points back to
+		 * the probe address and allow the page fault handler
+		 * to continue as a normal page fault.
+		 */
+		regs->cr_iip = ((unsigned long)cur->addr) & ~0xFULL;
+		ia64_psr(regs)->ri = ((unsigned long)cur->addr) & 0xf;
+		if (kcb->kprobe_status == KPROBE_REENTER)
+			restore_previous_kprobe(kcb);
+		else
+			reset_current_kprobe();
 		preempt_enable_no_resched();
+		break;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * We increment the nmissed count for accounting,
+		 * we can also use npre/npostfault count for accouting
+		 * these specific fault cases.
+		 */
+		kprobes_inc_nmissed_count(cur);
+
+		/*
+		 * We come here because instructions in the pre/post
+		 * handler caused the page_fault, this could happen
+		 * if handler tries to access user space by
+		 * copy_from_user(), get_user() etc. Let the
+		 * user-specified handler try to fix it first.
+		 */
+		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+			return 1;
+
+		/*
+		 * Let ia64_do_page_fault() fix it.
+		 */
+		break;
+	default:
+		break;
 	}
 
 	return 0;
@@ -740,6 +778,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 	struct die_args *args = (struct die_args *)data;
 	int ret = NOTIFY_DONE;
 
+	if (args->regs && user_mode(args->regs))
+		return ret;
+
 	switch(val) {
 	case DIE_BREAK:
 		/* err is break number from ia64_bad_break() */
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
index c3a04ee7f4f..4b0b71d5aef 100644
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@@ -14,7 +14,15 @@
 struct ia64_machine_vector ia64_mv;
 EXPORT_SYMBOL(ia64_mv);
 
-static struct ia64_machine_vector *
+static __initdata const char *mvec_name;
+static __init int setup_mvec(char *s)
+{
+	mvec_name = s;
+	return 0;
+}
+early_param("machvec", setup_mvec);
+
+static struct ia64_machine_vector * __init
 lookup_machvec (const char *name)
 {
 	extern struct ia64_machine_vector machvec_start[];
@@ -33,10 +41,13 @@ machvec_init (const char *name)
 {
 	struct ia64_machine_vector *mv;
 
+	if (!name)
+		name = mvec_name ? mvec_name : acpi_get_sysname();
 	mv = lookup_machvec(name);
-	if (!mv) {
-		panic("generic kernel failed to find machine vector for platform %s!", name);
-	}
+	if (!mv)
+		panic("generic kernel failed to find machine vector for"
+		      " platform %s!", name);
+
 	ia64_mv = *mv;
 	printk(KERN_INFO "booting generic kernel on platform %s\n", name);
 }
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index ee7eec9ee57..8963171788d 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -69,6 +69,7 @@
 #include <linux/kernel.h>
 #include <linux/smp.h>
 #include <linux/workqueue.h>
+#include <linux/cpumask.h>
 
 #include <asm/delay.h>
 #include <asm/kdebug.h>
@@ -83,6 +84,7 @@
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
 
+#include "mca_drv.h"
 #include "entry.h"
 
 #if defined(IA64_MCA_DEBUG_INFO)
@@ -133,7 +135,7 @@ static int cpe_poll_enabled = 1;
 
 extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
 
-static int mca_init;
+static int mca_init __initdata;
 
 
 static void inline
@@ -184,7 +186,7 @@ static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
  * Inputs   :   info_type   (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
  * Outputs	:	None
  */
-static void
+static void __init
 ia64_log_init(int sal_info_type)
 {
 	u64	max_size = 0;
@@ -282,13 +284,53 @@ ia64_mca_log_sal_error_record(int sal_info_type)
 }
 
 /*
- * platform dependent error handling
+ * search_mca_table
+ *  See if the MCA surfaced in an instruction range
+ *  that has been tagged as recoverable.
+ *
+ *  Inputs
+ *	first	First address range to check
+ *	last	Last address range to check
+ *	ip	Instruction pointer, address we are looking for
+ *
+ * Return value:
+ *      1 on Success (in the table)/ 0 on Failure (not in the  table)
  */
-#ifndef PLATFORM_MCA_HANDLERS
+int
+search_mca_table (const struct mca_table_entry *first,
+                const struct mca_table_entry *last,
+                unsigned long ip)
+{
+        const struct mca_table_entry *curr;
+        u64 curr_start, curr_end;
+
+        curr = first;
+        while (curr <= last) {
+                curr_start = (u64) &curr->start_addr + curr->start_addr;
+                curr_end = (u64) &curr->end_addr + curr->end_addr;
+
+                if ((ip >= curr_start) && (ip <= curr_end)) {
+                        return 1;
+                }
+                curr++;
+        }
+        return 0;
+}
+
+/* Given an address, look for it in the mca tables. */
+int mca_recover_range(unsigned long addr)
+{
+	extern struct mca_table_entry __start___mca_table[];
+	extern struct mca_table_entry __stop___mca_table[];
+
+	return search_mca_table(__start___mca_table, __stop___mca_table-1, addr);
+}
+EXPORT_SYMBOL_GPL(mca_recover_range);
 
 #ifdef CONFIG_ACPI
 
 int cpe_vector = -1;
+int ia64_cpe_irq = -1;
 
 static irqreturn_t
 ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
@@ -359,7 +401,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
  *  Outputs
  *      None
  */
-static void
+static void __init
 ia64_mca_register_cpev (int cpev)
 {
 	/* Register the CPE interrupt vector with SAL */
@@ -377,8 +419,6 @@ ia64_mca_register_cpev (int cpev)
 }
 #endif /* CONFIG_ACPI */
 
-#endif /* PLATFORM_MCA_HANDLERS */
-
 /*
  * ia64_mca_cmc_vector_setup
  *
@@ -392,7 +432,7 @@ ia64_mca_register_cpev (int cpev)
  * Outputs
  *	None
  */
-void
+void __cpuinit
 ia64_mca_cmc_vector_setup (void)
 {
 	cmcv_reg_t	cmcv;
@@ -630,6 +670,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
 	*tnat |= (nat << tslot);
 }
 
+/* Change the comm field on the MCA/INT task to include the pid that
+ * was interrupted, it makes for easier debugging.  If that pid was 0
+ * (swapper or nested MCA/INIT) then use the start of the previous comm
+ * field suffixed with its cpu.
+ */
+
+static void
+ia64_mca_modify_comm(const task_t *previous_current)
+{
+	char *p, comm[sizeof(current->comm)];
+	if (previous_current->pid)
+		snprintf(comm, sizeof(comm), "%s %d",
+			current->comm, previous_current->pid);
+	else {
+		int l;
+		if ((p = strchr(previous_current->comm, ' ')))
+			l = p - previous_current->comm;
+		else
+			l = strlen(previous_current->comm);
+		snprintf(comm, sizeof(comm), "%s %*s %d",
+			current->comm, l, previous_current->comm,
+			task_thread_info(previous_current)->cpu);
+	}
+	memcpy(current->comm, comm, sizeof(current->comm));
+}
+
 /* On entry to this routine, we are running on the per cpu stack, see
  * mca_asm.h.  The original stack has not been touched by this event.  Some of
  * the original stack's registers will be in the RBS on this stack.  This stack
@@ -648,7 +714,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 		struct ia64_sal_os_state *sos,
 		const char *type)
 {
-	char *p, comm[sizeof(current->comm)];
+	char *p;
 	ia64_va va;
 	extern char ia64_leave_kernel[];	/* Need asm address, not function descriptor */
 	const pal_min_state_area_t *ms = sos->pal_min_state;
@@ -721,54 +787,43 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 	/* Verify the previous stack state before we change it */
 	if (user_mode(regs)) {
 		msg = "occurred in user space";
-		goto no_mod;
-	}
-	if (r13 != sos->prev_IA64_KR_CURRENT) {
-		msg = "inconsistent previous current and r13";
-		goto no_mod;
-	}
-	if ((r12 - r13) >= KERNEL_STACK_SIZE) {
-		msg = "inconsistent r12 and r13";
-		goto no_mod;
-	}
-	if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
-		msg = "inconsistent ar.bspstore and r13";
-		goto no_mod;
-	}
-	va.p = old_bspstore;
-	if (va.f.reg < 5) {
-		msg = "old_bspstore is in the wrong region";
-		goto no_mod;
-	}
-	if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
-		msg = "inconsistent ar.bsp and r13";
-		goto no_mod;
-	}
-	size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
-	if (ar_bspstore + size > r12) {
-		msg = "no room for blocked state";
+		/* previous_current is guaranteed to be valid when the task was
+		 * in user space, so ...
+		 */
+		ia64_mca_modify_comm(previous_current);
 		goto no_mod;
 	}
 
-	/* Change the comm field on the MCA/INT task to include the pid that
-	 * was interrupted, it makes for easier debugging.  If that pid was 0
-	 * (swapper or nested MCA/INIT) then use the start of the previous comm
-	 * field suffixed with its cpu.
-	 */
-	if (previous_current->pid)
-		snprintf(comm, sizeof(comm), "%s %d",
-			current->comm, previous_current->pid);
-	else {
-		int l;
-		if ((p = strchr(previous_current->comm, ' ')))
-			l = p - previous_current->comm;
-		else
-			l = strlen(previous_current->comm);
-		snprintf(comm, sizeof(comm), "%s %*s %d",
-			current->comm, l, previous_current->comm,
-			task_thread_info(previous_current)->cpu);
+	if (!mca_recover_range(ms->pmsa_iip)) {
+		if (r13 != sos->prev_IA64_KR_CURRENT) {
+			msg = "inconsistent previous current and r13";
+			goto no_mod;
+		}
+		if ((r12 - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent r12 and r13";
+			goto no_mod;
+		}
+		if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent ar.bspstore and r13";
+			goto no_mod;
+		}
+		va.p = old_bspstore;
+		if (va.f.reg < 5) {
+			msg = "old_bspstore is in the wrong region";
+			goto no_mod;
+		}
+		if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent ar.bsp and r13";
+			goto no_mod;
+		}
+		size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
+		if (ar_bspstore + size > r12) {
+			msg = "no room for blocked state";
+			goto no_mod;
+		}
 	}
-	memcpy(current->comm, comm, sizeof(current->comm));
+
+	ia64_mca_modify_comm(previous_current);
 
 	/* Make the original task look blocked.  First stack a struct pt_regs,
 	 * describing the state at the time of interrupt.  mca_asm.S built a
@@ -908,7 +963,7 @@ no_mod:
 static void
 ia64_wait_for_slaves(int monarch)
 {
-	int c, wait = 0;
+	int c, wait = 0, missing = 0;
 	for_each_online_cpu(c) {
 		if (c == monarch)
 			continue;
@@ -919,15 +974,32 @@ ia64_wait_for_slaves(int monarch)
 		}
 	}
 	if (!wait)
-		return;
+		goto all_in;
 	for_each_online_cpu(c) {
 		if (c == monarch)
 			continue;
 		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
 			udelay(5*1000000);	/* wait 5 seconds for slaves (arbitrary) */
+			if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+				missing = 1;
 			break;
 		}
 	}
+	if (!missing)
+		goto all_in;
+	printk(KERN_INFO "OS MCA slave did not rendezvous on cpu");
+	for_each_online_cpu(c) {
+		if (c == monarch)
+			continue;
+		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+			printk(" %d", c);
+	}
+	printk("\n");
+	return;
+
+all_in:
+	printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n");
+	return;
 }
 
 /*
@@ -953,6 +1025,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 	task_t *previous_current;
 
 	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
+	console_loglevel = 15;	/* make sure printks make it to console */
+	printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n",
+		sos->proc_state_param, cpu, sos->monarch);
+
 	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
 	monarch_cpu = cpu;
 	if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0)
@@ -1416,7 +1492,7 @@ static struct irqaction mca_cpep_irqaction = {
  * format most of the fields.
  */
 
-static void
+static void __cpuinit
 format_mca_init_stack(void *mca_data, unsigned long offset,
 		const char *type, int cpu)
 {
@@ -1430,7 +1506,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
 	ti->cpu = cpu;
 	p->thread_info = ti;
 	p->state = TASK_UNINTERRUPTIBLE;
-	__set_bit(cpu, &p->cpus_allowed);
+	cpu_set(cpu, p->cpus_allowed);
 	INIT_LIST_HEAD(&p->tasks);
 	p->parent = p->real_parent = p->group_leader = p;
 	INIT_LIST_HEAD(&p->children);
@@ -1440,15 +1516,17 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
 
 /* Do per-CPU MCA-related initialization.  */
 
-void __devinit
+void __cpuinit
 ia64_mca_cpu_init(void *cpu_data)
 {
 	void *pal_vaddr;
+	static int first_time = 1;
 
-	if (smp_processor_id() == 0) {
+	if (first_time) {
 		void *mca_data;
 		int cpu;
 
+		first_time = 0;
 		mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
 					 * NR_CPUS + KERNEL_STACK_SIZE);
 		mca_data = (void *)(((unsigned long)mca_data +
@@ -1704,6 +1782,7 @@ ia64_mca_late_init(void)
 					desc = irq_descp(irq);
 					desc->status |= IRQ_PER_CPU;
 					setup_irq(irq, &mca_cpe_irqaction);
+					ia64_cpe_irq = irq;
 				}
 			ia64_mca_register_cpev(cpe_vector);
 			IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index e883d85906d..37c88eb5587 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -6,6 +6,7 @@
  * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
  * Copyright (C) 2005 Silicon Graphics, Inc
  * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
+ * Copyright (C) 2006 Russ Anderson <rja@sgi.com>
  */
 #include <linux/config.h>
 #include <linux/types.h>
@@ -121,11 +122,12 @@ mca_page_isolate(unsigned long paddr)
  */
 
 void
-mca_handler_bh(unsigned long paddr)
+mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
 {
-	printk(KERN_ERR
-		"OS_MCA: process [pid: %d](%s) encounters MCA (paddr=%lx)\n",
-		current->pid, current->comm, paddr);
+	printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
+		"iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
+		raw_smp_processor_id(), current->pid, current->uid,
+		iip, ipsr, paddr, current->comm);
 
 	spin_lock(&mca_bh_lock);
 	switch (mca_page_isolate(paddr)) {
@@ -442,21 +444,26 @@ recover_from_read_error(slidx_table_t *slidx,
 	if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
 		return 0;
 	psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
+	psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
 
 	/*
 	 *  Check the privilege level of interrupted context.
 	 *   If it is user-mode, then terminate affected process.
 	 */
-	if (psr1->cpl != 0) {
+
+	pmsa = sos->pal_min_state;
+	if (psr1->cpl != 0 ||
+	   ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
 		smei = peidx_bus_check(peidx, 0);
 		if (smei->valid.target_identifier) {
 			/*
 			 *  setup for resume to bottom half of MCA,
 			 * "mca_handler_bhhook"
 			 */
-			pmsa = sos->pal_min_state;
-			/* pass to bhhook as 1st argument (gr8) */
+			/* pass to bhhook as argument (gr8, ...) */
 			pmsa->pmsa_gr[8-1] = smei->target_identifier;
+			pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
+			pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
 			/* set interrupted return address (but no use) */
 			pmsa->pmsa_br0 = pmsa->pmsa_iip;
 			/* change resume address to bottom half */
@@ -466,6 +473,7 @@ recover_from_read_error(slidx_table_t *slidx,
 			psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
 			psr2->cpl = 0;
 			psr2->ri  = 0;
+			psr2->bn  = 1;
 			psr2->i  = 0;
 
 			return 1;
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h
index e2f6fa1e0ef..31a2e52bb16 100644
--- a/arch/ia64/kernel/mca_drv.h
+++ b/arch/ia64/kernel/mca_drv.h
@@ -111,3 +111,10 @@ typedef struct slidx_table {
 	slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\
 	__count; })
 
+struct mca_table_entry {
+	int start_addr;	/* location-relative starting address of MCA recoverable range */
+	int end_addr;	/* location-relative ending address of MCA recoverable range */
+};
+
+extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
+extern int mca_recover_range(unsigned long);
diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S
index 3f298ee4d00..e6a580d354b 100644
--- a/arch/ia64/kernel/mca_drv_asm.S
+++ b/arch/ia64/kernel/mca_drv_asm.S
@@ -14,15 +14,12 @@
 
 GLOBAL_ENTRY(mca_handler_bhhook)
 	invala				// clear RSE ?
-	;;
 	cover
 	;;
 	clrrrb
 	;;						
-	alloc	r16=ar.pfs,0,2,1,0	// make a new frame
-	;;
+	alloc	r16=ar.pfs,0,2,3,0	// make a new frame
 	mov	ar.rsc=0
-	;;
 	mov	r13=IA64_KR(CURRENT)	// current task pointer
 	;;
 	mov	r2=r13
@@ -30,7 +27,6 @@ GLOBAL_ENTRY(mca_handler_bhhook)
 	addl	r22=IA64_RBS_OFFSET,r2
 	;;
 	mov	ar.bspstore=r22
-	;;
 	addl	sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
 	;;
 	adds	r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
@@ -40,12 +36,12 @@ GLOBAL_ENTRY(mca_handler_bhhook)
 	movl	loc1=mca_handler_bh	// recovery C function
 	;;
 	mov	out0=r8			// poisoned address
+	mov	out1=r9			// iip
+	mov	out2=r10		// psr
 	mov	b6=loc1
 	;;
 	mov	loc1=rp
-	;;
-	ssm	psr.i
-	;;
+	ssm	psr.i | psr.ic
 	br.call.sptk.many rp=b6		// does not return ...
 	;;
 	mov	ar.pfs=loc0
@@ -53,5 +49,4 @@ GLOBAL_ENTRY(mca_handler_bhhook)
 	;;
 	mov	r8=r0
 	br.ret.sptk.many rp
-	;;
 END(mca_handler_bhhook)
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index a68ce667809..0766493d4d0 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -25,7 +25,7 @@
 #include <asm/processor.h>
 #include <asm/smp.h>
 
-u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_to_node_map);
 
 cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index 6a4ac7d70b3..bc11bb096f5 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -115,7 +115,7 @@ ia64_patch_vtop (unsigned long start, unsigned long end)
 	ia64_srlz_i();
 }
 
-void
+void __init
 ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 {
 	static int first_time = 1;
@@ -149,7 +149,7 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 	ia64_srlz_i();
 }
 
-static void
+static void __init
 patch_fsyscall_table (unsigned long start, unsigned long end)
 {
 	extern unsigned long fsyscall_table[NR_syscalls];
@@ -166,7 +166,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end)
 	ia64_srlz_i();
 }
 
-static void
+static void __init
 patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
 {
 	extern char fsys_bubble_down[];
@@ -184,7 +184,7 @@ patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
 	ia64_srlz_i();
 }
 
-void
+void __init
 ia64_patch_gate (void)
 {
 #	define START(name)	((unsigned long) __start_gate_##name##_patchlist)
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 9c5194b385d..077f21216b6 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6722,6 +6722,7 @@ __initcall(pfm_init);
 void
 pfm_init_percpu (void)
 {
+	static int first_time=1;
 	/*
 	 * make sure no measurement is active
 	 * (may inherit programmed PMCs from EFI).
@@ -6734,8 +6735,10 @@ pfm_init_percpu (void)
 	 */
 	pfm_unfreeze_pmu();
 
-	if (smp_processor_id() == 0)
+	if (first_time) {
 		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+		first_time=0;
+	}
 
 	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
 	ia64_srlz_d();
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 309d59658e5..355d57970ba 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -30,7 +30,6 @@
 #include <linux/efi.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
-#include <linux/kprobes.h>
 
 #include <asm/cpu.h>
 #include <asm/delay.h>
@@ -738,13 +737,6 @@ void
 exit_thread (void)
 {
 
-	/*
-	 * Remove function-return probe instances associated with this task
-	 * and put them back on the free list. Do not insert an exit probe for
-	 * this function, it will be disabled by kprobe_flush_task if you do.
-	 */
-	kprobe_flush_task(current);
-
 	ia64_drop_fpu(current);
 #ifdef CONFIG_PERFMON
        /* if needed, stop monitoring and flush state to perfmon context */
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index eaed14aac6a..9887c8787e7 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1656,8 +1656,14 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 		     long arg4, long arg5, long arg6, long arg7,
 		     struct pt_regs regs)
 {
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(current, AUDITSC_RESULT(regs.r10), regs.r8);
+	if (unlikely(current->audit_context)) {
+		int success = AUDITSC_RESULT(regs.r10);
+		long result = regs.r8;
+
+		if (success != AUDITSC_SUCCESS)
+			result = -result;
+		audit_syscall_exit(current, success, result);
+	}
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE)
 	    && (current->ptrace & PT_PTRACED))
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 3258e09278d..e4dfda1eb7d 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -37,11 +37,11 @@
 #include <linux/string.h>
 #include <linux/threads.h>
 #include <linux/tty.h>
+#include <linux/dmi.h>
 #include <linux/serial.h>
 #include <linux/serial_core.h>
 #include <linux/efi.h>
 #include <linux/initrd.h>
-#include <linux/platform.h>
 #include <linux/pm.h>
 #include <linux/cpufreq.h>
 
@@ -131,8 +131,8 @@ EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
 /*
  * We use a special marker for the end of memory and it uses the extra (+1) slot
  */
-struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
-int num_rsvd_regions;
+struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata;
+int num_rsvd_regions __initdata;
 
 
 /*
@@ -141,7 +141,7 @@ int num_rsvd_regions;
  * caller-specified function is called with the memory ranges that remain after filtering.
  * This routine does not assume the incoming segments are sorted.
  */
-int
+int __init
 filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
 {
 	unsigned long range_start, range_end, prev_start;
@@ -177,7 +177,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
 	return 0;
 }
 
-static void
+static void __init
 sort_regions (struct rsvd_region *rsvd_region, int max)
 {
 	int j;
@@ -218,7 +218,7 @@ __initcall(register_memory);
  * initrd, etc.  There are currently %IA64_MAX_RSVD_REGIONS defined,
  * see include/asm-ia64/meminit.h if you need to define more.
  */
-void
+void __init
 reserve_memory (void)
 {
 	int n = 0;
@@ -270,7 +270,7 @@ reserve_memory (void)
  * Grab the initrd start and end from the boot parameter struct given us by
  * the boot loader.
  */
-void
+void __init
 find_initrd (void)
 {
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -362,7 +362,7 @@ mark_bsp_online (void)
 }
 
 #ifdef CONFIG_SMP
-static void
+static void __init
 check_for_logical_procs (void)
 {
 	pal_logical_to_physical_t info;
@@ -389,6 +389,14 @@ check_for_logical_procs (void)
 }
 #endif
 
+static __initdata int nomca;
+static __init int setup_nomca(char *s)
+{
+	nomca = 1;
+	return 0;
+}
+early_param("nomca", setup_nomca);
+
 void __init
 setup_arch (char **cmdline_p)
 {
@@ -402,35 +410,15 @@ setup_arch (char **cmdline_p)
 	efi_init();
 	io_port_init();
 
+	parse_early_param();
+
 #ifdef CONFIG_IA64_GENERIC
-	{
-		const char *mvec_name = strstr (*cmdline_p, "machvec=");
-		char str[64];
-
-		if (mvec_name) {
-			const char *end;
-			size_t len;
-
-			mvec_name += 8;
-			end = strchr (mvec_name, ' ');
-			if (end)
-				len = end - mvec_name;
-			else
-				len = strlen (mvec_name);
-			len = min(len, sizeof (str) - 1);
-			strncpy (str, mvec_name, len);
-			str[len] = '\0';
-			mvec_name = str;
-		} else
-			mvec_name = acpi_get_sysname();
-		machvec_init(mvec_name);
-	}
+	machvec_init(NULL);
 #endif
 
 	if (early_console_setup(*cmdline_p) == 0)
 		mark_bsp_online();
 
-	parse_early_param();
 #ifdef CONFIG_ACPI
 	/* Initialize the ACPI boot-time table parser */
 	acpi_table_init();
@@ -446,7 +434,7 @@ setup_arch (char **cmdline_p)
 	find_memory();
 
 	/* process SAL system table: */
-	ia64_sal_init(efi.sal_systab);
+	ia64_sal_init(__va(efi.sal_systab));
 
 	ia64_setup_printk_clock();
 
@@ -493,7 +481,7 @@ setup_arch (char **cmdline_p)
 #endif
 
 	/* enable IA-64 Machine Check Abort Handling unless disabled */
-	if (!strstr(saved_command_line, "nomca"))
+	if (!nomca)
 		ia64_mca_init();
 
 	platform_setup(cmdline_p);
@@ -623,7 +611,7 @@ struct seq_operations cpuinfo_op = {
 	.show =		show_cpuinfo
 };
 
-void
+static void __cpuinit
 identify_cpu (struct cpuinfo_ia64 *c)
 {
 	union {
@@ -700,7 +688,7 @@ setup_per_cpu_areas (void)
  * In addition, the minimum of the i-cache stride sizes is calculated for
  * "flush_icache_range()".
  */
-static void
+static void __cpuinit
 get_max_cacheline_size (void)
 {
 	unsigned long line_size, max = 1;
@@ -763,10 +751,10 @@ get_max_cacheline_size (void)
  * cpu_init() initializes state that is per-CPU.  This function acts
  * as a 'CPU state barrier', nothing should get across.
  */
-void
+void __cpuinit
 cpu_init (void)
 {
-	extern void __devinit ia64_mmu_init (void *);
+	extern void __cpuinit ia64_mmu_init (void *);
 	unsigned long num_phys_stacked;
 	pal_vm_info_2_u_t vmi;
 	unsigned int max_ctx;
@@ -894,9 +882,16 @@ void sched_cacheflush(void)
 	ia64_sal_cache_flush(3);
 }
 
-void
+void __init
 check_bugs (void)
 {
 	ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
 			       (unsigned long) __end___mckinley_e9_bundles);
 }
+
+static int __init run_dmi_scan(void)
+{
+	dmi_scan_machine();
+	return 0;
+}
+core_initcall(run_dmi_scan);
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 463f6bb44d0..1d7903ee212 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
 	}
 	return 0;
 }
-
-/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it
- * could not be delivered.  It is important that the target process is not
- * allowed to do any more work in user space.  Possible cases for the target
- * process:
- *
- * - It is sleeping and will wake up soon.  Store the data in the current task,
- *   the signal will be sent when the current task returns from the next
- *   interrupt.
- *
- * - It is running in user context.  Store the data in the current task, the
- *   signal will be sent when the current task returns from the next interrupt.
- *
- * - It is running in kernel context on this or another cpu and will return to
- *   user context.  Store the data in the target task, the signal will be sent
- *   to itself when the target task returns to user space.
- *
- * - It is running in kernel context on this cpu and will sleep before
- *   returning to user context.  Because this is also the current task, the
- *   signal will not get delivered and the task could sleep indefinitely.
- *   Store the data in the idle task for this cpu, the signal will be sent
- *   after the idle task processes its next interrupt.
- *
- * To cover all cases, store the data in the target task, the current task and
- * the idle task on this cpu.  Whatever happens, the signal will be delivered
- * to the target task before it can do any useful user space work.  Multiple
- * deliveries have no unwanted side effects.
- *
- * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts
- * disabled.  It must not take any locks nor use kernel structures or services
- * that require locks.
- */
-
-/* To ensure that we get the right pid, check its start time.  To avoid extra
- * include files in thread_info.h, convert the task start_time to unsigned long,
- * giving us a cycle time of > 580 years.
- */
-static inline unsigned long
-start_time_ul(const struct task_struct *t)
-{
-	return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec;
-}
-
-void
-set_sigdelayed(pid_t pid, int signo, int code, void __user *addr)
-{
-	struct task_struct *t;
-	unsigned long start_time =  0;
-	int i;
-
-	for (i = 1; i <= 3; ++i) {
-		switch (i) {
-		case 1:
-			t = find_task_by_pid(pid);
-			if (t)
-				start_time = start_time_ul(t);
-			break;
-		case 2:
-			t = current;
-			break;
-		default:
-			t = idle_task(smp_processor_id());
-			break;
-		}
-
-		if (!t)
-			return;
-		task_thread_info(t)->sigdelayed.signo = signo;
-		task_thread_info(t)->sigdelayed.code = code;
-		task_thread_info(t)->sigdelayed.addr = addr;
-		task_thread_info(t)->sigdelayed.start_time = start_time;
-		task_thread_info(t)->sigdelayed.pid = pid;
-		wmb();
-		set_tsk_thread_flag(t, TIF_SIGDELAYED);
-	}
-}
-
-/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that
- * was detected in MCA/INIT/NMI/PMI context where it could not be delivered.
- */
-
-void
-do_sigdelayed(void)
-{
-	struct siginfo siginfo;
-	pid_t pid;
-	struct task_struct *t;
-
-	clear_thread_flag(TIF_SIGDELAYED);
-	memset(&siginfo, 0, sizeof(siginfo));
-	siginfo.si_signo = current_thread_info()->sigdelayed.signo;
-	siginfo.si_code = current_thread_info()->sigdelayed.code;
-	siginfo.si_addr = current_thread_info()->sigdelayed.addr;
-	pid = current_thread_info()->sigdelayed.pid;
-	t = find_task_by_pid(pid);
-	if (!t)
-		return;
-	if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
-		return;
-	force_sig_info(siginfo.si_signo, &siginfo, t);
-}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index b681ef34a86..44e9547878a 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -70,6 +70,12 @@
 #endif
 
 #ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_PERMIT_BSP_REMOVE
+#define bsp_remove_ok	1
+#else
+#define bsp_remove_ok	0
+#endif
+
 /*
  * Store all idle threads, this can be reused instead of creating
  * a new thread. Also avoids complicated thread destroy functionality
@@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
 /*
  * ITC synchronization related stuff:
  */
-#define MASTER	0
+#define MASTER	(0)
 #define SLAVE	(SMP_CACHE_BYTES/8)
 
 #define NUM_ROUNDS	64	/* magic value */
@@ -151,6 +157,27 @@ char __initdata no_int_routing;
 
 unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
 
+#ifdef CONFIG_FORCE_CPEI_RETARGET
+#define CPEI_OVERRIDE_DEFAULT	(1)
+#else
+#define CPEI_OVERRIDE_DEFAULT	(0)
+#endif
+
+unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT;
+
+static int __init
+cmdl_force_cpei(char *str)
+{
+	int value=0;
+
+	get_option (&str, &value);
+	force_cpei_retarget = value;
+
+	return 1;
+}
+
+__setup("force_cpei=", cmdl_force_cpei);
+
 static int __init
 nointroute (char *str)
 {
@@ -161,6 +188,27 @@ nointroute (char *str)
 
 __setup("nointroute", nointroute);
 
+static void fix_b0_for_bsp(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpuid;
+	static int fix_bsp_b0 = 1;
+
+	cpuid = smp_processor_id();
+
+	/*
+	 * Cache the b0 value on the first AP that comes up
+	 */
+	if (!(fix_bsp_b0 && cpuid))
+		return;
+
+	sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0];
+	printk ("Fixed BSP b0 value from CPU %d\n", cpuid);
+
+	fix_bsp_b0 = 0;
+#endif
+}
+
 void
 sync_master (void *arg)
 {
@@ -327,8 +375,9 @@ smp_setup_percpu_timer (void)
 static void __devinit
 smp_callin (void)
 {
-	int cpuid, phys_id;
+	int cpuid, phys_id, itc_master;
 	extern void ia64_init_itm(void);
+	extern volatile int time_keeper_id;
 
 #ifdef CONFIG_PERFMON
 	extern void pfm_init_percpu(void);
@@ -336,6 +385,7 @@ smp_callin (void)
 
 	cpuid = smp_processor_id();
 	phys_id = hard_smp_processor_id();
+	itc_master = time_keeper_id;
 
 	if (cpu_online(cpuid)) {
 		printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
@@ -343,6 +393,8 @@ smp_callin (void)
 		BUG();
 	}
 
+	fix_b0_for_bsp();
+
 	lock_ipi_calllock();
 	cpu_set(cpuid, cpu_online_map);
 	unlock_ipi_calllock();
@@ -365,8 +417,8 @@ smp_callin (void)
 		 * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls
 		 * local_bh_enable(), which bugs out if irqs are not enabled...
 		 */
-		Dprintk("Going to syncup ITC with BP.\n");
-		ia64_sync_itc(0);
+		Dprintk("Going to syncup ITC with ITC Master.\n");
+		ia64_sync_itc(itc_master);
 	}
 
 	/*
@@ -572,32 +624,8 @@ void __devinit smp_prepare_boot_cpu(void)
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 }
 
-/*
- * mt_info[] is a temporary store for all info returned by
- * PAL_LOGICAL_TO_PHYSICAL, to be copied into cpuinfo_ia64 when the
- * specific cpu comes.
- */
-static struct {
-	__u32   socket_id;
-	__u16   core_id;
-	__u16   thread_id;
-	__u16   proc_fixed_addr;
-	__u8    valid;
-} mt_info[NR_CPUS] __devinitdata;
-
 #ifdef CONFIG_HOTPLUG_CPU
 static inline void
-remove_from_mtinfo(int cpu)
-{
-	int i;
-
-	for_each_cpu(i)
-		if (mt_info[i].valid &&  mt_info[i].socket_id ==
-		    				cpu_data(cpu)->socket_id)
-			mt_info[i].valid = 0;
-}
-
-static inline void
 clear_cpu_sibling_map(int cpu)
 {
 	int i;
@@ -626,15 +654,50 @@ remove_siblinginfo(int cpu)
 
 	/* remove it from all sibling map's */
 	clear_cpu_sibling_map(cpu);
+}
+
+extern void fixup_irqs(void);
 
-	/* if this cpu is the last in the core group, remove all its info 
-	 * from mt_info structure
+int migrate_platform_irqs(unsigned int cpu)
+{
+	int new_cpei_cpu;
+	irq_desc_t *desc = NULL;
+	cpumask_t 	mask;
+	int 		retval = 0;
+
+	/*
+	 * dont permit CPEI target to removed.
 	 */
-	if (last)
-		remove_from_mtinfo(cpu);
+	if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) {
+		printk ("CPU (%d) is CPEI Target\n", cpu);
+		if (can_cpei_retarget()) {
+			/*
+			 * Now re-target the CPEI to a different processor
+			 */
+			new_cpei_cpu = any_online_cpu(cpu_online_map);
+			mask = cpumask_of_cpu(new_cpei_cpu);
+			set_cpei_target_cpu(new_cpei_cpu);
+			desc = irq_descp(ia64_cpe_irq);
+			/*
+			 * Switch for now, immediatly, we need to do fake intr
+			 * as other interrupts, but need to study CPEI behaviour with
+			 * polling before making changes.
+			 */
+			if (desc) {
+				desc->handler->disable(ia64_cpe_irq);
+				desc->handler->set_affinity(ia64_cpe_irq, mask);
+				desc->handler->enable(ia64_cpe_irq);
+				printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu);
+			}
+		}
+		if (!desc) {
+			printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
+			retval = -EBUSY;
+		}
+	}
+	return retval;
 }
 
-extern void fixup_irqs(void);
 /* must be called with cpucontrol mutex held */
 int __cpu_disable(void)
 {
@@ -643,8 +706,17 @@ int __cpu_disable(void)
 	/*
 	 * dont permit boot processor for now
 	 */
-	if (cpu == 0)
-		return -EBUSY;
+	if (cpu == 0 && !bsp_remove_ok) {
+		printk ("Your platform does not support removal of BSP\n");
+		return (-EBUSY);
+	}
+
+	cpu_clear(cpu, cpu_online_map);
+
+	if (migrate_platform_irqs(cpu)) {
+		cpu_set(cpu, cpu_online_map);
+		return (-EBUSY);
+	}
 
 	remove_siblinginfo(cpu);
 	cpu_clear(cpu, cpu_online_map);
@@ -776,40 +848,6 @@ init_smp_config(void)
 		       ia64_sal_strerror(sal_ret));
 }
 
-static inline int __devinit
-check_for_mtinfo_index(void)
-{
-	int i;
-	
-	for_each_cpu(i)
-		if (!mt_info[i].valid)
-			return i;
-
-	return -1;
-}
-
-/*
- * Search the mt_info to find out if this socket's cid/tid information is
- * cached or not. If the socket exists, fill in the core_id and thread_id 
- * in cpuinfo
- */
-static int __devinit
-check_for_new_socket(__u16 logical_address, struct cpuinfo_ia64 *c)
-{
-	int i;
-	__u32 sid = c->socket_id;
-
-	for_each_cpu(i) {
-		if (mt_info[i].valid && mt_info[i].proc_fixed_addr == logical_address
-		    && mt_info[i].socket_id == sid) {
-			c->core_id = mt_info[i].core_id;
-			c->thread_id = mt_info[i].thread_id;
-			return 1; /* not a new socket */
-		}
-	}
-	return 0;
-}
-
 /*
  * identify_siblings(cpu) gets called from identify_cpu. This populates the 
  * information related to logical execution units in per_cpu_data structure.
@@ -819,14 +857,12 @@ identify_siblings(struct cpuinfo_ia64 *c)
 {
 	s64 status;
 	u16 pltid;
-	u64 proc_fixed_addr;
-	int count, i;
 	pal_logical_to_physical_t info;
 
 	if (smp_num_cpucores == 1 && smp_num_siblings == 1)
 		return;
 
-	if ((status = ia64_pal_logical_to_phys(0, &info)) != PAL_STATUS_SUCCESS) {
+	if ((status = ia64_pal_logical_to_phys(-1, &info)) != PAL_STATUS_SUCCESS) {
 		printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
 		       status);
 		return;
@@ -835,47 +871,12 @@ identify_siblings(struct cpuinfo_ia64 *c)
 		printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status);
 		return;
 	}
-	if ((status = ia64_pal_fixed_addr(&proc_fixed_addr)) != PAL_STATUS_SUCCESS) {
-		printk(KERN_ERR "ia64_pal_fixed_addr failed with %ld\n", status);
-		return;
-	}
 
 	c->socket_id =  (pltid << 8) | info.overview_ppid;
 	c->cores_per_socket = info.overview_cpp;
 	c->threads_per_core = info.overview_tpc;
-	count = c->num_log = info.overview_num_log;
-
-	/* If the thread and core id information is already cached, then
-	 * we will simply update cpu_info and return. Otherwise, we will
-	 * do the PAL calls and cache core and thread id's of all the siblings.
-	 */
-	if (check_for_new_socket(proc_fixed_addr, c))
-		return;
-
-	for (i = 0; i < count; i++) {
-		int index;
-
-		if (i && (status = ia64_pal_logical_to_phys(i, &info))
-			  != PAL_STATUS_SUCCESS) {
-                	printk(KERN_ERR "ia64_pal_logical_to_phys failed"
-					" with %ld\n", status);
-                	return;
-		}
-		if (info.log2_la == proc_fixed_addr) {
-			c->core_id = info.log1_cid;
-			c->thread_id = info.log1_tid;
-		}
+	c->num_log = info.overview_num_log;
 
-		index = check_for_mtinfo_index();
-		/* We will not do the mt_info caching optimization in this case.
-		 */
-		if (index < 0)
-			continue;
-
-		mt_info[index].valid = 1;
-		mt_info[index].socket_id = c->socket_id;
-		mt_info[index].core_id = info.log1_cid;
-		mt_info[index].thread_id = info.log1_tid;
-		mt_info[index].proc_fixed_addr = info.log2_la;
-	}
+	c->core_id = info.log1_cid;
+	c->thread_id = info.log1_tid;
 }
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 307d01e15b2..ac167436e93 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -32,7 +32,7 @@
 
 extern unsigned long wall_jiffies;
 
-#define TIME_KEEPER_ID	0	/* smp_processor_id() of time-keeper */
+volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
 
 #ifdef CONFIG_IA64_DEBUG_IRQ
 
@@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
 
 		new_itm += local_cpu_data->itm_delta;
 
-		if (smp_processor_id() == TIME_KEEPER_ID) {
+		if (smp_processor_id() == time_keeper_id) {
 			/*
 			 * Here we are in the timer irq handler. We have irqs locally
 			 * disabled, but we don't know if the timer_bh is running on
@@ -236,6 +236,11 @@ static struct irqaction timer_irqaction = {
 	.name =		"timer"
 };
 
+void __devinit ia64_disable_timer(void)
+{
+	ia64_set_itv(1 << 16);
+}
+
 void __init
 time_init (void)
 {
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 6e5eea19fa6..3b6fd798c4d 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,7 +36,7 @@ int arch_register_cpu(int num)
 	parent = &sysfs_nodes[cpu_to_node(num)];
 #endif /* CONFIG_NUMA */
 
-#ifdef CONFIG_ACPI
+#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
 	/*
 	 * If CPEI cannot be re-targetted, and this is
 	 * CPEI target, then dont create the control file
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index dabd6c32641..7c1ddc8ac44 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -30,19 +30,19 @@ extern spinlock_t timerlist_lock;
 fpswa_interface_t *fpswa_interface;
 EXPORT_SYMBOL(fpswa_interface);
 
-struct notifier_block *ia64die_chain;
+ATOMIC_NOTIFIER_HEAD(ia64die_chain);
 
 int
 register_die_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_register(&ia64die_chain, nb);
+	return atomic_notifier_chain_register(&ia64die_chain, nb);
 }
 EXPORT_SYMBOL_GPL(register_die_notifier);
 
 int
 unregister_die_notifier(struct notifier_block *nb)
 {
-	return notifier_chain_unregister(&ia64die_chain, nb);
+	return atomic_notifier_chain_unregister(&ia64die_chain, nb);
 }
 EXPORT_SYMBOL_GPL(unregister_die_notifier);
 
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 73af6267d2e..783600fe52b 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -70,34 +70,18 @@ SECTIONS
 	  __stop___ex_table = .;
 	}
 
-  .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
-	{
-	  __start___vtop_patchlist = .;
-	  *(.data.patch.vtop)
-	  __end___vtop_patchlist = .;
-	}
-
-  .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
+  /* MCA table */
+  . = ALIGN(16);
+  __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET)
 	{
-	  __start___mckinley_e9_bundles = .;
-	  *(.data.patch.mckinley_e9)
-	  __end___mckinley_e9_bundles = .;
+	  __start___mca_table = .;
+	  *(__mca_table)
+	  __stop___mca_table = .;
 	}
 
   /* Global data */
   _data = .;
 
-#if defined(CONFIG_IA64_GENERIC)
-  /* Machine Vector */
-  . = ALIGN(16);
-  .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
-	{
-	  machvec_start = .;
-	  *(.machvec)
-	  machvec_end = .;
-	}
-#endif
-
   /* Unwind info & table: */
   . = ALIGN(8);
   .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
@@ -154,6 +138,32 @@ SECTIONS
 	  *(.initcall7.init)
 	  __initcall_end = .;
 	}
+
+  .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
+	{
+	  __start___vtop_patchlist = .;
+	  *(.data.patch.vtop)
+	  __end___vtop_patchlist = .;
+	}
+
+  .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
+	{
+	  __start___mckinley_e9_bundles = .;
+	  *(.data.patch.mckinley_e9)
+	  __end___mckinley_e9_bundles = .;
+	}
+
+#if defined(CONFIG_IA64_GENERIC)
+  /* Machine Vector */
+  . = ALIGN(16);
+  .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
+	{
+	  machvec_start = .;
+	  *(.machvec)
+	  machvec_end = .;
+	}
+#endif
+
    __con_initcall_start = .;
   .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
 	{ *(.con_initcall.init) }
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
index ac64664a180..d8536a2c22a 100644
--- a/arch/ia64/lib/Makefile
+++ b/arch/ia64/lib/Makefile
@@ -6,7 +6,7 @@ obj-y := io.o
 
 lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o			\
 	__divdi3.o __udivdi3.o __moddi3.o __umoddi3.o			\
-	bitop.o checksum.o clear_page.o csum_partial_copy.o		\
+	checksum.o clear_page.o csum_partial_copy.o			\
 	clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o	\
 	flush.o ip_fast_csum.o do_csum.o				\
 	memset.o strlen.o
diff --git a/arch/ia64/lib/bitop.c b/arch/ia64/lib/bitop.c
deleted file mode 100644
index 82e299c8464..00000000000
--- a/arch/ia64/lib/bitop.c
+++ /dev/null
@@ -1,88 +0,0 @@
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/intrinsics.h>
-#include <linux/module.h>
-#include <linux/bitops.h>
-
-/*
- * Find next zero bit in a bitmap reasonably efficiently..
- */
-
-int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long offset)
-{
-	unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
-	unsigned long result = offset & ~63UL;
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset &= 63UL;
-	if (offset) {
-		tmp = *(p++);
-		tmp |= ~0UL >> (64-offset);
-		if (size < 64)
-			goto found_first;
-		if (~tmp)
-			goto found_middle;
-		size -= 64;
-		result += 64;
-	}
-	while (size & ~63UL) {
-		if (~(tmp = *(p++)))
-			goto found_middle;
-		result += 64;
-		size -= 64;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-found_first:
-	tmp |= ~0UL << size;
-	if (tmp == ~0UL)		/* any bits zero? */
-		return result + size;	/* nope */
-found_middle:
-	return result + ffz(tmp);
-}
-EXPORT_SYMBOL(__find_next_zero_bit);
-
-/*
- * Find next bit in a bitmap reasonably efficiently..
- */
-int __find_next_bit(const void *addr, unsigned long size, unsigned long offset)
-{
-	unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
-	unsigned long result = offset & ~63UL;
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset &= 63UL;
-	if (offset) {
-		tmp = *(p++);
-		tmp &= ~0UL << offset;
-		if (size < 64)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= 64;
-		result += 64;
-	}
-	while (size & ~63UL) {
-		if ((tmp = *(p++)))
-			goto found_middle;
-		result += 64;
-		size -= 64;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-  found_first:
-	tmp &= ~0UL >> (64-size);
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size; /* Nope. */
-  found_middle:
-	return result + __ffs(tmp);
-}
-EXPORT_SYMBOL(__find_next_bit);
diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile
index d78d20f0a0f..bb0a01a8187 100644
--- a/arch/ia64/mm/Makefile
+++ b/arch/ia64/mm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the ia64-specific parts of the memory manager.
 #
 
-obj-y := init.o fault.o tlb.o extable.o
+obj-y := init.o fault.o tlb.o extable.o ioremap.o
 
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NUMA)	   += numa.o
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index acaaec4e468..84fd1c14c8a 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -97,7 +97,7 @@ find_max_pfn (unsigned long start, unsigned long end, void *arg)
  * Find a place to put the bootmap and return its starting address in
  * bootmap_start.  This address must be page-aligned.
  */
-int
+static int __init
 find_bootmap_location (unsigned long start, unsigned long end, void *arg)
 {
 	unsigned long needed = *(unsigned long *)arg;
@@ -141,7 +141,7 @@ find_bootmap_location (unsigned long start, unsigned long end, void *arg)
  * Walk the EFI memory map and find usable memory for the system, taking
  * into account reserved areas.
  */
-void
+void __init
 find_memory (void)
 {
 	unsigned long bootmap_size;
@@ -176,18 +176,20 @@ find_memory (void)
  *
  * Allocate and setup per-cpu data areas.
  */
-void *
+void * __cpuinit
 per_cpu_init (void)
 {
 	void *cpu_data;
 	int cpu;
+	static int first_time=1;
 
 	/*
 	 * get_free_pages() cannot be used before cpu_init() done.  BSP
 	 * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
 	 * get_zeroed_page().
 	 */
-	if (smp_processor_id() == 0) {
+	if (first_time) {
+		first_time=0;
 		cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
 					   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
@@ -226,7 +228,7 @@ count_dma_pages (u64 start, u64 end, void *arg)
  * Set up the page tables.
  */
 
-void
+void __init
 paging_init (void)
 {
 	unsigned long max_dma;
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index c87d6d1d581..ec9eeb89975 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -379,31 +379,6 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
 }
 
 /**
- * pgdat_insert - insert the pgdat into global pgdat_list
- * @pgdat: the pgdat for a node.
- */
-static void __init pgdat_insert(pg_data_t *pgdat)
-{
-	pg_data_t *prev = NULL, *next;
-
-	for_each_pgdat(next)
-		if (pgdat->node_id < next->node_id)
-			break;
-		else
-			prev = next;
-
-	if (prev) {
-		prev->pgdat_next = pgdat;
-		pgdat->pgdat_next = next;
-	} else {
-		pgdat->pgdat_next = pgdat_list;
-		pgdat_list = pgdat;
-	}
-
-	return;
-}
-
-/**
  * memory_less_nodes - allocate and initialize CPU only nodes pernode
  *	information.
  */
@@ -525,15 +500,20 @@ void __init find_memory(void)
  * find_pernode_space() does most of this already, we just need to set
  * local_per_cpu_offset
  */
-void *per_cpu_init(void)
+void __cpuinit *per_cpu_init(void)
 {
 	int cpu;
+	static int first_time = 1;
+
 
 	if (smp_processor_id() != 0)
 		return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-		per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+	if (first_time) {
+		first_time = 0;
+		for (cpu = 0; cpu < NR_CPUS; cpu++)
+			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+	}
 
 	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
 }
@@ -555,7 +535,7 @@ void show_mem(void)
 	printk("Mem-info:\n");
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		unsigned long present;
 		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
@@ -740,11 +720,5 @@ void __init paging_init(void)
 				    pfn_offset, zholes_size);
 	}
 
-	/*
-	 * Make memory less nodes become a member of the known nodes.
-	 */
-	for_each_node_mask(node, memory_less_mask)
-		pgdat_insert(mem_data[node].pgdat);
-
 	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
 }
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 2d13889d0a9..8d506710fdb 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -68,9 +68,10 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
 #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
 
 /*
- * This function checks for proper alignment of input addr and len parameters.
+ * Don't actually need to do any preparation, but need to make sure
+ * the address is in the right region.
  */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+int prepare_hugepage_range(unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
@@ -112,8 +113,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
 			unsigned long floor, unsigned long ceiling)
 {
 	/*
-	 * This is called only when is_hugepage_only_range(addr,),
-	 * and it follows that is_hugepage_only_range(end,) also.
+	 * This is called to free hugetlb page tables.
 	 *
 	 * The offset of these addresses from the base of the hugetlb
 	 * region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that
@@ -125,9 +125,9 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
 
 	addr = htlbpage_to_page(addr);
 	end  = htlbpage_to_page(end);
-	if (is_hugepage_only_range(tlb->mm, floor, HPAGE_SIZE))
+	if (REGION_NUMBER(floor) == RGN_HPAGE)
 		floor = htlbpage_to_page(floor);
-	if (is_hugepage_only_range(tlb->mm, ceiling, HPAGE_SIZE))
+	if (REGION_NUMBER(ceiling) == RGN_HPAGE)
 		ceiling = htlbpage_to_page(ceiling);
 
 	free_pgd_range(tlb, addr, end, floor, ceiling);
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index b38b6d213c1..cafa8776a53 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -109,6 +109,7 @@ lazy_mmu_prot_update (pte_t pte)
 {
 	unsigned long addr;
 	struct page *page;
+	unsigned long order;
 
 	if (!pte_exec(pte))
 		return;				/* not an executable page... */
@@ -119,7 +120,12 @@ lazy_mmu_prot_update (pte_t pte)
 	if (test_bit(PG_arch_1, &page->flags))
 		return;				/* i-cache is already coherent with d-cache */
 
-	flush_icache_range(addr, addr + PAGE_SIZE);
+	if (PageCompound(page)) {
+		order = (unsigned long) (page[1].lru.prev);
+		flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT));
+	}
+	else
+		flush_icache_range(addr, addr + PAGE_SIZE);
 	set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
 }
 
@@ -197,7 +203,7 @@ free_initmem (void)
 	eaddr = (unsigned long) ia64_imva(__init_end);
 	while (addr < eaddr) {
 		ClearPageReserved(virt_to_page(addr));
-		set_page_count(virt_to_page(addr), 1);
+		init_page_count(virt_to_page(addr));
 		free_page(addr);
 		++totalram_pages;
 		addr += PAGE_SIZE;
@@ -206,7 +212,7 @@ free_initmem (void)
 	       (__init_end - __init_begin) >> 10);
 }
 
-void
+void __init
 free_initrd_mem (unsigned long start, unsigned long end)
 {
 	struct page *page;
@@ -252,7 +258,7 @@ free_initrd_mem (unsigned long start, unsigned long end)
 			continue;
 		page = virt_to_page(start);
 		ClearPageReserved(page);
-		set_page_count(page, 1);
+		init_page_count(page);
 		free_page(start);
 		++totalram_pages;
 	}
@@ -261,7 +267,7 @@ free_initrd_mem (unsigned long start, unsigned long end)
 /*
  * This installs a clean page in the kernel's page table.
  */
-struct page *
+static struct page * __init
 put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
 {
 	pgd_t *pgd;
@@ -294,7 +300,7 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
 	return page;
 }
 
-static void
+static void __init
 setup_gate (void)
 {
 	struct page *page;
@@ -411,7 +417,7 @@ ia64_mmu_init (void *my_cpu_data)
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
 
-int
+int __init
 create_mem_map_page_table (u64 start, u64 end, void *arg)
 {
 	unsigned long address, start_page, end_page;
@@ -519,7 +525,7 @@ ia64_pfn_valid (unsigned long pfn)
 }
 EXPORT_SYMBOL(ia64_pfn_valid);
 
-int
+int __init
 find_largest_hole (u64 start, u64 end, void *arg)
 {
 	u64 *max_gap = arg;
@@ -535,7 +541,7 @@ find_largest_hole (u64 start, u64 end, void *arg)
 }
 #endif /* CONFIG_VIRTUAL_MEM_MAP */
 
-static int
+static int __init
 count_reserved_pages (u64 start, u64 end, void *arg)
 {
 	unsigned long num_reserved = 0;
@@ -556,7 +562,7 @@ count_reserved_pages (u64 start, u64 end, void *arg)
  * purposes.
  */
 
-static int nolwsys;
+static int nolwsys __initdata;
 
 static int __init
 nolwsys_setup (char *s)
@@ -567,7 +573,7 @@ nolwsys_setup (char *s)
 
 __setup("nolwsys", nolwsys_setup);
 
-void
+void __init
 mem_init (void)
 {
 	long reserved_pages, codesize, datasize, initsize;
@@ -600,7 +606,7 @@ mem_init (void)
 	kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
 	kclist_add(&kcore_kernel, _stext, _end - _stext);
 
-	for_each_pgdat(pgdat)
+	for_each_online_pgdat(pgdat)
 		if (pgdat->bdata->node_bootmem_map)
 			totalram_pages += free_all_bootmem_node(pgdat);
 
@@ -640,7 +646,7 @@ mem_init (void)
 void online_page(struct page *page)
 {
 	ClearPageReserved(page);
-	set_page_count(page, 1);
+	init_page_count(page);
 	__free_page(page);
 	totalram_pages++;
 	num_physpages++;
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
new file mode 100644
index 00000000000..643ccc6960c
--- /dev/null
+++ b/arch/ia64/mm/ioremap.c
@@ -0,0 +1,43 @@
+/*
+ * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/compiler.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <asm/io.h>
+
+static inline void __iomem *
+__ioremap (unsigned long offset, unsigned long size)
+{
+	return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset);
+}
+
+void __iomem *
+ioremap (unsigned long offset, unsigned long size)
+{
+	if (efi_mem_attribute_range(offset, size, EFI_MEMORY_WB))
+		return phys_to_virt(offset);
+
+	if (efi_mem_attribute_range(offset, size, EFI_MEMORY_UC))
+		return __ioremap(offset, size);
+
+	/*
+	 * Someday this should check ACPI resources so we
+	 * can do the right thing for hot-plugged regions.
+	 */
+	return __ioremap(offset, size);
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *
+ioremap_nocache (unsigned long offset, unsigned long size)
+{
+	return __ioremap(offset, size);
+}
+EXPORT_SYMBOL(ioremap_nocache);
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 6a4eec9113e..4dbbca0b5e9 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -156,17 +156,19 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
 		nbits = purge.max_bits;
 	start &= ~((1UL << nbits) - 1);
 
-# ifdef CONFIG_SMP
-	platform_global_tlb_purge(mm, start, end, nbits);
-# else
 	preempt_disable();
+#ifdef CONFIG_SMP
+	if (mm != current->active_mm || cpus_weight(mm->cpu_vm_mask) != 1) {
+		platform_global_tlb_purge(mm, start, end, nbits);
+		preempt_enable();
+		return;
+	}
+#endif
 	do {
 		ia64_ptcl(start, (nbits<<2));
 		start += (1UL << nbits);
 	} while (start < end);
 	preempt_enable();
-# endif
-
 	ia64_srlz_i();			/* srlz.i implies srlz.d */
 }
 EXPORT_SYMBOL(flush_tlb_range);
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 3e9b4eea741..ab9c48c8801 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -10,7 +10,8 @@
 CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
 
 obj-y				+= setup.o bte.o bte_error.o irq.o mca.o idle.o \
-				   huberror.o io_init.o iomv.o klconflib.o sn2/
+				   huberror.o io_init.o iomv.o klconflib.o pio_phys.o \
+				   sn2/
 obj-$(CONFIG_IA64_GENERIC)      += machvec.o
 obj-$(CONFIG_SGI_TIOCX)		+= tiocx.o
 obj-$(CONFIG_IA64_SGI_SN_XP)	+= xp.o
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index 1f11db470d9..e952ef4f6d9 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -36,7 +36,7 @@ static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface)
 	nodepda_t *tmp_nodepda;
 
 	if (nasid_to_cnodeid(nasid) == -1)
-		return (struct bteinfo_s *)NULL;;
+		return (struct bteinfo_s *)NULL;
 
 	tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid));
 	return &tmp_nodepda->bte_if[interface];
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index dfb3f290237..5101ac46264 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -13,6 +13,8 @@
 #include <asm/sn/sn_feature_sets.h>
 #include <asm/sn/geo.h>
 #include <asm/sn/io.h>
+#include <asm/sn/l1.h>
+#include <asm/sn/module.h>
 #include <asm/sn/pcibr_provider.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/pcidev.h>
@@ -710,9 +712,36 @@ cnodeid_get_geoid(cnodeid_t cnode)
 	return hubdev->hdi_geoid;
 }
 
+void sn_generate_path(struct pci_bus *pci_bus, char *address)
+{
+	nasid_t nasid;
+	cnodeid_t cnode;
+	geoid_t geoid;
+	moduleid_t moduleid;
+	u16 bricktype;
+
+	nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
+	cnode = nasid_to_cnodeid(nasid);
+	geoid = cnodeid_get_geoid(cnode);
+	moduleid = geo_module(geoid);
+
+	sprintf(address, "module_%c%c%c%c%.2d",
+		'0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)),
+		'0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)),
+		'0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)),
+		MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid));
+
+	/* Tollhouse requires slot id to be displayed */
+	bricktype = MODULE_GET_BTYPE(moduleid);
+	if ((bricktype == L1_BRICKTYPE_191010) ||
+	    (bricktype == L1_BRICKTYPE_1932))
+			sprintf(address, "%s^%d", address, geo_slot(geoid));
+}
+
 subsys_initcall(sn_pci_init);
 EXPORT_SYMBOL(sn_pci_fixup_slot);
 EXPORT_SYMBOL(sn_pci_unfixup_slot);
 EXPORT_SYMBOL(sn_pci_controller_fixup);
 EXPORT_SYMBOL(sn_bus_store_sysdata);
 EXPORT_SYMBOL(sn_bus_free_sysdata);
+EXPORT_SYMBOL(sn_generate_path);
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index c373113d073..c265e02f503 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -350,9 +350,6 @@ static void force_interrupt(int irq)
 static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
 {
 	u64 regval;
-	int irr_reg_num;
-	int irr_bit;
-	u64 irr_reg;
 	struct pcidev_info *pcidev_info;
 	struct pcibus_info *pcibus_info;
 
@@ -373,23 +370,7 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
 	    pdi_pcibus_info;
 	regval = pcireg_intr_status_get(pcibus_info);
 
-	irr_reg_num = irq_to_vector(irq) / 64;
-	irr_bit = irq_to_vector(irq) % 64;
-	switch (irr_reg_num) {
-	case 0:
-		irr_reg = ia64_getreg(_IA64_REG_CR_IRR0);
-		break;
-	case 1:
-		irr_reg = ia64_getreg(_IA64_REG_CR_IRR1);
-		break;
-	case 2:
-		irr_reg = ia64_getreg(_IA64_REG_CR_IRR2);
-		break;
-	case 3:
-		irr_reg = ia64_getreg(_IA64_REG_CR_IRR3);
-		break;
-	}
-	if (!test_bit(irr_bit, &irr_reg)) {
+	if (!ia64_get_irr(irq_to_vector(irq))) {
 		if (!test_bit(irq, pda->sn_in_service_ivecs)) {
 			regval &= 0xff;
 			if (sn_irq_info->irq_int_bit & regval &
diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S
new file mode 100644
index 00000000000..3c7d48d6ecb
--- /dev/null
+++ b/arch/ia64/sn/kernel/pio_phys.S
@@ -0,0 +1,71 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This file contains macros used to access MMR registers via
+ * uncached physical addresses.
+ *      pio_phys_read_mmr  - read an MMR
+ *      pio_phys_write_mmr - write an MMR
+ *      pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0
+ *              Second MMR will be skipped if address is NULL
+ *
+ * Addresses passed to these routines should be uncached physical addresses
+ * 	ie., 0x80000....
+ */
+
+
+
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+GLOBAL_ENTRY(pio_phys_read_mmr)
+	.prologue
+	.regstk 1,0,0,0
+	.body
+	mov r2=psr
+	rsm psr.i | psr.dt
+	;;
+	srlz.d
+	ld8.acq r8=[r32]
+	;;
+	mov psr.l=r2;;
+	srlz.d
+	br.ret.sptk.many rp
+END(pio_phys_read_mmr)
+
+GLOBAL_ENTRY(pio_phys_write_mmr)
+	.prologue
+	.regstk 2,0,0,0
+	.body
+	mov r2=psr
+	rsm psr.i | psr.dt
+	;;
+	srlz.d
+	st8.rel [r32]=r33
+	;;
+	mov psr.l=r2;;
+	srlz.d
+	br.ret.sptk.many rp
+END(pio_phys_write_mmr)
+
+GLOBAL_ENTRY(pio_atomic_phys_write_mmrs)
+	.prologue
+	.regstk 4,0,0,0
+	.body
+	mov r2=psr
+	cmp.ne p9,p0=r34,r0;
+	rsm psr.i | psr.dt | psr.ic
+	;;
+	srlz.d
+	st8.rel [r32]=r33
+(p9)	st8.rel [r34]=r35
+	;;
+	mov psr.l=r2;;
+	srlz.d
+	br.ret.sptk.many rp
+END(pio_atomic_phys_write_mmrs)
+
+
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 5b84836c217..30988dfbddf 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/config.h>
@@ -327,10 +327,11 @@ sn_scan_pcdp(void)
 	struct pcdp_interface_pci if_pci;
 	extern struct efi efi;
 
-	pcdp = efi.hcdp;
-	if (! pcdp)
+	if (efi.hcdp == EFI_INVALID_TABLE_ADDR)
 		return;		/* no hcdp/pcdp table */
 
+	pcdp = __va(efi.hcdp);
+
 	if (pcdp->rev < 3)
 		return;		/* only support PCDP (rev >= 3) */
 
@@ -498,6 +499,7 @@ void __init sn_setup(char **cmdline_p)
 	 * for sn.
 	 */
 	pm_power_off = ia64_sn_power_down;
+	current->thread.flags |= IA64_THREAD_MIGRATION;
 }
 
 /**
@@ -660,7 +662,8 @@ void __init sn_cpu_init(void)
 			SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
 		u64 *pio;
 		pio = is_shub1() ? pio1 : pio2;
-		pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]);
+		pda->pio_write_status_addr =
+		   (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]);
 		pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
 	}
 
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index b2e1e746b47..d9d306c79f2 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -93,6 +93,27 @@ static inline unsigned long wait_piowc(void)
 	return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
 }
 
+/**
+ * sn_migrate - SN-specific task migration actions
+ * @task: Task being migrated to new CPU
+ *
+ * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
+ * Context switching user threads which have memory-mapped MMIO may cause
+ * PIOs to issue from seperate CPUs, thus the PIO writes must be drained
+ * from the previous CPU's Shub before execution resumes on the new CPU.
+ */
+void sn_migrate(struct task_struct *task)
+{
+	pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
+	volatile unsigned long *adr = last_pda->pio_write_status_addr;
+	unsigned long val = last_pda->pio_write_status_val;
+
+	/* Drain PIO writes from old CPU's Shub */
+	while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
+			!= val))
+		cpu_relax();
+}
+
 void sn_tlb_migrate_finish(struct mm_struct *mm)
 {
 	/* flush_tlb_mm is inefficient if more than 1 users of mm */
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 70db21f3df2..d917afa30b2 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -110,7 +110,11 @@ static int sn_hwperf_geoid_to_cnode(char *location)
 	if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab))
 		return -1;
 
-	for_each_node(cnode) {
+	/*
+	 * FIXME: replace with cleaner for_each_XXX macro which addresses
+	 * both compute and IO nodes once ACPI3.0 is available.
+	 */
+	for (cnode = 0; cnode < num_cnodes; cnode++) {
 		geoid = cnodeid_get_geoid(cnode);
 		module_id = geo_module(geoid);
 		this_rack = MODULE_GET_RACK(module_id);
@@ -605,7 +609,7 @@ static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
 	op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK;
 
 	if (cpu != SN_HWPERF_ARG_ANY_CPU) {
-		if (cpu >= num_online_cpus() || !cpu_online(cpu)) {
+		if (cpu >= NR_CPUS || !cpu_online(cpu)) {
 			r = -EINVAL;
 			goto out;
 		}
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index c686d9c12f7..5100261310f 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -93,19 +93,22 @@ static int coherence_id_open(struct inode *inode, struct file *file)
 static struct proc_dir_entry
 *sn_procfs_create_entry(const char *name, struct proc_dir_entry *parent,
 			int (*openfunc)(struct inode *, struct file *),
-			int (*releasefunc)(struct inode *, struct file *))
+	int (*releasefunc)(struct inode *, struct file *),
+	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *))
 {
 	struct proc_dir_entry *e = create_proc_entry(name, 0444, parent);
 
 	if (e) {
-		e->proc_fops = (struct file_operations *)kmalloc(
-			sizeof(struct file_operations), GFP_KERNEL);
-		if (e->proc_fops) {
-			memset(e->proc_fops, 0, sizeof(struct file_operations));
-			e->proc_fops->open = openfunc;
-			e->proc_fops->read = seq_read;
-			e->proc_fops->llseek = seq_lseek;
-			e->proc_fops->release = releasefunc;
+		struct file_operations *f;
+
+		f = kzalloc(sizeof(*f), GFP_KERNEL);
+		if (f) {
+			f->open = openfunc;
+			f->read = seq_read;
+			f->llseek = seq_lseek;
+			f->release = releasefunc;
+			f->write = write;
+			e->proc_fops = f;
 		}
 	}
 
@@ -119,31 +122,29 @@ extern int sn_topology_release(struct inode *, struct file *);
 void register_sn_procfs(void)
 {
 	static struct proc_dir_entry *sgi_proc_dir = NULL;
-	struct proc_dir_entry *e;
 
 	BUG_ON(sgi_proc_dir != NULL);
 	if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL)))
 		return;
 
 	sn_procfs_create_entry("partition_id", sgi_proc_dir,
-			       partition_id_open, single_release);
+		partition_id_open, single_release, NULL);
 
 	sn_procfs_create_entry("system_serial_number", sgi_proc_dir,
-			       system_serial_number_open, single_release);
+		system_serial_number_open, single_release, NULL);
 
 	sn_procfs_create_entry("licenseID", sgi_proc_dir, 
-			       licenseID_open, single_release);
+		licenseID_open, single_release, NULL);
 
-	e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir, 
-				   sn_force_interrupt_open, single_release);
-	if (e) 
-		e->proc_fops->write = sn_force_interrupt_write_proc;
+	sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir,
+		sn_force_interrupt_open, single_release,
+		sn_force_interrupt_write_proc);
 
 	sn_procfs_create_entry("coherence_id", sgi_proc_dir, 
-			       coherence_id_open, single_release);
+		coherence_id_open, single_release, NULL);
 	
 	sn_procfs_create_entry("sn_topology", sgi_proc_dir,
-			       sn_topology_open, sn_topology_release);
+		sn_topology_open, sn_topology_release, NULL);
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index 99cb28e7429..feaf1a6e810 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -369,9 +369,15 @@ static void tio_corelet_reset(nasid_t nasid, int corelet)
 
 static int is_fpga_tio(int nasid, int *bt)
 {
-	int ioboard_type;
+	u16 ioboard_type;
+	s64 rc;
 
-	ioboard_type = ia64_sn_sysctl_ioboard_get(nasid);
+	rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard_type);
+	if (rc) {
+		printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n",
+		       rc);
+		return 0;
+	}
 
 	switch (ioboard_type) {
 	case L1_BRICKTYPE_SA:
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index cdf6856ce08..d0abddd9ffe 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -21,7 +21,6 @@
 #include <linux/sched.h>
 #include <linux/cache.h>
 #include <linux/interrupt.h>
-#include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/completion.h>
 #include <asm/sn/bte.h>
@@ -30,6 +29,31 @@
 
 
 /*
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
+ */
+static void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+	/* see if kzalloc will give us cachline aligned memory by default */
+	*base = kzalloc(size, flags);
+	if (*base == NULL) {
+		return NULL;
+	}
+	if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
+		return *base;
+	}
+	kfree(*base);
+
+	/* nope, we'll have to do it ourselves */
+	*base = kzalloc(size + L1_CACHE_BYTES, flags);
+	if (*base == NULL) {
+		return NULL;
+	}
+	return (void *) L1_CACHE_ALIGN((u64) *base);
+}
+
+
+/*
  * Set up the initial values for the XPartition Communication channels.
  */
 static void
@@ -93,20 +117,19 @@ xpc_setup_infrastructure(struct xpc_partition *part)
 	 * Allocate all of the channel structures as a contiguous chunk of
 	 * memory.
 	 */
-	part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
+	part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
 								GFP_KERNEL);
 	if (part->channels == NULL) {
 		dev_err(xpc_chan, "can't get memory for channels\n");
 		return xpcNoMemory;
 	}
-	memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS);
 
 	part->nchannels = XPC_NCHANNELS;
 
 
 	/* allocate all the required GET/PUT values */
 
-	part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
+	part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
 					GFP_KERNEL, &part->local_GPs_base);
 	if (part->local_GPs == NULL) {
 		kfree(part->channels);
@@ -115,55 +138,51 @@ xpc_setup_infrastructure(struct xpc_partition *part)
 			"values\n");
 		return xpcNoMemory;
 	}
-	memset(part->local_GPs, 0, XPC_GP_SIZE);
 
-	part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
+	part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
 					GFP_KERNEL, &part->remote_GPs_base);
 	if (part->remote_GPs == NULL) {
-		kfree(part->channels);
-		part->channels = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
 		dev_err(xpc_chan, "can't get memory for remote get/put "
 			"values\n");
+		kfree(part->local_GPs_base);
+		part->local_GPs = NULL;
+		kfree(part->channels);
+		part->channels = NULL;
 		return xpcNoMemory;
 	}
-	memset(part->remote_GPs, 0, XPC_GP_SIZE);
 
 
 	/* allocate all the required open and close args */
 
-	part->local_openclose_args = xpc_kmalloc_cacheline_aligned(
+	part->local_openclose_args = xpc_kzalloc_cacheline_aligned(
 					XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
 					&part->local_openclose_args_base);
 	if (part->local_openclose_args == NULL) {
-		kfree(part->channels);
-		part->channels = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
+		dev_err(xpc_chan, "can't get memory for local connect args\n");
 		kfree(part->remote_GPs_base);
 		part->remote_GPs = NULL;
-		dev_err(xpc_chan, "can't get memory for local connect args\n");
+		kfree(part->local_GPs_base);
+		part->local_GPs = NULL;
+		kfree(part->channels);
+		part->channels = NULL;
 		return xpcNoMemory;
 	}
-	memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
 
-	part->remote_openclose_args = xpc_kmalloc_cacheline_aligned(
+	part->remote_openclose_args = xpc_kzalloc_cacheline_aligned(
 					XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
 					&part->remote_openclose_args_base);
 	if (part->remote_openclose_args == NULL) {
-		kfree(part->channels);
-		part->channels = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->remote_GPs_base);
-		part->remote_GPs = NULL;
+		dev_err(xpc_chan, "can't get memory for remote connect args\n");
 		kfree(part->local_openclose_args_base);
 		part->local_openclose_args = NULL;
-		dev_err(xpc_chan, "can't get memory for remote connect args\n");
+		kfree(part->remote_GPs_base);
+		part->remote_GPs = NULL;
+		kfree(part->local_GPs_base);
+		part->local_GPs = NULL;
+		kfree(part->channels);
+		part->channels = NULL;
 		return xpcNoMemory;
 	}
-	memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
 
 
 	xpc_initialize_channels(part, partid);
@@ -186,18 +205,18 @@ xpc_setup_infrastructure(struct xpc_partition *part)
 	ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ,
 				part->IPI_owner, (void *) (u64) partid);
 	if (ret != 0) {
-		kfree(part->channels);
-		part->channels = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->remote_GPs_base);
-		part->remote_GPs = NULL;
-		kfree(part->local_openclose_args_base);
-		part->local_openclose_args = NULL;
-		kfree(part->remote_openclose_args_base);
-		part->remote_openclose_args = NULL;
 		dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
 			"errno=%d\n", -ret);
+		kfree(part->remote_openclose_args_base);
+		part->remote_openclose_args = NULL;
+		kfree(part->local_openclose_args_base);
+		part->local_openclose_args = NULL;
+		kfree(part->remote_GPs_base);
+		part->remote_GPs = NULL;
+		kfree(part->local_GPs_base);
+		part->local_GPs = NULL;
+		kfree(part->channels);
+		part->channels = NULL;
 		return xpcLackOfResources;
 	}
 
@@ -446,22 +465,20 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch)
 	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
 
 		nbytes = nentries * ch->msg_size;
-		ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
+		ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
 						GFP_KERNEL,
 						&ch->local_msgqueue_base);
 		if (ch->local_msgqueue == NULL) {
 			continue;
 		}
-		memset(ch->local_msgqueue, 0, nbytes);
 
 		nbytes = nentries * sizeof(struct xpc_notify);
-		ch->notify_queue = kmalloc(nbytes, GFP_KERNEL);
+		ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
 		if (ch->notify_queue == NULL) {
 			kfree(ch->local_msgqueue_base);
 			ch->local_msgqueue = NULL;
 			continue;
 		}
-		memset(ch->notify_queue, 0, nbytes);
 
 		spin_lock_irqsave(&ch->lock, irq_flags);
 		if (nentries < ch->local_nentries) {
@@ -501,13 +518,12 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
 	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
 
 		nbytes = nentries * ch->msg_size;
-		ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
+		ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
 						GFP_KERNEL,
 						&ch->remote_msgqueue_base);
 		if (ch->remote_msgqueue == NULL) {
 			continue;
 		}
-		memset(ch->remote_msgqueue, 0, nbytes);
 
 		spin_lock_irqsave(&ch->lock, irq_flags);
 		if (nentries < ch->remote_nentries) {
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index 8cbf1643257..99b123a6421 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -52,7 +52,6 @@
 #include <linux/syscalls.h>
 #include <linux/cache.h>
 #include <linux/interrupt.h>
-#include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/reboot.h>
 #include <linux/completion.h>
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 88a730e6cfd..94211429fd0 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -81,6 +81,31 @@ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
 
 
 /*
+ * Guarantee that the kmalloc'd memory is cacheline aligned.
+ */
+static void *
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+	/* see if kmalloc will give us cachline aligned memory by default */
+	*base = kmalloc(size, flags);
+	if (*base == NULL) {
+		return NULL;
+	}
+	if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
+		return *base;
+	}
+	kfree(*base);
+
+	/* nope, we'll have to do it ourselves */
+	*base = kmalloc(size + L1_CACHE_BYTES, flags);
+	if (*base == NULL) {
+		return NULL;
+	}
+	return (void *) L1_CACHE_ALIGN((u64) *base);
+}
+
+
+/*
  * Given a nasid, get the physical address of the  partition's reserved page
  * for that nasid. This function returns 0 on any error.
  */
@@ -1038,13 +1063,12 @@ xpc_discovery(void)
 	remote_vars = (struct xpc_vars *) remote_rp;
 
 
-	discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words,
+	discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
 							GFP_KERNEL);
 	if (discovered_nasids == NULL) {
 		kfree(remote_rp_base);
 		return;
 	}
-	memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words);
 
 	rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
 
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 98f716bd92f..ab1211ef017 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -74,6 +74,22 @@ static int sal_pcibr_error_interrupt(struct pcibus_info *soft)
 	return (int)ret_stuff.v0;
 }
 
+u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus)
+{
+	s64 rc;
+	u16 ioboard;
+	nasid_t nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
+
+	rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard);
+	if (rc) {
+		printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n",
+		       rc);
+		return 0;
+	}
+
+	return ioboard;
+}
+
 /* 
  * PCI Bridge Error interrupt handler.  Gets invoked whenever a PCI 
  * bridge sends an error interrupt.
@@ -255,3 +271,4 @@ pcibr_init_provider(void)
 
 EXPORT_SYMBOL_GPL(sal_pcibr_slot_enable);
 EXPORT_SYMBOL_GPL(sal_pcibr_slot_disable);
+EXPORT_SYMBOL_GPL(sn_ioboard_to_pci_bus);
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index 7571a402552..be017691296 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -377,7 +377,7 @@ tioca_dma_mapped(struct pci_dev *pdev, u64 paddr, size_t req_size)
 	struct tioca_dmamap *ca_dmamap;
 	void *map;
 	unsigned long flags;
-	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
 
 	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
 	tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index e52831ed93e..fa073cc4b56 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -15,6 +15,124 @@
 #include <asm/sn/pcidev.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/tioce_provider.h>
+#include <asm/sn/sn2/sn_hwperf.h>
+
+/*
+ * 1/26/2006
+ *
+ * WAR for SGI PV 944642.  For revA TIOCE, need to use the following recipe
+ * (taken from the above PV) before and after accessing tioce internal MMR's
+ * to avoid tioce lockups.
+ *
+ * The recipe as taken from the PV:
+ *
+ *	if(mmr address < 0x45000) {
+ *		if(mmr address == 0 or 0x80)
+ *			mmr wrt or read address 0xc0
+ *		else if(mmr address == 0x148 or 0x200)
+ *			mmr wrt or read address 0x28
+ *		else
+ *			mmr wrt or read address 0x158
+ *
+ *		do desired mmr access (rd or wrt)
+ *
+ *		if(mmr address == 0x100)
+ *			mmr wrt or read address 0x38
+ *		mmr wrt or read address 0xb050
+ *	} else
+ *		do desired mmr access
+ *
+ * According to hw, we can use reads instead of writes to the above addres
+ *
+ * Note this WAR can only to be used for accessing internal MMR's in the
+ * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff.  This includes the
+ * "Local CE Registers and Memories" and "PCI Compatible Config Space" address
+ * spaces from table 2-1 of the "CE Programmer's Reference Overview" document.
+ *
+ * All registers defined in struct tioce will meet that criteria.
+ */
+
+static void inline
+tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr)
+{
+	u64 mmr_base;
+	u64 mmr_offset;
+
+	if (kern->ce_common->ce_rev != TIOCE_REV_A)
+		return;
+
+	mmr_base = kern->ce_common->ce_pcibus.bs_base;
+	mmr_offset = (u64)mmr_addr - mmr_base;
+
+	if (mmr_offset < 0x45000) {
+		u64 mmr_war_offset;
+
+		if (mmr_offset == 0 || mmr_offset == 0x80)
+			mmr_war_offset = 0xc0;
+		else if (mmr_offset == 0x148 || mmr_offset == 0x200)
+			mmr_war_offset = 0x28;
+		else
+			mmr_war_offset = 0x158;
+
+		readq_relaxed((void *)(mmr_base + mmr_war_offset));
+	}
+}
+
+static void inline
+tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr)
+{
+	u64 mmr_base;
+	u64 mmr_offset;
+
+	if (kern->ce_common->ce_rev != TIOCE_REV_A)
+		return;
+
+	mmr_base = kern->ce_common->ce_pcibus.bs_base;
+	mmr_offset = (u64)mmr_addr - mmr_base;
+
+	if (mmr_offset < 0x45000) {
+		if (mmr_offset == 0x100)
+			readq_relaxed((void *)(mmr_base + 0x38));
+		readq_relaxed((void *)(mmr_base + 0xb050));
+	}
+}
+
+/* load mmr contents into a variable */
+#define tioce_mmr_load(kern, mmrp, varp) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	*(varp) = readq_relaxed(mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store variable contents into mmr */
+#define tioce_mmr_store(kern, mmrp, varp) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	writeq(*varp, mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store immediate value into mmr */
+#define tioce_mmr_storei(kern, mmrp, val) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	writeq(val, mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* set bits (immediate value) into mmr */
+#define tioce_mmr_seti(kern, mmrp, bits) do {\
+	u64 tmp; \
+	tioce_mmr_load(kern, mmrp, &tmp); \
+	tmp |= (bits); \
+	tioce_mmr_store(kern, mmrp, &tmp); \
+} while (0)
+
+/* clear bits (immediate value) into mmr */
+#define tioce_mmr_clri(kern, mmrp, bits) do { \
+	u64 tmp; \
+	tioce_mmr_load(kern, mmrp, &tmp); \
+	tmp &= ~(bits); \
+	tioce_mmr_store(kern, mmrp, &tmp); \
+} while (0)
 
 /**
  * Bus address ranges for the 5 flavors of TIOCE DMA
@@ -62,9 +180,9 @@
 #define TIOCE_ATE_M40	2
 #define TIOCE_ATE_M40S	3
 
-#define KB(x)	((x) << 10)
-#define MB(x)	((x) << 20)
-#define GB(x)	((x) << 30)
+#define KB(x)	((u64)(x) << 10)
+#define MB(x)	((u64)(x) << 20)
+#define GB(x)	((u64)(x) << 30)
 
 /**
  * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode
@@ -151,7 +269,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
 	int last;
 	int entries;
 	int nates;
-	int pagesize;
+	u64 pagesize;
 	u64 *ate_shadow;
 	u64 *ate_reg;
 	u64 addr;
@@ -228,7 +346,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
 
 		ate = ATE_MAKE(addr, pagesize);
 		ate_shadow[i + j] = ate;
-		writeq(ate, &ate_reg[i + j]);
+		tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate);
 		addr += pagesize;
 	}
 
@@ -272,7 +390,8 @@ tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr)
 		u64 tmp;
 
 		ce_kern->ce_port[port].dirmap_shadow = ct_upper;
-		writeq(ct_upper, &ce_mmr->ce_ure_dir_map[port]);
+		tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+				 ct_upper);
 		tmp = ce_mmr->ce_ure_dir_map[port];
 		dma_ok = 1;
 	} else
@@ -344,7 +463,8 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
 	if (TIOCE_D32_ADDR(bus_addr)) {
 		if (--ce_kern->ce_port[port].dirmap_refcnt == 0) {
 			ce_kern->ce_port[port].dirmap_shadow = 0;
-			writeq(0, &ce_mmr->ce_ure_dir_map[port]);
+			tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+					 0);
 		}
 	} else {
 		struct tioce_dmamap *map;
@@ -365,7 +485,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
 		} else if (--map->refcnt == 0) {
 			for (i = 0; i < map->ate_count; i++) {
 				map->ate_shadow[i] = 0;
-				map->ate_hw[i] = 0;
+				tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0);
 			}
 
 			list_del(&map->ce_dmamap_list);
@@ -486,7 +606,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
 	spin_unlock_irqrestore(&ce_kern->ce_lock, flags);
 
 dma_map_done:
-	if (mapaddr & barrier)
+	if (mapaddr && barrier)
 		mapaddr = tioce_dma_barrier(mapaddr, 1);
 
 	return mapaddr;
@@ -541,17 +661,61 @@ tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt)
 			soft->ce_pcibus.bs_persist_segment,
 			soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0);
 
+	if (ret_stuff.v0)
+		panic("tioce_error_intr_handler:  Fatal TIOCE error");
+
 	return IRQ_HANDLED;
 }
 
 /**
+ * tioce_reserve_m32 - reserve M32 ate's for the indicated address range
+ * @tioce_kernel: TIOCE context to reserve ate's for
+ * @base: starting bus address to reserve
+ * @limit: last bus address to reserve
+ *
+ * If base/limit falls within the range of bus space mapped through the
+ * M32 space, reserve the resources corresponding to the range.
+ */
+static void
+tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit)
+{
+	int ate_index, last_ate, ps;
+	struct tioce *ce_mmr;
+
+	if (!TIOCE_M32_ADDR(base))
+		return;
+
+	ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base;
+	ps = ce_kern->ce_ate3240_pagesize;
+	ate_index = ATE_PAGE(base, ps);
+	last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1;
+
+	if (ate_index < 64)
+		ate_index = 64;
+
+	while (ate_index <= last_ate) {
+		u64 ate;
+
+		ate = ATE_MAKE(0xdeadbeef, ps);
+		ce_kern->ce_ate3240_shadow[ate_index] = ate;
+		tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index],
+				 ate);
+		ate_index++;
+	}
+}
+
+/**
  * tioce_kern_init - init kernel structures related to a given TIOCE
  * @tioce_common: ptr to a cached tioce_common struct that originated in prom
- */ static struct tioce_kernel *
+ */
+static struct tioce_kernel *
 tioce_kern_init(struct tioce_common *tioce_common)
 {
 	int i;
+	int ps;
+	int dev;
 	u32 tmp;
+	unsigned int seg, bus;
 	struct tioce *tioce_mmr;
 	struct tioce_kernel *tioce_kern;
 
@@ -572,9 +736,10 @@ tioce_kern_init(struct tioce_common *tioce_common)
 	 * here to use pci_read_config_xxx() so use the raw_pci_ops vector.
 	 */
 
-	raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment,
-			  tioce_common->ce_pcibus.bs_persist_busnum,
-			  PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp);
+	seg = tioce_common->ce_pcibus.bs_persist_segment;
+	bus = tioce_common->ce_pcibus.bs_persist_busnum;
+
+	raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
 	tioce_kern->ce_port1_secondary = (u8) tmp;
 
 	/*
@@ -583,18 +748,76 @@ tioce_kern_init(struct tioce_common *tioce_common)
 	 */
 
 	tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
-	__sn_clrq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK);
-	__sn_setq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_256K_PAGESIZE);
-	tioce_kern->ce_ate3240_pagesize = KB(256);
+	tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map,
+		       CE_URE_PAGESIZE_MASK);
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map,
+		       CE_URE_256K_PAGESIZE);
+	ps = tioce_kern->ce_ate3240_pagesize = KB(256);
 
 	for (i = 0; i < TIOCE_NUM_M40_ATES; i++) {
 		tioce_kern->ce_ate40_shadow[i] = 0;
-		writeq(0, &tioce_mmr->ce_ure_ate40[i]);
+		tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0);
 	}
 
 	for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) {
 		tioce_kern->ce_ate3240_shadow[i] = 0;
-		writeq(0, &tioce_mmr->ce_ure_ate3240[i]);
+		tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0);
+	}
+
+	/*
+	 * Reserve ATE's corresponding to reserved address ranges.  These
+	 * include:
+	 *
+	 *	Memory space covered by each PPB mem base/limit register
+	 * 	Memory space covered by each PPB prefetch base/limit register
+	 *
+	 * These bus ranges are for pio (downstream) traffic only, and so
+	 * cannot be used for DMA.
+	 */
+
+	for (dev = 1; dev <= 2; dev++) {
+		u64 base, limit;
+
+		/* mem base/limit */
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_MEMORY_BASE, 2, &tmp);
+		base = (u64)tmp << 16;
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_MEMORY_LIMIT, 2, &tmp);
+		limit = (u64)tmp << 16;
+		limit |= 0xfffffUL;
+
+		if (base < limit)
+			tioce_reserve_m32(tioce_kern, base, limit);
+
+		/*
+		 * prefetch mem base/limit.  The tioce ppb's have 64-bit
+		 * decoders, so read the upper portions w/o checking the
+		 * attributes.
+		 */
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_MEMORY_BASE, 2, &tmp);
+		base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_BASE_UPPER32, 4, &tmp);
+		base |= (u64)tmp << 32;
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_MEMORY_LIMIT, 2, &tmp);
+
+		limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+		limit |= 0xfffffUL;
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_LIMIT_UPPER32, 4, &tmp);
+		limit |= (u64)tmp << 32;
+
+		if ((base < limit) && TIOCE_M32_ADDR(base))
+			tioce_reserve_m32(tioce_kern, base, limit);
 	}
 
 	return tioce_kern;
@@ -614,6 +837,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
 {
 	struct pcidev_info *pcidev_info;
 	struct tioce_common *ce_common;
+	struct tioce_kernel *ce_kern;
 	struct tioce *ce_mmr;
 	u64 force_int_val;
 
@@ -629,6 +853,29 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
 
 	ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
 	ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+	ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
+
+	/*
+	 * TIOCE Rev A workaround (PV 945826), force an interrupt by writing
+	 * the TIO_INTx register directly (1/26/2006)
+	 */
+	if (ce_common->ce_rev == TIOCE_REV_A) {
+		u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit);
+		u64 status;
+
+		tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status);
+		if (status & int_bit_mask) {
+			u64 force_irq = (1 << 8) | sn_irq_info->irq_irq;
+			u64 ctalk = sn_irq_info->irq_xtalkaddr;
+			u64 nasid, offset;
+
+			nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT;
+			offset = (ctalk & CTALK_NODE_OFFSET);
+			HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq);
+		}
+
+		return;
+	}
 
 	/*
 	 * irq_int_bit is originally set up by prom, and holds the interrupt
@@ -666,7 +913,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
 	default:
 		return;
 	}
-	writeq(force_int_val, &ce_mmr->ce_adm_force_int);
+	tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val);
 }
 
 /**
@@ -685,6 +932,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
 {
 	struct pcidev_info *pcidev_info;
 	struct tioce_common *ce_common;
+	struct tioce_kernel *ce_kern;
 	struct tioce *ce_mmr;
 	int bit;
 	u64 vector;
@@ -695,14 +943,15 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
 
 	ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
 	ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+	ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
 
 	bit = sn_irq_info->irq_int_bit;
 
-	__sn_setq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
+	tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
 	vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT;
 	vector |= sn_irq_info->irq_xtalkaddr;
-	writeq(vector, &ce_mmr->ce_adm_int_dest[bit]);
-	__sn_clrq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
+	tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector);
+	tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
 
 	tioce_force_interrupt(sn_irq_info);
 }
@@ -721,7 +970,11 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
 static void *
 tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
 {
+	int my_nasid;
+	cnodeid_t my_cnode, mem_cnode;
 	struct tioce_common *tioce_common;
+	struct tioce_kernel *tioce_kern;
+	struct tioce *tioce_mmr;
 
 	/*
 	 * Allocate kernel bus soft and copy from prom.
@@ -734,11 +987,23 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 	memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common));
 	tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET;
 
-	if (tioce_kern_init(tioce_common) == NULL) {
+	tioce_kern = tioce_kern_init(tioce_common);
+	if (tioce_kern == NULL) {
 		kfree(tioce_common);
 		return NULL;
 	}
 
+	/*
+	 * Clear out any transient errors before registering the error
+	 * interrupt handler.
+	 */
+
+	tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL);
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias,
+		       ~0ULL);
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL);
+
 	if (request_irq(SGI_PCIASIC_ERROR,
 			tioce_error_intr_handler,
 			SA_SHIRQ, "TIOCE error", (void *)tioce_common))
@@ -750,6 +1015,21 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 		       tioce_common->ce_pcibus.bs_persist_segment,
 		       tioce_common->ce_pcibus.bs_persist_busnum);
 
+	/*
+	 * identify closest nasid for memory allocations
+	 */
+
+	my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base);
+	my_cnode = nasid_to_cnodeid(my_nasid);
+
+	if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) {
+		printk(KERN_WARNING "tioce_bus_fixup: failed to find "
+		       "closest node with MEM to TIO node %d\n", my_cnode);
+		mem_cnode = (cnodeid_t)-1; /* use any node */
+	}
+
+	controller->node = mem_cnode;
+
 	return tioce_common;
 }
author	Steven Whitehouse <swhiteho@redhat.com>	2006-03-31 15:34:58 -0500
committer	Steven Whitehouse <swhiteho@redhat.com>	2006-03-31 15:34:58 -0500
commit	86579dd06deecfa6ac88d5e84e4d63c397cd6f6d (patch)
tree	b4475d3ccde53015ad84a06e4e55e64591171b75 /arch/ia64
parent	7ea9ea832212c4a755650f7c7cc1ff0b63292a41 (diff)
parent	a0f067802576d4eb4c65d40b8ee7d6ea3c81dd61 (diff)