74 files changed, 987 insertions, 551 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index e19185d2655..de1bff65996 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -14,6 +14,7 @@ config IA64
 	select PCI if (!IA64_HP_SIM)
 	select ACPI if (!IA64_HP_SIM)
 	select PM if (!IA64_HP_SIM)
+	select ARCH_SUPPORTS_MSI
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
@@ -30,6 +31,10 @@ config ZONE_DMA
 	def_bool y
 	depends on !IA64_SGI_SN2
 
+config QUICKLIST
+	bool
+	default y
+
 config MMU
 	bool
 	default y
@@ -438,6 +443,16 @@ config IA64_PALINFO
 	  To use this option, you have to ensure that the "/proc file system
 	  support" (CONFIG_PROC_FS) is enabled, too.
 
+config IA64_MC_ERR_INJECT
+	tristate "MC error injection support"
+	help
+	  Selects whether to support MC error injection. When enabled,
+	  the kernel provides a sysfs interface that lets user applications
+	  call the MC error injection PAL procedure to inject various errors.
+	  This is a useful tool for MCA testing.
+
+	  If you're unsure, do not select this option.
+
 config SGI_SN
 	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
 
@@ -457,7 +472,7 @@ config KEXEC
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
-	  but it is indepedent of the system firmware.   And like a reboot
+	  but it is independent of the system firmware.   And like a reboot
 	  you can start any kernel with it, not just Linux.
 
 	  The name comes from the similiarity to the exec system call.
diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig
index 153bfdc0182..90bd9601cdd 100644
--- a/arch/ia64/defconfig
+++ b/arch/ia64/defconfig
@@ -164,6 +164,7 @@ CONFIG_COMPAT=y
 CONFIG_IA64_MCA_RECOVERY=y
 CONFIG_PERFMON=y
 CONFIG_IA64_PALINFO=y
+# CONFIG_IA64_MC_ERR_INJECT is not set
 CONFIG_SGI_SN=y
 # CONFIG_IA64_ESI is not set
 
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index 2153bcacbe6..94e57109fad 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -63,7 +63,7 @@ use_swiotlb (struct device *dev)
 	return dev && dev->dma_mask && !hwiommu_dma_supported(dev, *dev->dma_mask);
 }
 
-void
+void __init
 hwsw_init (void)
 {
 	/* default to a smallish 2MB sw I/O TLB */
diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c
index 5a0a7afcfc3..300acd913d9 100644
--- a/arch/ia64/hp/sim/boot/fw-emu.c
+++ b/arch/ia64/hp/sim/boot/fw-emu.c
@@ -287,7 +287,7 @@ sys_fw_init (const char *args, int arglen)
 
 	memset(efi_systab, 0, sizeof(efi_systab));
 	efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
-	efi_systab->hdr.revision  = EFI_SYSTEM_TABLE_REVISION;
+	efi_systab->hdr.revision  = ((1 << 16) | 00);
 	efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
 	efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0");
 	efi_systab->fw_revision = 1;
diff --git a/arch/ia64/ia32/audit.c b/arch/ia64/ia32/audit.c
index 92d7d0c8d93..8850fe40ea3 100644
--- a/arch/ia64/ia32/audit.c
+++ b/arch/ia64/ia32/audit.c
@@ -20,6 +20,11 @@ unsigned ia32_read_class[] = {
 ~0U
 };
 
+unsigned ia32_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
 int ia32_classify_syscall(unsigned syscall)
 {
 	switch(syscall) {
diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
index 687e5fdc968..99b665e2b1d 100644
--- a/arch/ia64/ia32/ia32_entry.S
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -52,43 +52,6 @@ ENTRY(ia32_clone)
 	br.ret.sptk.many rp
 END(ia32_clone)
 
-ENTRY(sys32_rt_sigsuspend)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,3,0		// preserve all eight input regs
-	mov loc0=rp
-	mov out0=in0				// mask
-	mov out1=in1				// sigsetsize
-	mov out2=sp				// out2 = &sigscratch
-	.fframe 16
-	adds sp=-16,sp				// allocate dummy "sigscratch"
-	;;
-	.body
-	br.call.sptk.many rp=ia32_rt_sigsuspend
-1:	.restore sp
-	adds sp=16,sp
-	mov rp=loc0
-	mov ar.pfs=loc1
-	br.ret.sptk.many rp
-END(sys32_rt_sigsuspend)
-
-ENTRY(sys32_sigsuspend)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,3,0		// preserve all eight input regs
-	mov loc0=rp
-	mov out0=in2				// mask (first two args are ignored)
-	;;
-	mov out1=sp				// out1 = &sigscratch
-	.fframe 16
-	adds sp=-16,sp				// allocate dummy "sigscratch"
-	.body
-	br.call.sptk.many rp=ia32_sigsuspend
-1:	.restore sp
-	adds sp=16,sp
-	mov rp=loc0
-	mov ar.pfs=loc1
-	br.ret.sptk.many rp
-END(sys32_sigsuspend)
-
 GLOBAL_ENTRY(ia32_ret_from_clone)
 	PT_REGS_UNWIND_INFO(0)
 {	/*
@@ -389,7 +352,7 @@ ia32_syscall_table:
 	data8 sys_rt_sigpending
 	data8 compat_sys_rt_sigtimedwait
 	data8 sys32_rt_sigqueueinfo
-	data8 sys32_rt_sigsuspend
+	data8 compat_sys_rt_sigsuspend
 	data8 sys32_pread	  /* 180 */
 	data8 sys32_pwrite
 	data8 sys_chown	/* 16-bit version */
diff --git a/arch/ia64/ia32/ia32_ldt.c b/arch/ia64/ia32/ia32_ldt.c
index a152738c7d0..16d51c14684 100644
--- a/arch/ia64/ia32/ia32_ldt.c
+++ b/arch/ia64/ia32/ia32_ldt.c
@@ -10,7 +10,6 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 
 #include <asm/uaccess.h>
diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c
index b3355a9ca2c..85e82f32e48 100644
--- a/arch/ia64/ia32/ia32_signal.c
+++ b/arch/ia64/ia32/ia32_signal.c
@@ -18,7 +18,6 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
@@ -452,59 +451,20 @@ sigact_set_handler (struct k_sigaction *sa, unsigned int handler, unsigned int r
 		sa->sa.sa_handler = (__sighandler_t) (((unsigned long) restorer << 32) | handler);
 }
 
-long
-__ia32_rt_sigsuspend (compat_sigset_t *sset, unsigned int sigsetsize, struct sigscratch *scr)
+asmlinkage long
+sys32_sigsuspend (int history0, int history1, old_sigset_t mask)
 {
-	extern long ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall);
-	sigset_t oldset, set;
-
-	scr->scratch_unat = 0;	/* avoid leaking kernel bits to user level */
-	memset(&set, 0, sizeof(set));
-
-	memcpy(&set.sig, &sset->sig, sigsetsize);
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-
+	mask &= _BLOCKABLE;
 	spin_lock_irq(&current->sighand->siglock);
-	{
-		oldset = current->blocked;
-		current->blocked = set;
-		recalc_sigpending();
-	}
+	current->saved_sigmask = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	/*
-	 * The return below usually returns to the signal handler.  We need to pre-set the
-	 * correct error code here to ensure that the right values get saved in sigcontext
-	 * by ia64_do_signal.
-	 */
-	scr->pt.r8 = -EINTR;
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (ia64_do_signal(&oldset, scr, 1))
-			return -EINTR;
-	}
-}
-
-asmlinkage long
-ia32_rt_sigsuspend (compat_sigset_t __user *uset, unsigned int sigsetsize, struct sigscratch *scr)
-{
-	compat_sigset_t set;
-
-	if (sigsetsize > sizeof(compat_sigset_t))
-		return -EINVAL;
-
-	if (copy_from_user(&set.sig, &uset->sig, sigsetsize))
-		return -EFAULT;
-
-	return __ia32_rt_sigsuspend(&set, sigsetsize, scr);
-}
-
-asmlinkage long
-ia32_sigsuspend (unsigned int mask, struct sigscratch *scr)
-{
-	return __ia32_rt_sigsuspend((compat_sigset_t *) &mask, sizeof(mask), scr);
+	current->state = TASK_INTERRUPTIBLE;
+	schedule();
+	set_thread_flag(TIF_RESTORE_SIGMASK);
+	return -ERESTARTNOHAND;
 }
 
 asmlinkage long
@@ -811,7 +771,11 @@ get_sigframe (struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
 	}
 	/* Legacy stack switching not supported */
 
-	return (void __user *)((esp - frame_size) & -8ul);
+	esp -= frame_size;
+	/* Align the stack pointer according to the i386 ABI,
+	 * i.e. so that on function entry ((sp + 4) & 15) == 0. */
+	esp = ((esp + 4) & -16ul) - 4;
+	return (void __user *) esp;
 }
 
 static int
diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c
index 6af400a12ca..beea7a0b9dc 100644
--- a/arch/ia64/ia32/ia32_support.c
+++ b/arch/ia64/ia32/ia32_support.c
@@ -252,10 +252,8 @@ ia32_init (void)
 		extern struct kmem_cache *partial_page_cachep;
 
 		partial_page_cachep = kmem_cache_create("partial_page_cache",
-							sizeof(struct partial_page), 0, 0,
-							NULL, NULL);
-		if (!partial_page_cachep)
-			panic("Cannot create partial page SLAB cache");
+						sizeof(struct partial_page),
+						0, SLAB_PANIC, NULL, NULL);
 	}
 #endif
 	return 0;
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 098ee605bf5..33e5a598672 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
 obj-$(CONFIG_AUDIT)		+= audit.o
 obj-$(CONFIG_PCI_MSI)		+= msi_ia64.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
+obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
 
 obj-$(CONFIG_IA64_ESI)		+= esi.o
 ifneq ($(CONFIG_IA64_ESI),)
diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c
index 4d4993a47e5..5a216c01992 100644
--- a/arch/ia64/kernel/acpi-processor.c
+++ b/arch/ia64/kernel/acpi-processor.c
@@ -44,7 +44,7 @@ static void init_intel_pdc(struct acpi_processor *pr)
 
 	buf[0] = ACPI_PDC_REVISION_ID;
 	buf[1] = 1;
-	buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
+	buf[2] = ACPI_PDC_EST_CAPABILITY_SMP;
 
 	obj->type = ACPI_TYPE_BUFFER;
 	obj->buffer.length = 12;
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 3549c94467b..103dd8edda7 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -67,7 +67,8 @@ EXPORT_SYMBOL(pm_power_off);
 unsigned int acpi_cpei_override;
 unsigned int acpi_cpei_phys_cpuid;
 
-const char *acpi_get_sysname(void)
+const char __init *
+acpi_get_sysname(void)
 {
 #ifdef CONFIG_IA64_GENERIC
 	unsigned long rsdp_phys;
@@ -791,7 +792,7 @@ static __init int setup_additional_cpus(char *s)
 early_param("additional_cpus", setup_additional_cpus);
 
 /*
- * cpu_possible_map should be static, it cannot change as cpu's
+ * cpu_possible_map should be static, it cannot change as CPUs
  * are onlined, or offlined. The reason is per-cpu data-structures
  * are allocated by some modules at init time, and dont expect to
  * do this dynamically on cpu arrival/departure.
diff --git a/arch/ia64/kernel/audit.c b/arch/ia64/kernel/audit.c
index 04682555a28..f3802ae89b1 100644
--- a/arch/ia64/kernel/audit.c
+++ b/arch/ia64/kernel/audit.c
@@ -23,6 +23,20 @@ static unsigned chattr_class[] = {
 ~0U
 };
 
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+#ifdef CONFIG_IA32_SUPPORT
+	if (arch == AUDIT_ARCH_I386)
+		return 1;
+#endif
+	return 0;
+}
+
 int audit_classify_syscall(int abi, unsigned syscall)
 {
 #ifdef CONFIG_IA32_SUPPORT
@@ -49,15 +63,18 @@ static int __init audit_classes_init(void)
 	extern __u32 ia32_write_class[];
 	extern __u32 ia32_read_class[];
 	extern __u32 ia32_chattr_class[];
+	extern __u32 ia32_signal_class[];
 	audit_register_class(AUDIT_CLASS_WRITE_32, ia32_write_class);
 	audit_register_class(AUDIT_CLASS_READ_32, ia32_read_class);
 	audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ia32_dir_class);
 	audit_register_class(AUDIT_CLASS_CHATTR_32, ia32_chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL_32, ia32_signal_class);
 #endif
 	audit_register_class(AUDIT_CLASS_WRITE, write_class);
 	audit_register_class(AUDIT_CLASS_READ, read_class);
 	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
 	audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
 	return 0;
 }
 
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index 80a94e70782..1d64ef478dd 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -16,8 +16,8 @@
 #include <linux/elfcore.h>
 #include <linux/sysctl.h>
 #include <linux/init.h>
+#include <linux/kdebug.h>
 
-#include <asm/kdebug.h>
 #include <asm/mca.h>
 
 int kdump_status[NR_CPUS];
@@ -74,7 +74,7 @@ crash_save_this_cpu(void)
 	buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
 	if (!buf)
 		return;
-	buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus,
+	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, prstatus,
 			sizeof(*prstatus));
 	final_note(buf);
 }
@@ -156,24 +156,30 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
 	if (!kdump_on_init)
 		return NOTIFY_DONE;
 
-	if (val != DIE_INIT_MONARCH_ENTER &&
-	    val != DIE_INIT_SLAVE_ENTER &&
+	if (val != DIE_INIT_MONARCH_LEAVE &&
+	    val != DIE_INIT_SLAVE_LEAVE &&
+	    val != DIE_INIT_MONARCH_PROCESS &&
 	    val != DIE_MCA_RENDZVOUS_LEAVE &&
 	    val != DIE_MCA_MONARCH_LEAVE)
 		return NOTIFY_DONE;
 
 	nd = (struct ia64_mca_notify_die *)args->err;
-	/* Reason code 1 means machine check rendezous*/
-	if ((val == DIE_INIT_MONARCH_ENTER || val == DIE_INIT_SLAVE_ENTER) &&
-		 nd->sos->rv_rc == 1)
+	/* Reason code 1 means machine check rendezvous*/
+	if ((val == DIE_INIT_MONARCH_LEAVE || val == DIE_INIT_SLAVE_LEAVE
+	    || val == DIE_INIT_MONARCH_PROCESS) && nd->sos->rv_rc == 1)
 		return NOTIFY_DONE;
 
 	switch (val) {
-		case DIE_INIT_MONARCH_ENTER:
+		case DIE_INIT_MONARCH_PROCESS:
+			atomic_set(&kdump_in_progress, 1);
+			*(nd->monarch_cpu) = -1;
+			break;
+		case DIE_INIT_MONARCH_LEAVE:
 			machine_kdump_on_init();
 			break;
-		case DIE_INIT_SLAVE_ENTER:
-			unw_init_running(kdump_cpu_freeze, NULL);
+		case DIE_INIT_SLAVE_LEAVE:
+			if (atomic_read(&kdump_in_progress))
+				unw_init_running(kdump_cpu_freeze, NULL);
 			break;
 		case DIE_MCA_RENDZVOUS_LEAVE:
 			if (atomic_read(&kdump_in_progress))
@@ -215,8 +221,10 @@ static ctl_table sys_table[] = {
 static int
 machine_crash_setup(void)
 {
+	/* be notified before default_monarch_init_process */
 	static struct notifier_block kdump_init_notifier_nb = {
 		.notifier_call = kdump_init_notifier,
+		.priority = 1,
 	};
 	int ret;
 	if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index f45f91d38ca..75ec3478d8a 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -445,11 +445,11 @@ efi_init (void)
 		panic("Woah! Can't find EFI system table.\n");
 	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
 		panic("Woah! EFI system table signature incorrect\n");
-	if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
-		printk(KERN_WARNING "Warning: EFI system table major version mismatch: "
-		       "got %d.%02d, expected %d.%02d\n",
-		       efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff,
-		       EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff);
+	if ((efi.systab->hdr.revision >> 16) == 0)
+		printk(KERN_WARNING "Warning: EFI system table version "
+		       "%d.%02d, expected 1.00 or greater\n",
+		       efi.systab->hdr.revision >> 16,
+		       efi.systab->hdr.revision & 0xffff);
 
 	config_tables = __va(efi.systab->tables);
 
@@ -660,6 +660,29 @@ efi_memory_descriptor (unsigned long phys_addr)
 	return NULL;
 }
 
+static int
+efi_memmap_intersects (unsigned long phys_addr, unsigned long size)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size;
+	unsigned long end;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	end = phys_addr + size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+
+		if (md->phys_addr < end && efi_md_end(md) > phys_addr)
+			return 1;
+	}
+	return 0;
+}
+
 u32
 efi_mem_type (unsigned long phys_addr)
 {
@@ -766,11 +789,28 @@ valid_phys_addr_range (unsigned long phys_addr, unsigned long size)
 int
 valid_mmap_phys_addr_range (unsigned long pfn, unsigned long size)
 {
+	unsigned long phys_addr = pfn << PAGE_SHIFT;
+	u64 attr;
+
+	attr = efi_mem_attribute(phys_addr, size);
+
 	/*
-	 * MMIO regions are often missing from the EFI memory map.
-	 * We must allow mmap of them for programs like X, so we
-	 * currently can't do any useful validation.
+	 * /dev/mem mmap uses normal user pages, so we don't need the entire
+	 * granule, but the entire region we're mapping must support the same
+	 * attribute.
 	 */
+	if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC)
+		return 1;
+
+	/*
+	 * Intel firmware doesn't tell us about all the MMIO regions, so
+	 * in general we have to allow mmap requests.  But if EFI *does*
+	 * tell us about anything inside this region, we should deny it.
+	 * The user can always map a smaller region to avoid the overlap.
+	 */
+	if (efi_memmap_intersects(phys_addr, size))
+		return 0;
+
 	return 1;
 }
 
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index e7873eeae44..95f51751523 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -767,7 +767,7 @@ ENTRY(ia64_leave_syscall)
 	ld8.fill r15=[r3]			// M0|1 restore r15
 	mov b6=r18				// I0   restore b6
 
-	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+	LOAD_PHYS_STACK_REG_SIZE(r17)
 	mov f9=f0					// F    clear f9
 (pKStk) br.cond.dpnt.many skip_rbs_switch		// B
 
@@ -775,7 +775,6 @@ ENTRY(ia64_leave_syscall)
 	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
 	cover				// B    add current frame into dirty partition & set cr.ifs
 	;;
-(pUStk) ld4 r17=[r17]			// M0|1 r17 = cpu_data->phys_stacked_size_p8
 	mov r19=ar.bsp			// M2   get new backing store pointer
 	mov f10=f0			// F    clear f10
 
@@ -953,9 +952,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	shr.u r18=r19,16	// get byte size of existing "dirty" partition
 	;;
 	mov r16=ar.bsp		// get existing backing store pointer
-	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
-	;;
-	ld4 r17=[r17]		// r17 = cpu_data->phys_stacked_size_p8
+	LOAD_PHYS_STACK_REG_SIZE(r17)
 (pKStk)	br.cond.dpnt skip_rbs_switch
 
 	/*
@@ -1202,32 +1199,6 @@ ENTRY(notify_resume_user)
 	br.ret.sptk.many rp
 END(notify_resume_user)
 
-GLOBAL_ENTRY(sys_rt_sigsuspend)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
-	mov r9=ar.unat
-	mov loc0=rp				// save return address
-	mov out0=in0				// mask
-	mov out1=in1				// sigsetsize
-	adds out2=8,sp				// out2=&sigscratch->ar_pfs
-	;;
-	.fframe 16
-	.spillsp ar.unat, 16
-	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
-	st8 [out2]=loc1,-8			// save ar.pfs, out2=&sigscratch
-	.body
-	br.call.sptk.many rp=ia64_rt_sigsuspend
-.ret17:	.restore sp
-	adds sp=16,sp				// pop scratch stack space
-	;;
-	ld8 r9=[sp]				// load new unat from sw->caller_unat
-	mov rp=loc0
-	;;
-	mov ar.unat=r9
-	mov ar.pfs=loc1
-	br.ret.sptk.many rp
-END(sys_rt_sigsuspend)
-
 ENTRY(sys_rt_sigreturn)
 	PT_REGS_UNWIND_INFO(0)
 	/*
@@ -1601,8 +1572,8 @@ sys_call_table:
 	data8 sys_readlinkat
 	data8 sys_fchmodat
 	data8 sys_faccessat
-	data8 sys_ni_syscall			// reserved for pselect
-	data8 sys_ni_syscall			// 1295 reserved for ppoll
+	data8 sys_pselect6
+	data8 sys_ppoll
 	data8 sys_unshare
 	data8 sys_splice
 	data8 sys_set_robust_list
@@ -1612,5 +1583,10 @@ sys_call_table:
 	data8 sys_vmsplice
 	data8 sys_ni_syscall			// reserved for move_pages
 	data8 sys_getcpu
+	data8 sys_epoll_pwait			// 1305
+	data8 sys_utimensat
+	data8 sys_signalfd
+	data8 sys_timerfd
+	data8 sys_eventfd
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
new file mode 100644
index 00000000000..b642648cc2a
--- /dev/null
+++ b/arch/ia64/kernel/err_inject.c
@@ -0,0 +1,295 @@
+/*
+ * err_inject.c -
+ *	1.) Inject errors to a processor.
+ *	2.) Query error injection capabilities.
+ * This driver, along with user space code, can act as an error
+ * injection tool.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Written by: Fenghua Yu <fenghua.yu@intel.com>, Intel Corporation
+ * Copyright (C) 2006, Intel Corp.  All rights reserved.
+ *
+ */
+#include <linux/sysdev.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#define ERR_INJ_DEBUG
+
+#define ERR_DATA_BUFFER_SIZE 3 		// Three 8-byte;
+
+#define define_one_ro(name) 						\
+static SYSDEV_ATTR(name, 0444, show_##name, NULL)
+
+#define define_one_rw(name) 						\
+static SYSDEV_ATTR(name, 0644, show_##name, store_##name)
+
+static u64 call_start[NR_CPUS];
+static u64 phys_addr[NR_CPUS];
+static u64 err_type_info[NR_CPUS];
+static u64 err_struct_info[NR_CPUS];
+static struct {
+	u64 data1;
+	u64 data2;
+	u64 data3;
+} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS];
+static s64 status[NR_CPUS];
+static u64 capabilities[NR_CPUS];
+static u64 resources[NR_CPUS];
+
+#define show(name) 							\
+static ssize_t 								\
+show_##name(struct sys_device *dev, char *buf)				\
+{									\
+	u32 cpu=dev->id;						\
+	return sprintf(buf, "%lx\n", name[cpu]);			\
+}
+
+#define store(name)							\
+static ssize_t 								\
+store_##name(struct sys_device *dev, const char *buf, size_t size)	\
+{									\
+	unsigned int cpu=dev->id;					\
+	name[cpu] = simple_strtoull(buf, NULL, 16);			\
+	return size;							\
+}
+
+show(call_start)
+
+/* It is the user's responsibility to call the PAL procedure on a specific
+ * processor. The cpu number in the driver is only used for storing data.
+ */
+static ssize_t
+store_call_start(struct sys_device *dev, const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	unsigned long call_start = simple_strtoull(buf, NULL, 16);
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu);
+	printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]);
+	printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]);
+	printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n",
+			  err_data_buffer[cpu].data1,
+			  err_data_buffer[cpu].data2,
+			  err_data_buffer[cpu].data3);
+#endif
+	switch (call_start) {
+	    case 0: /* Do nothing. */
+		break;
+	    case 1: /* Call pal_mc_error_inject in physical mode. */
+		status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu],
+					err_struct_info[cpu],
+					ia64_tpa(&err_data_buffer[cpu]),
+					&capabilities[cpu],
+			 		&resources[cpu]);
+		break;
+	    case 2: /* Call pal_mc_error_inject in virtual mode. */
+		status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu],
+					err_struct_info[cpu],
+					ia64_tpa(&err_data_buffer[cpu]),
+					&capabilities[cpu],
+			 		&resources[cpu]);
+		break;
+	    default:
+		status[cpu] = -EINVAL;
+		break;
+	}
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]);
+	printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]);
+	printk(KERN_DEBUG "resources=%lx\n", resources[cpu]);
+#endif
+	return size;
+}
+
+show(err_type_info)
+store(err_type_info)
+
+static ssize_t
+show_virtual_to_phys(struct sys_device *dev, char *buf)
+{
+	unsigned int cpu=dev->id;
+	return sprintf(buf, "%lx\n", phys_addr[cpu]);
+}
+
+static ssize_t
+store_virtual_to_phys(struct sys_device *dev, const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	u64 virt_addr=simple_strtoull(buf, NULL, 16);
+	int ret;
+
+        ret = get_user_pages(current, current->mm, virt_addr,
+                        1, VM_READ, 0, NULL, NULL);
+	if (ret<=0) {
+#ifdef ERR_INJ_DEBUG
+		printk("Virtual address %lx is not existing.\n",virt_addr);
+#endif
+		return -EINVAL;
+	}
+
+	phys_addr[cpu] = ia64_tpa(virt_addr);
+	return size;
+}
+
+show(err_struct_info)
+store(err_struct_info)
+
+static ssize_t
+show_err_data_buffer(struct sys_device *dev, char *buf)
+{
+	unsigned int cpu=dev->id;
+
+	return sprintf(buf, "%lx, %lx, %lx\n",
+			err_data_buffer[cpu].data1,
+			err_data_buffer[cpu].data2,
+			err_data_buffer[cpu].data3);
+}
+
+static ssize_t
+store_err_data_buffer(struct sys_device *dev, const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	int ret;
+
+#ifdef ERR_INJ_DEBUG
+	printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n",
+		 err_data_buffer[cpu].data1,
+		 err_data_buffer[cpu].data2,
+		 err_data_buffer[cpu].data3,
+		 cpu);
+#endif
+	ret=sscanf(buf, "%lx, %lx, %lx",
+			&err_data_buffer[cpu].data1,
+			&err_data_buffer[cpu].data2,
+			&err_data_buffer[cpu].data3);
+	if (ret!=ERR_DATA_BUFFER_SIZE)
+		return -EINVAL;
+
+	return size;
+}
+
+show(status)
+show(capabilities)
+show(resources)
+
+define_one_rw(call_start);
+define_one_rw(err_type_info);
+define_one_rw(err_struct_info);
+define_one_rw(err_data_buffer);
+define_one_rw(virtual_to_phys);
+define_one_ro(status);
+define_one_ro(capabilities);
+define_one_ro(resources);
+
+static struct attribute *default_attrs[] = {
+	&attr_call_start.attr,
+	&attr_virtual_to_phys.attr,
+	&attr_err_type_info.attr,
+	&attr_err_struct_info.attr,
+	&attr_err_data_buffer.attr,
+	&attr_status.attr,
+	&attr_capabilities.attr,
+	&attr_resources.attr,
+	NULL
+};
+
+static struct attribute_group err_inject_attr_group = {
+	.attrs = default_attrs,
+	.name = "err_inject"
+};
+/* Add/Remove err_inject interface for CPU device */
+static int __cpuinit err_inject_add_dev(struct sys_device * sys_dev)
+{
+	return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group);
+}
+
+static int __cpuinit err_inject_remove_dev(struct sys_device * sys_dev)
+{
+	sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
+	return 0;
+}
+static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct sys_device *sys_dev;
+
+	sys_dev = get_cpu_sysdev(cpu);
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		err_inject_add_dev(sys_dev);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		err_inject_remove_dev(sys_dev);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata err_inject_cpu_notifier =
+{
+	.notifier_call = err_inject_cpu_callback,
+};
+
+static int __init
+err_inject_init(void)
+{
+	int i;
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_INFO "Enter error injection driver.\n");
+#endif
+	for_each_online_cpu(i) {
+		err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE,
+				(void *)(long)i);
+	}
+
+	register_hotcpu_notifier(&err_inject_cpu_notifier);
+
+	return 0;
+}
+
+static void __exit
+err_inject_exit(void)
+{
+	int i;
+	struct sys_device *sys_dev;
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_INFO "Exit error injection driver.\n");
+#endif
+	for_each_online_cpu(i) {
+		sys_dev = get_cpu_sysdev(i);
+		sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
+	}
+	unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+
+MODULE_AUTHOR("Fenghua Yu <fenghua.yu@intel.com>");
+MODULE_DESCRIPTION("MC error injection kernel sysfs interface");
+MODULE_LICENSE("GPL");
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index dcfbf3e7a9e..37f46527d23 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -87,7 +87,6 @@
 #include <linux/list.h>
 #include <linux/pci.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <linux/bootmem.h>
 
@@ -1013,7 +1012,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
 /*
  * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
  */
-void __init
+void __devinit
 iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
 			  unsigned long polarity,
 			  unsigned long trigger)
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index ce49c85c928..407b4587048 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -4,7 +4,7 @@
  *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
  *
  * This file contains the code used by various IRQ handling routines:
- * asking for different IRQ's should be done through these routines
+ * asking for different IRQs should be done through these routines
  * instead of just grabbing them. Thus setups with different IRQ numbers
  * shouldn't result in any weird surprises, and installing new handlers
  * should be easier.
@@ -12,7 +12,7 @@
  * Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004
  *
  * 4/14/2004: Added code to handle cpu migration and do safe irq
- *			migration without lossing interrupts for iosapic
+ *			migration without losing interrupts for iosapic
  *			architecture.
  */
 
@@ -104,6 +104,17 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
 		irq_redir[irq] = (char) (redir & 0xff);
 	}
 }
+
+bool is_affinity_mask_valid(cpumask_t cpumask)
+{
+	if (ia64_platform_is("sn2")) {
+		/* Only allow one CPU to be specified in the smp_affinity mask */
+		if (cpus_weight(cpumask) != 1)
+			return false;
+	}
+	return true;
+}
+
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -179,7 +190,7 @@ void fixup_irqs(void)
 	}
 
 	/*
-	 * Phase 1: Locate irq's bound to this cpu and
+	 * Phase 1: Locate IRQs bound to this cpu and
 	 * relocate them for cpu removal.
 	 */
 	migrate_irqs();
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 456f57b087c..bc47049f060 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -27,7 +27,6 @@
 #include <linux/random.h>	/* for rand_initialize_irq() */
 #include <linux/signal.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/threads.h>
 #include <linux/bitops.h>
 #include <linux/irq.h>
@@ -39,6 +38,7 @@
 #include <asm/machvec.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
+#include <asm/tlbflush.h>
 
 #ifdef CONFIG_PERFMON
 # include <asm/perfmon.h>
@@ -127,8 +127,10 @@ void destroy_irq(unsigned int irq)
 
 #ifdef CONFIG_SMP
 #	define IS_RESCHEDULE(vec)	(vec == IA64_IPI_RESCHEDULE)
+#	define IS_LOCAL_TLB_FLUSH(vec)	(vec == IA64_IPI_LOCAL_TLB_FLUSH)
 #else
 #	define IS_RESCHEDULE(vec)	(0)
+#	define IS_LOCAL_TLB_FLUSH(vec)	(0)
 #endif
 /*
  * That's where the IVT branches when we get an external
@@ -180,8 +182,11 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
 	ia64_srlz_d();
 	while (vector != IA64_SPURIOUS_INT_VECTOR) {
-		if (unlikely(IS_RESCHEDULE(vector)))
-			 kstat_this_cpu.irqs[vector]++;
+		if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
+			smp_local_flush_tlb();
+			kstat_this_cpu.irqs[vector]++;
+		} else if (unlikely(IS_RESCHEDULE(vector)))
+			kstat_this_cpu.irqs[vector]++;
 		else {
 			ia64_setreg(_IA64_REG_CR_TPR, vector);
 			ia64_srlz_d();
@@ -227,8 +232,11 @@ void ia64_process_pending_intr(void)
 	  * Perform normal interrupt style processing
 	  */
 	while (vector != IA64_SPURIOUS_INT_VECTOR) {
-		if (unlikely(IS_RESCHEDULE(vector)))
-			 kstat_this_cpu.irqs[vector]++;
+		if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
+			smp_local_flush_tlb();
+			kstat_this_cpu.irqs[vector]++;
+		} else if (unlikely(IS_RESCHEDULE(vector)))
+			kstat_this_cpu.irqs[vector]++;
 		else {
 			struct pt_regs *old_regs = set_irq_regs(NULL);
 
@@ -260,12 +268,12 @@ void ia64_process_pending_intr(void)
 
 
 #ifdef CONFIG_SMP
-extern irqreturn_t handle_IPI (int irq, void *dev_id);
 
 static irqreturn_t dummy_handler (int irq, void *dev_id)
 {
 	BUG();
 }
+extern irqreturn_t handle_IPI (int irq, void *dev_id);
 
 static struct irqaction ipi_irqaction = {
 	.handler =	handle_IPI,
@@ -278,6 +286,13 @@ static struct irqaction resched_irqaction = {
 	.flags =	IRQF_DISABLED,
 	.name =		"resched"
 };
+
+static struct irqaction tlb_irqaction = {
+	.handler =	dummy_handler,
+	.flags =	IRQF_DISABLED,
+	.name =		"tlb_flush"
+};
+
 #endif
 
 void
@@ -303,6 +318,7 @@ init_IRQ (void)
 #ifdef CONFIG_SMP
 	register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
 	register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
+	register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction);
 #endif
 #ifdef CONFIG_PERFMON
 	pfm_init_percpu();
diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c
index c2f07beb175..e56a7a36aca 100644
--- a/arch/ia64/kernel/irq_lsapic.c
+++ b/arch/ia64/kernel/irq_lsapic.c
@@ -23,7 +23,7 @@ lsapic_noop_startup (unsigned int irq)
 static void
 lsapic_noop (unsigned int irq)
 {
-	/* nuthing to do... */
+	/* nothing to do... */
 }
 
 static int lsapic_retrigger(unsigned int irq)
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 6b7fcbd3f6f..34f44d8be00 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -374,6 +374,7 @@ ENTRY(alt_dtlb_miss)
 	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
 	mov r21=cr.ipsr
 	mov r31=pr
+	mov r24=PERCPU_ADDR
 	;;
 #ifdef CONFIG_DISABLE_VHPT
 	shr.u r22=r16,61			// get the region number into r21
@@ -386,22 +387,30 @@ ENTRY(alt_dtlb_miss)
 (p8)	mov r29=b0				// save b0
 (p8)	br.cond.dptk dtlb_fault
 #endif
+	cmp.ge p10,p11=r16,r24			// access to per_cpu_data?
+	tbit.z p12,p0=r16,61			// access to region 6?
+	mov r25=PERCPU_PAGE_SHIFT << 2
+	mov r26=PERCPU_PAGE_SIZE
+	nop.m 0
+	nop.b 0
+	;;
+(p10)	mov r19=IA64_KR(PER_CPU_DATA)
+(p11)	and r19=r19,r16				// clear non-ppn fields
 	extr.u r23=r21,IA64_PSR_CPL0_BIT,2	// extract psr.cpl
 	and r22=IA64_ISR_CODE_MASK,r20		// get the isr.code field
 	tbit.nz p6,p7=r20,IA64_ISR_SP_BIT	// is speculation bit on?
-	shr.u r18=r16,57			// move address bit 61 to bit 4
-	and r19=r19,r16				// clear ed, reserved bits, and PTE control bits
 	tbit.nz p9,p0=r20,IA64_ISR_NA_BIT	// is non-access bit on?
 	;;
-	andcm r18=0x10,r18	// bit 4=~address-bit(61)
+(p10)	sub r19=r19,r26
+(p10)	mov cr.itir=r25
 	cmp.ne p8,p0=r0,r23
 (p9)	cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22	// check isr.code field
+(p12)	dep r17=-1,r17,4,1			// set ma=UC for region 6 addr
 (p8)	br.cond.spnt page_fault
 
 	dep r21=-1,r21,IA64_PSR_ED_BIT,1
-	or r19=r19,r17		// insert PTE control bits into r19
 	;;
-	or r19=r19,r18		// set bit 4 (uncached) if the access was to region 6
+	or r19=r19,r17		// insert PTE control bits into r19
 (p6)	mov cr.ipsr=r21
 	;;
 (p7)	itc.d r19		// insert the TLB entry
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 6cb56dd4056..5bc46f15134 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -29,9 +29,9 @@
 #include <linux/slab.h>
 #include <linux/preempt.h>
 #include <linux/moduleloader.h>
+#include <linux/kdebug.h>
 
 #include <asm/pgtable.h>
-#include <asm/kdebug.h>
 #include <asm/sections.h>
 #include <asm/uaccess.h>
 
@@ -151,12 +151,12 @@ static uint __kprobes is_cmp_ctype_unc_inst(uint template, uint slot,
 
 	cmp_inst.l = kprobe_inst;
 	if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) {
-		/* Integere compare - Register Register (A6 type)*/
+		/* Integer compare - Register Register (A6 type)*/
 		if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0)
 				&&(cmp_inst.f.c == 1))
 			ctype_unc = 1;
 	} else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) {
-		/* Integere compare - Immediate Register (A8 type)*/
+		/* Integer compare - Immediate Register (A8 type)*/
 		if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1))
 			ctype_unc = 1;
 	}
@@ -370,14 +370,18 @@ static int __kprobes valid_kprobe_addr(int template, int slot,
 
 static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	kcb->prev_kprobe.kp = kprobe_running();
-	kcb->prev_kprobe.status = kcb->kprobe_status;
+	unsigned int i;
+	i = atomic_add_return(1, &kcb->prev_kprobe_index);
+	kcb->prev_kprobe[i-1].kp = kprobe_running();
+	kcb->prev_kprobe[i-1].status = kcb->kprobe_status;
 }
 
 static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
-	kcb->kprobe_status = kcb->prev_kprobe.status;
+	unsigned int i;
+	i = atomic_sub_return(1, &kcb->prev_kprobe_index);
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe[i].kp;
+	kcb->kprobe_status = kcb->prev_kprobe[i].status;
 }
 
 static void __kprobes set_current_kprobe(struct kprobe *p,
@@ -444,7 +448,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 			break;
 	}
 
-	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
 	regs->cr_iip = orig_ret_address;
 
 	reset_current_kprobe();
@@ -464,23 +469,13 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 }
 
 /* Called with kretprobe_lock held */
-void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
-	struct kretprobe_instance *ri;
-
-	if ((ri = get_free_rp_inst(rp)) != NULL) {
-		ri->rp = rp;
-		ri->task = current;
-		ri->ret_addr = (kprobe_opcode_t *)regs->b0;
-
-		/* Replace the return addr with trampoline addr */
-		regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
+	ri->ret_addr = (kprobe_opcode_t *)regs->b0;
 
-		add_rp_inst(ri);
-	} else {
-		rp->nmissed++;
-	}
+	/* Replace the return addr with trampoline addr */
+	regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
 }
 
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -825,7 +820,7 @@ out:
 	return 1;
 }
 
-static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -909,13 +904,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 			if (post_kprobes_handler(args->regs))
 				ret = NOTIFY_STOP;
 		break;
-	case DIE_PAGE_FAULT:
-		/* kprobe_running() needs smp_processor_id() */
-		preempt_disable();
-		if (kprobe_running() &&
-			kprobes_fault_handler(args->regs, args->trapnr))
-			ret = NOTIFY_STOP;
-		preempt_enable();
 	default:
 		break;
 	}
@@ -959,7 +947,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	/*
 	 * Callee owns the argument space and could overwrite it, eg
 	 * tail call optimization. So to be absolutely safe
-	 * we save the argument space before transfering the control
+	 * we save the argument space before transferring the control
 	 * to instrumented jprobe function which runs in
 	 * the process context
 	 */
@@ -1021,3 +1009,12 @@ int __init arch_init_kprobes(void)
 		(kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip;
 	return register_kprobe(&trampoline_p);
 }
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+	if (p->addr ==
+		(kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip)
+		return 1;
+
+	return 0;
+}
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
index 9620822270a..13df337508e 100644
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@@ -35,7 +35,7 @@ lookup_machvec (const char *name)
 	return 0;
 }
 
-void
+void __init
 machvec_init (const char *name)
 {
 	struct ia64_machine_vector *mv;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 491687f84fb..1ead5ea6c5c 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -63,7 +63,6 @@
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
-#include <linux/smp_lock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
 #include <linux/timer.h>
@@ -72,9 +71,9 @@
 #include <linux/smp.h>
 #include <linux/workqueue.h>
 #include <linux/cpumask.h>
+#include <linux/kdebug.h>
 
 #include <asm/delay.h>
-#include <asm/kdebug.h>
 #include <asm/machvec.h>
 #include <asm/meminit.h>
 #include <asm/page.h>
@@ -119,7 +118,9 @@ static ia64_mc_info_t		ia64_mc_info;
 #define CPE_HISTORY_LENGTH    5
 #define CMC_HISTORY_LENGTH    5
 
+#ifdef CONFIG_ACPI
 static struct timer_list cpe_poll_timer;
+#endif
 static struct timer_list cmc_poll_timer;
 /*
  * This variable tells whether we are currently in polling mode.
@@ -272,7 +273,6 @@ static void ia64_mlogbuf_finish(int wait)
 
 	mlogbuf_finished = 1;
 }
-EXPORT_SYMBOL(ia64_mlogbuf_finish);
 
 /*
  * Print buffered messages from INIT context.
@@ -1476,6 +1476,10 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
 	struct task_struct *g, *t;
 	if (val != DIE_INIT_MONARCH_PROCESS)
 		return NOTIFY_DONE;
+#ifdef CONFIG_KEXEC
+	if (atomic_read(&kdump_in_progress))
+		return NOTIFY_DONE;
+#endif
 
 	/*
 	 * FIXME: mlogbuf will brim over with INIT stack dumps.
@@ -1690,7 +1694,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
 	ti->preempt_count = 1;
 	ti->task = p;
 	ti->cpu = cpu;
-	p->thread_info = ti;
+	p->stack = ti;
 	p->state = TASK_UNINTERRUPTIBLE;
 	cpu_set(cpu, p->cpus_allowed);
 	INIT_LIST_HEAD(&p->tasks);
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index c6b607c00de..8c9c26aa6ae 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -101,14 +101,6 @@ ia64_do_tlb_purge:
 	;;
 	srlz.d
 	;;
-	// 2. Purge DTR for PERCPU data.
-	movl r16=PERCPU_ADDR
-	mov r18=PERCPU_PAGE_SHIFT<<2
-	;;
-	ptr.d r16,r18
-	;;
-	srlz.d
-	;;
 	// 3. Purge ITR for PAL code.
 	GET_THIS_PADDR(r2, ia64_mca_pal_base)
 	;;
@@ -196,22 +188,6 @@ ia64_reload_tr:
 	srlz.i
 	srlz.d
 	;;
-	// 2. Reload DTR register for PERCPU data.
-	GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte)
-	;;
-	movl r16=PERCPU_ADDR		// vaddr
-	movl r18=PERCPU_PAGE_SHIFT<<2
-	;;
-	mov cr.itir=r18
-	mov cr.ifa=r16
-	;;
-	ld8 r18=[r2]			// load per-CPU PTE
-	mov r16=IA64_TR_PERCPU_DATA;
-	;;
-	itr.d dtr[r16]=r18
-	;;
-	srlz.d
-	;;
 	// 3. Reload ITR for PAL code.
 	GET_THIS_PADDR(r2, ia64_mca_pal_pte)
 	;;
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index 832cf1e647e..aba813c2c15 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -14,7 +14,6 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kallsyms.h>
-#include <linux/smp_lock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
 #include <linux/timer.h>
@@ -439,7 +438,7 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
  * @peidx:	pointer of index of processor error section
  *
  * Return value:
- *	target address on Success / 0 on Failue
+ *	target address on Success / 0 on Failure
  */
 static u64
 get_target_identifier(peidx_table_t *peidx)
@@ -702,7 +701,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
 		return fatal_mca("External bus check fatal status");
 
 	/*
-	 * This is a local MCA and estimated as a recoverble error.
+	 * This is a local MCA and estimated as a recoverable error.
 	 */
 	if (platform)
 		return recover_from_platform_error(slidx, peidx, pbci, sos);
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index 158e3c51bb7..196287928ba 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -861,7 +861,7 @@ apply_relocate (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 /*
  * Modules contain a single unwind table which covers both the core and the init text
  * sections but since the two are not contiguous, we need to split this table up such that
- * we can register (and unregister) each "segment" seperately.  Fortunately, this sounds
+ * we can register (and unregister) each "segment" separately.  Fortunately, this sounds
  * more complicated than it really is.
  */
 static void
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index a71df9ae039..85829e27785 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -975,9 +975,11 @@ static int palinfo_cpu_callback(struct notifier_block *nfb,
 
 	switch (action) {
 	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
 		create_palinfo_proc_entries(hotcpu);
 		break;
 	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
 		remove_palinfo_proc_entries(hotcpu);
 		break;
 	}
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index bc11bb096f5..e796e29f8e1 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -195,3 +195,23 @@ ia64_patch_gate (void)
 	ia64_patch_vtop(START(vtop), END(vtop));
 	ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
 }
+
+void ia64_patch_phys_stack_reg(unsigned long val)
+{
+	s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;
+	s32 * end = (s32 *) __end___phys_stack_reg_patchlist;
+	u64 ip, mask, imm;
+
+	/* see instruction format A4: adds r1 = imm13, r3 */
+	mask = (0x3fUL << 27) | (0x7f << 13);
+	imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13;
+
+	while (offp < end) {
+		ip = (u64) offp + *offp;
+		ia64_patch(ip, mask, imm);
+		ia64_fc(ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index abc7ad03588..b7133cabdbe 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -23,7 +23,6 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
-#include <linux/smp_lock.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
@@ -1319,7 +1318,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 {
 	unsigned long flags;
 	/*
-	 * validy checks on cpu_mask have been done upstream
+	 * validity checks on cpu_mask have been done upstream
 	 */
 	LOCK_PFS(flags);
 
@@ -1385,7 +1384,7 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
 {
 	unsigned long flags;
 	/*
-	 * validy checks on cpu_mask have been done upstream
+	 * validity checks on cpu_mask have been done upstream
 	 */
 	LOCK_PFS(flags);
 
@@ -1836,7 +1835,7 @@ pfm_flush(struct file *filp, fl_owner_t id)
 	/*
 	 * remove our file from the async queue, if we use this mode.
 	 * This can be done without the context being protected. We come
-	 * here when the context has become unreacheable by other tasks.
+	 * here when the context has become unreachable by other tasks.
 	 *
 	 * We may still have active monitoring at this point and we may
 	 * end up in pfm_overflow_handler(). However, fasync_helper()
@@ -2133,7 +2132,7 @@ doit:
 	filp->private_data = NULL;
 
 	/*
-	 * if we free on the spot, the context is now completely unreacheable
+	 * if we free on the spot, the context is now completely unreachable
 	 * from the callers side. The monitored task side is also cut, so we
 	 * can freely cut.
 	 *
@@ -2563,7 +2562,7 @@ pfm_reset_pmu_state(pfm_context_t *ctx)
 	ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1;
 
 	/*
-	 * bitmask of all PMDs that are accesible to this context
+	 * bitmask of all PMDs that are accessible to this context
 	 */
 	ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0];
 
@@ -3396,7 +3395,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
 		/*
 		 * we can only read the register that we use. That includes
-		 * the one we explicitely initialize AND the one we want included
+		 * the one we explicitly initialize AND the one we want included
 		 * in the sampling buffer (smpl_regs).
 		 *
 		 * Having this restriction allows optimization in the ctxsw routine
@@ -3716,7 +3715,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	 * if non-blocking, then we ensure that the task will go into
 	 * pfm_handle_work() before returning to user mode.
 	 *
-	 * We cannot explicitely reset another task, it MUST always
+	 * We cannot explicitly reset another task, it MUST always
 	 * be done by the task itself. This works for system wide because
 	 * the tool that is controlling the session is logically doing 
 	 * "self-monitoring".
@@ -4645,7 +4644,7 @@ pfm_exit_thread(struct task_struct *task)
 	switch(state) {
 		case PFM_CTX_UNLOADED:
 			/*
-	 		 * only comes to thios function if pfm_context is not NULL, i.e., cannot
+	 		 * only comes to this function if pfm_context is not NULL, i.e., cannot
 			 * be in unloaded state
 	 		 */
 			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
@@ -5248,7 +5247,7 @@ pfm_end_notify_user(pfm_context_t *ctx)
 
 /*
  * main overflow processing routine.
- * it can be called from the interrupt path or explicitely during the context switch code
+ * it can be called from the interrupt path or explicitly during the context switch code
  */
 static void
 pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h
index 9becccda289..c4bec7a9d18 100644
--- a/arch/ia64/kernel/perfmon_mckinley.h
+++ b/arch/ia64/kernel/perfmon_mckinley.h
@@ -181,7 +181,7 @@ static pmu_config_t pmu_conf_mck={
 	.pmc_desc      = pfm_mck_pmc_desc,
 	.num_ibrs       = 8,
 	.num_dbrs       = 8,
-	.use_rr_dbregs = 1 /* debug register are use for range retrictions */
+	.use_rr_dbregs = 1 /* debug registers are used for range restrictions */
 };
 
 
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index ae96d417699..af73b8dfde2 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -20,20 +20,19 @@
 #include <linux/personality.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/thread_info.h>
 #include <linux/unistd.h>
 #include <linux/efi.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
+#include <linux/kdebug.h>
 
 #include <asm/cpu.h>
 #include <asm/delay.h>
 #include <asm/elf.h>
 #include <asm/ia32.h>
 #include <asm/irq.h>
-#include <asm/kdebug.h>
 #include <asm/kexec.h>
 #include <asm/pgalloc.h>
 #include <asm/processor.h>
@@ -156,7 +155,7 @@ show_regs (struct pt_regs *regs)
 }
 
 void
-do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
+do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall)
 {
 	if (fsys_mode(current, &scr->pt)) {
 		/* defer signal-handling etc. until we return to privilege-level 0.  */
@@ -171,8 +170,8 @@ do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall
 #endif
 
 	/* deal with pending signal delivery */
-	if (test_thread_flag(TIF_SIGPENDING))
-		ia64_do_signal(oldset, scr, in_syscall);
+	if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK))
+		ia64_do_signal(scr, in_syscall);
 }
 
 static int pal_halt        = 1;
@@ -237,6 +236,7 @@ void cpu_idle_wait(void)
 {
 	unsigned int cpu, this_cpu = get_cpu();
 	cpumask_t map;
+	cpumask_t tmp = current->cpus_allowed;
 
 	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
 	put_cpu();
@@ -258,6 +258,7 @@ void cpu_idle_wait(void)
 		}
 		cpus_and(map, map, cpu_online_map);
 	} while (!cpus_empty(map));
+	set_cpus_allowed(current, tmp);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
@@ -762,6 +763,9 @@ get_wchan (struct task_struct *p)
 	unsigned long ip;
 	int count = 0;
 
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
 	/*
 	 * Note: p may not be a blocked task (it could be current or
 	 * another process running on some other CPU.  Rather than
@@ -772,6 +776,8 @@ get_wchan (struct task_struct *p)
 	 */
 	unw_init_from_blocked_task(&info, p);
 	do {
+		if (p->state == TASK_RUNNING)
+			return 0;
 		if (unw_unwind(&info) < 0)
 			return 0;
 		unw_get_ip(&info, &ip);
diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S
index ae473e3f2a0..903babd22d6 100644
--- a/arch/ia64/kernel/relocate_kernel.S
+++ b/arch/ia64/kernel/relocate_kernel.S
@@ -94,7 +94,7 @@ GLOBAL_ENTRY(relocate_new_kernel)
 4:
         srlz.i
         ;;
-	//purge TR entry for kernel text and data
+	// purge TR entry for kernel text and data
         movl r16=KERNEL_START
         mov r18=KERNEL_TR_PAGE_SHIFT<<2
         ;;
@@ -104,15 +104,6 @@ GLOBAL_ENTRY(relocate_new_kernel)
         srlz.i
         ;;
 
-	// purge TR entry for percpu data
-        movl r16=PERCPU_ADDR
-        mov r18=PERCPU_PAGE_SHIFT<<2
-        ;;
-        ptr.d r16,r18
-        ;;
-        srlz.d
-	;;
-
         // purge TR entry for pal code
         mov r16=in3
         mov r18=IA64_GRANULE_SHIFT<<2
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
index 37c876f95db..27c2ef445a5 100644
--- a/arch/ia64/kernel/sal.c
+++ b/arch/ia64/kernel/sal.c
@@ -134,7 +134,7 @@ set_smp_redirect (int flag)
 	 * interrupt redirection. The reason is this would require that
 	 * All interrupts be stopped and hard bind the irq to a cpu.
 	 * Later when the interrupt is fired we need to set the redir hint
-	 * on again in the vector. This is combersome for something that the
+	 * on again in the vector. This is cumbersome for something that the
 	 * user mode irq balancer will solve anyways.
 	 */
 	no_int_routing=1;
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index af9f8754d84..25cd75f50ab 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -42,7 +42,6 @@
 #include <linux/proc_fs.h>
 #include <linux/module.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/timer.h>
 #include <linux/vmalloc.h>
 
@@ -163,7 +162,7 @@ static DEFINE_SPINLOCK(data_saved_lock);
 /** salinfo_platform_oemdata - optional callback to decode oemdata from an error
  * record.
  * @sect_header: pointer to the start of the section to decode.
- * @oemdata: returns vmalloc area containing the decded output.
+ * @oemdata: returns vmalloc area containing the decoded output.
  * @oemdata_size: returns length of decoded output (strlen).
  *
  * Description: If user space asks for oem data to be decoded by the kernel
@@ -583,6 +582,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu
 	struct salinfo_data *data;
 	switch (action) {
 	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
 		spin_lock_irqsave(&data_saved_lock, flags);
 		for (i = 0, data = salinfo_data;
 		     i < ARRAY_SIZE(salinfo_data);
@@ -593,6 +593,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu
 		spin_unlock_irqrestore(&data_saved_lock, flags);
 		break;
 	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
 		spin_lock_irqsave(&data_saved_lock, flags);
 		for (i = 0, data = salinfo_data;
 		     i < ARRAY_SIZE(salinfo_data);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index dc7dd7648ec..eaa6a24bc0b 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -75,7 +75,6 @@ extern void ia64_setup_printk_clock(void);
 
 DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
 DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
-DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
 unsigned long ia64_cycles_per_usec;
 struct ia64_boot_param *ia64_boot_param;
 struct screen_info screen_info;
@@ -577,7 +576,7 @@ setup_arch (char **cmdline_p)
 }
 
 /*
- * Display cpu info for all cpu's.
+ * Display cpu info for all CPUs.
  */
 static int
 show_cpuinfo (struct seq_file *m, void *v)
@@ -762,7 +761,7 @@ identify_cpu (struct cpuinfo_ia64 *c)
 	c->cpu = smp_processor_id();
 
 	/* below default values will be overwritten  by identify_siblings() 
-	 * for Multi-Threading/Multi-Core capable cpu's
+	 * for Multi-Threading/Multi-Core capable CPUs
 	 */
 	c->threads_per_core = c->cores_per_socket = c->num_log = 1;
 	c->socket_id = -1;
@@ -787,7 +786,7 @@ identify_cpu (struct cpuinfo_ia64 *c)
 	c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
 }
 
-void
+void __init
 setup_per_cpu_areas (void)
 {
 	/* start_kernel() requires this... */
@@ -869,6 +868,7 @@ void __cpuinit
 cpu_init (void)
 {
 	extern void __cpuinit ia64_mmu_init (void *);
+	static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
 	unsigned long num_phys_stacked;
 	pal_vm_info_2_u_t vmi;
 	unsigned int max_ctx;
@@ -947,7 +947,7 @@ cpu_init (void)
 	ia32_cpu_init();
 #endif
 
-	/* Clear ITC to eliminiate sched_clock() overflows in human time.  */
+	/* Clear ITC to eliminate sched_clock() overflows in human time.  */
 	ia64_set_itc(0);
 
 	/* disable all local interrupt sources: */
@@ -982,7 +982,10 @@ cpu_init (void)
 		num_phys_stacked = 96;
 	}
 	/* size of physical stacked register partition plus 8 bytes: */
-	__get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
+	if (num_phys_stacked > max_num_phys_stacked) {
+		ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8);
+		max_num_phys_stacked = num_phys_stacked;
+	}
 	platform_cpu_init();
 	pm_idle = default_idle;
 }
diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h
index 37b986cb86e..9fd9a1933b3 100644
--- a/arch/ia64/kernel/sigframe.h
+++ b/arch/ia64/kernel/sigframe.h
@@ -22,4 +22,4 @@ struct sigframe {
 	struct sigcontext sc;
 };
 
-extern long ia64_do_signal (sigset_t *, struct sigscratch *, long);
+extern void ia64_do_signal (struct sigscratch *, long);
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 77f8b49c788..aeec8184e86 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -14,7 +14,6 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/tty.h>
 #include <linux/binfmts.h>
@@ -41,47 +40,6 @@
 # define GET_SIGSET(k,u)	__get_user((k)->sig[0], &(u)->sig[0])
 #endif
 
-long
-ia64_rt_sigsuspend (sigset_t __user *uset, size_t sigsetsize, struct sigscratch *scr)
-{
-	sigset_t oldset, set;
-
-	/* XXX: Don't preclude handling different sized sigset_t's.  */
-	if (sigsetsize != sizeof(sigset_t))
-		return -EINVAL;
-
-	if (!access_ok(VERIFY_READ, uset, sigsetsize))
-		return -EFAULT;
-
-	if (GET_SIGSET(&set, uset))
-		return -EFAULT;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-
-	spin_lock_irq(&current->sighand->siglock);
-	{
-		oldset = current->blocked;
-		current->blocked = set;
-		recalc_sigpending();
-	}
-	spin_unlock_irq(&current->sighand->siglock);
-
-	/*
-	 * The return below usually returns to the signal handler.  We need to
-	 * pre-set the correct error code here to ensure that the right values
-	 * get saved in sigcontext by ia64_do_signal.
-	 */
-	scr->pt.r8 = EINTR;
-	scr->pt.r10 = -1;
-
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (ia64_do_signal(&oldset, scr, 1))
-			return -EINTR;
-	}
-}
-
 asmlinkage long
 sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2,
 		 long arg3, long arg4, long arg5, long arg6, long arg7,
@@ -478,10 +436,11 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse
  * Note that `init' is a special process: it doesn't get signals it doesn't want to
  * handle.  Thus you cannot kill init even with a SIGKILL even by mistake.
  */
-long
-ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
+void
+ia64_do_signal (struct sigscratch *scr, long in_syscall)
 {
 	struct k_sigaction ka;
+	sigset_t *oldset;
 	siginfo_t info;
 	long restart = in_syscall;
 	long errno = scr->pt.r8;
@@ -493,9 +452,11 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
 	 * doing anything if so.
 	 */
 	if (!user_mode(&scr->pt))
-		return 0;
+		return;
 
-	if (!oldset)
+	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+		oldset = &current->saved_sigmask;
+	else
 		oldset = &current->blocked;
 
 	/*
@@ -558,8 +519,15 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
 		 * Whee!  Actually deliver the signal.  If the delivery failed, we need to
 		 * continue to iterate in this loop so we can deliver the SIGSEGV...
 		 */
-		if (handle_signal(signr, &ka, &info, oldset, scr))
-			return 1;
+		if (handle_signal(signr, &ka, &info, oldset, scr)) {
+			/* a signal was successfully delivered; the saved
+			 * sigmask will have been stored in the signal frame,
+			 * and will be restored by sigreturn, so we can simply
+			 * clear the TIF_RESTORE_SIGMASK flag */
+			if (test_thread_flag(TIF_RESTORE_SIGMASK))
+				clear_thread_flag(TIF_RESTORE_SIGMASK);
+			return;
+		}
 	}
 
 	/* Did we come from a system call? */
@@ -585,5 +553,11 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
 			}
 		}
 	}
-	return 0;
+
+	/* if there's no signal to deliver, we just put the saved sigmask
+	 * back */
+	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+		clear_thread_flag(TIF_RESTORE_SIGMASK);
+		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+	}
 }
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 55ddd809b02..b3a47f986e1 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -50,6 +50,18 @@
 #include <asm/mca.h>
 
 /*
+ * Note: alignment of 4 entries/cacheline was empirically determined
+ * to be a good tradeoff between hot cachelines & spreading the array
+ * across too many cachelines.
+ */
+static struct local_tlb_flush_counts {
+	unsigned int count;
+} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];
+
+static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned;
+
+
+/*
  * Structure and data for smp_call_function(). This is designed to minimise static memory
  * requirements. It also looks cleaner.
  */
@@ -174,7 +186,7 @@ handle_IPI (int irq, void *dev_id)
 }
 
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_single (int dest_cpu, int op)
@@ -184,7 +196,7 @@ send_IPI_single (int dest_cpu, int op)
 }
 
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_allbutself (int op)
@@ -198,7 +210,7 @@ send_IPI_allbutself (int op)
 }
 
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_all (int op)
@@ -211,7 +223,7 @@ send_IPI_all (int op)
 }
 
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_self (int op)
@@ -240,7 +252,7 @@ kdump_smp_send_init(void)
 }
 #endif
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 void
 smp_send_reschedule (int cpu)
@@ -248,6 +260,62 @@ smp_send_reschedule (int cpu)
 	platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
 }
 
+/*
+ * Called with preemption disabled.
+ */
+static void
+smp_send_local_flush_tlb (int cpu)
+{
+	platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
+}
+
+void
+smp_local_flush_tlb(void)
+{
+	/*
+	 * Use atomic ops. Otherwise, the load/increment/store sequence from
+	 * a "++" operation can have the line stolen between the load & store.
+	 * The overhead of the atomic op is negligible in this case & offers
+	 * significant benefit for the brief periods where lots of cpus
+	 * are simultaneously flushing TLBs.
+	 */
+	ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq);
+	local_flush_tlb_all();
+}
+
+#define FLUSH_DELAY	5 /* Usec backoff to eliminate excessive cacheline bouncing */
+
+void
+smp_flush_tlb_cpumask(cpumask_t xcpumask)
+{
+	unsigned int *counts = __ia64_per_cpu_var(shadow_flush_counts);
+	cpumask_t cpumask = xcpumask;
+	int mycpu, cpu, flush_mycpu = 0;
+
+	preempt_disable();
+	mycpu = smp_processor_id();
+
+	for_each_cpu_mask(cpu, cpumask)
+		counts[cpu] = local_tlb_flush_counts[cpu].count;
+
+	mb();
+	for_each_cpu_mask(cpu, cpumask) {
+		if (cpu == mycpu)
+			flush_mycpu = 1;
+		else
+			smp_send_local_flush_tlb(cpu);
+	}
+
+	if (flush_mycpu)
+		smp_local_flush_tlb();
+
+	for_each_cpu_mask(cpu, cpumask)
+		while(counts[cpu] == local_tlb_flush_counts[cpu].count)
+			udelay(FLUSH_DELAY);
+
+	preempt_enable();
+}
+
 void
 smp_flush_tlb_all (void)
 {
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index ff7df439da6..3c9d8e6089c 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -35,7 +35,6 @@
 #include <linux/mm.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 #include <linux/efi.h>
 #include <linux/percpu.h>
@@ -371,7 +370,7 @@ smp_setup_percpu_timer (void)
 {
 }
 
-static void __devinit
+static void __cpuinit
 smp_callin (void)
 {
 	int cpuid, phys_id, itc_master;
@@ -457,7 +456,7 @@ smp_callin (void)
 /*
  * Activate a secondary processor.  head.S calls this.
  */
-int __devinit
+int __cpuinit
 start_secondary (void *unused)
 {
 	/* Early console may use I/O ports */
@@ -695,7 +694,7 @@ int migrate_platform_irqs(unsigned int cpu)
 			set_cpei_target_cpu(new_cpei_cpu);
 			desc = irq_desc + ia64_cpe_irq;
 			/*
-			 * Switch for now, immediatly, we need to do fake intr
+			 * Switch for now, immediately, we need to do fake intr
 			 * as other interrupts, but need to study CPEI behaviour with
 			 * polling before making changes.
 			 */
@@ -841,7 +840,7 @@ __cpu_up (unsigned int cpu)
 }
 
 /*
- * Assume that CPU's have been discovered by some platform-dependent interface.  For
+ * Assume that CPUs have been discovered by some platform-dependent interface.  For
  * SoftSDV/Lion, that would be ACPI.
  *
  * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
@@ -855,7 +854,7 @@ init_smp_config(void)
 	} *ap_startup;
 	long sal_ret;
 
-	/* Tell SAL where to drop the AP's.  */
+	/* Tell SAL where to drop the APs.  */
 	ap_startup = (struct fptr *) start_ap;
 	sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
 				       ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0);
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 9ef62a3fbfa..1eda194b955 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -13,7 +13,6 @@
 #include <linux/shm.h>
 #include <linux/file.h>		/* doh, must come after sched.h... */
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/syscalls.h>
 #include <linux/highuid.h>
 #include <linux/hugetlb.h>
@@ -33,6 +32,13 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
 	if (len > RGN_MAP_LIMIT)
 		return -ENOMEM;
 
+	/* handle fixed mapping: prevent overlap with huge pages */
+	if (flags & MAP_FIXED) {
+		if (is_hugepage_only_range(mm, addr, len))
+			return -EINVAL;
+		return addr;
+	}
+
 #ifdef CONFIG_HUGETLB_PAGE
 	if (REGION_NUMBER(addr) == RGN_HPAGE)
 		addr = 0;
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 39e0cd3a088..a06667c7acc 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -235,7 +235,7 @@ ia64_init_itm (void)
 
 static struct irqaction timer_irqaction = {
 	.handler =	timer_interrupt,
-	.flags =	IRQF_DISABLED,
+	.flags =	IRQF_DISABLED | IRQF_IRQPOLL,
 	.name =		"timer"
 };
 
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 687500ddb4b..94ae3c87d82 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -412,9 +412,11 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb,
 	sys_dev = get_cpu_sysdev(cpu);
 	switch (action) {
 	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
 		cache_add_dev(sys_dev);
 		break;
 	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
 		cache_remove_dev(sys_dev);
 		break;
 	}
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 765cbe5ba6a..15ad85da15a 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -16,33 +16,17 @@
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
 #include <linux/delay.h>		/* for ssleep() */
+#include <linux/kdebug.h>
 
 #include <asm/fpswa.h>
 #include <asm/ia32.h>
 #include <asm/intrinsics.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
-#include <asm/kdebug.h>
 
 fpswa_interface_t *fpswa_interface;
 EXPORT_SYMBOL(fpswa_interface);
 
-ATOMIC_NOTIFIER_HEAD(ia64die_chain);
-
-int
-register_die_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_register(&ia64die_chain, nb);
-}
-EXPORT_SYMBOL_GPL(register_die_notifier);
-
-int
-unregister_die_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&ia64die_chain, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_die_notifier);
-
 void __init
 trap_init (void)
 {
@@ -59,9 +43,9 @@ die (const char *str, struct pt_regs *regs, long err)
 		u32 lock_owner;
 		int lock_owner_depth;
 	} die = {
-		.lock =			SPIN_LOCK_UNLOCKED,
-		.lock_owner =		-1,
-		.lock_owner_depth =	0
+		.lock =	__SPIN_LOCK_UNLOCKED(die.lock),
+		.lock_owner = -1,
+		.lock_owner_depth = 0
 	};
 	static int die_counter;
 	int cpu = get_cpu();
@@ -320,7 +304,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
 			 * Lower 4 bits are used as a count. Upper bits are a sequence
 			 * number that is updated when count is reset. The cmpxchg will
 			 * fail is seqno has changed. This minimizes mutiple cpus
-			 * reseting the count.
+			 * resetting the count.
 			 */
 			if (current_jiffies > last.time)
 				(void) cmpxchg_acq(&last.count, count, 16 + (count & ~15));
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 1e357550c77..fe6aa5a9f8f 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -15,7 +15,6 @@
  */
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/tty.h>
 
 #include <asm/intrinsics.h>
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index 93d5a3b41f6..b0b08b5f3ec 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -2,7 +2,7 @@
  * Copyright (C) 1999-2004 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  * Copyright (C) 2003 Fenghua Yu <fenghua.yu@intel.com>
- * 	- Change pt_regs_off() to make it less dependant on pt_regs structure.
+ * 	- Change pt_regs_off() to make it less dependent on pt_regs structure.
  */
 /*
  * This file implements call frame unwind support for the Linux
@@ -60,6 +60,7 @@
 #  define UNW_DEBUG_ON(n)	unw_debug_level >= n
    /* Do not code a printk level, not all debug lines end in newline */
 #  define UNW_DPRINT(n, ...)  if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__)
+#  undef inline
 #  define inline
 #else /* !UNW_DEBUG */
 #  define UNW_DEBUG_ON(n)  0
@@ -145,7 +146,7 @@ static struct {
 # endif
 } unw = {
 	.tables = &unw.kernel_table,
-	.lock = SPIN_LOCK_UNLOCKED,
+	.lock = __SPIN_LOCK_UNLOCKED(unw.lock),
 	.save_order = {
 		UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR,
 		UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR
@@ -1859,7 +1860,7 @@ int
 unw_unwind (struct unw_frame_info *info)
 {
 	unsigned long prev_ip, prev_sp, prev_bsp;
-	unsigned long ip, pr, num_regs;
+	unsigned long ip, pr, num_regs, rp_loc, pfs_loc;
 	STAT(unsigned long start, flags;)
 	int retval;
 
@@ -1869,14 +1870,16 @@ unw_unwind (struct unw_frame_info *info)
 	prev_sp = info->sp;
 	prev_bsp = info->bsp;
 
-	/* restore the ip */
-	if (!info->rp_loc) {
+	/* validate the return IP pointer */
+	rp_loc = (unsigned long) info->rp_loc;
+	if ((rp_loc < info->regstk.limit) || (rp_loc > info->regstk.top)) {
 		/* FIXME: should really be level 0 but it occurs too often. KAO */
 		UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n",
 			   __FUNCTION__, info->ip);
 		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
 		return -1;
 	}
+	/* restore the ip */
 	ip = info->ip = *info->rp_loc;
 	if (ip < GATE_ADDR) {
 		UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __FUNCTION__, ip);
@@ -1884,12 +1887,14 @@ unw_unwind (struct unw_frame_info *info)
 		return -1;
 	}
 
-	/* restore the cfm: */
-	if (!info->pfs_loc) {
+	/* validate the previous stack frame pointer */
+	pfs_loc = (unsigned long) info->pfs_loc;
+	if ((pfs_loc < info->regstk.limit) || (pfs_loc > info->regstk.top)) {
 		UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __FUNCTION__);
 		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
 		return -1;
 	}
+	/* restore the cfm: */
 	info->cfm_loc = info->pfs_loc;
 
 	/* restore the bsp: */
@@ -1943,9 +1948,9 @@ EXPORT_SYMBOL(unw_unwind);
 int
 unw_unwind_to_user (struct unw_frame_info *info)
 {
-	unsigned long ip, sp, pr = 0;
+	unsigned long ip, sp, pr = info->pr;
 
-	while (unw_unwind(info) >= 0) {
+	do {
 		unw_get_sp(info, &sp);
 		if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
 		    < IA64_PT_REGS_SIZE) {
@@ -1963,7 +1968,7 @@ unw_unwind_to_user (struct unw_frame_info *info)
 				__FUNCTION__, ip);
 			return -1;
 		}
-	}
+	} while (unw_unwind(info) >= 0);
 	unw_get_ip(info, &ip);
 	UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
 		   __FUNCTION__, ip);
@@ -1991,13 +1996,16 @@ init_frame_info (struct unw_frame_info *info, struct task_struct *t,
 	memset(info, 0, sizeof(*info));
 
 	rbslimit = (unsigned long) t + IA64_RBS_OFFSET;
+	stklimit = (unsigned long) t + IA64_STK_OFFSET;
+
 	rbstop   = sw->ar_bspstore;
-	if (rbstop - (unsigned long) t >= IA64_STK_OFFSET)
+	if (rbstop > stklimit || rbstop < rbslimit)
 		rbstop = rbslimit;
 
-	stklimit = (unsigned long) t + IA64_STK_OFFSET;
 	if (stktop <= rbstop)
 		stktop = rbstop;
+	if (stktop > stklimit)
+		stktop = stklimit;
 
 	info->regstk.limit = rbslimit;
 	info->regstk.top   = rbstop;
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 25dd55e4db2..5a65965c8b5 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -44,7 +44,7 @@ SECTIONS
   .text : AT(ADDR(.text) - LOAD_OFFSET)
     {
 	IVT_TEXT
-	*(.text)
+	TEXT_TEXT
 	SCHED_TEXT
 	LOCK_TEXT
 	KPROBES_TEXT
@@ -78,6 +78,13 @@ SECTIONS
 	  __stop___mca_table = .;
 	}
 
+  .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET)
+	{
+	  __start___phys_stack_reg_patchlist = .;
+	  *(.data.patch.phys_stack_reg)
+	  __end___phys_stack_reg_patchlist = .;
+	}
+
   /* Global data */
   _data = .;
 
@@ -207,7 +214,12 @@ SECTIONS
 
   data : { } :data
   .data : AT(ADDR(.data) - LOAD_OFFSET)
-	{ *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
+	{
+		DATA_DATA
+		*(.data1)
+		*(.gnu.linkonce.d*)
+		CONSTRUCTORS
+	}
 
   . = ALIGN(16);	/* gp must be 16-byte aligned for exc. table */
   .got : AT(ADDR(.got) - LOAD_OFFSET)
diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c
index 503dfe6d145..118daf5a063 100644
--- a/arch/ia64/lib/csum_partial_copy.c
+++ b/arch/ia64/lib/csum_partial_copy.c
@@ -128,6 +128,8 @@ csum_partial_copy_from_user(const void __user *src, void *dst,
 	return (__force __wsum)result;
 }
 
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
 __wsum
 csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
 {
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 44ce5ed9444..7ac8592a35b 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -88,7 +88,7 @@ void show_mem(void)
 	printk(KERN_INFO "%d pages shared\n", total_shared);
 	printk(KERN_INFO "%d pages swap cached\n", total_cached);
 	printk(KERN_INFO "Total of %ld pages in page table cache\n",
-	       pgtable_quicklist_total_size());
+	       quicklist_total_size());
 	printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
 }
 
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 872da7a2acc..0dbf0e81f8c 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -317,7 +317,7 @@ static void __meminit scatter_node_data(void)
 	 * node_online_map is not set for hot-added nodes at this time,
 	 * because we are halfway through initialization of the new node's
 	 * structures.  If for_each_online_node() is used, a new node's
-	 * pg_data_ptrs will be not initialized. Insted of using it,
+	 * pg_data_ptrs will not be initialized. Instead of using it,
 	 * pgdat_list[] is checked.
 	 */
 	for_each_node(node) {
@@ -561,7 +561,7 @@ void show_mem(void)
 	printk(KERN_INFO "%d pages shared\n", total_shared);
 	printk(KERN_INFO "%d pages swap cached\n", total_cached);
 	printk(KERN_INFO "Total of %ld pages in page table cache\n",
-	       pgtable_quicklist_total_size());
+	       quicklist_total_size());
 	printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
 }
 
@@ -693,6 +693,7 @@ void __init paging_init(void)
 	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
 pg_data_t *arch_alloc_nodedata(int nid)
 {
 	unsigned long size = compute_pernodesize(nid);
@@ -710,3 +711,4 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
 	pgdat_list[update_node] = update_pgdat;
 	scatter_node_data();
 }
+#endif
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 59f3ab93761..b87f785c241 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -7,49 +7,36 @@
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/kprobes.h>
+#include <linux/kdebug.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
-#include <asm/kdebug.h>
 
 extern void die (char *, struct pt_regs *, long);
 
 #ifdef CONFIG_KPROBES
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-
-/* Hook to register for page fault notifications */
-int register_page_fault_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
-}
-
-int unregister_page_fault_notifier(struct notifier_block *nb)
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
 {
-	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
-}
+	int ret = 0;
+
+	if (!user_mode(regs)) {
+		/* kprobe_running() needs smp_processor_id() */
+		preempt_disable();
+		if (kprobe_running() && kprobes_fault_handler(regs, trap))
+			ret = 1;
+		preempt_enable();
+	}
 
-static inline int notify_page_fault(enum die_val val, const char *str,
-			struct pt_regs *regs, long err, int trap, int sig)
-{
-	struct die_args args = {
-		.regs = regs,
-		.str = str,
-		.err = err,
-		.trapnr = trap,
-		.signr = sig
-	};
-	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+	return ret;
 }
 #else
-static inline int notify_page_fault(enum die_val val, const char *str,
-			struct pt_regs *regs, long err, int trap, int sig)
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
 {
-	return NOTIFY_DONE;
+	return 0;
 }
 #endif
 
@@ -118,8 +105,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 	/*
 	 * This is to handle the kprobes on user space access instructions
 	 */
-	if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT,
-					SIGSEGV) == NOTIFY_STOP)
+	if (notify_page_fault(regs, TRAP_BRKPT))
 		return;
 
 	down_read(&mm->mmap_sem);
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 0c7e94edc20..1346b7f0539 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -13,7 +13,6 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <asm/mman.h>
@@ -148,6 +147,14 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u
 		return -ENOMEM;
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
+
+	/* Handle MAP_FIXED */
+	if (flags & MAP_FIXED) {
+		if (prepare_hugepage_range(addr, len, pgoff))
+			return -EINVAL;
+		return addr;
+	}
+
 	/* This code assumes that RGN_HPAGE != 0. */
 	if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1)))
 		addr = HPAGE_REGION_BASE;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 4f36987eea7..c14abefabaf 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -39,9 +39,6 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
-DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
-DEFINE_PER_CPU(long, __pgtable_quicklist_size);
-
 extern void ia64_tlb_init (void);
 
 unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
@@ -56,54 +53,6 @@ EXPORT_SYMBOL(vmem_map);
 struct page *zero_page_memmap_ptr;	/* map entry for zero page */
 EXPORT_SYMBOL(zero_page_memmap_ptr);
 
-#define MIN_PGT_PAGES			25UL
-#define MAX_PGT_FREES_PER_PASS		16L
-#define PGT_FRACTION_OF_NODE_MEM	16
-
-static inline long
-max_pgt_pages(void)
-{
-	u64 node_free_pages, max_pgt_pages;
-
-#ifndef	CONFIG_NUMA
-	node_free_pages = nr_free_pages();
-#else
-	node_free_pages = node_page_state(numa_node_id(), NR_FREE_PAGES);
-#endif
-	max_pgt_pages = node_free_pages / PGT_FRACTION_OF_NODE_MEM;
-	max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES);
-	return max_pgt_pages;
-}
-
-static inline long
-min_pages_to_free(void)
-{
-	long pages_to_free;
-
-	pages_to_free = pgtable_quicklist_size - max_pgt_pages();
-	pages_to_free = min(pages_to_free, MAX_PGT_FREES_PER_PASS);
-	return pages_to_free;
-}
-
-void
-check_pgt_cache(void)
-{
-	long pages_to_free;
-
-	if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
-		return;
-
-	preempt_disable();
-	while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
-		while (pages_to_free--) {
-			free_page((unsigned long)pgtable_quicklist_alloc());
-		}
-		preempt_enable();
-		preempt_disable();
-	}
-	preempt_enable();
-}
-
 void
 lazy_mmu_prot_update (pte_t pte)
 {
@@ -121,7 +70,7 @@ lazy_mmu_prot_update (pte_t pte)
 		return;				/* i-cache is already coherent with d-cache */
 
 	if (PageCompound(page)) {
-		order = (unsigned long) (page[1].lru.prev);
+		order = compound_order(page);
 		flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT));
 	}
 	else
@@ -355,7 +304,7 @@ setup_gate (void)
 void __devinit
 ia64_mmu_init (void *my_cpu_data)
 {
-	unsigned long psr, pta, impl_va_bits;
+	unsigned long pta, impl_va_bits;
 	extern void __devinit tlb_init (void);
 
 #ifdef CONFIG_DISABLE_VHPT
@@ -364,15 +313,6 @@ ia64_mmu_init (void *my_cpu_data)
 #	define VHPT_ENABLE_BIT	1
 #endif
 
-	/* Pin mapping for percpu area into TLB */
-	psr = ia64_clear_ic();
-	ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
-		 pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
-		 PERCPU_PAGE_SHIFT);
-
-	ia64_set_psr(psr);
-	ia64_srlz_i();
-
 	/*
 	 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
 	 * address space.  The IA-64 architecture guarantees that at least 50 bits of
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
index 4280c074d64..2a140627dfd 100644
--- a/arch/ia64/mm/ioremap.c
+++ b/arch/ia64/mm/ioremap.c
@@ -1,5 +1,5 @@
 /*
- * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
+ * (c) Copyright 2006, 2007 Hewlett-Packard Development Company, L.P.
  *	Bjorn Helgaas <bjorn.helgaas@hp.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -10,51 +10,101 @@
 #include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
 #include <asm/io.h>
 #include <asm/meminit.h>
 
 static inline void __iomem *
-__ioremap (unsigned long offset, unsigned long size)
+__ioremap (unsigned long phys_addr)
 {
-	return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset);
+	return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr);
 }
 
 void __iomem *
-ioremap (unsigned long offset, unsigned long size)
+ioremap (unsigned long phys_addr, unsigned long size)
 {
+	void __iomem *addr;
+	struct vm_struct *area;
+	unsigned long offset;
+	pgprot_t prot;
 	u64 attr;
 	unsigned long gran_base, gran_size;
+	unsigned long page_base;
 
 	/*
 	 * For things in kern_memmap, we must use the same attribute
 	 * as the rest of the kernel.  For more details, see
 	 * Documentation/ia64/aliasing.txt.
 	 */
-	attr = kern_mem_attribute(offset, size);
+	attr = kern_mem_attribute(phys_addr, size);
 	if (attr & EFI_MEMORY_WB)
-		return (void __iomem *) phys_to_virt(offset);
+		return (void __iomem *) phys_to_virt(phys_addr);
 	else if (attr & EFI_MEMORY_UC)
-		return __ioremap(offset, size);
+		return __ioremap(phys_addr);
 
 	/*
 	 * Some chipsets don't support UC access to memory.  If
 	 * WB is supported for the whole granule, we prefer that.
 	 */
-	gran_base = GRANULEROUNDDOWN(offset);
-	gran_size = GRANULEROUNDUP(offset + size) - gran_base;
+	gran_base = GRANULEROUNDDOWN(phys_addr);
+	gran_size = GRANULEROUNDUP(phys_addr + size) - gran_base;
 	if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB)
-		return (void __iomem *) phys_to_virt(offset);
+		return (void __iomem *) phys_to_virt(phys_addr);
 
-	return __ioremap(offset, size);
+	/*
+	 * WB is not supported for the whole granule, so we can't use
+	 * the region 7 identity mapping.  If we can safely cover the
+	 * area with kernel page table mappings, we can use those
+	 * instead.
+	 */
+	page_base = phys_addr & PAGE_MASK;
+	size = PAGE_ALIGN(phys_addr + size) - page_base;
+	if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB) {
+		prot = PAGE_KERNEL;
+
+		/*
+		 * Mappings have to be page-aligned
+		 */
+		offset = phys_addr & ~PAGE_MASK;
+		phys_addr &= PAGE_MASK;
+
+		/*
+		 * Ok, go for it..
+		 */
+		area = get_vm_area(size, VM_IOREMAP);
+		if (!area)
+			return NULL;
+
+		area->phys_addr = phys_addr;
+		addr = (void __iomem *) area->addr;
+		if (ioremap_page_range((unsigned long) addr,
+				(unsigned long) addr + size, phys_addr, prot)) {
+			vunmap((void __force *) addr);
+			return NULL;
+		}
+
+		return (void __iomem *) (offset + (char __iomem *)addr);
+	}
+
+	return __ioremap(phys_addr);
 }
 EXPORT_SYMBOL(ioremap);
 
 void __iomem *
-ioremap_nocache (unsigned long offset, unsigned long size)
+ioremap_nocache (unsigned long phys_addr, unsigned long size)
 {
-	if (kern_mem_attribute(offset, size) & EFI_MEMORY_WB)
+	if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB)
 		return NULL;
 
-	return __ioremap(offset, size);
+	return __ioremap(phys_addr);
 }
 EXPORT_SYMBOL(ioremap_nocache);
+
+void
+iounmap (volatile void __iomem *addr)
+{
+	if (REGION_NUMBER(addr) == RGN_GATE)
+		vunmap((void *) ((unsigned long) addr & PAGE_MASK));
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index ffad7624436..fa4e6d4810f 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -32,9 +32,9 @@ static struct {
 } purge;
 
 struct ia64_ctx ia64_ctx = {
-	.lock =		SPIN_LOCK_UNLOCKED,
-	.next =		1,
-	.max_ctx =	~0U
+	.lock =	__SPIN_LOCK_UNLOCKED(ia64_ctx.lock),
+	.next =	1,
+	.max_ctx = ~0U
 };
 
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 0e83f3b419b..73696b4a2ee 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -18,7 +18,6 @@
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 
 #include <asm/machvec.h>
@@ -355,10 +354,13 @@ pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
 
 	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
 			&windows);
-	controller->window = kmalloc_node(sizeof(*controller->window) * windows,
-			GFP_KERNEL, controller->node);
-	if (!controller->window)
-		goto out2;
+	if (windows) {
+		controller->window =
+			kmalloc_node(sizeof(*controller->window) * windows,
+				     GFP_KERNEL, controller->node);
+		if (!controller->window)
+			goto out2;
+	}
 
 	name = kmalloc(16, GFP_KERNEL);
 	if (!name)
@@ -659,8 +661,6 @@ pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
 		return -EINVAL;
 	prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size,
 				    vma->vm_page_prot);
-	if (pgprot_val(prot) != pgprot_val(pgprot_noncached(vma->vm_page_prot)))
-		return -EINVAL;
 
 	addr = pci_get_legacy_mem(bus);
 	if (IS_ERR(addr))
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index ff1c5560117..b362d6d6a8c 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -63,7 +63,7 @@ static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode)
  * Use the block transfer engine to move kernel memory from src to dest
  * using the assigned mode.
  *
- * Paramaters:
+ * Parameters:
  *   src - physical address of the transfer source.
  *   dest - physical address of the transfer destination.
  *   len - number of bytes to transfer from source to dest.
@@ -247,7 +247,7 @@ EXPORT_SYMBOL(bte_copy);
  * use the block transfer engine to move kernel
  * memory from src to dest using the assigned mode.
  *
- * Paramaters:
+ * Parameters:
  *   src - physical address of the transfer source.
  *   dest - physical address of the transfer destination.
  *   len - number of bytes to transfer from source to dest.
@@ -255,7 +255,7 @@ EXPORT_SYMBOL(bte_copy);
  *          for IBCT0/1 in the SGI documentation.
  *
  * NOTE: If the source, dest, and len are all cache line aligned,
- * then it would be _FAR_ preferrable to use bte_copy instead.
+ * then it would be _FAR_ preferable to use bte_copy instead.
  */
 bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 {
@@ -300,7 +300,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 	 * a standard bte copy.
 	 *
 	 * One nasty exception to the above rule is when the
-	 * source and destination are not symetrically
+	 * source and destination are not symmetrically
 	 * mis-aligned.  If the source offset from the first
 	 * cache line is different from the destination offset,
 	 * we make the first section be the entire transfer
@@ -337,7 +337,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 
 			if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
 				/*
-				 * We have two contigous bcopy
+				 * We have two contiguous bcopy
 				 * blocks.  Merge them.
 				 */
 				headBcopyLen += footBcopyLen;
@@ -375,7 +375,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 	} else {
 
 		/*
-		 * The transfer is not symetric, we will
+		 * The transfer is not symmetric, we will
 		 * allocate a buffer large enough for all the
 		 * data, bte_copy into that buffer and then
 		 * bcopy to the destination.
diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
index b6fcf8164f2..27c5936ccfe 100644
--- a/arch/ia64/sn/kernel/bte_error.c
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -105,7 +105,7 @@ int shub1_bte_error_handler(unsigned long _nodepda)
 	}
 
 	BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id()));
-	/* Reenable both bte interfaces */
+	/* Re-enable both bte interfaces */
 	imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM);
 	imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1;
 	REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval);
@@ -243,7 +243,7 @@ bte_crb_error_handler(cnodeid_t cnode, int btenum,
 
 	/*
 	 * The caller has already figured out the error type, we save that
-	 * in the bte handle structure for the thread excercising the
+	 * in the bte handle structure for the thread exercising the
 	 * interface to consume.
 	 */
 	bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET;
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
index fcf7f93c4b6..2c3f9dfca78 100644
--- a/arch/ia64/sn/kernel/huberror.c
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -8,7 +8,6 @@
 
 #include <linux/types.h>
 #include <linux/interrupt.h>
-#include <linux/pci.h>
 #include <asm/delay.h>
 #include <asm/sn/sn_sal.h>
 #include "ioerror.h"
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index d48bcd83253..787ed642dd4 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -364,7 +364,7 @@ void sn_bus_store_sysdata(struct pci_dev *dev)
 
 	element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL);
 	if (!element) {
-		dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__);
+		dev_dbg(&dev->dev, "%s: out of memory!\n", __FUNCTION__);
 		return;
 	}
 	element->sysdata = SN_PCIDEV_INFO(dev);
@@ -479,7 +479,7 @@ sn_io_early_init(void)
 	}
 
 	/*
-	 * prime sn_pci_provider[].  Individial provider init routines will
+	 * prime sn_pci_provider[].  Individual provider init routines will
 	 * override their respective default entries.
 	 */
 
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 8d2a1bfbfe7..7f6d2360a26 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -59,6 +59,22 @@ void sn_intr_free(nasid_t local_nasid, int local_widget,
 			(u64) sn_irq_info->irq_cookie, 0, 0);
 }
 
+u64 sn_intr_redirect(nasid_t local_nasid, int local_widget,
+		      struct sn_irq_info *sn_irq_info,
+		      nasid_t req_nasid, int req_slice)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+			(u64) SAL_INTR_REDIRECT, (u64) local_nasid,
+			(u64) local_widget, __pa(sn_irq_info),
+			(u64) req_nasid, (u64) req_slice, 0);
+
+	return ret_stuff.status;
+}
+
 static unsigned int sn_startup_irq(unsigned int irq)
 {
 	return 0;
@@ -127,15 +143,8 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info,
 	struct sn_irq_info *new_irq_info;
 	struct sn_pcibus_provider *pci_provider;
 
-	new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC);
-	if (new_irq_info == NULL)
-		return NULL;
-
-	memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info));
-
-	bridge = (u64) new_irq_info->irq_bridge;
+	bridge = (u64) sn_irq_info->irq_bridge;
 	if (!bridge) {
-		kfree(new_irq_info);
 		return NULL; /* irq is not a device interrupt */
 	}
 
@@ -145,8 +154,25 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info,
 		local_widget = TIO_SWIN_WIDGETNUM(bridge);
 	else
 		local_widget = SWIN_WIDGETNUM(bridge);
-
 	vector = sn_irq_info->irq_irq;
+
+	/* Make use of SAL_INTR_REDIRECT if PROM supports it */
+	status = sn_intr_redirect(local_nasid, local_widget, sn_irq_info, nasid, slice);
+	if (!status) {
+		new_irq_info = sn_irq_info;
+		goto finish_up;
+	}
+
+	/*
+	 * PROM does not support SAL_INTR_REDIRECT, or it failed.
+	 * Revert to old method.
+	 */
+	new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC);
+	if (new_irq_info == NULL)
+		return NULL;
+
+	memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info));
+
 	/* Free the old PROM new_irq_info structure */
 	sn_intr_free(local_nasid, local_widget, new_irq_info);
 	unregister_intr_pda(new_irq_info);
@@ -162,11 +188,18 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info,
 		return NULL;
 	}
 
+	register_intr_pda(new_irq_info);
+	spin_lock(&sn_irq_info_lock);
+	list_replace_rcu(&sn_irq_info->list, &new_irq_info->list);
+	spin_unlock(&sn_irq_info_lock);
+	call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
+
+
+finish_up:
 	/* Update kernels new_irq_info with new target info */
 	cpuid = nasid_slice_to_cpuid(new_irq_info->irq_nasid,
 				     new_irq_info->irq_slice);
 	new_irq_info->irq_cpuid = cpuid;
-	register_intr_pda(new_irq_info);
 
 	pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type];
 
@@ -178,11 +211,6 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info,
 	    pci_provider && pci_provider->target_interrupt)
 		(pci_provider->target_interrupt)(new_irq_info);
 
-	spin_lock(&sn_irq_info_lock);
-	list_replace_rcu(&sn_irq_info->list, &new_irq_info->list);
-	spin_unlock(&sn_irq_info_lock);
-	call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
-
 #ifdef CONFIG_SMP
 	cpuphys = cpu_physical_id(cpuid);
 	set_irq_affinity_info((vector & 0xff), cpuphys, 0);
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 49873aa4a37..83f190ffe35 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -87,7 +87,6 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
 	if (irq < 0)
 		return irq;
 
-	set_irq_msi(irq, entry);
 	/*
 	 * Set up the vector plumbing.  Let the prom (via sn_intr_alloc)
 	 * decide which cpu to direct this msi at by default.
@@ -144,10 +143,11 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
 	 */
 	msg.data = 0x100 + irq;
 
+	set_irq_msi(irq, entry);
 	write_msi_msg(irq, &msg);
 	set_irq_chip_and_handler(irq, &sn_msi_chip, handle_edge_irq);
 
-	return irq;
+	return 0;
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index a9bed5ca2ed..684b1c984a4 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -167,7 +167,7 @@ void __init early_sn_setup(void)
 	 * IO on SN2 is done via SAL calls, early_printk won't work without this.
 	 *
 	 * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
-	 * Any changes to those file may have to be made hereas well.
+	 * Any changes to those files may have to be made here as well.
 	 */
 	efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
 	config_tables = __va(efi_systab->tables);
@@ -194,7 +194,7 @@ void __init early_sn_setup(void)
 }
 
 extern int platform_intr_list[];
-static int __initdata shub_1_1_found;
+static int __cpuinitdata shub_1_1_found;
 
 /*
  * sn_check_for_wars
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 601747b1e22..033c8a9f000 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -46,6 +46,9 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats);
 
 static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
 
+/* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */
+static int sn2_flush_opt = 0;
+
 extern unsigned long
 sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
 			       volatile unsigned long *, unsigned long,
@@ -76,6 +79,8 @@ struct ptc_stats {
 	unsigned long shub_itc_clocks;
 	unsigned long shub_itc_clocks_max;
 	unsigned long shub_ptc_flushes_not_my_mm;
+	unsigned long shub_ipi_flushes;
+	unsigned long shub_ipi_flushes_itc_clocks;
 };
 
 #define sn2_ptctest	0
@@ -99,7 +104,7 @@ static inline unsigned long wait_piowc(void)
  *
  * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
  * Context switching user threads which have memory-mapped MMIO may cause
- * PIOs to issue from seperate CPUs, thus the PIO writes must be drained
+ * PIOs to issue from separate CPUs, thus the PIO writes must be drained
  * from the previous CPU's Shub before execution resumes on the new CPU.
  */
 void sn_migrate(struct task_struct *task)
@@ -121,6 +126,18 @@ void sn_tlb_migrate_finish(struct mm_struct *mm)
 		flush_tlb_mm(mm);
 }
 
+/* Run smp_flush_tlb_cpumask() over mm->cpu_vm_mask; log count and ITC clocks in ptcstats. */
+static void sn2_ipi_flush_all_tlb(struct mm_struct *mm)
+{
+	unsigned long start_itc, elapsed;
+
+	start_itc = ia64_get_itc();
+	smp_flush_tlb_cpumask(mm->cpu_vm_mask);
+	elapsed = ia64_get_itc() - start_itc;
+	__get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += elapsed;
+	__get_cpu_var(ptcstats).shub_ipi_flushes++;
+}
+
 /**
  * sn2_global_tlb_purge - globally purge translation cache of virtual address range
  * @mm: mm_struct containing virtual address range
@@ -154,7 +171,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
 	short nasids[MAX_NUMNODES], nix;
 	nodemask_t nodes_flushed;
-	int active, max_active, deadlock;
+	int active, max_active, deadlock, flush_opt = sn2_flush_opt;
+
+	if (flush_opt > 2) {
+		sn2_ipi_flush_all_tlb(mm);
+		return;
+	}
 
 	nodes_clear(nodes_flushed);
 	i = 0;
@@ -189,6 +211,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 		return;
 	}
 
+	if (flush_opt == 2) {
+		sn2_ipi_flush_all_tlb(mm);
+		preempt_enable();
+		return;
+	}
+
 	itc = ia64_get_itc();
 	nix = 0;
 	for_each_node_mask(cnode, nodes_flushed)
@@ -256,6 +284,8 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 			}
 			if (active >= max_active || i == (nix - 1)) {
 				if ((deadlock = wait_piowc())) {
+					if (flush_opt == 1)
+						goto done;
 					sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
 					if (reset_max_active_on_deadlock())
 						max_active = 1;
@@ -267,6 +297,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 		start += (1UL << nbits);
 	} while (start < end);
 
+done:
 	itc2 = ia64_get_itc() - itc2;
 	__get_cpu_var(ptcstats).shub_itc_clocks += itc2;
 	if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
@@ -279,6 +310,11 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 
 	spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
 
+	if (flush_opt == 1 && deadlock) {
+		__get_cpu_var(ptcstats).deadlocks++;
+		sn2_ipi_flush_all_tlb(mm);
+	}
+
 	preempt_enable();
 }
 
@@ -425,24 +461,54 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data)
 
 	if (!cpu) {
 		seq_printf(file,
-			   "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n");
-		seq_printf(file, "# ptctest %d\n", sn2_ptctest);
+			   "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_fluches ipi_nsec\n");
+		seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt);
 	}
 
 	if (cpu < NR_CPUS && cpu_online(cpu)) {
 		stat = &per_cpu(ptcstats, cpu);
-		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
+		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
 				stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
 				stat->deadlocks,
 				1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
 				1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
 				1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec,
 				stat->shub_ptc_flushes_not_my_mm,
-				stat->deadlocks2);
+				stat->deadlocks2,
+				stat->shub_ipi_flushes,
+				1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec);
 	}
 	return 0;
 }
 
+/*
+ * Write handler installed in proc_sn2_ptc_operations: parse the written text
+ * as the new sn2_flush_opt value and zero the ptc statistics of every online cpu.
+ *
+ * Returns count on success, -EINVAL if the write is empty or does not fit
+ * in the local buffer, or -EFAULT if the copy from user space fails.
+ */
+static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data)
+{
+	int cpu;
+	char optstr[64];
+
+	/* count is user controlled: keep the copy and the [count - 1] store inside optstr[] */
+	if (count == 0 || count > sizeof(optstr))
+		return -EINVAL;
+	if (copy_from_user(optstr, user, count))
+		return -EFAULT;
+	/* The final byte written is replaced by the string terminator. */
+	optstr[count - 1] = '\0';
+	sn2_flush_opt = simple_strtoul(optstr, NULL, 0);
+
+	/* Clear the counters of all online cpus after changing the option. */
+	for_each_online_cpu(cpu)
+		memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats));
+
+	return count;
+}
+
 static struct seq_operations sn2_ptc_seq_ops = {
 	.start = sn2_ptc_seq_start,
 	.next = sn2_ptc_seq_next,
@@ -458,6 +524,7 @@ static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
 static const struct file_operations proc_sn2_ptc_operations = {
 	.open = sn2_ptc_proc_open,
 	.read = seq_read,
+	.write = sn2_ptc_proc_write,
 	.llseek = seq_lseek,
 	.release = seq_release,
 };
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index c08db9c2375..44ccc0d789c 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -293,7 +293,7 @@ xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
 
 
 /*
- * Pull the remote per partititon specific variables from the specified
+ * Pull the remote per partition specific variables from the specified
  * partition.
  */
 enum xpc_retval
@@ -461,7 +461,7 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch)
 	// >>> may want to check for ch->flags & XPC_C_DISCONNECTING between
 	// >>> iterations of the for-loop, bail if set?
 
-	// >>> should we impose a minumum #of entries? like 4 or 8?
+	// >>> should we impose a minimum #of entries? like 4 or 8?
 	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
 
 		nbytes = nentries * ch->msg_size;
@@ -514,7 +514,7 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
 	// >>> may want to check for ch->flags & XPC_C_DISCONNECTING between
 	// >>> iterations of the for-loop, bail if set?
 
-	// >>> should we impose a minumum #of entries? like 4 or 8?
+	// >>> should we impose a minimum #of entries? like 4 or 8?
 	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
 
 		nbytes = nentries * ch->msg_size;
@@ -1478,7 +1478,7 @@ xpc_teardown_infrastructure(struct xpc_partition *part)
 
 
 	/*
-	 * Before proceding with the teardown we have to wait until all
+	 * Before proceeding with the teardown we have to wait until all
 	 * existing references cease.
 	 */
 	wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index 68355ef6f84..e336e1692a7 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -55,9 +55,9 @@
 #include <linux/delay.h>
 #include <linux/reboot.h>
 #include <linux/completion.h>
+#include <linux/kdebug.h>
 #include <asm/sn/intr.h>
 #include <asm/sn/sn_sal.h>
-#include <asm/kdebug.h>
 #include <asm/uaccess.h>
 #include <asm/sn/xpc.h>
 
@@ -1332,7 +1332,7 @@ xpc_init(void)
 		dev_warn(xpc_part, "can't register reboot notifier\n");
 	}
 
-	/* add ourselves to the die_notifier list (i.e., ia64die_chain) */
+	/* add ourselves to the die_notifier list */
 	ret = register_die_notifier(&xpc_die_notifier);
 	if (ret != 0) {
 		dev_warn(xpc_part, "can't register die notifier\n");
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 57c723f5cba..7ba403232cb 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -574,7 +574,7 @@ xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
 		u64 remote_vars_pa, struct xpc_vars *remote_vars)
 {
 	part->remote_rp_version = remote_rp_version;
-	dev_dbg(xpc_part, "  remote_rp_version = 0x%016lx\n",
+	dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
 		part->remote_rp_version);
 
 	part->remote_rp_stamp = *remote_rp_stamp;
diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c
index 5419acb89a8..e58fcadff2e 100644
--- a/arch/ia64/sn/kernel/xpnet.c
+++ b/arch/ia64/sn/kernel/xpnet.c
@@ -24,7 +24,6 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/netdevice.h>
@@ -344,8 +343,8 @@ xpnet_dev_open(struct net_device *dev)
 	enum xpc_retval ret;
 
 
-	dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %d, "
-		"%d)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
+	dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, "
+		"%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
 		XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS,
 		XPNET_MAX_IDLE_KTHREADS);
 
@@ -532,7 +531,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp);
 
 	/*
-	 * If we wanted to allow promiscous mode to work like an
+	 * If we wanted to allow promiscuous mode to work like an
 	 * unswitched network, this would be a good point to OR in a
 	 * mask of partitions which should be receiving all packets.
 	 */
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 7a291a27151..d79ddacfba2 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -333,7 +333,7 @@ int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
 	/*
 	 * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work
 	 * around hw issues at the pci bus level.  SGI proms older than
-	 * 4.10 don't implment this.
+	 * 4.10 don't implement this.
 	 */
 
 	SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE,
@@ -348,7 +348,7 @@ int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
 	/*
 	 * If the above failed, retry using the SAL_PROBE call which should
 	 * be present in all proms (but which cannot work round PCI chipset
-	 * bugs).  This code is retained for compatability with old
+	 * bugs).  This code is retained for compatibility with old
 	 * pre-4.10 proms, and should be removed at some point in the future.
 	 */
 
@@ -379,7 +379,7 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
 	/*
 	 * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work
 	 * around hw issues at the pci bus level.  SGI proms older than
-	 * 4.10 don't implment this.
+	 * 4.10 don't implement this.
 	 */
 
 	SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE,
@@ -394,7 +394,7 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
 	/*
 	 * If the above failed, retry using the SAL_PROBE call which should
 	 * be present in all proms (but which cannot work round PCI chipset
-	 * bugs).  This code is retained for compatability with old
+	 * bugs).  This code is retained for compatibility with old
 	 * pre-4.10 proms, and should be removed at some point in the future.
 	 */
 
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
index 935029fc400..239b3cedcf2 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
@@ -30,7 +30,7 @@ static void mark_ate(struct ate_resource *ate_resource, int start, int number,
 
 /*
  * find_free_ate:  Find the first free ate index starting from the given
- *		   index for the desired consequtive count.
+ *		   index for the desired consecutive count.
  */
 static int find_free_ate(struct ate_resource *ate_resource, int start,
 			 int count)
@@ -88,7 +88,7 @@ static inline int alloc_ate_resource(struct ate_resource *ate_resource,
 		return -1;
 
 	/*
-	 * Find the required number of free consequtive ates.
+	 * Find the required number of free consecutive ates.
 	 */
 	start_index =
 	    find_free_ate(ate_resource, ate_resource->lowest_free_index,
@@ -105,7 +105,7 @@ static inline int alloc_ate_resource(struct ate_resource *ate_resource,
 /*
  * Allocate "count" contiguous Bridge Address Translation Entries
  * on the specified bridge to be used for PCI to XTALK mappings.
- * Indices in rm map range from 1..num_entries.  Indicies returned
+ * Indices in rm map range from 1..num_entries.  Indices returned
  * to caller range from 0..num_entries-1.
  *
  * Return the start index on success, -1 on failure.
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index 95af40cb22f..e626e50a938 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -201,7 +201,7 @@ pcibr_dmatrans_direct32(struct pcidev_info * info,
 }
 
 /*
- * Wrapper routine for free'ing DMA maps
+ * Wrapper routine for freeing DMA maps
  * DMA mappings for Direct 64 and 32 do not have any DMA maps.
  */
 void
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index 8a2cb4e691f..b9bedbd6e1d 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -223,7 +223,7 @@ tioca_fastwrite_enable(struct tioca_kernel *tioca_kern)
 
 	/*
 	 * Scan all vga controllers on this bus making sure they all
-	 * suport FW.  If not, return.
+	 * support FW.  If not, return.
 	 */
 
 	list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
@@ -364,7 +364,7 @@ tioca_dma_d48(struct pci_dev *pdev, u64 paddr)
  * @req_size: len (bytes) to map
  *
  * Map @paddr into CA address space using the GART mechanism.  The mapped
- * dma_addr_t is guarenteed to be contiguous in CA bus space.
+ * dma_addr_t is guaranteed to be contiguous in CA bus space.
  */
 static dma_addr_t
 tioca_dma_mapped(struct pci_dev *pdev, u64 paddr, size_t req_size)
@@ -526,7 +526,7 @@ tioca_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags)
 		return 0;
 
 	/*
-	 * If card is 64 or 48 bit addresable, use a direct mapping.  32
+	 * If card is 64 or 48 bit addressable, use a direct mapping.  32
 	 * bit direct is so restrictive w.r.t. where the memory resides that
 	 * we don't use it even though CA has some support.
 	 */
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index 35f854fb612..f4c0b961a93 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -256,9 +256,9 @@ pcidev_to_tioce(struct pci_dev *pdev, struct tioce __iomem **base,
  * @ct_addr: the coretalk address to map
  * @len: number of bytes to map
  *
- * Given the addressing type, set up various paramaters that define the
+ * Given the addressing type, set up various parameters that define the
  * ATE pool to use.  Search for a contiguous block of entries to cover the
- * length, and if enough resources exist, fill in the ATE's and construct a
+ * length, and if enough resources exist, fill in the ATEs and construct a
  * tioce_dmamap struct to track the mapping.
  */
 static u64
@@ -581,8 +581,8 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
 	 */
 	if (!mapaddr && !barrier && dma_mask >= 0xffffffffffUL) {
 		/*
-		 * We have two options for 40-bit mappings:  16GB "super" ATE's
-		 * and 64MB "regular" ATE's.  We'll try both if needed for a
+		 * We have two options for 40-bit mappings:  16GB "super" ATEs
+		 * and 64MB "regular" ATEs.  We'll try both if needed for a
 		 * given mapping but which one we try first depends on the
 		 * size.  For requests >64MB, prefer to use a super page with
 		 * regular as the fallback. Otherwise, try in the reverse order.
@@ -687,8 +687,8 @@ tioce_error_intr_handler(int irq, void *arg)
 }
 
 /**
- * tioce_reserve_m32 - reserve M32 ate's for the indicated address range
- * @tioce_kernel: TIOCE context to reserve ate's for
+ * tioce_reserve_m32 - reserve M32 ATEs for the indicated address range
+ * @tioce_kernel: TIOCE context to reserve ATEs for
  * @base: starting bus address to reserve
  * @limit: last bus address to reserve
  *
@@ -763,7 +763,7 @@ tioce_kern_init(struct tioce_common *tioce_common)
 
 	/*
 	 * Set PMU pagesize to the largest size available, and zero out
-	 * the ate's.
+	 * the ATEs.
 	 */
 
 	tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base;
@@ -784,7 +784,7 @@ tioce_kern_init(struct tioce_common *tioce_common)
 	}
 
 	/*
-	 * Reserve ATE's corresponding to reserved address ranges.  These
+	 * Reserve ATEs corresponding to reserved address ranges.  These
 	 * include:
 	 *
 	 *	Memory space covered by each PPB mem base/limit register