432 files changed, 60980 insertions, 30821 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 0f3076a820c..2f3abcf8f6b 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -1,16 +1,52 @@
-#
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
-#
-
-mainmenu "IA-64 Linux Kernel Configuration"
-
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Processor type and features"
 
 config IA64
 	bool
+	select ARCH_MIGHT_HAVE_PC_PARPORT
+	select ARCH_MIGHT_HAVE_PC_SERIO
+	select PCI if (!IA64_HP_SIM)
+	select ACPI if (!IA64_HP_SIM)
+	select PM if (!IA64_HP_SIM)
+	select HAVE_UNSTABLE_SCHED_CLOCK
+	select HAVE_IDE
+	select HAVE_OPROFILE
+	select HAVE_KPROBES
+	select HAVE_KRETPROBES
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
+	select HAVE_FUNCTION_TRACER
+	select HAVE_DMA_ATTRS
+	select HAVE_KVM
+	select TTY
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_DMA_API_DEBUG
+	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
+	select HAVE_VIRT_CPU_ACCOUNTING
+	select VIRT_TO_BUS
+	select ARCH_DISCARD_MEMBLOCK
+	select GENERIC_IRQ_PROBE
+	select GENERIC_PENDING_IRQ if SMP
+	select GENERIC_IRQ_SHOW
+	select GENERIC_IRQ_LEGACY
+	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select GENERIC_IOMAP
+	select GENERIC_SMP_IDLE_THREAD
+	select ARCH_INIT_TASK
+	select ARCH_TASK_STRUCT_ALLOCATOR
+	select ARCH_THREAD_INFO_ALLOCATOR
+	select ARCH_CLOCKSOURCE_DATA
+	select GENERIC_TIME_VSYSCALL_OLD
+	select SYSCTL_ARCH_UNALIGN_NO_WARN
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
+	select ARCH_USE_CMPXCHG_LOCKREF
+	select HAVE_ARCH_AUDITSYSCALL
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
@@ -20,45 +56,66 @@ config IA64
 
 config 64BIT
 	bool
+	select ATA_NONSTANDARD if ATA
+	default y
+
+config ZONE_DMA
+	def_bool y
+	depends on !IA64_SGI_SN2
+
+config QUICKLIST
+	bool
 	default y
 
 config MMU
 	bool
 	default y
 
+config ARCH_DMA_ADDR_T_64BIT
+	def_bool y
+
+config NEED_DMA_MAP_STATE
+	def_bool y
+
+config NEED_SG_DMA_LENGTH
+	def_bool y
+
 config SWIOTLB
        bool
-       default y
+
+config STACKTRACE_SUPPORT
+	def_bool y
+
+config GENERIC_LOCKBREAK
+	def_bool n
 
 config RWSEM_XCHGADD_ALGORITHM
 	bool
 	default y
 
-config GENERIC_FIND_NEXT_BIT
+config HUGETLB_PAGE_SIZE_VARIABLE
 	bool
+	depends on HUGETLB_PAGE
 	default y
 
 config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
-config TIME_INTERPOLATION
-	bool
-	default y
+config HAVE_SETUP_PER_CPU_AREA
+	def_bool y
 
 config DMI
 	bool
 	default y
+	select DMI_SCAN_MACHINE_NON_EFI_FALLBACK
 
 config EFI
 	bool
+	select UCS2_STRING
 	default y
 
-config GENERIC_IOMAP
-	bool
-	default y
-
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
@@ -66,19 +123,47 @@ config IA64_UNCACHED_ALLOCATOR
 	bool
 	select GENERIC_ALLOCATOR
 
-config DMA_IS_DMA32
+config ARCH_USES_PG_UNCACHED
+	def_bool y
+	depends on IA64_UNCACHED_ALLOCATOR
+
+config AUDIT_ARCH
 	bool
 	default y
 
+menuconfig PARAVIRT_GUEST
+	bool "Paravirtualized guest support"
+	depends on BROKEN
+	help
+	  Say Y here to get to see options related to running Linux under
+	  various hypervisors.  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if PARAVIRT_GUEST
+
+config PARAVIRT
+	bool "Enable paravirtualization code"
+	depends on PARAVIRT_GUEST
+	default y
+	help
+	  This changes the kernel so it can modify itself when it is run
+	  under a hypervisor, potentially improving performance significantly
+	  over full virtualization.  However, when run without a hypervisor
+	  the kernel is theoretically slower and slightly larger.
+
+endif
+
 choice
 	prompt "System type"
 	default IA64_GENERIC
 
 config IA64_GENERIC
 	bool "generic"
-	select ACPI
 	select NUMA
 	select ACPI_NUMA
+	select SWIOTLB
+	select PCI_MSI
 	help
 	  This selects the system type of your hardware.  A "generic" kernel
 	  will run on any supported IA-64 system.  However, if you configure
@@ -86,15 +171,23 @@ config IA64_GENERIC
 
 	  generic		For any supported IA-64 system
 	  DIG-compliant		For DIG ("Developer's Interface Guide") compliant systems
+	  DIG+Intel+IOMMU	For DIG systems with Intel IOMMU
 	  HP-zx1/sx1000		For HP systems
 	  HP-zx1/sx1000+swiotlb	For HP systems with (broken) DMA-constrained devices.
 	  SGI-SN2		For SGI Altix systems
+	  SGI-UV		For SGI UV systems
 	  Ski-simulator		For the HP simulator <http://www.hpl.hp.com/research/linux/ski/>
 
 	  If you don't know what to do, choose "generic".
 
 config IA64_DIG
 	bool "DIG-compliant"
+	select SWIOTLB
+
+config IA64_DIG_VTD
+	bool "DIG+Intel+IOMMU"
+	select INTEL_IOMMU
+	select PCI_MSI
 
 config IA64_HP_ZX1
 	bool "HP-zx1/sx1000"
@@ -104,6 +197,7 @@ config IA64_HP_ZX1
 
 config IA64_HP_ZX1_SWIOTLB
 	bool "HP-zx1/sx1000 with software I/O TLB"
+	select SWIOTLB
 	help
 	  Build a kernel that runs on HP zx1 and sx1000 systems even when they
 	  have broken PCI devices which cannot DMA to full 32 bits.  Apart
@@ -113,6 +207,8 @@ config IA64_HP_ZX1_SWIOTLB
 
 config IA64_SGI_SN2
 	bool "SGI-SN2"
+	select NUMA
+	select ACPI_NUMA
 	help
 	  Selecting this option will optimize the kernel for use on sn2 based
 	  systems, but the resulting kernel binary will not run on other
@@ -120,8 +216,21 @@ config IA64_SGI_SN2
 	  to select this option.  If in doubt, select ia64 generic support
 	  instead.
 
+config IA64_SGI_UV
+	bool "SGI-UV"
+	select NUMA
+	select ACPI_NUMA
+	select SWIOTLB
+	help
+	  Selecting this option will optimize the kernel for use on UV based
+	  systems, but the resulting kernel binary will not run on other
+	  types of ia64 systems.  If you have an SGI UV system, it's safe
+	  to select this option.  If in doubt, select ia64 generic support
+	  instead.
+
 config IA64_HP_SIM
 	bool "Ski-simulator"
+	select SWIOTLB
 
 endchoice
 
@@ -189,7 +298,14 @@ config PGTABLE_4
 
 endchoice
 
+if IA64_HP_SIM
+config HZ
+	default 32
+endif
+
+if !IA64_HP_SIM
 source kernel/Kconfig.hz
+endif
 
 config IA64_BRL_EMU
 	bool
@@ -213,17 +329,6 @@ config IOSAPIC
 	depends on !IA64_HP_SIM
 	default y
 
-config IA64_SGI_SN_XP
-	tristate "Support communication between SGI SSIs"
-	depends on IA64_GENERIC || IA64_SGI_SN2
-	select IA64_UNCACHED_ALLOCATOR
-	help
-	  An SGI machine can be divided into multiple Single System
-	  Images which act independently of each other and have
-	  hardware based memory protection from the others.  Enabling
-	  this feature will allow for direct communication between SSIs
-	  based on a network adapter and DMA messaging.
-
 config FORCE_MAX_ZONEORDER
 	int "MAX_ORDER (11 - 17)"  if !HUGETLB_PAGE
 	range 11 17  if !HUGETLB_PAGE
@@ -243,16 +348,16 @@ config SMP
 	  single processor systems.  On a single processor system, the kernel
 	  will run faster if you say N here.
 
-	  See also the <file:Documentation/smp.txt> and the SMP-HOWTO
-	  available at <http://www.tldp.org/docs.html#howto>.
+	  See also the SMP-HOWTO available at
+	  <http://www.tldp.org/docs.html#howto>.
 
 	  If you don't know what to do here, say N.
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-1024)"
-	range 2 1024
+	int "Maximum number of CPUs (2-4096)"
+	range 2 4096
 	depends on SMP
-	default "64"
+	default "4096"
 	help
 	  You should set this to the number of CPUs in your system, but
 	  keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but
@@ -261,19 +366,23 @@ config NR_CPUS
 	  performance hit.
 
 config HOTPLUG_CPU
-	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
-	depends on SMP && EXPERIMENTAL
-	select HOTPLUG
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP
 	default n
 	---help---
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu/cpu#.
 	  Say N if you want to disable CPU hotplug.
 
+config ARCH_ENABLE_MEMORY_HOTPLUG
+	def_bool y
+
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+	def_bool y
+
 config SCHED_SMT
 	bool "SMT scheduler support"
 	depends on SMP
-	default off
 	help
 	  Improves the CPU scheduler's decision making when dealing with
 	  Intel IA64 chips with MultiThreading at a cost of slightly increased
@@ -288,27 +397,17 @@ config PERMIT_BSP_REMOVE
 	support. 
 
 config FORCE_CPEI_RETARGET
-	bool "Force assumption that CPEI can be re-targetted"
+	bool "Force assumption that CPEI can be re-targeted"
 	depends on PERMIT_BSP_REMOVE
 	default n
 	---help---
-	Say Y if you need to force the assumption that CPEI can be re-targetted to
+	Say Y if you need to force the assumption that CPEI can be re-targeted to
 	any cpu in the system. This hint is available via ACPI 3.0 specifications.
 	Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP.
 	This option it useful to enable this feature on older BIOS's as well.
 	You can also enable this by using boot command line option force_cpei=1.
 
-config PREEMPT
-	bool "Preemptible Kernel"
-        help
-          This option reduces the latency of the kernel when reacting to
-          real-time or interactive events by allowing a low priority process to
-          be preempted even if it is in kernel mode executing a system call.
-          This allows applications to run more reliably even when the system is
-          under load.
-
-          Say Y here if you are building a kernel for a desktop, embedded
-          or real-time system.  Say N if you are unsure.
+source "kernel/Kconfig.preempt"
 
 source "mm/Kconfig"
 
@@ -329,6 +428,7 @@ config ARCH_FLATMEM_ENABLE
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	depends on ARCH_DISCONTIGMEM_ENABLE
+	select SPARSEMEM_VMEMMAP_ENABLE
 
 config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB)
@@ -338,6 +438,7 @@ config NUMA
 	bool "NUMA support"
 	depends on !IA64_HP_SIM && !FLATMEM
 	default y if IA64_SGI_SN2
+	select ACPI_NUMA if ACPI
 	help
 	  Say Y to compile the kernel to support NUMA (Non-Uniform Memory
 	  Access).  This option is for configuring high-end multiprocessor
@@ -346,7 +447,7 @@ config NUMA
 config NODES_SHIFT
 	int "Max num nodes shift(3-10)"
 	range 3 10
-	default "8"
+	default "10"
 	depends on NEED_MULTIPLE_NODES
 	help
 	  This option specifies the maximum number of nodes in your SSI system.
@@ -371,22 +472,22 @@ config HOLES_IN_ZONE
 	default y if VIRTUAL_MEM_MAP
 
 config HAVE_ARCH_EARLY_PFN_TO_NID
+	def_bool NUMA && SPARSEMEM
+
+config HAVE_ARCH_NODEDATA_EXTENSION
 	def_bool y
-	depends on NEED_MULTIPLE_NODES
+	depends on NUMA
 
-config IA32_SUPPORT
-	bool "Support for Linux/x86 binaries"
-	help
-	  IA-64 processors can execute IA-32 (X86) instructions.  By
-	  saying Y here, the kernel will include IA-32 system call
-	  emulation support which makes it possible to transparently
-	  run IA-32 Linux binaries on an IA-64 Linux system.
-	  If in doubt, say Y.
+config USE_PERCPU_NUMA_NODE_ID
+	def_bool y
+	depends on NUMA
 
-config COMPAT
-	bool
-	depends on IA32_SUPPORT
-	default y
+config HAVE_MEMORYLESS_NODES
+	def_bool NUMA
+
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+	depends on PROC_KCORE
 
 config IA64_MCA_RECOVERY
 	tristate "MCA recovery from errors other than TLB."
@@ -410,27 +511,78 @@ config IA64_PALINFO
 	  To use this option, you have to ensure that the "/proc file system
 	  support" (CONFIG_PROC_FS) is enabled, too.
 
+config IA64_MC_ERR_INJECT
+	tristate "MC error injection support"
+	help
+	  Adds support for MC error injection. If enabled, the kernel 
+	  will provide a sysfs interface for user applications to
+	  call MC error injection PAL procedures to inject various errors.
+	  This is a useful tool for MCA testing.
+
+	  If you're unsure, do not select this option.
+
 config SGI_SN
 	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
 
+config IA64_ESI
+	bool "ESI (Extensible SAL Interface) support"
+	help
+	  If you say Y here, support is built into the kernel to
+	  make ESI calls.  ESI calls are used to support vendor-specific
+	  firmware extensions, such as the ability to inject memory-errors
+	  for test-purposes.  If you're unsure, say N.
+
+config IA64_HP_AML_NFW
+	bool "Support ACPI AML calls to native firmware"
+	help
+	  This driver installs a global ACPI Operation Region handler for
+	  region 0xA1.  AML methods can use this OpRegion to call arbitrary
+	  native firmware functions.  The driver installs the OpRegion
+	  handler if there is an HPQ5001 device or if the user supplies
+	  the "force" module parameter, e.g., with the "aml_nfw.force"
+	  kernel command line option.
+
 source "drivers/sn/Kconfig"
 
+config KEXEC
+	bool "kexec system call"
+	depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+	help
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  The name comes from the similarity to the exec system call.
+
+	  It is an ongoing process to be certain the hardware in a machine
+	  is properly shutdown, so do not be surprised if this code does not
+	  initially work for you.  As of this writing the exact hardware
+	  interface is strongly in flux, so no good recommendation can be
+	  made.
+
+config CRASH_DUMP
+	  bool "kernel crash dumps"
+	  depends on IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+	  help
+	    Generate crash dump after being started by kexec.
+
 source "drivers/firmware/Kconfig"
 
 source "fs/Kconfig.binfmt"
 
 endmenu
 
-menu "Power management and ACPI"
+menu "Power management and ACPI options"
 
 source "kernel/power/Kconfig"
 
 source "drivers/acpi/Kconfig"
 
 if PM
-
-source "arch/ia64/kernel/cpufreq/Kconfig"
-
+menu "CPU Frequency scaling"
+source "drivers/cpufreq/Kconfig"
+endmenu
 endif
 
 endmenu
@@ -446,8 +598,12 @@ config PCI
 	  here unless you are using a simulator without PCI support.
 
 config PCI_DOMAINS
-	bool
-	default PCI
+	def_bool PCI
+
+config PCI_SYSCALL
+	def_bool PCI
+
+source "drivers/pci/pcie/Kconfig"
 
 source "drivers/pci/Kconfig"
 
@@ -463,46 +619,28 @@ source "net/Kconfig"
 
 source "drivers/Kconfig"
 
-source "fs/Kconfig"
-
-source "lib/Kconfig"
-
-#
-# Use the generic interrupt handling code in kernel/irq/:
-#
-config GENERIC_HARDIRQS
-	bool
-	default y
-
-config GENERIC_IRQ_PROBE
-	bool
-	default y
-
-config GENERIC_PENDING_IRQ
-	bool
-	depends on GENERIC_HARDIRQS && SMP
-	default y
-
 source "arch/ia64/hp/sim/Kconfig"
 
-menu "Instrumentation Support"
-        depends on EXPERIMENTAL
-
-source "arch/ia64/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && MODULES
+config MSPEC
+	tristate "Memory special operations driver"
+	depends on IA64
+	select IA64_UNCACHED_ALLOCATOR
 	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-endmenu
+	  If you have an ia64 and you want to enable memory special
+	  operations support (formerly known as fetchop), say Y here,
+	  otherwise say N.
+
+source "fs/Kconfig"
 
 source "arch/ia64/Kconfig.debug"
 
 source "security/Kconfig"
 
 source "crypto/Kconfig"
+
+source "arch/ia64/kvm/Kconfig"
+
+source "lib/Kconfig"
+
+config IOMMU_HELPER
+	def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 80ea7506fa1..f37238f45bc 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -11,6 +11,8 @@
 # Copyright (C) 1998-2004 by David Mosberger-Tang <davidm@hpl.hp.com>
 #
 
+KBUILD_DEFCONFIG := generic_defconfig
+
 NM := $(CROSS_COMPILE)nm -B
 READELF := $(CROSS_COMPILE)readelf
 
@@ -20,16 +22,16 @@ CHECKFLAGS	+= -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
 
 OBJCOPYFLAGS	:= --strip-all
 LDFLAGS_vmlinux	:= -static
-LDFLAGS_MODULE	+= -T $(srctree)/arch/ia64/module.lds
-AFLAGS_KERNEL	:= -mconstant-gp
+KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/ia64/module.lds
+KBUILD_AFLAGS_KERNEL := -mconstant-gp
 EXTRA		:=
 
 cflags-y	:= -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
 		   -falign-functions=32 -frename-registers -fno-optimize-sibling-calls
-CFLAGS_KERNEL	:= -mconstant-gp
+KBUILD_CFLAGS_KERNEL := -mconstant-gp
 
 GAS_STATUS	= $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
-CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)")
+KBUILD_CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)")
 
 ifeq ($(GAS_STATUS),buggy)
 $(error Sorry, you need a newer version of the assember, one that is built from	\
@@ -39,28 +41,25 @@ $(error Sorry, you need a newer version of the assember, one that is built from
 		ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
 endif
 
-ifeq ($(call cc-version),0304)
-	cflags-$(CONFIG_ITANIUM)	+= -mtune=merced
-	cflags-$(CONFIG_MCKINLEY)	+= -mtune=mckinley
-endif
-
-CFLAGS += $(cflags-y)
+KBUILD_CFLAGS += $(cflags-y)
 head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
 
 libs-y				+= arch/ia64/lib/
 core-y				+= arch/ia64/kernel/ arch/ia64/mm/
-core-$(CONFIG_IA32_SUPPORT)	+= arch/ia64/ia32/
 core-$(CONFIG_IA64_DIG) 	+= arch/ia64/dig/
+core-$(CONFIG_IA64_DIG_VTD) 	+= arch/ia64/dig/
 core-$(CONFIG_IA64_GENERIC) 	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
 core-$(CONFIG_IA64_SGI_SN2)	+= arch/ia64/sn/
+core-$(CONFIG_IA64_SGI_UV)	+= arch/ia64/uv/
+core-$(CONFIG_KVM) 		+= arch/ia64/kvm/
 
 drivers-$(CONFIG_PCI)		+= arch/ia64/pci/
 drivers-$(CONFIG_IA64_HP_SIM)	+= arch/ia64/hp/sim/
 drivers-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/hp/common/ arch/ia64/hp/zx1/
 drivers-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
-drivers-$(CONFIG_IA64_GENERIC)	+= arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/
+drivers-$(CONFIG_IA64_GENERIC)	+= arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/ arch/ia64/uv/
 drivers-$(CONFIG_OPROFILE)	+= arch/ia64/oprofile/
 
 boot := arch/ia64/hp/sim/boot
@@ -71,11 +70,13 @@ all: compressed unwcheck
 
 compressed: vmlinux.gz
 
+vmlinuz: vmlinux.gz
+
 vmlinux.gz: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $@
 
 unwcheck: vmlinux
-	-$(Q)READELF=$(READELF) $(srctree)/arch/ia64/scripts/unwcheck.py $<
+	-$(Q)READELF=$(READELF) python $(srctree)/arch/ia64/scripts/unwcheck.py $<
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
@@ -94,3 +95,9 @@ define archhelp
   echo '  boot		- Build vmlinux and bootloader for Ski simulator'
   echo '* unwcheck	- Check vmlinux for invalid unwind info'
 endef
+
+archprepare: make_nr_irqs_h FORCE
+PHONY += make_nr_irqs_h FORCE
+
+make_nr_irqs_h: FORCE
+	$(Q)$(MAKE) $(build)=arch/ia64/kernel include/generated/nr-irqs.h
diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig
index 90e9c2e61bf..4c4ac163c60 100644
--- a/arch/ia64/configs/bigsur_defconfig
+++ b/arch/ia64/configs/bigsur_defconfig
@@ -1,1360 +1,116 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-rc5
-# Mon Feb 27 16:10:42 2006
-#
-
-#
-# Code maturity level options
-#
 CONFIG_EXPERIMENTAL=y
-CONFIG_LOCK_KERNEL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-# CONFIG_BSD_PROCESS_ACCT is not set
-CONFIG_SYSCTL=y
-# CONFIG_AUDIT is not set
-# CONFIG_IKCONFIG is not set
-# CONFIG_CPUSETS is not set
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-# CONFIG_EMBEDDED is not set
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_ALL is not set
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_SLAB=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
-# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-CONFIG_STOP_MACHINE=y
-
-#
-# Block layer
-#
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-CONFIG_DEFAULT_AS=y
-# CONFIG_DEFAULT_DEADLINE is not set
-# CONFIG_DEFAULT_CFQ is not set
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="anticipatory"
-
-#
-# Processor type and features
-#
-CONFIG_IA64=y
-CONFIG_64BIT=y
-CONFIG_MMU=y
-CONFIG_SWIOTLB=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
-CONFIG_EFI=y
-CONFIG_GENERIC_IOMAP=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
-CONFIG_DMA_IS_DMA32=y
-# CONFIG_IA64_GENERIC is not set
 CONFIG_IA64_DIG=y
-# CONFIG_IA64_HP_ZX1 is not set
-# CONFIG_IA64_HP_ZX1_SWIOTLB is not set
-# CONFIG_IA64_SGI_SN2 is not set
-# CONFIG_IA64_HP_SIM is not set
-CONFIG_ITANIUM=y
-# CONFIG_MCKINLEY is not set
-# CONFIG_IA64_PAGE_SIZE_4KB is not set
-# CONFIG_IA64_PAGE_SIZE_8KB is not set
-CONFIG_IA64_PAGE_SIZE_16KB=y
-# CONFIG_IA64_PAGE_SIZE_64KB is not set
-CONFIG_PGTABLE_3=y
-# CONFIG_PGTABLE_4 is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_IA64_BRL_EMU=y
-CONFIG_IA64_L1_CACHE_SHIFT=6
-# CONFIG_IA64_CYCLONE is not set
-CONFIG_IOSAPIC=y
-CONFIG_FORCE_MAX_ZONEORDER=17
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
-# CONFIG_HOTPLUG_CPU is not set
-# CONFIG_SCHED_SMT is not set
 CONFIG_PREEMPT=y
-CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
-CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
 # CONFIG_VIRTUAL_MEM_MAP is not set
-CONFIG_IA32_SUPPORT=y
-CONFIG_COMPAT=y
-# CONFIG_IA64_MCA_RECOVERY is not set
 CONFIG_PERFMON=y
 CONFIG_IA64_PALINFO=y
-
-#
-# Firmware Drivers
-#
 CONFIG_EFI_VARS=y
-CONFIG_EFI_PCDP=y
-CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=m
-
-#
-# Power management and ACPI
-#
-CONFIG_PM=y
-CONFIG_PM_LEGACY=y
-# CONFIG_PM_DEBUG is not set
-
-#
-# ACPI (Advanced Configuration and Power Interface) Support
-#
-CONFIG_ACPI=y
 CONFIG_ACPI_BUTTON=m
 CONFIG_ACPI_FAN=m
 CONFIG_ACPI_PROCESSOR=m
-CONFIG_ACPI_THERMAL=m
-CONFIG_ACPI_BLACKLIST_YEAR=0
-# CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_EC=y
-CONFIG_ACPI_POWER=y
-CONFIG_ACPI_SYSTEM=y
-# CONFIG_ACPI_CONTAINER is not set
-
-#
-# CPU Frequency scaling
-#
-# CONFIG_CPU_FREQ is not set
-
-#
-# Bus options (PCI, PCMCIA)
-#
-CONFIG_PCI=y
-CONFIG_PCI_DOMAINS=y
-# CONFIG_PCI_MSI is not set
-CONFIG_PCI_LEGACY_PROC=y
-# CONFIG_PCI_DEBUG is not set
-
-#
-# PCI Hotplug Support
-#
-# CONFIG_HOTPLUG_PCI is not set
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# Networking
-#
 CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
-CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
 CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
 # CONFIG_IPV6 is not set
-# CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_IEEE80211 is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
-# CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-CONFIG_PNP=y
-# CONFIG_PNP_DEBUG is not set
-
-#
-# Protocols
-#
-CONFIG_PNPACPI=y
-
-#
-# Block devices
-#
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_UMEM is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
-# CONFIG_BLK_DEV_SX8 is not set
-# CONFIG_BLK_DEV_UB is not set
 CONFIG_BLK_DEV_RAM=m
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-# CONFIG_CDROM_PKTCDVD is not set
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
 CONFIG_IDE=m
-CONFIG_IDE_MAX_HWIFS=4
-CONFIG_BLK_DEV_IDE=m
-
-#
-# Please see Documentation/ide.txt for help/info on IDE drives
-#
-# CONFIG_BLK_DEV_IDE_SATA is not set
-CONFIG_BLK_DEV_IDEDISK=m
-# CONFIG_IDEDISK_MULTI_MODE is not set
 CONFIG_BLK_DEV_IDECD=m
-# CONFIG_BLK_DEV_IDETAPE is not set
-CONFIG_BLK_DEV_IDEFLOPPY=m
-# CONFIG_BLK_DEV_IDESCSI is not set
-# CONFIG_IDE_TASK_IOCTL is not set
-
-#
-# IDE chipset support/bugfixes
-#
-# CONFIG_IDE_GENERIC is not set
-# CONFIG_BLK_DEV_IDEPNP is not set
-CONFIG_BLK_DEV_IDEPCI=y
-CONFIG_IDEPCI_SHARE_IRQ=y
-# CONFIG_BLK_DEV_OFFBOARD is not set
 CONFIG_BLK_DEV_GENERIC=m
-# CONFIG_BLK_DEV_OPTI621 is not set
-CONFIG_BLK_DEV_IDEDMA_PCI=y
-# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
-CONFIG_IDEDMA_PCI_AUTO=y
-# CONFIG_IDEDMA_ONLYDISK is not set
-# CONFIG_BLK_DEV_AEC62XX is not set
-# CONFIG_BLK_DEV_ALI15X3 is not set
-# CONFIG_BLK_DEV_AMD74XX is not set
-# CONFIG_BLK_DEV_CMD64X is not set
-# CONFIG_BLK_DEV_TRIFLEX is not set
-# CONFIG_BLK_DEV_CY82C693 is not set
-# CONFIG_BLK_DEV_CS5520 is not set
-# CONFIG_BLK_DEV_CS5530 is not set
-# CONFIG_BLK_DEV_HPT34X is not set
-# CONFIG_BLK_DEV_HPT366 is not set
-# CONFIG_BLK_DEV_SC1200 is not set
 CONFIG_BLK_DEV_PIIX=m
-# CONFIG_BLK_DEV_IT821X is not set
-# CONFIG_BLK_DEV_NS87415 is not set
-# CONFIG_BLK_DEV_PDC202XX_OLD is not set
-# CONFIG_BLK_DEV_PDC202XX_NEW is not set
-# CONFIG_BLK_DEV_SVWKS is not set
-# CONFIG_BLK_DEV_SIIMAGE is not set
-# CONFIG_BLK_DEV_SLC90E66 is not set
-# CONFIG_BLK_DEV_TRM290 is not set
-# CONFIG_BLK_DEV_VIA82CXXX is not set
-# CONFIG_IDE_ARM is not set
-CONFIG_BLK_DEV_IDEDMA=y
-# CONFIG_IDEDMA_IVB is not set
-CONFIG_IDEDMA_AUTO=y
-# CONFIG_BLK_DEV_HD is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
 CONFIG_SCSI=y
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
 CONFIG_BLK_DEV_SD=y
-# CONFIG_CHR_DEV_ST is not set
-# CONFIG_CHR_DEV_OSST is not set
-# CONFIG_BLK_DEV_SR is not set
-# CONFIG_CHR_DEV_SG is not set
-# CONFIG_CHR_DEV_SCH is not set
-
-#
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
-#
-# CONFIG_SCSI_MULTI_LUN is not set
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
-
-#
-# SCSI Transport Attributes
-#
 CONFIG_SCSI_SPI_ATTRS=m
-# CONFIG_SCSI_FC_ATTRS is not set
-# CONFIG_SCSI_ISCSI_ATTRS is not set
-# CONFIG_SCSI_SAS_ATTRS is not set
-
-#
-# SCSI low-level drivers
-#
-# CONFIG_ISCSI_TCP is not set
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
-# CONFIG_SCSI_3W_9XXX is not set
-# CONFIG_SCSI_ACARD is not set
-# CONFIG_SCSI_AACRAID is not set
-# CONFIG_SCSI_AIC7XXX is not set
-# CONFIG_SCSI_AIC7XXX_OLD is not set
-# CONFIG_SCSI_AIC79XX is not set
-# CONFIG_MEGARAID_NEWGEN is not set
-# CONFIG_MEGARAID_LEGACY is not set
-# CONFIG_MEGARAID_SAS is not set
-# CONFIG_SCSI_SATA is not set
-# CONFIG_SCSI_DMX3191D is not set
-# CONFIG_SCSI_FUTURE_DOMAIN is not set
-# CONFIG_SCSI_IPS is not set
-# CONFIG_SCSI_INITIO is not set
-# CONFIG_SCSI_INIA100 is not set
-# CONFIG_SCSI_SYM53C8XX_2 is not set
-# CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_FC is not set
 CONFIG_SCSI_QLOGIC_1280=y
-# CONFIG_SCSI_QLA_FC is not set
-# CONFIG_SCSI_LPFC is not set
-# CONFIG_SCSI_DC395x is not set
-# CONFIG_SCSI_DC390T is not set
-# CONFIG_SCSI_DEBUG is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=m
 CONFIG_MD_LINEAR=m
 CONFIG_MD_RAID0=m
 CONFIG_MD_RAID1=m
 CONFIG_MD_RAID10=m
-CONFIG_MD_RAID5=m
-CONFIG_MD_RAID6=m
 CONFIG_MD_MULTIPATH=m
-# CONFIG_MD_FAULTY is not set
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_ZERO=m
-# CONFIG_DM_MULTIPATH is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-# CONFIG_FUSION_SPI is not set
-# CONFIG_FUSION_FC is not set
-# CONFIG_FUSION_SAS is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
-# CONFIG_I2O is not set
-
-#
-# Network device support
-#
 CONFIG_NETDEVICES=y
 CONFIG_DUMMY=y
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-# CONFIG_NET_SB1000 is not set
-
-#
-# ARCnet devices
-#
-# CONFIG_ARCNET is not set
-
-#
-# PHY device support
-#
-# CONFIG_PHYLIB is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
-# CONFIG_HAPPYMEAL is not set
-# CONFIG_SUNGEM is not set
-# CONFIG_CASSINI is not set
-# CONFIG_NET_VENDOR_3COM is not set
-
-#
-# Tulip family network device support
-#
-# CONFIG_NET_TULIP is not set
-# CONFIG_HP100 is not set
 CONFIG_NET_PCI=y
-# CONFIG_PCNET32 is not set
-# CONFIG_AMD8111_ETH is not set
-# CONFIG_ADAPTEC_STARFIRE is not set
-# CONFIG_B44 is not set
-# CONFIG_FORCEDETH is not set
-# CONFIG_DGRS is not set
-CONFIG_EEPRO100=y
-# CONFIG_E100 is not set
-# CONFIG_FEALNX is not set
-# CONFIG_NATSEMI is not set
-# CONFIG_NE2K_PCI is not set
-# CONFIG_8139CP is not set
-# CONFIG_8139TOO is not set
-# CONFIG_SIS900 is not set
-# CONFIG_EPIC100 is not set
-# CONFIG_SUNDANCE is not set
-# CONFIG_VIA_RHINE is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
-# CONFIG_E1000 is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
-# CONFIG_SIS190 is not set
-# CONFIG_SKGE is not set
-# CONFIG_SKY2 is not set
-# CONFIG_SK98LIN is not set
-# CONFIG_VIA_VELOCITY is not set
-# CONFIG_TIGON3 is not set
-# CONFIG_BNX2 is not set
-
-#
-# Ethernet (10000 Mbit)
-#
-# CONFIG_CHELSIO_T1 is not set
-# CONFIG_IXGB is not set
-# CONFIG_S2IO is not set
-
-#
-# Token Ring devices
-#
-# CONFIG_TR is not set
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_FDDI is not set
-# CONFIG_HIPPI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NET_FC is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
 CONFIG_INPUT_EVDEV=y
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-CONFIG_INPUT_KEYBOARD=y
-CONFIG_KEYBOARD_ATKBD=y
-# CONFIG_KEYBOARD_SUNKBD is not set
-# CONFIG_KEYBOARD_LKKBD is not set
-# CONFIG_KEYBOARD_XTKBD is not set
-# CONFIG_KEYBOARD_NEWTON is not set
-CONFIG_INPUT_MOUSE=y
-CONFIG_MOUSE_PS2=y
-# CONFIG_MOUSE_SERIAL is not set
-# CONFIG_MOUSE_VSXXXAA is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-CONFIG_SERIO_I8042=y
-CONFIG_SERIO_SERPORT=y
-# CONFIG_SERIO_PCIPS2 is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_ACPI=y
-CONFIG_SERIAL_8250_NR_UARTS=4
-CONFIG_SERIAL_8250_RUNTIME_UARTS=4
 CONFIG_SERIAL_8250_EXTENDED=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
-# CONFIG_SERIAL_8250_DETECT_IRQ is not set
-# CONFIG_SERIAL_8250_RSA is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_EFI_RTC=y
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
 CONFIG_AGP=m
 CONFIG_AGP_I460=m
 CONFIG_DRM=m
-# CONFIG_DRM_TDFX is not set
 CONFIG_DRM_R128=m
-# CONFIG_DRM_RADEON is not set
-# CONFIG_DRM_MGA is not set
-# CONFIG_DRM_SIS is not set
-# CONFIG_DRM_VIA is not set
-# CONFIG_DRM_SAVAGE is not set
-# CONFIG_RAW_DRIVER is not set
-# CONFIG_HPET is not set
-# CONFIG_HANGCHECK_TIMER is not set
-
-#
-# TPM devices
-#
-# CONFIG_TCG_TPM is not set
-# CONFIG_TELCLOCK is not set
-
-#
-# I2C support
-#
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-
-#
-# I2C Algorithms
-#
-CONFIG_I2C_ALGOBIT=y
-# CONFIG_I2C_ALGOPCF is not set
-# CONFIG_I2C_ALGOPCA is not set
-
-#
-# I2C Hardware Bus support
-#
-# CONFIG_I2C_ALI1535 is not set
-# CONFIG_I2C_ALI1563 is not set
-# CONFIG_I2C_ALI15X3 is not set
-# CONFIG_I2C_AMD756 is not set
-# CONFIG_I2C_AMD8111 is not set
-# CONFIG_I2C_I801 is not set
-# CONFIG_I2C_I810 is not set
-# CONFIG_I2C_PIIX4 is not set
-# CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PROSAVAGE is not set
-# CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_SCx200_ACB is not set
-# CONFIG_I2C_SIS5595 is not set
-# CONFIG_I2C_SIS630 is not set
-# CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_STUB is not set
-# CONFIG_I2C_VIA is not set
-# CONFIG_I2C_VIAPRO is not set
-# CONFIG_I2C_VOODOO3 is not set
-# CONFIG_I2C_PCA_ISA is not set
-
-#
-# Miscellaneous I2C Chip support
-#
-# CONFIG_SENSORS_DS1337 is not set
-# CONFIG_SENSORS_DS1374 is not set
-# CONFIG_SENSORS_EEPROM is not set
-# CONFIG_SENSORS_PCF8574 is not set
-# CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_RTC8564 is not set
-# CONFIG_SENSORS_MAX6875 is not set
-# CONFIG_RTC_X1205_I2C is not set
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
-
-#
-# SPI support
-#
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
-# CONFIG_W1 is not set
-
-#
-# Hardware Monitoring support
-#
-CONFIG_HWMON=y
-# CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_ADM1021 is not set
-# CONFIG_SENSORS_ADM1025 is not set
-# CONFIG_SENSORS_ADM1026 is not set
-# CONFIG_SENSORS_ADM1031 is not set
-# CONFIG_SENSORS_ADM9240 is not set
-# CONFIG_SENSORS_ASB100 is not set
-# CONFIG_SENSORS_ATXP1 is not set
-# CONFIG_SENSORS_DS1621 is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
-# CONFIG_SENSORS_GL518SM is not set
-# CONFIG_SENSORS_GL520SM is not set
-# CONFIG_SENSORS_IT87 is not set
-# CONFIG_SENSORS_LM63 is not set
-# CONFIG_SENSORS_LM75 is not set
-# CONFIG_SENSORS_LM77 is not set
-# CONFIG_SENSORS_LM78 is not set
-# CONFIG_SENSORS_LM80 is not set
-# CONFIG_SENSORS_LM83 is not set
-# CONFIG_SENSORS_LM85 is not set
-# CONFIG_SENSORS_LM87 is not set
-# CONFIG_SENSORS_LM90 is not set
-# CONFIG_SENSORS_LM92 is not set
-# CONFIG_SENSORS_MAX1619 is not set
-# CONFIG_SENSORS_PC87360 is not set
-# CONFIG_SENSORS_SIS5595 is not set
-# CONFIG_SENSORS_SMSC47M1 is not set
-# CONFIG_SENSORS_SMSC47B397 is not set
-# CONFIG_SENSORS_VIA686A is not set
-# CONFIG_SENSORS_VT8231 is not set
-# CONFIG_SENSORS_W83781D is not set
-# CONFIG_SENSORS_W83792D is not set
-# CONFIG_SENSORS_W83L785TS is not set
-# CONFIG_SENSORS_W83627HF is not set
-# CONFIG_SENSORS_W83627EHF is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia Capabilities Port drivers
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Console display driver support
-#
-CONFIG_VGA_CONSOLE=y
-CONFIG_DUMMY_CONSOLE=y
-
-#
-# Sound
-#
 CONFIG_SOUND=m
-
-#
-# Advanced Linux Sound Architecture
-#
 CONFIG_SND=m
-CONFIG_SND_TIMER=m
-CONFIG_SND_PCM=m
-CONFIG_SND_HWDEP=m
-CONFIG_SND_RAWMIDI=m
 CONFIG_SND_SEQUENCER=m
-# CONFIG_SND_SEQ_DUMMY is not set
-CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
-# CONFIG_SND_SEQUENCER_OSS is not set
-# CONFIG_SND_DYNAMIC_MINORS is not set
-CONFIG_SND_SUPPORT_OLD_API=y
-# CONFIG_SND_VERBOSE_PRINTK is not set
-# CONFIG_SND_DEBUG is not set
-
-#
-# Generic devices
-#
-CONFIG_SND_OPL3_LIB=m
-CONFIG_SND_AC97_CODEC=m
-CONFIG_SND_AC97_BUS=m
-# CONFIG_SND_DUMMY is not set
-# CONFIG_SND_VIRMIDI is not set
-# CONFIG_SND_MTPAV is not set
-# CONFIG_SND_SERIAL_U16550 is not set
-# CONFIG_SND_MPU401 is not set
-
-#
-# PCI devices
-#
-# CONFIG_SND_AD1889 is not set
-# CONFIG_SND_ALI5451 is not set
-# CONFIG_SND_ATIIXP is not set
-# CONFIG_SND_ATIIXP_MODEM is not set
-# CONFIG_SND_AU8810 is not set
-# CONFIG_SND_AU8820 is not set
-# CONFIG_SND_AU8830 is not set
-# CONFIG_SND_AZT3328 is not set
-# CONFIG_SND_BT87X is not set
-# CONFIG_SND_CA0106 is not set
-# CONFIG_SND_CMIPCI is not set
 CONFIG_SND_CS4281=m
-# CONFIG_SND_CS46XX is not set
-# CONFIG_SND_EMU10K1 is not set
-# CONFIG_SND_EMU10K1X is not set
-# CONFIG_SND_ENS1370 is not set
-# CONFIG_SND_ENS1371 is not set
-# CONFIG_SND_ES1938 is not set
-# CONFIG_SND_ES1968 is not set
-# CONFIG_SND_FM801 is not set
-# CONFIG_SND_HDA_INTEL is not set
-# CONFIG_SND_HDSP is not set
-# CONFIG_SND_HDSPM is not set
-# CONFIG_SND_ICE1712 is not set
-# CONFIG_SND_ICE1724 is not set
-# CONFIG_SND_INTEL8X0 is not set
-# CONFIG_SND_INTEL8X0M is not set
-# CONFIG_SND_KORG1212 is not set
-# CONFIG_SND_MAESTRO3 is not set
-# CONFIG_SND_MIXART is not set
-# CONFIG_SND_NM256 is not set
-# CONFIG_SND_PCXHR is not set
-# CONFIG_SND_RME32 is not set
-# CONFIG_SND_RME96 is not set
-# CONFIG_SND_RME9652 is not set
-# CONFIG_SND_SONICVIBES is not set
-# CONFIG_SND_TRIDENT is not set
-# CONFIG_SND_VIA82XX is not set
-# CONFIG_SND_VIA82XX_MODEM is not set
-# CONFIG_SND_VX222 is not set
-# CONFIG_SND_YMFPCI is not set
-
-#
-# USB devices
-#
-# CONFIG_SND_USB_AUDIO is not set
-
-#
-# Open Sound System
-#
-# CONFIG_SOUND_PRIME is not set
-
-#
-# USB support
-#
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_HIDDEV=y
 CONFIG_USB=m
-# CONFIG_USB_DEBUG is not set
-
-#
-# Miscellaneous USB options
-#
-CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_BANDWIDTH is not set
-# CONFIG_USB_DYNAMIC_MINORS is not set
-# CONFIG_USB_SUSPEND is not set
-# CONFIG_USB_OTG is not set
-
-#
-# USB Host Controller Drivers
-#
-# CONFIG_USB_EHCI_HCD is not set
-# CONFIG_USB_ISP116X_HCD is not set
-# CONFIG_USB_OHCI_HCD is not set
+CONFIG_USB_MON=m
 CONFIG_USB_UHCI_HCD=m
-# CONFIG_USB_SL811_HCD is not set
-
-#
-# USB Device Class drivers
-#
-# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
 CONFIG_USB_ACM=m
 CONFIG_USB_PRINTER=m
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# may also be needed; see USB_STORAGE Help for more information
-#
 CONFIG_USB_STORAGE=m
-# CONFIG_USB_STORAGE_DEBUG is not set
-# CONFIG_USB_STORAGE_DATAFAB is not set
-# CONFIG_USB_STORAGE_FREECOM is not set
-# CONFIG_USB_STORAGE_ISD200 is not set
-# CONFIG_USB_STORAGE_DPCM is not set
-# CONFIG_USB_STORAGE_USBAT is not set
-# CONFIG_USB_STORAGE_SDDR09 is not set
-# CONFIG_USB_STORAGE_SDDR55 is not set
-# CONFIG_USB_STORAGE_JUMPSHOT is not set
-# CONFIG_USB_STORAGE_ALAUDA is not set
-# CONFIG_USB_LIBUSUAL is not set
-
-#
-# USB Input Devices
-#
-CONFIG_USB_HID=m
-CONFIG_USB_HIDINPUT=y
-# CONFIG_USB_HIDINPUT_POWERBOOK is not set
-# CONFIG_HID_FF is not set
-CONFIG_USB_HIDDEV=y
-
-#
-# USB HID Boot Protocol drivers
-#
-# CONFIG_USB_KBD is not set
-# CONFIG_USB_MOUSE is not set
-# CONFIG_USB_AIPTEK is not set
-# CONFIG_USB_WACOM is not set
-# CONFIG_USB_ACECAD is not set
-# CONFIG_USB_KBTAB is not set
-# CONFIG_USB_POWERMATE is not set
-# CONFIG_USB_MTOUCH is not set
-# CONFIG_USB_ITMTOUCH is not set
-# CONFIG_USB_EGALAX is not set
-# CONFIG_USB_YEALINK is not set
-# CONFIG_USB_XPAD is not set
-# CONFIG_USB_ATI_REMOTE is not set
-# CONFIG_USB_ATI_REMOTE2 is not set
-# CONFIG_USB_KEYSPAN_REMOTE is not set
-# CONFIG_USB_APPLETOUCH is not set
-
-#
-# USB Imaging devices
-#
-# CONFIG_USB_MDC800 is not set
-# CONFIG_USB_MICROTEK is not set
-
-#
-# USB Multimedia devices
-#
-# CONFIG_USB_DABUSB is not set
-
-#
-# Video4Linux support is needed for USB Multimedia device support
-#
-
-#
-# USB Network Adapters
-#
-# CONFIG_USB_CATC is not set
-# CONFIG_USB_KAWETH is not set
-# CONFIG_USB_PEGASUS is not set
-# CONFIG_USB_RTL8150 is not set
-# CONFIG_USB_USBNET is not set
-CONFIG_USB_MON=y
-
-#
-# USB port drivers
-#
-
-#
-# USB Serial Converter support
-#
-# CONFIG_USB_SERIAL is not set
-
-#
-# USB Miscellaneous drivers
-#
-# CONFIG_USB_EMI62 is not set
-# CONFIG_USB_EMI26 is not set
-# CONFIG_USB_AUERSWALD is not set
-# CONFIG_USB_RIO500 is not set
-# CONFIG_USB_LEGOTOWER is not set
-# CONFIG_USB_LCD is not set
-# CONFIG_USB_LED is not set
-# CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGETKIT is not set
-# CONFIG_USB_PHIDGETSERVO is not set
-# CONFIG_USB_IDMOUSE is not set
-# CONFIG_USB_LD is not set
-# CONFIG_USB_TEST is not set
-
-#
-# USB DSL modem support
-#
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# InfiniBand support
-#
-# CONFIG_INFINIBAND is not set
-
-#
-# EDAC - error detection and reporting (RAS)
-#
-
-#
-# File systems
-#
 CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
-# CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_XATTR=y
-# CONFIG_EXT3_FS_POSIX_ACL is not set
-# CONFIG_EXT3_FS_SECURITY is not set
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=y
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-CONFIG_FS_POSIX_ACL=y
 CONFIG_XFS_FS=y
-CONFIG_XFS_EXPORT=y
 CONFIG_XFS_QUOTA=y
-CONFIG_XFS_SECURITY=y
 CONFIG_XFS_POSIX_ACL=y
-# CONFIG_XFS_RT is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-CONFIG_INOTIFY=y
-# CONFIG_QUOTA is not set
-CONFIG_QUOTACTL=y
-CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=m
 CONFIG_AUTOFS4_FS=m
-# CONFIG_FUSE_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
-# CONFIG_ZISOFS is not set
 CONFIG_UDF_FS=m
-CONFIG_UDF_NLS=y
-
-#
-# DOS/FAT/NT Filesystems
-#
-CONFIG_FAT_FS=y
-# CONFIG_MSDOS_FS is not set
 CONFIG_VFAT_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=437
-CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
-CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
-CONFIG_HUGETLB_PAGE=y
-CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
-# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
-# CONFIG_NFS_V3_ACL is not set
 CONFIG_NFS_V4=y
-# CONFIG_NFS_DIRECTIO is not set
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
-# CONFIG_NFSD_V3_ACL is not set
 CONFIG_NFSD_V4=y
-CONFIG_NFSD_TCP=y
-CONFIG_LOCKD=m
-CONFIG_LOCKD_V4=y
-CONFIG_EXPORTFS=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=m
-CONFIG_SUNRPC_GSS=m
-CONFIG_RPCSEC_GSS_KRB5=m
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
 CONFIG_CIFS=m
 CONFIG_CIFS_STATS=y
-# CONFIG_CIFS_STATS2 is not set
 CONFIG_CIFS_XATTR=y
 CONFIG_CIFS_POSIX=y
-# CONFIG_CIFS_EXPERIMENTAL is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
-
-#
-# Partition Types
-#
 CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
 CONFIG_SGI_PARTITION=y
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
 CONFIG_EFI_PARTITION=y
-
-#
-# Native Language Support
-#
-CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="iso8859-1"
 CONFIG_NLS_CODEPAGE_437=y
-# CONFIG_NLS_CODEPAGE_737 is not set
-# CONFIG_NLS_CODEPAGE_775 is not set
-# CONFIG_NLS_CODEPAGE_850 is not set
-# CONFIG_NLS_CODEPAGE_852 is not set
-# CONFIG_NLS_CODEPAGE_855 is not set
-# CONFIG_NLS_CODEPAGE_857 is not set
-# CONFIG_NLS_CODEPAGE_860 is not set
-# CONFIG_NLS_CODEPAGE_861 is not set
-# CONFIG_NLS_CODEPAGE_862 is not set
-# CONFIG_NLS_CODEPAGE_863 is not set
-# CONFIG_NLS_CODEPAGE_864 is not set
-# CONFIG_NLS_CODEPAGE_865 is not set
-# CONFIG_NLS_CODEPAGE_866 is not set
-# CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
-# CONFIG_NLS_CODEPAGE_950 is not set
-# CONFIG_NLS_CODEPAGE_932 is not set
-# CONFIG_NLS_CODEPAGE_949 is not set
-# CONFIG_NLS_CODEPAGE_874 is not set
-# CONFIG_NLS_ISO8859_8 is not set
-# CONFIG_NLS_CODEPAGE_1250 is not set
-# CONFIG_NLS_CODEPAGE_1251 is not set
-# CONFIG_NLS_ASCII is not set
 CONFIG_NLS_ISO8859_1=y
-# CONFIG_NLS_ISO8859_2 is not set
-# CONFIG_NLS_ISO8859_3 is not set
-# CONFIG_NLS_ISO8859_4 is not set
-# CONFIG_NLS_ISO8859_5 is not set
-# CONFIG_NLS_ISO8859_6 is not set
-# CONFIG_NLS_ISO8859_7 is not set
-# CONFIG_NLS_ISO8859_9 is not set
-# CONFIG_NLS_ISO8859_13 is not set
-# CONFIG_NLS_ISO8859_14 is not set
-# CONFIG_NLS_ISO8859_15 is not set
-# CONFIG_NLS_KOI8_R is not set
-# CONFIG_NLS_KOI8_U is not set
 CONFIG_NLS_UTF8=m
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
-CONFIG_CRC32=y
-# CONFIG_LIBCRC32C is not set
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_PENDING_IRQ=y
-
-#
-# Instrumentation Support
-#
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-# CONFIG_KPROBES is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
-CONFIG_LOG_BUF_SHIFT=16
-CONFIG_DETECT_SOFTLOCKUP=y
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-CONFIG_DEBUG_PREEMPT=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_DEBUG_VM is not set
-CONFIG_FORCED_INLINING=y
-# CONFIG_RCU_TORTURE_TEST is not set
-# CONFIG_IA64_GRANULE_16MB is not set
-CONFIG_IA64_GRANULE_64MB=y
-# CONFIG_IA64_PRINT_HAZARDS is not set
-# CONFIG_DISABLE_VHPT is not set
-# CONFIG_IA64_DEBUG_CMPXCHG is not set
-# CONFIG_IA64_DEBUG_IRQ is not set
-CONFIG_SYSVIPC_COMPAT=y
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-CONFIG_CRYPTO=y
-# CONFIG_CRYPTO_HMAC is not set
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
 CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
 CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Hardware crypto devices
-#
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
new file mode 100644
index 00000000000..e8ed3ae70aa
--- /dev/null
+++ b/arch/ia64/configs/generic_defconfig
@@ -0,0 +1,235 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_CGROUPS=y
+CONFIG_CPUSETS=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_MCKINLEY=y
+CONFIG_IA64_PAGE_SIZE_64KB=y
+CONFIG_IA64_CYCLONE=y
+CONFIG_SMP=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_IA64_MCA_RECOVERY=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
+CONFIG_EFI_VARS=y
+CONFIG_BINFMT_MISC=m
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_DOCK=y
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_CONTAINER=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_ACPI=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_ARPD=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_CONNECTOR=y
+# CONFIG_PNP_DEBUG_MESSAGES is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SGI_IOC4=y
+CONFIG_SGI_XP=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_PIIX=y
+CONFIG_BLK_DEV_SGIIOC4=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_QLOGIC_1280=y
+CONFIG_ATA=y
+CONFIG_ATA_PIIX=y
+CONFIG_SATA_VITESSE=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_NET_ETHERNET=y
+CONFIG_NET_TULIP=y
+CONFIG_TULIP=m
+CONFIG_NET_PCI=y
+CONFIG_NET_VENDOR_INTEL=y
+CONFIG_E100=m
+CONFIG_E1000=y
+CONFIG_IGB=y
+CONFIG_TIGON3=y
+CONFIG_NETCONSOLE=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_GAMEPORT=m
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SGI_SNSC=y
+CONFIG_SGI_TIOCX=y
+CONFIG_SGI_MBCS=m
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=6
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_SGI_L1_CONSOLE=y
+CONFIG_SERIAL_SGI_IOC4=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_EFI_RTC=y
+CONFIG_RAW_DRIVER=m
+CONFIG_HPET=y
+CONFIG_AGP=m
+CONFIG_AGP_I460=m
+CONFIG_AGP_HP_ZX1=m
+CONFIG_AGP_SGI_TIOCA=m
+CONFIG_DRM=m
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_MGA=m
+CONFIG_DRM_SIS=m
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_VERBOSE_PRINTK=y
+CONFIG_SND_DUMMY=m
+CONFIG_SND_VIRMIDI=m
+CONFIG_SND_MTPAV=m
+CONFIG_SND_SERIAL_U16550=m
+CONFIG_SND_MPU401=m
+CONFIG_SND_CS4281=m
+CONFIG_SND_CS46XX=m
+CONFIG_SND_EMU10K1=m
+CONFIG_SND_FM801=m
+CONFIG_HID_GYRATION=m
+CONFIG_HID_PANTHERLORD=m
+CONFIG_HID_PETALYNX=m
+CONFIG_HID_SAMSUNG=m
+CONFIG_HID_SONY=m
+CONFIG_HID_SUNPLUS=m
+CONFIG_USB=m
+CONFIG_USB_MON=m
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_UHCI_HCD=m
+CONFIG_USB_STORAGE=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_MSPEC=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_AUTOFS_FS=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_UDF_FS=m
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V4=y
+CONFIG_SMB_FS=m
+CONFIG_SMB_NLS_DEFAULT=y
+CONFIG_CIFS=m
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SGI_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRC_T10DIF=y
+CONFIG_INTEL_IOMMU=y
diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig
index 0d29aa2066b..d663efd1e4d 100644
--- a/arch/ia64/configs/gensparse_defconfig
+++ b/arch/ia64/configs/gensparse_defconfig
@@ -1,773 +1,103 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-rc5
-# Thu Mar  2 16:39:10 2006
-#
-
-#
-# Code maturity level options
-#
 CONFIG_EXPERIMENTAL=y
-CONFIG_LOCK_KERNEL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-# CONFIG_BSD_PROCESS_ACCT is not set
-CONFIG_SYSCTL=y
-# CONFIG_AUDIT is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
-# CONFIG_CPUSETS is not set
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-# CONFIG_EMBEDDED is not set
-CONFIG_KALLSYMS=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_BLK_DEV_INITRD=y
 CONFIG_KALLSYMS_ALL=y
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_SLAB=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
 CONFIG_MODVERSIONS=y
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-CONFIG_STOP_MACHINE=y
-
-#
-# Block layer
-#
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-CONFIG_DEFAULT_AS=y
-# CONFIG_DEFAULT_DEADLINE is not set
-# CONFIG_DEFAULT_CFQ is not set
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="anticipatory"
-
-#
-# Processor type and features
-#
-CONFIG_IA64=y
-CONFIG_64BIT=y
-CONFIG_MMU=y
-CONFIG_SWIOTLB=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
-CONFIG_EFI=y
-CONFIG_GENERIC_IOMAP=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
-CONFIG_DMA_IS_DMA32=y
-CONFIG_IA64_GENERIC=y
-# CONFIG_IA64_DIG is not set
-# CONFIG_IA64_HP_ZX1 is not set
-# CONFIG_IA64_HP_ZX1_SWIOTLB is not set
-# CONFIG_IA64_SGI_SN2 is not set
-# CONFIG_IA64_HP_SIM is not set
-# CONFIG_ITANIUM is not set
 CONFIG_MCKINLEY=y
-# CONFIG_IA64_PAGE_SIZE_4KB is not set
-# CONFIG_IA64_PAGE_SIZE_8KB is not set
-CONFIG_IA64_PAGE_SIZE_16KB=y
-# CONFIG_IA64_PAGE_SIZE_64KB is not set
-CONFIG_PGTABLE_3=y
-# CONFIG_PGTABLE_4 is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_IA64_L1_CACHE_SHIFT=7
 CONFIG_IA64_CYCLONE=y
-CONFIG_IOSAPIC=y
-# CONFIG_IA64_SGI_SN_XP is not set
-CONFIG_FORCE_MAX_ZONEORDER=17
 CONFIG_SMP=y
 CONFIG_NR_CPUS=512
-CONFIG_IA64_NR_NODES=256
 CONFIG_HOTPLUG_CPU=y
-# CONFIG_SCHED_SMT is not set
-# CONFIG_PREEMPT is not set
-CONFIG_SELECT_MEMORY_MODEL=y
-# CONFIG_FLATMEM_MANUAL is not set
-# CONFIG_DISCONTIGMEM_MANUAL is not set
 CONFIG_SPARSEMEM_MANUAL=y
-CONFIG_SPARSEMEM=y
-CONFIG_NEED_MULTIPLE_NODES=y
-CONFIG_HAVE_MEMORY_PRESENT=y
-# CONFIG_SPARSEMEM_STATIC is not set
-CONFIG_SPARSEMEM_EXTREME=y
-# CONFIG_MEMORY_HOTPLUG is not set
-CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_MIGRATION=y
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
-CONFIG_NUMA=y
-CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
-CONFIG_IA32_SUPPORT=y
-CONFIG_COMPAT=y
 CONFIG_IA64_MCA_RECOVERY=y
 CONFIG_PERFMON=y
 CONFIG_IA64_PALINFO=y
-CONFIG_SGI_SN=y
-
-#
-# Firmware Drivers
-#
+CONFIG_SGI_IOC3=y
 CONFIG_EFI_VARS=y
-CONFIG_EFI_PCDP=y
-CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=m
-
-#
-# Power management and ACPI
-#
-CONFIG_PM=y
-CONFIG_PM_LEGACY=y
-# CONFIG_PM_DEBUG is not set
-
-#
-# ACPI (Advanced Configuration and Power Interface) Support
-#
-CONFIG_ACPI=y
 CONFIG_ACPI_BUTTON=m
 CONFIG_ACPI_FAN=m
 CONFIG_ACPI_PROCESSOR=m
-CONFIG_ACPI_HOTPLUG_CPU=y
-CONFIG_ACPI_THERMAL=m
-CONFIG_ACPI_NUMA=y
-CONFIG_ACPI_BLACKLIST_YEAR=0
-# CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_EC=y
-CONFIG_ACPI_POWER=y
-CONFIG_ACPI_SYSTEM=y
 CONFIG_ACPI_CONTAINER=m
-
-#
-# CPU Frequency scaling
-#
-# CONFIG_CPU_FREQ is not set
-
-#
-# Bus options (PCI, PCMCIA)
-#
-CONFIG_PCI=y
-CONFIG_PCI_DOMAINS=y
-# CONFIG_PCI_MSI is not set
-CONFIG_PCI_LEGACY_PROC=y
-# CONFIG_PCI_DEBUG is not set
-
-#
-# PCI Hotplug Support
-#
-CONFIG_HOTPLUG_PCI=m
-# CONFIG_HOTPLUG_PCI_FAKE is not set
+CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_ACPI=m
-# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
-# CONFIG_HOTPLUG_PCI_CPCI is not set
-# CONFIG_HOTPLUG_PCI_SHPC is not set
-# CONFIG_HOTPLUG_PCI_SGI is not set
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
 CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
 CONFIG_ARPD=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
 # CONFIG_IPV6 is not set
-# CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_IEEE80211 is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=m
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
-# CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-CONFIG_PNP=y
-# CONFIG_PNP_DEBUG is not set
-
-#
-# Protocols
-#
-CONFIG_PNPACPI=y
-
-#
-# Block devices
-#
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_UMEM is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
-# CONFIG_BLK_DEV_SX8 is not set
-# CONFIG_BLK_DEV_UB is not set
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CDROM_PKTCDVD is not set
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
+CONFIG_SGI_IOC4=y
 CONFIG_IDE=y
-CONFIG_IDE_MAX_HWIFS=4
-CONFIG_BLK_DEV_IDE=y
-
-#
-# Please see Documentation/ide.txt for help/info on IDE drives
-#
-# CONFIG_BLK_DEV_IDE_SATA is not set
-CONFIG_BLK_DEV_IDEDISK=y
-# CONFIG_IDEDISK_MULTI_MODE is not set
 CONFIG_BLK_DEV_IDECD=y
-# CONFIG_BLK_DEV_IDETAPE is not set
-CONFIG_BLK_DEV_IDEFLOPPY=y
-CONFIG_BLK_DEV_IDESCSI=m
-# CONFIG_IDE_TASK_IOCTL is not set
-
-#
-# IDE chipset support/bugfixes
-#
 CONFIG_IDE_GENERIC=y
-# CONFIG_BLK_DEV_IDEPNP is not set
-CONFIG_BLK_DEV_IDEPCI=y
-# CONFIG_IDEPCI_SHARE_IRQ is not set
-# CONFIG_BLK_DEV_OFFBOARD is not set
 CONFIG_BLK_DEV_GENERIC=y
-# CONFIG_BLK_DEV_OPTI621 is not set
-CONFIG_BLK_DEV_IDEDMA_PCI=y
-# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
-CONFIG_IDEDMA_PCI_AUTO=y
-# CONFIG_IDEDMA_ONLYDISK is not set
-# CONFIG_BLK_DEV_AEC62XX is not set
-# CONFIG_BLK_DEV_ALI15X3 is not set
-# CONFIG_BLK_DEV_AMD74XX is not set
 CONFIG_BLK_DEV_CMD64X=y
-# CONFIG_BLK_DEV_TRIFLEX is not set
-# CONFIG_BLK_DEV_CY82C693 is not set
-# CONFIG_BLK_DEV_CS5520 is not set
-# CONFIG_BLK_DEV_CS5530 is not set
-# CONFIG_BLK_DEV_HPT34X is not set
-# CONFIG_BLK_DEV_HPT366 is not set
-# CONFIG_BLK_DEV_SC1200 is not set
 CONFIG_BLK_DEV_PIIX=y
-# CONFIG_BLK_DEV_IT821X is not set
-# CONFIG_BLK_DEV_NS87415 is not set
-# CONFIG_BLK_DEV_PDC202XX_OLD is not set
-# CONFIG_BLK_DEV_PDC202XX_NEW is not set
-# CONFIG_BLK_DEV_SVWKS is not set
 CONFIG_BLK_DEV_SGIIOC4=y
-# CONFIG_BLK_DEV_SIIMAGE is not set
-# CONFIG_BLK_DEV_SLC90E66 is not set
-# CONFIG_BLK_DEV_TRM290 is not set
-# CONFIG_BLK_DEV_VIA82CXXX is not set
-# CONFIG_IDE_ARM is not set
-CONFIG_BLK_DEV_IDEDMA=y
-# CONFIG_IDEDMA_IVB is not set
-CONFIG_IDEDMA_AUTO=y
-# CONFIG_BLK_DEV_HD is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
 CONFIG_SCSI=y
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=m
-# CONFIG_CHR_DEV_OSST is not set
 CONFIG_BLK_DEV_SR=m
-# CONFIG_BLK_DEV_SR_VENDOR is not set
 CONFIG_CHR_DEV_SG=m
-# CONFIG_CHR_DEV_SCH is not set
-
-#
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
-#
-# CONFIG_SCSI_MULTI_LUN is not set
-# CONFIG_SCSI_CONSTANTS is not set
-# CONFIG_SCSI_LOGGING is not set
-
-#
-# SCSI Transport Attributes
-#
-CONFIG_SCSI_SPI_ATTRS=y
 CONFIG_SCSI_FC_ATTRS=y
-# CONFIG_SCSI_ISCSI_ATTRS is not set
-# CONFIG_SCSI_SAS_ATTRS is not set
-
-#
-# SCSI low-level drivers
-#
-# CONFIG_ISCSI_TCP is not set
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
-# CONFIG_SCSI_3W_9XXX is not set
-# CONFIG_SCSI_ACARD is not set
-# CONFIG_SCSI_AACRAID is not set
-# CONFIG_SCSI_AIC7XXX is not set
-# CONFIG_SCSI_AIC7XXX_OLD is not set
-# CONFIG_SCSI_AIC79XX is not set
-# CONFIG_MEGARAID_NEWGEN is not set
-# CONFIG_MEGARAID_LEGACY is not set
-# CONFIG_MEGARAID_SAS is not set
-CONFIG_SCSI_SATA=y
-# CONFIG_SCSI_SATA_AHCI is not set
-# CONFIG_SCSI_SATA_SVW is not set
-# CONFIG_SCSI_ATA_PIIX is not set
-# CONFIG_SCSI_SATA_MV is not set
-# CONFIG_SCSI_SATA_NV is not set
-# CONFIG_SCSI_PDC_ADMA is not set
-# CONFIG_SCSI_SATA_QSTOR is not set
-# CONFIG_SCSI_SATA_PROMISE is not set
-# CONFIG_SCSI_SATA_SX4 is not set
-# CONFIG_SCSI_SATA_SIL is not set
-# CONFIG_SCSI_SATA_SIL24 is not set
-# CONFIG_SCSI_SATA_SIS is not set
-# CONFIG_SCSI_SATA_ULI is not set
-# CONFIG_SCSI_SATA_VIA is not set
-CONFIG_SCSI_SATA_VITESSE=y
-# CONFIG_SCSI_DMX3191D is not set
-# CONFIG_SCSI_FUTURE_DOMAIN is not set
-# CONFIG_SCSI_IPS is not set
-# CONFIG_SCSI_INITIO is not set
-# CONFIG_SCSI_INIA100 is not set
 CONFIG_SCSI_SYM53C8XX_2=y
-CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
-CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
-CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
-# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
-# CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_FC is not set
 CONFIG_SCSI_QLOGIC_1280=y
-# CONFIG_SCSI_QLA_FC is not set
-# CONFIG_SCSI_LPFC is not set
-# CONFIG_SCSI_DC395x is not set
-# CONFIG_SCSI_DC390T is not set
-# CONFIG_SCSI_DEBUG is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=m
 CONFIG_MD_LINEAR=m
 CONFIG_MD_RAID0=m
 CONFIG_MD_RAID1=m
-# CONFIG_MD_RAID10 is not set
-CONFIG_MD_RAID5=m
-CONFIG_MD_RAID6=m
 CONFIG_MD_MULTIPATH=m
-# CONFIG_MD_FAULTY is not set
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
-# CONFIG_DM_MULTIPATH_EMC is not set
-
-#
-# Fusion MPT device support
-#
 CONFIG_FUSION=y
 CONFIG_FUSION_SPI=y
 CONFIG_FUSION_FC=m
-# CONFIG_FUSION_SAS is not set
-CONFIG_FUSION_MAX_SGE=128
-# CONFIG_FUSION_CTL is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
-# CONFIG_I2O is not set
-
-#
-# Network device support
-#
 CONFIG_NETDEVICES=y
 CONFIG_DUMMY=m
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-# CONFIG_NET_SB1000 is not set
-
-#
-# ARCnet devices
-#
-# CONFIG_ARCNET is not set
-
-#
-# PHY device support
-#
-# CONFIG_PHYLIB is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
 CONFIG_NET_ETHERNET=y
-CONFIG_MII=m
-# CONFIG_HAPPYMEAL is not set
-# CONFIG_SUNGEM is not set
-# CONFIG_CASSINI is not set
-# CONFIG_NET_VENDOR_3COM is not set
-
-#
-# Tulip family network device support
-#
 CONFIG_NET_TULIP=y
-# CONFIG_DE2104X is not set
 CONFIG_TULIP=m
-# CONFIG_TULIP_MWI is not set
-# CONFIG_TULIP_MMIO is not set
-# CONFIG_TULIP_NAPI is not set
-# CONFIG_DE4X5 is not set
-# CONFIG_WINBOND_840 is not set
-# CONFIG_DM9102 is not set
-# CONFIG_ULI526X is not set
-# CONFIG_HP100 is not set
 CONFIG_NET_PCI=y
-# CONFIG_PCNET32 is not set
-# CONFIG_AMD8111_ETH is not set
-# CONFIG_ADAPTEC_STARFIRE is not set
-# CONFIG_B44 is not set
-# CONFIG_FORCEDETH is not set
-# CONFIG_DGRS is not set
-CONFIG_EEPRO100=m
+CONFIG_NET_VENDOR_INTEL=y
 CONFIG_E100=m
-# CONFIG_FEALNX is not set
-# CONFIG_NATSEMI is not set
-# CONFIG_NE2K_PCI is not set
-# CONFIG_8139CP is not set
-# CONFIG_8139TOO is not set
-# CONFIG_SIS900 is not set
-# CONFIG_EPIC100 is not set
-# CONFIG_SUNDANCE is not set
-# CONFIG_VIA_RHINE is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
 CONFIG_E1000=y
-# CONFIG_E1000_NAPI is not set
-# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
-# CONFIG_SIS190 is not set
-# CONFIG_SKGE is not set
-# CONFIG_SKY2 is not set
-# CONFIG_SK98LIN is not set
-# CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
-# CONFIG_BNX2 is not set
-
-#
-# Ethernet (10000 Mbit)
-#
-# CONFIG_CHELSIO_T1 is not set
-# CONFIG_IXGB is not set
-# CONFIG_S2IO is not set
-
-#
-# Token Ring devices
-#
-# CONFIG_TR is not set
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_FDDI is not set
-# CONFIG_HIPPI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NET_FC is not set
-# CONFIG_SHAPER is not set
 CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_RX is not set
-# CONFIG_NETPOLL_TRAP is not set
-CONFIG_NET_POLL_CONTROLLER=y
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-CONFIG_INPUT_KEYBOARD=y
-CONFIG_KEYBOARD_ATKBD=y
-# CONFIG_KEYBOARD_SUNKBD is not set
-# CONFIG_KEYBOARD_LKKBD is not set
-# CONFIG_KEYBOARD_XTKBD is not set
-# CONFIG_KEYBOARD_NEWTON is not set
-CONFIG_INPUT_MOUSE=y
-CONFIG_MOUSE_PS2=y
-# CONFIG_MOUSE_SERIAL is not set
-# CONFIG_MOUSE_VSXXXAA is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-CONFIG_SERIO_I8042=y
 # CONFIG_SERIO_SERPORT is not set
-# CONFIG_SERIO_PCIPS2 is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
 CONFIG_GAMEPORT=m
-# CONFIG_GAMEPORT_NS558 is not set
-# CONFIG_GAMEPORT_L4 is not set
-# CONFIG_GAMEPORT_EMU10K1 is not set
-# CONFIG_GAMEPORT_FM801 is not set
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
 CONFIG_SERIAL_NONSTANDARD=y
-# CONFIG_COMPUTONE is not set
-# CONFIG_ROCKETPORT is not set
-# CONFIG_CYCLADES is not set
-# CONFIG_DIGIEPCA is not set
-# CONFIG_MOXA_INTELLIO is not set
-# CONFIG_MOXA_SMARTIO is not set
-# CONFIG_ISI is not set
-# CONFIG_SYNCLINKMP is not set
-# CONFIG_SYNCLINK_GT is not set
-# CONFIG_N_HDLC is not set
-# CONFIG_SPECIALIX is not set
-# CONFIG_SX is not set
-# CONFIG_STALDRV is not set
 CONFIG_SGI_SNSC=y
 CONFIG_SGI_TIOCX=y
 CONFIG_SGI_MBCS=m
-
-#
-# Serial drivers
-#
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_ACPI=y
 CONFIG_SERIAL_8250_NR_UARTS=6
-CONFIG_SERIAL_8250_RUNTIME_UARTS=4
 CONFIG_SERIAL_8250_EXTENDED=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
-# CONFIG_SERIAL_8250_DETECT_IRQ is not set
-# CONFIG_SERIAL_8250_RSA is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_SERIAL_SGI_L1_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
 CONFIG_SERIAL_SGI_IOC4=y
 CONFIG_SERIAL_SGI_IOC3=y
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_EFI_RTC=y
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
+CONFIG_RAW_DRIVER=m
+CONFIG_HPET=y
 CONFIG_AGP=m
 CONFIG_AGP_I460=m
 CONFIG_AGP_HP_ZX1=m
@@ -778,492 +108,65 @@ CONFIG_DRM_R128=m
 CONFIG_DRM_RADEON=m
 CONFIG_DRM_MGA=m
 CONFIG_DRM_SIS=m
-# CONFIG_DRM_VIA is not set
-# CONFIG_DRM_SAVAGE is not set
-CONFIG_RAW_DRIVER=m
-CONFIG_MAX_RAW_DEVS=256
-CONFIG_HPET=y
-# CONFIG_HPET_RTC_IRQ is not set
-CONFIG_HPET_MMAP=y
-# CONFIG_HANGCHECK_TIMER is not set
-CONFIG_MMTIMER=y
-
-#
-# TPM devices
-#
-# CONFIG_TCG_TPM is not set
-# CONFIG_TELCLOCK is not set
-
-#
-# I2C support
-#
-# CONFIG_I2C is not set
-
-#
-# SPI support
-#
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
-# CONFIG_W1 is not set
-
-#
-# Hardware Monitoring support
-#
-CONFIG_HWMON=y
-# CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia Capabilities Port drivers
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Console display driver support
-#
-CONFIG_VGA_CONSOLE=y
-CONFIG_DUMMY_CONSOLE=y
-
-#
-# Sound
-#
 CONFIG_SOUND=m
-
-#
-# Advanced Linux Sound Architecture
-#
 CONFIG_SND=m
-CONFIG_SND_TIMER=m
-CONFIG_SND_PCM=m
-CONFIG_SND_HWDEP=m
-CONFIG_SND_RAWMIDI=m
 CONFIG_SND_SEQUENCER=m
 CONFIG_SND_SEQ_DUMMY=m
-CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
 CONFIG_SND_SEQUENCER_OSS=y
-# CONFIG_SND_DYNAMIC_MINORS is not set
-CONFIG_SND_SUPPORT_OLD_API=y
 CONFIG_SND_VERBOSE_PRINTK=y
-# CONFIG_SND_DEBUG is not set
-
-#
-# Generic devices
-#
-CONFIG_SND_MPU401_UART=m
-CONFIG_SND_OPL3_LIB=m
-CONFIG_SND_AC97_CODEC=m
-CONFIG_SND_AC97_BUS=m
 CONFIG_SND_DUMMY=m
 CONFIG_SND_VIRMIDI=m
 CONFIG_SND_MTPAV=m
 CONFIG_SND_SERIAL_U16550=m
 CONFIG_SND_MPU401=m
-
-#
-# PCI devices
-#
-# CONFIG_SND_AD1889 is not set
-# CONFIG_SND_ALI5451 is not set
-# CONFIG_SND_ATIIXP is not set
-# CONFIG_SND_ATIIXP_MODEM is not set
-# CONFIG_SND_AU8810 is not set
-# CONFIG_SND_AU8820 is not set
-# CONFIG_SND_AU8830 is not set
-# CONFIG_SND_AZT3328 is not set
-# CONFIG_SND_BT87X is not set
-# CONFIG_SND_CA0106 is not set
-# CONFIG_SND_CMIPCI is not set
 CONFIG_SND_CS4281=m
 CONFIG_SND_CS46XX=m
-CONFIG_SND_CS46XX_NEW_DSP=y
 CONFIG_SND_EMU10K1=m
-# CONFIG_SND_EMU10K1X is not set
-# CONFIG_SND_ENS1370 is not set
-# CONFIG_SND_ENS1371 is not set
-# CONFIG_SND_ES1938 is not set
-# CONFIG_SND_ES1968 is not set
 CONFIG_SND_FM801=m
-# CONFIG_SND_FM801_TEA575X is not set
-# CONFIG_SND_HDA_INTEL is not set
-# CONFIG_SND_HDSP is not set
-# CONFIG_SND_HDSPM is not set
-# CONFIG_SND_ICE1712 is not set
-# CONFIG_SND_ICE1724 is not set
-# CONFIG_SND_INTEL8X0 is not set
-# CONFIG_SND_INTEL8X0M is not set
-# CONFIG_SND_KORG1212 is not set
-# CONFIG_SND_MAESTRO3 is not set
-# CONFIG_SND_MIXART is not set
-# CONFIG_SND_NM256 is not set
-# CONFIG_SND_PCXHR is not set
-# CONFIG_SND_RME32 is not set
-# CONFIG_SND_RME96 is not set
-# CONFIG_SND_RME9652 is not set
-# CONFIG_SND_SONICVIBES is not set
-# CONFIG_SND_TRIDENT is not set
-# CONFIG_SND_VIA82XX is not set
-# CONFIG_SND_VIA82XX_MODEM is not set
-# CONFIG_SND_VX222 is not set
-# CONFIG_SND_YMFPCI is not set
-
-#
-# USB devices
-#
-# CONFIG_SND_USB_AUDIO is not set
-
-#
-# Open Sound System
-#
-# CONFIG_SOUND_PRIME is not set
-
-#
-# USB support
-#
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
 CONFIG_USB=m
-# CONFIG_USB_DEBUG is not set
-
-#
-# Miscellaneous USB options
-#
-CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_BANDWIDTH is not set
-# CONFIG_USB_DYNAMIC_MINORS is not set
-# CONFIG_USB_SUSPEND is not set
-# CONFIG_USB_OTG is not set
-
-#
-# USB Host Controller Drivers
-#
+CONFIG_USB_MON=m
 CONFIG_USB_EHCI_HCD=m
-# CONFIG_USB_EHCI_SPLIT_ISO is not set
-# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
-# CONFIG_USB_ISP116X_HCD is not set
 CONFIG_USB_OHCI_HCD=m
-# CONFIG_USB_OHCI_BIG_ENDIAN is not set
-CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 CONFIG_USB_UHCI_HCD=m
-# CONFIG_USB_SL811_HCD is not set
-
-#
-# USB Device Class drivers
-#
-# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
-# CONFIG_USB_ACM is not set
-# CONFIG_USB_PRINTER is not set
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# may also be needed; see USB_STORAGE Help for more information
-#
 CONFIG_USB_STORAGE=m
-# CONFIG_USB_STORAGE_DEBUG is not set
-# CONFIG_USB_STORAGE_DATAFAB is not set
-# CONFIG_USB_STORAGE_FREECOM is not set
-# CONFIG_USB_STORAGE_ISD200 is not set
-# CONFIG_USB_STORAGE_DPCM is not set
-# CONFIG_USB_STORAGE_USBAT is not set
-# CONFIG_USB_STORAGE_SDDR09 is not set
-# CONFIG_USB_STORAGE_SDDR55 is not set
-# CONFIG_USB_STORAGE_JUMPSHOT is not set
-# CONFIG_USB_STORAGE_ALAUDA is not set
-# CONFIG_USB_LIBUSUAL is not set
-
-#
-# USB Input Devices
-#
-CONFIG_USB_HID=m
-CONFIG_USB_HIDINPUT=y
-# CONFIG_USB_HIDINPUT_POWERBOOK is not set
-# CONFIG_HID_FF is not set
-# CONFIG_USB_HIDDEV is not set
-
-#
-# USB HID Boot Protocol drivers
-#
-# CONFIG_USB_KBD is not set
-# CONFIG_USB_MOUSE is not set
-# CONFIG_USB_AIPTEK is not set
-# CONFIG_USB_WACOM is not set
-# CONFIG_USB_ACECAD is not set
-# CONFIG_USB_KBTAB is not set
-# CONFIG_USB_POWERMATE is not set
-# CONFIG_USB_MTOUCH is not set
-# CONFIG_USB_ITMTOUCH is not set
-# CONFIG_USB_EGALAX is not set
-# CONFIG_USB_YEALINK is not set
-# CONFIG_USB_XPAD is not set
-# CONFIG_USB_ATI_REMOTE is not set
-# CONFIG_USB_ATI_REMOTE2 is not set
-# CONFIG_USB_KEYSPAN_REMOTE is not set
-# CONFIG_USB_APPLETOUCH is not set
-
-#
-# USB Imaging devices
-#
-# CONFIG_USB_MDC800 is not set
-# CONFIG_USB_MICROTEK is not set
-
-#
-# USB Multimedia devices
-#
-# CONFIG_USB_DABUSB is not set
-
-#
-# Video4Linux support is needed for USB Multimedia device support
-#
-
-#
-# USB Network Adapters
-#
-# CONFIG_USB_CATC is not set
-# CONFIG_USB_KAWETH is not set
-# CONFIG_USB_PEGASUS is not set
-# CONFIG_USB_RTL8150 is not set
-# CONFIG_USB_USBNET is not set
-CONFIG_USB_MON=y
-
-#
-# USB port drivers
-#
-
-#
-# USB Serial Converter support
-#
-# CONFIG_USB_SERIAL is not set
-
-#
-# USB Miscellaneous drivers
-#
-# CONFIG_USB_EMI62 is not set
-# CONFIG_USB_EMI26 is not set
-# CONFIG_USB_AUERSWALD is not set
-# CONFIG_USB_RIO500 is not set
-# CONFIG_USB_LEGOTOWER is not set
-# CONFIG_USB_LCD is not set
-# CONFIG_USB_LED is not set
-# CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGETKIT is not set
-# CONFIG_USB_PHIDGETSERVO is not set
-# CONFIG_USB_IDMOUSE is not set
-# CONFIG_USB_SISUSBVGA is not set
-# CONFIG_USB_LD is not set
-# CONFIG_USB_TEST is not set
-
-#
-# USB DSL modem support
-#
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# InfiniBand support
-#
 CONFIG_INFINIBAND=m
-# CONFIG_INFINIBAND_USER_MAD is not set
-# CONFIG_INFINIBAND_USER_ACCESS is not set
 CONFIG_INFINIBAND_MTHCA=m
-# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
 CONFIG_INFINIBAND_IPOIB=m
-# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
-# CONFIG_INFINIBAND_SRP is not set
-
-#
-# SN Devices
-#
-CONFIG_SGI_IOC4=y
-CONFIG_SGI_IOC3=y
-
-#
-# EDAC - error detection and reporting (RAS)
-#
-
-#
-# File systems
-#
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
-# CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=y
 CONFIG_REISERFS_FS=y
-# CONFIG_REISERFS_CHECK is not set
-# CONFIG_REISERFS_PROC_INFO is not set
 CONFIG_REISERFS_FS_XATTR=y
 CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
-# CONFIG_JFS_FS is not set
-CONFIG_FS_POSIX_ACL=y
 CONFIG_XFS_FS=y
-CONFIG_XFS_EXPORT=y
-# CONFIG_XFS_QUOTA is not set
-# CONFIG_XFS_SECURITY is not set
-# CONFIG_XFS_POSIX_ACL is not set
-# CONFIG_XFS_RT is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-CONFIG_INOTIFY=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=y
 CONFIG_AUTOFS4_FS=y
-# CONFIG_FUSE_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
-# CONFIG_ZISOFS is not set
 CONFIG_UDF_FS=m
-CONFIG_UDF_NLS=y
-
-#
-# DOS/FAT/NT Filesystems
-#
-CONFIG_FAT_FS=y
-# CONFIG_MSDOS_FS is not set
 CONFIG_VFAT_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=437
-CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 CONFIG_NTFS_FS=m
-# CONFIG_NTFS_DEBUG is not set
-# CONFIG_NTFS_RW is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
-CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
-CONFIG_HUGETLB_PAGE=y
-CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
-# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
-# CONFIG_NFS_V3_ACL is not set
 CONFIG_NFS_V4=y
-CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
-# CONFIG_NFSD_V3_ACL is not set
 CONFIG_NFSD_V4=y
-CONFIG_NFSD_TCP=y
-CONFIG_LOCKD=m
-CONFIG_LOCKD_V4=y
-CONFIG_EXPORTFS=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=m
-CONFIG_SUNRPC_GSS=m
-CONFIG_RPCSEC_GSS_KRB5=m
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
 CONFIG_SMB_FS=m
 CONFIG_SMB_NLS_DEFAULT=y
-CONFIG_SMB_NLS_REMOTE="cp437"
 CONFIG_CIFS=m
-# CONFIG_CIFS_STATS is not set
-# CONFIG_CIFS_XATTR is not set
-# CONFIG_CIFS_EXPERIMENTAL is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
-
-#
-# Partition Types
-#
 CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
 CONFIG_SGI_PARTITION=y
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
 CONFIG_EFI_PARTITION=y
-
-#
-# Native Language Support
-#
-CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="iso8859-1"
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_737=m
 CONFIG_NLS_CODEPAGE_775=m
@@ -1287,7 +190,6 @@ CONFIG_NLS_CODEPAGE_874=m
 CONFIG_NLS_ISO8859_8=m
 CONFIG_NLS_CODEPAGE_1250=m
 CONFIG_NLS_CODEPAGE_1251=m
-# CONFIG_NLS_ASCII is not set
 CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_ISO8859_2=m
 CONFIG_NLS_ISO8859_3=m
@@ -1302,93 +204,7 @@ CONFIG_NLS_ISO8859_15=m
 CONFIG_NLS_KOI8_R=m
 CONFIG_NLS_KOI8_U=m
 CONFIG_NLS_UTF8=m
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
-CONFIG_CRC32=y
-# CONFIG_LIBCRC32C is not set
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_PENDING_IRQ=y
-
-#
-# HP Simulator drivers
-#
-# CONFIG_HP_SIMETH is not set
-# CONFIG_HP_SIMSERIAL is not set
-# CONFIG_HP_SIMSCSI is not set
-
-#
-# Instrumentation Support
-#
-# CONFIG_PROFILING is not set
-# CONFIG_KPROBES is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
-CONFIG_LOG_BUF_SHIFT=20
-CONFIG_DETECT_SOFTLOCKUP=y
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_DEBUG_VM is not set
-CONFIG_FORCED_INLINING=y
-# CONFIG_RCU_TORTURE_TEST is not set
-CONFIG_IA64_GRANULE_16MB=y
-# CONFIG_IA64_GRANULE_64MB is not set
-# CONFIG_IA64_PRINT_HAZARDS is not set
-# CONFIG_DISABLE_VHPT is not set
-# CONFIG_IA64_DEBUG_CMPXCHG is not set
-# CONFIG_IA64_DEBUG_IRQ is not set
-CONFIG_SYSVIPC_COMPAT=y
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-CONFIG_CRYPTO=y
-# CONFIG_CRYPTO_HMAC is not set
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
 CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-CONFIG_CRYPTO_DES=m
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Hardware crypto devices
-#
diff --git a/arch/ia64/configs/sim_defconfig b/arch/ia64/configs/sim_defconfig
index d9146c31ea1..b4548a3e82d 100644
--- a/arch/ia64/configs/sim_defconfig
+++ b/arch/ia64/configs/sim_defconfig
@@ -1,725 +1,56 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-rc5
-# Mon Feb 27 16:13:41 2006
-#
-
-#
-# Code maturity level options
-#
 CONFIG_EXPERIMENTAL=y
-CONFIG_LOCK_KERNEL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
-# CONFIG_POSIX_MQUEUE is not set
-# CONFIG_BSD_PROCESS_ACCT is not set
-CONFIG_SYSCTL=y
-# CONFIG_AUDIT is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
-# CONFIG_CPUSETS is not set
-CONFIG_INITRAMFS_SOURCE=""
+CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_EMBEDDED is not set
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_ALL is not set
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_SLAB=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
 CONFIG_MODVERSIONS=y
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-CONFIG_STOP_MACHINE=y
-
-#
-# Block layer
-#
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-CONFIG_DEFAULT_AS=y
-# CONFIG_DEFAULT_DEADLINE is not set
-# CONFIG_DEFAULT_CFQ is not set
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="anticipatory"
-
-#
-# Processor type and features
-#
-CONFIG_IA64=y
-CONFIG_64BIT=y
-CONFIG_MMU=y
-CONFIG_SWIOTLB=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
-CONFIG_EFI=y
-CONFIG_GENERIC_IOMAP=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
-CONFIG_DMA_IS_DMA32=y
-# CONFIG_IA64_GENERIC is not set
-# CONFIG_IA64_DIG is not set
-# CONFIG_IA64_HP_ZX1 is not set
-# CONFIG_IA64_HP_ZX1_SWIOTLB is not set
-# CONFIG_IA64_SGI_SN2 is not set
 CONFIG_IA64_HP_SIM=y
-# CONFIG_ITANIUM is not set
 CONFIG_MCKINLEY=y
-# CONFIG_IA64_PAGE_SIZE_4KB is not set
-# CONFIG_IA64_PAGE_SIZE_8KB is not set
-# CONFIG_IA64_PAGE_SIZE_16KB is not set
 CONFIG_IA64_PAGE_SIZE_64KB=y
-CONFIG_PGTABLE_3=y
-# CONFIG_PGTABLE_4 is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_IA64_L1_CACHE_SHIFT=7
-# CONFIG_IA64_CYCLONE is not set
-CONFIG_FORCE_MAX_ZONEORDER=17
 CONFIG_SMP=y
 CONFIG_NR_CPUS=64
-# CONFIG_HOTPLUG_CPU is not set
-# CONFIG_SCHED_SMT is not set
 CONFIG_PREEMPT=y
-CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
-CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-# CONFIG_VIRTUAL_MEM_MAP is not set
-CONFIG_IA32_SUPPORT=y
-CONFIG_COMPAT=y
-# CONFIG_IA64_MCA_RECOVERY is not set
-# CONFIG_PERFMON is not set
 CONFIG_IA64_PALINFO=m
-
-#
-# Firmware Drivers
-#
 CONFIG_EFI_VARS=y
-CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=y
-
-#
-# Power management and ACPI
-#
-
-#
-# Networking
-#
 CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
-# CONFIG_UNIX is not set
-# CONFIG_NET_KEY is not set
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
 # CONFIG_IPV6 is not set
-# CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_IEEE80211 is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
 # CONFIG_STANDALONE is not set
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
-# CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=y
-# CONFIG_BLK_DEV_CRYPTOLOOP is not set
-# CONFIG_BLK_DEV_NBD is not set
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-# CONFIG_BLK_DEV_INITRD is not set
-# CONFIG_CDROM_PKTCDVD is not set
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
 CONFIG_SCSI=y
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
 CONFIG_BLK_DEV_SD=y
-# CONFIG_CHR_DEV_ST is not set
-# CONFIG_CHR_DEV_OSST is not set
-# CONFIG_BLK_DEV_SR is not set
-# CONFIG_CHR_DEV_SG is not set
-# CONFIG_CHR_DEV_SCH is not set
-
-#
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
-#
 CONFIG_SCSI_MULTI_LUN=y
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
-
-#
-# SCSI Transport Attributes
-#
 CONFIG_SCSI_SPI_ATTRS=y
-# CONFIG_SCSI_FC_ATTRS is not set
-# CONFIG_SCSI_ISCSI_ATTRS is not set
-# CONFIG_SCSI_SAS_ATTRS is not set
-
-#
-# SCSI low-level drivers
-#
-# CONFIG_ISCSI_TCP is not set
-# CONFIG_SCSI_SATA is not set
-# CONFIG_SCSI_DEBUG is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# I2O device support
-#
-
-#
-# Network device support
-#
-# CONFIG_NETDEVICES is not set
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-
-#
-# PHY device support
-#
-
-#
-# Ethernet (10 or 100Mbit)
-#
-# CONFIG_NET_ETHERNET is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-
-#
-# Ethernet (10000 Mbit)
-#
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
 # CONFIG_SERIO_I8042 is not set
-CONFIG_SERIO_SERPORT=y
-# CONFIG_SERIO_RAW is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
 CONFIG_EFI_RTC=y
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_AGP is not set
-# CONFIG_RAW_DRIVER is not set
-# CONFIG_HANGCHECK_TIMER is not set
-
-#
-# TPM devices
-#
-# CONFIG_TCG_TPM is not set
-# CONFIG_TELCLOCK is not set
-
-#
-# I2C support
-#
-# CONFIG_I2C is not set
-
-#
-# SPI support
-#
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
-# CONFIG_W1 is not set
-
-#
-# Hardware Monitoring support
-#
-CONFIG_HWMON=y
-# CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia Capabilities Port drivers
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Console display driver support
-#
 # CONFIG_VGA_CONSOLE is not set
-CONFIG_DUMMY_CONSOLE=y
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-# CONFIG_USB_ARCH_HAS_HCD is not set
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# InfiniBand support
-#
-
-#
-# EDAC - error detection and reporting (RAS)
-#
-
-#
-# File systems
-#
+CONFIG_HP_SIMETH=y
+CONFIG_HP_SIMSERIAL=y
+CONFIG_HP_SIMSERIAL_CONSOLE=y
+CONFIG_HP_SIMSCSI=y
 CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
-# CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_FS_XATTR is not set
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-# CONFIG_XFS_FS is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-CONFIG_INOTIFY=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-# CONFIG_FUSE_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
-CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
 CONFIG_HUGETLBFS=y
-CONFIG_HUGETLB_PAGE=y
-CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
-# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
 CONFIG_NFS_FS=y
-# CONFIG_NFS_V3 is not set
-# CONFIG_NFS_V4 is not set
-CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=y
 CONFIG_NFSD_V3=y
-# CONFIG_NFSD_V3_ACL is not set
-# CONFIG_NFSD_V4 is not set
-# CONFIG_NFSD_TCP is not set
-CONFIG_LOCKD=y
-CONFIG_LOCKD_V4=y
-CONFIG_EXPORTFS=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=y
-# CONFIG_RPCSEC_GSS_KRB5 is not set
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
-
-#
-# Partition Types
-#
 CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
 CONFIG_EFI_PARTITION=y
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
-CONFIG_CRC32=y
-# CONFIG_LIBCRC32C is not set
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_PENDING_IRQ=y
-
-#
-# HP Simulator drivers
-#
-CONFIG_HP_SIMETH=y
-CONFIG_HP_SIMSERIAL=y
-CONFIG_HP_SIMSERIAL_CONSOLE=y
-CONFIG_HP_SIMSCSI=y
-
-#
-# Instrumentation Support
-#
-# CONFIG_PROFILING is not set
-# CONFIG_KPROBES is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-# CONFIG_MAGIC_SYSRQ is not set
 CONFIG_DEBUG_KERNEL=y
-CONFIG_LOG_BUF_SHIFT=16
-CONFIG_DETECT_SOFTLOCKUP=y
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-CONFIG_DEBUG_PREEMPT=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
 CONFIG_DEBUG_INFO=y
-# CONFIG_DEBUG_FS is not set
-# CONFIG_DEBUG_VM is not set
-CONFIG_FORCED_INLINING=y
-# CONFIG_RCU_TORTURE_TEST is not set
-# CONFIG_IA64_GRANULE_16MB is not set
-CONFIG_IA64_GRANULE_64MB=y
-# CONFIG_IA64_PRINT_HAZARDS is not set
-# CONFIG_DISABLE_VHPT is not set
-# CONFIG_IA64_DEBUG_CMPXCHG is not set
-# CONFIG_IA64_DEBUG_IRQ is not set
-CONFIG_SYSVIPC_COMPAT=y
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-# CONFIG_CRYPTO is not set
-
-#
-# Hardware crypto devices
-#
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
deleted file mode 100644
index 9ea35398e10..00000000000
--- a/arch/ia64/configs/sn2_defconfig
+++ /dev/null
@@ -1,1216 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.17-rc3
-# Thu Apr 27 11:48:23 2006
-#
-
-#
-# Code maturity level options
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_LOCK_KERNEL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-# CONFIG_BSD_PROCESS_ACCT is not set
-CONFIG_SYSCTL=y
-# CONFIG_AUDIT is not set
-# CONFIG_IKCONFIG is not set
-CONFIG_CPUSETS=y
-CONFIG_RELAY=y
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-# CONFIG_EMBEDDED is not set
-CONFIG_KALLSYMS=y
-CONFIG_KALLSYMS_ALL=y
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SHMEM=y
-CONFIG_SLAB=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
-# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-CONFIG_STOP_MACHINE=y
-
-#
-# Block layer
-#
-# CONFIG_BLK_DEV_IO_TRACE is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-CONFIG_DEFAULT_AS=y
-# CONFIG_DEFAULT_DEADLINE is not set
-# CONFIG_DEFAULT_CFQ is not set
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="anticipatory"
-
-#
-# Processor type and features
-#
-CONFIG_IA64=y
-CONFIG_64BIT=y
-CONFIG_MMU=y
-CONFIG_SWIOTLB=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_FIND_NEXT_BIT=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
-CONFIG_DMI=y
-CONFIG_EFI=y
-CONFIG_GENERIC_IOMAP=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
-CONFIG_IA64_UNCACHED_ALLOCATOR=y
-CONFIG_DMA_IS_DMA32=y
-# CONFIG_IA64_GENERIC is not set
-# CONFIG_IA64_DIG is not set
-# CONFIG_IA64_HP_ZX1 is not set
-# CONFIG_IA64_HP_ZX1_SWIOTLB is not set
-CONFIG_IA64_SGI_SN2=y
-# CONFIG_IA64_HP_SIM is not set
-# CONFIG_ITANIUM is not set
-CONFIG_MCKINLEY=y
-# CONFIG_IA64_PAGE_SIZE_4KB is not set
-# CONFIG_IA64_PAGE_SIZE_8KB is not set
-CONFIG_IA64_PAGE_SIZE_16KB=y
-# CONFIG_IA64_PAGE_SIZE_64KB is not set
-# CONFIG_PGTABLE_3 is not set
-CONFIG_PGTABLE_4=y
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_IA64_L1_CACHE_SHIFT=7
-# CONFIG_IA64_CYCLONE is not set
-CONFIG_IOSAPIC=y
-CONFIG_IA64_SGI_SN_XP=m
-CONFIG_FORCE_MAX_ZONEORDER=17
-CONFIG_SMP=y
-CONFIG_NR_CPUS=1024
-# CONFIG_HOTPLUG_CPU is not set
-CONFIG_SCHED_SMT=y
-CONFIG_PREEMPT=y
-CONFIG_SELECT_MEMORY_MODEL=y
-# CONFIG_FLATMEM_MANUAL is not set
-CONFIG_DISCONTIGMEM_MANUAL=y
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_DISCONTIGMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-CONFIG_NEED_MULTIPLE_NODES=y
-# CONFIG_SPARSEMEM_STATIC is not set
-CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_MIGRATION=y
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
-CONFIG_NUMA=y
-CONFIG_NODES_SHIFT=10
-CONFIG_VIRTUAL_MEM_MAP=y
-CONFIG_HOLES_IN_ZONE=y
-CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
-CONFIG_IA32_SUPPORT=y
-CONFIG_COMPAT=y
-CONFIG_IA64_MCA_RECOVERY=y
-CONFIG_PERFMON=y
-CONFIG_IA64_PALINFO=y
-CONFIG_SGI_SN=y
-
-#
-# Firmware Drivers
-#
-CONFIG_EFI_VARS=y
-CONFIG_EFI_PCDP=y
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_MISC is not set
-
-#
-# Power management and ACPI
-#
-CONFIG_PM=y
-# CONFIG_PM_LEGACY is not set
-# CONFIG_PM_DEBUG is not set
-
-#
-# ACPI (Advanced Configuration and Power Interface) Support
-#
-CONFIG_ACPI=y
-# CONFIG_ACPI_BUTTON is not set
-# CONFIG_ACPI_FAN is not set
-# CONFIG_ACPI_PROCESSOR is not set
-CONFIG_ACPI_NUMA=y
-CONFIG_ACPI_BLACKLIST_YEAR=0
-# CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_EC=y
-CONFIG_ACPI_POWER=y
-CONFIG_ACPI_SYSTEM=y
-# CONFIG_ACPI_CONTAINER is not set
-
-#
-# CPU Frequency scaling
-#
-# CONFIG_CPU_FREQ is not set
-
-#
-# Bus options (PCI, PCMCIA)
-#
-CONFIG_PCI=y
-CONFIG_PCI_DOMAINS=y
-# CONFIG_PCI_MSI is not set
-# CONFIG_PCI_DEBUG is not set
-
-#
-# PCI Hotplug Support
-#
-CONFIG_HOTPLUG_PCI=y
-# CONFIG_HOTPLUG_PCI_FAKE is not set
-# CONFIG_HOTPLUG_PCI_ACPI is not set
-# CONFIG_HOTPLUG_PCI_CPCI is not set
-# CONFIG_HOTPLUG_PCI_SHPC is not set
-CONFIG_HOTPLUG_PCI_SGI=y
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_NETDEBUG is not set
-CONFIG_PACKET=y
-CONFIG_PACKET_MMAP=y
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
-# CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_XFRM_TUNNEL is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_INET_DIAG=m
-CONFIG_INET_TCP_DIAG=m
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
-CONFIG_IPV6=m
-# CONFIG_IPV6_PRIVACY is not set
-# CONFIG_IPV6_ROUTER_PREF is not set
-# CONFIG_INET6_AH is not set
-# CONFIG_INET6_ESP is not set
-# CONFIG_INET6_IPCOMP is not set
-# CONFIG_INET6_XFRM_TUNNEL is not set
-# CONFIG_INET6_TUNNEL is not set
-# CONFIG_IPV6_TUNNEL is not set
-# CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_IEEE80211 is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=y
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
-# CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-# CONFIG_PNP is not set
-
-#
-# Block devices
-#
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_UMEM is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-# CONFIG_BLK_DEV_SX8 is not set
-# CONFIG_BLK_DEV_UB is not set
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CDROM_PKTCDVD is not set
-CONFIG_ATA_OVER_ETH=m
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-CONFIG_IDE=y
-CONFIG_IDE_MAX_HWIFS=4
-CONFIG_BLK_DEV_IDE=y
-
-#
-# Please see Documentation/ide.txt for help/info on IDE drives
-#
-# CONFIG_BLK_DEV_IDE_SATA is not set
-CONFIG_BLK_DEV_IDEDISK=y
-# CONFIG_IDEDISK_MULTI_MODE is not set
-CONFIG_BLK_DEV_IDECD=y
-# CONFIG_BLK_DEV_IDETAPE is not set
-# CONFIG_BLK_DEV_IDEFLOPPY is not set
-# CONFIG_BLK_DEV_IDESCSI is not set
-# CONFIG_IDE_TASK_IOCTL is not set
-
-#
-# IDE chipset support/bugfixes
-#
-CONFIG_IDE_GENERIC=y
-CONFIG_BLK_DEV_IDEPCI=y
-# CONFIG_IDEPCI_SHARE_IRQ is not set
-# CONFIG_BLK_DEV_OFFBOARD is not set
-# CONFIG_BLK_DEV_GENERIC is not set
-# CONFIG_BLK_DEV_OPTI621 is not set
-CONFIG_BLK_DEV_IDEDMA_PCI=y
-# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
-CONFIG_IDEDMA_PCI_AUTO=y
-# CONFIG_IDEDMA_ONLYDISK is not set
-# CONFIG_BLK_DEV_AEC62XX is not set
-# CONFIG_BLK_DEV_ALI15X3 is not set
-# CONFIG_BLK_DEV_AMD74XX is not set
-# CONFIG_BLK_DEV_CMD64X is not set
-# CONFIG_BLK_DEV_TRIFLEX is not set
-# CONFIG_BLK_DEV_CY82C693 is not set
-# CONFIG_BLK_DEV_CS5520 is not set
-# CONFIG_BLK_DEV_CS5530 is not set
-# CONFIG_BLK_DEV_HPT34X is not set
-# CONFIG_BLK_DEV_HPT366 is not set
-# CONFIG_BLK_DEV_SC1200 is not set
-# CONFIG_BLK_DEV_PIIX is not set
-# CONFIG_BLK_DEV_IT821X is not set
-# CONFIG_BLK_DEV_NS87415 is not set
-# CONFIG_BLK_DEV_PDC202XX_OLD is not set
-# CONFIG_BLK_DEV_PDC202XX_NEW is not set
-# CONFIG_BLK_DEV_SVWKS is not set
-CONFIG_BLK_DEV_SGIIOC4=y
-# CONFIG_BLK_DEV_SIIMAGE is not set
-# CONFIG_BLK_DEV_SLC90E66 is not set
-# CONFIG_BLK_DEV_TRM290 is not set
-# CONFIG_BLK_DEV_VIA82CXXX is not set
-# CONFIG_IDE_ARM is not set
-CONFIG_BLK_DEV_IDEDMA=y
-# CONFIG_IDEDMA_IVB is not set
-CONFIG_IDEDMA_AUTO=y
-# CONFIG_BLK_DEV_HD is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
-CONFIG_SCSI=y
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=m
-# CONFIG_CHR_DEV_OSST is not set
-CONFIG_BLK_DEV_SR=m
-# CONFIG_BLK_DEV_SR_VENDOR is not set
-CONFIG_CHR_DEV_SG=m
-CONFIG_CHR_DEV_SCH=m
-
-#
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
-#
-# CONFIG_SCSI_MULTI_LUN is not set
-CONFIG_SCSI_CONSTANTS=y
-# CONFIG_SCSI_LOGGING is not set
-
-#
-# SCSI Transport Attributes
-#
-CONFIG_SCSI_SPI_ATTRS=y
-CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_ISCSI_ATTRS=m
-CONFIG_SCSI_SAS_ATTRS=y
-
-#
-# SCSI low-level drivers
-#
-CONFIG_ISCSI_TCP=m
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
-# CONFIG_SCSI_3W_9XXX is not set
-# CONFIG_SCSI_ACARD is not set
-# CONFIG_SCSI_AACRAID is not set
-# CONFIG_SCSI_AIC7XXX is not set
-# CONFIG_SCSI_AIC7XXX_OLD is not set
-# CONFIG_SCSI_AIC79XX is not set
-# CONFIG_MEGARAID_NEWGEN is not set
-# CONFIG_MEGARAID_LEGACY is not set
-# CONFIG_MEGARAID_SAS is not set
-CONFIG_SCSI_SATA=y
-# CONFIG_SCSI_SATA_AHCI is not set
-# CONFIG_SCSI_SATA_SVW is not set
-# CONFIG_SCSI_ATA_PIIX is not set
-# CONFIG_SCSI_SATA_MV is not set
-# CONFIG_SCSI_SATA_NV is not set
-# CONFIG_SCSI_PDC_ADMA is not set
-# CONFIG_SCSI_SATA_QSTOR is not set
-# CONFIG_SCSI_SATA_PROMISE is not set
-# CONFIG_SCSI_SATA_SX4 is not set
-# CONFIG_SCSI_SATA_SIL is not set
-# CONFIG_SCSI_SATA_SIL24 is not set
-# CONFIG_SCSI_SATA_SIS is not set
-# CONFIG_SCSI_SATA_ULI is not set
-# CONFIG_SCSI_SATA_VIA is not set
-CONFIG_SCSI_SATA_VITESSE=y
-# CONFIG_SCSI_DMX3191D is not set
-# CONFIG_SCSI_FUTURE_DOMAIN is not set
-# CONFIG_SCSI_IPS is not set
-# CONFIG_SCSI_INITIO is not set
-# CONFIG_SCSI_INIA100 is not set
-# CONFIG_SCSI_SYM53C8XX_2 is not set
-# CONFIG_SCSI_IPR is not set
-CONFIG_SCSI_QLOGIC_1280=y
-CONFIG_SCSI_QLA_FC=y
-CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE=y
-# CONFIG_SCSI_QLA21XX is not set
-CONFIG_SCSI_QLA22XX=y
-CONFIG_SCSI_QLA2300=y
-CONFIG_SCSI_QLA2322=y
-# CONFIG_SCSI_QLA24XX is not set
-# CONFIG_SCSI_LPFC is not set
-# CONFIG_SCSI_DC395x is not set
-# CONFIG_SCSI_DC390T is not set
-# CONFIG_SCSI_DEBUG is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-CONFIG_MD_LINEAR=y
-CONFIG_MD_RAID0=y
-CONFIG_MD_RAID1=y
-# CONFIG_MD_RAID10 is not set
-CONFIG_MD_RAID5=y
-# CONFIG_MD_RAID5_RESHAPE is not set
-# CONFIG_MD_RAID6 is not set
-CONFIG_MD_MULTIPATH=y
-# CONFIG_MD_FAULTY is not set
-CONFIG_BLK_DEV_DM=y
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-CONFIG_DM_MULTIPATH_EMC=m
-
-#
-# Fusion MPT device support
-#
-CONFIG_FUSION=y
-CONFIG_FUSION_SPI=y
-CONFIG_FUSION_FC=y
-CONFIG_FUSION_SAS=y
-CONFIG_FUSION_MAX_SGE=128
-CONFIG_FUSION_CTL=m
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
-# CONFIG_I2O is not set
-
-#
-# Network device support
-#
-CONFIG_NETDEVICES=y
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-
-#
-# ARCnet devices
-#
-# CONFIG_ARCNET is not set
-
-#
-# PHY device support
-#
-
-#
-# Ethernet (10 or 100Mbit)
-#
-# CONFIG_NET_ETHERNET is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
-# CONFIG_E1000 is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
-# CONFIG_SIS190 is not set
-# CONFIG_SKGE is not set
-# CONFIG_SKY2 is not set
-# CONFIG_SK98LIN is not set
-CONFIG_TIGON3=y
-# CONFIG_BNX2 is not set
-
-#
-# Ethernet (10000 Mbit)
-#
-CONFIG_CHELSIO_T1=m
-# CONFIG_IXGB is not set
-CONFIG_S2IO=m
-# CONFIG_S2IO_NAPI is not set
-
-#
-# Token Ring devices
-#
-# CONFIG_TR is not set
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_FDDI is not set
-# CONFIG_HIPPI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NET_FC is not set
-# CONFIG_SHAPER is not set
-CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_RX is not set
-# CONFIG_NETPOLL_TRAP is not set
-CONFIG_NET_POLL_CONTROLLER=y
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-# CONFIG_SERIO is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
-CONFIG_SERIAL_NONSTANDARD=y
-# CONFIG_COMPUTONE is not set
-# CONFIG_ROCKETPORT is not set
-# CONFIG_CYCLADES is not set
-# CONFIG_DIGIEPCA is not set
-# CONFIG_MOXA_INTELLIO is not set
-# CONFIG_MOXA_SMARTIO is not set
-# CONFIG_ISI is not set
-# CONFIG_SYNCLINKMP is not set
-# CONFIG_SYNCLINK_GT is not set
-# CONFIG_N_HDLC is not set
-# CONFIG_SPECIALIX is not set
-# CONFIG_SX is not set
-# CONFIG_STALDRV is not set
-CONFIG_SGI_SNSC=y
-CONFIG_SGI_TIOCX=y
-CONFIG_SGI_MBCS=m
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_SERIAL_SGI_L1_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
-CONFIG_SERIAL_SGI_IOC4=y
-CONFIG_SERIAL_SGI_IOC3=y
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_HW_RANDOM is not set
-CONFIG_EFI_RTC=y
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-CONFIG_AGP=y
-# CONFIG_AGP_VIA is not set
-CONFIG_AGP_SGI_TIOCA=y
-# CONFIG_DRM is not set
-CONFIG_RAW_DRIVER=m
-CONFIG_MAX_RAW_DEVS=256
-# CONFIG_HPET is not set
-# CONFIG_HANGCHECK_TIMER is not set
-CONFIG_MMTIMER=y
-
-#
-# TPM devices
-#
-# CONFIG_TCG_TPM is not set
-# CONFIG_TELCLOCK is not set
-
-#
-# I2C support
-#
-# CONFIG_I2C is not set
-
-#
-# SPI support
-#
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
-# CONFIG_W1 is not set
-
-#
-# Hardware Monitoring support
-#
-# CONFIG_HWMON is not set
-# CONFIG_HWMON_VID is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-# CONFIG_USB_DABUSB is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Console display driver support
-#
-CONFIG_VGA_CONSOLE=y
-# CONFIG_VGACON_SOFT_SCROLLBACK is not set
-CONFIG_DUMMY_CONSOLE=y
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
-CONFIG_USB_ARCH_HAS_EHCI=y
-CONFIG_USB=m
-# CONFIG_USB_DEBUG is not set
-
-#
-# Miscellaneous USB options
-#
-# CONFIG_USB_DEVICEFS is not set
-# CONFIG_USB_BANDWIDTH is not set
-# CONFIG_USB_DYNAMIC_MINORS is not set
-# CONFIG_USB_SUSPEND is not set
-# CONFIG_USB_OTG is not set
-
-#
-# USB Host Controller Drivers
-#
-CONFIG_USB_EHCI_HCD=m
-# CONFIG_USB_EHCI_SPLIT_ISO is not set
-# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
-# CONFIG_USB_ISP116X_HCD is not set
-CONFIG_USB_OHCI_HCD=m
-# CONFIG_USB_OHCI_BIG_ENDIAN is not set
-CONFIG_USB_OHCI_LITTLE_ENDIAN=y
-CONFIG_USB_UHCI_HCD=m
-# CONFIG_USB_SL811_HCD is not set
-
-#
-# USB Device Class drivers
-#
-# CONFIG_USB_ACM is not set
-# CONFIG_USB_PRINTER is not set
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# may also be needed; see USB_STORAGE Help for more information
-#
-# CONFIG_USB_STORAGE is not set
-# CONFIG_USB_LIBUSUAL is not set
-
-#
-# USB Input Devices
-#
-CONFIG_USB_HID=m
-CONFIG_USB_HIDINPUT=y
-# CONFIG_USB_HIDINPUT_POWERBOOK is not set
-# CONFIG_HID_FF is not set
-# CONFIG_USB_HIDDEV is not set
-
-#
-# USB HID Boot Protocol drivers
-#
-# CONFIG_USB_KBD is not set
-# CONFIG_USB_MOUSE is not set
-# CONFIG_USB_AIPTEK is not set
-# CONFIG_USB_WACOM is not set
-# CONFIG_USB_ACECAD is not set
-# CONFIG_USB_KBTAB is not set
-# CONFIG_USB_POWERMATE is not set
-# CONFIG_USB_TOUCHSCREEN is not set
-# CONFIG_USB_YEALINK is not set
-# CONFIG_USB_XPAD is not set
-# CONFIG_USB_ATI_REMOTE is not set
-# CONFIG_USB_ATI_REMOTE2 is not set
-# CONFIG_USB_KEYSPAN_REMOTE is not set
-# CONFIG_USB_APPLETOUCH is not set
-
-#
-# USB Imaging devices
-#
-# CONFIG_USB_MDC800 is not set
-# CONFIG_USB_MICROTEK is not set
-
-#
-# USB Network Adapters
-#
-# CONFIG_USB_CATC is not set
-# CONFIG_USB_KAWETH is not set
-# CONFIG_USB_PEGASUS is not set
-# CONFIG_USB_RTL8150 is not set
-# CONFIG_USB_USBNET is not set
-CONFIG_USB_MON=y
-
-#
-# USB port drivers
-#
-
-#
-# USB Serial Converter support
-#
-# CONFIG_USB_SERIAL is not set
-
-#
-# USB Miscellaneous drivers
-#
-# CONFIG_USB_EMI62 is not set
-# CONFIG_USB_EMI26 is not set
-# CONFIG_USB_AUERSWALD is not set
-# CONFIG_USB_RIO500 is not set
-# CONFIG_USB_LEGOTOWER is not set
-# CONFIG_USB_LCD is not set
-# CONFIG_USB_LED is not set
-# CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGETKIT is not set
-# CONFIG_USB_PHIDGETSERVO is not set
-# CONFIG_USB_IDMOUSE is not set
-# CONFIG_USB_SISUSBVGA is not set
-# CONFIG_USB_LD is not set
-
-#
-# USB DSL modem support
-#
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# LED devices
-#
-# CONFIG_NEW_LEDS is not set
-
-#
-# LED drivers
-#
-
-#
-# LED Triggers
-#
-
-#
-# InfiniBand support
-#
-CONFIG_INFINIBAND=m
-# CONFIG_INFINIBAND_USER_MAD is not set
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_MTHCA_DEBUG=y
-CONFIG_INFINIBAND_IPOIB=m
-CONFIG_INFINIBAND_IPOIB_DEBUG=y
-# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set
-CONFIG_INFINIBAND_SRP=m
-
-#
-# SN Devices
-#
-CONFIG_SGI_IOC4=y
-CONFIG_SGI_IOC3=y
-
-#
-# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
-#
-
-#
-# Real Time Clock
-#
-# CONFIG_RTC_CLASS is not set
-
-#
-# File systems
-#
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-# CONFIG_EXT2_FS_XIP is not set
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_XATTR=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=y
-CONFIG_REISERFS_FS=y
-# CONFIG_REISERFS_CHECK is not set
-# CONFIG_REISERFS_PROC_INFO is not set
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
-# CONFIG_JFS_FS is not set
-CONFIG_FS_POSIX_ACL=y
-CONFIG_XFS_FS=y
-CONFIG_XFS_EXPORT=y
-CONFIG_XFS_QUOTA=y
-# CONFIG_XFS_SECURITY is not set
-CONFIG_XFS_POSIX_ACL=y
-CONFIG_XFS_RT=y
-# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-CONFIG_INOTIFY=y
-CONFIG_QUOTA=y
-# CONFIG_QFMT_V1 is not set
-# CONFIG_QFMT_V2 is not set
-CONFIG_QUOTACTL=y
-CONFIG_DNOTIFY=y
-CONFIG_AUTOFS_FS=m
-CONFIG_AUTOFS4_FS=m
-CONFIG_FUSE_FS=m
-
-#
-# CD-ROM/DVD Filesystems
-#
-CONFIG_ISO9660_FS=y
-CONFIG_JOLIET=y
-# CONFIG_ZISOFS is not set
-CONFIG_UDF_FS=m
-CONFIG_UDF_NLS=y
-
-#
-# DOS/FAT/NT Filesystems
-#
-CONFIG_FAT_FS=y
-# CONFIG_MSDOS_FS is not set
-CONFIG_VFAT_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=437
-CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_SYSFS=y
-CONFIG_TMPFS=y
-CONFIG_HUGETLBFS=y
-CONFIG_HUGETLB_PAGE=y
-CONFIG_RAMFS=y
-# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-CONFIG_NFS_FS=m
-CONFIG_NFS_V3=y
-# CONFIG_NFS_V3_ACL is not set
-CONFIG_NFS_V4=y
-CONFIG_NFS_DIRECTIO=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
-# CONFIG_NFSD_V3_ACL is not set
-CONFIG_NFSD_V4=y
-CONFIG_NFSD_TCP=y
-CONFIG_LOCKD=m
-CONFIG_LOCKD_V4=y
-CONFIG_EXPORTFS=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=m
-CONFIG_SUNRPC_GSS=m
-CONFIG_RPCSEC_GSS_KRB5=m
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-CONFIG_SMB_FS=m
-# CONFIG_SMB_NLS_DEFAULT is not set
-CONFIG_CIFS=m
-# CONFIG_CIFS_STATS is not set
-# CONFIG_CIFS_XATTR is not set
-# CONFIG_CIFS_EXPERIMENTAL is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
-
-#
-# Partition Types
-#
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
-CONFIG_SGI_PARTITION=y
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
-CONFIG_EFI_PARTITION=y
-
-#
-# Native Language Support
-#
-CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="iso8859-1"
-CONFIG_NLS_CODEPAGE_437=y
-# CONFIG_NLS_CODEPAGE_737 is not set
-# CONFIG_NLS_CODEPAGE_775 is not set
-# CONFIG_NLS_CODEPAGE_850 is not set
-# CONFIG_NLS_CODEPAGE_852 is not set
-# CONFIG_NLS_CODEPAGE_855 is not set
-# CONFIG_NLS_CODEPAGE_857 is not set
-# CONFIG_NLS_CODEPAGE_860 is not set
-# CONFIG_NLS_CODEPAGE_861 is not set
-# CONFIG_NLS_CODEPAGE_862 is not set
-# CONFIG_NLS_CODEPAGE_863 is not set
-# CONFIG_NLS_CODEPAGE_864 is not set
-# CONFIG_NLS_CODEPAGE_865 is not set
-# CONFIG_NLS_CODEPAGE_866 is not set
-# CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
-# CONFIG_NLS_CODEPAGE_950 is not set
-# CONFIG_NLS_CODEPAGE_932 is not set
-# CONFIG_NLS_CODEPAGE_949 is not set
-# CONFIG_NLS_CODEPAGE_874 is not set
-# CONFIG_NLS_ISO8859_8 is not set
-# CONFIG_NLS_CODEPAGE_1250 is not set
-# CONFIG_NLS_CODEPAGE_1251 is not set
-# CONFIG_NLS_ASCII is not set
-CONFIG_NLS_ISO8859_1=y
-# CONFIG_NLS_ISO8859_2 is not set
-# CONFIG_NLS_ISO8859_3 is not set
-# CONFIG_NLS_ISO8859_4 is not set
-# CONFIG_NLS_ISO8859_5 is not set
-# CONFIG_NLS_ISO8859_6 is not set
-# CONFIG_NLS_ISO8859_7 is not set
-# CONFIG_NLS_ISO8859_9 is not set
-# CONFIG_NLS_ISO8859_13 is not set
-# CONFIG_NLS_ISO8859_14 is not set
-# CONFIG_NLS_ISO8859_15 is not set
-# CONFIG_NLS_KOI8_R is not set
-# CONFIG_NLS_KOI8_U is not set
-CONFIG_NLS_UTF8=y
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-CONFIG_CRC16=m
-CONFIG_CRC32=y
-CONFIG_LIBCRC32C=m
-CONFIG_ZLIB_INFLATE=m
-CONFIG_ZLIB_DEFLATE=m
-CONFIG_GENERIC_ALLOCATOR=y
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_PENDING_IRQ=y
-
-#
-# Instrumentation Support
-#
-# CONFIG_PROFILING is not set
-# CONFIG_KPROBES is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_LOG_BUF_SHIFT=20
-CONFIG_DETECT_SOFTLOCKUP=y
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-CONFIG_DEBUG_PREEMPT=y
-# CONFIG_DEBUG_MUTEXES is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_DEBUG_FS is not set
-# CONFIG_DEBUG_VM is not set
-CONFIG_FORCED_INLINING=y
-# CONFIG_RCU_TORTURE_TEST is not set
-CONFIG_IA64_GRANULE_16MB=y
-# CONFIG_IA64_GRANULE_64MB is not set
-# CONFIG_IA64_PRINT_HAZARDS is not set
-# CONFIG_DISABLE_VHPT is not set
-# CONFIG_IA64_DEBUG_CMPXCHG is not set
-# CONFIG_IA64_DEBUG_IRQ is not set
-CONFIG_SYSVIPC_COMPAT=y
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_HMAC=y
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_SHA1=m
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-CONFIG_CRYPTO_DES=m
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-CONFIG_CRYPTO_DEFLATE=m
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-CONFIG_CRYPTO_CRC32C=m
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Hardware crypto devices
-#
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index 766bf495543..c8a3f40e77f 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -1,743 +1,98 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-rc5
-# Mon Feb 27 15:49:18 2006
-#
-
-#
-# Code maturity level options
-#
 CONFIG_EXPERIMENTAL=y
-CONFIG_LOCK_KERNEL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
-# CONFIG_BSD_PROCESS_ACCT is not set
-CONFIG_SYSCTL=y
-# CONFIG_AUDIT is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
-# CONFIG_CPUSETS is not set
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-# CONFIG_EMBEDDED is not set
-CONFIG_KALLSYMS=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_BLK_DEV_INITRD=y
 CONFIG_KALLSYMS_ALL=y
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_SLAB=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_KMOD=y
-CONFIG_STOP_MACHINE=y
-
-#
-# Block layer
-#
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-CONFIG_DEFAULT_AS=y
-# CONFIG_DEFAULT_DEADLINE is not set
-# CONFIG_DEFAULT_CFQ is not set
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="anticipatory"
-
-#
-# Processor type and features
-#
-CONFIG_IA64=y
-CONFIG_64BIT=y
-CONFIG_MMU=y
-CONFIG_SWIOTLB=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
-CONFIG_EFI=y
-CONFIG_GENERIC_IOMAP=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
-CONFIG_DMA_IS_DMA32=y
-# CONFIG_IA64_GENERIC is not set
+# CONFIG_BLK_DEV_BSG is not set
 CONFIG_IA64_DIG=y
-# CONFIG_IA64_HP_ZX1 is not set
-# CONFIG_IA64_HP_ZX1_SWIOTLB is not set
-# CONFIG_IA64_SGI_SN2 is not set
-# CONFIG_IA64_HP_SIM is not set
-# CONFIG_ITANIUM is not set
 CONFIG_MCKINLEY=y
-# CONFIG_IA64_PAGE_SIZE_4KB is not set
-# CONFIG_IA64_PAGE_SIZE_8KB is not set
-CONFIG_IA64_PAGE_SIZE_16KB=y
-# CONFIG_IA64_PAGE_SIZE_64KB is not set
-CONFIG_PGTABLE_3=y
-# CONFIG_PGTABLE_4 is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_IA64_L1_CACHE_SHIFT=7
+CONFIG_IA64_PAGE_SIZE_64KB=y
 CONFIG_IA64_CYCLONE=y
-CONFIG_IOSAPIC=y
-CONFIG_FORCE_MAX_ZONEORDER=17
 CONFIG_SMP=y
-CONFIG_NR_CPUS=4
+CONFIG_NR_CPUS=16
 CONFIG_HOTPLUG_CPU=y
 CONFIG_PERMIT_BSP_REMOVE=y
 CONFIG_FORCE_CPEI_RETARGET=y
-# CONFIG_SCHED_SMT is not set
-# CONFIG_PREEMPT is not set
-CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
-CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_VIRTUAL_MEM_MAP=y
-CONFIG_HOLES_IN_ZONE=y
-CONFIG_IA32_SUPPORT=y
-CONFIG_COMPAT=y
 CONFIG_IA64_MCA_RECOVERY=y
 CONFIG_PERFMON=y
 CONFIG_IA64_PALINFO=y
-
-#
-# Firmware Drivers
-#
+CONFIG_KEXEC=y
 CONFIG_EFI_VARS=y
-CONFIG_EFI_PCDP=y
-CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=m
-
-#
-# Power management and ACPI
-#
-CONFIG_PM=y
-CONFIG_PM_LEGACY=y
-# CONFIG_PM_DEBUG is not set
-
-#
-# ACPI (Advanced Configuration and Power Interface) Support
-#
-CONFIG_ACPI=y
 CONFIG_ACPI_BUTTON=m
 CONFIG_ACPI_FAN=m
 CONFIG_ACPI_PROCESSOR=m
-CONFIG_ACPI_HOTPLUG_CPU=y
-CONFIG_ACPI_THERMAL=m
-CONFIG_ACPI_BLACKLIST_YEAR=0
-# CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_EC=y
-CONFIG_ACPI_POWER=y
-CONFIG_ACPI_SYSTEM=y
 CONFIG_ACPI_CONTAINER=m
-
-#
-# CPU Frequency scaling
-#
-# CONFIG_CPU_FREQ is not set
-
-#
-# Bus options (PCI, PCMCIA)
-#
-CONFIG_PCI=y
-CONFIG_PCI_DOMAINS=y
-# CONFIG_PCI_MSI is not set
-CONFIG_PCI_LEGACY_PROC=y
-# CONFIG_PCI_DEBUG is not set
-
-#
-# PCI Hotplug Support
-#
-CONFIG_HOTPLUG_PCI=m
-# CONFIG_HOTPLUG_PCI_FAKE is not set
+CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_ACPI=m
-# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
-# CONFIG_HOTPLUG_PCI_CPCI is not set
-# CONFIG_HOTPLUG_PCI_SHPC is not set
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
 CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
 CONFIG_ARPD=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
 # CONFIG_IPV6 is not set
-# CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_IEEE80211 is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=m
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
-# CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-CONFIG_PNP=y
-# CONFIG_PNP_DEBUG is not set
-
-#
-# Protocols
-#
-CONFIG_PNPACPI=y
-
-#
-# Block devices
-#
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_UMEM is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
-# CONFIG_BLK_DEV_SX8 is not set
-# CONFIG_BLK_DEV_UB is not set
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CDROM_PKTCDVD is not set
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
 CONFIG_IDE=y
-CONFIG_IDE_MAX_HWIFS=4
-CONFIG_BLK_DEV_IDE=y
-
-#
-# Please see Documentation/ide.txt for help/info on IDE drives
-#
-# CONFIG_BLK_DEV_IDE_SATA is not set
-CONFIG_BLK_DEV_IDEDISK=y
-# CONFIG_IDEDISK_MULTI_MODE is not set
 CONFIG_BLK_DEV_IDECD=y
-# CONFIG_BLK_DEV_IDETAPE is not set
-CONFIG_BLK_DEV_IDEFLOPPY=y
-CONFIG_BLK_DEV_IDESCSI=m
-# CONFIG_IDE_TASK_IOCTL is not set
-
-#
-# IDE chipset support/bugfixes
-#
-# CONFIG_IDE_GENERIC is not set
-# CONFIG_BLK_DEV_IDEPNP is not set
-CONFIG_BLK_DEV_IDEPCI=y
-# CONFIG_IDEPCI_SHARE_IRQ is not set
-# CONFIG_BLK_DEV_OFFBOARD is not set
 CONFIG_BLK_DEV_GENERIC=y
-# CONFIG_BLK_DEV_OPTI621 is not set
-CONFIG_BLK_DEV_IDEDMA_PCI=y
-# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
-CONFIG_IDEDMA_PCI_AUTO=y
-# CONFIG_IDEDMA_ONLYDISK is not set
-# CONFIG_BLK_DEV_AEC62XX is not set
-# CONFIG_BLK_DEV_ALI15X3 is not set
-# CONFIG_BLK_DEV_AMD74XX is not set
 CONFIG_BLK_DEV_CMD64X=y
-# CONFIG_BLK_DEV_TRIFLEX is not set
-# CONFIG_BLK_DEV_CY82C693 is not set
-# CONFIG_BLK_DEV_CS5520 is not set
-# CONFIG_BLK_DEV_CS5530 is not set
-# CONFIG_BLK_DEV_HPT34X is not set
-# CONFIG_BLK_DEV_HPT366 is not set
-# CONFIG_BLK_DEV_SC1200 is not set
 CONFIG_BLK_DEV_PIIX=y
-# CONFIG_BLK_DEV_IT821X is not set
-# CONFIG_BLK_DEV_NS87415 is not set
-# CONFIG_BLK_DEV_PDC202XX_OLD is not set
-# CONFIG_BLK_DEV_PDC202XX_NEW is not set
-# CONFIG_BLK_DEV_SVWKS is not set
-# CONFIG_BLK_DEV_SIIMAGE is not set
-# CONFIG_BLK_DEV_SLC90E66 is not set
-# CONFIG_BLK_DEV_TRM290 is not set
-# CONFIG_BLK_DEV_VIA82CXXX is not set
-# CONFIG_IDE_ARM is not set
-CONFIG_BLK_DEV_IDEDMA=y
-# CONFIG_IDEDMA_IVB is not set
-CONFIG_IDEDMA_AUTO=y
-# CONFIG_BLK_DEV_HD is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
 CONFIG_SCSI=y
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=m
-# CONFIG_CHR_DEV_OSST is not set
 CONFIG_BLK_DEV_SR=m
-# CONFIG_BLK_DEV_SR_VENDOR is not set
 CONFIG_CHR_DEV_SG=m
-# CONFIG_CHR_DEV_SCH is not set
-
-#
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
-#
-# CONFIG_SCSI_MULTI_LUN is not set
-# CONFIG_SCSI_CONSTANTS is not set
-# CONFIG_SCSI_LOGGING is not set
-
-#
-# SCSI Transport Attributes
-#
-CONFIG_SCSI_SPI_ATTRS=y
-CONFIG_SCSI_FC_ATTRS=y
-# CONFIG_SCSI_ISCSI_ATTRS is not set
-# CONFIG_SCSI_SAS_ATTRS is not set
-
-#
-# SCSI low-level drivers
-#
-# CONFIG_ISCSI_TCP is not set
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
-# CONFIG_SCSI_3W_9XXX is not set
-# CONFIG_SCSI_ACARD is not set
-# CONFIG_SCSI_AACRAID is not set
-# CONFIG_SCSI_AIC7XXX is not set
-# CONFIG_SCSI_AIC7XXX_OLD is not set
-# CONFIG_SCSI_AIC79XX is not set
-# CONFIG_MEGARAID_NEWGEN is not set
-# CONFIG_MEGARAID_LEGACY is not set
-# CONFIG_MEGARAID_SAS is not set
-# CONFIG_SCSI_SATA is not set
-# CONFIG_SCSI_DMX3191D is not set
-# CONFIG_SCSI_FUTURE_DOMAIN is not set
-# CONFIG_SCSI_IPS is not set
-# CONFIG_SCSI_INITIO is not set
-# CONFIG_SCSI_INIA100 is not set
 CONFIG_SCSI_SYM53C8XX_2=y
-CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
-CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
-CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
-# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
-# CONFIG_SCSI_IPR is not set
-CONFIG_SCSI_QLOGIC_FC=y
-# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set
 CONFIG_SCSI_QLOGIC_1280=y
-# CONFIG_SCSI_QLA_FC is not set
-# CONFIG_SCSI_LPFC is not set
-# CONFIG_SCSI_DC395x is not set
-# CONFIG_SCSI_DC390T is not set
-# CONFIG_SCSI_DEBUG is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=m
 CONFIG_MD_LINEAR=m
 CONFIG_MD_RAID0=m
 CONFIG_MD_RAID1=m
-# CONFIG_MD_RAID10 is not set
-CONFIG_MD_RAID5=m
-CONFIG_MD_RAID6=m
 CONFIG_MD_MULTIPATH=m
-# CONFIG_MD_FAULTY is not set
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_ZERO=m
-# CONFIG_DM_MULTIPATH is not set
-
-#
-# Fusion MPT device support
-#
 CONFIG_FUSION=y
 CONFIG_FUSION_SPI=y
 CONFIG_FUSION_FC=y
-# CONFIG_FUSION_SAS is not set
-CONFIG_FUSION_MAX_SGE=128
 CONFIG_FUSION_CTL=y
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
-# CONFIG_I2O is not set
-
-#
-# Network device support
-#
 CONFIG_NETDEVICES=y
 CONFIG_DUMMY=m
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-# CONFIG_NET_SB1000 is not set
-
-#
-# ARCnet devices
-#
-# CONFIG_ARCNET is not set
-
-#
-# PHY device support
-#
-# CONFIG_PHYLIB is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
 CONFIG_NET_ETHERNET=y
-CONFIG_MII=m
-# CONFIG_HAPPYMEAL is not set
-# CONFIG_SUNGEM is not set
-# CONFIG_CASSINI is not set
-# CONFIG_NET_VENDOR_3COM is not set
-
-#
-# Tulip family network device support
-#
 CONFIG_NET_TULIP=y
-# CONFIG_DE2104X is not set
 CONFIG_TULIP=m
-# CONFIG_TULIP_MWI is not set
-# CONFIG_TULIP_MMIO is not set
-# CONFIG_TULIP_NAPI is not set
-# CONFIG_DE4X5 is not set
-# CONFIG_WINBOND_840 is not set
-# CONFIG_DM9102 is not set
-# CONFIG_ULI526X is not set
-# CONFIG_HP100 is not set
 CONFIG_NET_PCI=y
-# CONFIG_PCNET32 is not set
-# CONFIG_AMD8111_ETH is not set
-# CONFIG_ADAPTEC_STARFIRE is not set
-# CONFIG_B44 is not set
-# CONFIG_FORCEDETH is not set
-# CONFIG_DGRS is not set
-CONFIG_EEPRO100=m
+CONFIG_NET_VENDOR_INTEL=y
 CONFIG_E100=m
-# CONFIG_FEALNX is not set
-# CONFIG_NATSEMI is not set
-# CONFIG_NE2K_PCI is not set
-# CONFIG_8139CP is not set
-# CONFIG_8139TOO is not set
-# CONFIG_SIS900 is not set
-# CONFIG_EPIC100 is not set
-# CONFIG_SUNDANCE is not set
-# CONFIG_VIA_RHINE is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
 CONFIG_E1000=y
-# CONFIG_E1000_NAPI is not set
-# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
-# CONFIG_SIS190 is not set
-# CONFIG_SKGE is not set
-# CONFIG_SKY2 is not set
-# CONFIG_SK98LIN is not set
-# CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
-# CONFIG_BNX2 is not set
-
-#
-# Ethernet (10000 Mbit)
-#
-# CONFIG_CHELSIO_T1 is not set
-# CONFIG_IXGB is not set
-# CONFIG_S2IO is not set
-
-#
-# Token Ring devices
-#
-# CONFIG_TR is not set
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_FDDI is not set
-# CONFIG_HIPPI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NET_FC is not set
-# CONFIG_SHAPER is not set
 CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_RX is not set
-# CONFIG_NETPOLL_TRAP is not set
-CONFIG_NET_POLL_CONTROLLER=y
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-CONFIG_INPUT_KEYBOARD=y
-CONFIG_KEYBOARD_ATKBD=y
-# CONFIG_KEYBOARD_SUNKBD is not set
-# CONFIG_KEYBOARD_LKKBD is not set
-# CONFIG_KEYBOARD_XTKBD is not set
-# CONFIG_KEYBOARD_NEWTON is not set
-CONFIG_INPUT_MOUSE=y
-CONFIG_MOUSE_PS2=y
-# CONFIG_MOUSE_SERIAL is not set
-# CONFIG_MOUSE_VSXXXAA is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-CONFIG_SERIO_I8042=y
 # CONFIG_SERIO_SERPORT is not set
-# CONFIG_SERIO_PCIPS2 is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
 CONFIG_GAMEPORT=m
-# CONFIG_GAMEPORT_NS558 is not set
-# CONFIG_GAMEPORT_L4 is not set
-# CONFIG_GAMEPORT_EMU10K1 is not set
-# CONFIG_GAMEPORT_FM801 is not set
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
 CONFIG_SERIAL_NONSTANDARD=y
-# CONFIG_COMPUTONE is not set
-# CONFIG_ROCKETPORT is not set
-# CONFIG_CYCLADES is not set
-# CONFIG_DIGIEPCA is not set
-# CONFIG_MOXA_INTELLIO is not set
-# CONFIG_MOXA_SMARTIO is not set
-# CONFIG_ISI is not set
-# CONFIG_SYNCLINKMP is not set
-# CONFIG_SYNCLINK_GT is not set
-# CONFIG_N_HDLC is not set
-# CONFIG_SPECIALIX is not set
-# CONFIG_SX is not set
-# CONFIG_STALDRV is not set
-
-#
-# Serial drivers
-#
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_ACPI=y
 CONFIG_SERIAL_8250_NR_UARTS=6
-CONFIG_SERIAL_8250_RUNTIME_UARTS=4
 CONFIG_SERIAL_8250_EXTENDED=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
-# CONFIG_SERIAL_8250_DETECT_IRQ is not set
-# CONFIG_SERIAL_8250_RSA is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_EFI_RTC=y
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
+CONFIG_RAW_DRIVER=m
+CONFIG_HPET=y
 CONFIG_AGP=m
 CONFIG_AGP_I460=m
 CONFIG_DRM=m
@@ -746,382 +101,44 @@ CONFIG_DRM_R128=m
 CONFIG_DRM_RADEON=m
 CONFIG_DRM_MGA=m
 CONFIG_DRM_SIS=m
-# CONFIG_DRM_VIA is not set
-# CONFIG_DRM_SAVAGE is not set
-CONFIG_RAW_DRIVER=m
-CONFIG_MAX_RAW_DEVS=256
-CONFIG_HPET=y
-# CONFIG_HPET_RTC_IRQ is not set
-CONFIG_HPET_MMAP=y
-# CONFIG_HANGCHECK_TIMER is not set
-
-#
-# TPM devices
-#
-# CONFIG_TCG_TPM is not set
-# CONFIG_TELCLOCK is not set
-
-#
-# I2C support
-#
-# CONFIG_I2C is not set
-
-#
-# SPI support
-#
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
-# CONFIG_W1 is not set
-
-#
-# Hardware Monitoring support
-#
-CONFIG_HWMON=y
-# CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia Capabilities Port drivers
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Console display driver support
-#
-CONFIG_VGA_CONSOLE=y
-CONFIG_DUMMY_CONSOLE=y
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
 CONFIG_USB=y
-# CONFIG_USB_DEBUG is not set
-
-#
-# Miscellaneous USB options
-#
-CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_BANDWIDTH is not set
-# CONFIG_USB_DYNAMIC_MINORS is not set
-# CONFIG_USB_SUSPEND is not set
-# CONFIG_USB_OTG is not set
-
-#
-# USB Host Controller Drivers
-#
 CONFIG_USB_EHCI_HCD=m
-# CONFIG_USB_EHCI_SPLIT_ISO is not set
-# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
-# CONFIG_USB_ISP116X_HCD is not set
 CONFIG_USB_OHCI_HCD=m
-# CONFIG_USB_OHCI_BIG_ENDIAN is not set
-CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 CONFIG_USB_UHCI_HCD=y
-# CONFIG_USB_SL811_HCD is not set
-
-#
-# USB Device Class drivers
-#
-# CONFIG_USB_ACM is not set
-# CONFIG_USB_PRINTER is not set
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# may also be needed; see USB_STORAGE Help for more information
-#
 CONFIG_USB_STORAGE=m
-# CONFIG_USB_STORAGE_DEBUG is not set
-# CONFIG_USB_STORAGE_DATAFAB is not set
-# CONFIG_USB_STORAGE_FREECOM is not set
-# CONFIG_USB_STORAGE_ISD200 is not set
-# CONFIG_USB_STORAGE_DPCM is not set
-# CONFIG_USB_STORAGE_USBAT is not set
-# CONFIG_USB_STORAGE_SDDR09 is not set
-# CONFIG_USB_STORAGE_SDDR55 is not set
-# CONFIG_USB_STORAGE_JUMPSHOT is not set
-# CONFIG_USB_STORAGE_ALAUDA is not set
-# CONFIG_USB_LIBUSUAL is not set
-
-#
-# USB Input Devices
-#
-CONFIG_USB_HID=y
-CONFIG_USB_HIDINPUT=y
-# CONFIG_USB_HIDINPUT_POWERBOOK is not set
-# CONFIG_HID_FF is not set
-# CONFIG_USB_HIDDEV is not set
-# CONFIG_USB_AIPTEK is not set
-# CONFIG_USB_WACOM is not set
-# CONFIG_USB_ACECAD is not set
-# CONFIG_USB_KBTAB is not set
-# CONFIG_USB_POWERMATE is not set
-# CONFIG_USB_MTOUCH is not set
-# CONFIG_USB_ITMTOUCH is not set
-# CONFIG_USB_EGALAX is not set
-# CONFIG_USB_YEALINK is not set
-# CONFIG_USB_XPAD is not set
-# CONFIG_USB_ATI_REMOTE is not set
-# CONFIG_USB_ATI_REMOTE2 is not set
-# CONFIG_USB_KEYSPAN_REMOTE is not set
-# CONFIG_USB_APPLETOUCH is not set
-
-#
-# USB Imaging devices
-#
-# CONFIG_USB_MDC800 is not set
-# CONFIG_USB_MICROTEK is not set
-
-#
-# USB Multimedia devices
-#
-# CONFIG_USB_DABUSB is not set
-
-#
-# Video4Linux support is needed for USB Multimedia device support
-#
-
-#
-# USB Network Adapters
-#
-# CONFIG_USB_CATC is not set
-# CONFIG_USB_KAWETH is not set
-# CONFIG_USB_PEGASUS is not set
-# CONFIG_USB_RTL8150 is not set
-# CONFIG_USB_USBNET is not set
-# CONFIG_USB_MON is not set
-
-#
-# USB port drivers
-#
-
-#
-# USB Serial Converter support
-#
-# CONFIG_USB_SERIAL is not set
-
-#
-# USB Miscellaneous drivers
-#
-# CONFIG_USB_EMI62 is not set
-# CONFIG_USB_EMI26 is not set
-# CONFIG_USB_AUERSWALD is not set
-# CONFIG_USB_RIO500 is not set
-# CONFIG_USB_LEGOTOWER is not set
-# CONFIG_USB_LCD is not set
-# CONFIG_USB_LED is not set
-# CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGETKIT is not set
-# CONFIG_USB_PHIDGETSERVO is not set
-# CONFIG_USB_IDMOUSE is not set
-# CONFIG_USB_SISUSBVGA is not set
-# CONFIG_USB_LD is not set
-# CONFIG_USB_TEST is not set
-
-#
-# USB DSL modem support
-#
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# InfiniBand support
-#
-# CONFIG_INFINIBAND is not set
-
-#
-# EDAC - error detection and reporting (RAS)
-#
-
-#
-# File systems
-#
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
-# CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=y
 CONFIG_REISERFS_FS=y
-# CONFIG_REISERFS_CHECK is not set
-# CONFIG_REISERFS_PROC_INFO is not set
 CONFIG_REISERFS_FS_XATTR=y
 CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
-# CONFIG_JFS_FS is not set
-CONFIG_FS_POSIX_ACL=y
 CONFIG_XFS_FS=y
-CONFIG_XFS_EXPORT=y
-# CONFIG_XFS_QUOTA is not set
-# CONFIG_XFS_SECURITY is not set
-# CONFIG_XFS_POSIX_ACL is not set
-# CONFIG_XFS_RT is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-CONFIG_INOTIFY=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=y
 CONFIG_AUTOFS4_FS=y
-# CONFIG_FUSE_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
-# CONFIG_ZISOFS is not set
 CONFIG_UDF_FS=m
-CONFIG_UDF_NLS=y
-
-#
-# DOS/FAT/NT Filesystems
-#
-CONFIG_FAT_FS=y
-# CONFIG_MSDOS_FS is not set
 CONFIG_VFAT_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=437
-CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 CONFIG_NTFS_FS=m
-# CONFIG_NTFS_DEBUG is not set
-# CONFIG_NTFS_RW is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
-CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
-CONFIG_HUGETLB_PAGE=y
-CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
-# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
-# CONFIG_NFS_V3_ACL is not set
 CONFIG_NFS_V4=y
-CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
-# CONFIG_NFSD_V3_ACL is not set
 CONFIG_NFSD_V4=y
-CONFIG_NFSD_TCP=y
-CONFIG_LOCKD=m
-CONFIG_LOCKD_V4=y
-CONFIG_EXPORTFS=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=m
-CONFIG_SUNRPC_GSS=m
-CONFIG_RPCSEC_GSS_KRB5=m
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
 CONFIG_SMB_FS=m
 CONFIG_SMB_NLS_DEFAULT=y
-CONFIG_SMB_NLS_REMOTE="cp437"
 CONFIG_CIFS=m
-# CONFIG_CIFS_STATS is not set
-# CONFIG_CIFS_XATTR is not set
-# CONFIG_CIFS_EXPERIMENTAL is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
-
-#
-# Partition Types
-#
 CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
 CONFIG_SGI_PARTITION=y
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
 CONFIG_EFI_PARTITION=y
-
-#
-# Native Language Support
-#
-CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="iso8859-1"
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_737=m
 CONFIG_NLS_CODEPAGE_775=m
@@ -1145,7 +162,6 @@ CONFIG_NLS_CODEPAGE_874=m
 CONFIG_NLS_ISO8859_8=m
 CONFIG_NLS_CODEPAGE_1250=m
 CONFIG_NLS_CODEPAGE_1251=m
-# CONFIG_NLS_ASCII is not set
 CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_ISO8859_2=m
 CONFIG_NLS_ISO8859_3=m
@@ -1160,86 +176,10 @@ CONFIG_NLS_ISO8859_15=m
 CONFIG_NLS_KOI8_R=m
 CONFIG_NLS_KOI8_U=m
 CONFIG_NLS_UTF8=m
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
-CONFIG_CRC32=y
-# CONFIG_LIBCRC32C is not set
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_PENDING_IRQ=y
-
-#
-# Instrumentation Support
-#
-# CONFIG_PROFILING is not set
-# CONFIG_KPROBES is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
-CONFIG_LOG_BUF_SHIFT=20
-CONFIG_DETECT_SOFTLOCKUP=y
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_DEBUG_VM is not set
-CONFIG_FORCED_INLINING=y
-# CONFIG_RCU_TORTURE_TEST is not set
 CONFIG_IA64_GRANULE_16MB=y
-# CONFIG_IA64_GRANULE_64MB is not set
-# CONFIG_IA64_PRINT_HAZARDS is not set
-# CONFIG_DISABLE_VHPT is not set
-# CONFIG_IA64_DEBUG_CMPXCHG is not set
-# CONFIG_IA64_DEBUG_IRQ is not set
-CONFIG_SYSVIPC_COMPAT=y
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-CONFIG_CRYPTO=y
-# CONFIG_CRYPTO_HMAC is not set
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-CONFIG_CRYPTO_DES=m
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Hardware crypto devices
-#
diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig
index 949dc4670a0..54bc72eda30 100644
--- a/arch/ia64/configs/zx1_defconfig
+++ b/arch/ia64/configs/zx1_defconfig
@@ -1,1296 +1,121 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-rc5
-# Mon Feb 27 15:55:36 2006
-#
-
-#
-# Code maturity level options
-#
 CONFIG_EXPERIMENTAL=y
-CONFIG_LOCK_KERNEL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
-# CONFIG_POSIX_MQUEUE is not set
 CONFIG_BSD_PROCESS_ACCT=y
-# CONFIG_BSD_PROCESS_ACCT_V3 is not set
-CONFIG_SYSCTL=y
-# CONFIG_AUDIT is not set
-# CONFIG_IKCONFIG is not set
-# CONFIG_CPUSETS is not set
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-# CONFIG_EMBEDDED is not set
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_ALL is not set
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_SLAB=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KPROBES=y
 CONFIG_MODULES=y
-# CONFIG_MODULE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
-# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-# CONFIG_KMOD is not set
-
-#
-# Block layer
-#
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-CONFIG_DEFAULT_AS=y
-# CONFIG_DEFAULT_DEADLINE is not set
-# CONFIG_DEFAULT_CFQ is not set
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="anticipatory"
-
-#
-# Processor type and features
-#
-CONFIG_IA64=y
-CONFIG_64BIT=y
-CONFIG_MMU=y
-CONFIG_SWIOTLB=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
-CONFIG_EFI=y
-CONFIG_GENERIC_IOMAP=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
-CONFIG_DMA_IS_DMA32=y
-# CONFIG_IA64_GENERIC is not set
-# CONFIG_IA64_DIG is not set
 CONFIG_IA64_HP_ZX1=y
-# CONFIG_IA64_HP_ZX1_SWIOTLB is not set
-# CONFIG_IA64_SGI_SN2 is not set
-# CONFIG_IA64_HP_SIM is not set
-# CONFIG_ITANIUM is not set
 CONFIG_MCKINLEY=y
-# CONFIG_IA64_PAGE_SIZE_4KB is not set
-# CONFIG_IA64_PAGE_SIZE_8KB is not set
-CONFIG_IA64_PAGE_SIZE_16KB=y
-# CONFIG_IA64_PAGE_SIZE_64KB is not set
-CONFIG_PGTABLE_3=y
-# CONFIG_PGTABLE_4 is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_IA64_L1_CACHE_SHIFT=7
-# CONFIG_IA64_CYCLONE is not set
-CONFIG_IOSAPIC=y
-CONFIG_FORCE_MAX_ZONEORDER=17
 CONFIG_SMP=y
 CONFIG_NR_CPUS=16
-# CONFIG_HOTPLUG_CPU is not set
-# CONFIG_SCHED_SMT is not set
-# CONFIG_PREEMPT is not set
-CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_HOTPLUG_CPU=y
 CONFIG_FLATMEM_MANUAL=y
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
-CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
-CONFIG_VIRTUAL_MEM_MAP=y
-CONFIG_HOLES_IN_ZONE=y
-CONFIG_IA32_SUPPORT=y
-CONFIG_COMPAT=y
 CONFIG_IA64_MCA_RECOVERY=y
 CONFIG_PERFMON=y
 CONFIG_IA64_PALINFO=y
-
-#
-# Firmware Drivers
-#
+CONFIG_CRASH_DUMP=y
 CONFIG_EFI_VARS=y
-CONFIG_EFI_PCDP=y
-CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=y
-
-#
-# Power management and ACPI
-#
-CONFIG_PM=y
-CONFIG_PM_LEGACY=y
-# CONFIG_PM_DEBUG is not set
-
-#
-# ACPI (Advanced Configuration and Power Interface) Support
-#
-CONFIG_ACPI=y
-CONFIG_ACPI_BUTTON=y
-CONFIG_ACPI_FAN=y
-CONFIG_ACPI_PROCESSOR=y
-CONFIG_ACPI_THERMAL=y
-CONFIG_ACPI_BLACKLIST_YEAR=0
-# CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_EC=y
-CONFIG_ACPI_POWER=y
-CONFIG_ACPI_SYSTEM=y
-# CONFIG_ACPI_CONTAINER is not set
-
-#
-# CPU Frequency scaling
-#
-# CONFIG_CPU_FREQ is not set
-
-#
-# Bus options (PCI, PCMCIA)
-#
-CONFIG_PCI=y
-CONFIG_PCI_DOMAINS=y
-# CONFIG_PCI_MSI is not set
-CONFIG_PCI_LEGACY_PROC=y
-# CONFIG_PCI_DEBUG is not set
-
-#
-# PCI Hotplug Support
-#
 CONFIG_HOTPLUG_PCI=y
-# CONFIG_HOTPLUG_PCI_FAKE is not set
 CONFIG_HOTPLUG_PCI_ACPI=y
-# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
-# CONFIG_HOTPLUG_PCI_CPCI is not set
-# CONFIG_HOTPLUG_PCI_SHPC is not set
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
 CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
-
-#
-# IP: Virtual Server Configuration
-#
-# CONFIG_IP_VS is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_DEBUG is not set
-
-#
-# Core Netfilter Configuration
-#
-# CONFIG_NETFILTER_NETLINK is not set
-# CONFIG_NF_CONNTRACK is not set
-# CONFIG_NETFILTER_XTABLES is not set
-
-#
-# IP: Netfilter Configuration
-#
-# CONFIG_IP_NF_CONNTRACK is not set
-# CONFIG_IP_NF_QUEUE is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_IEEE80211 is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
-# CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-CONFIG_PNP=y
-# CONFIG_PNP_DEBUG is not set
-
-#
-# Protocols
-#
-CONFIG_PNPACPI=y
-
-#
-# Block devices
-#
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_UMEM is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=y
-# CONFIG_BLK_DEV_CRYPTOLOOP is not set
-# CONFIG_BLK_DEV_NBD is not set
-# CONFIG_BLK_DEV_SX8 is not set
-# CONFIG_BLK_DEV_UB is not set
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CDROM_PKTCDVD is not set
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
 CONFIG_IDE=y
-CONFIG_IDE_MAX_HWIFS=4
-CONFIG_BLK_DEV_IDE=y
-
-#
-# Please see Documentation/ide.txt for help/info on IDE drives
-#
-# CONFIG_BLK_DEV_IDE_SATA is not set
-CONFIG_BLK_DEV_IDEDISK=y
-# CONFIG_IDEDISK_MULTI_MODE is not set
 CONFIG_BLK_DEV_IDECD=y
-# CONFIG_BLK_DEV_IDETAPE is not set
-# CONFIG_BLK_DEV_IDEFLOPPY is not set
-# CONFIG_BLK_DEV_IDESCSI is not set
-# CONFIG_IDE_TASK_IOCTL is not set
-
-#
-# IDE chipset support/bugfixes
-#
-# CONFIG_IDE_GENERIC is not set
-# CONFIG_BLK_DEV_IDEPNP is not set
-CONFIG_BLK_DEV_IDEPCI=y
-CONFIG_IDEPCI_SHARE_IRQ=y
-# CONFIG_BLK_DEV_OFFBOARD is not set
 CONFIG_BLK_DEV_GENERIC=y
-# CONFIG_BLK_DEV_OPTI621 is not set
-CONFIG_BLK_DEV_IDEDMA_PCI=y
-# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
-# CONFIG_IDEDMA_PCI_AUTO is not set
-# CONFIG_BLK_DEV_AEC62XX is not set
-# CONFIG_BLK_DEV_ALI15X3 is not set
-# CONFIG_BLK_DEV_AMD74XX is not set
 CONFIG_BLK_DEV_CMD64X=y
-# CONFIG_BLK_DEV_TRIFLEX is not set
-# CONFIG_BLK_DEV_CY82C693 is not set
-# CONFIG_BLK_DEV_CS5520 is not set
-# CONFIG_BLK_DEV_CS5530 is not set
-# CONFIG_BLK_DEV_HPT34X is not set
-# CONFIG_BLK_DEV_HPT366 is not set
-# CONFIG_BLK_DEV_SC1200 is not set
-# CONFIG_BLK_DEV_PIIX is not set
-# CONFIG_BLK_DEV_IT821X is not set
-# CONFIG_BLK_DEV_NS87415 is not set
-# CONFIG_BLK_DEV_PDC202XX_OLD is not set
-# CONFIG_BLK_DEV_PDC202XX_NEW is not set
-# CONFIG_BLK_DEV_SVWKS is not set
-# CONFIG_BLK_DEV_SIIMAGE is not set
-# CONFIG_BLK_DEV_SLC90E66 is not set
-# CONFIG_BLK_DEV_TRM290 is not set
-# CONFIG_BLK_DEV_VIA82CXXX is not set
-# CONFIG_IDE_ARM is not set
-CONFIG_BLK_DEV_IDEDMA=y
-# CONFIG_IDEDMA_IVB is not set
-# CONFIG_IDEDMA_AUTO is not set
-# CONFIG_BLK_DEV_HD is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
 CONFIG_SCSI=y
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=y
 CONFIG_CHR_DEV_OSST=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_BLK_DEV_SR_VENDOR=y
 CONFIG_CHR_DEV_SG=y
-# CONFIG_CHR_DEV_SCH is not set
-
-#
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
-#
 CONFIG_SCSI_MULTI_LUN=y
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
-
-#
-# SCSI Transport Attributes
-#
-CONFIG_SCSI_SPI_ATTRS=y
-CONFIG_SCSI_FC_ATTRS=y
-# CONFIG_SCSI_ISCSI_ATTRS is not set
-# CONFIG_SCSI_SAS_ATTRS is not set
-
-#
-# SCSI low-level drivers
-#
-# CONFIG_ISCSI_TCP is not set
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
-# CONFIG_SCSI_3W_9XXX is not set
-# CONFIG_SCSI_ACARD is not set
-# CONFIG_SCSI_AACRAID is not set
-# CONFIG_SCSI_AIC7XXX is not set
-# CONFIG_SCSI_AIC7XXX_OLD is not set
-# CONFIG_SCSI_AIC79XX is not set
-# CONFIG_MEGARAID_NEWGEN is not set
-# CONFIG_MEGARAID_LEGACY is not set
-# CONFIG_MEGARAID_SAS is not set
-# CONFIG_SCSI_SATA is not set
-# CONFIG_SCSI_DMX3191D is not set
-# CONFIG_SCSI_FUTURE_DOMAIN is not set
-# CONFIG_SCSI_IPS is not set
-# CONFIG_SCSI_INITIO is not set
-# CONFIG_SCSI_INIA100 is not set
 CONFIG_SCSI_SYM53C8XX_2=y
-CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
-CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
-CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
-# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
-# CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_FC is not set
 CONFIG_SCSI_QLOGIC_1280=y
-# CONFIG_SCSI_QLA_FC is not set
-# CONFIG_SCSI_LPFC is not set
-# CONFIG_SCSI_DC395x is not set
-# CONFIG_SCSI_DC390T is not set
-# CONFIG_SCSI_DEBUG is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
 CONFIG_FUSION=y
 CONFIG_FUSION_SPI=y
 CONFIG_FUSION_FC=y
-# CONFIG_FUSION_SAS is not set
-CONFIG_FUSION_MAX_SGE=128
 CONFIG_FUSION_CTL=m
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
-# CONFIG_I2O is not set
-
-#
-# Network device support
-#
 CONFIG_NETDEVICES=y
 CONFIG_DUMMY=y
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-# CONFIG_NET_SB1000 is not set
-
-#
-# ARCnet devices
-#
-# CONFIG_ARCNET is not set
-
-#
-# PHY device support
-#
-# CONFIG_PHYLIB is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
 CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-# CONFIG_HAPPYMEAL is not set
-# CONFIG_SUNGEM is not set
-# CONFIG_CASSINI is not set
-# CONFIG_NET_VENDOR_3COM is not set
-
-#
-# Tulip family network device support
-#
 CONFIG_NET_TULIP=y
-# CONFIG_DE2104X is not set
 CONFIG_TULIP=y
 CONFIG_TULIP_MWI=y
 CONFIG_TULIP_MMIO=y
 CONFIG_TULIP_NAPI=y
 CONFIG_TULIP_NAPI_HW_MITIGATION=y
-# CONFIG_DE4X5 is not set
-# CONFIG_WINBOND_840 is not set
-# CONFIG_DM9102 is not set
-# CONFIG_ULI526X is not set
-# CONFIG_HP100 is not set
 CONFIG_NET_PCI=y
-# CONFIG_PCNET32 is not set
-# CONFIG_AMD8111_ETH is not set
-# CONFIG_ADAPTEC_STARFIRE is not set
-# CONFIG_B44 is not set
-# CONFIG_FORCEDETH is not set
-# CONFIG_DGRS is not set
-# CONFIG_EEPRO100 is not set
+CONFIG_NET_VENDOR_INTEL=y
 CONFIG_E100=y
-# CONFIG_FEALNX is not set
-# CONFIG_NATSEMI is not set
-# CONFIG_NE2K_PCI is not set
-# CONFIG_8139CP is not set
-# CONFIG_8139TOO is not set
-# CONFIG_SIS900 is not set
-# CONFIG_EPIC100 is not set
-# CONFIG_SUNDANCE is not set
-# CONFIG_VIA_RHINE is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
 CONFIG_E1000=y
-# CONFIG_E1000_NAPI is not set
-# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
-# CONFIG_SIS190 is not set
-# CONFIG_SKGE is not set
-# CONFIG_SKY2 is not set
-# CONFIG_SK98LIN is not set
-# CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
-# CONFIG_BNX2 is not set
-
-#
-# Ethernet (10000 Mbit)
-#
-# CONFIG_CHELSIO_T1 is not set
-# CONFIG_IXGB is not set
-# CONFIG_S2IO is not set
-
-#
-# Token Ring devices
-#
-# CONFIG_TR is not set
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_FDDI is not set
-# CONFIG_HIPPI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NET_FC is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
 CONFIG_INPUT_JOYDEV=y
-# CONFIG_INPUT_TSDEV is not set
 CONFIG_INPUT_EVDEV=y
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
 # CONFIG_SERIO_I8042 is not set
 # CONFIG_SERIO_SERPORT is not set
-# CONFIG_SERIO_PCIPS2 is not set
-# CONFIG_SERIO_RAW is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_ACPI=y
 CONFIG_SERIAL_8250_NR_UARTS=8
-CONFIG_SERIAL_8250_RUNTIME_UARTS=4
 CONFIG_SERIAL_8250_EXTENDED=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
-# CONFIG_SERIAL_8250_DETECT_IRQ is not set
-# CONFIG_SERIAL_8250_RSA is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_EFI_RTC=y
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
+CONFIG_I2C_CHARDEV=y
 CONFIG_AGP=y
 CONFIG_AGP_HP_ZX1=y
 CONFIG_DRM=y
-# CONFIG_DRM_TDFX is not set
-# CONFIG_DRM_R128 is not set
 CONFIG_DRM_RADEON=y
-# CONFIG_DRM_MGA is not set
-# CONFIG_DRM_SIS is not set
-# CONFIG_DRM_VIA is not set
-# CONFIG_DRM_SAVAGE is not set
-# CONFIG_RAW_DRIVER is not set
-# CONFIG_HPET is not set
-# CONFIG_HANGCHECK_TIMER is not set
-
-#
-# TPM devices
-#
-# CONFIG_TCG_TPM is not set
-# CONFIG_TELCLOCK is not set
-
-#
-# I2C support
-#
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-
-#
-# I2C Algorithms
-#
-CONFIG_I2C_ALGOBIT=y
-CONFIG_I2C_ALGOPCF=y
-# CONFIG_I2C_ALGOPCA is not set
-
-#
-# I2C Hardware Bus support
-#
-# CONFIG_I2C_ALI1535 is not set
-# CONFIG_I2C_ALI1563 is not set
-# CONFIG_I2C_ALI15X3 is not set
-# CONFIG_I2C_AMD756 is not set
-# CONFIG_I2C_AMD8111 is not set
-# CONFIG_I2C_I801 is not set
-# CONFIG_I2C_I810 is not set
-# CONFIG_I2C_PIIX4 is not set
-# CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PROSAVAGE is not set
-# CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_SCx200_ACB is not set
-# CONFIG_I2C_SIS5595 is not set
-# CONFIG_I2C_SIS630 is not set
-# CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_STUB is not set
-# CONFIG_I2C_VIA is not set
-# CONFIG_I2C_VIAPRO is not set
-# CONFIG_I2C_VOODOO3 is not set
-# CONFIG_I2C_PCA_ISA is not set
-
-#
-# Miscellaneous I2C Chip support
-#
-# CONFIG_SENSORS_DS1337 is not set
-# CONFIG_SENSORS_DS1374 is not set
-# CONFIG_SENSORS_EEPROM is not set
-# CONFIG_SENSORS_PCF8574 is not set
-# CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_RTC8564 is not set
-# CONFIG_SENSORS_MAX6875 is not set
-# CONFIG_RTC_X1205_I2C is not set
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
-
-#
-# SPI support
-#
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
-# CONFIG_W1 is not set
-
-#
-# Hardware Monitoring support
-#
-# CONFIG_HWMON is not set
-# CONFIG_HWMON_VID is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia Capabilities Port drivers
-#
-
-#
-# Multimedia devices
-#
-CONFIG_VIDEO_DEV=y
-
-#
-# Video For Linux
-#
-
-#
-# Video Adapters
-#
-# CONFIG_VIDEO_ADV_DEBUG is not set
-# CONFIG_VIDEO_BT848 is not set
-# CONFIG_VIDEO_CPIA is not set
-# CONFIG_VIDEO_SAA5246A is not set
-# CONFIG_VIDEO_SAA5249 is not set
-# CONFIG_TUNER_3036 is not set
-# CONFIG_VIDEO_STRADIS is not set
-# CONFIG_VIDEO_ZORAN is not set
-# CONFIG_VIDEO_SAA7134 is not set
-# CONFIG_VIDEO_MXB is not set
-# CONFIG_VIDEO_DPC is not set
-# CONFIG_VIDEO_HEXIUM_ORION is not set
-# CONFIG_VIDEO_HEXIUM_GEMINI is not set
-# CONFIG_VIDEO_CX88 is not set
-# CONFIG_VIDEO_EM28XX is not set
-# CONFIG_VIDEO_OVCAMCHIP is not set
-# CONFIG_VIDEO_AUDIO_DECODER is not set
-# CONFIG_VIDEO_DECODER is not set
-
-#
-# Radio Adapters
-#
-# CONFIG_RADIO_GEMTEK_PCI is not set
-# CONFIG_RADIO_MAXIRADIO is not set
-# CONFIG_RADIO_MAESTRO is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-CONFIG_FB=y
-CONFIG_FB_CFB_FILLRECT=y
-CONFIG_FB_CFB_COPYAREA=y
-CONFIG_FB_CFB_IMAGEBLIT=y
-# CONFIG_FB_MACMODES is not set
-CONFIG_FB_MODE_HELPERS=y
-# CONFIG_FB_TILEBLITTING is not set
-# CONFIG_FB_CIRRUS is not set
-# CONFIG_FB_PM2 is not set
-# CONFIG_FB_CYBER2000 is not set
-# CONFIG_FB_ASILIANT is not set
-# CONFIG_FB_IMSTT is not set
-# CONFIG_FB_S1D13XXX is not set
-# CONFIG_FB_NVIDIA is not set
-# CONFIG_FB_RIVA is not set
-# CONFIG_FB_MATROX is not set
-# CONFIG_FB_RADEON_OLD is not set
 CONFIG_FB_RADEON=y
-CONFIG_FB_RADEON_I2C=y
 CONFIG_FB_RADEON_DEBUG=y
-# CONFIG_FB_ATY128 is not set
-# CONFIG_FB_ATY is not set
-# CONFIG_FB_SAVAGE is not set
-# CONFIG_FB_SIS is not set
-# CONFIG_FB_NEOMAGIC is not set
-# CONFIG_FB_KYRO is not set
-# CONFIG_FB_3DFX is not set
-# CONFIG_FB_VOODOO1 is not set
-# CONFIG_FB_TRIDENT is not set
-# CONFIG_FB_VIRTUAL is not set
-
-#
-# Console display driver support
-#
-CONFIG_VGA_CONSOLE=y
-CONFIG_DUMMY_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
-# CONFIG_FONTS is not set
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-
-#
-# Logo configuration
-#
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
-CONFIG_LOGO_LINUX_CLUT224=y
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
-
-#
-# Sound
-#
 CONFIG_SOUND=y
-
-#
-# Advanced Linux Sound Architecture
-#
 CONFIG_SND=y
-CONFIG_SND_TIMER=y
-CONFIG_SND_PCM=y
-CONFIG_SND_HWDEP=y
-CONFIG_SND_RAWMIDI=y
 CONFIG_SND_SEQUENCER=y
-# CONFIG_SND_SEQ_DUMMY is not set
-CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=y
 CONFIG_SND_PCM_OSS=y
 CONFIG_SND_SEQUENCER_OSS=y
-# CONFIG_SND_DYNAMIC_MINORS is not set
-CONFIG_SND_SUPPORT_OLD_API=y
-# CONFIG_SND_VERBOSE_PRINTK is not set
-# CONFIG_SND_DEBUG is not set
-
-#
-# Generic devices
-#
-CONFIG_SND_MPU401_UART=y
-CONFIG_SND_OPL3_LIB=y
-CONFIG_SND_AC97_CODEC=y
-CONFIG_SND_AC97_BUS=y
-# CONFIG_SND_DUMMY is not set
-# CONFIG_SND_VIRMIDI is not set
-# CONFIG_SND_MTPAV is not set
-# CONFIG_SND_SERIAL_U16550 is not set
-# CONFIG_SND_MPU401 is not set
-
-#
-# PCI devices
-#
-# CONFIG_SND_AD1889 is not set
-# CONFIG_SND_ALI5451 is not set
-# CONFIG_SND_ATIIXP is not set
-# CONFIG_SND_ATIIXP_MODEM is not set
-# CONFIG_SND_AU8810 is not set
-# CONFIG_SND_AU8820 is not set
-# CONFIG_SND_AU8830 is not set
-# CONFIG_SND_AZT3328 is not set
-# CONFIG_SND_BT87X is not set
-# CONFIG_SND_CA0106 is not set
-# CONFIG_SND_CMIPCI is not set
-# CONFIG_SND_CS4281 is not set
-# CONFIG_SND_CS46XX is not set
-# CONFIG_SND_EMU10K1 is not set
-# CONFIG_SND_EMU10K1X is not set
-# CONFIG_SND_ENS1370 is not set
-# CONFIG_SND_ENS1371 is not set
-# CONFIG_SND_ES1938 is not set
-# CONFIG_SND_ES1968 is not set
 CONFIG_SND_FM801=y
-CONFIG_SND_FM801_TEA575X=y
-# CONFIG_SND_HDA_INTEL is not set
-# CONFIG_SND_HDSP is not set
-# CONFIG_SND_HDSPM is not set
-# CONFIG_SND_ICE1712 is not set
-# CONFIG_SND_ICE1724 is not set
-# CONFIG_SND_INTEL8X0 is not set
-# CONFIG_SND_INTEL8X0M is not set
-# CONFIG_SND_KORG1212 is not set
-# CONFIG_SND_MAESTRO3 is not set
-# CONFIG_SND_MIXART is not set
-# CONFIG_SND_NM256 is not set
-# CONFIG_SND_PCXHR is not set
-# CONFIG_SND_RME32 is not set
-# CONFIG_SND_RME96 is not set
-# CONFIG_SND_RME9652 is not set
-# CONFIG_SND_SONICVIBES is not set
-# CONFIG_SND_TRIDENT is not set
-# CONFIG_SND_VIA82XX is not set
-# CONFIG_SND_VIA82XX_MODEM is not set
-# CONFIG_SND_VX222 is not set
-# CONFIG_SND_YMFPCI is not set
-
-#
-# USB devices
-#
-# CONFIG_SND_USB_AUDIO is not set
-
-#
-# Open Sound System
-#
-# CONFIG_SOUND_PRIME is not set
-
-#
-# USB support
-#
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
-# CONFIG_USB_DEBUG is not set
-
-#
-# Miscellaneous USB options
-#
-# CONFIG_USB_DEVICEFS is not set
-CONFIG_USB_BANDWIDTH=y
-# CONFIG_USB_DYNAMIC_MINORS is not set
-# CONFIG_USB_SUSPEND is not set
-# CONFIG_USB_OTG is not set
-
-#
-# USB Host Controller Drivers
-#
+CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_SPLIT_ISO is not set
-# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
-# CONFIG_USB_ISP116X_HCD is not set
 CONFIG_USB_OHCI_HCD=y
-# CONFIG_USB_OHCI_BIG_ENDIAN is not set
-CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 CONFIG_USB_UHCI_HCD=y
-# CONFIG_USB_SL811_HCD is not set
-
-#
-# USB Device Class drivers
-#
-# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
-# CONFIG_USB_ACM is not set
-# CONFIG_USB_PRINTER is not set
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# may also be needed; see USB_STORAGE Help for more information
-#
 CONFIG_USB_STORAGE=y
-# CONFIG_USB_STORAGE_DEBUG is not set
-# CONFIG_USB_STORAGE_DATAFAB is not set
-# CONFIG_USB_STORAGE_FREECOM is not set
-# CONFIG_USB_STORAGE_ISD200 is not set
-# CONFIG_USB_STORAGE_DPCM is not set
-# CONFIG_USB_STORAGE_USBAT is not set
-# CONFIG_USB_STORAGE_SDDR09 is not set
-# CONFIG_USB_STORAGE_SDDR55 is not set
-# CONFIG_USB_STORAGE_JUMPSHOT is not set
-# CONFIG_USB_STORAGE_ALAUDA is not set
-# CONFIG_USB_LIBUSUAL is not set
-
-#
-# USB Input Devices
-#
-CONFIG_USB_HID=y
-CONFIG_USB_HIDINPUT=y
-# CONFIG_USB_HIDINPUT_POWERBOOK is not set
-# CONFIG_HID_FF is not set
-CONFIG_USB_HIDDEV=y
-# CONFIG_USB_AIPTEK is not set
-# CONFIG_USB_WACOM is not set
-# CONFIG_USB_ACECAD is not set
-# CONFIG_USB_KBTAB is not set
-# CONFIG_USB_POWERMATE is not set
-# CONFIG_USB_MTOUCH is not set
-# CONFIG_USB_ITMTOUCH is not set
-# CONFIG_USB_EGALAX is not set
-# CONFIG_USB_YEALINK is not set
-# CONFIG_USB_XPAD is not set
-# CONFIG_USB_ATI_REMOTE is not set
-# CONFIG_USB_ATI_REMOTE2 is not set
-# CONFIG_USB_KEYSPAN_REMOTE is not set
-# CONFIG_USB_APPLETOUCH is not set
-
-#
-# USB Imaging devices
-#
-# CONFIG_USB_MDC800 is not set
-# CONFIG_USB_MICROTEK is not set
-
-#
-# USB Multimedia devices
-#
-# CONFIG_USB_DABUSB is not set
-# CONFIG_USB_VICAM is not set
-# CONFIG_USB_DSBR is not set
-# CONFIG_USB_ET61X251 is not set
-# CONFIG_USB_IBMCAM is not set
-# CONFIG_USB_KONICAWC is not set
-# CONFIG_USB_OV511 is not set
-# CONFIG_USB_SE401 is not set
-# CONFIG_USB_SN9C102 is not set
-# CONFIG_USB_STV680 is not set
-# CONFIG_USB_PWC is not set
-
-#
-# USB Network Adapters
-#
-# CONFIG_USB_CATC is not set
-# CONFIG_USB_KAWETH is not set
-# CONFIG_USB_PEGASUS is not set
-# CONFIG_USB_RTL8150 is not set
-# CONFIG_USB_USBNET is not set
-CONFIG_USB_MON=y
-
-#
-# USB port drivers
-#
-
-#
-# USB Serial Converter support
-#
-# CONFIG_USB_SERIAL is not set
-
-#
-# USB Miscellaneous drivers
-#
-# CONFIG_USB_EMI62 is not set
-# CONFIG_USB_EMI26 is not set
-# CONFIG_USB_AUERSWALD is not set
-# CONFIG_USB_RIO500 is not set
-# CONFIG_USB_LEGOTOWER is not set
-# CONFIG_USB_LCD is not set
-# CONFIG_USB_LED is not set
-# CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGETKIT is not set
-# CONFIG_USB_PHIDGETSERVO is not set
-# CONFIG_USB_IDMOUSE is not set
-# CONFIG_USB_SISUSBVGA is not set
-# CONFIG_USB_LD is not set
-
-#
-# USB DSL modem support
-#
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# InfiniBand support
-#
-# CONFIG_INFINIBAND is not set
-
-#
-# EDAC - error detection and reporting (RAS)
-#
-
-#
-# File systems
-#
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
-# CONFIG_EXT2_FS_POSIX_ACL is not set
-# CONFIG_EXT2_FS_SECURITY is not set
-# CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_XATTR=y
-# CONFIG_EXT3_FS_POSIX_ACL is not set
-# CONFIG_EXT3_FS_SECURITY is not set
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=y
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-# CONFIG_XFS_FS is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-# CONFIG_INOTIFY is not set
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=y
-# CONFIG_AUTOFS4_FS is not set
-# CONFIG_FUSE_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
-# CONFIG_ZISOFS is not set
 CONFIG_UDF_FS=y
-CONFIG_UDF_NLS=y
-
-#
-# DOS/FAT/NT Filesystems
-#
-CONFIG_FAT_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=437
-CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
-CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
-CONFIG_HUGETLB_PAGE=y
-CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
-# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
-# CONFIG_NFS_V3_ACL is not set
 CONFIG_NFS_V4=y
-# CONFIG_NFS_DIRECTIO is not set
 CONFIG_NFSD=y
 CONFIG_NFSD_V3=y
-# CONFIG_NFSD_V3_ACL is not set
-# CONFIG_NFSD_V4 is not set
-# CONFIG_NFSD_TCP is not set
-CONFIG_LOCKD=y
-CONFIG_LOCKD_V4=y
-CONFIG_EXPORTFS=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=y
-CONFIG_SUNRPC_GSS=y
-CONFIG_RPCSEC_GSS_KRB5=y
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
-
-#
-# Partition Types
-#
 CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
 CONFIG_EFI_PARTITION=y
-
-#
-# Native Language Support
-#
-CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="iso8859-1"
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_737=y
 CONFIG_NLS_CODEPAGE_775=y
@@ -1312,9 +137,7 @@ CONFIG_NLS_CODEPAGE_932=y
 CONFIG_NLS_CODEPAGE_949=y
 CONFIG_NLS_CODEPAGE_874=y
 CONFIG_NLS_ISO8859_8=y
-# CONFIG_NLS_CODEPAGE_1250 is not set
 CONFIG_NLS_CODEPAGE_1251=y
-# CONFIG_NLS_ASCII is not set
 CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_ISO8859_2=y
 CONFIG_NLS_ISO8859_3=y
@@ -1329,86 +152,9 @@ CONFIG_NLS_ISO8859_15=y
 CONFIG_NLS_KOI8_R=y
 CONFIG_NLS_KOI8_U=y
 CONFIG_NLS_UTF8=y
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
-CONFIG_CRC32=y
-# CONFIG_LIBCRC32C is not set
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_PENDING_IRQ=y
-
-#
-# Instrumentation Support
-#
-# CONFIG_PROFILING is not set
-CONFIG_KPROBES=y
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
-CONFIG_LOG_BUF_SHIFT=17
-CONFIG_DETECT_SOFTLOCKUP=y
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_DEBUG_VM is not set
-CONFIG_FORCED_INLINING=y
-# CONFIG_RCU_TORTURE_TEST is not set
-CONFIG_IA64_GRANULE_16MB=y
-# CONFIG_IA64_GRANULE_64MB is not set
 CONFIG_IA64_PRINT_HAZARDS=y
-# CONFIG_DISABLE_VHPT is not set
-# CONFIG_IA64_DEBUG_CMPXCHG is not set
-# CONFIG_IA64_DEBUG_IRQ is not set
-CONFIG_SYSVIPC_COMPAT=y
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-CONFIG_CRYPTO=y
-# CONFIG_CRYPTO_HMAC is not set
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Hardware crypto devices
-#
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig
deleted file mode 100644
index 6cba55da572..00000000000
--- a/arch/ia64/defconfig
+++ /dev/null
@@ -1,1394 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-rc5
-# Mon Feb 27 16:02:28 2006
-#
-
-#
-# Code maturity level options
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_LOCK_KERNEL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-# CONFIG_BSD_PROCESS_ACCT is not set
-CONFIG_SYSCTL=y
-# CONFIG_AUDIT is not set
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-# CONFIG_CPUSETS is not set
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-# CONFIG_EMBEDDED is not set
-CONFIG_KALLSYMS=y
-CONFIG_KALLSYMS_ALL=y
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_SLAB=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
-CONFIG_MODVERSIONS=y
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-CONFIG_STOP_MACHINE=y
-
-#
-# Block layer
-#
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-CONFIG_DEFAULT_AS=y
-# CONFIG_DEFAULT_DEADLINE is not set
-# CONFIG_DEFAULT_CFQ is not set
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="anticipatory"
-
-#
-# Processor type and features
-#
-CONFIG_IA64=y
-CONFIG_64BIT=y
-CONFIG_MMU=y
-CONFIG_SWIOTLB=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
-CONFIG_EFI=y
-CONFIG_GENERIC_IOMAP=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
-CONFIG_DMA_IS_DMA32=y
-CONFIG_IA64_GENERIC=y
-# CONFIG_IA64_DIG is not set
-# CONFIG_IA64_HP_ZX1 is not set
-# CONFIG_IA64_HP_ZX1_SWIOTLB is not set
-# CONFIG_IA64_SGI_SN2 is not set
-# CONFIG_IA64_HP_SIM is not set
-# CONFIG_ITANIUM is not set
-CONFIG_MCKINLEY=y
-# CONFIG_IA64_PAGE_SIZE_4KB is not set
-# CONFIG_IA64_PAGE_SIZE_8KB is not set
-CONFIG_IA64_PAGE_SIZE_16KB=y
-# CONFIG_IA64_PAGE_SIZE_64KB is not set
-CONFIG_PGTABLE_3=y
-# CONFIG_PGTABLE_4 is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_IA64_L1_CACHE_SHIFT=7
-CONFIG_IA64_CYCLONE=y
-CONFIG_IOSAPIC=y
-# CONFIG_IA64_SGI_SN_XP is not set
-CONFIG_FORCE_MAX_ZONEORDER=17
-CONFIG_SMP=y
-CONFIG_NR_CPUS=512
-CONFIG_IA64_NR_NODES=256
-CONFIG_HOTPLUG_CPU=y
-# CONFIG_SCHED_SMT is not set
-# CONFIG_PREEMPT is not set
-CONFIG_SELECT_MEMORY_MODEL=y
-# CONFIG_FLATMEM_MANUAL is not set
-CONFIG_DISCONTIGMEM_MANUAL=y
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_DISCONTIGMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-CONFIG_NEED_MULTIPLE_NODES=y
-# CONFIG_SPARSEMEM_STATIC is not set
-CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_MIGRATION=y
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
-CONFIG_NUMA=y
-CONFIG_VIRTUAL_MEM_MAP=y
-CONFIG_HOLES_IN_ZONE=y
-CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
-CONFIG_IA32_SUPPORT=y
-CONFIG_COMPAT=y
-CONFIG_IA64_MCA_RECOVERY=y
-CONFIG_PERFMON=y
-CONFIG_IA64_PALINFO=y
-CONFIG_SGI_SN=y
-
-#
-# Firmware Drivers
-#
-CONFIG_EFI_VARS=y
-CONFIG_EFI_PCDP=y
-CONFIG_BINFMT_ELF=y
-CONFIG_BINFMT_MISC=m
-
-#
-# Power management and ACPI
-#
-CONFIG_PM=y
-CONFIG_PM_LEGACY=y
-# CONFIG_PM_DEBUG is not set
-
-#
-# ACPI (Advanced Configuration and Power Interface) Support
-#
-CONFIG_ACPI=y
-CONFIG_ACPI_BUTTON=m
-CONFIG_ACPI_FAN=m
-CONFIG_ACPI_PROCESSOR=m
-CONFIG_ACPI_HOTPLUG_CPU=y
-CONFIG_ACPI_THERMAL=m
-CONFIG_ACPI_NUMA=y
-CONFIG_ACPI_BLACKLIST_YEAR=0
-# CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_EC=y
-CONFIG_ACPI_POWER=y
-CONFIG_ACPI_SYSTEM=y
-CONFIG_ACPI_CONTAINER=m
-
-#
-# CPU Frequency scaling
-#
-# CONFIG_CPU_FREQ is not set
-
-#
-# Bus options (PCI, PCMCIA)
-#
-CONFIG_PCI=y
-CONFIG_PCI_DOMAINS=y
-# CONFIG_PCI_MSI is not set
-CONFIG_PCI_LEGACY_PROC=y
-# CONFIG_PCI_DEBUG is not set
-
-#
-# PCI Hotplug Support
-#
-CONFIG_HOTPLUG_PCI=m
-# CONFIG_HOTPLUG_PCI_FAKE is not set
-CONFIG_HOTPLUG_PCI_ACPI=m
-# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
-# CONFIG_HOTPLUG_PCI_CPCI is not set
-# CONFIG_HOTPLUG_PCI_SHPC is not set
-# CONFIG_HOTPLUG_PCI_SGI is not set
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_NETDEBUG is not set
-CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
-CONFIG_ARPD=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
-# CONFIG_IPV6 is not set
-# CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_IEEE80211 is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=m
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
-# CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-CONFIG_PNP=y
-# CONFIG_PNP_DEBUG is not set
-
-#
-# Protocols
-#
-CONFIG_PNPACPI=y
-
-#
-# Block devices
-#
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_UMEM is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-# CONFIG_BLK_DEV_SX8 is not set
-# CONFIG_BLK_DEV_UB is not set
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CDROM_PKTCDVD is not set
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-CONFIG_IDE=y
-CONFIG_IDE_MAX_HWIFS=4
-CONFIG_BLK_DEV_IDE=y
-
-#
-# Please see Documentation/ide.txt for help/info on IDE drives
-#
-# CONFIG_BLK_DEV_IDE_SATA is not set
-CONFIG_BLK_DEV_IDEDISK=y
-# CONFIG_IDEDISK_MULTI_MODE is not set
-CONFIG_BLK_DEV_IDECD=y
-# CONFIG_BLK_DEV_IDETAPE is not set
-CONFIG_BLK_DEV_IDEFLOPPY=y
-CONFIG_BLK_DEV_IDESCSI=m
-# CONFIG_IDE_TASK_IOCTL is not set
-
-#
-# IDE chipset support/bugfixes
-#
-# CONFIG_IDE_GENERIC is not set
-# CONFIG_BLK_DEV_IDEPNP is not set
-CONFIG_BLK_DEV_IDEPCI=y
-# CONFIG_IDEPCI_SHARE_IRQ is not set
-# CONFIG_BLK_DEV_OFFBOARD is not set
-CONFIG_BLK_DEV_GENERIC=y
-# CONFIG_BLK_DEV_OPTI621 is not set
-CONFIG_BLK_DEV_IDEDMA_PCI=y
-# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
-CONFIG_IDEDMA_PCI_AUTO=y
-# CONFIG_IDEDMA_ONLYDISK is not set
-# CONFIG_BLK_DEV_AEC62XX is not set
-# CONFIG_BLK_DEV_ALI15X3 is not set
-# CONFIG_BLK_DEV_AMD74XX is not set
-CONFIG_BLK_DEV_CMD64X=y
-# CONFIG_BLK_DEV_TRIFLEX is not set
-# CONFIG_BLK_DEV_CY82C693 is not set
-# CONFIG_BLK_DEV_CS5520 is not set
-# CONFIG_BLK_DEV_CS5530 is not set
-# CONFIG_BLK_DEV_HPT34X is not set
-# CONFIG_BLK_DEV_HPT366 is not set
-# CONFIG_BLK_DEV_SC1200 is not set
-CONFIG_BLK_DEV_PIIX=y
-# CONFIG_BLK_DEV_IT821X is not set
-# CONFIG_BLK_DEV_NS87415 is not set
-# CONFIG_BLK_DEV_PDC202XX_OLD is not set
-# CONFIG_BLK_DEV_PDC202XX_NEW is not set
-# CONFIG_BLK_DEV_SVWKS is not set
-CONFIG_BLK_DEV_SGIIOC4=y
-# CONFIG_BLK_DEV_SIIMAGE is not set
-# CONFIG_BLK_DEV_SLC90E66 is not set
-# CONFIG_BLK_DEV_TRM290 is not set
-# CONFIG_BLK_DEV_VIA82CXXX is not set
-# CONFIG_IDE_ARM is not set
-CONFIG_BLK_DEV_IDEDMA=y
-# CONFIG_IDEDMA_IVB is not set
-CONFIG_IDEDMA_AUTO=y
-# CONFIG_BLK_DEV_HD is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
-CONFIG_SCSI=y
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=m
-# CONFIG_CHR_DEV_OSST is not set
-CONFIG_BLK_DEV_SR=m
-# CONFIG_BLK_DEV_SR_VENDOR is not set
-CONFIG_CHR_DEV_SG=m
-# CONFIG_CHR_DEV_SCH is not set
-
-#
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
-#
-# CONFIG_SCSI_MULTI_LUN is not set
-# CONFIG_SCSI_CONSTANTS is not set
-# CONFIG_SCSI_LOGGING is not set
-
-#
-# SCSI Transport Attributes
-#
-CONFIG_SCSI_SPI_ATTRS=y
-CONFIG_SCSI_FC_ATTRS=y
-# CONFIG_SCSI_ISCSI_ATTRS is not set
-# CONFIG_SCSI_SAS_ATTRS is not set
-
-#
-# SCSI low-level drivers
-#
-# CONFIG_ISCSI_TCP is not set
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
-# CONFIG_SCSI_3W_9XXX is not set
-# CONFIG_SCSI_ACARD is not set
-# CONFIG_SCSI_AACRAID is not set
-# CONFIG_SCSI_AIC7XXX is not set
-# CONFIG_SCSI_AIC7XXX_OLD is not set
-# CONFIG_SCSI_AIC79XX is not set
-# CONFIG_MEGARAID_NEWGEN is not set
-# CONFIG_MEGARAID_LEGACY is not set
-# CONFIG_MEGARAID_SAS is not set
-CONFIG_SCSI_SATA=y
-# CONFIG_SCSI_SATA_AHCI is not set
-# CONFIG_SCSI_SATA_SVW is not set
-# CONFIG_SCSI_ATA_PIIX is not set
-# CONFIG_SCSI_SATA_MV is not set
-# CONFIG_SCSI_SATA_NV is not set
-# CONFIG_SCSI_PDC_ADMA is not set
-# CONFIG_SCSI_SATA_QSTOR is not set
-# CONFIG_SCSI_SATA_PROMISE is not set
-# CONFIG_SCSI_SATA_SX4 is not set
-# CONFIG_SCSI_SATA_SIL is not set
-# CONFIG_SCSI_SATA_SIL24 is not set
-# CONFIG_SCSI_SATA_SIS is not set
-# CONFIG_SCSI_SATA_ULI is not set
-# CONFIG_SCSI_SATA_VIA is not set
-CONFIG_SCSI_SATA_VITESSE=y
-# CONFIG_SCSI_DMX3191D is not set
-# CONFIG_SCSI_FUTURE_DOMAIN is not set
-# CONFIG_SCSI_IPS is not set
-# CONFIG_SCSI_INITIO is not set
-# CONFIG_SCSI_INIA100 is not set
-CONFIG_SCSI_SYM53C8XX_2=y
-CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
-CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
-CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
-# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
-# CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_FC is not set
-CONFIG_SCSI_QLOGIC_1280=y
-# CONFIG_SCSI_QLA_FC is not set
-# CONFIG_SCSI_LPFC is not set
-# CONFIG_SCSI_DC395x is not set
-# CONFIG_SCSI_DC390T is not set
-# CONFIG_SCSI_DEBUG is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=m
-CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
-CONFIG_MD_RAID1=m
-# CONFIG_MD_RAID10 is not set
-CONFIG_MD_RAID5=m
-CONFIG_MD_RAID6=m
-CONFIG_MD_MULTIPATH=m
-# CONFIG_MD_FAULTY is not set
-CONFIG_BLK_DEV_DM=m
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-# CONFIG_DM_MULTIPATH_EMC is not set
-
-#
-# Fusion MPT device support
-#
-CONFIG_FUSION=y
-CONFIG_FUSION_SPI=y
-CONFIG_FUSION_FC=m
-# CONFIG_FUSION_SAS is not set
-CONFIG_FUSION_MAX_SGE=128
-# CONFIG_FUSION_CTL is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
-# CONFIG_I2O is not set
-
-#
-# Network device support
-#
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=m
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-# CONFIG_NET_SB1000 is not set
-
-#
-# ARCnet devices
-#
-# CONFIG_ARCNET is not set
-
-#
-# PHY device support
-#
-# CONFIG_PHYLIB is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=m
-# CONFIG_HAPPYMEAL is not set
-# CONFIG_SUNGEM is not set
-# CONFIG_CASSINI is not set
-# CONFIG_NET_VENDOR_3COM is not set
-
-#
-# Tulip family network device support
-#
-CONFIG_NET_TULIP=y
-# CONFIG_DE2104X is not set
-CONFIG_TULIP=m
-# CONFIG_TULIP_MWI is not set
-# CONFIG_TULIP_MMIO is not set
-# CONFIG_TULIP_NAPI is not set
-# CONFIG_DE4X5 is not set
-# CONFIG_WINBOND_840 is not set
-# CONFIG_DM9102 is not set
-# CONFIG_ULI526X is not set
-# CONFIG_HP100 is not set
-CONFIG_NET_PCI=y
-# CONFIG_PCNET32 is not set
-# CONFIG_AMD8111_ETH is not set
-# CONFIG_ADAPTEC_STARFIRE is not set
-# CONFIG_B44 is not set
-# CONFIG_FORCEDETH is not set
-# CONFIG_DGRS is not set
-CONFIG_EEPRO100=m
-CONFIG_E100=m
-# CONFIG_FEALNX is not set
-# CONFIG_NATSEMI is not set
-# CONFIG_NE2K_PCI is not set
-# CONFIG_8139CP is not set
-# CONFIG_8139TOO is not set
-# CONFIG_SIS900 is not set
-# CONFIG_EPIC100 is not set
-# CONFIG_SUNDANCE is not set
-# CONFIG_VIA_RHINE is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
-CONFIG_E1000=y
-# CONFIG_E1000_NAPI is not set
-# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
-# CONFIG_SIS190 is not set
-# CONFIG_SKGE is not set
-# CONFIG_SKY2 is not set
-# CONFIG_SK98LIN is not set
-# CONFIG_VIA_VELOCITY is not set
-CONFIG_TIGON3=y
-# CONFIG_BNX2 is not set
-
-#
-# Ethernet (10000 Mbit)
-#
-# CONFIG_CHELSIO_T1 is not set
-# CONFIG_IXGB is not set
-# CONFIG_S2IO is not set
-
-#
-# Token Ring devices
-#
-# CONFIG_TR is not set
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_FDDI is not set
-# CONFIG_HIPPI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NET_FC is not set
-# CONFIG_SHAPER is not set
-CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_RX is not set
-# CONFIG_NETPOLL_TRAP is not set
-CONFIG_NET_POLL_CONTROLLER=y
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-CONFIG_INPUT_KEYBOARD=y
-CONFIG_KEYBOARD_ATKBD=y
-# CONFIG_KEYBOARD_SUNKBD is not set
-# CONFIG_KEYBOARD_LKKBD is not set
-# CONFIG_KEYBOARD_XTKBD is not set
-# CONFIG_KEYBOARD_NEWTON is not set
-CONFIG_INPUT_MOUSE=y
-CONFIG_MOUSE_PS2=y
-# CONFIG_MOUSE_SERIAL is not set
-# CONFIG_MOUSE_VSXXXAA is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-CONFIG_SERIO_I8042=y
-# CONFIG_SERIO_SERPORT is not set
-# CONFIG_SERIO_PCIPS2 is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
-CONFIG_GAMEPORT=m
-# CONFIG_GAMEPORT_NS558 is not set
-# CONFIG_GAMEPORT_L4 is not set
-# CONFIG_GAMEPORT_EMU10K1 is not set
-# CONFIG_GAMEPORT_FM801 is not set
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
-CONFIG_SERIAL_NONSTANDARD=y
-# CONFIG_COMPUTONE is not set
-# CONFIG_ROCKETPORT is not set
-# CONFIG_CYCLADES is not set
-# CONFIG_DIGIEPCA is not set
-# CONFIG_MOXA_INTELLIO is not set
-# CONFIG_MOXA_SMARTIO is not set
-# CONFIG_ISI is not set
-# CONFIG_SYNCLINKMP is not set
-# CONFIG_SYNCLINK_GT is not set
-# CONFIG_N_HDLC is not set
-# CONFIG_SPECIALIX is not set
-# CONFIG_SX is not set
-# CONFIG_STALDRV is not set
-CONFIG_SGI_SNSC=y
-CONFIG_SGI_TIOCX=y
-CONFIG_SGI_MBCS=m
-
-#
-# Serial drivers
-#
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_ACPI=y
-CONFIG_SERIAL_8250_NR_UARTS=6
-CONFIG_SERIAL_8250_RUNTIME_UARTS=4
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-# CONFIG_SERIAL_8250_DETECT_IRQ is not set
-# CONFIG_SERIAL_8250_RSA is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_SERIAL_SGI_L1_CONSOLE=y
-# CONFIG_SERIAL_JSM is not set
-CONFIG_SERIAL_SGI_IOC4=y
-# CONFIG_SERIAL_SGI_IOC3 is not set
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_HW_RANDOM is not set
-CONFIG_EFI_RTC=y
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-CONFIG_AGP=m
-CONFIG_AGP_I460=m
-CONFIG_AGP_HP_ZX1=m
-CONFIG_AGP_SGI_TIOCA=m
-CONFIG_DRM=m
-CONFIG_DRM_TDFX=m
-CONFIG_DRM_R128=m
-CONFIG_DRM_RADEON=m
-CONFIG_DRM_MGA=m
-CONFIG_DRM_SIS=m
-# CONFIG_DRM_VIA is not set
-# CONFIG_DRM_SAVAGE is not set
-CONFIG_RAW_DRIVER=m
-CONFIG_MAX_RAW_DEVS=256
-CONFIG_HPET=y
-# CONFIG_HPET_RTC_IRQ is not set
-CONFIG_HPET_MMAP=y
-# CONFIG_HANGCHECK_TIMER is not set
-CONFIG_MMTIMER=y
-
-#
-# TPM devices
-#
-# CONFIG_TCG_TPM is not set
-# CONFIG_TELCLOCK is not set
-
-#
-# I2C support
-#
-# CONFIG_I2C is not set
-
-#
-# SPI support
-#
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
-# CONFIG_W1 is not set
-
-#
-# Hardware Monitoring support
-#
-CONFIG_HWMON=y
-# CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia Capabilities Port drivers
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Console display driver support
-#
-CONFIG_VGA_CONSOLE=y
-CONFIG_DUMMY_CONSOLE=y
-
-#
-# Sound
-#
-CONFIG_SOUND=m
-
-#
-# Advanced Linux Sound Architecture
-#
-CONFIG_SND=m
-CONFIG_SND_TIMER=m
-CONFIG_SND_PCM=m
-CONFIG_SND_HWDEP=m
-CONFIG_SND_RAWMIDI=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
-CONFIG_SND_OSSEMUL=y
-CONFIG_SND_MIXER_OSS=m
-CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
-# CONFIG_SND_DYNAMIC_MINORS is not set
-CONFIG_SND_SUPPORT_OLD_API=y
-CONFIG_SND_VERBOSE_PRINTK=y
-# CONFIG_SND_DEBUG is not set
-
-#
-# Generic devices
-#
-CONFIG_SND_MPU401_UART=m
-CONFIG_SND_OPL3_LIB=m
-CONFIG_SND_AC97_CODEC=m
-CONFIG_SND_AC97_BUS=m
-CONFIG_SND_DUMMY=m
-CONFIG_SND_VIRMIDI=m
-CONFIG_SND_MTPAV=m
-CONFIG_SND_SERIAL_U16550=m
-CONFIG_SND_MPU401=m
-
-#
-# PCI devices
-#
-# CONFIG_SND_AD1889 is not set
-# CONFIG_SND_ALI5451 is not set
-# CONFIG_SND_ATIIXP is not set
-# CONFIG_SND_ATIIXP_MODEM is not set
-# CONFIG_SND_AU8810 is not set
-# CONFIG_SND_AU8820 is not set
-# CONFIG_SND_AU8830 is not set
-# CONFIG_SND_AZT3328 is not set
-# CONFIG_SND_BT87X is not set
-# CONFIG_SND_CA0106 is not set
-# CONFIG_SND_CMIPCI is not set
-CONFIG_SND_CS4281=m
-CONFIG_SND_CS46XX=m
-CONFIG_SND_CS46XX_NEW_DSP=y
-CONFIG_SND_EMU10K1=m
-# CONFIG_SND_EMU10K1X is not set
-# CONFIG_SND_ENS1370 is not set
-# CONFIG_SND_ENS1371 is not set
-# CONFIG_SND_ES1938 is not set
-# CONFIG_SND_ES1968 is not set
-CONFIG_SND_FM801=m
-# CONFIG_SND_FM801_TEA575X is not set
-# CONFIG_SND_HDA_INTEL is not set
-# CONFIG_SND_HDSP is not set
-# CONFIG_SND_HDSPM is not set
-# CONFIG_SND_ICE1712 is not set
-# CONFIG_SND_ICE1724 is not set
-# CONFIG_SND_INTEL8X0 is not set
-# CONFIG_SND_INTEL8X0M is not set
-# CONFIG_SND_KORG1212 is not set
-# CONFIG_SND_MAESTRO3 is not set
-# CONFIG_SND_MIXART is not set
-# CONFIG_SND_NM256 is not set
-# CONFIG_SND_PCXHR is not set
-# CONFIG_SND_RME32 is not set
-# CONFIG_SND_RME96 is not set
-# CONFIG_SND_RME9652 is not set
-# CONFIG_SND_SONICVIBES is not set
-# CONFIG_SND_TRIDENT is not set
-# CONFIG_SND_VIA82XX is not set
-# CONFIG_SND_VIA82XX_MODEM is not set
-# CONFIG_SND_VX222 is not set
-# CONFIG_SND_YMFPCI is not set
-
-#
-# USB devices
-#
-# CONFIG_SND_USB_AUDIO is not set
-
-#
-# Open Sound System
-#
-# CONFIG_SOUND_PRIME is not set
-
-#
-# USB support
-#
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
-CONFIG_USB=m
-# CONFIG_USB_DEBUG is not set
-
-#
-# Miscellaneous USB options
-#
-CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_BANDWIDTH is not set
-# CONFIG_USB_DYNAMIC_MINORS is not set
-# CONFIG_USB_SUSPEND is not set
-# CONFIG_USB_OTG is not set
-
-#
-# USB Host Controller Drivers
-#
-CONFIG_USB_EHCI_HCD=m
-# CONFIG_USB_EHCI_SPLIT_ISO is not set
-# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
-# CONFIG_USB_ISP116X_HCD is not set
-CONFIG_USB_OHCI_HCD=m
-# CONFIG_USB_OHCI_BIG_ENDIAN is not set
-CONFIG_USB_OHCI_LITTLE_ENDIAN=y
-CONFIG_USB_UHCI_HCD=m
-# CONFIG_USB_SL811_HCD is not set
-
-#
-# USB Device Class drivers
-#
-# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
-# CONFIG_USB_ACM is not set
-# CONFIG_USB_PRINTER is not set
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# may also be needed; see USB_STORAGE Help for more information
-#
-CONFIG_USB_STORAGE=m
-# CONFIG_USB_STORAGE_DEBUG is not set
-# CONFIG_USB_STORAGE_DATAFAB is not set
-# CONFIG_USB_STORAGE_FREECOM is not set
-# CONFIG_USB_STORAGE_ISD200 is not set
-# CONFIG_USB_STORAGE_DPCM is not set
-# CONFIG_USB_STORAGE_USBAT is not set
-# CONFIG_USB_STORAGE_SDDR09 is not set
-# CONFIG_USB_STORAGE_SDDR55 is not set
-# CONFIG_USB_STORAGE_JUMPSHOT is not set
-# CONFIG_USB_STORAGE_ALAUDA is not set
-# CONFIG_USB_LIBUSUAL is not set
-
-#
-# USB Input Devices
-#
-CONFIG_USB_HID=m
-CONFIG_USB_HIDINPUT=y
-# CONFIG_USB_HIDINPUT_POWERBOOK is not set
-# CONFIG_HID_FF is not set
-# CONFIG_USB_HIDDEV is not set
-
-#
-# USB HID Boot Protocol drivers
-#
-# CONFIG_USB_KBD is not set
-# CONFIG_USB_MOUSE is not set
-# CONFIG_USB_AIPTEK is not set
-# CONFIG_USB_WACOM is not set
-# CONFIG_USB_ACECAD is not set
-# CONFIG_USB_KBTAB is not set
-# CONFIG_USB_POWERMATE is not set
-# CONFIG_USB_MTOUCH is not set
-# CONFIG_USB_ITMTOUCH is not set
-# CONFIG_USB_EGALAX is not set
-# CONFIG_USB_YEALINK is not set
-# CONFIG_USB_XPAD is not set
-# CONFIG_USB_ATI_REMOTE is not set
-# CONFIG_USB_ATI_REMOTE2 is not set
-# CONFIG_USB_KEYSPAN_REMOTE is not set
-# CONFIG_USB_APPLETOUCH is not set
-
-#
-# USB Imaging devices
-#
-# CONFIG_USB_MDC800 is not set
-# CONFIG_USB_MICROTEK is not set
-
-#
-# USB Multimedia devices
-#
-# CONFIG_USB_DABUSB is not set
-
-#
-# Video4Linux support is needed for USB Multimedia device support
-#
-
-#
-# USB Network Adapters
-#
-# CONFIG_USB_CATC is not set
-# CONFIG_USB_KAWETH is not set
-# CONFIG_USB_PEGASUS is not set
-# CONFIG_USB_RTL8150 is not set
-# CONFIG_USB_USBNET is not set
-CONFIG_USB_MON=y
-
-#
-# USB port drivers
-#
-
-#
-# USB Serial Converter support
-#
-# CONFIG_USB_SERIAL is not set
-
-#
-# USB Miscellaneous drivers
-#
-# CONFIG_USB_EMI62 is not set
-# CONFIG_USB_EMI26 is not set
-# CONFIG_USB_AUERSWALD is not set
-# CONFIG_USB_RIO500 is not set
-# CONFIG_USB_LEGOTOWER is not set
-# CONFIG_USB_LCD is not set
-# CONFIG_USB_LED is not set
-# CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGETKIT is not set
-# CONFIG_USB_PHIDGETSERVO is not set
-# CONFIG_USB_IDMOUSE is not set
-# CONFIG_USB_SISUSBVGA is not set
-# CONFIG_USB_LD is not set
-# CONFIG_USB_TEST is not set
-
-#
-# USB DSL modem support
-#
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# InfiniBand support
-#
-CONFIG_INFINIBAND=m
-# CONFIG_INFINIBAND_USER_MAD is not set
-# CONFIG_INFINIBAND_USER_ACCESS is not set
-CONFIG_INFINIBAND_MTHCA=m
-# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
-CONFIG_INFINIBAND_IPOIB=m
-# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
-# CONFIG_INFINIBAND_SRP is not set
-
-#
-# SN Devices
-#
-CONFIG_SGI_IOC4=y
-CONFIG_SGI_IOC3=m
-
-#
-# EDAC - error detection and reporting (RAS)
-#
-
-#
-# File systems
-#
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-# CONFIG_EXT2_FS_XIP is not set
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_XATTR=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=y
-CONFIG_REISERFS_FS=y
-# CONFIG_REISERFS_CHECK is not set
-# CONFIG_REISERFS_PROC_INFO is not set
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
-# CONFIG_JFS_FS is not set
-CONFIG_FS_POSIX_ACL=y
-CONFIG_XFS_FS=y
-CONFIG_XFS_EXPORT=y
-# CONFIG_XFS_QUOTA is not set
-# CONFIG_XFS_SECURITY is not set
-# CONFIG_XFS_POSIX_ACL is not set
-# CONFIG_XFS_RT is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-CONFIG_INOTIFY=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-CONFIG_AUTOFS_FS=y
-CONFIG_AUTOFS4_FS=y
-# CONFIG_FUSE_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-# CONFIG_ZISOFS is not set
-CONFIG_UDF_FS=m
-CONFIG_UDF_NLS=y
-
-#
-# DOS/FAT/NT Filesystems
-#
-CONFIG_FAT_FS=y
-# CONFIG_MSDOS_FS is not set
-CONFIG_VFAT_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=437
-CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
-CONFIG_NTFS_FS=m
-# CONFIG_NTFS_DEBUG is not set
-# CONFIG_NTFS_RW is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_SYSFS=y
-CONFIG_TMPFS=y
-CONFIG_HUGETLBFS=y
-CONFIG_HUGETLB_PAGE=y
-CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
-# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-CONFIG_NFS_FS=m
-CONFIG_NFS_V3=y
-# CONFIG_NFS_V3_ACL is not set
-CONFIG_NFS_V4=y
-CONFIG_NFS_DIRECTIO=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
-# CONFIG_NFSD_V3_ACL is not set
-CONFIG_NFSD_V4=y
-CONFIG_NFSD_TCP=y
-CONFIG_LOCKD=m
-CONFIG_LOCKD_V4=y
-CONFIG_EXPORTFS=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=m
-CONFIG_SUNRPC_GSS=m
-CONFIG_RPCSEC_GSS_KRB5=m
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-CONFIG_SMB_FS=m
-CONFIG_SMB_NLS_DEFAULT=y
-CONFIG_SMB_NLS_REMOTE="cp437"
-CONFIG_CIFS=m
-# CONFIG_CIFS_STATS is not set
-# CONFIG_CIFS_XATTR is not set
-# CONFIG_CIFS_EXPERIMENTAL is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
-
-#
-# Partition Types
-#
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
-CONFIG_SGI_PARTITION=y
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
-CONFIG_EFI_PARTITION=y
-
-#
-# Native Language Support
-#
-CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="iso8859-1"
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-# CONFIG_NLS_ASCII is not set
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
-CONFIG_NLS_UTF8=m
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
-CONFIG_CRC32=y
-# CONFIG_LIBCRC32C is not set
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_PENDING_IRQ=y
-
-#
-# HP Simulator drivers
-#
-# CONFIG_HP_SIMETH is not set
-# CONFIG_HP_SIMSERIAL is not set
-# CONFIG_HP_SIMSCSI is not set
-
-#
-# Instrumentation Support
-#
-# CONFIG_PROFILING is not set
-# CONFIG_KPROBES is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_LOG_BUF_SHIFT=20
-CONFIG_DETECT_SOFTLOCKUP=y
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_DEBUG_VM is not set
-CONFIG_FORCED_INLINING=y
-# CONFIG_RCU_TORTURE_TEST is not set
-CONFIG_IA64_GRANULE_16MB=y
-# CONFIG_IA64_GRANULE_64MB is not set
-# CONFIG_IA64_PRINT_HAZARDS is not set
-# CONFIG_DISABLE_VHPT is not set
-# CONFIG_IA64_DEBUG_CMPXCHG is not set
-# CONFIG_IA64_DEBUG_IRQ is not set
-CONFIG_SYSVIPC_COMPAT=y
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-CONFIG_CRYPTO=y
-# CONFIG_CRYPTO_HMAC is not set
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-CONFIG_CRYPTO_DES=m
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Hardware crypto devices
-#
diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile
index 971cd7870dd..ae16ec4f630 100644
--- a/arch/ia64/dig/Makefile
+++ b/arch/ia64/dig/Makefile
@@ -6,4 +6,9 @@
 #
 
 obj-y := setup.o
+ifeq ($(CONFIG_INTEL_IOMMU), y)
+obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o
+else
 obj-$(CONFIG_IA64_GENERIC) += machvec.o
+endif
+
diff --git a/arch/ia64/dig/machvec_vtd.c b/arch/ia64/dig/machvec_vtd.c
new file mode 100644
index 00000000000..7cd3eb471ca
--- /dev/null
+++ b/arch/ia64/dig/machvec_vtd.c
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME		dig_vtd
+#define MACHVEC_PLATFORM_HEADER		<asm/machvec_dig_vtd.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c
index 38aa9c10885..98131e1db7a 100644
--- a/arch/ia64/dig/setup.c
+++ b/arch/ia64/dig/setup.c
@@ -8,14 +8,13 @@
  * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
  */
-#include <linux/config.h>
 
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/kdev_t.h>
 #include <linux/string.h>
-#include <linux/tty.h>
+#include <linux/screen_info.h>
 #include <linux/console.h>
 #include <linux/timex.h>
 #include <linux/sched.h>
@@ -23,7 +22,7 @@
 
 #include <asm/io.h>
 #include <asm/machvec.h>
-#include <asm/system.h>
+#include <asm/setup.h>
 
 void __init
 dig_setup (char **cmdline_p)
diff --git a/arch/ia64/hp/common/Makefile b/arch/ia64/hp/common/Makefile
index f61a60057ff..9e179dd06b8 100644
--- a/arch/ia64/hp/common/Makefile
+++ b/arch/ia64/hp/common/Makefile
@@ -8,3 +8,4 @@
 obj-y := sba_iommu.o
 obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += hwsw_iommu.o
 obj-$(CONFIG_IA64_GENERIC) += hwsw_iommu.o
+obj-$(CONFIG_IA64_HP_AML_NFW) += aml_nfw.o
diff --git a/arch/ia64/hp/common/aml_nfw.c b/arch/ia64/hp/common/aml_nfw.c
new file mode 100644
index 00000000000..84715fcbba0
--- /dev/null
+++ b/arch/ia64/hp/common/aml_nfw.c
@@ -0,0 +1,235 @@
+/*
+ * OpRegion handler to allow AML to call native firmware
+ *
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This driver implements HP Open Source Review Board proposal 1842,
+ * which was approved on 9/20/2006.
+ *
+ * For technical documentation, see the HP SPPA Firmware EAS, Appendix F.
+ *
+ * ACPI does not define a mechanism for AML methods to call native firmware
+ * interfaces such as PAL or SAL.  This OpRegion handler adds such a mechanism.
+ * After the handler is installed, an AML method can call native firmware by
+ * storing the arguments and firmware entry point to specific offsets in the
+ * OpRegion.  When AML reads the "return value" offset from the OpRegion, this
+ * handler loads up the arguments, makes the firmware call, and returns the
+ * result.
+ */
+
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <asm/sal.h>
+
+MODULE_AUTHOR("Bjorn Helgaas <bjorn.helgaas@hp.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ACPI opregion handler for native firmware calls");
+
+static bool force_register;
+module_param_named(force, force_register, bool, 0);
+MODULE_PARM_DESC(force, "Install opregion handler even without HPQ5001 device");
+
+#define AML_NFW_SPACE		0xA1
+
+struct ia64_pdesc {
+	void *ip;
+	void *gp;
+};
+
+/*
+ * N.B.  The layout of this structure is defined in the HP SPPA FW EAS, and
+ *	 the member offsets are embedded in AML methods.
+ */
+struct ia64_nfw_context {
+	u64 arg[8];
+	struct ia64_sal_retval ret;
+	u64 ip;
+	u64 gp;
+	u64 pad[2];
+};
+
+static void *virt_map(u64 address)
+{
+	if (address & (1UL << 63))
+		return (void *) (__IA64_UNCACHED_OFFSET | address);
+
+	return __va(address);
+}
+
+static void aml_nfw_execute(struct ia64_nfw_context *c)
+{
+	struct ia64_pdesc virt_entry;
+	ia64_sal_handler entry;
+
+	virt_entry.ip = virt_map(c->ip);
+	virt_entry.gp = virt_map(c->gp);
+
+	entry = (ia64_sal_handler) &virt_entry;
+
+	IA64_FW_CALL(entry, c->ret,
+		     c->arg[0], c->arg[1], c->arg[2], c->arg[3],
+		     c->arg[4], c->arg[5], c->arg[6], c->arg[7]);
+}
+
+static void aml_nfw_read_arg(u8 *offset, u32 bit_width, u64 *value)
+{
+	switch (bit_width) {
+	case 8:
+		*value = *(u8 *)offset;
+		break;
+	case 16:
+		*value = *(u16 *)offset;
+		break;
+	case 32:
+		*value = *(u32 *)offset;
+		break;
+	case 64:
+		*value = *(u64 *)offset;
+		break;
+	}
+}
+
+static void aml_nfw_write_arg(u8 *offset, u32 bit_width, u64 *value)
+{
+	switch (bit_width) {
+	case 8:
+		*(u8 *) offset = *value;
+		break;
+	case 16:
+		*(u16 *) offset = *value;
+		break;
+	case 32:
+		*(u32 *) offset = *value;
+		break;
+	case 64:
+		*(u64 *) offset = *value;
+		break;
+	}
+}
+
+static acpi_status aml_nfw_handler(u32 function, acpi_physical_address address,
+	u32 bit_width, u64 *value, void *handler_context,
+	void *region_context)
+{
+	struct ia64_nfw_context *context = handler_context;
+	u8 *offset = (u8 *) context + address;
+
+	if (bit_width !=  8 && bit_width != 16 &&
+	    bit_width != 32 && bit_width != 64)
+		return AE_BAD_PARAMETER;
+
+	if (address + (bit_width >> 3) > sizeof(struct ia64_nfw_context))
+		return AE_BAD_PARAMETER;
+
+	switch (function) {
+	case ACPI_READ:
+		if (address == offsetof(struct ia64_nfw_context, ret))
+			aml_nfw_execute(context);
+		aml_nfw_read_arg(offset, bit_width, value);
+		break;
+	case ACPI_WRITE:
+		aml_nfw_write_arg(offset, bit_width, value);
+		break;
+	}
+
+	return AE_OK;
+}
+
+static struct ia64_nfw_context global_context;
+static int global_handler_registered;
+
+static int aml_nfw_add_global_handler(void)
+{
+	acpi_status status;
+
+	if (global_handler_registered)
+		return 0;
+
+	status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT,
+		AML_NFW_SPACE, aml_nfw_handler, NULL, &global_context);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	global_handler_registered = 1;
+	printk(KERN_INFO "Global 0x%02X opregion handler registered\n",
+		AML_NFW_SPACE);
+	return 0;
+}
+
+static int aml_nfw_remove_global_handler(void)
+{
+	acpi_status status;
+
+	if (!global_handler_registered)
+		return 0;
+
+	status = acpi_remove_address_space_handler(ACPI_ROOT_OBJECT,
+		AML_NFW_SPACE, aml_nfw_handler);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	global_handler_registered = 0;
+	printk(KERN_INFO "Global 0x%02X opregion handler removed\n",
+		AML_NFW_SPACE);
+	return 0;
+}
+
+static int aml_nfw_add(struct acpi_device *device)
+{
+	/*
+	 * We would normally allocate a new context structure and install
+	 * the address space handler for the specific device we found.
+	 * But the HP-UX implementation shares a single global context
+	 * and always puts the handler at the root, so we'll do the same.
+	 */
+	return aml_nfw_add_global_handler();
+}
+
+static int aml_nfw_remove(struct acpi_device *device)
+{
+	return aml_nfw_remove_global_handler();
+}
+
+static const struct acpi_device_id aml_nfw_ids[] = {
+	{"HPQ5001", 0},
+	{"", 0}
+};
+
+static struct acpi_driver acpi_aml_nfw_driver = {
+	.name = "native firmware",
+	.ids = aml_nfw_ids,
+	.ops = {
+		.add = aml_nfw_add,
+		.remove = aml_nfw_remove,
+		},
+};
+
+static int __init aml_nfw_init(void)
+{
+	int result;
+
+	if (force_register)
+		aml_nfw_add_global_handler();
+
+	result = acpi_bus_register_driver(&acpi_aml_nfw_driver);
+	if (result < 0) {
+		aml_nfw_remove_global_handler();
+		return result;
+	}
+
+	return 0;
+}
+
+static void __exit aml_nfw_exit(void)
+{
+	acpi_bus_unregister_driver(&acpi_aml_nfw_driver);
+	aml_nfw_remove_global_handler();
+}
+
+module_init(aml_nfw_init);
+module_exit(aml_nfw_exit);
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index a5a5637507b..1e4cae5ae05 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -13,57 +13,36 @@
  */
 
 #include <linux/device.h>
-
+#include <linux/dma-mapping.h>
+#include <linux/swiotlb.h>
+#include <linux/export.h>
 #include <asm/machvec.h>
 
+extern struct dma_map_ops sba_dma_ops, swiotlb_dma_ops;
+
 /* swiotlb declarations & definitions: */
 extern int swiotlb_late_init_with_default_size (size_t size);
-extern ia64_mv_dma_alloc_coherent	swiotlb_alloc_coherent;
-extern ia64_mv_dma_free_coherent	swiotlb_free_coherent;
-extern ia64_mv_dma_map_single		swiotlb_map_single;
-extern ia64_mv_dma_unmap_single		swiotlb_unmap_single;
-extern ia64_mv_dma_map_sg		swiotlb_map_sg;
-extern ia64_mv_dma_unmap_sg		swiotlb_unmap_sg;
-extern ia64_mv_dma_supported		swiotlb_dma_supported;
-extern ia64_mv_dma_mapping_error	swiotlb_dma_mapping_error;
-
-/* hwiommu declarations & definitions: */
-
-extern ia64_mv_dma_alloc_coherent	sba_alloc_coherent;
-extern ia64_mv_dma_free_coherent	sba_free_coherent;
-extern ia64_mv_dma_map_single		sba_map_single;
-extern ia64_mv_dma_unmap_single		sba_unmap_single;
-extern ia64_mv_dma_map_sg		sba_map_sg;
-extern ia64_mv_dma_unmap_sg		sba_unmap_sg;
-extern ia64_mv_dma_supported		sba_dma_supported;
-extern ia64_mv_dma_mapping_error	sba_dma_mapping_error;
-
-#define hwiommu_alloc_coherent		sba_alloc_coherent
-#define hwiommu_free_coherent		sba_free_coherent
-#define hwiommu_map_single		sba_map_single
-#define hwiommu_unmap_single		sba_unmap_single
-#define hwiommu_map_sg			sba_map_sg
-#define hwiommu_unmap_sg		sba_unmap_sg
-#define hwiommu_dma_supported		sba_dma_supported
-#define hwiommu_dma_mapping_error	sba_dma_mapping_error
-#define hwiommu_sync_single_for_cpu	machvec_dma_sync_single
-#define hwiommu_sync_sg_for_cpu		machvec_dma_sync_sg
-#define hwiommu_sync_single_for_device	machvec_dma_sync_single
-#define hwiommu_sync_sg_for_device	machvec_dma_sync_sg
-
 
 /*
  * Note: we need to make the determination of whether or not to use
  * the sw I/O TLB based purely on the device structure.  Anything else
  * would be unreliable or would be too intrusive.
  */
-static inline int
-use_swiotlb (struct device *dev)
+static inline int use_swiotlb(struct device *dev)
 {
-	return dev && dev->dma_mask && !hwiommu_dma_supported(dev, *dev->dma_mask);
+	return dev && dev->dma_mask &&
+		!sba_dma_ops.dma_supported(dev, *dev->dma_mask);
 }
 
-void
+struct dma_map_ops *hwsw_dma_get_ops(struct device *dev)
+{
+	if (use_swiotlb(dev))
+		return &swiotlb_dma_ops;
+	return &sba_dma_ops;
+}
+EXPORT_SYMBOL(hwsw_dma_get_ops);
+
+void __init
 hwsw_init (void)
 {
 	/* default to a smallish 2MB sw I/O TLB */
@@ -71,124 +50,10 @@ hwsw_init (void)
 #ifdef CONFIG_IA64_GENERIC
 		/* Better to have normal DMA than panic */
 		printk(KERN_WARNING "%s: Failed to initialize software I/O TLB,"
-		       " reverting to hpzx1 platform vector\n", __FUNCTION__);
+		       " reverting to hpzx1 platform vector\n", __func__);
 		machvec_init("hpzx1");
 #else
 		panic("Unable to initialize software I/O TLB services");
 #endif
 	}
 }
-
-void *
-hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
-{
-	if (use_swiotlb(dev))
-		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
-	else
-		return hwiommu_alloc_coherent(dev, size, dma_handle, flags);
-}
-
-void
-hwsw_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
-{
-	if (use_swiotlb(dev))
-		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
-	else
-		hwiommu_free_coherent(dev, size, vaddr, dma_handle);
-}
-
-dma_addr_t
-hwsw_map_single (struct device *dev, void *addr, size_t size, int dir)
-{
-	if (use_swiotlb(dev))
-		return swiotlb_map_single(dev, addr, size, dir);
-	else
-		return hwiommu_map_single(dev, addr, size, dir);
-}
-
-void
-hwsw_unmap_single (struct device *dev, dma_addr_t iova, size_t size, int dir)
-{
-	if (use_swiotlb(dev))
-		return swiotlb_unmap_single(dev, iova, size, dir);
-	else
-		return hwiommu_unmap_single(dev, iova, size, dir);
-}
-
-
-int
-hwsw_map_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
-{
-	if (use_swiotlb(dev))
-		return swiotlb_map_sg(dev, sglist, nents, dir);
-	else
-		return hwiommu_map_sg(dev, sglist, nents, dir);
-}
-
-void
-hwsw_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
-{
-	if (use_swiotlb(dev))
-		return swiotlb_unmap_sg(dev, sglist, nents, dir);
-	else
-		return hwiommu_unmap_sg(dev, sglist, nents, dir);
-}
-
-void
-hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size, int dir)
-{
-	if (use_swiotlb(dev))
-		swiotlb_sync_single_for_cpu(dev, addr, size, dir);
-	else
-		hwiommu_sync_single_for_cpu(dev, addr, size, dir);
-}
-
-void
-hwsw_sync_sg_for_cpu (struct device *dev, struct scatterlist *sg, int nelems, int dir)
-{
-	if (use_swiotlb(dev))
-		swiotlb_sync_sg_for_cpu(dev, sg, nelems, dir);
-	else
-		hwiommu_sync_sg_for_cpu(dev, sg, nelems, dir);
-}
-
-void
-hwsw_sync_single_for_device (struct device *dev, dma_addr_t addr, size_t size, int dir)
-{
-	if (use_swiotlb(dev))
-		swiotlb_sync_single_for_device(dev, addr, size, dir);
-	else
-		hwiommu_sync_single_for_device(dev, addr, size, dir);
-}
-
-void
-hwsw_sync_sg_for_device (struct device *dev, struct scatterlist *sg, int nelems, int dir)
-{
-	if (use_swiotlb(dev))
-		swiotlb_sync_sg_for_device(dev, sg, nelems, dir);
-	else
-		hwiommu_sync_sg_for_device(dev, sg, nelems, dir);
-}
-
-int
-hwsw_dma_supported (struct device *dev, u64 mask)
-{
-	if (hwiommu_dma_supported(dev, mask))
-		return 1;
-	return swiotlb_dma_supported(dev, mask);
-}
-
-int
-hwsw_dma_mapping_error (dma_addr_t dma_addr)
-{
-	return hwiommu_dma_mapping_error (dma_addr) || swiotlb_dma_mapping_error(dma_addr);
-}
-
-EXPORT_SYMBOL(hwsw_dma_mapping_error);
-EXPORT_SYMBOL(hwsw_map_single);
-EXPORT_SYMBOL(hwsw_unmap_single);
-EXPORT_SYMBOL(hwsw_map_sg);
-EXPORT_SYMBOL(hwsw_unmap_sg);
-EXPORT_SYMBOL(hwsw_dma_supported);
-EXPORT_SYMBOL(hwsw_alloc_coherent);
-EXPORT_SYMBOL(hwsw_free_coherent);
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index bdccd0b1eb6..344387a5540 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -19,7 +19,6 @@
 **
 */
 
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -35,15 +34,20 @@
 #include <linux/efi.h>
 #include <linux/nodemask.h>
 #include <linux/bitops.h>         /* hweight64() */
+#include <linux/crash_dump.h>
+#include <linux/iommu-helper.h>
+#include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
 
 #include <asm/delay.h>		/* ia64_get_itc() */
 #include <asm/io.h>
 #include <asm/page.h>		/* PAGE_OFFSET */
 #include <asm/dma.h>
-#include <asm/system.h>		/* wmb() */
 
 #include <asm/acpi-ext.h>
 
+extern int swiotlb_late_init_with_default_size (size_t size);
+
 #define PFX "IOC: "
 
 /*
@@ -76,7 +80,7 @@
 ** If a device prefetches beyond the end of a valid pdir entry, it will cause
 ** a hard failure, ie. MCA.  Version 3.0 and later of the zx1 LBA should
 ** disconnect on 4k boundaries and prevent such issues.  If the device is
-** particularly agressive, this option will keep the entire pdir valid such
+** particularly aggressive, this option will keep the entire pdir valid such
 ** that prefetching will hit a valid address.  This could severely impact
 ** error containment, and is therefore off by default.  The page that is
 ** used for spill-over is poisoned, so that should help debugging somewhat.
@@ -238,20 +242,20 @@ struct ioc {
 	struct pci_dev	*sac_only_dev;
 };
 
-static struct ioc *ioc_list;
+static struct ioc *ioc_list, *ioc_found;
 static int reserve_sba_gart = 1;
 
 static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t);
 static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t);
 
-#define sba_sg_address(sg)	(page_address((sg)->page) + (sg)->offset)
+#define sba_sg_address(sg)	sg_virt((sg))
 
 #ifdef FULL_VALID_PDIR
 static u64 prefetch_spill_page;
 #endif
 
 #ifdef CONFIG_PCI
-# define GET_IOC(dev)	(((dev)->bus == &pci_bus_type)						\
+# define GET_IOC(dev)	((dev_is_pci(dev))						\
 			 ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL)
 #else
 # define GET_IOC(dev)	NULL
@@ -259,10 +263,10 @@ static u64 prefetch_spill_page;
 
 /*
 ** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
-** (or rather not merge) DMA's into managable chunks.
+** (or rather not merge) DMAs into manageable chunks.
 ** On parisc, this is more of the software/tuning constraint
-** rather than the HW. I/O MMU allocation alogorithms can be
-** faster with smaller size is (to some degree).
+** rather than the HW. I/O MMU allocation algorithms can be
+** faster with smaller sizes (to some degree).
 */
 #define DMA_CHUNK_SIZE  (BITS_PER_LONG*iovp_size)
 
@@ -394,7 +398,7 @@ sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
 		printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents,
 		       startsg->dma_address, startsg->dma_length,
 		       sba_sg_address(startsg));
-		startsg++;
+		startsg = sg_next(startsg);
 	}
 }
 
@@ -407,7 +411,7 @@ sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
 	while (the_nents-- > 0) {
 		if (sba_sg_address(the_sg) == 0x0UL)
 			sba_dump_sg(NULL, startsg, nents);
-		the_sg++;
+		the_sg = sg_next(the_sg);
 	}
 }
 
@@ -458,6 +462,13 @@ get_iovp_order (unsigned long size)
 	return order;
 }
 
+static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr,
+				 unsigned int bitshiftcnt)
+{
+	return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3)
+		+ bitshiftcnt;
+}
+
 /**
  * sba_search_bitmap - find free space in IO PDIR resource bitmap
  * @ioc: IO MMU structure which owns the pdir we are interested in.
@@ -469,15 +480,25 @@ get_iovp_order (unsigned long size)
  * Cool perf optimization: search for log2(size) bits at a time.
  */
 static SBA_INLINE unsigned long
-sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
+sba_search_bitmap(struct ioc *ioc, struct device *dev,
+		  unsigned long bits_wanted, int use_hint)
 {
 	unsigned long *res_ptr;
 	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
-	unsigned long flags, pide = ~0UL;
+	unsigned long flags, pide = ~0UL, tpide;
+	unsigned long boundary_size;
+	unsigned long shift;
+	int ret;
 
 	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
 	ASSERT(res_ptr < res_end);
 
+	boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1;
+	boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift;
+
+	BUG_ON(ioc->ibase & ~iovp_mask);
+	shift = ioc->ibase >> iovp_shift;
+
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
 	/* Allow caller to force a search through the entire resource space */
@@ -502,9 +523,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 			if (likely(*res_ptr != ~0UL)) {
 				bitshiftcnt = ffz(*res_ptr);
 				*res_ptr |= (1UL << bitshiftcnt);
-				pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-				pide <<= 3;	/* convert to bit address */
-				pide += bitshiftcnt;
+				pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
 				ioc->res_bitshift = bitshiftcnt + bits_wanted;
 				goto found_it;
 			}
@@ -527,17 +546,19 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 		base_mask = RESMAP_MASK(bits_wanted);
 		mask = base_mask << bitshiftcnt;
 
-		DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr);
+		DBG_RES("%s() o %ld %p", __func__, o, res_ptr);
 		for(; res_ptr < res_end ; res_ptr++)
 		{ 
 			DBG_RES("    %p %lx %lx\n", res_ptr, mask, *res_ptr);
 			ASSERT(0 != mask);
 			for (; mask ; mask <<= o, bitshiftcnt += o) {
-				if(0 == ((*res_ptr) & mask)) {
+				tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
+				ret = iommu_is_span_boundary(tpide, bits_wanted,
+							     shift,
+							     boundary_size);
+				if ((0 == ((*res_ptr) & mask)) && !ret) {
 					*res_ptr |= mask;     /* mark resources busy! */
-					pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-					pide <<= 3;	/* convert to bit address */
-					pide += bitshiftcnt;
+					pide = tpide;
 					ioc->res_bitshift = bitshiftcnt + bits_wanted;
 					goto found_it;
 				}
@@ -558,6 +579,11 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 		end = res_end - qwords;
 
 		for (; res_ptr < end; res_ptr++) {
+			tpide = ptr_to_pide(ioc, res_ptr, 0);
+			ret = iommu_is_span_boundary(tpide, bits_wanted,
+						     shift, boundary_size);
+			if (ret)
+				goto next_ptr;
 			for (i = 0 ; i < qwords ; i++) {
 				if (res_ptr[i] != 0)
 					goto next_ptr;
@@ -570,8 +596,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 				res_ptr[i] = ~0UL;
 			res_ptr[i] |= RESMAP_MASK(bits);
 
-			pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-			pide <<= 3;	/* convert to bit address */
+			pide = tpide;
 			res_ptr += qwords;
 			ioc->res_bitshift = bits;
 			goto found_it;
@@ -603,7 +628,7 @@ found_it:
  * resource bit map.
  */
 static int
-sba_alloc_range(struct ioc *ioc, size_t size)
+sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
 {
 	unsigned int pages_needed = size >> iovp_shift;
 #ifdef PDIR_SEARCH_TIMING
@@ -620,9 +645,9 @@ sba_alloc_range(struct ioc *ioc, size_t size)
 	/*
 	** "seek and ye shall find"...praying never hurts either...
 	*/
-	pide = sba_search_bitmap(ioc, pages_needed, 1);
+	pide = sba_search_bitmap(ioc, dev, pages_needed, 1);
 	if (unlikely(pide >= (ioc->res_size << 3))) {
-		pide = sba_search_bitmap(ioc, pages_needed, 0);
+		pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
 		if (unlikely(pide >= (ioc->res_size << 3))) {
 #if DELAYED_RESOURCE_CNT > 0
 			unsigned long flags;
@@ -651,13 +676,20 @@ sba_alloc_range(struct ioc *ioc, size_t size)
 			}
 			spin_unlock_irqrestore(&ioc->saved_lock, flags);
 
-			pide = sba_search_bitmap(ioc, pages_needed, 0);
-			if (unlikely(pide >= (ioc->res_size << 3)))
-				panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
-				      ioc->ioc_hpa);
+			pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
+			if (unlikely(pide >= (ioc->res_size << 3))) {
+				printk(KERN_WARNING "%s: I/O MMU @ %p is"
+				       "out of mapping resources, %u %u %lx\n",
+				       __func__, ioc->ioc_hpa, ioc->res_size,
+				       pages_needed, dma_get_seg_boundary(dev));
+				return -1;
+			}
 #else
-			panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
-			      ioc->ioc_hpa);
+			printk(KERN_WARNING "%s: I/O MMU @ %p is"
+			       "out of mapping resources, %u %u %lx\n",
+			       __func__, ioc->ioc_hpa, ioc->res_size,
+			       pages_needed, dma_get_seg_boundary(dev));
+			return -1;
 #endif
 		}
 	}
@@ -677,7 +709,7 @@ sba_alloc_range(struct ioc *ioc, size_t size)
 #endif
 
 	DBG_RES("%s(%x) %d -> %lx hint %x/%x\n",
-		__FUNCTION__, size, pages_needed, pide,
+		__func__, size, pages_needed, pide,
 		(uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
 		ioc->res_bitshift );
 
@@ -720,8 +752,8 @@ sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size)
 			m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1));
 			bits_not_wanted = 0;
 
-			DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __FUNCTION__, (uint) iova, size,
-		        	bits_not_wanted, m, pide, res_ptr, *res_ptr);
+			DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __func__, (uint) iova, size,
+			        bits_not_wanted, m, pide, res_ptr, *res_ptr);
 
 			ASSERT(m != 0);
 			ASSERT(bits_not_wanted);
@@ -875,18 +907,22 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
 }
 
 /**
- * sba_map_single - map one buffer and return IOVA for DMA
+ * sba_map_single_attrs - map one buffer and return IOVA for DMA
  * @dev: instance of PCI owned by the driver that's asking.
  * @addr:  driver buffer to map.
  * @size:  number of bytes to map in driver buffer.
  * @dir:  R/W or both.
+ * @attrs: optional dma attributes
  *
- * See Documentation/DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
-dma_addr_t
-sba_map_single(struct device *dev, void *addr, size_t size, int dir)
+static dma_addr_t sba_map_page(struct device *dev, struct page *page,
+			       unsigned long poff, size_t size,
+			       enum dma_data_direction dir,
+			       struct dma_attrs *attrs)
 {
 	struct ioc *ioc;
+	void *addr = page_address(page) + poff;
 	dma_addr_t iovp;
 	dma_addr_t offset;
 	u64 *pdir_start;
@@ -908,7 +944,8 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
  		** Device is bit capable of DMA'ing to the buffer...
 		** just return the PCI address of ptr
  		*/
-		DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n",
+		DBG_BYPASS("sba_map_single_attrs() bypass mask/addr: "
+			   "0x%lx/0x%lx\n",
 		           to_pci_dev(dev)->dma_mask, pci_addr);
 		return pci_addr;
 	}
@@ -929,17 +966,18 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
 
 #ifdef ASSERT_PDIR_SANITY
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	if (sba_check_pdir(ioc,"Check before sba_map_single()"))
+	if (sba_check_pdir(ioc,"Check before sba_map_single_attrs()"))
 		panic("Sanity check failed");
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
 
-	pide = sba_alloc_range(ioc, size);
+	pide = sba_alloc_range(ioc, dev, size);
+	if (pide < 0)
+		return 0;
 
 	iovp = (dma_addr_t) pide << iovp_shift;
 
-	DBG_RUN("%s() 0x%p -> 0x%lx\n",
-		__FUNCTION__, addr, (long) iovp | offset);
+	DBG_RUN("%s() 0x%p -> 0x%lx\n", __func__, addr, (long) iovp | offset);
 
 	pdir_start = &(ioc->pdir_base[pide]);
 
@@ -959,12 +997,20 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
 	/* form complete address */
 #ifdef ASSERT_PDIR_SANITY
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check after sba_map_single()");
+	sba_check_pdir(ioc,"Check after sba_map_single_attrs()");
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
 	return SBA_IOVA(ioc, iovp, offset);
 }
 
+static dma_addr_t sba_map_single_attrs(struct device *dev, void *addr,
+				       size_t size, enum dma_data_direction dir,
+				       struct dma_attrs *attrs)
+{
+	return sba_map_page(dev, virt_to_page(addr),
+			    (unsigned long)addr & ~PAGE_MASK, size, dir, attrs);
+}
+
 #ifdef ENABLE_MARK_CLEAN
 static SBA_INLINE void
 sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
@@ -990,15 +1036,17 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
 #endif
 
 /**
- * sba_unmap_single - unmap one IOVA and free resources
+ * sba_unmap_single_attrs - unmap one IOVA and free resources
  * @dev: instance of PCI owned by the driver that's asking.
  * @iova:  IOVA of driver buffer previously mapped.
  * @size:  number of bytes mapped in driver buffer.
  * @dir:  R/W or both.
+ * @attrs: optional dma attributes
  *
- * See Documentation/DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
-void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
+static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
+			   enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	struct ioc *ioc;
 #if DELAYED_RESOURCE_CNT > 0
@@ -1015,7 +1063,8 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
 		/*
 		** Address does not fall w/in IOVA, must be bypassing
 		*/
-		DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova);
+		DBG_BYPASS("sba_unmap_single_attrs() bypass addr: 0x%lx\n",
+			   iova);
 
 #ifdef ENABLE_MARK_CLEAN
 		if (dir == DMA_FROM_DEVICE) {
@@ -1027,8 +1076,7 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
 #endif
 	offset = iova & ~iovp_mask;
 
-	DBG_RUN("%s() iovp 0x%lx/%x\n",
-		__FUNCTION__, (long) iova, size);
+	DBG_RUN("%s() iovp 0x%lx/%x\n", __func__, (long) iova, size);
 
 	iova ^= offset;        /* clear offset bits */
 	size += offset;
@@ -1066,6 +1114,11 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
 #endif /* DELAYED_RESOURCE_CNT == 0 */
 }
 
+void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
+			    enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	sba_unmap_page(dev, iova, size, dir, attrs);
+}
 
 /**
  * sba_alloc_coherent - allocate/map shared mem for DMA
@@ -1073,10 +1126,11 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
  * @size:  number of bytes mapped in driver buffer.
  * @dma_handle:  IOVA of new buffer.
  *
- * See Documentation/DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
-void *
-sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
+static void *
+sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		   gfp_t flags, struct dma_attrs *attrs)
 {
 	struct ioc *ioc;
 	void *addr;
@@ -1086,11 +1140,13 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp
 
 #ifdef CONFIG_NUMA
 	{
+		int node = ioc->node;
 		struct page *page;
-		page = alloc_pages_node(ioc->node == MAX_NUMNODES ?
-		                        numa_node_id() : ioc->node, flags,
-		                        get_order(size));
 
+		if (node == NUMA_NO_NODE)
+			node = numa_node_id();
+
+		page = alloc_pages_exact_node(node, flags, get_order(size));
 		if (unlikely(!page))
 			return NULL;
 
@@ -1122,7 +1178,8 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp
 	 * If device can't bypass or bypass is disabled, pass the 32bit fake
 	 * device to map single to get an iova mapping.
 	 */
-	*dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0);
+	*dma_handle = sba_map_single_attrs(&ioc->sac_only_dev->dev, addr,
+					   size, 0, NULL);
 
 	return addr;
 }
@@ -1135,11 +1192,12 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp
  * @vaddr:  virtual address IOVA of "consistent" buffer.
  * @dma_handler:  IO virtual address of "consistent" buffer.
  *
- * See Documentation/DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
-void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
+static void sba_free_coherent(struct device *dev, size_t size, void *vaddr,
+			      dma_addr_t dma_handle, struct dma_attrs *attrs)
 {
-	sba_unmap_single(dev, dma_handle, size, 0);
+	sba_unmap_single_attrs(dev, dma_handle, size, 0, NULL);
 	free_pages((unsigned long) vaddr, get_order(size));
 }
 
@@ -1177,7 +1235,6 @@ sba_fill_pdir(
 	u64 *pdirp = NULL;
 	unsigned long dma_offset = 0;
 
-	dma_sg--;
 	while (nents-- > 0) {
 		int     cnt = startsg->dma_length;
 		startsg->dma_length = 0;
@@ -1199,7 +1256,8 @@ sba_fill_pdir(
 			u32 pide = startsg->dma_address & ~PIDE_FLAG;
 			dma_offset = (unsigned long) pide & ~iovp_mask;
 			startsg->dma_address = 0;
-			dma_sg++;
+			if (n_mappings)
+				dma_sg = sg_next(dma_sg);
 			dma_sg->dma_address = pide | ioc->ibase;
 			pdirp = &(ioc->pdir_base[pide >> iovp_shift]);
 			n_mappings++;
@@ -1226,7 +1284,7 @@ sba_fill_pdir(
 				pdirp++;
 			} while (cnt > 0);
 		}
-		startsg++;
+		startsg = sg_next(startsg);
 	}
 	/* force pdir update */
 	wmb();
@@ -1263,7 +1321,7 @@ sba_fill_pdir(
  * the sglist do both.
  */
 static SBA_INLINE int
-sba_coalesce_chunks( struct ioc *ioc,
+sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
 	struct scatterlist *startsg,
 	int nents)
 {
@@ -1273,6 +1331,8 @@ sba_coalesce_chunks( struct ioc *ioc,
 	struct scatterlist *dma_sg;        /* next DMA stream head */
 	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
 	int n_mappings = 0;
+	unsigned int max_seg_size = dma_get_max_seg_size(dev);
+	int idx;
 
 	while (nents > 0) {
 		unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
@@ -1295,7 +1355,7 @@ sba_coalesce_chunks( struct ioc *ioc,
 		while (--nents > 0) {
 			unsigned long vaddr;	/* tmp */
 
-			startsg++;
+			startsg = sg_next(startsg);
 
 			/* PARANOID */
 			startsg->dma_address = startsg->dma_length = 0;
@@ -1312,6 +1372,9 @@ sba_coalesce_chunks( struct ioc *ioc,
 			    > DMA_CHUNK_SIZE)
 				break;
 
+			if (dma_len + startsg->length > max_seg_size)
+				break;
+
 			/*
 			** Then look for virtually contiguous blocks.
 			**
@@ -1331,7 +1394,7 @@ sba_coalesce_chunks( struct ioc *ioc,
 #endif
 
 			/*
-			** Not virtually contigous.
+			** Not virtually contiguous.
 			** Terminate prev chunk.
 			** Start a new chunk.
 			**
@@ -1368,26 +1431,35 @@ sba_coalesce_chunks( struct ioc *ioc,
 		vcontig_sg->dma_length = vcontig_len;
 		dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
 		ASSERT(dma_len <= DMA_CHUNK_SIZE);
-		dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
-			| (sba_alloc_range(ioc, dma_len) << iovp_shift)
-			| dma_offset);
+		idx = sba_alloc_range(ioc, dev, dma_len);
+		if (idx < 0) {
+			dma_sg->dma_length = 0;
+			return -1;
+		}
+		dma_sg->dma_address = (dma_addr_t)(PIDE_FLAG | (idx << iovp_shift)
+						   | dma_offset);
 		n_mappings++;
 	}
 
 	return n_mappings;
 }
 
-
+static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
+			       int nents, enum dma_data_direction dir,
+			       struct dma_attrs *attrs);
 /**
  * sba_map_sg - map Scatter/Gather list
  * @dev: instance of PCI owned by the driver that's asking.
  * @sglist:  array of buffer/length pairs
  * @nents:  number of entries in list
  * @dir:  R/W or both.
+ * @attrs: optional dma attributes
  *
- * See Documentation/DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
-int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir)
+static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
+			    int nents, enum dma_data_direction dir,
+			    struct dma_attrs *attrs)
 {
 	struct ioc *ioc;
 	int coalesced, filled = 0;
@@ -1398,14 +1470,14 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di
 	struct scatterlist *sg;
 #endif
 
-	DBG_RUN_SG("%s() START %d entries\n", __FUNCTION__, nents);
+	DBG_RUN_SG("%s() START %d entries\n", __func__, nents);
 	ioc = GET_IOC(dev);
 	ASSERT(ioc);
 
 #ifdef ALLOW_IOV_BYPASS_SG
 	ASSERT(to_pci_dev(dev)->dma_mask);
 	if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
-		for (sg = sglist ; filled < nents ; filled++, sg++){
+		for_each_sg(sglist, sg, nents, filled) {
 			sg->dma_length = sg->length;
 			sg->dma_address = virt_to_phys(sba_sg_address(sg));
 		}
@@ -1415,16 +1487,16 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di
 	/* Fast path single entry scatterlists. */
 	if (nents == 1) {
 		sglist->dma_length = sglist->length;
-		sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir);
+		sglist->dma_address = sba_map_single_attrs(dev, sba_sg_address(sglist), sglist->length, dir, attrs);
 		return 1;
 	}
 
 #ifdef ASSERT_PDIR_SANITY
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	if (sba_check_pdir(ioc,"Check before sba_map_sg()"))
+	if (sba_check_pdir(ioc,"Check before sba_map_sg_attrs()"))
 	{
 		sba_dump_sg(ioc, sglist, nents);
-		panic("Check before sba_map_sg()");
+		panic("Check before sba_map_sg_attrs()");
 	}
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
@@ -1439,7 +1511,11 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di
 	** w/o this association, we wouldn't have coherent DMA!
 	** Access to the virtual address is what forces a two pass algorithm.
 	*/
-	coalesced = sba_coalesce_chunks(ioc, sglist, nents);
+	coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents);
+	if (coalesced < 0) {
+		sba_unmap_sg_attrs(dev, sglist, nents, dir, attrs);
+		return 0;
+	}
 
 	/*
 	** Program the I/O Pdir
@@ -1453,31 +1529,33 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di
 
 #ifdef ASSERT_PDIR_SANITY
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	if (sba_check_pdir(ioc,"Check after sba_map_sg()"))
+	if (sba_check_pdir(ioc,"Check after sba_map_sg_attrs()"))
 	{
 		sba_dump_sg(ioc, sglist, nents);
-		panic("Check after sba_map_sg()\n");
+		panic("Check after sba_map_sg_attrs()\n");
 	}
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
 
 	ASSERT(coalesced == filled);
-	DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled);
+	DBG_RUN_SG("%s() DONE %d mappings\n", __func__, filled);
 
 	return filled;
 }
 
-
 /**
- * sba_unmap_sg - unmap Scatter/Gather list
+ * sba_unmap_sg_attrs - unmap Scatter/Gather list
  * @dev: instance of PCI owned by the driver that's asking.
  * @sglist:  array of buffer/length pairs
  * @nents:  number of entries in list
  * @dir:  R/W or both.
+ * @attrs: optional dma attributes
  *
- * See Documentation/DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
-void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
+static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
+			       int nents, enum dma_data_direction dir,
+			       struct dma_attrs *attrs)
 {
 #ifdef ASSERT_PDIR_SANITY
 	struct ioc *ioc;
@@ -1485,29 +1563,30 @@ void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, in
 #endif
 
 	DBG_RUN_SG("%s() START %d entries,  %p,%x\n",
-		__FUNCTION__, nents, sba_sg_address(sglist), sglist->length);
+		   __func__, nents, sba_sg_address(sglist), sglist->length);
 
 #ifdef ASSERT_PDIR_SANITY
 	ioc = GET_IOC(dev);
 	ASSERT(ioc);
 
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check before sba_unmap_sg()");
+	sba_check_pdir(ioc,"Check before sba_unmap_sg_attrs()");
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
 
 	while (nents && sglist->dma_length) {
 
-		sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir);
-		sglist++;
+		sba_unmap_single_attrs(dev, sglist->dma_address,
+				       sglist->dma_length, dir, attrs);
+		sglist = sg_next(sglist);
 		nents--;
 	}
 
-	DBG_RUN_SG("%s() DONE (nents %d)\n", __FUNCTION__,  nents);
+	DBG_RUN_SG("%s() DONE (nents %d)\n", __func__,  nents);
 
 #ifdef ASSERT_PDIR_SANITY
 	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check after sba_unmap_sg()");
+	sba_check_pdir(ioc,"Check after sba_unmap_sg_attrs()");
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
 
@@ -1519,7 +1598,7 @@ void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, in
 *
 ***************************************************************/
 
-static void __init
+static void
 ioc_iova_init(struct ioc *ioc)
 {
 	int tcnfg;
@@ -1540,7 +1619,7 @@ ioc_iova_init(struct ioc *ioc)
 	ioc->iov_size = ~ioc->imask + 1;
 
 	DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n",
-		__FUNCTION__, ioc->ioc_hpa, ioc->ibase, ioc->imask,
+		__func__, ioc->ioc_hpa, ioc->ibase, ioc->imask,
 		ioc->iov_size >> 20);
 
 	switch (iovp_size) {
@@ -1563,7 +1642,7 @@ ioc_iova_init(struct ioc *ioc)
 
 	memset(ioc->pdir_base, 0, ioc->pdir_size);
 
-	DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __FUNCTION__,
+	DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __func__,
 		iovp_size >> 10, ioc->pdir_base, ioc->pdir_size);
 
 	ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base);
@@ -1606,7 +1685,7 @@ ioc_iova_init(struct ioc *ioc)
 
 		prefetch_spill_page = virt_to_phys(addr);
 
-		DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __FUNCTION__, prefetch_spill_page);
+		DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __func__, prefetch_spill_page);
 	}
 	/*
   	** Set all the PDIR entries valid w/ the spill page as the target
@@ -1635,7 +1714,7 @@ ioc_resource_init(struct ioc *ioc)
 	/* resource map size dictated by pdir_size */
 	ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */
 	ioc->res_size >>= 3;  /* convert bit count to byte count */
-	DBG_INIT("%s() res_size 0x%x\n", __FUNCTION__, ioc->res_size);
+	DBG_INIT("%s() res_size 0x%x\n", __func__, ioc->res_size);
 
 	ioc->res_map = (char *) __get_free_pages(GFP_KERNEL,
 						 get_order(ioc->res_size));
@@ -1658,7 +1737,7 @@ ioc_resource_init(struct ioc *ioc)
 							      | prefetch_spill_page);
 #endif
 
-	DBG_INIT("%s() res_map %x %p\n", __FUNCTION__,
+	DBG_INIT("%s() res_map %x %p\n", __func__,
 		 ioc->res_size, (void *) ioc->res_map);
 }
 
@@ -1673,15 +1752,13 @@ ioc_sac_init(struct ioc *ioc)
 	 * SAC (single address cycle) addressable, so allocate a
 	 * pseudo-device to enforce that.
 	 */
-	sac = kmalloc(sizeof(*sac), GFP_KERNEL);
+	sac = kzalloc(sizeof(*sac), GFP_KERNEL);
 	if (!sac)
 		panic(PFX "Couldn't allocate struct pci_dev");
-	memset(sac, 0, sizeof(*sac));
 
-	controller = kmalloc(sizeof(*controller), GFP_KERNEL);
+	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
 	if (!controller)
 		panic(PFX "Couldn't allocate struct pci_controller");
-	memset(controller, 0, sizeof(*controller));
 
 	controller->iommu = ioc;
 	sac->sysdata = controller;
@@ -1732,22 +1809,13 @@ static struct ioc_iommu ioc_iommu_info[] __initdata = {
 	{ SX2000_IOC_ID, "sx2000", NULL },
 };
 
-static struct ioc * __init
-ioc_init(u64 hpa, void *handle)
+static void ioc_init(unsigned long hpa, struct ioc *ioc)
 {
-	struct ioc *ioc;
 	struct ioc_iommu *info;
 
-	ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
-	if (!ioc)
-		return NULL;
-
-	memset(ioc, 0, sizeof(*ioc));
-
 	ioc->next = ioc_list;
 	ioc_list = ioc;
 
-	ioc->handle = handle;
 	ioc->ioc_hpa = ioremap(hpa, 0x1000);
 
 	ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID);
@@ -1765,7 +1833,7 @@ ioc_init(u64 hpa, void *handle)
 	iovp_size = (1 << iovp_shift);
 	iovp_mask = ~(iovp_size - 1);
 
-	DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __FUNCTION__,
+	DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __func__,
 		PAGE_SIZE >> 10, iovp_size >> 10);
 
 	if (!ioc->name) {
@@ -1788,8 +1856,6 @@ ioc_init(u64 hpa, void *handle)
 		"%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n",
 		ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF,
 		hpa, ioc->iov_size >> 20, ioc->ibase);
-
-	return ioc;
 }
 
 
@@ -1841,7 +1907,7 @@ ioc_show(struct seq_file *s, void *v)
 	seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
 		ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
 #ifdef CONFIG_NUMA
-	if (ioc->node != MAX_NUMNODES)
+	if (ioc->node != NUMA_NO_NODE)
 		seq_printf(s, "NUMA node       : %d\n", ioc->node);
 #endif
 	seq_printf(s, "IOVA size       : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
@@ -1873,7 +1939,7 @@ ioc_show(struct seq_file *s, void *v)
 	return 0;
 }
 
-static struct seq_operations ioc_seq_ops = {
+static const struct seq_operations ioc_seq_ops = {
 	.start = ioc_start,
 	.next  = ioc_next,
 	.stop  = ioc_stop,
@@ -1886,7 +1952,7 @@ ioc_open(struct inode *inode, struct file *file)
 	return seq_open(file, &ioc_seq_ops);
 }
 
-static struct file_operations ioc_fops = {
+static const struct file_operations ioc_fops = {
 	.open    = ioc_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
@@ -1896,15 +1962,13 @@ static struct file_operations ioc_fops = {
 static void __init
 ioc_proc_init(void)
 {
-	struct proc_dir_entry *dir, *entry;
+	struct proc_dir_entry *dir;
 
 	dir = proc_mkdir("bus/mckinley", NULL);
 	if (!dir)
 		return;
 
-	entry = create_proc_entry(ioc_list->name, 0, dir);
-	if (entry)
-		entry->proc_fops = &ioc_fops;
+	proc_create(ioc_list->name, 0, dir, &ioc_fops);
 }
 #endif
 
@@ -1921,7 +1985,7 @@ sba_connect_bus(struct pci_bus *bus)
 	if (PCI_CONTROLLER(bus)->iommu)
 		return;
 
-	handle = PCI_CONTROLLER(bus)->acpi_handle;
+	handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
 	if (!handle)
 		return;
 
@@ -1944,62 +2008,47 @@ sba_connect_bus(struct pci_bus *bus)
 	printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number);
 }
 
-#ifdef CONFIG_NUMA
 static void __init
 sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
 {
+#ifdef CONFIG_NUMA
 	unsigned int node;
-	int pxm;
-
-	ioc->node = MAX_NUMNODES;
-
-	pxm = acpi_get_pxm(handle);
-
-	if (pxm < 0)
-		return;
-
-	node = pxm_to_nid_map[pxm];
 
-	if (node >= MAX_NUMNODES || !node_online(node))
-		return;
+	node = acpi_get_node(handle);
+	if (node != NUMA_NO_NODE && !node_online(node))
+		node = NUMA_NO_NODE;
 
 	ioc->node = node;
-	return;
-}
-#else
-#define sba_map_ioc_to_node(ioc, handle)
 #endif
+}
 
-static int __init
-acpi_sba_ioc_add(struct acpi_device *device)
+static void acpi_sba_ioc_add(struct ioc *ioc)
 {
-	struct ioc *ioc;
+	acpi_handle handle = ioc->handle;
 	acpi_status status;
 	u64 hpa, length;
-	struct acpi_buffer buffer;
-	struct acpi_device_info *dev_info;
+	struct acpi_device_info *adi;
 
-	status = hp_acpi_csr_space(device->handle, &hpa, &length);
+	ioc_found = ioc->next;
+	status = hp_acpi_csr_space(handle, &hpa, &length);
 	if (ACPI_FAILURE(status))
-		return 1;
+		goto err;
 
-	buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
-	status = acpi_get_object_info(device->handle, &buffer);
+	status = acpi_get_object_info(handle, &adi);
 	if (ACPI_FAILURE(status))
-		return 1;
-	dev_info = buffer.pointer;
+		goto err;
 
 	/*
 	 * For HWP0001, only SBA appears in ACPI namespace.  It encloses the PCI
 	 * root bridges, and its CSR space includes the IOC function.
 	 */
-	if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) {
+	if (strncmp("HWP0001", adi->hardware_id.string, 7) == 0) {
 		hpa += ZX1_IOC_OFFSET;
 		/* zx1 based systems default to kernel page size iommu pages */
 		if (!iovp_shift)
 			iovp_shift = min(PAGE_SHIFT, 16);
 	}
-	ACPI_MEM_FREE(dev_info);
+	kfree(adi);
 
 	/*
 	 * default anything not caught above or specified on cmdline to 4k
@@ -2008,38 +2057,87 @@ acpi_sba_ioc_add(struct acpi_device *device)
 	if (!iovp_shift)
 		iovp_shift = 12;
 
-	ioc = ioc_init(hpa, device->handle);
+	ioc_init(hpa, ioc);
+	/* setup NUMA node association */
+	sba_map_ioc_to_node(ioc, handle);
+	return;
+
+ err:
+	kfree(ioc);
+}
+
+static const struct acpi_device_id hp_ioc_iommu_device_ids[] = {
+	{"HWP0001", 0},
+	{"HWP0004", 0},
+	{"", 0},
+};
+
+static int acpi_sba_ioc_attach(struct acpi_device *device,
+			       const struct acpi_device_id *not_used)
+{
+	struct ioc *ioc;
+
+	ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
 	if (!ioc)
-		return 1;
+		return -ENOMEM;
 
-	/* setup NUMA node association */
-	sba_map_ioc_to_node(ioc, device->handle);
-	return 0;
+	ioc->next = ioc_found;
+	ioc_found = ioc;
+	ioc->handle = device->handle;
+	return 1;
 }
 
-static struct acpi_driver acpi_sba_ioc_driver = {
-	.name		= "IOC IOMMU Driver",
-	.ids		= "HWP0001,HWP0004",
-	.ops		= {
-		.add	= acpi_sba_ioc_add,
-	},
+
+static struct acpi_scan_handler acpi_sba_ioc_handler = {
+	.ids	= hp_ioc_iommu_device_ids,
+	.attach	= acpi_sba_ioc_attach,
 };
 
+static int __init acpi_sba_ioc_init_acpi(void)
+{
+	return acpi_scan_add_handler(&acpi_sba_ioc_handler);
+}
+/* This has to run before acpi_scan_init(). */
+arch_initcall(acpi_sba_ioc_init_acpi);
+
+extern struct dma_map_ops swiotlb_dma_ops;
+
 static int __init
 sba_init(void)
 {
 	if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
 		return 0;
 
-	acpi_bus_register_driver(&acpi_sba_ioc_driver);
+#if defined(CONFIG_IA64_GENERIC)
+	/* If we are booting a kdump kernel, the sba_iommu will
+	 * cause devices that were not shutdown properly to MCA
+	 * as soon as they are turned back on.  Our only option for
+	 * a successful kdump kernel boot is to use the swiotlb.
+	 */
+	if (is_kdump_kernel()) {
+		dma_ops = &swiotlb_dma_ops;
+		if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
+			panic("Unable to initialize software I/O TLB:"
+				  " Try machvec=dig boot option");
+		machvec_init("dig");
+		return 0;
+	}
+#endif
+
+	/*
+	 * ioc_found should be populated by the acpi_sba_ioc_handler's .attach()
+	 * routine, but that only happens if acpi_scan_init() has already run.
+	 */
+	while (ioc_found)
+		acpi_sba_ioc_add(ioc_found);
+
 	if (!ioc_list) {
 #ifdef CONFIG_IA64_GENERIC
-		extern int swiotlb_late_init_with_default_size (size_t size);
-
 		/*
 		 * If we didn't find something sba_iommu can claim, we
 		 * need to setup the swiotlb and switch to the dig machvec.
 		 */
+		dma_ops = &swiotlb_dma_ops;
 		if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
 			panic("Unable to find SBA IOMMU or initialize "
 			      "software I/O TLB: Try machvec=dig boot option");
@@ -2086,15 +2184,13 @@ nosbagart(char *str)
 	return 1;
 }
 
-int
-sba_dma_supported (struct device *dev, u64 mask)
+static int sba_dma_supported (struct device *dev, u64 mask)
 {
 	/* make sure it's at least 32bit capable */
 	return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL);
 }
 
-int
-sba_dma_mapping_error (dma_addr_t dma_addr)
+static int sba_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return 0;
 }
@@ -2116,7 +2212,7 @@ sba_page_override(char *str)
 			break;
 		default:
 			printk("%s: unknown/unsupported iommu page size %ld\n",
-			       __FUNCTION__, page_size);
+			       __func__, page_size);
 	}
 
 	return 1;
@@ -2124,11 +2220,22 @@ sba_page_override(char *str)
 
 __setup("sbapagesize=",sba_page_override);
 
-EXPORT_SYMBOL(sba_dma_mapping_error);
-EXPORT_SYMBOL(sba_map_single);
-EXPORT_SYMBOL(sba_unmap_single);
-EXPORT_SYMBOL(sba_map_sg);
-EXPORT_SYMBOL(sba_unmap_sg);
-EXPORT_SYMBOL(sba_dma_supported);
-EXPORT_SYMBOL(sba_alloc_coherent);
-EXPORT_SYMBOL(sba_free_coherent);
+struct dma_map_ops sba_dma_ops = {
+	.alloc			= sba_alloc_coherent,
+	.free			= sba_free_coherent,
+	.map_page		= sba_map_page,
+	.unmap_page		= sba_unmap_page,
+	.map_sg			= sba_map_sg_attrs,
+	.unmap_sg		= sba_unmap_sg_attrs,
+	.sync_single_for_cpu	= machvec_dma_sync_single,
+	.sync_sg_for_cpu	= machvec_dma_sync_sg,
+	.sync_single_for_device	= machvec_dma_sync_single,
+	.sync_sg_for_device	= machvec_dma_sync_sg,
+	.dma_supported		= sba_dma_supported,
+	.mapping_error		= sba_dma_mapping_error,
+};
+
+void sba_dma_init(void)
+{
+	dma_ops = &sba_dma_ops;
+}
diff --git a/arch/ia64/hp/sim/Kconfig b/arch/ia64/hp/sim/Kconfig
index 18ccb1266e1..d84707d5520 100644
--- a/arch/ia64/hp/sim/Kconfig
+++ b/arch/ia64/hp/sim/Kconfig
@@ -4,17 +4,19 @@ menu "HP Simulator drivers"
 
 config HP_SIMETH
 	bool "Simulated Ethernet "
+	depends on NET
 
 config HP_SIMSERIAL
 	bool "Simulated serial driver support"
+	depends on TTY
 
 config HP_SIMSERIAL_CONSOLE
 	bool "Console for HP simulator"
 	depends on HP_SIMSERIAL
 
 config HP_SIMSCSI
-	tristate "Simulated SCSI disk"
-	depends on SCSI
+	bool "Simulated SCSI disk"
+	depends on SCSI=y
 
 endmenu
 
diff --git a/arch/ia64/hp/sim/boot/Makefile b/arch/ia64/hp/sim/boot/Makefile
index df6e9968c84..2e805e0cc56 100644
--- a/arch/ia64/hp/sim/boot/Makefile
+++ b/arch/ia64/hp/sim/boot/Makefile
@@ -33,5 +33,5 @@ $(obj)/vmlinux.bin: vmlinux FORCE
 LDFLAGS_bootloader = -static -T
 
 $(obj)/bootloader: $(src)/bootloader.lds $(obj)/bootloader.o $(obj)/boot_head.o $(obj)/fw-emu.o \
-                   lib/lib.a arch/ia64/lib/lib.a FORCE
+                   lib/lib.a arch/ia64/lib/built-in.o arch/ia64/lib/lib.a FORCE
 	$(call if_changed,ld)
diff --git a/arch/ia64/hp/sim/boot/boot_head.S b/arch/ia64/hp/sim/boot/boot_head.S
index a9bd71ac78e..8808565491f 100644
--- a/arch/ia64/hp/sim/boot/boot_head.S
+++ b/arch/ia64/hp/sim/boot/boot_head.S
@@ -26,6 +26,7 @@ GLOBAL_ENTRY(_start)
 	movl sp = stack_mem+16384-16
 	bsw.1
 	br.call.sptk.many rp=start_bootloader
+0:	nop 0		  /* dummy nop to make unwinding work */
 END(_start)
 
 /*
diff --git a/arch/ia64/hp/sim/boot/bootloader.c b/arch/ia64/hp/sim/boot/bootloader.c
index 51a7b7b4dd0..28f4b230b8c 100644
--- a/arch/ia64/hp/sim/boot/bootloader.c
+++ b/arch/ia64/hp/sim/boot/bootloader.c
@@ -11,7 +11,6 @@
  */
 struct task_struct;	/* forward declaration for elf.h */
 
-#include <linux/config.h>
 #include <linux/elf.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -21,7 +20,6 @@ struct task_struct;	/* forward declaration for elf.h */
 #include <asm/pal.h>
 #include <asm/pgtable.h>
 #include <asm/sal.h>
-#include <asm/system.h>
 
 #include "ssc.h"
 
diff --git a/arch/ia64/hp/sim/boot/bootloader.lds b/arch/ia64/hp/sim/boot/bootloader.lds
index 69ae5853103..3977f25a126 100644
--- a/arch/ia64/hp/sim/boot/bootloader.lds
+++ b/arch/ia64/hp/sim/boot/bootloader.lds
@@ -22,10 +22,11 @@ SECTIONS
   .sdata     : { *(.sdata) }
   _edata  =  .;
 
-  _bss = .;
+  __bss_start = .;
   .sbss      : { *(.sbss) *(.scommon) }
   .bss       : { *(.bss) *(COMMON) }
   . = ALIGN(64 / 8);
+  __bss_stop = .;
   _end = . ;
 
   /* Stabs debugging sections.  */
diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c
index 30fdfb1d0a5..87bf9ad8cf0 100644
--- a/arch/ia64/hp/sim/boot/fw-emu.c
+++ b/arch/ia64/hp/sim/boot/fw-emu.c
@@ -4,7 +4,6 @@
  * Copyright (C) 1998-2001 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
-#include <linux/config.h>
 
 #ifdef CONFIG_PCI
 # include <linux/pci.h>
@@ -14,6 +13,7 @@
 #include <asm/io.h>
 #include <asm/pal.h>
 #include <asm/sal.h>
+#include <asm/setup.h>
 
 #include "ssc.h"
 
@@ -161,28 +161,19 @@ sal_emulator (long index, unsigned long in1, unsigned long in2,
 	 */
 	status = 0;
 	if (index == SAL_FREQ_BASE) {
-		switch (in1) {
-		      case SAL_FREQ_BASE_PLATFORM:
+		if (in1 == SAL_FREQ_BASE_PLATFORM)
 			r9 = 200000000;
-			break;
-
-		      case SAL_FREQ_BASE_INTERVAL_TIMER:
+		else if (in1 == SAL_FREQ_BASE_INTERVAL_TIMER) {
 			/*
 			 * Is this supposed to be the cr.itc frequency
 			 * or something platform specific?  The SAL
 			 * doc ain't exactly clear on this...
 			 */
 			r9 = 700000000;
-			break;
-
-		      case SAL_FREQ_BASE_REALTIME_CLOCK:
+		} else if (in1 == SAL_FREQ_BASE_REALTIME_CLOCK)
 			r9 = 1;
-			break;
-
-		      default:
+		else
 			status = -1;
-			break;
-		}
 	} else if (index == SAL_SET_VECTORS) {
 		;
 	} else if (index == SAL_GET_STATE_INFO) {
@@ -286,9 +277,9 @@ sys_fw_init (const char *args, int arglen)
 	}
 	cmd_line[arglen] = '\0';
 
-	memset(efi_systab, 0, sizeof(efi_systab));
+	memset(efi_systab, 0, sizeof(*efi_systab));
 	efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
-	efi_systab->hdr.revision  = EFI_SYSTEM_TABLE_REVISION;
+	efi_systab->hdr.revision  = ((1 << 16) | 00);
 	efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
 	efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0");
 	efi_systab->fw_revision = 1;
@@ -299,16 +290,16 @@ sys_fw_init (const char *args, int arglen)
 	efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE;
 	efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION;
 	efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr);
-	efi_runtime->get_time = __pa(&fw_efi_get_time);
-	efi_runtime->set_time = __pa(&efi_unimplemented);
-	efi_runtime->get_wakeup_time = __pa(&efi_unimplemented);
-	efi_runtime->set_wakeup_time = __pa(&efi_unimplemented);
-	efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented);
-	efi_runtime->get_variable = __pa(&efi_unimplemented);
-	efi_runtime->get_next_variable = __pa(&efi_unimplemented);
-	efi_runtime->set_variable = __pa(&efi_unimplemented);
-	efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented);
-	efi_runtime->reset_system = __pa(&efi_reset_system);
+	efi_runtime->get_time = (void *)__pa(&fw_efi_get_time);
+	efi_runtime->set_time = (void *)__pa(&efi_unimplemented);
+	efi_runtime->get_wakeup_time = (void *)__pa(&efi_unimplemented);
+	efi_runtime->set_wakeup_time = (void *)__pa(&efi_unimplemented);
+	efi_runtime->set_virtual_address_map = (void *)__pa(&efi_unimplemented);
+	efi_runtime->get_variable = (void *)__pa(&efi_unimplemented);
+	efi_runtime->get_next_variable = (void *)__pa(&efi_unimplemented);
+	efi_runtime->set_variable = (void *)__pa(&efi_unimplemented);
+	efi_runtime->get_next_high_mono_count = (void *)__pa(&efi_unimplemented);
+	efi_runtime->reset_system = (void *)__pa(&efi_reset_system);
 
 	efi_tables->guid = SAL_SYSTEM_TABLE_GUID;
 	efi_tables->table = __pa(sal_systab);
@@ -330,11 +321,6 @@ sys_fw_init (const char *args, int arglen)
 	strcpy(sal_systab->product_id, "HP-simulator");
 #endif
 
-#ifdef CONFIG_IA64_SDV
-	strcpy(sal_systab->oem_id, "Intel");
-	strcpy(sal_systab->product_id, "SDV");
-#endif
-
 	/* fill in an entry point: */
 	sal_ed->type = SAL_DESC_ENTRY_POINT;
 	sal_ed->pal_proc = __pa(pal_desc[0]);
diff --git a/arch/ia64/hp/sim/hpsim_console.c b/arch/ia64/hp/sim/hpsim_console.c
index 5deff21e587..01663bc42b1 100644
--- a/arch/ia64/hp/sim/hpsim_console.c
+++ b/arch/ia64/hp/sim/hpsim_console.c
@@ -5,7 +5,6 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
  */
-#include <linux/config.h>
 
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -22,6 +21,7 @@
 #include <asm/machvec.h>
 #include <asm/pgtable.h>
 #include <asm/sal.h>
+#include <asm/hpsim.h>
 
 #include "hpsim_ssc.h"
 
@@ -29,7 +29,7 @@ static int simcons_init (struct console *, char *);
 static void simcons_write (struct console *, const char *, unsigned);
 static struct tty_driver *simcons_console_device (struct console *, int *);
 
-struct console hpsim_cons = {
+static struct console hpsim_cons = {
 	.name =		"simcons",
 	.write =	simcons_write,
 	.device =	simcons_console_device,
@@ -59,7 +59,18 @@ simcons_write (struct console *cons, const char *buf, unsigned count)
 
 static struct tty_driver *simcons_console_device (struct console *c, int *index)
 {
-	extern struct tty_driver *hp_simserial_driver;
 	*index = c->index;
 	return hp_simserial_driver;
 }
+
+int simcons_register(void)
+{
+	if (!ia64_platform_is("hpsim"))
+		return 1;
+
+	if (hpsim_cons.flags & CON_ENABLED)
+		return 1;
+
+	register_console(&hpsim_cons);
+	return 0;
+}
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index c0d25a2a3e9..0aa70ebda49 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -10,42 +10,66 @@
 #include <linux/sched.h>
 #include <linux/irq.h>
 
+#include "hpsim_ssc.h"
+
 static unsigned int
-hpsim_irq_startup (unsigned int irq)
+hpsim_irq_startup(struct irq_data *data)
 {
 	return 0;
 }
 
 static void
-hpsim_irq_noop (unsigned int irq)
+hpsim_irq_noop(struct irq_data *data)
 {
 }
 
-static void
-hpsim_set_affinity_noop (unsigned int a, cpumask_t b)
+static int
+hpsim_set_affinity_noop(struct irq_data *d, const struct cpumask *b, bool f)
 {
+	return 0;
 }
 
-static struct hw_interrupt_type irq_type_hp_sim = {
-	.typename =	"hpsim",
-	.startup =	hpsim_irq_startup,
-	.shutdown =	hpsim_irq_noop,
-	.enable =	hpsim_irq_noop,
-	.disable =	hpsim_irq_noop,
-	.ack =		hpsim_irq_noop,
-	.end =		hpsim_irq_noop,
-	.set_affinity =	hpsim_set_affinity_noop,
+static struct irq_chip irq_type_hp_sim = {
+	.name =			"hpsim",
+	.irq_startup =		hpsim_irq_startup,
+	.irq_shutdown =		hpsim_irq_noop,
+	.irq_enable =		hpsim_irq_noop,
+	.irq_disable =		hpsim_irq_noop,
+	.irq_ack =		hpsim_irq_noop,
+	.irq_set_affinity =	hpsim_set_affinity_noop,
 };
 
+static void hpsim_irq_set_chip(int irq)
+{
+	struct irq_chip *chip = irq_get_chip(irq);
+
+	if (chip == &no_irq_chip)
+		irq_set_chip(irq, &irq_type_hp_sim);
+}
+
+static void hpsim_connect_irq(int intr, int irq)
+{
+	ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT);
+}
+
+int hpsim_get_irq(int intr)
+{
+	int irq = assign_irq_vector(AUTO_ASSIGN);
+
+	if (irq >= 0) {
+		hpsim_irq_set_chip(irq);
+		irq_set_handler(irq, handle_simple_irq);
+		hpsim_connect_irq(intr, irq);
+	}
+
+	return irq;
+}
+
 void __init
 hpsim_irq_init (void)
 {
-	irq_desc_t *idesc;
 	int i;
 
-	for (i = 0; i < NR_IRQS; ++i) {
-		idesc = irq_descp(i);
-		if (idesc->handler == &no_irq_type)
-			idesc->handler = &irq_type_hp_sim;
-	}
+	for_each_active_irq(i)
+		hpsim_irq_set_chip(i);
 }
diff --git a/arch/ia64/hp/sim/hpsim_setup.c b/arch/ia64/hp/sim/hpsim_setup.c
index 694fc86bfbd..664a5402a69 100644
--- a/arch/ia64/hp/sim/hpsim_setup.c
+++ b/arch/ia64/hp/sim/hpsim_setup.c
@@ -5,7 +5,6 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
  */
-#include <linux/config.h>
 #include <linux/console.h>
 #include <linux/init.h>
 #include <linux/kdev_t.h>
@@ -22,16 +21,11 @@
 #include <asm/machvec.h>
 #include <asm/pgtable.h>
 #include <asm/sal.h>
+#include <asm/hpsim.h>
 
 #include "hpsim_ssc.h"
 
 void
-ia64_ssc_connect_irq (long intr, long irq)
-{
-	ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT);
-}
-
-void
 ia64_ctl_trace (long on)
 {
 	ia64_ssc(on, 0, 0, 0, SSC_CTL_TRACE);
@@ -42,11 +36,5 @@ hpsim_setup (char **cmdline_p)
 {
 	ROOT_DEV = Root_SDA1;		/* default to first SCSI drive */
 
-#ifdef CONFIG_HP_SIMSERIAL_CONSOLE
-	{
-		extern struct console hpsim_cons;
-		if (ia64_platform_is("hpsim"))
-			register_console(&hpsim_cons);
-	}
-#endif
+	simcons_register();
 }
diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
index 0639ec0ed01..d1b04c4c95e 100644
--- a/arch/ia64/hp/sim/simeth.c
+++ b/arch/ia64/hp/sim/simeth.c
@@ -4,7 +4,6 @@
  * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
  *	Stephane Eranian <eranian@hpl.hp.com>
  */
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/types.h>
@@ -21,8 +20,10 @@
 #include <linux/skbuff.h>
 #include <linux/notifier.h>
 #include <linux/bitops.h>
-#include <asm/system.h>
 #include <asm/irq.h>
+#include <asm/hpsim.h>
+
+#include "hpsim_ssc.h"
 
 #define SIMETH_RECV_MAX	10
 
@@ -36,12 +37,6 @@
 #define SIMETH_FRAME_SIZE	ETH_FRAME_LEN
 
 
-#define SSC_NETDEV_PROBE		100
-#define SSC_NETDEV_SEND			101
-#define SSC_NETDEV_RECV			102
-#define SSC_NETDEV_ATTACH		103
-#define SSC_NETDEV_DETACH		104
-
 #define NETWORK_INTR			8
 
 struct simeth_local {
@@ -55,7 +50,7 @@ static int simeth_close(struct net_device *dev);
 static int simeth_tx(struct sk_buff *skb, struct net_device *dev);
 static int simeth_rx(struct net_device *dev);
 static struct net_device_stats *simeth_get_stats(struct net_device *dev);
-static irqreturn_t simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs);
+static irqreturn_t simeth_interrupt(int irq, void *dev_id);
 static void set_multicast_list(struct net_device *dev);
 static int simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr);
 
@@ -88,7 +83,7 @@ static int simeth_debug;		/* set to 1 to get debug information */
  */
 static struct notifier_block simeth_dev_notifier = {
 	simeth_device_event,
-	0
+	NULL
 };
 
 
@@ -125,9 +120,6 @@ simeth_probe (void)
 	return r;
 }
 
-extern long ia64_ssc (long, long, long, long, int);
-extern void ia64_ssc_connect_irq (long intr, long irq);
-
 static inline int
 netdev_probe(char *name, unsigned char *ether)
 {
@@ -136,17 +128,6 @@ netdev_probe(char *name, unsigned char *ether)
 
 
 static inline int
-netdev_connect(int irq)
-{
-	/* XXX Fix me
-	 * this does not support multiple cards
-	 * also no return value
-	 */
-	ia64_ssc_connect_irq(NETWORK_INTR, irq);
-	return 0;
-}
-
-static inline int
 netdev_attach(int fd, int irq, unsigned int ipaddr)
 {
 	/* this puts the host interface in the right mode (start interrupting) */
@@ -174,6 +155,15 @@ netdev_read(int fd, unsigned char *buf, unsigned int len)
 	return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_RECV);
 }
 
+static const struct net_device_ops simeth_netdev_ops = {
+	.ndo_open		= simeth_open,
+	.ndo_stop		= simeth_close,
+	.ndo_start_xmit		= simeth_tx,
+	.ndo_get_stats		= simeth_get_stats,
+	.ndo_set_rx_mode	= set_multicast_list, /* not yet used */
+
+};
+
 /*
  * Function shared with module code, so cannot be in init section
  *
@@ -191,7 +181,7 @@ simeth_probe1(void)
 	unsigned char mac_addr[ETH_ALEN];
 	struct simeth_local *local;
 	struct net_device *dev;
-	int fd, i, err, rc;
+	int fd, err, rc;
 
 	/*
 	 * XXX Fix me
@@ -213,14 +203,10 @@ simeth_probe1(void)
 
 	memcpy(dev->dev_addr, mac_addr, sizeof(mac_addr));
 
-	local = dev->priv;
+	local = netdev_priv(dev);
 	local->simfd = fd; /* keep track of underlying file descriptor */
 
-	dev->open		= simeth_open;
-	dev->stop		= simeth_close;
-	dev->hard_start_xmit	= simeth_tx;
-	dev->get_stats		= simeth_get_stats;
-	dev->set_multicast_list = set_multicast_list; /* no yet used */
+	dev->netdev_ops = &simeth_netdev_ops;
 
 	err = register_netdev(dev);
 	if (err) {
@@ -228,22 +214,16 @@ simeth_probe1(void)
 		return err;
 	}
 
-	if ((rc = assign_irq_vector(AUTO_ASSIGN)) < 0)
-		panic("%s: out of interrupt vectors!\n", __FUNCTION__);
-	dev->irq = rc;
-
 	/*
 	 * attach the interrupt in the simulator, this does enable interrupts
 	 * until a netdev_attach() is called
 	 */
-	netdev_connect(dev->irq);
+	if ((rc = hpsim_get_irq(NETWORK_INTR)) < 0)
+		panic("%s: out of interrupt vectors!\n", __func__);
+	dev->irq = rc;
 
-	printk(KERN_INFO "%s: hosteth=%s simfd=%d, HwAddr",
-	       dev->name, simeth_device, local->simfd);
-	for(i = 0; i < ETH_ALEN; i++) {
-		printk(" %2.2x", dev->dev_addr[i]);
-	}
-	printk(", IRQ %d\n", dev->irq);
+	printk(KERN_INFO "%s: hosteth=%s simfd=%d, HwAddr=%pm, IRQ %d\n",
+	       dev->name, simeth_device, local->simfd, dev->dev_addr, dev->irq);
 
 	return 0;
 }
@@ -288,7 +268,7 @@ static __inline__ int dev_is_ethdev(struct net_device *dev)
 static int
 simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
 {
-	struct net_device *dev = ptr;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct simeth_local *local;
 	struct in_device *in_dev;
 	struct in_ifaddr **ifap = NULL;
@@ -301,6 +281,9 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
 		return NOTIFY_DONE;
 	}
 
+	if (dev_net(dev) != &init_net)
+		return NOTIFY_DONE;
+
 	if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE;
 
 	/*
@@ -321,7 +304,7 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
 	}
 
 	printk(KERN_INFO "simeth_device_event: %s ipaddr=0x%x\n",
-	       dev->name, htonl(ifa->ifa_local));
+	       dev->name, ntohl(ifa->ifa_local));
 
 	/*
 	 * XXX Fix me
@@ -329,10 +312,10 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
 	 * we get DOWN then UP.
 	 */
 
-	local = dev->priv;
+	local = netdev_priv(dev);
 	/* now do it for real */
 	r = event == NETDEV_UP ?
-		netdev_attach(local->simfd, dev->irq, htonl(ifa->ifa_local)):
+		netdev_attach(local->simfd, dev->irq, ntohl(ifa->ifa_local)):
 		netdev_detach(local->simfd);
 
 	printk(KERN_INFO "simeth: netdev_attach/detach: event=%s ->%d\n",
@@ -384,7 +367,7 @@ frame_print(unsigned char *from, unsigned char *frame, int len)
 static int
 simeth_tx(struct sk_buff *skb, struct net_device *dev)
 {
-	struct simeth_local *local = dev->priv;
+	struct simeth_local *local = netdev_priv(dev);
 
 #if 0
 	/* ensure we have at least ETH_ZLEN bytes (min frame size) */
@@ -411,7 +394,7 @@ simeth_tx(struct sk_buff *skb, struct net_device *dev)
 	 */
 
 	dev_kfree_skb(skb);
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 static inline struct sk_buff *
@@ -428,7 +411,6 @@ make_new_skb(struct net_device *dev)
 		printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
 		return NULL;
 	}
-	nskb->dev = dev;
 
 	skb_reserve(nskb, 2);	/* Align IP on 16 byte boundaries */
 
@@ -448,7 +430,7 @@ simeth_rx(struct net_device *dev)
 	int			len;
 	int			rcv_count = SIMETH_RECV_MAX;
 
-	local = dev->priv;
+	local = netdev_priv(dev);
 	/*
 	 * the loop concept has been borrowed from other drivers
 	 * looks to me like it's a throttling thing to avoid pushing to many
@@ -475,7 +457,7 @@ simeth_rx(struct net_device *dev)
 		 * XXX Fix me
 		 * Should really do a csum+copy here
 		 */
-		memcpy(skb->data, frame, len);
+		skb_copy_to_linear_data(skb, frame, len);
 #endif
 		skb->protocol = eth_type_trans(skb, dev);
 
@@ -498,15 +480,10 @@ simeth_rx(struct net_device *dev)
  * Interrupt handler (Yes, we can do it too !!!)
  */
 static irqreturn_t
-simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs)
+simeth_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 
-	if ( dev == NULL ) {
-		printk(KERN_WARNING "simeth: irq %d for unknown device\n", irq);
-		return IRQ_NONE;
-	}
-
 	/*
 	 * very simple loop because we get interrupts only when receiving
 	 */
@@ -517,7 +494,7 @@ simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs)
 static struct net_device_stats *
 simeth_get_stats(struct net_device *dev)
 {
-	struct simeth_local *local = dev->priv;
+	struct simeth_local *local = netdev_priv(dev);
 
 	return &local->stats;
 }
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
index a3fe9753113..3a428f19a00 100644
--- a/arch/ia64/hp/sim/simscsi.c
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/timer.h>
 #include <asm/irq.h>
+#include "hpsim_ssc.h"
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -59,8 +60,6 @@ struct disk_stat {
 	unsigned count;
 };
 
-extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
-
 static int desc[16] = {
 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
 };
@@ -89,8 +88,8 @@ simscsi_setup (char *s)
 	if (strlen(s) > MAX_ROOT_LEN) {
 		printk(KERN_ERR "simscsi_setup: prefix too long---using default %s\n",
 		       simscsi_root);
-	}
-	simscsi_root = s;
+	} else
+		simscsi_root = s;
 	return 1;
 }
 
@@ -101,9 +100,9 @@ simscsi_interrupt (unsigned long val)
 {
 	struct scsi_cmnd *sc;
 
-	while ((sc = queue[rd].sc) != 0) {
+	while ((sc = queue[rd].sc) != NULL) {
 		atomic_dec(&num_reqs);
-		queue[rd].sc = 0;
+		queue[rd].sc = NULL;
 		if (DBG)
 			printk("simscsi_interrupt: done with %ld\n", sc->serial_number);
 		(*sc->scsi_done)(sc);
@@ -122,48 +121,22 @@ simscsi_biosparam (struct scsi_device *sdev, struct block_device *n,
 }
 
 static void
-simscsi_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset, unsigned long len)
-{
-	struct disk_stat stat;
-	struct disk_req req;
-
-	req.addr = __pa(sc->request_buffer);
-	req.len  = len;			/* # of bytes to transfer */
-
-	if (sc->request_bufflen < req.len)
-		return;
-
-	stat.fd = desc[sc->device->id];
-	if (DBG)
-		printk("simscsi_%s @ %lx (off %lx)\n",
-		       mode == SSC_READ ? "read":"write", req.addr, offset);
-	ia64_ssc(stat.fd, 1, __pa(&req), offset, mode);
-	ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION);
-
-	if (stat.count == req.len) {
-		sc->result = GOOD;
-	} else {
-		sc->result = DID_ERROR << 16;
-	}
-}
-
-static void
 simscsi_sg_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset)
 {
-	int list_len = sc->use_sg;
-	struct scatterlist *sl = (struct scatterlist *)sc->buffer;
+	int i;
+	struct scatterlist *sl;
 	struct disk_stat stat;
 	struct disk_req req;
 
 	stat.fd = desc[sc->device->id];
 
-	while (list_len) {
-		req.addr = __pa(page_address(sl->page) + sl->offset);
+	scsi_for_each_sg(sc, sl, scsi_sg_count(sc), i) {
+		req.addr = __pa(sg_virt(sl));
 		req.len  = sl->length;
 		if (DBG)
 			printk("simscsi_sg_%s @ %lx (off %lx) use_sg=%d len=%d\n",
 			       mode == SSC_READ ? "read":"write", req.addr, offset,
-			       list_len, sl->length);
+			       scsi_sg_count(sc) - i, sl->length);
 		ia64_ssc(stat.fd, 1, __pa(&req), offset, mode);
 		ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION);
 
@@ -173,8 +146,6 @@ simscsi_sg_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset)
 			return;
 		}
 		offset +=  sl->length;
-		sl++;
-		list_len--;
 	}
 	sc->result = GOOD;
 }
@@ -190,10 +161,7 @@ simscsi_readwrite6 (struct scsi_cmnd *sc, int mode)
 	unsigned long offset;
 
 	offset = (((sc->cmnd[1] & 0x1f) << 16) | (sc->cmnd[2] << 8) | sc->cmnd[3])*512;
-	if (sc->use_sg > 0)
-		simscsi_sg_readwrite(sc, mode, offset);
-	else
-		simscsi_readwrite(sc, mode, offset, sc->cmnd[4]*512);
+	simscsi_sg_readwrite(sc, mode, offset);
 }
 
 static size_t
@@ -230,31 +198,11 @@ simscsi_readwrite10 (struct scsi_cmnd *sc, int mode)
 		| ((unsigned long)sc->cmnd[3] << 16)
 		| ((unsigned long)sc->cmnd[4] <<  8) 
 		| ((unsigned long)sc->cmnd[5] <<  0))*512UL;
-	if (sc->use_sg > 0)
-		simscsi_sg_readwrite(sc, mode, offset);
-	else
-		simscsi_readwrite(sc, mode, offset, ((sc->cmnd[7] << 8) | sc->cmnd[8])*512);
-}
-
-static void simscsi_fillresult(struct scsi_cmnd *sc, char *buf, unsigned len)
-{
-
-	int scatterlen = sc->use_sg;
-	struct scatterlist *slp;
-
-	if (scatterlen == 0)
-		memcpy(sc->request_buffer, buf, len);
-	else for (slp = (struct scatterlist *)sc->buffer; scatterlen-- > 0 && len > 0; slp++) {
-		unsigned thislen = min(len, slp->length);
-
-		memcpy(page_address(slp->page) + slp->offset, buf, thislen);
-		slp++;
-		len -= thislen;
-	}
+	simscsi_sg_readwrite(sc, mode, offset);
 }
 
 static int
-simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
+simscsi_queuecommand_lck (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
 	unsigned int target_id = sc->device->id;
 	char fname[MAX_ROOT_LEN+16];
@@ -274,7 +222,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 	if (target_id <= 15 && sc->device->lun == 0) {
 		switch (sc->cmnd[0]) {
 		      case INQUIRY:
-			if (sc->request_bufflen < 35) {
+			if (scsi_bufflen(sc) < 35) {
 				break;
 			}
 			sprintf (fname, "%s%c", simscsi_root, 'a' + target_id);
@@ -294,7 +242,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			buf[6] = 0;	/* reserved */
 			buf[7] = 0;	/* various flags */
 			memcpy(buf + 8, "HP      SIMULATED DISK  0.00",  28);
-			simscsi_fillresult(sc, buf, 36);
+			scsi_sg_copy_from_buffer(sc, buf, 36);
 			sc->result = GOOD;
 			break;
 
@@ -327,7 +275,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			break;
 
 		      case READ_CAPACITY:
-			if (desc[target_id] < 0 || sc->request_bufflen < 8) {
+			if (desc[target_id] < 0 || scsi_bufflen(sc) < 8) {
 				break;
 			}
 			buf = localbuf;
@@ -342,14 +290,15 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			buf[5] = 0;
 			buf[6] = 2;
 			buf[7] = 0;
-			simscsi_fillresult(sc, buf, 8);
+			scsi_sg_copy_from_buffer(sc, buf, 8);
 			sc->result = GOOD;
 			break;
 
 		      case MODE_SENSE:
 		      case MODE_SENSE_10:
 			/* sd.c uses this to determine whether disk does write-caching. */
-			simscsi_fillresult(sc, (char *)empty_zero_page, sc->request_bufflen);
+			scsi_sg_copy_from_buffer(sc, (char *)empty_zero_page,
+						 PAGE_SIZE);
 			sc->result = GOOD;
 			break;
 
@@ -377,6 +326,8 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 	return 0;
 }
 
+static DEF_SCSI_QCMD(simscsi_queuecommand)
+
 static int
 simscsi_host_reset (struct scsi_cmnd *sc)
 {
@@ -408,8 +359,13 @@ simscsi_init(void)
 		return -ENOMEM;
 
 	error = scsi_add_host(host, NULL);
-	if (!error)
-		scsi_scan_host(host);
+	if (error)
+		goto free_host;
+	scsi_scan_host(host);
+	return 0;
+
+ free_host:
+	scsi_host_put(host);
 	return error;
 }
 
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index 0e5c6ae5022..e70cadec7ce 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -4,19 +4,13 @@
  * This driver is mostly used for bringup purposes and will go away.
  * It has a strong dependency on the system console. All outputs
  * are rerouted to the same facility as the one used by printk which, in our
- * case means sys_sim.c console (goes via the simulator). The code hereafter
- * is completely leveraged from the serial.c driver.
+ * case means sys_sim.c console (goes via the simulator).
  *
  * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co
  *	Stephane Eranian <eranian@hpl.hp.com>
  *	David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 02/04/00 D. Mosberger	Merged in serial.c bug fixes in rs_close().
- * 02/25/00 D. Mosberger	Synced up with 2.3.99pre-5 version of serial.c.
- * 07/30/02 D. Mosberger	Replace sti()/cli() with explicit spinlocks & local irq masking
  */
 
-#include <linux/config.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
@@ -25,21 +19,20 @@
 #include <linux/major.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
+#include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/capability.h>
+#include <linux/circ_buf.h>
 #include <linux/console.h>
+#include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/serial.h>
-#include <linux/serialP.h>
 #include <linux/sysrq.h>
+#include <linux/uaccess.h>
 
-#include <asm/irq.h>
-#include <asm/hw_irq.h>
-#include <asm/uaccess.h>
+#include <asm/hpsim.h>
 
-#ifdef CONFIG_KDB
-# include <linux/kdb.h>
-#endif
+#include "hpsim_ssc.h"
 
 #undef SIMSERIAL_DEBUG	/* define this to get some debug information */
 
@@ -47,148 +40,62 @@
 
 #define NR_PORTS	1	/* only one port for now */
 
-#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT)
-
-#define SSC_GETCHAR	21
-
-extern long ia64_ssc (long, long, long, long, int);
-extern void ia64_ssc_connect_irq (long intr, long irq);
-
-static char *serial_name = "SimSerial driver";
-static char *serial_version = "0.6";
-
-/*
- * This has been extracted from asm/serial.h. We need one eventually but
- * I don't know exactly what we're going to put in it so just fake one
- * for now.
- */
-#define BASE_BAUD ( 1843200 / 16 )
-
-#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
-
-/*
- * Most of the values here are meaningless to this particular driver.
- * However some values must be preserved for the code (leveraged from serial.c
- * to work correctly).
- * port must not be 0
- * type must not be UNKNOWN
- * So I picked arbitrary (guess from where?) values instead
- */
-static struct serial_state rs_table[NR_PORTS]={
-  /* UART CLK   PORT IRQ     FLAGS        */
-  { 0, BASE_BAUD, 0x3F8, 0, STD_COM_FLAGS,0,PORT_16550 }  /* ttyS0 */
+struct serial_state {
+	struct tty_port port;
+	struct circ_buf xmit;
+	int irq;
+	int x_char;
 };
 
-/*
- * Just for the fun of it !
- */
-static struct serial_uart_config uart_config[] = {
-	{ "unknown", 1, 0 },
-	{ "8250", 1, 0 },
-	{ "16450", 1, 0 },
-	{ "16550", 1, 0 },
-	{ "16550A", 16, UART_CLEAR_FIFO | UART_USE_FIFO },
-	{ "cirrus", 1, 0 },
-	{ "ST16650", 1, UART_CLEAR_FIFO | UART_STARTECH },
-	{ "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO |
-		  UART_STARTECH },
-	{ "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO},
-	{ 0, 0}
-};
+static struct serial_state rs_table[NR_PORTS];
 
 struct tty_driver *hp_simserial_driver;
 
-static struct async_struct *IRQ_ports[NR_IRQS];
-
 static struct console *console;
 
-static unsigned char *tmp_buf;
-
-extern struct console *console_drivers; /* from kernel/printk.c */
-
-/*
- * ------------------------------------------------------------
- * rs_stop() and rs_start()
- *
- * This routines are called before setting or resetting tty->stopped.
- * They enable or disable transmitter interrupts, as necessary.
- * ------------------------------------------------------------
- */
-static void rs_stop(struct tty_struct *tty)
-{
-#ifdef SIMSERIAL_DEBUG
-	printk("rs_stop: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n",
-		tty->stopped, tty->hw_stopped, tty->flow_stopped);
-#endif
-
-}
-
-static void rs_start(struct tty_struct *tty)
-{
-#ifdef SIMSERIAL_DEBUG
-	printk("rs_start: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n",
-		tty->stopped, tty->hw_stopped, tty->flow_stopped);
-#endif
-}
-
-static  void receive_chars(struct tty_struct *tty, struct pt_regs *regs)
+static void receive_chars(struct tty_port *port)
 {
 	unsigned char ch;
 	static unsigned char seen_esc = 0;
 
 	while ( (ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR)) ) {
-		if ( ch == 27 && seen_esc == 0 ) {
+		if (ch == 27 && seen_esc == 0) {
 			seen_esc = 1;
 			continue;
-		} else {
-			if ( seen_esc==1 && ch == 'O' ) {
-				seen_esc = 2;
-				continue;
-			} else if ( seen_esc == 2 ) {
-				if ( ch == 'P' ) /* F1 */
-					show_state();
+		} else if (seen_esc == 1 && ch == 'O') {
+			seen_esc = 2;
+			continue;
+		} else if (seen_esc == 2) {
+			if (ch == 'P') /* F1 */
+				show_state();
 #ifdef CONFIG_MAGIC_SYSRQ
-				if ( ch == 'S' ) { /* F4 */
-					do
-						ch = ia64_ssc(0, 0, 0, 0,
-							      SSC_GETCHAR);
-					while (!ch);
-					handle_sysrq(ch, regs, NULL);
-				}
-#endif
-				seen_esc = 0;
-				continue;
+			if (ch == 'S') { /* F4 */
+				do {
+					ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR);
+				} while (!ch);
+				handle_sysrq(ch);
 			}
+#endif
+			seen_esc = 0;
+			continue;
 		}
 		seen_esc = 0;
 
-		if (tty_insert_flip_char(tty, ch, TTY_NORMAL) == 0)
+		if (tty_insert_flip_char(port, ch, TTY_NORMAL) == 0)
 			break;
 	}
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 }
 
 /*
  * This is the serial driver's interrupt routine for a single port
  */
-static irqreturn_t rs_interrupt_single(int irq, void *dev_id, struct pt_regs * regs)
+static irqreturn_t rs_interrupt_single(int irq, void *dev_id)
 {
-	struct async_struct * info;
+	struct serial_state *info = dev_id;
+
+	receive_chars(&info->port);
 
-	/*
-	 * I don't know exactly why they don't use the dev_id opaque data
-	 * pointer instead of this extra lookup table
-	 */
-	info = IRQ_ports[irq];
-	if (!info || !info->tty) {
-		printk(KERN_INFO "simrs_interrupt_single: info|tty=0 info=%p problem\n", info);
-		return IRQ_NONE;
-	}
-	/*
-	 * pretty simple in our case, because we only get interrupts
-	 * on inbound traffic
-	 */
-	receive_chars(info->tty, regs);
 	return IRQ_HANDLED;
 }
 
@@ -198,46 +105,31 @@ static irqreturn_t rs_interrupt_single(int irq, void *dev_id, struct pt_regs * r
  * -------------------------------------------------------------------
  */
 
-#if 0
-/*
- * not really used in our situation so keep them commented out for now
- */
-static DECLARE_TASK_QUEUE(tq_serial); /* used to be at the top of the file */
-static void do_serial_bh(void)
-{
-	run_task_queue(&tq_serial);
-	printk(KERN_ERR "do_serial_bh: called\n");
-}
-#endif
-
-static void do_softint(void *private_)
-{
-	printk(KERN_ERR "simserial: do_softint called\n");
-}
-
-static void rs_put_char(struct tty_struct *tty, unsigned char ch)
+static int rs_put_char(struct tty_struct *tty, unsigned char ch)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct serial_state *info = tty->driver_data;
 	unsigned long flags;
 
-	if (!tty || !info->xmit.buf) return;
+	if (!info->xmit.buf)
+		return 0;
 
 	local_irq_save(flags);
 	if (CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) == 0) {
 		local_irq_restore(flags);
-		return;
+		return 0;
 	}
 	info->xmit.buf[info->xmit.head] = ch;
 	info->xmit.head = (info->xmit.head + 1) & (SERIAL_XMIT_SIZE-1);
 	local_irq_restore(flags);
+	return 1;
 }
 
-static void transmit_chars(struct async_struct *info, int *intr_done)
+static void transmit_chars(struct tty_struct *tty, struct serial_state *info,
+		int *intr_done)
 {
 	int count;
 	unsigned long flags;
 
-
 	local_irq_save(flags);
 
 	if (info->x_char) {
@@ -245,16 +137,15 @@ static void transmit_chars(struct async_struct *info, int *intr_done)
 
 		console->write(console, &c, 1);
 
-		info->state->icount.tx++;
 		info->x_char = 0;
 
 		goto out;
 	}
 
-	if (info->xmit.head == info->xmit.tail || info->tty->stopped || info->tty->hw_stopped) {
+	if (info->xmit.head == info->xmit.tail || tty->stopped) {
 #ifdef SIMSERIAL_DEBUG
 		printk("transmit_chars: head=%d, tail=%d, stopped=%d\n",
-		       info->xmit.head, info->xmit.tail, info->tty->stopped);
+		       info->xmit.head, info->xmit.tail, tty->stopped);
 #endif
 		goto out;
 	}
@@ -286,24 +177,24 @@ out:
 
 static void rs_flush_chars(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct serial_state *info = tty->driver_data;
 
-	if (info->xmit.head == info->xmit.tail || tty->stopped || tty->hw_stopped ||
-	    !info->xmit.buf)
+	if (info->xmit.head == info->xmit.tail || tty->stopped ||
+			!info->xmit.buf)
 		return;
 
-	transmit_chars(info, NULL);
+	transmit_chars(tty, info, NULL);
 }
 
-
 static int rs_write(struct tty_struct * tty,
 		    const unsigned char *buf, int count)
 {
+	struct serial_state *info = tty->driver_data;
 	int	c, ret = 0;
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
 	unsigned long flags;
 
-	if (!tty || !info->xmit.buf || !tmp_buf) return 0;
+	if (!info->xmit.buf)
+		return 0;
 
 	local_irq_save(flags);
 	while (1) {
@@ -324,41 +215,37 @@ static int rs_write(struct tty_struct * tty,
 	/*
 	 * Hey, we transmit directly from here in our case
 	 */
-	if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE)
-	    && !tty->stopped && !tty->hw_stopped) {
-		transmit_chars(info, NULL);
-	}
+	if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) &&
+			!tty->stopped)
+		transmit_chars(tty, info, NULL);
+
 	return ret;
 }
 
 static int rs_write_room(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct serial_state *info = tty->driver_data;
 
 	return CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
 }
 
 static int rs_chars_in_buffer(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct serial_state *info = tty->driver_data;
 
 	return CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
 }
 
 static void rs_flush_buffer(struct tty_struct *tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct serial_state *info = tty->driver_data;
 	unsigned long flags;
 
 	local_irq_save(flags);
 	info->xmit.head = info->xmit.tail = 0;
 	local_irq_restore(flags);
 
-	wake_up_interruptible(&tty->write_wait);
-
-	if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
-	    tty->ldisc.write_wakeup)
-		(tty->ldisc.write_wakeup)(tty);
+	tty_wakeup(tty);
 }
 
 /*
@@ -367,7 +254,7 @@ static void rs_flush_buffer(struct tty_struct *tty)
  */
 static void rs_send_xchar(struct tty_struct *tty, char ch)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct serial_state *info = tty->driver_data;
 
 	info->x_char = ch;
 	if (ch) {
@@ -375,7 +262,7 @@ static void rs_send_xchar(struct tty_struct *tty, char ch)
 		 * I guess we could call console->write() directly but
 		 * let's do that for now.
 		 */
-		transmit_chars(info, NULL);
+		transmit_chars(tty, info, NULL);
 	}
 }
 
@@ -389,14 +276,15 @@ static void rs_send_xchar(struct tty_struct *tty, char ch)
  */
 static void rs_throttle(struct tty_struct * tty)
 {
-	if (I_IXOFF(tty)) rs_send_xchar(tty, STOP_CHAR(tty));
+	if (I_IXOFF(tty))
+		rs_send_xchar(tty, STOP_CHAR(tty));
 
 	printk(KERN_INFO "simrs_throttle called\n");
 }
 
 static void rs_unthrottle(struct tty_struct * tty)
 {
-	struct async_struct *info = (struct async_struct *)tty->driver_data;
+	struct serial_state *info = tty->driver_data;
 
 	if (I_IXOFF(tty)) {
 		if (info->x_char)
@@ -407,317 +295,77 @@ static void rs_unthrottle(struct tty_struct * tty)
 	printk(KERN_INFO "simrs_unthrottle called\n");
 }
 
-/*
- * rs_break() --- routine which turns the break handling on or off
- */
-static void rs_break(struct tty_struct *tty, int break_state)
-{
-}
-
-static int rs_ioctl(struct tty_struct *tty, struct file * file,
-		    unsigned int cmd, unsigned long arg)
+static int rs_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg)
 {
 	if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) &&
 	    (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) &&
-	    (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) {
+	    (cmd != TIOCMIWAIT)) {
 		if (tty->flags & (1 << TTY_IO_ERROR))
 		    return -EIO;
 	}
 
 	switch (cmd) {
-		case TIOCMGET:
-			printk(KERN_INFO "rs_ioctl: TIOCMGET called\n");
-			return -EINVAL;
-		case TIOCMBIS:
-		case TIOCMBIC:
-		case TIOCMSET:
-			printk(KERN_INFO "rs_ioctl: TIOCMBIS/BIC/SET called\n");
-			return -EINVAL;
-		case TIOCGSERIAL:
-			printk(KERN_INFO "simrs_ioctl TIOCGSERIAL called\n");
-			return 0;
-		case TIOCSSERIAL:
-			printk(KERN_INFO "simrs_ioctl TIOCSSERIAL called\n");
-			return 0;
-		case TIOCSERCONFIG:
-			printk(KERN_INFO "rs_ioctl: TIOCSERCONFIG called\n");
-			return -EINVAL;
-
-		case TIOCSERGETLSR: /* Get line status register */
-			printk(KERN_INFO "rs_ioctl: TIOCSERGETLSR called\n");
-			return  -EINVAL;
-
-		case TIOCSERGSTRUCT:
-			printk(KERN_INFO "rs_ioctl: TIOCSERGSTRUCT called\n");
-#if 0
-			if (copy_to_user((struct async_struct *) arg,
-					 info, sizeof(struct async_struct)))
-				return -EFAULT;
-#endif
-			return 0;
-
-		/*
-		 * Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change
-		 * - mask passed in arg for lines of interest
-		 *   (use |'ed TIOCM_RNG/DSR/CD/CTS for masking)
-		 * Caller should use TIOCGICOUNT to see which one it was
-		 */
-		case TIOCMIWAIT:
-			printk(KERN_INFO "rs_ioctl: TIOCMIWAIT: called\n");
-			return 0;
-		/*
-		 * Get counter of input serial line interrupts (DCD,RI,DSR,CTS)
-		 * Return: write counters to the user passed counter struct
-		 * NB: both 1->0 and 0->1 transitions are counted except for
-		 *     RI where only 0->1 is counted.
-		 */
-		case TIOCGICOUNT:
-			printk(KERN_INFO "rs_ioctl: TIOCGICOUNT called\n");
-			return 0;
-
-		case TIOCSERGWILD:
-		case TIOCSERSWILD:
-			/* "setserial -W" is called in Debian boot */
-			printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n");
-			return 0;
-
-		default:
-			return -ENOIOCTLCMD;
-		}
-	return 0;
+	case TIOCGSERIAL:
+	case TIOCSSERIAL:
+	case TIOCSERGSTRUCT:
+	case TIOCMIWAIT:
+		return 0;
+	case TIOCSERCONFIG:
+	case TIOCSERGETLSR: /* Get line status register */
+		return -EINVAL;
+	case TIOCSERGWILD:
+	case TIOCSERSWILD:
+		/* "setserial -W" is called in Debian boot */
+		printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n");
+		return 0;
+	}
+	return -ENOIOCTLCMD;
 }
 
 #define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
 
-static void rs_set_termios(struct tty_struct *tty, struct termios *old_termios)
-{
-	unsigned int cflag = tty->termios->c_cflag;
-
-	if (   (cflag == old_termios->c_cflag)
-	    && (   RELEVANT_IFLAG(tty->termios->c_iflag)
-		== RELEVANT_IFLAG(old_termios->c_iflag)))
-	  return;
-
-
-	/* Handle turning off CRTSCTS */
-	if ((old_termios->c_cflag & CRTSCTS) &&
-	    !(tty->termios->c_cflag & CRTSCTS)) {
-		tty->hw_stopped = 0;
-		rs_start(tty);
-	}
-}
 /*
  * This routine will shutdown a serial port; interrupts are disabled, and
  * DTR is dropped if the hangup on close termio flag is on.
  */
-static void shutdown(struct async_struct * info)
+static void shutdown(struct tty_port *port)
 {
-	unsigned long	flags;
-	struct serial_state *state;
-	int		retval;
-
-	if (!(info->flags & ASYNC_INITIALIZED)) return;
-
-	state = info->state;
-
-#ifdef SIMSERIAL_DEBUG
-	printk("Shutting down serial port %d (irq %d)....", info->line,
-	       state->irq);
-#endif
+	struct serial_state *info = container_of(port, struct serial_state,
+			port);
+	unsigned long flags;
 
 	local_irq_save(flags);
-	{
-		/*
-		 * First unlink the serial port from the IRQ chain...
-		 */
-		if (info->next_port)
-			info->next_port->prev_port = info->prev_port;
-		if (info->prev_port)
-			info->prev_port->next_port = info->next_port;
-		else
-			IRQ_ports[state->irq] = info->next_port;
-
-		/*
-		 * Free the IRQ, if necessary
-		 */
-		if (state->irq && (!IRQ_ports[state->irq] ||
-				   !IRQ_ports[state->irq]->next_port)) {
-			if (IRQ_ports[state->irq]) {
-				free_irq(state->irq, NULL);
-				retval = request_irq(state->irq, rs_interrupt_single,
-						     IRQ_T(info), "serial", NULL);
-
-				if (retval)
-					printk(KERN_ERR "serial shutdown: request_irq: error %d"
-					       "  Couldn't reacquire IRQ.\n", retval);
-			} else
-				free_irq(state->irq, NULL);
-		}
-
-		if (info->xmit.buf) {
-			free_page((unsigned long) info->xmit.buf);
-			info->xmit.buf = 0;
-		}
-
-		if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
+	if (info->irq)
+		free_irq(info->irq, info);
 
-		info->flags &= ~ASYNC_INITIALIZED;
+	if (info->xmit.buf) {
+		free_page((unsigned long) info->xmit.buf);
+		info->xmit.buf = NULL;
 	}
 	local_irq_restore(flags);
 }
 
-/*
- * ------------------------------------------------------------
- * rs_close()
- *
- * This routine is called when the serial port gets closed.  First, we
- * wait for the last remaining data to be sent.  Then, we unlink its
- * async structure from the interrupt chain if necessary, and we free
- * that IRQ if nothing is left in the chain.
- * ------------------------------------------------------------
- */
 static void rs_close(struct tty_struct *tty, struct file * filp)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
-	struct serial_state *state;
-	unsigned long flags;
+	struct serial_state *info = tty->driver_data;
 
-	if (!info ) return;
-
-	state = info->state;
-
-	local_irq_save(flags);
-	if (tty_hung_up_p(filp)) {
-#ifdef SIMSERIAL_DEBUG
-		printk("rs_close: hung_up\n");
-#endif
-		local_irq_restore(flags);
-		return;
-	}
-#ifdef SIMSERIAL_DEBUG
-	printk("rs_close ttys%d, count = %d\n", info->line, state->count);
-#endif
-	if ((tty->count == 1) && (state->count != 1)) {
-		/*
-		 * Uh, oh.  tty->count is 1, which means that the tty
-		 * structure will be freed.  state->count should always
-		 * be one in these conditions.  If it's greater than
-		 * one, we've got real problems, since it means the
-		 * serial port won't be shutdown.
-		 */
-		printk(KERN_ERR "rs_close: bad serial port count; tty->count is 1, "
-		       "state->count is %d\n", state->count);
-		state->count = 1;
-	}
-	if (--state->count < 0) {
-		printk(KERN_ERR "rs_close: bad serial port count for ttys%d: %d\n",
-		       info->line, state->count);
-		state->count = 0;
-	}
-	if (state->count) {
-		local_irq_restore(flags);
-		return;
-	}
-	info->flags |= ASYNC_CLOSING;
-	local_irq_restore(flags);
-
-	/*
-	 * Now we wait for the transmit buffer to clear; and we notify
-	 * the line discipline to only process XON/XOFF characters.
-	 */
-	shutdown(info);
-	if (tty->driver->flush_buffer) tty->driver->flush_buffer(tty);
-	if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty);
-	info->event = 0;
-	info->tty = 0;
-	if (info->blocked_open) {
-		if (info->close_delay)
-			schedule_timeout_interruptible(info->close_delay);
-		wake_up_interruptible(&info->open_wait);
-	}
-	info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&info->close_wait);
+	tty_port_close(&info->port, tty, filp);
 }
 
-/*
- * rs_wait_until_sent() --- wait until the transmitter is empty
- */
-static void rs_wait_until_sent(struct tty_struct *tty, int timeout)
-{
-}
-
-
-/*
- * rs_hangup() --- called by tty_hangup() when a hangup is signaled.
- */
 static void rs_hangup(struct tty_struct *tty)
 {
-	struct async_struct * info = (struct async_struct *)tty->driver_data;
-	struct serial_state *state = info->state;
-
-#ifdef SIMSERIAL_DEBUG
-	printk("rs_hangup: called\n");
-#endif
-
-	state = info->state;
+	struct serial_state *info = tty->driver_data;
 
 	rs_flush_buffer(tty);
-	if (info->flags & ASYNC_CLOSING)
-		return;
-	shutdown(info);
-
-	info->event = 0;
-	state->count = 0;
-	info->flags &= ~ASYNC_NORMAL_ACTIVE;
-	info->tty = 0;
-	wake_up_interruptible(&info->open_wait);
+	tty_port_hangup(&info->port);
 }
 
-
-static int get_async_struct(int line, struct async_struct **ret_info)
-{
-	struct async_struct *info;
-	struct serial_state *sstate;
-
-	sstate = rs_table + line;
-	sstate->count++;
-	if (sstate->info) {
-		*ret_info = sstate->info;
-		return 0;
-	}
-	info = kmalloc(sizeof(struct async_struct), GFP_KERNEL);
-	if (!info) {
-		sstate->count--;
-		return -ENOMEM;
-	}
-	memset(info, 0, sizeof(struct async_struct));
-	init_waitqueue_head(&info->open_wait);
-	init_waitqueue_head(&info->close_wait);
-	init_waitqueue_head(&info->delta_msr_wait);
-	info->magic = SERIAL_MAGIC;
-	info->port = sstate->port;
-	info->flags = sstate->flags;
-	info->xmit_fifo_size = sstate->xmit_fifo_size;
-	info->line = line;
-	INIT_WORK(&info->work, do_softint, info);
-	info->state = sstate;
-	if (sstate->info) {
-		kfree(info);
-		*ret_info = sstate->info;
-		return 0;
-	}
-	*ret_info = sstate->info = info;
-	return 0;
-}
-
-static int
-startup(struct async_struct *info)
+static int activate(struct tty_port *port, struct tty_struct *tty)
 {
-	unsigned long flags;
-	int	retval=0;
-	irqreturn_t (*handler)(int, void *, struct pt_regs *);
-	struct serial_state *state= info->state;
-	unsigned long page;
+	struct serial_state *state = container_of(port, struct serial_state,
+			port);
+	unsigned long flags, page;
+	int retval = 0;
 
 	page = get_zeroed_page(GFP_KERNEL);
 	if (!page)
@@ -725,86 +373,31 @@ startup(struct async_struct *info)
 
 	local_irq_save(flags);
 
-	if (info->flags & ASYNC_INITIALIZED) {
-		free_page(page);
-		goto errout;
-	}
-
-	if (!state->port || !state->type) {
-		if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
-		free_page(page);
-		goto errout;
-	}
-	if (info->xmit.buf)
+	if (state->xmit.buf)
 		free_page(page);
 	else
-		info->xmit.buf = (unsigned char *) page;
-
-#ifdef SIMSERIAL_DEBUG
-	printk("startup: ttys%d (irq %d)...", info->line, state->irq);
-#endif
+		state->xmit.buf = (unsigned char *) page;
 
-	/*
-	 * Allocate the IRQ if necessary
-	 */
-	if (state->irq && (!IRQ_ports[state->irq] ||
-			  !IRQ_ports[state->irq]->next_port)) {
-		if (IRQ_ports[state->irq]) {
-			retval = -EBUSY;
-			goto errout;
-		} else
-			handler = rs_interrupt_single;
-
-		retval = request_irq(state->irq, handler, IRQ_T(info), "simserial", NULL);
-		if (retval) {
-			if (capable(CAP_SYS_ADMIN)) {
-				if (info->tty)
-					set_bit(TTY_IO_ERROR,
-						&info->tty->flags);
-				retval = 0;
-			}
+	if (state->irq) {
+		retval = request_irq(state->irq, rs_interrupt_single, 0,
+				"simserial", state);
+		if (retval)
 			goto errout;
-		}
 	}
 
-	/*
-	 * Insert serial port into IRQ chain.
-	 */
-	info->prev_port = 0;
-	info->next_port = IRQ_ports[state->irq];
-	if (info->next_port)
-		info->next_port->prev_port = info;
-	IRQ_ports[state->irq] = info;
-
-	if (info->tty) clear_bit(TTY_IO_ERROR, &info->tty->flags);
-
-	info->xmit.head = info->xmit.tail = 0;
-
-#if 0
-	/*
-	 * Set up serial timers...
-	 */
-	timer_table[RS_TIMER].expires = jiffies + 2*HZ/100;
-	timer_active |= 1 << RS_TIMER;
-#endif
+	state->xmit.head = state->xmit.tail = 0;
 
 	/*
 	 * Set up the tty->alt_speed kludge
 	 */
-	if (info->tty) {
-		if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
-			info->tty->alt_speed = 57600;
-		if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
-			info->tty->alt_speed = 115200;
-		if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
-			info->tty->alt_speed = 230400;
-		if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
-			info->tty->alt_speed = 460800;
-	}
-
-	info->flags |= ASYNC_INITIALIZED;
-	local_irq_restore(flags);
-	return 0;
+	if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
+		tty->alt_speed = 57600;
+	if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
+		tty->alt_speed = 115200;
+	if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
+		tty->alt_speed = 230400;
+	if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
+		tty->alt_speed = 460800;
 
 errout:
 	local_irq_restore(flags);
@@ -820,56 +413,11 @@ errout:
  */
 static int rs_open(struct tty_struct *tty, struct file * filp)
 {
-	struct async_struct	*info;
-	int			retval, line;
-	unsigned long		page;
+	struct serial_state *info = rs_table + tty->index;
+	struct tty_port *port = &info->port;
 
-	line = tty->index;
-	if ((line < 0) || (line >= NR_PORTS))
-		return -ENODEV;
-	retval = get_async_struct(line, &info);
-	if (retval)
-		return retval;
 	tty->driver_data = info;
-	info->tty = tty;
-
-#ifdef SIMSERIAL_DEBUG
-	printk("rs_open %s, count = %d\n", tty->name, info->state->count);
-#endif
-	info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
-
-	if (!tmp_buf) {
-		page = get_zeroed_page(GFP_KERNEL);
-		if (!page)
-			return -ENOMEM;
-		if (tmp_buf)
-			free_page(page);
-		else
-			tmp_buf = (unsigned char *) page;
-	}
-
-	/*
-	 * If the port is the middle of closing, bail out now
-	 */
-	if (tty_hung_up_p(filp) ||
-	    (info->flags & ASYNC_CLOSING)) {
-		if (info->flags & ASYNC_CLOSING)
-			interruptible_sleep_on(&info->close_wait);
-#ifdef SERIAL_DO_RESTART
-		return ((info->flags & ASYNC_HUP_NOTIFY) ?
-			-EAGAIN : -ERESTARTSYS);
-#else
-		return -EAGAIN;
-#endif
-	}
-
-	/*
-	 * Start up serial port
-	 */
-	retval = startup(info);
-	if (retval) {
-		return retval;
-	}
+	port->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 	/*
 	 * figure out which console to use (should be one already)
@@ -880,68 +428,38 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
 		console = console->next;
 	}
 
-#ifdef SIMSERIAL_DEBUG
-	printk("rs_open ttys%d successful\n", info->line);
-#endif
-	return 0;
+	return tty_port_open(port, tty, filp);
 }
 
 /*
  * /proc fs routines....
  */
 
-static inline int line_info(char *buf, struct serial_state *state)
+static int rs_proc_show(struct seq_file *m, void *v)
 {
-	return sprintf(buf, "%d: uart:%s port:%lX irq:%d\n",
-		       state->line, uart_config[state->type].name,
-		       state->port, state->irq);
-}
+	int i;
 
-static int rs_read_proc(char *page, char **start, off_t off, int count,
-		 int *eof, void *data)
-{
-	int i, len = 0, l;
-	off_t	begin = 0;
-
-	len += sprintf(page, "simserinfo:1.0 driver:%s\n", serial_version);
-	for (i = 0; i < NR_PORTS && len < 4000; i++) {
-		l = line_info(page + len, &rs_table[i]);
-		len += l;
-		if (len+begin > off+count)
-			goto done;
-		if (len+begin < off) {
-			begin += len;
-			len = 0;
-		}
-	}
-	*eof = 1;
-done:
-	if (off >= len+begin)
-		return 0;
-	*start = page + (begin-off);
-	return ((count < begin+len-off) ? count : begin+len-off);
+	seq_printf(m, "simserinfo:1.0\n");
+	for (i = 0; i < NR_PORTS; i++)
+		seq_printf(m, "%d: uart:16550 port:3F8 irq:%d\n",
+		       i, rs_table[i].irq);
+	return 0;
 }
 
-/*
- * ---------------------------------------------------------------------
- * rs_init() and friends
- *
- * rs_init() is called at boot-time to initialize the serial driver.
- * ---------------------------------------------------------------------
- */
-
-/*
- * This routine prints out the appropriate serial driver version
- * number, and identifies which options were configured into this
- * driver.
- */
-static inline void show_serial_version(void)
+static int rs_proc_open(struct inode *inode, struct file *file)
 {
-	printk(KERN_INFO "%s version %s with", serial_name, serial_version);
-	printk(KERN_INFO " no serial options enabled\n");
+	return single_open(file, rs_proc_show, NULL);
 }
 
-static struct tty_operations hp_ops = {
+static const struct file_operations rs_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rs_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct tty_operations hp_ops = {
 	.open = rs_open,
 	.close = rs_close,
 	.write = rs_write,
@@ -954,36 +472,31 @@ static struct tty_operations hp_ops = {
 	.throttle = rs_throttle,
 	.unthrottle = rs_unthrottle,
 	.send_xchar = rs_send_xchar,
-	.set_termios = rs_set_termios,
-	.stop = rs_stop,
-	.start = rs_start,
 	.hangup = rs_hangup,
-	.break_ctl = rs_break,
-	.wait_until_sent = rs_wait_until_sent,
-	.read_proc = rs_read_proc,
+	.proc_fops = &rs_proc_fops,
 };
 
-/*
- * The serial driver boot-time initialization code!
- */
-static int __init
-simrs_init (void)
+static const struct tty_port_operations hp_port_ops = {
+	.activate = activate,
+	.shutdown = shutdown,
+};
+
+static int __init simrs_init(void)
 {
-	int			i, rc;
-	struct serial_state	*state;
+	struct serial_state *state;
+	int retval;
 
 	if (!ia64_platform_is("hpsim"))
 		return -ENODEV;
 
-	hp_simserial_driver = alloc_tty_driver(1);
+	hp_simserial_driver = alloc_tty_driver(NR_PORTS);
 	if (!hp_simserial_driver)
 		return -ENOMEM;
 
-	show_serial_version();
+	printk(KERN_INFO "SimSerial driver with no serial options enabled\n");
 
 	/* Initialize the tty_driver structure */
 
-	hp_simserial_driver->owner = THIS_MODULE;
 	hp_simserial_driver->driver_name = "simserial";
 	hp_simserial_driver->name = "ttyS";
 	hp_simserial_driver->major = TTY_MAJOR;
@@ -996,31 +509,35 @@ simrs_init (void)
 	hp_simserial_driver->flags = TTY_DRIVER_REAL_RAW;
 	tty_set_operations(hp_simserial_driver, &hp_ops);
 
-	/*
-	 * Let's have a little bit of fun !
-	 */
-	for (i = 0, state = rs_table; i < NR_PORTS; i++,state++) {
+	state = rs_table;
+	tty_port_init(&state->port);
+	state->port.ops = &hp_port_ops;
+	state->port.close_delay = 0; /* XXX really 0? */
 
-		if (state->type == PORT_UNKNOWN) continue;
+	retval = hpsim_get_irq(KEYBOARD_INTR);
+	if (retval < 0) {
+		printk(KERN_ERR "%s: out of interrupt vectors!\n",
+				__func__);
+		goto err_free_tty;
+	}
 
-		if (!state->irq) {
-			if ((rc = assign_irq_vector(AUTO_ASSIGN)) < 0)
-				panic("%s: out of interrupt vectors!\n",
-				      __FUNCTION__);
-			state->irq = rc;
-			ia64_ssc_connect_irq(KEYBOARD_INTR, state->irq);
-		}
+	state->irq = retval;
 
-		printk(KERN_INFO "ttyS%d at 0x%04lx (irq = %d) is a %s\n",
-		       state->line,
-		       state->port, state->irq,
-		       uart_config[state->type].name);
-	}
+	/* the port is imaginary */
+	printk(KERN_INFO "ttyS0 at 0x03f8 (irq = %d) is a 16550\n", state->irq);
 
-	if (tty_register_driver(hp_simserial_driver))
-		panic("Couldn't register simserial driver\n");
+	tty_port_link_device(&state->port, hp_simserial_driver, 0);
+	retval = tty_register_driver(hp_simserial_driver);
+	if (retval) {
+		printk(KERN_ERR "Couldn't register simserial driver\n");
+		goto err_free_tty;
+	}
 
 	return 0;
+err_free_tty:
+	put_tty_driver(hp_simserial_driver);
+	tty_port_destroy(&state->port);
+	return retval;
 }
 
 #ifndef MODULE
diff --git a/arch/ia64/ia32/Makefile b/arch/ia64/ia32/Makefile
deleted file mode 100644
index 61cb60affd9..00000000000
--- a/arch/ia64/ia32/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-#
-# Makefile for the ia32 kernel emulation subsystem.
-#
-
-obj-y := ia32_entry.o sys_ia32.o ia32_signal.o \
-	 ia32_support.o ia32_traps.o binfmt_elf32.o ia32_ldt.o
-
-# Don't let GCC uses f16-f31 so that save_ia32_fpstate_live() and
-# restore_ia32_fpstate_live() can be sure the live register contain user-level state.
-CFLAGS_ia32_signal.o += -mfixed-range=f16-f31
diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
deleted file mode 100644
index da03c06744f..00000000000
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * IA-32 ELF support.
- *
- * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
- * Copyright (C) 2001 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 06/16/00	A. Mallick	initialize csd/ssd/tssd/cflg for ia32_load_state
- * 04/13/01	D. Mosberger	dropped saving tssd in ar.k1---it's not needed
- * 09/14/01	D. Mosberger	fixed memory management for gdt/tss page
- */
-#include <linux/config.h>
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/security.h>
-
-#include <asm/param.h>
-#include <asm/signal.h>
-
-#include "ia32priv.h"
-#include "elfcore32.h"
-
-/* Override some function names */
-#undef start_thread
-#define start_thread			ia32_start_thread
-#define elf_format			elf32_format
-#define init_elf_binfmt			init_elf32_binfmt
-#define exit_elf_binfmt			exit_elf32_binfmt
-
-#undef CLOCKS_PER_SEC
-#define CLOCKS_PER_SEC	IA32_CLOCKS_PER_SEC
-
-extern void ia64_elf32_init (struct pt_regs *regs);
-
-static void elf32_set_personality (void);
-
-static unsigned long __attribute ((unused))
-randomize_stack_top(unsigned long stack_top);
-
-#define setup_arg_pages(bprm,tos,exec)		ia32_setup_arg_pages(bprm,exec)
-#define elf_map				elf32_map
-
-#undef SET_PERSONALITY
-#define SET_PERSONALITY(ex, ibcs2)	elf32_set_personality()
-
-#define elf_read_implies_exec(ex, have_pt_gnu_stack)	(!(have_pt_gnu_stack))
-
-/* Ugly but avoids duplication */
-#include "../../../fs/binfmt_elf.c"
-
-extern struct page *ia32_shared_page[];
-extern unsigned long *ia32_gdt;
-extern struct page *ia32_gate_page;
-
-struct page *
-ia32_install_shared_page (struct vm_area_struct *vma, unsigned long address, int *type)
-{
-	struct page *pg = ia32_shared_page[smp_processor_id()];
-	get_page(pg);
-	if (type)
-		*type = VM_FAULT_MINOR;
-	return pg;
-}
-
-struct page *
-ia32_install_gate_page (struct vm_area_struct *vma, unsigned long address, int *type)
-{
-	struct page *pg = ia32_gate_page;
-	get_page(pg);
-	if (type)
-		*type = VM_FAULT_MINOR;
-	return pg;
-}
-
-
-static struct vm_operations_struct ia32_shared_page_vm_ops = {
-	.nopage = ia32_install_shared_page
-};
-
-static struct vm_operations_struct ia32_gate_page_vm_ops = {
-	.nopage = ia32_install_gate_page
-};
-
-void
-ia64_elf32_init (struct pt_regs *regs)
-{
-	struct vm_area_struct *vma;
-
-	/*
-	 * Map GDT below 4GB, where the processor can find it.  We need to map
-	 * it with privilege level 3 because the IVE uses non-privileged accesses to these
-	 * tables.  IA-32 segmentation is used to protect against IA-32 accesses to them.
-	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (vma) {
-		memset(vma, 0, sizeof(*vma));
-		vma->vm_mm = current->mm;
-		vma->vm_start = IA32_GDT_OFFSET;
-		vma->vm_end = vma->vm_start + PAGE_SIZE;
-		vma->vm_page_prot = PAGE_SHARED;
-		vma->vm_flags = VM_READ|VM_MAYREAD|VM_RESERVED;
-		vma->vm_ops = &ia32_shared_page_vm_ops;
-		down_write(&current->mm->mmap_sem);
-		{
-			if (insert_vm_struct(current->mm, vma)) {
-				kmem_cache_free(vm_area_cachep, vma);
-				up_write(&current->mm->mmap_sem);
-				BUG();
-			}
-		}
-		up_write(&current->mm->mmap_sem);
-	}
-
-	/*
-	 * When user stack is not executable, push sigreturn code to stack makes
-	 * segmentation fault raised when returning to kernel. So now sigreturn
-	 * code is locked in specific gate page, which is pointed by pretcode
-	 * when setup_frame_ia32
-	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (vma) {
-		memset(vma, 0, sizeof(*vma));
-		vma->vm_mm = current->mm;
-		vma->vm_start = IA32_GATE_OFFSET;
-		vma->vm_end = vma->vm_start + PAGE_SIZE;
-		vma->vm_page_prot = PAGE_COPY_EXEC;
-		vma->vm_flags = VM_READ | VM_MAYREAD | VM_EXEC
-				| VM_MAYEXEC | VM_RESERVED;
-		vma->vm_ops = &ia32_gate_page_vm_ops;
-		down_write(&current->mm->mmap_sem);
-		{
-			if (insert_vm_struct(current->mm, vma)) {
-				kmem_cache_free(vm_area_cachep, vma);
-				up_write(&current->mm->mmap_sem);
-				BUG();
-			}
-		}
-		up_write(&current->mm->mmap_sem);
-	}
-
-	/*
-	 * Install LDT as anonymous memory.  This gives us all-zero segment descriptors
-	 * until a task modifies them via modify_ldt().
-	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (vma) {
-		memset(vma, 0, sizeof(*vma));
-		vma->vm_mm = current->mm;
-		vma->vm_start = IA32_LDT_OFFSET;
-		vma->vm_end = vma->vm_start + PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE);
-		vma->vm_page_prot = PAGE_SHARED;
-		vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE;
-		down_write(&current->mm->mmap_sem);
-		{
-			if (insert_vm_struct(current->mm, vma)) {
-				kmem_cache_free(vm_area_cachep, vma);
-				up_write(&current->mm->mmap_sem);
-				BUG();
-			}
-		}
-		up_write(&current->mm->mmap_sem);
-	}
-
-	ia64_psr(regs)->ac = 0;		/* turn off alignment checking */
-	regs->loadrs = 0;
-	/*
-	 *  According to the ABI %edx points to an `atexit' handler.  Since we don't have
-	 *  one we'll set it to 0 and initialize all the other registers just to make
-	 *  things more deterministic, ala the i386 implementation.
-	 */
-	regs->r8 = 0;	/* %eax */
-	regs->r11 = 0;	/* %ebx */
-	regs->r9 = 0;	/* %ecx */
-	regs->r10 = 0;	/* %edx */
-	regs->r13 = 0;	/* %ebp */
-	regs->r14 = 0;	/* %esi */
-	regs->r15 = 0;	/* %edi */
-
-	current->thread.eflag = IA32_EFLAG;
-	current->thread.fsr = IA32_FSR_DEFAULT;
-	current->thread.fcr = IA32_FCR_DEFAULT;
-	current->thread.fir = 0;
-	current->thread.fdr = 0;
-
-	/*
-	 * Setup GDTD.  Note: GDTD is the descrambled version of the pseudo-descriptor
-	 * format defined by Figure 3-11 "Pseudo-Descriptor Format" in the IA-32
-	 * architecture manual. Also note that the only fields that are not ignored are
-	 * `base', `limit', 'G', `P' (must be 1) and `S' (must be 0).
-	 */
-	regs->r31 = IA32_SEG_UNSCRAMBLE(IA32_SEG_DESCRIPTOR(IA32_GDT_OFFSET, IA32_PAGE_SIZE - 1,
-							    0, 0, 0, 1, 0, 0, 0));
-	/* Setup the segment selectors */
-	regs->r16 = (__USER_DS << 16) | __USER_DS; /* ES == DS, GS, FS are zero */
-	regs->r17 = (__USER_DS << 16) | __USER_CS; /* SS, CS; ia32_load_state() sets TSS and LDT */
-
-	ia32_load_segment_descriptors(current);
-	ia32_load_state(current);
-}
-
-int
-ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
-{
-	unsigned long stack_base;
-	struct vm_area_struct *mpnt;
-	struct mm_struct *mm = current->mm;
-	int i, ret;
-
-	stack_base = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
-	mm->arg_start = bprm->p + stack_base;
-
-	bprm->p += stack_base;
-	if (bprm->loader)
-		bprm->loader += stack_base;
-	bprm->exec += stack_base;
-
-	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!mpnt)
-		return -ENOMEM;
-
-	memset(mpnt, 0, sizeof(*mpnt));
-
-	down_write(&current->mm->mmap_sem);
-	{
-		mpnt->vm_mm = current->mm;
-		mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
-		mpnt->vm_end = IA32_STACK_TOP;
-		if (executable_stack == EXSTACK_ENABLE_X)
-			mpnt->vm_flags = VM_STACK_FLAGS |  VM_EXEC;
-		else if (executable_stack == EXSTACK_DISABLE_X)
-			mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
-		else
-			mpnt->vm_flags = VM_STACK_FLAGS;
-		mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
-					PAGE_COPY_EXEC: PAGE_COPY;
-		if ((ret = insert_vm_struct(current->mm, mpnt))) {
-			up_write(&current->mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, mpnt);
-			return ret;
-		}
-		current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
-	}
-
-	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
-		struct page *page = bprm->page[i];
-		if (page) {
-			bprm->page[i] = NULL;
-			install_arg_page(mpnt, page, stack_base);
-		}
-		stack_base += PAGE_SIZE;
-	}
-	up_write(&current->mm->mmap_sem);
-
-	/* Can't do it in ia64_elf32_init(). Needs to be done before calls to
-	   elf32_map() */
-	current->thread.ppl = ia32_init_pp_list();
-
-	return 0;
-}
-
-static void
-elf32_set_personality (void)
-{
-	set_personality(PER_LINUX32);
-	current->thread.map_base  = IA32_PAGE_OFFSET/3;
-}
-
-static unsigned long
-elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
-{
-	unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;
-
-	return ia32_do_mmap(filep, (addr & IA32_PAGE_MASK), eppnt->p_filesz + pgoff, prot, type,
-			    eppnt->p_offset - pgoff);
-}
-
-#define cpu_uses_ia32el()	(local_cpu_data->family > 0x1f)
-
-static int __init check_elf32_binfmt(void)
-{
-	if (cpu_uses_ia32el()) {
-		printk("Please use IA-32 EL for executing IA-32 binaries\n");
-		return unregister_binfmt(&elf_format);
-	}
-	return 0;
-}
-
-module_init(check_elf32_binfmt)
diff --git a/arch/ia64/ia32/elfcore32.h b/arch/ia64/ia32/elfcore32.h
deleted file mode 100644
index a47f63b204f..00000000000
--- a/arch/ia64/ia32/elfcore32.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * IA-32 ELF core dump support.
- *
- * Copyright (C) 2003 Arun Sharma <arun.sharma@intel.com>
- *
- * Derived from the x86_64 version
- */
-#ifndef _ELFCORE32_H_
-#define _ELFCORE32_H_
-
-#include <asm/intrinsics.h>
-#include <asm/uaccess.h>
-
-#define USE_ELF_CORE_DUMP 1
-
-/* Override elfcore.h */
-#define _LINUX_ELFCORE_H 1
-typedef unsigned int elf_greg_t;
-
-#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-typedef struct ia32_user_i387_struct elf_fpregset_t;
-typedef struct ia32_user_fxsr_struct elf_fpxregset_t;
-
-struct elf_siginfo
-{
-	int	si_signo;			/* signal number */
-	int	si_code;			/* extra code */
-	int	si_errno;			/* errno */
-};
-
-#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
-
-struct elf_prstatus
-{
-	struct elf_siginfo pr_info;	/* Info associated with signal */
-	short	pr_cursig;		/* Current signal */
-	unsigned int pr_sigpend;	/* Set of pending signals */
-	unsigned int pr_sighold;	/* Set of held signals */
-	pid_t	pr_pid;
-	pid_t	pr_ppid;
-	pid_t	pr_pgrp;
-	pid_t	pr_sid;
-	struct compat_timeval pr_utime;	/* User time */
-	struct compat_timeval pr_stime;	/* System time */
-	struct compat_timeval pr_cutime;	/* Cumulative user time */
-	struct compat_timeval pr_cstime;	/* Cumulative system time */
-	elf_gregset_t pr_reg;	/* GP registers */
-	int pr_fpvalid;		/* True if math co-processor being used.  */
-};
-
-#define ELF_PRARGSZ	(80)	/* Number of chars for args */
-
-struct elf_prpsinfo
-{
-	char	pr_state;	/* numeric process state */
-	char	pr_sname;	/* char for pr_state */
-	char	pr_zomb;	/* zombie */
-	char	pr_nice;	/* nice val */
-	unsigned int pr_flag;	/* flags */
-	__u16	pr_uid;
-	__u16	pr_gid;
-	pid_t	pr_pid, pr_ppid, pr_pgrp, pr_sid;
-	/* Lots missing */
-	char	pr_fname[16];	/* filename of executable */
-	char	pr_psargs[ELF_PRARGSZ];	/* initial part of arg list */
-};
-
-#define ELF_CORE_COPY_REGS(pr_reg, regs)       		\
-	pr_reg[0] = regs->r11;				\
-	pr_reg[1] = regs->r9;				\
-	pr_reg[2] = regs->r10;				\
-	pr_reg[3] = regs->r14;				\
-	pr_reg[4] = regs->r15;				\
-	pr_reg[5] = regs->r13;				\
-	pr_reg[6] = regs->r8;				\
-	pr_reg[7] = regs->r16 & 0xffff;			\
-	pr_reg[8] = (regs->r16 >> 16) & 0xffff;		\
-	pr_reg[9] = (regs->r16 >> 32) & 0xffff;		\
-	pr_reg[10] = (regs->r16 >> 48) & 0xffff;	\
-	pr_reg[11] = regs->r1; 				\
-	pr_reg[12] = regs->cr_iip;			\
-	pr_reg[13] = regs->r17 & 0xffff;		\
-	pr_reg[14] = ia64_getreg(_IA64_REG_AR_EFLAG);	\
-	pr_reg[15] = regs->r12;				\
-	pr_reg[16] = (regs->r17 >> 16) & 0xffff;
-
-static inline void elf_core_copy_regs(elf_gregset_t *elfregs,
-				      struct pt_regs *regs)
-{
-	ELF_CORE_COPY_REGS((*elfregs), regs)
-}
-
-static inline int elf_core_copy_task_regs(struct task_struct *t,
-					  elf_gregset_t* elfregs)
-{
-	ELF_CORE_COPY_REGS((*elfregs), task_pt_regs(t));
-	return 1;
-}
-
-static inline int
-elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu)
-{
-	struct ia32_user_i387_struct *fpstate = (void*)fpu;
-	mm_segment_t old_fs;
-
-	if (!tsk_used_math(tsk))
-		return 0;
-	
-	old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	save_ia32_fpstate(tsk, (struct ia32_user_i387_struct __user *) fpstate);
-	set_fs(old_fs);
-
-	return 1;
-}
-
-#define ELF_CORE_COPY_XFPREGS 1
-static inline int
-elf_core_copy_task_xfpregs(struct task_struct *tsk, elf_fpxregset_t *xfpu)
-{
-	struct ia32_user_fxsr_struct *fpxstate = (void*) xfpu;
-	mm_segment_t old_fs;
-
-	if (!tsk_used_math(tsk))
-		return 0;
-
-	old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	save_ia32_fpxstate(tsk, (struct ia32_user_fxsr_struct __user *) fpxstate);
-	set_fs(old_fs);
-
-	return 1;
-}
-
-#endif /* _ELFCORE32_H_ */
diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
deleted file mode 100644
index a32cd59b81e..00000000000
--- a/arch/ia64/ia32/ia32_entry.S
+++ /dev/null
@@ -1,500 +0,0 @@
-#include <asm/asmmacro.h>
-#include <asm/ia32.h>
-#include <asm/asm-offsets.h>
-#include <asm/signal.h>
-#include <asm/thread_info.h>
-
-#include "../kernel/minstate.h"
-
-	/*
-	 * execve() is special because in case of success, we need to
-	 * setup a null register window frame (in case an IA-32 process
-	 * is exec'ing an IA-64 program).
-	 */
-ENTRY(ia32_execve)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(3)
-	alloc loc1=ar.pfs,3,2,4,0
-	mov loc0=rp
-	.body
-	zxt4 out0=in0			// filename
-	;;				// stop bit between alloc and call
-	zxt4 out1=in1			// argv
-	zxt4 out2=in2			// envp
-	add out3=16,sp			// regs
-	br.call.sptk.few rp=sys32_execve
-1:	cmp.ge p6,p0=r8,r0
-	mov ar.pfs=loc1			// restore ar.pfs
-	;;
-(p6)	mov ar.pfs=r0			// clear ar.pfs in case of success
-	sxt4 r8=r8			// return 64-bit result
-	mov rp=loc0
-	br.ret.sptk.few rp
-END(ia32_execve)
-
-ENTRY(ia32_clone)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
-	alloc r16=ar.pfs,5,2,6,0
-	DO_SAVE_SWITCH_STACK
-	mov loc0=rp
-	mov loc1=r16				// save ar.pfs across do_fork
-	.body
-	zxt4 out1=in1				// newsp
-	mov out3=16				// stacksize (compensates for 16-byte scratch area)
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
-	mov out0=in0				// out0 = clone_flags
-	zxt4 out4=in2				// out4 = parent_tidptr
-	zxt4 out5=in4				// out5 = child_tidptr
-	br.call.sptk.many rp=do_fork
-.ret0:	.restore sp
-	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
-	mov ar.pfs=loc1
-	mov rp=loc0
-	br.ret.sptk.many rp
-END(ia32_clone)
-
-ENTRY(sys32_rt_sigsuspend)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,3,0		// preserve all eight input regs
-	mov loc0=rp
-	mov out0=in0				// mask
-	mov out1=in1				// sigsetsize
-	mov out2=sp				// out2 = &sigscratch
-	.fframe 16
-	adds sp=-16,sp				// allocate dummy "sigscratch"
-	;;
-	.body
-	br.call.sptk.many rp=ia32_rt_sigsuspend
-1:	.restore sp
-	adds sp=16,sp
-	mov rp=loc0
-	mov ar.pfs=loc1
-	br.ret.sptk.many rp
-END(sys32_rt_sigsuspend)
-
-ENTRY(sys32_sigsuspend)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,3,0		// preserve all eight input regs
-	mov loc0=rp
-	mov out0=in2				// mask (first two args are ignored)
-	;;
-	mov out1=sp				// out1 = &sigscratch
-	.fframe 16
-	adds sp=-16,sp				// allocate dummy "sigscratch"
-	.body
-	br.call.sptk.many rp=ia32_sigsuspend
-1:	.restore sp
-	adds sp=16,sp
-	mov rp=loc0
-	mov ar.pfs=loc1
-	br.ret.sptk.many rp
-END(sys32_sigsuspend)
-
-GLOBAL_ENTRY(ia32_ret_from_clone)
-	PT_REGS_UNWIND_INFO(0)
-{	/*
-	 * Some versions of gas generate bad unwind info if the first instruction of a
-	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
-	 */
-	nop.m 0
-	nop.i 0
-	/*
-	 * We need to call schedule_tail() to complete the scheduling process.
-	 * Called by ia64_switch_to after do_fork()->copy_thread().  r8 contains the
-	 * address of the previously executing task.
-	 */
-	br.call.sptk.many rp=ia64_invoke_schedule_tail
-}
-.ret1:
-	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
-	;;
-	ld4 r2=[r2]
-	;;
-	mov r8=0
-	and r2=_TIF_SYSCALL_TRACEAUDIT,r2
-	;;
-	cmp.ne p6,p0=r2,r0
-(p6)	br.cond.spnt .ia32_strace_check_retval
-	;;					// prevent RAW on r8
-END(ia32_ret_from_clone)
-	// fall thrugh
-GLOBAL_ENTRY(ia32_ret_from_syscall)
-	PT_REGS_UNWIND_INFO(0)
-
-	cmp.ge p6,p7=r8,r0                      // syscall executed successfully?
-	adds r2=IA64_PT_REGS_R8_OFFSET+16,sp    // r2 = &pt_regs.r8
-	;;
-	alloc r3=ar.pfs,0,0,0,0			// drop the syscall argument frame
-	st8 [r2]=r8                             // store return value in slot for r8
-	br.cond.sptk.many ia64_leave_kernel
-END(ia32_ret_from_syscall)
-
-	//
-	// Invoke a system call, but do some tracing before and after the call.
-	// We MUST preserve the current register frame throughout this routine
-	// because some system calls (such as ia64_execve) directly
-	// manipulate ar.pfs.
-	//
-	// Input:
-	//	r8 = syscall number
-	//	b6 = syscall entry point
-	//
-GLOBAL_ENTRY(ia32_trace_syscall)
-	PT_REGS_UNWIND_INFO(0)
-	mov r3=-38
-	adds r2=IA64_PT_REGS_R8_OFFSET+16,sp
-	;;
-	st8 [r2]=r3				// initialize return code to -ENOSYS
-	br.call.sptk.few rp=syscall_trace_enter	// give parent a chance to catch syscall args
-.ret2:	// Need to reload arguments (they may be changed by the tracing process)
-	adds r2=IA64_PT_REGS_R1_OFFSET+16,sp	// r2 = &pt_regs.r1
-	adds r3=IA64_PT_REGS_R13_OFFSET+16,sp	// r3 = &pt_regs.r13
-	mov r15=IA32_NR_syscalls
-	;;
-	ld4 r8=[r2],IA64_PT_REGS_R9_OFFSET-IA64_PT_REGS_R1_OFFSET
-	movl r16=ia32_syscall_table
-	;;
-	ld4 r33=[r2],8				// r9 == ecx
-	ld4 r37=[r3],16				// r13 == ebp
-	cmp.ltu.unc p6,p7=r8,r15
-	;;
-	ld4 r34=[r2],8				// r10 == edx
-	ld4 r36=[r3],8				// r15 == edi
-(p6)	shladd r16=r8,3,r16	// force ni_syscall if not valid syscall number
-	;;
-	ld8 r16=[r16]
-	;;
-	ld4 r32=[r2],8				// r11 == ebx
-	mov b6=r16
-	ld4 r35=[r3],8				// r14 == esi
-	br.call.sptk.few rp=b6			// do the syscall
-.ia32_strace_check_retval:
-	cmp.lt p6,p0=r8,r0			// syscall failed?
-	adds r2=IA64_PT_REGS_R8_OFFSET+16,sp	// r2 = &pt_regs.r8
-	;;
-	st8.spill [r2]=r8			// store return value in slot for r8
-	br.call.sptk.few rp=syscall_trace_leave	// give parent a chance to catch return value
-.ret4:	alloc r2=ar.pfs,0,0,0,0			// drop the syscall argument frame
-	br.cond.sptk.many ia64_leave_kernel
-END(ia32_trace_syscall)
-
-GLOBAL_ENTRY(sys32_vfork)
-	alloc r16=ar.pfs,2,2,4,0;;
-	mov out0=IA64_CLONE_VFORK|IA64_CLONE_VM|SIGCHLD	// out0 = clone_flags
-	br.cond.sptk.few .fork1			// do the work
-END(sys32_vfork)
-
-GLOBAL_ENTRY(sys32_fork)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
-	alloc r16=ar.pfs,2,2,4,0
-	mov out0=SIGCHLD			// out0 = clone_flags
-	;;
-.fork1:
-	mov loc0=rp
-	mov loc1=r16				// save ar.pfs across do_fork
-	DO_SAVE_SWITCH_STACK
-
-	.body
-
-	mov out1=0
-	mov out3=0
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
-	br.call.sptk.few rp=do_fork
-.ret5:	.restore sp
-	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
-	mov ar.pfs=loc1
-	mov rp=loc0
-	br.ret.sptk.many rp
-END(sys32_fork)
-
-	.rodata
-	.align 8
-	.globl ia32_syscall_table
-ia32_syscall_table:
-	data8 sys_ni_syscall	  /* 0	-  old "setup(" system call*/
-	data8 sys_exit
-	data8 sys32_fork
-	data8 sys_read
-	data8 sys_write
-	data8 compat_sys_open	  /* 5 */
-	data8 sys_close
-	data8 sys32_waitpid
-	data8 sys_creat
-	data8 sys_link
-	data8 sys_unlink	  /* 10 */
-	data8 ia32_execve
-	data8 sys_chdir
-	data8 compat_sys_time
-	data8 sys_mknod
-	data8 sys_chmod		  /* 15 */
-	data8 sys_lchown	/* 16-bit version */
-	data8 sys_ni_syscall	  /* old break syscall holder */
-	data8 sys_ni_syscall
-	data8 sys32_lseek
-	data8 sys_getpid	  /* 20 */
-	data8 compat_sys_mount
-	data8 sys_oldumount
-	data8 sys_setuid	/* 16-bit version */
-	data8 sys_getuid	/* 16-bit version */
-	data8 compat_sys_stime    /* 25 */
-	data8 sys32_ptrace
-	data8 sys32_alarm
-	data8 sys_ni_syscall
-	data8 sys32_pause
-	data8 compat_sys_utime	  /* 30 */
-	data8 sys_ni_syscall	  /* old stty syscall holder */
-	data8 sys_ni_syscall	  /* old gtty syscall holder */
-	data8 sys_access
-	data8 sys_nice
-	data8 sys_ni_syscall	  /* 35 */	  /* old ftime syscall holder */
-	data8 sys_sync
-	data8 sys_kill
-	data8 sys_rename
-	data8 sys_mkdir
-	data8 sys_rmdir		  /* 40 */
-	data8 sys_dup
-	data8 sys32_pipe
-	data8 compat_sys_times
-	data8 sys_ni_syscall	  /* old prof syscall holder */
-	data8 sys32_brk		  /* 45 */
-	data8 sys_setgid	/* 16-bit version */
-	data8 sys_getgid	/* 16-bit version */
-	data8 sys32_signal
-	data8 sys_geteuid	/* 16-bit version */
-	data8 sys_getegid	/* 16-bit version */	  /* 50 */
-	data8 sys_acct
-	data8 sys_umount	  /* recycled never used phys( */
-	data8 sys_ni_syscall	  /* old lock syscall holder */
-	data8 compat_sys_ioctl
-	data8 compat_sys_fcntl	  /* 55 */
-	data8 sys_ni_syscall	  /* old mpx syscall holder */
-	data8 sys_setpgid
-	data8 sys_ni_syscall	  /* old ulimit syscall holder */
-	data8 sys_ni_syscall
-	data8 sys_umask		  /* 60 */
-	data8 sys_chroot
-	data8 sys_ustat
-	data8 sys_dup2
-	data8 sys_getppid
-	data8 sys_getpgrp	  /* 65 */
-	data8 sys_setsid
-	data8 sys32_sigaction
-	data8 sys_ni_syscall
-	data8 sys_ni_syscall
-	data8 sys_setreuid	/* 16-bit version */	  /* 70 */
-	data8 sys_setregid	/* 16-bit version */
-	data8 sys32_sigsuspend
-	data8 compat_sys_sigpending
-	data8 sys_sethostname
-	data8 compat_sys_setrlimit	  /* 75 */
-	data8 compat_sys_old_getrlimit
-	data8 compat_sys_getrusage
-	data8 sys32_gettimeofday
-	data8 sys32_settimeofday
-	data8 sys32_getgroups16	  /* 80 */
-	data8 sys32_setgroups16
-	data8 sys32_old_select
-	data8 sys_symlink
-	data8 sys_ni_syscall
-	data8 sys_readlink	  /* 85 */
-	data8 sys_uselib
-	data8 sys_swapon
-	data8 sys_reboot
-	data8 sys32_readdir
-	data8 sys32_mmap	  /* 90 */
-	data8 sys32_munmap
-	data8 sys_truncate
-	data8 sys_ftruncate
-	data8 sys_fchmod
-	data8 sys_fchown	/* 16-bit version */	  /* 95 */
-	data8 sys_getpriority
-	data8 sys_setpriority
-	data8 sys_ni_syscall	  /* old profil syscall holder */
-	data8 compat_sys_statfs
-	data8 compat_sys_fstatfs	  /* 100 */
-	data8 sys_ni_syscall	/* ioperm */
-	data8 compat_sys_socketcall
-	data8 sys_syslog
-	data8 compat_sys_setitimer
-	data8 compat_sys_getitimer	  /* 105 */
-	data8 compat_sys_newstat
-	data8 compat_sys_newlstat
-	data8 compat_sys_newfstat
-	data8 sys_ni_syscall
-	data8 sys_ni_syscall	/* iopl */	/* 110 */
-	data8 sys_vhangup
-	data8 sys_ni_syscall		/* used to be sys_idle */
-	data8 sys_ni_syscall
-	data8 compat_sys_wait4
-	data8 sys_swapoff	  /* 115 */
-	data8 sys32_sysinfo
-	data8 sys32_ipc
-	data8 sys_fsync
-	data8 sys32_sigreturn
-	data8 ia32_clone	  /* 120 */
-	data8 sys_setdomainname
-	data8 sys32_newuname
-	data8 sys32_modify_ldt
-	data8 compat_sys_adjtimex
-	data8 sys32_mprotect	  /* 125 */
-	data8 compat_sys_sigprocmask
-	data8 sys_ni_syscall	/* create_module */
-	data8 sys_ni_syscall	/* init_module */
-	data8 sys_ni_syscall	/* delete_module */
-	data8 sys_ni_syscall	/* get_kernel_syms */  /* 130 */
-	data8 sys_quotactl
-	data8 sys_getpgid
-	data8 sys_fchdir
-	data8 sys_ni_syscall	/* sys_bdflush */
-	data8 sys_sysfs		/* 135 */
-	data8 sys32_personality
-	data8 sys_ni_syscall	  /* for afs_syscall */
-	data8 sys_setfsuid	/* 16-bit version */
-	data8 sys_setfsgid	/* 16-bit version */
-	data8 sys_llseek	  /* 140 */
-	data8 compat_sys_getdents
-	data8 compat_sys_select
-	data8 sys_flock
-	data8 sys32_msync
-	data8 compat_sys_readv	  /* 145 */
-	data8 compat_sys_writev
-	data8 sys_getsid
-	data8 sys_fdatasync
-	data8 sys32_sysctl
-	data8 sys_mlock		  /* 150 */
-	data8 sys_munlock
-	data8 sys_mlockall
-	data8 sys_munlockall
-	data8 sys_sched_setparam
-	data8 sys_sched_getparam  /* 155 */
-	data8 sys_sched_setscheduler
-	data8 sys_sched_getscheduler
-	data8 sys_sched_yield
-	data8 sys_sched_get_priority_max
-	data8 sys_sched_get_priority_min	 /* 160 */
-	data8 sys32_sched_rr_get_interval
-	data8 compat_sys_nanosleep
-	data8 sys32_mremap
-	data8 sys_setresuid	/* 16-bit version */
-	data8 sys32_getresuid16	/* 16-bit version */	  /* 165 */
-	data8 sys_ni_syscall	/* vm86 */
-	data8 sys_ni_syscall	/* sys_query_module */
-	data8 sys_poll
-	data8 sys_ni_syscall	/* nfsservctl */
-	data8 sys_setresgid	  /* 170 */
-	data8 sys32_getresgid16
-	data8 sys_prctl
-	data8 sys32_rt_sigreturn
-	data8 sys32_rt_sigaction
-	data8 sys32_rt_sigprocmask /* 175 */
-	data8 sys_rt_sigpending
-	data8 compat_sys_rt_sigtimedwait
-	data8 sys32_rt_sigqueueinfo
-	data8 sys32_rt_sigsuspend
-	data8 sys32_pread	  /* 180 */
-	data8 sys32_pwrite
-	data8 sys_chown	/* 16-bit version */
-	data8 sys_getcwd
-	data8 sys_capget
-	data8 sys_capset	  /* 185 */
-	data8 sys32_sigaltstack
-	data8 sys32_sendfile
-	data8 sys_ni_syscall		  /* streams1 */
-	data8 sys_ni_syscall		  /* streams2 */
-	data8 sys32_vfork	  /* 190 */
-	data8 compat_sys_getrlimit
-	data8 sys32_mmap2
-	data8 sys32_truncate64
-	data8 sys32_ftruncate64
-	data8 sys32_stat64	  /* 195 */
-	data8 sys32_lstat64
-	data8 sys32_fstat64
-	data8 sys_lchown
-	data8 sys_getuid
-	data8 sys_getgid	  /* 200 */
-	data8 sys_geteuid
-	data8 sys_getegid
-	data8 sys_setreuid
-	data8 sys_setregid
-	data8 sys_getgroups	  /* 205 */
-	data8 sys_setgroups
-	data8 sys_fchown
-	data8 sys_setresuid
-	data8 sys_getresuid
-	data8 sys_setresgid	  /* 210 */
-	data8 sys_getresgid
-	data8 sys_chown
-	data8 sys_setuid
-	data8 sys_setgid
-	data8 sys_setfsuid	  /* 215 */
-	data8 sys_setfsgid
-	data8 sys_pivot_root
-	data8 sys_mincore
-	data8 sys_madvise
-	data8 compat_sys_getdents64	  /* 220 */
-	data8 compat_sys_fcntl64
-	data8 sys_ni_syscall		/* reserved for TUX */
-	data8 sys_ni_syscall		/* reserved for Security */
-	data8 sys_gettid
-	data8 sys_readahead	  /* 225 */
- 	data8 sys_setxattr
- 	data8 sys_lsetxattr
- 	data8 sys_fsetxattr
- 	data8 sys_getxattr
- 	data8 sys_lgetxattr	/* 230 */
- 	data8 sys_fgetxattr
- 	data8 sys_listxattr
- 	data8 sys_llistxattr
- 	data8 sys_flistxattr
- 	data8 sys_removexattr	/* 235 */
- 	data8 sys_lremovexattr
- 	data8 sys_fremovexattr
-	data8 sys_tkill
- 	data8 sys_sendfile64
-	data8 compat_sys_futex	/* 240 */
-	data8 compat_sys_sched_setaffinity
-	data8 compat_sys_sched_getaffinity
-	data8 sys32_set_thread_area
-	data8 sys32_get_thread_area
- 	data8 compat_sys_io_setup	/* 245 */
- 	data8 sys_io_destroy
- 	data8 compat_sys_io_getevents
- 	data8 compat_sys_io_submit
- 	data8 sys_io_cancel
- 	data8 sys_fadvise64	/* 250 */
-	data8 sys_ni_syscall
-	data8 sys_exit_group
- 	data8 sys_lookup_dcookie
-	data8 sys_epoll_create
-	data8 sys32_epoll_ctl	/* 255 */
-	data8 sys32_epoll_wait
-	data8 sys_remap_file_pages
-	data8 sys_set_tid_address
- 	data8 compat_sys_timer_create
- 	data8 compat_sys_timer_settime	/* 260 */
- 	data8 compat_sys_timer_gettime
- 	data8 sys_timer_getoverrun
- 	data8 sys_timer_delete
- 	data8 compat_sys_clock_settime
- 	data8 compat_sys_clock_gettime /* 265 */
- 	data8 compat_sys_clock_getres
- 	data8 compat_sys_clock_nanosleep
-	data8 compat_sys_statfs64
-	data8 compat_sys_fstatfs64
- 	data8 sys_tgkill	/* 270 */
- 	data8 compat_sys_utimes
- 	data8 sys32_fadvise64_64
- 	data8 sys_ni_syscall
-  	data8 sys_ni_syscall
- 	data8 sys_ni_syscall	/* 275 */
-  	data8 sys_ni_syscall
-  	data8 compat_sys_mq_open
-  	data8 sys_mq_unlink
-  	data8 compat_sys_mq_timedsend
-  	data8 compat_sys_mq_timedreceive	/* 280 */
-  	data8 compat_sys_mq_notify
-  	data8 compat_sys_mq_getsetattr
-	data8 sys_ni_syscall		/* reserved for kexec */
-	data8 compat_sys_waitid
-
-	// guard against failures to increase IA32_NR_syscalls
-	.org ia32_syscall_table + 8*IA32_NR_syscalls
diff --git a/arch/ia64/ia32/ia32_ldt.c b/arch/ia64/ia32/ia32_ldt.c
deleted file mode 100644
index a152738c7d0..00000000000
--- a/arch/ia64/ia32/ia32_ldt.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (C) 2001, 2004 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * Adapted from arch/i386/kernel/ldt.c
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/vmalloc.h>
-
-#include <asm/uaccess.h>
-
-#include "ia32priv.h"
-
-/*
- * read_ldt() is not really atomic - this is not a problem since synchronization of reads
- * and writes done to the LDT has to be assured by user-space anyway. Writes are atomic,
- * to protect the security checks done on new descriptors.
- */
-static int
-read_ldt (void __user *ptr, unsigned long bytecount)
-{
-	unsigned long bytes_left, n;
-	char __user *src, *dst;
-	char buf[256];	/* temporary buffer (don't overflow kernel stack!) */
-
-	if (bytecount > IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE)
-		bytecount = IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE;
-
-	bytes_left = bytecount;
-
-	src = (void __user *) IA32_LDT_OFFSET;
-	dst = ptr;
-
-	while (bytes_left) {
-		n = sizeof(buf);
-		if (n > bytes_left)
-			n = bytes_left;
-
-		/*
-		 * We know we're reading valid memory, but we still must guard against
-		 * running out of memory.
-		 */
-		if (__copy_from_user(buf, src, n))
-			return -EFAULT;
-
-		if (copy_to_user(dst, buf, n))
-			return -EFAULT;
-
-		src += n;
-		dst += n;
-		bytes_left -= n;
-	}
-	return bytecount;
-}
-
-static int
-read_default_ldt (void __user * ptr, unsigned long bytecount)
-{
-	unsigned long size;
-	int err;
-
-	/* XXX fix me: should return equivalent of default_ldt[0] */
-	err = 0;
-	size = 8;
-	if (size > bytecount)
-		size = bytecount;
-
-	err = size;
-	if (clear_user(ptr, size))
-		err = -EFAULT;
-
-	return err;
-}
-
-static int
-write_ldt (void __user * ptr, unsigned long bytecount, int oldmode)
-{
-	struct ia32_user_desc ldt_info;
-	__u64 entry;
-	int ret;
-
-	if (bytecount != sizeof(ldt_info))
-		return -EINVAL;
-	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
-		return -EFAULT;
-
-	if (ldt_info.entry_number >= IA32_LDT_ENTRIES)
-		return -EINVAL;
-	if (ldt_info.contents == 3) {
-		if (oldmode)
-			return -EINVAL;
-		if (ldt_info.seg_not_present == 0)
-			return -EINVAL;
-	}
-
-	if (ldt_info.base_addr == 0 && ldt_info.limit == 0
-	    && (oldmode || (ldt_info.contents == 0 && ldt_info.read_exec_only == 1
-			    && ldt_info.seg_32bit == 0 && ldt_info.limit_in_pages == 0
-			    && ldt_info.seg_not_present == 1 && ldt_info.useable == 0)))
-		/* allow LDTs to be cleared by the user */
-		entry = 0;
-	else
-		/* we must set the "Accessed" bit as IVE doesn't emulate it */
-		entry = IA32_SEG_DESCRIPTOR(ldt_info.base_addr, ldt_info.limit,
-					    (((ldt_info.read_exec_only ^ 1) << 1)
-					     | (ldt_info.contents << 2)) | 1,
-					    1, 3, ldt_info.seg_not_present ^ 1,
-					    (oldmode ? 0 : ldt_info.useable),
-					    ldt_info.seg_32bit,
-					    ldt_info.limit_in_pages);
-	/*
-	 * Install the new entry.  We know we're accessing valid (mapped) user-level
-	 * memory, but we still need to guard against out-of-memory, hence we must use
-	 * put_user().
-	 */
-	ret = __put_user(entry, (__u64 __user *) IA32_LDT_OFFSET + ldt_info.entry_number);
-	ia32_load_segment_descriptors(current);
-	return ret;
-}
-
-asmlinkage int
-sys32_modify_ldt (int func, unsigned int ptr, unsigned int bytecount)
-{
-	int ret = -ENOSYS;
-
-	switch (func) {
-	      case 0:
-		ret = read_ldt(compat_ptr(ptr), bytecount);
-		break;
-	      case 1:
-		ret = write_ldt(compat_ptr(ptr), bytecount, 1);
-		break;
-	      case 2:
-		ret = read_default_ldt(compat_ptr(ptr), bytecount);
-		break;
-	      case 0x11:
-		ret = write_ldt(compat_ptr(ptr), bytecount, 0);
-		break;
-	}
-	return ret;
-}
diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c
deleted file mode 100644
index b3355a9ca2c..00000000000
--- a/arch/ia64/ia32/ia32_signal.c
+++ /dev/null
@@ -1,1035 +0,0 @@
-/*
- * IA32 Architecture-specific signal handling support.
- *
- * Copyright (C) 1999, 2001-2002, 2005 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
- * Copyright (C) 2000 VA Linux Co
- * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
- *
- * Derived from i386 and Alpha versions.
- */
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/personality.h>
-#include <linux/ptrace.h>
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/stddef.h>
-#include <linux/syscalls.h>
-#include <linux/unistd.h>
-#include <linux/wait.h>
-#include <linux/compat.h>
-
-#include <asm/intrinsics.h>
-#include <asm/uaccess.h>
-#include <asm/rse.h>
-#include <asm/sigcontext.h>
-
-#include "ia32priv.h"
-
-#include "../kernel/sigframe.h"
-
-#define A(__x)		((unsigned long)(__x))
-
-#define DEBUG_SIG	0
-#define _BLOCKABLE	(~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-#define __IA32_NR_sigreturn            119
-#define __IA32_NR_rt_sigreturn         173
-
-struct sigframe_ia32
-{
-       int pretcode;
-       int sig;
-       struct sigcontext_ia32 sc;
-       struct _fpstate_ia32 fpstate;
-       unsigned int extramask[_COMPAT_NSIG_WORDS-1];
-       char retcode[8];
-};
-
-struct rt_sigframe_ia32
-{
-       int pretcode;
-       int sig;
-       int pinfo;
-       int puc;
-       compat_siginfo_t info;
-       struct ucontext_ia32 uc;
-       struct _fpstate_ia32 fpstate;
-       char retcode[8];
-};
-
-int
-copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from)
-{
-	unsigned long tmp;
-	int err;
-
-	if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
-		return -EFAULT;
-
-	err = __get_user(to->si_signo, &from->si_signo);
-	err |= __get_user(to->si_errno, &from->si_errno);
-	err |= __get_user(to->si_code, &from->si_code);
-
-	if (to->si_code < 0)
-		err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
-	else {
-		switch (to->si_code >> 16) {
-		      case __SI_CHLD >> 16:
-			err |= __get_user(to->si_utime, &from->si_utime);
-			err |= __get_user(to->si_stime, &from->si_stime);
-			err |= __get_user(to->si_status, &from->si_status);
-		      default:
-			err |= __get_user(to->si_pid, &from->si_pid);
-			err |= __get_user(to->si_uid, &from->si_uid);
-			break;
-		      case __SI_FAULT >> 16:
-			err |= __get_user(tmp, &from->si_addr);
-			to->si_addr = (void __user *) tmp;
-			break;
-		      case __SI_POLL >> 16:
-			err |= __get_user(to->si_band, &from->si_band);
-			err |= __get_user(to->si_fd, &from->si_fd);
-			break;
-		      case __SI_RT >> 16: /* This is not generated by the kernel as of now.  */
-		      case __SI_MESGQ >> 16:
-			err |= __get_user(to->si_pid, &from->si_pid);
-			err |= __get_user(to->si_uid, &from->si_uid);
-			err |= __get_user(to->si_int, &from->si_int);
-			break;
-		}
-	}
-	return err;
-}
-
-int
-copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from)
-{
-	unsigned int addr;
-	int err;
-
-	if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
-		return -EFAULT;
-
-	/* If you change siginfo_t structure, please be sure
-	   this code is fixed accordingly.
-	   It should never copy any pad contained in the structure
-	   to avoid security leaks, but must copy the generic
-	   3 ints plus the relevant union member.
-	   This routine must convert siginfo from 64bit to 32bit as well
-	   at the same time.  */
-	err = __put_user(from->si_signo, &to->si_signo);
-	err |= __put_user(from->si_errno, &to->si_errno);
-	err |= __put_user((short)from->si_code, &to->si_code);
-	if (from->si_code < 0)
-		err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
-	else {
-		switch (from->si_code >> 16) {
-		case __SI_CHLD >> 16:
-			err |= __put_user(from->si_utime, &to->si_utime);
-			err |= __put_user(from->si_stime, &to->si_stime);
-			err |= __put_user(from->si_status, &to->si_status);
-		default:
-			err |= __put_user(from->si_pid, &to->si_pid);
-			err |= __put_user(from->si_uid, &to->si_uid);
-			break;
-		case __SI_FAULT >> 16:
-			/* avoid type-checking warnings by copying _pad[0] in lieu of si_addr... */
-			err |= __put_user(from->_sifields._pad[0], &to->si_addr);
-			break;
-		case __SI_POLL >> 16:
-			err |= __put_user(from->si_band, &to->si_band);
-			err |= __put_user(from->si_fd, &to->si_fd);
-			break;
-		case __SI_TIMER >> 16:
-			err |= __put_user(from->si_tid, &to->si_tid);
-			err |= __put_user(from->si_overrun, &to->si_overrun);
-			addr = (unsigned long) from->si_ptr;
-			err |= __put_user(addr, &to->si_ptr);
-			break;
-		case __SI_RT >> 16:	/* Not generated by the kernel as of now.  */
-		case __SI_MESGQ >> 16:
-			err |= __put_user(from->si_uid, &to->si_uid);
-			err |= __put_user(from->si_pid, &to->si_pid);
-			addr = (unsigned long) from->si_ptr;
-			err |= __put_user(addr, &to->si_ptr);
-			break;
-		}
-	}
-	return err;
-}
-
-
-/*
- *  SAVE and RESTORE of ia32 fpstate info, from ia64 current state
- *  Used in exception handler to pass the fpstate to the user, and restore
- *  the fpstate while returning from the exception handler.
- *
- *    fpstate info and their mapping to IA64 regs:
- *    fpstate    REG(BITS)      Attribute    Comments
- *    cw         ar.fcr(0:12)                with bits 7 and 6 not used
- *    sw         ar.fsr(0:15)
- *    tag        ar.fsr(16:31)               with odd numbered bits not used
- *                                           (read returns 0, writes ignored)
- *    ipoff      ar.fir(0:31)
- *    cssel      ar.fir(32:47)
- *    dataoff    ar.fdr(0:31)
- *    datasel    ar.fdr(32:47)
- *
- *    _st[(0+TOS)%8]   f8
- *    _st[(1+TOS)%8]   f9
- *    _st[(2+TOS)%8]   f10
- *    _st[(3+TOS)%8]   f11                   (f8..f11 from ptregs)
- *      : :            :                     (f12..f15 from live reg)
- *      : :            :
- *    _st[(7+TOS)%8]   f15                   TOS=sw.top(bits11:13)
- *
- *    status     Same as sw     RO
- *    magic      0                           as X86_FXSR_MAGIC in ia32
- *    mxcsr      Bits(7:15)=ar.fcr(39:47)
- *               Bits(0:5) =ar.fsr(32:37)    with bit 6 reserved
- *    _xmm[0..7] f16..f31                    (live registers)
- *                                           with _xmm[0]
- *                                             Bit(64:127)=f17(0:63)
- *                                             Bit(0:63)=f16(0:63)
- *    All other fields unused...
- */
-
-static int
-save_ia32_fpstate_live (struct _fpstate_ia32 __user *save)
-{
-	struct task_struct *tsk = current;
-	struct pt_regs *ptp;
-	struct _fpreg_ia32 *fpregp;
-	char buf[32];
-	unsigned long fsr, fcr, fir, fdr;
-	unsigned long new_fsr;
-	unsigned long num128[2];
-	unsigned long mxcsr=0;
-	int fp_tos, fr8_st_map;
-
-	if (!access_ok(VERIFY_WRITE, save, sizeof(*save)))
-		return -EFAULT;
-
-	/* Read in fsr, fcr, fir, fdr and copy onto fpstate */
-	fsr = ia64_getreg(_IA64_REG_AR_FSR);
-	fcr = ia64_getreg(_IA64_REG_AR_FCR);
-	fir = ia64_getreg(_IA64_REG_AR_FIR);
-	fdr = ia64_getreg(_IA64_REG_AR_FDR);
-
-	/*
-	 * We need to clear the exception state before calling the signal handler. Clear
-	 * the bits 15, bits 0-7 in fp status word. Similar to the functionality of fnclex
-	 * instruction.
-	 */
-	new_fsr = fsr & ~0x80ff;
-	ia64_setreg(_IA64_REG_AR_FSR, new_fsr);
-
-	__put_user(fcr & 0xffff, &save->cw);
-	__put_user(fsr & 0xffff, &save->sw);
-	__put_user((fsr>>16) & 0xffff, &save->tag);
-	__put_user(fir, &save->ipoff);
-	__put_user((fir>>32) & 0xffff, &save->cssel);
-	__put_user(fdr, &save->dataoff);
-	__put_user((fdr>>32) & 0xffff, &save->datasel);
-	__put_user(fsr & 0xffff, &save->status);
-
-	mxcsr = ((fcr>>32) & 0xff80) | ((fsr>>32) & 0x3f);
-	__put_user(mxcsr & 0xffff, &save->mxcsr);
-	__put_user( 0, &save->magic); //#define X86_FXSR_MAGIC   0x0000
-
-	/*
-	 * save f8..f11  from pt_regs
-	 * save f12..f15 from live register set
-	 */
-	/*
-	 *  Find the location where f8 has to go in fp reg stack.  This depends on
-	 *  TOP(11:13) field of sw. Other f reg continue sequentially from where f8 maps
-	 *  to.
-	 */
-	fp_tos = (fsr>>11)&0x7;
-	fr8_st_map = (8-fp_tos)&0x7;
-	ptp = task_pt_regs(tsk);
-	fpregp = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15);
-	ia64f2ia32f(fpregp, &ptp->f8);
-	copy_to_user(&save->_st[(0+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
-	ia64f2ia32f(fpregp, &ptp->f9);
-	copy_to_user(&save->_st[(1+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
-	ia64f2ia32f(fpregp, &ptp->f10);
-	copy_to_user(&save->_st[(2+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
-	ia64f2ia32f(fpregp, &ptp->f11);
-	copy_to_user(&save->_st[(3+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
-
-	ia64_stfe(fpregp, 12);
-	copy_to_user(&save->_st[(4+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
-	ia64_stfe(fpregp, 13);
-	copy_to_user(&save->_st[(5+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
-	ia64_stfe(fpregp, 14);
-	copy_to_user(&save->_st[(6+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
-	ia64_stfe(fpregp, 15);
-	copy_to_user(&save->_st[(7+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
-
-	ia64_stf8(&num128[0], 16);
-	ia64_stf8(&num128[1], 17);
-	copy_to_user(&save->_xmm[0], num128, sizeof(struct _xmmreg_ia32));
-
-	ia64_stf8(&num128[0], 18);
-	ia64_stf8(&num128[1], 19);
-	copy_to_user(&save->_xmm[1], num128, sizeof(struct _xmmreg_ia32));
-
-	ia64_stf8(&num128[0], 20);
-	ia64_stf8(&num128[1], 21);
-	copy_to_user(&save->_xmm[2], num128, sizeof(struct _xmmreg_ia32));
-
-	ia64_stf8(&num128[0], 22);
-	ia64_stf8(&num128[1], 23);
-	copy_to_user(&save->_xmm[3], num128, sizeof(struct _xmmreg_ia32));
-
-	ia64_stf8(&num128[0], 24);
-	ia64_stf8(&num128[1], 25);
-	copy_to_user(&save->_xmm[4], num128, sizeof(struct _xmmreg_ia32));
-
-	ia64_stf8(&num128[0], 26);
-	ia64_stf8(&num128[1], 27);
-	copy_to_user(&save->_xmm[5], num128, sizeof(struct _xmmreg_ia32));
-
-	ia64_stf8(&num128[0], 28);
-	ia64_stf8(&num128[1], 29);
-	copy_to_user(&save->_xmm[6], num128, sizeof(struct _xmmreg_ia32));
-
-	ia64_stf8(&num128[0], 30);
-	ia64_stf8(&num128[1], 31);
-	copy_to_user(&save->_xmm[7], num128, sizeof(struct _xmmreg_ia32));
-	return 0;
-}
-
-static int
-restore_ia32_fpstate_live (struct _fpstate_ia32 __user *save)
-{
-	struct task_struct *tsk = current;
-	struct pt_regs *ptp;
-	unsigned int lo, hi;
-	unsigned long num128[2];
-	unsigned long num64, mxcsr;
-	struct _fpreg_ia32 *fpregp;
-	char buf[32];
-	unsigned long fsr, fcr, fir, fdr;
-	int fp_tos, fr8_st_map;
-
-	if (!access_ok(VERIFY_READ, save, sizeof(*save)))
-		return(-EFAULT);
-
-	/*
-	 * Updating fsr, fcr, fir, fdr.
-	 * Just a bit more complicated than save.
-	 * - Need to make sure that we don't write any value other than the
-	 *   specific fpstate info
-	 * - Need to make sure that the untouched part of frs, fdr, fir, fcr
-	 *   should remain same while writing.
-	 * So, we do a read, change specific fields and write.
-	 */
-	fsr = ia64_getreg(_IA64_REG_AR_FSR);
-	fcr = ia64_getreg(_IA64_REG_AR_FCR);
-	fir = ia64_getreg(_IA64_REG_AR_FIR);
-	fdr = ia64_getreg(_IA64_REG_AR_FDR);
-
-	__get_user(mxcsr, (unsigned int __user *)&save->mxcsr);
-	/* setting bits 0..5 8..12 with cw and 39..47 from mxcsr */
-	__get_user(lo, (unsigned int __user *)&save->cw);
-	num64 = mxcsr & 0xff10;
-	num64 = (num64 << 32) | (lo & 0x1f3f);
-	fcr = (fcr & (~0xff1000001f3fUL)) | num64;
-
-	/* setting bits 0..31 with sw and tag and 32..37 from mxcsr */
-	__get_user(lo, (unsigned int __user *)&save->sw);
-	/* set bits 15,7 (fsw.b, fsw.es) to reflect the current error status */
-	if ( !(lo & 0x7f) )
-		lo &= (~0x8080);
-	__get_user(hi, (unsigned int __user *)&save->tag);
-	num64 = mxcsr & 0x3f;
-	num64 = (num64 << 16) | (hi & 0xffff);
-	num64 = (num64 << 16) | (lo & 0xffff);
-	fsr = (fsr & (~0x3fffffffffUL)) | num64;
-
-	/* setting bits 0..47 with cssel and ipoff */
-	__get_user(lo, (unsigned int __user *)&save->ipoff);
-	__get_user(hi, (unsigned int __user *)&save->cssel);
-	num64 = hi & 0xffff;
-	num64 = (num64 << 32) | lo;
-	fir = (fir & (~0xffffffffffffUL)) | num64;
-
-	/* setting bits 0..47 with datasel and dataoff */
-	__get_user(lo, (unsigned int __user *)&save->dataoff);
-	__get_user(hi, (unsigned int __user *)&save->datasel);
-	num64 = hi & 0xffff;
-	num64 = (num64 << 32) | lo;
-	fdr = (fdr & (~0xffffffffffffUL)) | num64;
-
-	ia64_setreg(_IA64_REG_AR_FSR, fsr);
-	ia64_setreg(_IA64_REG_AR_FCR, fcr);
-	ia64_setreg(_IA64_REG_AR_FIR, fir);
-	ia64_setreg(_IA64_REG_AR_FDR, fdr);
-
-	/*
-	 * restore f8..f11 onto pt_regs
-	 * restore f12..f15 onto live registers
-	 */
-	/*
-	 *  Find the location where f8 has to go in fp reg stack.  This depends on
-	 *  TOP(11:13) field of sw. Other f reg continue sequentially from where f8 maps
-	 *  to.
-	 */
-	fp_tos = (fsr>>11)&0x7;
-	fr8_st_map = (8-fp_tos)&0x7;
-	fpregp = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15);
-
-	ptp = task_pt_regs(tsk);
-	copy_from_user(fpregp, &save->_st[(0+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
-	ia32f2ia64f(&ptp->f8, fpregp);
-	copy_from_user(fpregp, &save->_st[(1+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
-	ia32f2ia64f(&ptp->f9, fpregp);
-	copy_from_user(fpregp, &save->_st[(2+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
-	ia32f2ia64f(&ptp->f10, fpregp);
-	copy_from_user(fpregp, &save->_st[(3+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
-	ia32f2ia64f(&ptp->f11, fpregp);
-
-	copy_from_user(fpregp, &save->_st[(4+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
-	ia64_ldfe(12, fpregp);
-	copy_from_user(fpregp, &save->_st[(5+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
-	ia64_ldfe(13, fpregp);
-	copy_from_user(fpregp, &save->_st[(6+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
-	ia64_ldfe(14, fpregp);
-	copy_from_user(fpregp, &save->_st[(7+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
-	ia64_ldfe(15, fpregp);
-
-	copy_from_user(num128, &save->_xmm[0], sizeof(struct _xmmreg_ia32));
-	ia64_ldf8(16, &num128[0]);
-	ia64_ldf8(17, &num128[1]);
-
-	copy_from_user(num128, &save->_xmm[1], sizeof(struct _xmmreg_ia32));
-	ia64_ldf8(18, &num128[0]);
-	ia64_ldf8(19, &num128[1]);
-
-	copy_from_user(num128, &save->_xmm[2], sizeof(struct _xmmreg_ia32));
-	ia64_ldf8(20, &num128[0]);
-	ia64_ldf8(21, &num128[1]);
-
-	copy_from_user(num128, &save->_xmm[3], sizeof(struct _xmmreg_ia32));
-	ia64_ldf8(22, &num128[0]);
-	ia64_ldf8(23, &num128[1]);
-
-	copy_from_user(num128, &save->_xmm[4], sizeof(struct _xmmreg_ia32));
-	ia64_ldf8(24, &num128[0]);
-	ia64_ldf8(25, &num128[1]);
-
-	copy_from_user(num128, &save->_xmm[5], sizeof(struct _xmmreg_ia32));
-	ia64_ldf8(26, &num128[0]);
-	ia64_ldf8(27, &num128[1]);
-
-	copy_from_user(num128, &save->_xmm[6], sizeof(struct _xmmreg_ia32));
-	ia64_ldf8(28, &num128[0]);
-	ia64_ldf8(29, &num128[1]);
-
-	copy_from_user(num128, &save->_xmm[7], sizeof(struct _xmmreg_ia32));
-	ia64_ldf8(30, &num128[0]);
-	ia64_ldf8(31, &num128[1]);
-	return 0;
-}
-
-static inline void
-sigact_set_handler (struct k_sigaction *sa, unsigned int handler, unsigned int restorer)
-{
-	if (handler + 1 <= 2)
-		/* SIG_DFL, SIG_IGN, or SIG_ERR: must sign-extend to 64-bits */
-		sa->sa.sa_handler = (__sighandler_t) A((int) handler);
-	else
-		sa->sa.sa_handler = (__sighandler_t) (((unsigned long) restorer << 32) | handler);
-}
-
-long
-__ia32_rt_sigsuspend (compat_sigset_t *sset, unsigned int sigsetsize, struct sigscratch *scr)
-{
-	extern long ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall);
-	sigset_t oldset, set;
-
-	scr->scratch_unat = 0;	/* avoid leaking kernel bits to user level */
-	memset(&set, 0, sizeof(set));
-
-	memcpy(&set.sig, &sset->sig, sigsetsize);
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-
-	spin_lock_irq(&current->sighand->siglock);
-	{
-		oldset = current->blocked;
-		current->blocked = set;
-		recalc_sigpending();
-	}
-	spin_unlock_irq(&current->sighand->siglock);
-
-	/*
-	 * The return below usually returns to the signal handler.  We need to pre-set the
-	 * correct error code here to ensure that the right values get saved in sigcontext
-	 * by ia64_do_signal.
-	 */
-	scr->pt.r8 = -EINTR;
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (ia64_do_signal(&oldset, scr, 1))
-			return -EINTR;
-	}
-}
-
-asmlinkage long
-ia32_rt_sigsuspend (compat_sigset_t __user *uset, unsigned int sigsetsize, struct sigscratch *scr)
-{
-	compat_sigset_t set;
-
-	if (sigsetsize > sizeof(compat_sigset_t))
-		return -EINVAL;
-
-	if (copy_from_user(&set.sig, &uset->sig, sigsetsize))
-		return -EFAULT;
-
-	return __ia32_rt_sigsuspend(&set, sigsetsize, scr);
-}
-
-asmlinkage long
-ia32_sigsuspend (unsigned int mask, struct sigscratch *scr)
-{
-	return __ia32_rt_sigsuspend((compat_sigset_t *) &mask, sizeof(mask), scr);
-}
-
-asmlinkage long
-sys32_signal (int sig, unsigned int handler)
-{
-	struct k_sigaction new_sa, old_sa;
-	int ret;
-
-	sigact_set_handler(&new_sa, handler, 0);
-	new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
-	sigemptyset(&new_sa.sa.sa_mask);
-
-	ret = do_sigaction(sig, &new_sa, &old_sa);
-
-	return ret ? ret : IA32_SA_HANDLER(&old_sa);
-}
-
-asmlinkage long
-sys32_rt_sigaction (int sig, struct sigaction32 __user *act,
-		    struct sigaction32 __user *oact, unsigned int sigsetsize)
-{
-	struct k_sigaction new_ka, old_ka;
-	unsigned int handler, restorer;
-	int ret;
-
-	/* XXX: Don't preclude handling different sized sigset_t's.  */
-	if (sigsetsize != sizeof(compat_sigset_t))
-		return -EINVAL;
-
-	if (act) {
-		ret = get_user(handler, &act->sa_handler);
-		ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
-		ret |= get_user(restorer, &act->sa_restorer);
-		ret |= copy_from_user(&new_ka.sa.sa_mask, &act->sa_mask, sizeof(compat_sigset_t));
-		if (ret)
-			return -EFAULT;
-
-		sigact_set_handler(&new_ka, handler, restorer);
-	}
-
-	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
-	if (!ret && oact) {
-		ret = put_user(IA32_SA_HANDLER(&old_ka), &oact->sa_handler);
-		ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
-		ret |= put_user(IA32_SA_RESTORER(&old_ka), &oact->sa_restorer);
-		ret |= copy_to_user(&oact->sa_mask, &old_ka.sa.sa_mask, sizeof(compat_sigset_t));
-	}
-	return ret;
-}
-
-
-asmlinkage long
-sys32_rt_sigprocmask (int how, compat_sigset_t __user *set, compat_sigset_t __user *oset,
-		      unsigned int sigsetsize)
-{
-	mm_segment_t old_fs = get_fs();
-	sigset_t s;
-	long ret;
-
-	if (sigsetsize > sizeof(s))
-		return -EINVAL;
-
-	if (set) {
-		memset(&s, 0, sizeof(s));
-		if (copy_from_user(&s.sig, set, sigsetsize))
-			return -EFAULT;
-	}
-	set_fs(KERNEL_DS);
-	ret = sys_rt_sigprocmask(how,
-				 set ? (sigset_t __user *) &s : NULL,
-				 oset ? (sigset_t __user *) &s : NULL, sizeof(s));
-	set_fs(old_fs);
-	if (ret)
-		return ret;
-	if (oset) {
-		if (copy_to_user(oset, &s.sig, sigsetsize))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-asmlinkage long
-sys32_rt_sigqueueinfo (int pid, int sig, compat_siginfo_t __user *uinfo)
-{
-	mm_segment_t old_fs = get_fs();
-	siginfo_t info;
-	int ret;
-
-	if (copy_siginfo_from_user32(&info, uinfo))
-		return -EFAULT;
-	set_fs(KERNEL_DS);
-	ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *) &info);
-	set_fs(old_fs);
-	return ret;
-}
-
-asmlinkage long
-sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact)
-{
-	struct k_sigaction new_ka, old_ka;
-	unsigned int handler, restorer;
-	int ret;
-
-	if (act) {
-		compat_old_sigset_t mask;
-
-		ret = get_user(handler, &act->sa_handler);
-		ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
-		ret |= get_user(restorer, &act->sa_restorer);
-		ret |= get_user(mask, &act->sa_mask);
-		if (ret)
-			return ret;
-
-		sigact_set_handler(&new_ka, handler, restorer);
-		siginitset(&new_ka.sa.sa_mask, mask);
-	}
-
-	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
-	if (!ret && oact) {
-		ret = put_user(IA32_SA_HANDLER(&old_ka), &oact->sa_handler);
-		ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
-		ret |= put_user(IA32_SA_RESTORER(&old_ka), &oact->sa_restorer);
-		ret |= put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
-	}
-
-	return ret;
-}
-
-static int
-setup_sigcontext_ia32 (struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate,
-		       struct pt_regs *regs, unsigned long mask)
-{
-	int  err = 0;
-	unsigned long flag;
-
-	if (!access_ok(VERIFY_WRITE, sc, sizeof(*sc)))
-		return -EFAULT;
-
-	err |= __put_user((regs->r16 >> 32) & 0xffff, (unsigned int __user *)&sc->fs);
-	err |= __put_user((regs->r16 >> 48) & 0xffff, (unsigned int __user *)&sc->gs);
-	err |= __put_user((regs->r16 >> 16) & 0xffff, (unsigned int __user *)&sc->es);
-	err |= __put_user(regs->r16 & 0xffff, (unsigned int __user *)&sc->ds);
-	err |= __put_user(regs->r15, &sc->edi);
-	err |= __put_user(regs->r14, &sc->esi);
-	err |= __put_user(regs->r13, &sc->ebp);
-	err |= __put_user(regs->r12, &sc->esp);
-	err |= __put_user(regs->r11, &sc->ebx);
-	err |= __put_user(regs->r10, &sc->edx);
-	err |= __put_user(regs->r9, &sc->ecx);
-	err |= __put_user(regs->r8, &sc->eax);
-#if 0
-	err |= __put_user(current->tss.trap_no, &sc->trapno);
-	err |= __put_user(current->tss.error_code, &sc->err);
-#endif
-	err |= __put_user(regs->cr_iip, &sc->eip);
-	err |= __put_user(regs->r17 & 0xffff, (unsigned int __user *)&sc->cs);
-	/*
-	 *  `eflags' is in an ar register for this context
-	 */
-	flag = ia64_getreg(_IA64_REG_AR_EFLAG);
-	err |= __put_user((unsigned int)flag, &sc->eflags);
-	err |= __put_user(regs->r12, &sc->esp_at_signal);
-	err |= __put_user((regs->r17 >> 16) & 0xffff, (unsigned int __user *)&sc->ss);
-
-	if ( save_ia32_fpstate_live(fpstate) < 0 )
-		err = -EFAULT;
-	else
-		err |= __put_user((u32)(u64)fpstate, &sc->fpstate);
-
-#if 0
-	tmp = save_i387(fpstate);
-	if (tmp < 0)
-		err = 1;
-	else
-		err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
-
-	/* non-iBCS2 extensions.. */
-#endif
-	err |= __put_user(mask, &sc->oldmask);
-#if 0
-	err |= __put_user(current->tss.cr2, &sc->cr2);
-#endif
-	return err;
-}
-
-static int
-restore_sigcontext_ia32 (struct pt_regs *regs, struct sigcontext_ia32 __user *sc, int *peax)
-{
-	unsigned int err = 0;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-	if (!access_ok(VERIFY_READ, sc, sizeof(*sc)))
-		return(-EFAULT);
-
-#define COPY(ia64x, ia32x)	err |= __get_user(regs->ia64x, &sc->ia32x)
-
-#define copyseg_gs(tmp)		(regs->r16 |= (unsigned long) (tmp) << 48)
-#define copyseg_fs(tmp)		(regs->r16 |= (unsigned long) (tmp) << 32)
-#define copyseg_cs(tmp)		(regs->r17 |= tmp)
-#define copyseg_ss(tmp)		(regs->r17 |= (unsigned long) (tmp) << 16)
-#define copyseg_es(tmp)		(regs->r16 |= (unsigned long) (tmp) << 16)
-#define copyseg_ds(tmp)		(regs->r16 |= tmp)
-
-#define COPY_SEG(seg)					\
-	{						\
-		unsigned short tmp;			\
-		err |= __get_user(tmp, &sc->seg);	\
-		copyseg_##seg(tmp);			\
-	}
-#define COPY_SEG_STRICT(seg)				\
-	{						\
-		unsigned short tmp;			\
-		err |= __get_user(tmp, &sc->seg);	\
-		copyseg_##seg(tmp|3);			\
-	}
-
-	/* To make COPY_SEGs easier, we zero r16, r17 */
-	regs->r16 = 0;
-	regs->r17 = 0;
-
-	COPY_SEG(gs);
-	COPY_SEG(fs);
-	COPY_SEG(es);
-	COPY_SEG(ds);
-	COPY(r15, edi);
-	COPY(r14, esi);
-	COPY(r13, ebp);
-	COPY(r12, esp);
-	COPY(r11, ebx);
-	COPY(r10, edx);
-	COPY(r9, ecx);
-	COPY(cr_iip, eip);
-	COPY_SEG_STRICT(cs);
-	COPY_SEG_STRICT(ss);
-	ia32_load_segment_descriptors(current);
-	{
-		unsigned int tmpflags;
-		unsigned long flag;
-
-		/*
-		 *  IA32 `eflags' is not part of `pt_regs', it's in an ar register which
-		 *  is part of the thread context.  Fortunately, we are executing in the
-		 *  IA32 process's context.
-		 */
-		err |= __get_user(tmpflags, &sc->eflags);
-		flag = ia64_getreg(_IA64_REG_AR_EFLAG);
-		flag &= ~0x40DD5;
-		flag |= (tmpflags & 0x40DD5);
-		ia64_setreg(_IA64_REG_AR_EFLAG, flag);
-
-		regs->r1 = -1;	/* disable syscall checks, r1 is orig_eax */
-	}
-
-	{
-		struct _fpstate_ia32 __user *buf = NULL;
-		u32    fpstate_ptr;
-		err |= get_user(fpstate_ptr, &(sc->fpstate));
-		buf = compat_ptr(fpstate_ptr);
-		if (buf) {
-			err |= restore_ia32_fpstate_live(buf);
-		}
-	}
-
-#if 0
-	{
-		struct _fpstate * buf;
-		err |= __get_user(buf, &sc->fpstate);
-		if (buf) {
-			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
-				goto badframe;
-			err |= restore_i387(buf);
-		}
-	}
-#endif
-
-	err |= __get_user(*peax, &sc->eax);
-	return err;
-
-#if 0
-  badframe:
-	return 1;
-#endif
-}
-
-/*
- * Determine which stack to use..
- */
-static inline void __user *
-get_sigframe (struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
-{
-	unsigned long esp;
-
-	/* Default to using normal stack (truncate off sign-extension of bit 31: */
-	esp = (unsigned int) regs->r12;
-
-	/* This is the X/Open sanctioned signal stack switching.  */
-	if (ka->sa.sa_flags & SA_ONSTACK) {
-		if (!on_sig_stack(esp))
-			esp = current->sas_ss_sp + current->sas_ss_size;
-	}
-	/* Legacy stack switching not supported */
-
-	return (void __user *)((esp - frame_size) & -8ul);
-}
-
-static int
-setup_frame_ia32 (int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs)
-{
-	struct exec_domain *ed = current_thread_info()->exec_domain;
-	struct sigframe_ia32 __user *frame;
-	int err = 0;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame));
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
-
-	err |= __put_user((ed && ed->signal_invmap && sig < 32
-			   ? (int)(ed->signal_invmap[sig]) : sig), &frame->sig);
-
-	err |= setup_sigcontext_ia32(&frame->sc, &frame->fpstate, regs, set->sig[0]);
-
-	if (_COMPAT_NSIG_WORDS > 1)
-		err |= __copy_to_user(frame->extramask, (char *) &set->sig + 4,
-				      sizeof(frame->extramask));
-
-	/* Set up to return from userspace.  If provided, use a stub
-	   already in userspace.  */
-	if (ka->sa.sa_flags & SA_RESTORER) {
-		unsigned int restorer = IA32_SA_RESTORER(ka);
-		err |= __put_user(restorer, &frame->pretcode);
-	} else {
-		/* Pointing to restorer in ia32 gate page */
-		err |= __put_user(IA32_GATE_OFFSET, &frame->pretcode);
-	}
-
-	/* This is popl %eax ; movl $,%eax ; int $0x80
-	 * and there for historical reasons only.
-	 * See arch/i386/kernel/signal.c
-	 */
-
-	err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
-	err |= __put_user(__IA32_NR_sigreturn, (int __user *)(frame->retcode+2));
-	err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
-
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up registers for signal handler */
-	regs->r12 = (unsigned long) frame;
-	regs->cr_iip = IA32_SA_HANDLER(ka);
-
-	set_fs(USER_DS);
-
-#if 0
-	regs->eflags &= ~TF_MASK;
-#endif
-
-#if 0
-	printk("SIG deliver (%s:%d): sig=%d sp=%p pc=%lx ra=%x\n",
-               current->comm, current->pid, sig, (void *) frame, regs->cr_iip, frame->pretcode);
-#endif
-
-	return 1;
-
-  give_sigsegv:
-	force_sigsegv(sig, current);
-	return 0;
-}
-
-static int
-setup_rt_frame_ia32 (int sig, struct k_sigaction *ka, siginfo_t *info,
-		     sigset_t *set, struct pt_regs * regs)
-{
-	struct exec_domain *ed = current_thread_info()->exec_domain;
-	compat_uptr_t pinfo, puc;
-	struct rt_sigframe_ia32 __user *frame;
-	int err = 0;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame));
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
-
-	err |= __put_user((ed && ed->signal_invmap
-			   && sig < 32 ? ed->signal_invmap[sig] : sig), &frame->sig);
-
-	pinfo = (long __user) &frame->info;
-	puc = (long __user) &frame->uc;
-	err |= __put_user(pinfo, &frame->pinfo);
-	err |= __put_user(puc, &frame->puc);
-	err |= copy_siginfo_to_user32(&frame->info, info);
-
-	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(regs->r12), &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext_ia32(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]);
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up to return from userspace.  If provided, use a stub
-	   already in userspace.  */
-	if (ka->sa.sa_flags & SA_RESTORER) {
-		unsigned int restorer = IA32_SA_RESTORER(ka);
-		err |= __put_user(restorer, &frame->pretcode);
-	} else {
-		/* Pointing to rt_restorer in ia32 gate page */
-		err |= __put_user(IA32_GATE_OFFSET + 8, &frame->pretcode);
-	}
-
-	/* This is movl $,%eax ; int $0x80
-	 * and there for historical reasons only.
-	 * See arch/i386/kernel/signal.c
-	 */
-
-	err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
-	err |= __put_user(__IA32_NR_rt_sigreturn, (int __user *)(frame->retcode+1));
-	err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
-
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up registers for signal handler */
-	regs->r12 = (unsigned long) frame;
-	regs->cr_iip = IA32_SA_HANDLER(ka);
-
-	set_fs(USER_DS);
-
-#if 0
-	regs->eflags &= ~TF_MASK;
-#endif
-
-#if 0
-	printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%x\n",
-               current->comm, current->pid, (void *) frame, regs->cr_iip, frame->pretcode);
-#endif
-
-	return 1;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return 0;
-}
-
-int
-ia32_setup_frame1 (int sig, struct k_sigaction *ka, siginfo_t *info,
-		   sigset_t *set, struct pt_regs *regs)
-{
-       /* Set up the stack frame */
-       if (ka->sa.sa_flags & SA_SIGINFO)
-               return setup_rt_frame_ia32(sig, ka, info, set, regs);
-       else
-               return setup_frame_ia32(sig, ka, set, regs);
-}
-
-asmlinkage long
-sys32_sigreturn (int arg0, int arg1, int arg2, int arg3, int arg4, int arg5,
-		 int arg6, int arg7, struct pt_regs regs)
-{
-	unsigned long esp = (unsigned int) regs.r12;
-	struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(esp - 8);
-	sigset_t set;
-	int eax;
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-
-	if (__get_user(set.sig[0], &frame->sc.oldmask)
-	    || (_COMPAT_NSIG_WORDS > 1 && __copy_from_user((char *) &set.sig + 4, &frame->extramask,
-							 sizeof(frame->extramask))))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext_ia32(&regs, &frame->sc, &eax))
-		goto badframe;
-	return eax;
-
-  badframe:
-	force_sig(SIGSEGV, current);
-	return 0;
-}
-
-asmlinkage long
-sys32_rt_sigreturn (int arg0, int arg1, int arg2, int arg3, int arg4,
-		    int arg5, int arg6, int arg7, struct pt_regs regs)
-{
-	unsigned long esp = (unsigned int) regs.r12;
-	struct rt_sigframe_ia32 __user *frame = (struct rt_sigframe_ia32 __user *)(esp - 4);
-	sigset_t set;
-	int eax;
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked =  set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext_ia32(&regs, &frame->uc.uc_mcontext, &eax))
-		goto badframe;
-
-	/* It is more difficult to avoid calling this function than to
-	   call it and ignore errors.  */
-	do_sigaltstack((stack_t __user *) &frame->uc.uc_stack, NULL, esp);
-
-	return eax;
-
-  badframe:
-	force_sig(SIGSEGV, current);
-	return 0;
-}
diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c
deleted file mode 100644
index c187743965a..00000000000
--- a/arch/ia64/ia32/ia32_support.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * IA32 helper functions
- *
- * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
- * Copyright (C) 2000 Asit K. Mallick <asit.k.mallick@intel.com>
- * Copyright (C) 2001-2002 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 06/16/00	A. Mallick	added csd/ssd/tssd for ia32 thread context
- * 02/19/01	D. Mosberger	dropped tssd; it's not needed
- * 09/14/01	D. Mosberger	fixed memory management for gdt/tss page
- * 09/29/01	D. Mosberger	added ia32_load_segment_descriptors()
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/personality.h>
-#include <linux/sched.h>
-
-#include <asm/intrinsics.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-
-#include "ia32priv.h"
-
-extern void die_if_kernel (char *str, struct pt_regs *regs, long err);
-
-struct exec_domain ia32_exec_domain;
-struct page *ia32_shared_page[NR_CPUS];
-unsigned long *ia32_boot_gdt;
-unsigned long *cpu_gdt_table[NR_CPUS];
-struct page *ia32_gate_page;
-
-static unsigned long
-load_desc (u16 selector)
-{
-	unsigned long *table, limit, index;
-
-	if (!selector)
-		return 0;
-	if (selector & IA32_SEGSEL_TI) {
-		table = (unsigned long *) IA32_LDT_OFFSET;
-		limit = IA32_LDT_ENTRIES;
-	} else {
-		table = cpu_gdt_table[smp_processor_id()];
-		limit = IA32_PAGE_SIZE / sizeof(ia32_boot_gdt[0]);
-	}
-	index = selector >> IA32_SEGSEL_INDEX_SHIFT;
-	if (index >= limit)
-		return 0;
-	return IA32_SEG_UNSCRAMBLE(table[index]);
-}
-
-void
-ia32_load_segment_descriptors (struct task_struct *task)
-{
-	struct pt_regs *regs = task_pt_regs(task);
-
-	/* Setup the segment descriptors */
-	regs->r24 = load_desc(regs->r16 >> 16);		/* ESD */
-	regs->r27 = load_desc(regs->r16 >>  0);		/* DSD */
-	regs->r28 = load_desc(regs->r16 >> 32);		/* FSD */
-	regs->r29 = load_desc(regs->r16 >> 48);		/* GSD */
-	regs->ar_csd = load_desc(regs->r17 >>  0);	/* CSD */
-	regs->ar_ssd = load_desc(regs->r17 >> 16);	/* SSD */
-}
-
-int
-ia32_clone_tls (struct task_struct *child, struct pt_regs *childregs)
-{
-	struct desc_struct *desc;
-	struct ia32_user_desc info;
-	int idx;
-
-	if (copy_from_user(&info, (void __user *)(childregs->r14 & 0xffffffff), sizeof(info)))
-		return -EFAULT;
-	if (LDT_empty(&info))
-		return -EINVAL;
-
-	idx = info.entry_number;
-	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-		return -EINVAL;
-
-	desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-	desc->a = LDT_entry_a(&info);
-	desc->b = LDT_entry_b(&info);
-
-	/* XXX: can this be done in a cleaner way ? */
-	load_TLS(&child->thread, smp_processor_id());
-	ia32_load_segment_descriptors(child);
-	load_TLS(&current->thread, smp_processor_id());
-
-	return 0;
-}
-
-void
-ia32_save_state (struct task_struct *t)
-{
-	t->thread.eflag = ia64_getreg(_IA64_REG_AR_EFLAG);
-	t->thread.fsr   = ia64_getreg(_IA64_REG_AR_FSR);
-	t->thread.fcr   = ia64_getreg(_IA64_REG_AR_FCR);
-	t->thread.fir   = ia64_getreg(_IA64_REG_AR_FIR);
-	t->thread.fdr   = ia64_getreg(_IA64_REG_AR_FDR);
-	ia64_set_kr(IA64_KR_IO_BASE, t->thread.old_iob);
-	ia64_set_kr(IA64_KR_TSSD, t->thread.old_k1);
-}
-
-void
-ia32_load_state (struct task_struct *t)
-{
-	unsigned long eflag, fsr, fcr, fir, fdr, tssd;
-	struct pt_regs *regs = task_pt_regs(t);
-
-	eflag = t->thread.eflag;
-	fsr = t->thread.fsr;
-	fcr = t->thread.fcr;
-	fir = t->thread.fir;
-	fdr = t->thread.fdr;
-	tssd = load_desc(_TSS);					/* TSSD */
-
-	ia64_setreg(_IA64_REG_AR_EFLAG, eflag);
-	ia64_setreg(_IA64_REG_AR_FSR, fsr);
-	ia64_setreg(_IA64_REG_AR_FCR, fcr);
-	ia64_setreg(_IA64_REG_AR_FIR, fir);
-	ia64_setreg(_IA64_REG_AR_FDR, fdr);
-	current->thread.old_iob = ia64_get_kr(IA64_KR_IO_BASE);
-	current->thread.old_k1 = ia64_get_kr(IA64_KR_TSSD);
-	ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE);
-	ia64_set_kr(IA64_KR_TSSD, tssd);
-
-	regs->r17 = (_TSS << 48) | (_LDT << 32) | (__u32) regs->r17;
-	regs->r30 = load_desc(_LDT);				/* LDTD */
-	load_TLS(&t->thread, smp_processor_id());
-}
-
-/*
- * Setup IA32 GDT and TSS
- */
-void
-ia32_gdt_init (void)
-{
-	int cpu = smp_processor_id();
-
-	ia32_shared_page[cpu] = alloc_page(GFP_KERNEL);
-	if (!ia32_shared_page[cpu])
-		panic("failed to allocate ia32_shared_page[%d]\n", cpu);
-
-	cpu_gdt_table[cpu] = page_address(ia32_shared_page[cpu]);
-
-	/* Copy from the boot cpu's GDT */
-	memcpy(cpu_gdt_table[cpu], ia32_boot_gdt, PAGE_SIZE);
-}
-
-
-/*
- * Setup IA32 GDT and TSS
- */
-static void
-ia32_boot_gdt_init (void)
-{
-	unsigned long ldt_size;
-
-	ia32_shared_page[0] = alloc_page(GFP_KERNEL);
-	if (!ia32_shared_page[0])
-		panic("failed to allocate ia32_shared_page[0]\n");
-
-	ia32_boot_gdt = page_address(ia32_shared_page[0]);
-	cpu_gdt_table[0] = ia32_boot_gdt;
-
-	/* CS descriptor in IA-32 (scrambled) format */
-	ia32_boot_gdt[__USER_CS >> 3]
-		= IA32_SEG_DESCRIPTOR(0, (IA32_GATE_END-1) >> IA32_PAGE_SHIFT,
-				      0xb, 1, 3, 1, 1, 1, 1);
-
-	/* DS descriptor in IA-32 (scrambled) format */
-	ia32_boot_gdt[__USER_DS >> 3]
-		= IA32_SEG_DESCRIPTOR(0, (IA32_GATE_END-1) >> IA32_PAGE_SHIFT,
-				      0x3, 1, 3, 1, 1, 1, 1);
-
-	ldt_size = PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE);
-	ia32_boot_gdt[TSS_ENTRY] = IA32_SEG_DESCRIPTOR(IA32_TSS_OFFSET, 235,
-						       0xb, 0, 3, 1, 1, 1, 0);
-	ia32_boot_gdt[LDT_ENTRY] = IA32_SEG_DESCRIPTOR(IA32_LDT_OFFSET, ldt_size - 1,
-						       0x2, 0, 3, 1, 1, 1, 0);
-}
-
-static void
-ia32_gate_page_init(void)
-{
-	unsigned long *sr;
-
-	ia32_gate_page = alloc_page(GFP_KERNEL);
-	sr = page_address(ia32_gate_page);
-	/* This is popl %eax ; movl $,%eax ; int $0x80 */
-	*sr++ = 0xb858 | (__IA32_NR_sigreturn << 16) | (0x80cdUL << 48);
-
-	/* This is movl $,%eax ; int $0x80 */
-	*sr = 0xb8 | (__IA32_NR_rt_sigreturn << 8) | (0x80cdUL << 40);
-}
-
-void
-ia32_mem_init(void)
-{
-	ia32_boot_gdt_init();
-	ia32_gate_page_init();
-}
-
-/*
- * Handle bad IA32 interrupt via syscall
- */
-void
-ia32_bad_interrupt (unsigned long int_num, struct pt_regs *regs)
-{
-	siginfo_t siginfo;
-
-	die_if_kernel("Bad IA-32 interrupt", regs, int_num);
-
-	siginfo.si_signo = SIGTRAP;
-	siginfo.si_errno = int_num;	/* XXX is it OK to abuse si_errno like this? */
-	siginfo.si_flags = 0;
-	siginfo.si_isr = 0;
-	siginfo.si_addr = NULL;
-	siginfo.si_imm = 0;
-	siginfo.si_code = TRAP_BRKPT;
-	force_sig_info(SIGTRAP, &siginfo, current);
-}
-
-void
-ia32_cpu_init (void)
-{
-	/* initialize global ia32 state - CR0 and CR4 */
-	ia64_setreg(_IA64_REG_AR_CFLAG, (((ulong) IA32_CR4 << 32) | IA32_CR0));
-}
-
-static int __init
-ia32_init (void)
-{
-	ia32_exec_domain.name = "Linux/x86";
-	ia32_exec_domain.handler = NULL;
-	ia32_exec_domain.pers_low = PER_LINUX32;
-	ia32_exec_domain.pers_high = PER_LINUX32;
-	ia32_exec_domain.signal_map = default_exec_domain.signal_map;
-	ia32_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
-	register_exec_domain(&ia32_exec_domain);
-
-#if PAGE_SHIFT > IA32_PAGE_SHIFT
-	{
-		extern kmem_cache_t *partial_page_cachep;
-
-		partial_page_cachep = kmem_cache_create("partial_page_cache",
-							sizeof(struct partial_page), 0, 0,
-							NULL, NULL);
-		if (!partial_page_cachep)
-			panic("Cannot create partial page SLAB cache");
-	}
-#endif
-	return 0;
-}
-
-__initcall(ia32_init);
diff --git a/arch/ia64/ia32/ia32_traps.c b/arch/ia64/ia32/ia32_traps.c
deleted file mode 100644
index e486042672f..00000000000
--- a/arch/ia64/ia32/ia32_traps.c
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * IA-32 exception handlers
- *
- * Copyright (C) 2000 Asit K. Mallick <asit.k.mallick@intel.com>
- * Copyright (C) 2001-2002 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 06/16/00	A. Mallick	added siginfo for most cases (close to IA32)
- * 09/29/00	D. Mosberger	added ia32_intercept()
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-
-#include "ia32priv.h"
-
-#include <asm/intrinsics.h>
-#include <asm/ptrace.h>
-
-int
-ia32_intercept (struct pt_regs *regs, unsigned long isr)
-{
-	switch ((isr >> 16) & 0xff) {
-	      case 0:	/* Instruction intercept fault */
-	      case 4:	/* Locked Data reference fault */
-	      case 1:	/* Gate intercept trap */
-		return -1;
-
-	      case 2:	/* System flag trap */
-		if (((isr >> 14) & 0x3) >= 2) {
-			/* MOV SS, POP SS instructions */
-			ia64_psr(regs)->id = 1;
-			return 0;
-		} else
-			return -1;
-	}
-	return -1;
-}
-
-int
-ia32_exception (struct pt_regs *regs, unsigned long isr)
-{
-	struct siginfo siginfo;
-
-	/* initialize these fields to avoid leaking kernel bits to user space: */
-	siginfo.si_errno = 0;
-	siginfo.si_flags = 0;
-	siginfo.si_isr = 0;
-	siginfo.si_imm = 0;
-	switch ((isr >> 16) & 0xff) {
-	      case 1:
-	      case 2:
-		siginfo.si_signo = SIGTRAP;
-		if (isr == 0)
-			siginfo.si_code = TRAP_TRACE;
-		else if (isr & 0x4)
-			siginfo.si_code = TRAP_BRANCH;
-		else
-			siginfo.si_code = TRAP_BRKPT;
-		break;
-
-	      case 3:
-		siginfo.si_signo = SIGTRAP;
-		siginfo.si_code = TRAP_BRKPT;
-		break;
-
-	      case 0:	/* Divide fault */
-		siginfo.si_signo = SIGFPE;
-		siginfo.si_code = FPE_INTDIV;
-		break;
-
-	      case 4:	/* Overflow */
-	      case 5:	/* Bounds fault */
-		siginfo.si_signo = SIGFPE;
-		siginfo.si_code = 0;
-		break;
-
-	      case 6:	/* Invalid Op-code */
-		siginfo.si_signo = SIGILL;
-		siginfo.si_code = ILL_ILLOPN;
-		break;
-
-	      case 7:	/* FP DNA */
-	      case 8:	/* Double Fault */
-	      case 9:	/* Invalid TSS */
-	      case 11:	/* Segment not present */
-	      case 12:	/* Stack fault */
-	      case 13:	/* General Protection Fault */
-		siginfo.si_signo = SIGSEGV;
-		siginfo.si_code = 0;
-		break;
-
-	      case 16:	/* Pending FP error */
-		{
-			unsigned long fsr, fcr;
-
-			fsr = ia64_getreg(_IA64_REG_AR_FSR);
-			fcr = ia64_getreg(_IA64_REG_AR_FCR);
-
-			siginfo.si_signo = SIGFPE;
-			/*
-			 * (~cwd & swd) will mask out exceptions that are not set to unmasked
-			 * status.  0x3f is the exception bits in these regs, 0x200 is the
-			 * C1 reg you need in case of a stack fault, 0x040 is the stack
-			 * fault bit.  We should only be taking one exception at a time,
-			 * so if this combination doesn't produce any single exception,
-			 * then we have a bad program that isn't synchronizing its FPU usage
-			 * and it will suffer the consequences since we won't be able to
-			 * fully reproduce the context of the exception
-			 */
-			siginfo.si_isr = isr;
-			siginfo.si_flags = __ISR_VALID;
-			switch(((~fcr) & (fsr & 0x3f)) | (fsr & 0x240)) {
-				case 0x000:
-				default:
-					siginfo.si_code = 0;
-					break;
-				case 0x001: /* Invalid Op */
-				case 0x040: /* Stack Fault */
-				case 0x240: /* Stack Fault | Direction */
-					siginfo.si_code = FPE_FLTINV;
-					break;
-				case 0x002: /* Denormalize */
-				case 0x010: /* Underflow */
-					siginfo.si_code = FPE_FLTUND;
-					break;
-				case 0x004: /* Zero Divide */
-					siginfo.si_code = FPE_FLTDIV;
-					break;
-				case 0x008: /* Overflow */
-					siginfo.si_code = FPE_FLTOVF;
-					break;
-				case 0x020: /* Precision */
-					siginfo.si_code = FPE_FLTRES;
-					break;
-			}
-
-			break;
-		}
-
-	      case 17:	/* Alignment check */
-		siginfo.si_signo = SIGSEGV;
-		siginfo.si_code = BUS_ADRALN;
-		break;
-
-	      case 19:	/* SSE Numeric error */
-		siginfo.si_signo = SIGFPE;
-		siginfo.si_code = 0;
-		break;
-
-	      default:
-		return -1;
-	}
-	force_sig_info(siginfo.si_signo, &siginfo, current);
-	return 0;
-}
diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h
deleted file mode 100644
index ccb98ed48e5..00000000000
--- a/arch/ia64/ia32/ia32priv.h
+++ /dev/null
@@ -1,543 +0,0 @@
-#ifndef _ASM_IA64_IA32_PRIV_H
-#define _ASM_IA64_IA32_PRIV_H
-
-#include <linux/config.h>
-
-#include <asm/ia32.h>
-
-#ifdef CONFIG_IA32_SUPPORT
-
-#include <linux/binfmts.h>
-#include <linux/compat.h>
-#include <linux/rbtree.h>
-
-#include <asm/processor.h>
-
-/*
- * 32 bit structures for IA32 support.
- */
-
-#define IA32_PAGE_SIZE		(1UL << IA32_PAGE_SHIFT)
-#define IA32_PAGE_MASK		(~(IA32_PAGE_SIZE - 1))
-#define IA32_PAGE_ALIGN(addr)	(((addr) + IA32_PAGE_SIZE - 1) & IA32_PAGE_MASK)
-#define IA32_CLOCKS_PER_SEC	100	/* Cast in stone for IA32 Linux */
-
-/*
- * partially mapped pages provide precise accounting of which 4k sub pages
- * are mapped and which ones are not, thereby improving IA-32 compatibility.
- */
-struct partial_page {
-	struct partial_page	*next; /* linked list, sorted by address */
-	struct rb_node		pp_rb;
-	/* 64K is the largest "normal" page supported by ia64 ABI. So 4K*64
-	 * should suffice.*/
-	unsigned long		bitmap;
-	unsigned int		base;
-};
-
-struct partial_page_list {
-	struct partial_page	*pp_head; /* list head, points to the lowest
-					   * addressed partial page */
-	struct rb_root		ppl_rb;
-	struct partial_page	*pp_hint; /* pp_hint->next is the last
-					   * accessed partial page */
-	atomic_t		pp_count; /* reference count */
-};
-
-#if PAGE_SHIFT > IA32_PAGE_SHIFT
-struct partial_page_list* ia32_init_pp_list (void);
-#else
-# define ia32_init_pp_list()	0
-#endif
-
-/* sigcontext.h */
-/*
- * As documented in the iBCS2 standard..
- *
- * The first part of "struct _fpstate" is just the
- * normal i387 hardware setup, the extra "status"
- * word is used to save the coprocessor status word
- * before entering the handler.
- */
-struct _fpreg_ia32 {
-       unsigned short significand[4];
-       unsigned short exponent;
-};
-
-struct _fpxreg_ia32 {
-        unsigned short significand[4];
-        unsigned short exponent;
-        unsigned short padding[3];
-};
-
-struct _xmmreg_ia32 {
-        unsigned int element[4];
-};
-
-
-struct _fpstate_ia32 {
-       unsigned int    cw,
-		       sw,
-		       tag,
-		       ipoff,
-		       cssel,
-		       dataoff,
-		       datasel;
-       struct _fpreg_ia32      _st[8];
-       unsigned short  status;
-       unsigned short  magic;          /* 0xffff = regular FPU data only */
-
-       /* FXSR FPU environment */
-       unsigned int         _fxsr_env[6];   /* FXSR FPU env is ignored */
-       unsigned int         mxcsr;
-       unsigned int         reserved;
-       struct _fpxreg_ia32  _fxsr_st[8];    /* FXSR FPU reg data is ignored */
-       struct _xmmreg_ia32  _xmm[8];
-       unsigned int         padding[56];
-};
-
-struct sigcontext_ia32 {
-       unsigned short gs, __gsh;
-       unsigned short fs, __fsh;
-       unsigned short es, __esh;
-       unsigned short ds, __dsh;
-       unsigned int edi;
-       unsigned int esi;
-       unsigned int ebp;
-       unsigned int esp;
-       unsigned int ebx;
-       unsigned int edx;
-       unsigned int ecx;
-       unsigned int eax;
-       unsigned int trapno;
-       unsigned int err;
-       unsigned int eip;
-       unsigned short cs, __csh;
-       unsigned int eflags;
-       unsigned int esp_at_signal;
-       unsigned short ss, __ssh;
-       unsigned int fpstate;		/* really (struct _fpstate_ia32 *) */
-       unsigned int oldmask;
-       unsigned int cr2;
-};
-
-/* user.h */
-/*
- * IA32 (Pentium III/4) FXSR, SSE support
- *
- * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for
- * interacting with the FXSR-format floating point environment.  Floating
- * point data can be accessed in the regular format in the usual manner,
- * and both the standard and SIMD floating point data can be accessed via
- * the new ptrace requests.  In either case, changes to the FPU environment
- * will be reflected in the task's state as expected.
- */
-struct ia32_user_i387_struct {
-	int	cwd;
-	int	swd;
-	int	twd;
-	int	fip;
-	int	fcs;
-	int	foo;
-	int	fos;
-	/* 8*10 bytes for each FP-reg = 80 bytes */
-	struct _fpreg_ia32 	st_space[8];
-};
-
-struct ia32_user_fxsr_struct {
-	unsigned short	cwd;
-	unsigned short	swd;
-	unsigned short	twd;
-	unsigned short	fop;
-	int	fip;
-	int	fcs;
-	int	foo;
-	int	fos;
-	int	mxcsr;
-	int	reserved;
-	int	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
-	int	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
-	int	padding[56];
-};
-
-/* signal.h */
-#define IA32_SET_SA_HANDLER(ka,handler,restorer)				\
-				((ka)->sa.sa_handler = (__sighandler_t)		\
-					(((unsigned long)(restorer) << 32)	\
-					 | ((handler) & 0xffffffff)))
-#define IA32_SA_HANDLER(ka)	((unsigned long) (ka)->sa.sa_handler & 0xffffffff)
-#define IA32_SA_RESTORER(ka)	((unsigned long) (ka)->sa.sa_handler >> 32)
-
-#define __IA32_NR_sigreturn 119
-#define __IA32_NR_rt_sigreturn 173
-
-struct sigaction32 {
-       unsigned int sa_handler;		/* Really a pointer, but need to deal with 32 bits */
-       unsigned int sa_flags;
-       unsigned int sa_restorer;	/* Another 32 bit pointer */
-       compat_sigset_t sa_mask;		/* A 32 bit mask */
-};
-
-struct old_sigaction32 {
-       unsigned int  sa_handler;	/* Really a pointer, but need to deal
-					     with 32 bits */
-       compat_old_sigset_t sa_mask;		/* A 32 bit mask */
-       unsigned int sa_flags;
-       unsigned int sa_restorer;	/* Another 32 bit pointer */
-};
-
-typedef struct sigaltstack_ia32 {
-	unsigned int	ss_sp;
-	int		ss_flags;
-	unsigned int	ss_size;
-} stack_ia32_t;
-
-struct ucontext_ia32 {
-	unsigned int	  uc_flags;
-	unsigned int	  uc_link;
-	stack_ia32_t	  uc_stack;
-	struct sigcontext_ia32 uc_mcontext;
-	sigset_t	  uc_sigmask;	/* mask last for extensibility */
-};
-
-struct stat64 {
-	unsigned long long	st_dev;
-	unsigned char	__pad0[4];
-	unsigned int	__st_ino;
-	unsigned int	st_mode;
-	unsigned int	st_nlink;
-	unsigned int	st_uid;
-	unsigned int	st_gid;
-	unsigned long long	st_rdev;
-	unsigned char	__pad3[4];
-	unsigned int	st_size_lo;
-	unsigned int	st_size_hi;
-	unsigned int	st_blksize;
-	unsigned int	st_blocks;	/* Number 512-byte blocks allocated. */
-	unsigned int	__pad4;		/* future possible st_blocks high bits */
-	unsigned int	st_atime;
-	unsigned int	st_atime_nsec;
-	unsigned int	st_mtime;
-	unsigned int	st_mtime_nsec;
-	unsigned int	st_ctime;
-	unsigned int	st_ctime_nsec;
-	unsigned int	st_ino_lo;
-	unsigned int	st_ino_hi;
-};
-
-typedef struct compat_siginfo {
-	int si_signo;
-	int si_errno;
-	int si_code;
-
-	union {
-		int _pad[((128/sizeof(int)) - 3)];
-
-		/* kill() */
-		struct {
-			unsigned int _pid;	/* sender's pid */
-			unsigned int _uid;	/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;		/* timer id */
-			int _overrun;		/* overrun count */
-			char _pad[sizeof(unsigned int) - sizeof(int)];
-			compat_sigval_t _sigval;	/* same as below */
-			int _sys_private;       /* not to be passed to user */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			unsigned int _pid;	/* sender's pid */
-			unsigned int _uid;	/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			unsigned int _pid;	/* which child */
-			unsigned int _uid;	/* sender's uid */
-			int _status;		/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-		struct {
-			unsigned int _addr;	/* faulting insn/memory ref. */
-		} _sigfault;
-
-		/* SIGPOLL */
-		struct {
-			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-	} _sifields;
-} compat_siginfo_t;
-
-struct old_linux32_dirent {
-	u32	d_ino;
-	u32	d_offset;
-	u16	d_namlen;
-	char	d_name[1];
-};
-
-/*
- * IA-32 ELF specific definitions for IA-64.
- */
-
-#define _ASM_IA64_ELF_H		/* Don't include elf.h */
-
-#include <linux/sched.h>
-#include <asm/processor.h>
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) ((x)->e_machine == EM_386)
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS	ELFCLASS32
-#define ELF_DATA	ELFDATA2LSB
-#define ELF_ARCH	EM_386
-
-#define IA32_STACK_TOP		IA32_PAGE_OFFSET
-#define IA32_GATE_OFFSET	IA32_PAGE_OFFSET
-#define IA32_GATE_END		IA32_PAGE_OFFSET + PAGE_SIZE
-
-/*
- * The system segments (GDT, TSS, LDT) have to be mapped below 4GB so the IA-32 engine can
- * access them.
- */
-#define IA32_GDT_OFFSET		(IA32_PAGE_OFFSET + PAGE_SIZE)
-#define IA32_TSS_OFFSET		(IA32_PAGE_OFFSET + 2*PAGE_SIZE)
-#define IA32_LDT_OFFSET		(IA32_PAGE_OFFSET + 3*PAGE_SIZE)
-
-#define ELF_EXEC_PAGESIZE	IA32_PAGE_SIZE
-
-/*
- * This is the location that an ET_DYN program is loaded if exec'ed.
- * Typical use of this is to invoke "./ld.so someprog" to test out a
- * new version of the loader.  We need to make sure that it is out of
- * the way of the program that it will "exec", and that there is
- * sufficient room for the brk.
- */
-#define ELF_ET_DYN_BASE		(IA32_PAGE_OFFSET/3 + 0x1000000)
-
-void ia64_elf32_init(struct pt_regs *regs);
-#define ELF_PLAT_INIT(_r, load_addr)	ia64_elf32_init(_r)
-
-#define elf_addr_t	u32
-
-/* This macro yields a bitmask that programs can use to figure out
-   what instruction set this CPU supports.  */
-#define ELF_HWCAP	0
-
-/* This macro yields a string that ld.so will use to load
-   implementation specific libraries for optimization.  Not terribly
-   relevant until we have real hardware to play with... */
-#define ELF_PLATFORM	NULL
-
-#ifdef __KERNEL__
-# define SET_PERSONALITY(EX,IBCS2)				\
-	(current->personality = (IBCS2) ? PER_SVR4 : PER_LINUX)
-#endif
-
-#define IA32_EFLAG	0x200
-
-/*
- * IA-32 ELF specific definitions for IA-64.
- */
-
-#define __USER_CS      0x23
-#define __USER_DS      0x2B
-
-/*
- * The per-cpu GDT has 32 entries: see <asm-i386/segment.h>
- */
-#define GDT_ENTRIES 32
-
-#define GDT_SIZE	(GDT_ENTRIES * 8)
-
-#define TSS_ENTRY 14
-#define LDT_ENTRY	(TSS_ENTRY + 1)
-
-#define IA32_SEGSEL_RPL		(0x3 << 0)
-#define IA32_SEGSEL_TI		(0x1 << 2)
-#define IA32_SEGSEL_INDEX_SHIFT	3
-
-#define _TSS			((unsigned long) TSS_ENTRY << IA32_SEGSEL_INDEX_SHIFT)
-#define _LDT			((unsigned long) LDT_ENTRY << IA32_SEGSEL_INDEX_SHIFT)
-
-#define IA32_SEG_BASE		16
-#define IA32_SEG_TYPE		40
-#define IA32_SEG_SYS		44
-#define IA32_SEG_DPL		45
-#define IA32_SEG_P		47
-#define IA32_SEG_HIGH_LIMIT	48
-#define IA32_SEG_AVL		52
-#define IA32_SEG_DB		54
-#define IA32_SEG_G		55
-#define IA32_SEG_HIGH_BASE	56
-
-#define IA32_SEG_DESCRIPTOR(base, limit, segtype, nonsysseg, dpl, segpresent, avl, segdb, gran)	\
-	       (((limit) & 0xffff)								\
-		| (((unsigned long) (base) & 0xffffff) << IA32_SEG_BASE)			\
-		| ((unsigned long) (segtype) << IA32_SEG_TYPE)					\
-		| ((unsigned long) (nonsysseg) << IA32_SEG_SYS)					\
-		| ((unsigned long) (dpl) << IA32_SEG_DPL)					\
-		| ((unsigned long) (segpresent) << IA32_SEG_P)					\
-		| ((((unsigned long) (limit) >> 16) & 0xf) << IA32_SEG_HIGH_LIMIT)		\
-		| ((unsigned long) (avl) << IA32_SEG_AVL)					\
-		| ((unsigned long) (segdb) << IA32_SEG_DB)					\
-		| ((unsigned long) (gran) << IA32_SEG_G)					\
-		| ((((unsigned long) (base) >> 24) & 0xff) << IA32_SEG_HIGH_BASE))
-
-#define SEG_LIM		32
-#define SEG_TYPE	52
-#define SEG_SYS		56
-#define SEG_DPL		57
-#define SEG_P		59
-#define SEG_AVL		60
-#define SEG_DB		62
-#define SEG_G		63
-
-/* Unscramble an IA-32 segment descriptor into the IA-64 format.  */
-#define IA32_SEG_UNSCRAMBLE(sd)									 \
-	(   (((sd) >> IA32_SEG_BASE) & 0xffffff) | ((((sd) >> IA32_SEG_HIGH_BASE) & 0xff) << 24) \
-	 | ((((sd) & 0xffff) | ((((sd) >> IA32_SEG_HIGH_LIMIT) & 0xf) << 16)) << SEG_LIM)	 \
-	 | ((((sd) >> IA32_SEG_TYPE) & 0xf) << SEG_TYPE)					 \
-	 | ((((sd) >> IA32_SEG_SYS) & 0x1) << SEG_SYS)						 \
-	 | ((((sd) >> IA32_SEG_DPL) & 0x3) << SEG_DPL)						 \
-	 | ((((sd) >> IA32_SEG_P) & 0x1) << SEG_P)						 \
-	 | ((((sd) >> IA32_SEG_AVL) & 0x1) << SEG_AVL)						 \
-	 | ((((sd) >> IA32_SEG_DB) & 0x1) << SEG_DB)						 \
-	 | ((((sd) >> IA32_SEG_G) & 0x1) << SEG_G))
-
-#define IA32_IOBASE	0x2000000000000000UL /* Virtual address for I/O space */
-
-#define IA32_CR0	0x80000001	/* Enable PG and PE bits */
-#define IA32_CR4	0x600		/* MMXEX and FXSR on */
-
-/*
- *  IA32 floating point control registers starting values
- */
-
-#define IA32_FSR_DEFAULT	0x55550000		/* set all tag bits */
-#define IA32_FCR_DEFAULT	0x17800000037fUL	/* extended precision, all masks */
-
-#define IA32_PTRACE_GETREGS	12
-#define IA32_PTRACE_SETREGS	13
-#define IA32_PTRACE_GETFPREGS	14
-#define IA32_PTRACE_SETFPREGS	15
-#define IA32_PTRACE_GETFPXREGS	18
-#define IA32_PTRACE_SETFPXREGS	19
-
-#define ia32_start_thread(regs,new_ip,new_sp) do {				\
-	set_fs(USER_DS);							\
-	ia64_psr(regs)->cpl = 3;	/* set user mode */			\
-	ia64_psr(regs)->ri = 0;		/* clear return slot number */		\
-	ia64_psr(regs)->is = 1;		/* IA-32 instruction set */		\
-	regs->cr_iip = new_ip;							\
-	regs->ar_rsc = 0xc;		/* enforced lazy mode, priv. level 3 */	\
-	regs->ar_rnat = 0;							\
-	regs->loadrs = 0;							\
-	regs->r12 = new_sp;							\
-} while (0)
-
-/*
- * Local Descriptor Table (LDT) related declarations.
- */
-
-#define IA32_LDT_ENTRIES	8192		/* Maximum number of LDT entries supported. */
-#define IA32_LDT_ENTRY_SIZE	8		/* The size of each LDT entry. */
-
-#define LDT_entry_a(info) \
-	((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-
-#define LDT_entry_b(info)				\
-	(((info)->base_addr & 0xff000000) |		\
-	(((info)->base_addr & 0x00ff0000) >> 16) |	\
-	((info)->limit & 0xf0000) |			\
-	(((info)->read_exec_only ^ 1) << 9) |		\
-	((info)->contents << 10) |			\
-	(((info)->seg_not_present ^ 1) << 15) |		\
-	((info)->seg_32bit << 22) |			\
-	((info)->limit_in_pages << 23) |		\
-	((info)->useable << 20) |			\
-	0x7100)
-
-#define LDT_empty(info) (			\
-	(info)->base_addr	== 0	&&	\
-	(info)->limit		== 0	&&	\
-	(info)->contents	== 0	&&	\
-	(info)->read_exec_only	== 1	&&	\
-	(info)->seg_32bit	== 0	&&	\
-	(info)->limit_in_pages	== 0	&&	\
-	(info)->seg_not_present	== 1	&&	\
-	(info)->useable		== 0	)
-
-static inline void
-load_TLS (struct thread_struct *t, unsigned int cpu)
-{
-	extern unsigned long *cpu_gdt_table[NR_CPUS];
-
-	memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0], sizeof(long));
-	memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1], sizeof(long));
-	memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2], sizeof(long));
-}
-
-struct ia32_user_desc {
-	unsigned int entry_number;
-	unsigned int base_addr;
-	unsigned int limit;
-	unsigned int seg_32bit:1;
-	unsigned int contents:2;
-	unsigned int read_exec_only:1;
-	unsigned int limit_in_pages:1;
-	unsigned int seg_not_present:1;
-	unsigned int useable:1;
-};
-
-struct linux_binprm;
-
-extern void ia32_init_addr_space (struct pt_regs *regs);
-extern int ia32_setup_arg_pages (struct linux_binprm *bprm, int exec_stack);
-extern unsigned long ia32_do_mmap (struct file *, unsigned long, unsigned long, int, int, loff_t);
-extern void ia32_load_segment_descriptors (struct task_struct *task);
-
-#define ia32f2ia64f(dst,src)			\
-do {						\
-	ia64_ldfe(6,src);			\
-	ia64_stop();				\
-	ia64_stf_spill(dst, 6);			\
-} while(0)
-
-#define ia64f2ia32f(dst,src)			\
-do {						\
-	ia64_ldf_fill(6, src);			\
-	ia64_stop();				\
-	ia64_stfe(dst, 6);			\
-} while(0)
-
-struct user_regs_struct32 {
-	__u32 ebx, ecx, edx, esi, edi, ebp, eax;
-	unsigned short ds, __ds, es, __es;
-	unsigned short fs, __fs, gs, __gs;
-	__u32 orig_eax, eip;
-	unsigned short cs, __cs;
-	__u32 eflags, esp;
-	unsigned short ss, __ss;
-};
-
-/* Prototypes for use in elfcore32.h */
-extern int save_ia32_fpstate (struct task_struct *, struct ia32_user_i387_struct __user *);
-extern int save_ia32_fpxstate (struct task_struct *, struct ia32_user_fxsr_struct __user *);
-
-#endif /* !CONFIG_IA32_SUPPORT */
-
-#endif /* _ASM_IA64_IA32_PRIV_H */
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
deleted file mode 100644
index 5366b3b23d0..00000000000
--- a/arch/ia64/ia32/sys_ia32.c
+++ /dev/null
@@ -1,2593 +0,0 @@
-/*
- * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Derived from sys_sparc32.c.
- *
- * Copyright (C) 2000		VA Linux Co
- * Copyright (C) 2000		Don Dugger <n0ano@valinux.com>
- * Copyright (C) 1999		Arun Sharma <arun.sharma@intel.com>
- * Copyright (C) 1997,1998	Jakub Jelinek (jj@sunsite.mff.cuni.cz)
- * Copyright (C) 1997		David S. Miller (davem@caip.rutgers.edu)
- * Copyright (C) 2000-2003, 2005 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 2004		Gordon Jin <gordon.jin@intel.com>
- *
- * These routines maintain argument size conversion between 32bit and 64bit
- * environment.
- */
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/syscalls.h>
-#include <linux/sysctl.h>
-#include <linux/sched.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/signal.h>
-#include <linux/resource.h>
-#include <linux/times.h>
-#include <linux/utsname.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/mm.h>
-#include <linux/shm.h>
-#include <linux/slab.h>
-#include <linux/uio.h>
-#include <linux/nfs_fs.h>
-#include <linux/quota.h>
-#include <linux/syscalls.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
-#include <linux/nfsd/syscall.h>
-#include <linux/poll.h>
-#include <linux/eventpoll.h>
-#include <linux/personality.h>
-#include <linux/ptrace.h>
-#include <linux/stat.h>
-#include <linux/ipc.h>
-#include <linux/capability.h>
-#include <linux/compat.h>
-#include <linux/vfs.h>
-#include <linux/mman.h>
-#include <linux/mutex.h>
-
-#include <asm/intrinsics.h>
-#include <asm/types.h>
-#include <asm/uaccess.h>
-#include <asm/unistd.h>
-
-#include "ia32priv.h"
-
-#include <net/scm.h>
-#include <net/sock.h>
-
-#define DEBUG	0
-
-#if DEBUG
-# define DBG(fmt...)	printk(KERN_DEBUG fmt)
-#else
-# define DBG(fmt...)
-#endif
-
-#define ROUND_UP(x,a)	((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1)))
-
-#define OFFSET4K(a)		((a) & 0xfff)
-#define PAGE_START(addr)	((addr) & PAGE_MASK)
-#define MINSIGSTKSZ_IA32	2048
-
-#define high2lowuid(uid) ((uid) > 65535 ? 65534 : (uid))
-#define high2lowgid(gid) ((gid) > 65535 ? 65534 : (gid))
-
-/*
- * Anything that modifies or inspects ia32 user virtual memory must hold this semaphore
- * while doing so.
- */
-/* XXX make per-mm: */
-static DEFINE_MUTEX(ia32_mmap_mutex);
-
-asmlinkage long
-sys32_execve (char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp,
-	      struct pt_regs *regs)
-{
-	long error;
-	char *filename;
-	unsigned long old_map_base, old_task_size, tssd;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-
-	old_map_base  = current->thread.map_base;
-	old_task_size = current->thread.task_size;
-	tssd = ia64_get_kr(IA64_KR_TSSD);
-
-	/* we may be exec'ing a 64-bit process: reset map base, task-size, and io-base: */
-	current->thread.map_base  = DEFAULT_MAP_BASE;
-	current->thread.task_size = DEFAULT_TASK_SIZE;
-	ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
-	ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
-
-	error = compat_do_execve(filename, argv, envp, regs);
-	putname(filename);
-
-	if (error < 0) {
-		/* oops, execve failed, switch back to old values... */
-		ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE);
-		ia64_set_kr(IA64_KR_TSSD, tssd);
-		current->thread.map_base  = old_map_base;
-		current->thread.task_size = old_task_size;
-	}
-
-	return error;
-}
-
-int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
-{
-	int err;
-
-	if ((u64) stat->size > MAX_NON_LFS ||
-	    !old_valid_dev(stat->dev) ||
-	    !old_valid_dev(stat->rdev))
-		return -EOVERFLOW;
-
-	if (clear_user(ubuf, sizeof(*ubuf)))
-		return -EFAULT;
-
-	err  = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
-	err |= __put_user(stat->ino, &ubuf->st_ino);
-	err |= __put_user(stat->mode, &ubuf->st_mode);
-	err |= __put_user(stat->nlink, &ubuf->st_nlink);
-	err |= __put_user(high2lowuid(stat->uid), &ubuf->st_uid);
-	err |= __put_user(high2lowgid(stat->gid), &ubuf->st_gid);
-	err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
-	err |= __put_user(stat->size, &ubuf->st_size);
-	err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
-	err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
-	err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
-	err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
-	err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
-	err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
-	err |= __put_user(stat->blksize, &ubuf->st_blksize);
-	err |= __put_user(stat->blocks, &ubuf->st_blocks);
-	return err;
-}
-
-#if PAGE_SHIFT > IA32_PAGE_SHIFT
-
-
-static int
-get_page_prot (struct vm_area_struct *vma, unsigned long addr)
-{
-	int prot = 0;
-
-	if (!vma || vma->vm_start > addr)
-		return 0;
-
-	if (vma->vm_flags & VM_READ)
-		prot |= PROT_READ;
-	if (vma->vm_flags & VM_WRITE)
-		prot |= PROT_WRITE;
-	if (vma->vm_flags & VM_EXEC)
-		prot |= PROT_EXEC;
-	return prot;
-}
-
-/*
- * Map a subpage by creating an anonymous page that contains the union of the old page and
- * the subpage.
- */
-static unsigned long
-mmap_subpage (struct file *file, unsigned long start, unsigned long end, int prot, int flags,
-	      loff_t off)
-{
-	void *page = NULL;
-	struct inode *inode;
-	unsigned long ret = 0;
-	struct vm_area_struct *vma = find_vma(current->mm, start);
-	int old_prot = get_page_prot(vma, start);
-
-	DBG("mmap_subpage(file=%p,start=0x%lx,end=0x%lx,prot=%x,flags=%x,off=0x%llx)\n",
-	    file, start, end, prot, flags, off);
-
-
-	/* Optimize the case where the old mmap and the new mmap are both anonymous */
-	if ((old_prot & PROT_WRITE) && (flags & MAP_ANONYMOUS) && !vma->vm_file) {
-		if (clear_user((void __user *) start, end - start)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		goto skip_mmap;
-	}
-
-	page = (void *) get_zeroed_page(GFP_KERNEL);
-	if (!page)
-		return -ENOMEM;
-
-	if (old_prot)
-		copy_from_user(page, (void __user *) PAGE_START(start), PAGE_SIZE);
-
-	down_write(&current->mm->mmap_sem);
-	{
-		ret = do_mmap(NULL, PAGE_START(start), PAGE_SIZE, prot | PROT_WRITE,
-			      flags | MAP_FIXED | MAP_ANONYMOUS, 0);
-	}
-	up_write(&current->mm->mmap_sem);
-
-	if (IS_ERR((void *) ret))
-		goto out;
-
-	if (old_prot) {
-		/* copy back the old page contents.  */
-		if (offset_in_page(start))
-			copy_to_user((void __user *) PAGE_START(start), page,
-				     offset_in_page(start));
-		if (offset_in_page(end))
-			copy_to_user((void __user *) end, page + offset_in_page(end),
-				     PAGE_SIZE - offset_in_page(end));
-	}
-
-	if (!(flags & MAP_ANONYMOUS)) {
-		/* read the file contents */
-		inode = file->f_dentry->d_inode;
-		if (!inode->i_fop || !file->f_op->read
-		    || ((*file->f_op->read)(file, (char __user *) start, end - start, &off) < 0))
-		{
-			ret = -EINVAL;
-			goto out;
-		}
-	}
-
- skip_mmap:
-	if (!(prot & PROT_WRITE))
-		ret = sys_mprotect(PAGE_START(start), PAGE_SIZE, prot | old_prot);
-  out:
-	if (page)
-		free_page((unsigned long) page);
-	return ret;
-}
-
-/* SLAB cache for partial_page structures */
-kmem_cache_t *partial_page_cachep;
-
-/*
- * init partial_page_list.
- * return 0 means kmalloc fail.
- */
-struct partial_page_list*
-ia32_init_pp_list(void)
-{
-	struct partial_page_list *p;
-
-	if ((p = kmalloc(sizeof(*p), GFP_KERNEL)) == NULL)
-		return p;
-	p->pp_head = NULL;
-	p->ppl_rb = RB_ROOT;
-	p->pp_hint = NULL;
-	atomic_set(&p->pp_count, 1);
-	return p;
-}
-
-/*
- * Search for the partial page with @start in partial page list @ppl.
- * If finds the partial page, return the found partial page.
- * Else, return 0 and provide @pprev, @rb_link, @rb_parent to
- * be used by later __ia32_insert_pp().
- */
-static struct partial_page *
-__ia32_find_pp(struct partial_page_list *ppl, unsigned int start,
-	struct partial_page **pprev, struct rb_node ***rb_link,
-	struct rb_node **rb_parent)
-{
-	struct partial_page *pp;
-	struct rb_node **__rb_link, *__rb_parent, *rb_prev;
-
-	pp = ppl->pp_hint;
-	if (pp && pp->base == start)
-		return pp;
-
-	__rb_link = &ppl->ppl_rb.rb_node;
-	rb_prev = __rb_parent = NULL;
-
-	while (*__rb_link) {
-		__rb_parent = *__rb_link;
-		pp = rb_entry(__rb_parent, struct partial_page, pp_rb);
-
-		if (pp->base == start) {
-			ppl->pp_hint = pp;
-			return pp;
-		} else if (pp->base < start) {
-			rb_prev = __rb_parent;
-			__rb_link = &__rb_parent->rb_right;
-		} else {
-			__rb_link = &__rb_parent->rb_left;
-		}
-	}
-
-	*rb_link = __rb_link;
-	*rb_parent = __rb_parent;
-	*pprev = NULL;
-	if (rb_prev)
-		*pprev = rb_entry(rb_prev, struct partial_page, pp_rb);
-	return NULL;
-}
-
-/*
- * insert @pp into @ppl.
- */
-static void
-__ia32_insert_pp(struct partial_page_list *ppl, struct partial_page *pp,
-	 struct partial_page *prev, struct rb_node **rb_link,
-	struct rb_node *rb_parent)
-{
-	/* link list */
-	if (prev) {
-		pp->next = prev->next;
-		prev->next = pp;
-	} else {
-		ppl->pp_head = pp;
-		if (rb_parent)
-			pp->next = rb_entry(rb_parent,
-				struct partial_page, pp_rb);
-		else
-			pp->next = NULL;
-	}
-
-	/* link rb */
-	rb_link_node(&pp->pp_rb, rb_parent, rb_link);
-	rb_insert_color(&pp->pp_rb, &ppl->ppl_rb);
-
-	ppl->pp_hint = pp;
-}
-
-/*
- * delete @pp from partial page list @ppl.
- */
-static void
-__ia32_delete_pp(struct partial_page_list *ppl, struct partial_page *pp,
-	struct partial_page *prev)
-{
-	if (prev) {
-		prev->next = pp->next;
-		if (ppl->pp_hint == pp)
-			ppl->pp_hint = prev;
-	} else {
-		ppl->pp_head = pp->next;
-		if (ppl->pp_hint == pp)
-			ppl->pp_hint = pp->next;
-	}
-	rb_erase(&pp->pp_rb, &ppl->ppl_rb);
-	kmem_cache_free(partial_page_cachep, pp);
-}
-
-static struct partial_page *
-__pp_prev(struct partial_page *pp)
-{
-	struct rb_node *prev = rb_prev(&pp->pp_rb);
-	if (prev)
-		return rb_entry(prev, struct partial_page, pp_rb);
-	else
-		return NULL;
-}
-
-/*
- * Delete partial pages with address between @start and @end.
- * @start and @end are page aligned.
- */
-static void
-__ia32_delete_pp_range(unsigned int start, unsigned int end)
-{
-	struct partial_page *pp, *prev;
-	struct rb_node **rb_link, *rb_parent;
-
-	if (start >= end)
-		return;
-
-	pp = __ia32_find_pp(current->thread.ppl, start, &prev,
-					&rb_link, &rb_parent);
-	if (pp)
-		prev = __pp_prev(pp);
-	else {
-		if (prev)
-			pp = prev->next;
-		else
-			pp = current->thread.ppl->pp_head;
-	}
-
-	while (pp && pp->base < end) {
-		struct partial_page *tmp = pp->next;
-		__ia32_delete_pp(current->thread.ppl, pp, prev);
-		pp = tmp;
-	}
-}
-
-/*
- * Set the range between @start and @end in bitmap.
- * @start and @end should be IA32 page aligned and in the same IA64 page.
- */
-static int
-__ia32_set_pp(unsigned int start, unsigned int end, int flags)
-{
-	struct partial_page *pp, *prev;
-	struct rb_node ** rb_link, *rb_parent;
-	unsigned int pstart, start_bit, end_bit, i;
-
-	pstart = PAGE_START(start);
-	start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
-	end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
-	if (end_bit == 0)
-		end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
-	pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
-					&rb_link, &rb_parent);
-	if (pp) {
-		for (i = start_bit; i < end_bit; i++)
-			set_bit(i, &pp->bitmap);
-		/*
-		 * Check: if this partial page has been set to a full page,
-		 * then delete it.
-		 */
-		if (find_first_zero_bit(&pp->bitmap, sizeof(pp->bitmap)*8) >=
-				PAGE_SIZE/IA32_PAGE_SIZE) {
-			__ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp));
-		}
-		return 0;
-	}
-
-	/*
-	 * MAP_FIXED may lead to overlapping mmap.
-	 * In this case, the requested mmap area may already mmaped as a full
-	 * page. So check vma before adding a new partial page.
-	 */
-	if (flags & MAP_FIXED) {
-		struct vm_area_struct *vma = find_vma(current->mm, pstart);
-		if (vma && vma->vm_start <= pstart)
-			return 0;
-	}
-
-	/* new a partial_page */
-	pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
-	if (!pp)
-		return -ENOMEM;
-	pp->base = pstart;
-	pp->bitmap = 0;
-	for (i=start_bit; i<end_bit; i++)
-		set_bit(i, &(pp->bitmap));
-	pp->next = NULL;
-	__ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent);
-	return 0;
-}
-
-/*
- * @start and @end should be IA32 page aligned, but don't need to be in the
- * same IA64 page. Split @start and @end to make sure they're in the same IA64
- * page, then call __ia32_set_pp().
- */
-static void
-ia32_set_pp(unsigned int start, unsigned int end, int flags)
-{
-	down_write(&current->mm->mmap_sem);
-	if (flags & MAP_FIXED) {
-		/*
-		 * MAP_FIXED may lead to overlapping mmap. When this happens,
-		 * a series of complete IA64 pages results in deletion of
-		 * old partial pages in that range.
-		 */
-		__ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end));
-	}
-
-	if (end < PAGE_ALIGN(start)) {
-		__ia32_set_pp(start, end, flags);
-	} else {
-		if (offset_in_page(start))
-			__ia32_set_pp(start, PAGE_ALIGN(start), flags);
-		if (offset_in_page(end))
-			__ia32_set_pp(PAGE_START(end), end, flags);
-	}
-	up_write(&current->mm->mmap_sem);
-}
-
-/*
- * Unset the range between @start and @end in bitmap.
- * @start and @end should be IA32 page aligned and in the same IA64 page.
- * After doing that, if the bitmap is 0, then free the page and return 1,
- * 	else return 0;
- * If not find the partial page in the list, then
- * 	If the vma exists, then the full page is set to a partial page;
- *	Else return -ENOMEM.
- */
-static int
-__ia32_unset_pp(unsigned int start, unsigned int end)
-{
-	struct partial_page *pp, *prev;
-	struct rb_node ** rb_link, *rb_parent;
-	unsigned int pstart, start_bit, end_bit, i;
-	struct vm_area_struct *vma;
-
-	pstart = PAGE_START(start);
-	start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
-	end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
-	if (end_bit == 0)
-		end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
-
-	pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
-					&rb_link, &rb_parent);
-	if (pp) {
-		for (i = start_bit; i < end_bit; i++)
-			clear_bit(i, &pp->bitmap);
-		if (pp->bitmap == 0) {
-			__ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp));
-			return 1;
-		}
-		return 0;
-	}
-
-	vma = find_vma(current->mm, pstart);
-	if (!vma || vma->vm_start > pstart) {
-		return -ENOMEM;
-	}
-
-	/* new a partial_page */
-	pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
-	if (!pp)
-		return -ENOMEM;
-	pp->base = pstart;
-	pp->bitmap = 0;
-	for (i = 0; i < start_bit; i++)
-		set_bit(i, &(pp->bitmap));
-	for (i = end_bit; i < PAGE_SIZE / IA32_PAGE_SIZE; i++)
-		set_bit(i, &(pp->bitmap));
-	pp->next = NULL;
-	__ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent);
-	return 0;
-}
-
-/*
- * Delete pp between PAGE_ALIGN(start) and PAGE_START(end) by calling
- * __ia32_delete_pp_range(). Unset possible partial pages by calling
- * __ia32_unset_pp().
- * The returned value see __ia32_unset_pp().
- */
-static int
-ia32_unset_pp(unsigned int *startp, unsigned int *endp)
-{
-	unsigned int start = *startp, end = *endp;
-	int ret = 0;
-
-	down_write(&current->mm->mmap_sem);
-
-	__ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end));
-
-	if (end < PAGE_ALIGN(start)) {
-		ret = __ia32_unset_pp(start, end);
-		if (ret == 1) {
-			*startp = PAGE_START(start);
-			*endp = PAGE_ALIGN(end);
-		}
-		if (ret == 0) {
-			/* to shortcut sys_munmap() in sys32_munmap() */
-			*startp = PAGE_START(start);
-			*endp = PAGE_START(end);
-		}
-	} else {
-		if (offset_in_page(start)) {
-			ret = __ia32_unset_pp(start, PAGE_ALIGN(start));
-			if (ret == 1)
-				*startp = PAGE_START(start);
-			if (ret == 0)
-				*startp = PAGE_ALIGN(start);
-			if (ret < 0)
-				goto out;
-		}
-		if (offset_in_page(end)) {
-			ret = __ia32_unset_pp(PAGE_START(end), end);
-			if (ret == 1)
-				*endp = PAGE_ALIGN(end);
-			if (ret == 0)
-				*endp = PAGE_START(end);
-		}
-	}
-
- out:
-	up_write(&current->mm->mmap_sem);
-	return ret;
-}
-
-/*
- * Compare the range between @start and @end with bitmap in partial page.
- * @start and @end should be IA32 page aligned and in the same IA64 page.
- */
-static int
-__ia32_compare_pp(unsigned int start, unsigned int end)
-{
-	struct partial_page *pp, *prev;
-	struct rb_node ** rb_link, *rb_parent;
-	unsigned int pstart, start_bit, end_bit, size;
-	unsigned int first_bit, next_zero_bit;	/* the first range in bitmap */
-
-	pstart = PAGE_START(start);
-
-	pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
-					&rb_link, &rb_parent);
-	if (!pp)
-		return 1;
-
-	start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
-	end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
-	size = sizeof(pp->bitmap) * 8;
-	first_bit = find_first_bit(&pp->bitmap, size);
-	next_zero_bit = find_next_zero_bit(&pp->bitmap, size, first_bit);
-	if ((start_bit < first_bit) || (end_bit > next_zero_bit)) {
-		/* exceeds the first range in bitmap */
-		return -ENOMEM;
-	} else if ((start_bit == first_bit) && (end_bit == next_zero_bit)) {
-		first_bit = find_next_bit(&pp->bitmap, size, next_zero_bit);
-		if ((next_zero_bit < first_bit) && (first_bit < size))
-			return 1;	/* has next range */
-		else
-			return 0; 	/* no next range */
-	} else
-		return 1;
-}
-
-/*
- * @start and @end should be IA32 page aligned, but don't need to be in the
- * same IA64 page. Split @start and @end to make sure they're in the same IA64
- * page, then call __ia32_compare_pp().
- *
- * Take this as example: the range is the 1st and 2nd 4K page.
- * Return 0 if they fit bitmap exactly, i.e. bitmap = 00000011;
- * Return 1 if the range doesn't cover whole bitmap, e.g. bitmap = 00001111;
- * Return -ENOMEM if the range exceeds the bitmap, e.g. bitmap = 00000001 or
- * 	bitmap = 00000101.
- */
-static int
-ia32_compare_pp(unsigned int *startp, unsigned int *endp)
-{
-	unsigned int start = *startp, end = *endp;
-	int retval = 0;
-
-	down_write(&current->mm->mmap_sem);
-
-	if (end < PAGE_ALIGN(start)) {
-		retval = __ia32_compare_pp(start, end);
-		if (retval == 0) {
-			*startp = PAGE_START(start);
-			*endp = PAGE_ALIGN(end);
-		}
-	} else {
-		if (offset_in_page(start)) {
-			retval = __ia32_compare_pp(start,
-						   PAGE_ALIGN(start));
-			if (retval == 0)
-				*startp = PAGE_START(start);
-			if (retval < 0)
-				goto out;
-		}
-		if (offset_in_page(end)) {
-			retval = __ia32_compare_pp(PAGE_START(end), end);
-			if (retval == 0)
-				*endp = PAGE_ALIGN(end);
-		}
-	}
-
- out:
-	up_write(&current->mm->mmap_sem);
-	return retval;
-}
-
-static void
-__ia32_drop_pp_list(struct partial_page_list *ppl)
-{
-	struct partial_page *pp = ppl->pp_head;
-
-	while (pp) {
-		struct partial_page *next = pp->next;
-		kmem_cache_free(partial_page_cachep, pp);
-		pp = next;
-	}
-
-	kfree(ppl);
-}
-
-void
-ia32_drop_partial_page_list(struct task_struct *task)
-{
-	struct partial_page_list* ppl = task->thread.ppl;
-
-	if (ppl && atomic_dec_and_test(&ppl->pp_count))
-		__ia32_drop_pp_list(ppl);
-}
-
-/*
- * Copy current->thread.ppl to ppl (already initialized).
- */
-static int
-__ia32_copy_pp_list(struct partial_page_list *ppl)
-{
-	struct partial_page *pp, *tmp, *prev;
-	struct rb_node **rb_link, *rb_parent;
-
-	ppl->pp_head = NULL;
-	ppl->pp_hint = NULL;
-	ppl->ppl_rb = RB_ROOT;
-	rb_link = &ppl->ppl_rb.rb_node;
-	rb_parent = NULL;
-	prev = NULL;
-
-	for (pp = current->thread.ppl->pp_head; pp; pp = pp->next) {
-		tmp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
-		if (!tmp)
-			return -ENOMEM;
-		*tmp = *pp;
-		__ia32_insert_pp(ppl, tmp, prev, rb_link, rb_parent);
-		prev = tmp;
-		rb_link = &tmp->pp_rb.rb_right;
-		rb_parent = &tmp->pp_rb;
-	}
-	return 0;
-}
-
-int
-ia32_copy_partial_page_list(struct task_struct *p, unsigned long clone_flags)
-{
-	int retval = 0;
-
-	if (clone_flags & CLONE_VM) {
-		atomic_inc(&current->thread.ppl->pp_count);
-		p->thread.ppl = current->thread.ppl;
-	} else {
-		p->thread.ppl = ia32_init_pp_list();
-		if (!p->thread.ppl)
-			return -ENOMEM;
-		down_write(&current->mm->mmap_sem);
-		{
-			retval = __ia32_copy_pp_list(p->thread.ppl);
-		}
-		up_write(&current->mm->mmap_sem);
-	}
-
-	return retval;
-}
-
-static unsigned long
-emulate_mmap (struct file *file, unsigned long start, unsigned long len, int prot, int flags,
-	      loff_t off)
-{
-	unsigned long tmp, end, pend, pstart, ret, is_congruent, fudge = 0;
-	struct inode *inode;
-	loff_t poff;
-
-	end = start + len;
-	pstart = PAGE_START(start);
-	pend = PAGE_ALIGN(end);
-
-	if (flags & MAP_FIXED) {
-		ia32_set_pp((unsigned int)start, (unsigned int)end, flags);
-		if (start > pstart) {
-			if (flags & MAP_SHARED)
-				printk(KERN_INFO
-				       "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n",
-				       current->comm, current->pid, start);
-			ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags,
-					   off);
-			if (IS_ERR((void *) ret))
-				return ret;
-			pstart += PAGE_SIZE;
-			if (pstart >= pend)
-				goto out;	/* done */
-		}
-		if (end < pend) {
-			if (flags & MAP_SHARED)
-				printk(KERN_INFO
-				       "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n",
-				       current->comm, current->pid, end);
-			ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags,
-					   (off + len) - offset_in_page(end));
-			if (IS_ERR((void *) ret))
-				return ret;
-			pend -= PAGE_SIZE;
-			if (pstart >= pend)
-				goto out;	/* done */
-		}
-	} else {
-		/*
-		 * If a start address was specified, use it if the entire rounded out area
-		 * is available.
-		 */
-		if (start && !pstart)
-			fudge = 1;	/* handle case of mapping to range (0,PAGE_SIZE) */
-		tmp = arch_get_unmapped_area(file, pstart - fudge, pend - pstart, 0, flags);
-		if (tmp != pstart) {
-			pstart = tmp;
-			start = pstart + offset_in_page(off);	/* make start congruent with off */
-			end = start + len;
-			pend = PAGE_ALIGN(end);
-		}
-	}
-
-	poff = off + (pstart - start);	/* note: (pstart - start) may be negative */
-	is_congruent = (flags & MAP_ANONYMOUS) || (offset_in_page(poff) == 0);
-
-	if ((flags & MAP_SHARED) && !is_congruent)
-		printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
-		       "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off);
-
-	DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend,
-	    is_congruent ? "congruent" : "not congruent", poff);
-
-	down_write(&current->mm->mmap_sem);
-	{
-		if (!(flags & MAP_ANONYMOUS) && is_congruent)
-			ret = do_mmap(file, pstart, pend - pstart, prot, flags | MAP_FIXED, poff);
-		else
-			ret = do_mmap(NULL, pstart, pend - pstart,
-				      prot | ((flags & MAP_ANONYMOUS) ? 0 : PROT_WRITE),
-				      flags | MAP_FIXED | MAP_ANONYMOUS, 0);
-	}
-	up_write(&current->mm->mmap_sem);
-
-	if (IS_ERR((void *) ret))
-		return ret;
-
-	if (!is_congruent) {
-		/* read the file contents */
-		inode = file->f_dentry->d_inode;
-		if (!inode->i_fop || !file->f_op->read
-		    || ((*file->f_op->read)(file, (char __user *) pstart, pend - pstart, &poff)
-			< 0))
-		{
-			sys_munmap(pstart, pend - pstart);
-			return -EINVAL;
-		}
-		if (!(prot & PROT_WRITE) && sys_mprotect(pstart, pend - pstart, prot) < 0)
-			return -EINVAL;
-	}
-
-	if (!(flags & MAP_FIXED))
-		ia32_set_pp((unsigned int)start, (unsigned int)end, flags);
-out:
-	return start;
-}
-
-#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */
-
-static inline unsigned int
-get_prot32 (unsigned int prot)
-{
-	if (prot & PROT_WRITE)
-		/* on x86, PROT_WRITE implies PROT_READ which implies PROT_EEC */
-		prot |= PROT_READ | PROT_WRITE | PROT_EXEC;
-	else if (prot & (PROT_READ | PROT_EXEC))
-		/* on x86, there is no distinction between PROT_READ and PROT_EXEC */
-		prot |= (PROT_READ | PROT_EXEC);
-
-	return prot;
-}
-
-unsigned long
-ia32_do_mmap (struct file *file, unsigned long addr, unsigned long len, int prot, int flags,
-	      loff_t offset)
-{
-	DBG("ia32_do_mmap(file=%p,addr=0x%lx,len=0x%lx,prot=%x,flags=%x,offset=0x%llx)\n",
-	    file, addr, len, prot, flags, offset);
-
-	if (file && (!file->f_op || !file->f_op->mmap))
-		return -ENODEV;
-
-	len = IA32_PAGE_ALIGN(len);
-	if (len == 0)
-		return addr;
-
-	if (len > IA32_PAGE_OFFSET || addr > IA32_PAGE_OFFSET - len)
-	{
-		if (flags & MAP_FIXED)
-			return -ENOMEM;
-		else
-		return -EINVAL;
-	}
-
-	if (OFFSET4K(offset))
-		return -EINVAL;
-
-	prot = get_prot32(prot);
-
-#if PAGE_SHIFT > IA32_PAGE_SHIFT
-	mutex_lock(&ia32_mmap_mutex);
-	{
-		addr = emulate_mmap(file, addr, len, prot, flags, offset);
-	}
-	mutex_unlock(&ia32_mmap_mutex);
-#else
-	down_write(&current->mm->mmap_sem);
-	{
-		addr = do_mmap(file, addr, len, prot, flags, offset);
-	}
-	up_write(&current->mm->mmap_sem);
-#endif
-	DBG("ia32_do_mmap: returning 0x%lx\n", addr);
-	return addr;
-}
-
-/*
- * Linux/i386 didn't use to be able to handle more than 4 system call parameters, so these
- * system calls used a memory block for parameter passing..
- */
-
-struct mmap_arg_struct {
-	unsigned int addr;
-	unsigned int len;
-	unsigned int prot;
-	unsigned int flags;
-	unsigned int fd;
-	unsigned int offset;
-};
-
-asmlinkage long
-sys32_mmap (struct mmap_arg_struct __user *arg)
-{
-	struct mmap_arg_struct a;
-	struct file *file = NULL;
-	unsigned long addr;
-	int flags;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-
-	if (OFFSET4K(a.offset))
-		return -EINVAL;
-
-	flags = a.flags;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(a.fd);
-		if (!file)
-			return -EBADF;
-	}
-
-	addr = ia32_do_mmap(file, a.addr, a.len, a.prot, flags, a.offset);
-
-	if (file)
-		fput(file);
-	return addr;
-}
-
-asmlinkage long
-sys32_mmap2 (unsigned int addr, unsigned int len, unsigned int prot, unsigned int flags,
-	     unsigned int fd, unsigned int pgoff)
-{
-	struct file *file = NULL;
-	unsigned long retval;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			return -EBADF;
-	}
-
-	retval = ia32_do_mmap(file, addr, len, prot, flags,
-			      (unsigned long) pgoff << IA32_PAGE_SHIFT);
-
-	if (file)
-		fput(file);
-	return retval;
-}
-
-asmlinkage long
-sys32_munmap (unsigned int start, unsigned int len)
-{
-	unsigned int end = start + len;
-	long ret;
-
-#if PAGE_SHIFT <= IA32_PAGE_SHIFT
-	ret = sys_munmap(start, end - start);
-#else
-	if (OFFSET4K(start))
-		return -EINVAL;
-
-	end = IA32_PAGE_ALIGN(end);
-	if (start >= end)
-		return -EINVAL;
-
-	ret = ia32_unset_pp(&start, &end);
-	if (ret < 0)
-		return ret;
-
-	if (start >= end)
-		return 0;
-
-	mutex_lock(&ia32_mmap_mutex);
-	ret = sys_munmap(start, end - start);
-	mutex_unlock(&ia32_mmap_mutex);
-#endif
-	return ret;
-}
-
-#if PAGE_SHIFT > IA32_PAGE_SHIFT
-
-/*
- * When mprotect()ing a partial page, we set the permission to the union of the old
- * settings and the new settings.  In other words, it's only possible to make access to a
- * partial page less restrictive.
- */
-static long
-mprotect_subpage (unsigned long address, int new_prot)
-{
-	int old_prot;
-	struct vm_area_struct *vma;
-
-	if (new_prot == PROT_NONE)
-		return 0;		/* optimize case where nothing changes... */
-	vma = find_vma(current->mm, address);
-	old_prot = get_page_prot(vma, address);
-	return sys_mprotect(address, PAGE_SIZE, new_prot | old_prot);
-}
-
-#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */
-
-asmlinkage long
-sys32_mprotect (unsigned int start, unsigned int len, int prot)
-{
-	unsigned int end = start + len;
-#if PAGE_SHIFT > IA32_PAGE_SHIFT
-	long retval = 0;
-#endif
-
-	prot = get_prot32(prot);
-
-#if PAGE_SHIFT <= IA32_PAGE_SHIFT
-	return sys_mprotect(start, end - start, prot);
-#else
-	if (OFFSET4K(start))
-		return -EINVAL;
-
-	end = IA32_PAGE_ALIGN(end);
-	if (end < start)
-		return -EINVAL;
-
-	retval = ia32_compare_pp(&start, &end);
-
-	if (retval < 0)
-		return retval;
-
-	mutex_lock(&ia32_mmap_mutex);
-	{
-		if (offset_in_page(start)) {
-			/* start address is 4KB aligned but not page aligned. */
-			retval = mprotect_subpage(PAGE_START(start), prot);
-			if (retval < 0)
-				goto out;
-
-			start = PAGE_ALIGN(start);
-			if (start >= end)
-				goto out;	/* retval is already zero... */
-		}
-
-		if (offset_in_page(end)) {
-			/* end address is 4KB aligned but not page aligned. */
-			retval = mprotect_subpage(PAGE_START(end), prot);
-			if (retval < 0)
-				goto out;
-
-			end = PAGE_START(end);
-		}
-		retval = sys_mprotect(start, end - start, prot);
-	}
-  out:
-	mutex_unlock(&ia32_mmap_mutex);
-	return retval;
-#endif
-}
-
-asmlinkage long
-sys32_mremap (unsigned int addr, unsigned int old_len, unsigned int new_len,
-		unsigned int flags, unsigned int new_addr)
-{
-	long ret;
-
-#if PAGE_SHIFT <= IA32_PAGE_SHIFT
-	ret = sys_mremap(addr, old_len, new_len, flags, new_addr);
-#else
-	unsigned int old_end, new_end;
-
-	if (OFFSET4K(addr))
-		return -EINVAL;
-
-	old_len = IA32_PAGE_ALIGN(old_len);
-	new_len = IA32_PAGE_ALIGN(new_len);
-	old_end = addr + old_len;
-	new_end = addr + new_len;
-
-	if (!new_len)
-		return -EINVAL;
-
-	if ((flags & MREMAP_FIXED) && (OFFSET4K(new_addr)))
-		return -EINVAL;
-
-	if (old_len >= new_len) {
-		ret = sys32_munmap(addr + new_len, old_len - new_len);
-		if (ret && old_len != new_len)
-			return ret;
-		ret = addr;
-		if (!(flags & MREMAP_FIXED) || (new_addr == addr))
-			return ret;
-		old_len = new_len;
-	}
-
-	addr = PAGE_START(addr);
-	old_len = PAGE_ALIGN(old_end) - addr;
-	new_len = PAGE_ALIGN(new_end) - addr;
-
-	mutex_lock(&ia32_mmap_mutex);
-	ret = sys_mremap(addr, old_len, new_len, flags, new_addr);
-	mutex_unlock(&ia32_mmap_mutex);
-
-	if ((ret >= 0) && (old_len < new_len)) {
-		/* mremap expanded successfully */
-		ia32_set_pp(old_end, new_end, flags);
-	}
-#endif
-	return ret;
-}
-
-asmlinkage long
-sys32_pipe (int __user *fd)
-{
-	int retval;
-	int fds[2];
-
-	retval = do_pipe(fds);
-	if (retval)
-		goto out;
-	if (copy_to_user(fd, fds, sizeof(fds)))
-		retval = -EFAULT;
-  out:
-	return retval;
-}
-
-static inline long
-get_tv32 (struct timeval *o, struct compat_timeval __user *i)
-{
-	return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
-		(__get_user(o->tv_sec, &i->tv_sec) | __get_user(o->tv_usec, &i->tv_usec)));
-}
-
-static inline long
-put_tv32 (struct compat_timeval __user *o, struct timeval *i)
-{
-	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
-		(__put_user(i->tv_sec, &o->tv_sec) | __put_user(i->tv_usec, &o->tv_usec)));
-}
-
-asmlinkage unsigned long
-sys32_alarm (unsigned int seconds)
-{
-	return alarm_setitimer(seconds);
-}
-
-/* Translations due to time_t size differences.  Which affects all
-   sorts of things, like timeval and itimerval.  */
-
-extern struct timezone sys_tz;
-
-asmlinkage long
-sys32_gettimeofday (struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	if (tv) {
-		struct timeval ktv;
-		do_gettimeofday(&ktv);
-		if (put_tv32(tv, &ktv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-asmlinkage long
-sys32_settimeofday (struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	struct timeval ktv;
-	struct timespec kts;
-	struct timezone ktz;
-
-	if (tv) {
-		if (get_tv32(&ktv, tv))
-			return -EFAULT;
-		kts.tv_sec = ktv.tv_sec;
-		kts.tv_nsec = ktv.tv_usec * 1000;
-	}
-	if (tz) {
-		if (copy_from_user(&ktz, tz, sizeof(ktz)))
-			return -EFAULT;
-	}
-
-	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
-}
-
-struct getdents32_callback {
-	struct compat_dirent __user *current_dir;
-	struct compat_dirent __user *previous;
-	int count;
-	int error;
-};
-
-struct readdir32_callback {
-	struct old_linux32_dirent __user * dirent;
-	int count;
-};
-
-static int
-filldir32 (void *__buf, const char *name, int namlen, loff_t offset, ino_t ino,
-	   unsigned int d_type)
-{
-	struct compat_dirent __user * dirent;
-	struct getdents32_callback * buf = (struct getdents32_callback *) __buf;
-	int reclen = ROUND_UP(offsetof(struct compat_dirent, d_name) + namlen + 1, 4);
-
-	buf->error = -EINVAL;	/* only used if we fail.. */
-	if (reclen > buf->count)
-		return -EINVAL;
-	buf->error = -EFAULT;	/* only used if we fail.. */
-	dirent = buf->previous;
-	if (dirent)
-		if (put_user(offset, &dirent->d_off))
-			return -EFAULT;
-	dirent = buf->current_dir;
-	buf->previous = dirent;
-	if (put_user(ino, &dirent->d_ino)
-	    || put_user(reclen, &dirent->d_reclen)
-	    || copy_to_user(dirent->d_name, name, namlen)
-	    || put_user(0, dirent->d_name + namlen))
-		return -EFAULT;
-	dirent = (struct compat_dirent __user *) ((char __user *) dirent + reclen);
-	buf->current_dir = dirent;
-	buf->count -= reclen;
-	return 0;
-}
-
-asmlinkage long
-sys32_getdents (unsigned int fd, struct compat_dirent __user *dirent, unsigned int count)
-{
-	struct file * file;
-	struct compat_dirent __user * lastdirent;
-	struct getdents32_callback buf;
-	int error;
-
-	error = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
-
-	buf.current_dir = dirent;
-	buf.previous = NULL;
-	buf.count = count;
-	buf.error = 0;
-
-	error = vfs_readdir(file, filldir32, &buf);
-	if (error < 0)
-		goto out_putf;
-	error = buf.error;
-	lastdirent = buf.previous;
-	if (lastdirent) {
-		error = -EINVAL;
-		if (put_user(file->f_pos, &lastdirent->d_off))
-			goto out_putf;
-		error = count - buf.count;
-	}
-
-out_putf:
-	fput(file);
-out:
-	return error;
-}
-
-static int
-fillonedir32 (void * __buf, const char * name, int namlen, loff_t offset, ino_t ino,
-	      unsigned int d_type)
-{
-	struct readdir32_callback * buf = (struct readdir32_callback *) __buf;
-	struct old_linux32_dirent __user * dirent;
-
-	if (buf->count)
-		return -EINVAL;
-	buf->count++;
-	dirent = buf->dirent;
-	if (put_user(ino, &dirent->d_ino)
-	    || put_user(offset, &dirent->d_offset)
-	    || put_user(namlen, &dirent->d_namlen)
-	    || copy_to_user(dirent->d_name, name, namlen)
-	    || put_user(0, dirent->d_name + namlen))
-		return -EFAULT;
-	return 0;
-}
-
-asmlinkage long
-sys32_readdir (unsigned int fd, void __user *dirent, unsigned int count)
-{
-	int error;
-	struct file * file;
-	struct readdir32_callback buf;
-
-	error = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
-
-	buf.count = 0;
-	buf.dirent = dirent;
-
-	error = vfs_readdir(file, fillonedir32, &buf);
-	if (error >= 0)
-		error = buf.count;
-	fput(file);
-out:
-	return error;
-}
-
-struct sel_arg_struct {
-	unsigned int n;
-	unsigned int inp;
-	unsigned int outp;
-	unsigned int exp;
-	unsigned int tvp;
-};
-
-asmlinkage long
-sys32_old_select (struct sel_arg_struct __user *arg)
-{
-	struct sel_arg_struct a;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-	return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
-				 compat_ptr(a.exp), compat_ptr(a.tvp));
-}
-
-#define SEMOP		 1
-#define SEMGET		 2
-#define SEMCTL		 3
-#define SEMTIMEDOP	 4
-#define MSGSND		11
-#define MSGRCV		12
-#define MSGGET		13
-#define MSGCTL		14
-#define SHMAT		21
-#define SHMDT		22
-#define SHMGET		23
-#define SHMCTL		24
-
-asmlinkage long
-sys32_ipc(u32 call, int first, int second, int third, u32 ptr, u32 fifth)
-{
-	int version;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	switch (call) {
-	      case SEMTIMEDOP:
-		if (fifth)
-			return compat_sys_semtimedop(first, compat_ptr(ptr),
-				second, compat_ptr(fifth));
-		/* else fall through for normal semop() */
-	      case SEMOP:
-		/* struct sembuf is the same on 32 and 64bit :)) */
-		return sys_semtimedop(first, compat_ptr(ptr), second,
-				      NULL);
-	      case SEMGET:
-		return sys_semget(first, second, third);
-	      case SEMCTL:
-		return compat_sys_semctl(first, second, third, compat_ptr(ptr));
-
-	      case MSGSND:
-		return compat_sys_msgsnd(first, second, third, compat_ptr(ptr));
-	      case MSGRCV:
-		return compat_sys_msgrcv(first, second, fifth, third, version, compat_ptr(ptr));
-	      case MSGGET:
-		return sys_msgget((key_t) first, second);
-	      case MSGCTL:
-		return compat_sys_msgctl(first, second, compat_ptr(ptr));
-
-	      case SHMAT:
-		return compat_sys_shmat(first, second, third, version, compat_ptr(ptr));
-		break;
-	      case SHMDT:
-		return sys_shmdt(compat_ptr(ptr));
-	      case SHMGET:
-		return sys_shmget(first, (unsigned)second, third);
-	      case SHMCTL:
-		return compat_sys_shmctl(first, second, compat_ptr(ptr));
-
-	      default:
-		return -ENOSYS;
-	}
-	return -EINVAL;
-}
-
-asmlinkage long
-compat_sys_wait4 (compat_pid_t pid, compat_uint_t * stat_addr, int options,
-		 struct compat_rusage *ru);
-
-asmlinkage long
-sys32_waitpid (int pid, unsigned int *stat_addr, int options)
-{
-	return compat_sys_wait4(pid, stat_addr, options, NULL);
-}
-
-static unsigned int
-ia32_peek (struct task_struct *child, unsigned long addr, unsigned int *val)
-{
-	size_t copied;
-	unsigned int ret;
-
-	copied = access_process_vm(child, addr, val, sizeof(*val), 0);
-	return (copied != sizeof(ret)) ? -EIO : 0;
-}
-
-static unsigned int
-ia32_poke (struct task_struct *child, unsigned long addr, unsigned int val)
-{
-
-	if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val))
-		return -EIO;
-	return 0;
-}
-
-/*
- *  The order in which registers are stored in the ptrace regs structure
- */
-#define PT_EBX	0
-#define PT_ECX	1
-#define PT_EDX	2
-#define PT_ESI	3
-#define PT_EDI	4
-#define PT_EBP	5
-#define PT_EAX	6
-#define PT_DS	7
-#define PT_ES	8
-#define PT_FS	9
-#define PT_GS	10
-#define PT_ORIG_EAX 11
-#define PT_EIP	12
-#define PT_CS	13
-#define PT_EFL	14
-#define PT_UESP	15
-#define PT_SS	16
-
-static unsigned int
-getreg (struct task_struct *child, int regno)
-{
-	struct pt_regs *child_regs;
-
-	child_regs = task_pt_regs(child);
-	switch (regno / sizeof(int)) {
-	      case PT_EBX: return child_regs->r11;
-	      case PT_ECX: return child_regs->r9;
-	      case PT_EDX: return child_regs->r10;
-	      case PT_ESI: return child_regs->r14;
-	      case PT_EDI: return child_regs->r15;
-	      case PT_EBP: return child_regs->r13;
-	      case PT_EAX: return child_regs->r8;
-	      case PT_ORIG_EAX: return child_regs->r1; /* see dispatch_to_ia32_handler() */
-	      case PT_EIP: return child_regs->cr_iip;
-	      case PT_UESP: return child_regs->r12;
-	      case PT_EFL: return child->thread.eflag;
-	      case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS:
-		return __USER_DS;
-	      case PT_CS: return __USER_CS;
-	      default:
-		printk(KERN_ERR "ia32.getreg(): unknown register %d\n", regno);
-		break;
-	}
-	return 0;
-}
-
-static void
-putreg (struct task_struct *child, int regno, unsigned int value)
-{
-	struct pt_regs *child_regs;
-
-	child_regs = task_pt_regs(child);
-	switch (regno / sizeof(int)) {
-	      case PT_EBX: child_regs->r11 = value; break;
-	      case PT_ECX: child_regs->r9 = value; break;
-	      case PT_EDX: child_regs->r10 = value; break;
-	      case PT_ESI: child_regs->r14 = value; break;
-	      case PT_EDI: child_regs->r15 = value; break;
-	      case PT_EBP: child_regs->r13 = value; break;
-	      case PT_EAX: child_regs->r8 = value; break;
-	      case PT_ORIG_EAX: child_regs->r1 = value; break;
-	      case PT_EIP: child_regs->cr_iip = value; break;
-	      case PT_UESP: child_regs->r12 = value; break;
-	      case PT_EFL: child->thread.eflag = value; break;
-	      case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS:
-		if (value != __USER_DS)
-			printk(KERN_ERR
-			       "ia32.putreg: attempt to set invalid segment register %d = %x\n",
-			       regno, value);
-		break;
-	      case PT_CS:
-		if (value != __USER_CS)
-			printk(KERN_ERR
-			       "ia32.putreg: attempt to to set invalid segment register %d = %x\n",
-			       regno, value);
-		break;
-	      default:
-		printk(KERN_ERR "ia32.putreg: unknown register %d\n", regno);
-		break;
-	}
-}
-
-static void
-put_fpreg (int regno, struct _fpreg_ia32 __user *reg, struct pt_regs *ptp,
-	   struct switch_stack *swp, int tos)
-{
-	struct _fpreg_ia32 *f;
-	char buf[32];
-
-	f = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15);
-	if ((regno += tos) >= 8)
-		regno -= 8;
-	switch (regno) {
-	      case 0:
-		ia64f2ia32f(f, &ptp->f8);
-		break;
-	      case 1:
-		ia64f2ia32f(f, &ptp->f9);
-		break;
-	      case 2:
-		ia64f2ia32f(f, &ptp->f10);
-		break;
-	      case 3:
-		ia64f2ia32f(f, &ptp->f11);
-		break;
-	      case 4:
-	      case 5:
-	      case 6:
-	      case 7:
-		ia64f2ia32f(f, &swp->f12 + (regno - 4));
-		break;
-	}
-	copy_to_user(reg, f, sizeof(*reg));
-}
-
-static void
-get_fpreg (int regno, struct _fpreg_ia32 __user *reg, struct pt_regs *ptp,
-	   struct switch_stack *swp, int tos)
-{
-
-	if ((regno += tos) >= 8)
-		regno -= 8;
-	switch (regno) {
-	      case 0:
-		copy_from_user(&ptp->f8, reg, sizeof(*reg));
-		break;
-	      case 1:
-		copy_from_user(&ptp->f9, reg, sizeof(*reg));
-		break;
-	      case 2:
-		copy_from_user(&ptp->f10, reg, sizeof(*reg));
-		break;
-	      case 3:
-		copy_from_user(&ptp->f11, reg, sizeof(*reg));
-		break;
-	      case 4:
-	      case 5:
-	      case 6:
-	      case 7:
-		copy_from_user(&swp->f12 + (regno - 4), reg, sizeof(*reg));
-		break;
-	}
-	return;
-}
-
-int
-save_ia32_fpstate (struct task_struct *tsk, struct ia32_user_i387_struct __user *save)
-{
-	struct switch_stack *swp;
-	struct pt_regs *ptp;
-	int i, tos;
-
-	if (!access_ok(VERIFY_WRITE, save, sizeof(*save)))
-		return -EFAULT;
-
-	__put_user(tsk->thread.fcr & 0xffff, &save->cwd);
-	__put_user(tsk->thread.fsr & 0xffff, &save->swd);
-	__put_user((tsk->thread.fsr>>16) & 0xffff, &save->twd);
-	__put_user(tsk->thread.fir, &save->fip);
-	__put_user((tsk->thread.fir>>32) & 0xffff, &save->fcs);
-	__put_user(tsk->thread.fdr, &save->foo);
-	__put_user((tsk->thread.fdr>>32) & 0xffff, &save->fos);
-
-	/*
-	 *  Stack frames start with 16-bytes of temp space
-	 */
-	swp = (struct switch_stack *)(tsk->thread.ksp + 16);
-	ptp = task_pt_regs(tsk);
-	tos = (tsk->thread.fsr >> 11) & 7;
-	for (i = 0; i < 8; i++)
-		put_fpreg(i, &save->st_space[i], ptp, swp, tos);
-	return 0;
-}
-
-static int
-restore_ia32_fpstate (struct task_struct *tsk, struct ia32_user_i387_struct __user *save)
-{
-	struct switch_stack *swp;
-	struct pt_regs *ptp;
-	int i, tos;
-	unsigned int fsrlo, fsrhi, num32;
-
-	if (!access_ok(VERIFY_READ, save, sizeof(*save)))
-		return(-EFAULT);
-
-	__get_user(num32, (unsigned int __user *)&save->cwd);
-	tsk->thread.fcr = (tsk->thread.fcr & (~0x1f3f)) | (num32 & 0x1f3f);
-	__get_user(fsrlo, (unsigned int __user *)&save->swd);
-	__get_user(fsrhi, (unsigned int __user *)&save->twd);
-	num32 = (fsrhi << 16) | fsrlo;
-	tsk->thread.fsr = (tsk->thread.fsr & (~0xffffffff)) | num32;
-	__get_user(num32, (unsigned int __user *)&save->fip);
-	tsk->thread.fir = (tsk->thread.fir & (~0xffffffff)) | num32;
-	__get_user(num32, (unsigned int __user *)&save->foo);
-	tsk->thread.fdr = (tsk->thread.fdr & (~0xffffffff)) | num32;
-
-	/*
-	 *  Stack frames start with 16-bytes of temp space
-	 */
-	swp = (struct switch_stack *)(tsk->thread.ksp + 16);
-	ptp = task_pt_regs(tsk);
-	tos = (tsk->thread.fsr >> 11) & 7;
-	for (i = 0; i < 8; i++)
-		get_fpreg(i, &save->st_space[i], ptp, swp, tos);
-	return 0;
-}
-
-int
-save_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __user *save)
-{
-	struct switch_stack *swp;
-	struct pt_regs *ptp;
-	int i, tos;
-	unsigned long mxcsr=0;
-	unsigned long num128[2];
-
-	if (!access_ok(VERIFY_WRITE, save, sizeof(*save)))
-		return -EFAULT;
-
-	__put_user(tsk->thread.fcr & 0xffff, &save->cwd);
-	__put_user(tsk->thread.fsr & 0xffff, &save->swd);
-	__put_user((tsk->thread.fsr>>16) & 0xffff, &save->twd);
-	__put_user(tsk->thread.fir, &save->fip);
-	__put_user((tsk->thread.fir>>32) & 0xffff, &save->fcs);
-	__put_user(tsk->thread.fdr, &save->foo);
-	__put_user((tsk->thread.fdr>>32) & 0xffff, &save->fos);
-
-        /*
-         *  Stack frames start with 16-bytes of temp space
-         */
-        swp = (struct switch_stack *)(tsk->thread.ksp + 16);
-        ptp = task_pt_regs(tsk);
-	tos = (tsk->thread.fsr >> 11) & 7;
-        for (i = 0; i < 8; i++)
-		put_fpreg(i, (struct _fpreg_ia32 __user *)&save->st_space[4*i], ptp, swp, tos);
-
-	mxcsr = ((tsk->thread.fcr>>32) & 0xff80) | ((tsk->thread.fsr>>32) & 0x3f);
-	__put_user(mxcsr & 0xffff, &save->mxcsr);
-	for (i = 0; i < 8; i++) {
-		memcpy(&(num128[0]), &(swp->f16) + i*2, sizeof(unsigned long));
-		memcpy(&(num128[1]), &(swp->f17) + i*2, sizeof(unsigned long));
-		copy_to_user(&save->xmm_space[0] + 4*i, num128, sizeof(struct _xmmreg_ia32));
-	}
-	return 0;
-}
-
-static int
-restore_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __user *save)
-{
-	struct switch_stack *swp;
-	struct pt_regs *ptp;
-	int i, tos;
-	unsigned int fsrlo, fsrhi, num32;
-	int mxcsr;
-	unsigned long num64;
-	unsigned long num128[2];
-
-	if (!access_ok(VERIFY_READ, save, sizeof(*save)))
-		return(-EFAULT);
-
-	__get_user(num32, (unsigned int __user *)&save->cwd);
-	tsk->thread.fcr = (tsk->thread.fcr & (~0x1f3f)) | (num32 & 0x1f3f);
-	__get_user(fsrlo, (unsigned int __user *)&save->swd);
-	__get_user(fsrhi, (unsigned int __user *)&save->twd);
-	num32 = (fsrhi << 16) | fsrlo;
-	tsk->thread.fsr = (tsk->thread.fsr & (~0xffffffff)) | num32;
-	__get_user(num32, (unsigned int __user *)&save->fip);
-	tsk->thread.fir = (tsk->thread.fir & (~0xffffffff)) | num32;
-	__get_user(num32, (unsigned int __user *)&save->foo);
-	tsk->thread.fdr = (tsk->thread.fdr & (~0xffffffff)) | num32;
-
-	/*
-	 *  Stack frames start with 16-bytes of temp space
-	 */
-	swp = (struct switch_stack *)(tsk->thread.ksp + 16);
-	ptp = task_pt_regs(tsk);
-	tos = (tsk->thread.fsr >> 11) & 7;
-	for (i = 0; i < 8; i++)
-	get_fpreg(i, (struct _fpreg_ia32 __user *)&save->st_space[4*i], ptp, swp, tos);
-
-	__get_user(mxcsr, (unsigned int __user *)&save->mxcsr);
-	num64 = mxcsr & 0xff10;
-	tsk->thread.fcr = (tsk->thread.fcr & (~0xff1000000000UL)) | (num64<<32);
-	num64 = mxcsr & 0x3f;
-	tsk->thread.fsr = (tsk->thread.fsr & (~0x3f00000000UL)) | (num64<<32);
-
-	for (i = 0; i < 8; i++) {
-		copy_from_user(num128, &save->xmm_space[0] + 4*i, sizeof(struct _xmmreg_ia32));
-		memcpy(&(swp->f16) + i*2, &(num128[0]), sizeof(unsigned long));
-		memcpy(&(swp->f17) + i*2, &(num128[1]), sizeof(unsigned long));
-	}
-	return 0;
-}
-
-asmlinkage long
-sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data)
-{
-	struct task_struct *child;
-	unsigned int value, tmp;
-	long i, ret;
-
-	lock_kernel();
-	if (request == PTRACE_TRACEME) {
-		ret = ptrace_traceme();
-		goto out;
-	}
-
-	child = ptrace_get_task_struct(pid);
-	if (IS_ERR(child)) {
-		ret = PTR_ERR(child);
-		goto out;
-	}
-
-	if (request == PTRACE_ATTACH) {
-		ret = sys_ptrace(request, pid, addr, data);
-		goto out_tsk;
-	}
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out_tsk;
-
-	switch (request) {
-	      case PTRACE_PEEKTEXT:
-	      case PTRACE_PEEKDATA:	/* read word at location addr */
-		ret = ia32_peek(child, addr, &value);
-		if (ret == 0)
-			ret = put_user(value, (unsigned int __user *) compat_ptr(data));
-		else
-			ret = -EIO;
-		goto out_tsk;
-
-	      case PTRACE_POKETEXT:
-	      case PTRACE_POKEDATA:	/* write the word at location addr */
-		ret = ia32_poke(child, addr, data);
-		goto out_tsk;
-
-	      case PTRACE_PEEKUSR:	/* read word at addr in USER area */
-		ret = -EIO;
-		if ((addr & 3) || addr > 17*sizeof(int))
-			break;
-
-		tmp = getreg(child, addr);
-		if (!put_user(tmp, (unsigned int __user *) compat_ptr(data)))
-			ret = 0;
-		break;
-
-	      case PTRACE_POKEUSR:	/* write word at addr in USER area */
-		ret = -EIO;
-		if ((addr & 3) || addr > 17*sizeof(int))
-			break;
-
-		putreg(child, addr, data);
-		ret = 0;
-		break;
-
-	      case IA32_PTRACE_GETREGS:
-		if (!access_ok(VERIFY_WRITE, compat_ptr(data), 17*sizeof(int))) {
-			ret = -EIO;
-			break;
-		}
-		for (i = 0; i < (int) (17*sizeof(int)); i += sizeof(int) ) {
-			put_user(getreg(child, i), (unsigned int __user *) compat_ptr(data));
-			data += sizeof(int);
-		}
-		ret = 0;
-		break;
-
-	      case IA32_PTRACE_SETREGS:
-		if (!access_ok(VERIFY_READ, compat_ptr(data), 17*sizeof(int))) {
-			ret = -EIO;
-			break;
-		}
-		for (i = 0; i < (int) (17*sizeof(int)); i += sizeof(int) ) {
-			get_user(tmp, (unsigned int __user *) compat_ptr(data));
-			putreg(child, i, tmp);
-			data += sizeof(int);
-		}
-		ret = 0;
-		break;
-
-	      case IA32_PTRACE_GETFPREGS:
-		ret = save_ia32_fpstate(child, (struct ia32_user_i387_struct __user *)
-					compat_ptr(data));
-		break;
-
-	      case IA32_PTRACE_GETFPXREGS:
-		ret = save_ia32_fpxstate(child, (struct ia32_user_fxsr_struct __user *)
-					 compat_ptr(data));
-		break;
-
-	      case IA32_PTRACE_SETFPREGS:
-		ret = restore_ia32_fpstate(child, (struct ia32_user_i387_struct __user *)
-					   compat_ptr(data));
-		break;
-
-	      case IA32_PTRACE_SETFPXREGS:
-		ret = restore_ia32_fpxstate(child, (struct ia32_user_fxsr_struct __user *)
-					    compat_ptr(data));
-		break;
-
-	      case PTRACE_GETEVENTMSG:   
-		ret = put_user(child->ptrace_message, (unsigned int __user *) compat_ptr(data));
-		break;
-
-	      case PTRACE_SYSCALL:	/* continue, stop after next syscall */
-	      case PTRACE_CONT:		/* restart after signal. */
-	      case PTRACE_KILL:
-	      case PTRACE_SINGLESTEP:	/* execute chile for one instruction */
-	      case PTRACE_DETACH:	/* detach a process */
-		ret = sys_ptrace(request, pid, addr, data);
-		break;
-
-	      default:
-		ret = ptrace_request(child, request, addr, data);
-		break;
-
-	}
-  out_tsk:
-	put_task_struct(child);
-  out:
-	unlock_kernel();
-	return ret;
-}
-
-typedef struct {
-	unsigned int	ss_sp;
-	unsigned int	ss_flags;
-	unsigned int	ss_size;
-} ia32_stack_t;
-
-asmlinkage long
-sys32_sigaltstack (ia32_stack_t __user *uss32, ia32_stack_t __user *uoss32,
-		   long arg2, long arg3, long arg4, long arg5, long arg6,
-		   long arg7, struct pt_regs pt)
-{
-	stack_t uss, uoss;
-	ia32_stack_t buf32;
-	int ret;
-	mm_segment_t old_fs = get_fs();
-
-	if (uss32) {
-		if (copy_from_user(&buf32, uss32, sizeof(ia32_stack_t)))
-			return -EFAULT;
-		uss.ss_sp = (void __user *) (long) buf32.ss_sp;
-		uss.ss_flags = buf32.ss_flags;
-		/* MINSIGSTKSZ is different for ia32 vs ia64. We lie here to pass the
-	           check and set it to the user requested value later */
-		if ((buf32.ss_flags != SS_DISABLE) && (buf32.ss_size < MINSIGSTKSZ_IA32)) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		uss.ss_size = MINSIGSTKSZ;
-	}
-	set_fs(KERNEL_DS);
-	ret = do_sigaltstack(uss32 ? (stack_t __user *) &uss : NULL,
-			     (stack_t __user *) &uoss, pt.r12);
- 	current->sas_ss_size = buf32.ss_size;
-	set_fs(old_fs);
-out:
-	if (ret < 0)
-		return(ret);
-	if (uoss32) {
-		buf32.ss_sp = (long __user) uoss.ss_sp;
-		buf32.ss_flags = uoss.ss_flags;
-		buf32.ss_size = uoss.ss_size;
-		if (copy_to_user(uoss32, &buf32, sizeof(ia32_stack_t)))
-			return -EFAULT;
-	}
-	return ret;
-}
-
-asmlinkage int
-sys32_pause (void)
-{
-	current->state = TASK_INTERRUPTIBLE;
-	schedule();
-	return -ERESTARTNOHAND;
-}
-
-asmlinkage int
-sys32_msync (unsigned int start, unsigned int len, int flags)
-{
-	unsigned int addr;
-
-	if (OFFSET4K(start))
-		return -EINVAL;
-	addr = PAGE_START(start);
-	return sys_msync(addr, len + (start - addr), flags);
-}
-
-struct sysctl32 {
-	unsigned int	name;
-	int		nlen;
-	unsigned int	oldval;
-	unsigned int	oldlenp;
-	unsigned int	newval;
-	unsigned int	newlen;
-	unsigned int	__unused[4];
-};
-
-#ifdef CONFIG_SYSCTL
-asmlinkage long
-sys32_sysctl (struct sysctl32 __user *args)
-{
-	struct sysctl32 a32;
-	mm_segment_t old_fs = get_fs ();
-	void __user *oldvalp, *newvalp;
-	size_t oldlen;
-	int __user *namep;
-	long ret;
-
-	if (copy_from_user(&a32, args, sizeof(a32)))
-		return -EFAULT;
-
-	/*
-	 * We need to pre-validate these because we have to disable address checking
-	 * before calling do_sysctl() because of OLDLEN but we can't run the risk of the
-	 * user specifying bad addresses here.  Well, since we're dealing with 32 bit
-	 * addresses, we KNOW that access_ok() will always succeed, so this is an
-	 * expensive NOP, but so what...
-	 */
-	namep = (int __user *) compat_ptr(a32.name);
-	oldvalp = compat_ptr(a32.oldval);
-	newvalp = compat_ptr(a32.newval);
-
-	if ((oldvalp && get_user(oldlen, (int __user *) compat_ptr(a32.oldlenp)))
-	    || !access_ok(VERIFY_WRITE, namep, 0)
-	    || !access_ok(VERIFY_WRITE, oldvalp, 0)
-	    || !access_ok(VERIFY_WRITE, newvalp, 0))
-		return -EFAULT;
-
-	set_fs(KERNEL_DS);
-	lock_kernel();
-	ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *) &oldlen,
-			newvalp, (size_t) a32.newlen);
-	unlock_kernel();
-	set_fs(old_fs);
-
-	if (oldvalp && put_user (oldlen, (int __user *) compat_ptr(a32.oldlenp)))
-		return -EFAULT;
-
-	return ret;
-}
-#endif
-
-asmlinkage long
-sys32_newuname (struct new_utsname __user *name)
-{
-	int ret = sys_newuname(name);
-
-	if (!ret)
-		if (copy_to_user(name->machine, "i686\0\0\0", 8))
-			ret = -EFAULT;
-	return ret;
-}
-
-asmlinkage long
-sys32_getresuid16 (u16 __user *ruid, u16 __user *euid, u16 __user *suid)
-{
-	uid_t a, b, c;
-	int ret;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_getresuid((uid_t __user *) &a, (uid_t __user *) &b, (uid_t __user *) &c);
-	set_fs(old_fs);
-
-	if (put_user(a, ruid) || put_user(b, euid) || put_user(c, suid))
-		return -EFAULT;
-	return ret;
-}
-
-asmlinkage long
-sys32_getresgid16 (u16 __user *rgid, u16 __user *egid, u16 __user *sgid)
-{
-	gid_t a, b, c;
-	int ret;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_getresgid((gid_t __user *) &a, (gid_t __user *) &b, (gid_t __user *) &c);
-	set_fs(old_fs);
-
-	if (ret)
-		return ret;
-
-	return put_user(a, rgid) | put_user(b, egid) | put_user(c, sgid);
-}
-
-asmlinkage long
-sys32_lseek (unsigned int fd, int offset, unsigned int whence)
-{
-	/* Sign-extension of "offset" is important here... */
-	return sys_lseek(fd, offset, whence);
-}
-
-static int
-groups16_to_user(short __user *grouplist, struct group_info *group_info)
-{
-	int i;
-	short group;
-
-	for (i = 0; i < group_info->ngroups; i++) {
-		group = (short)GROUP_AT(group_info, i);
-		if (put_user(group, grouplist+i))
-			return -EFAULT;
-	}
-
-	return 0;
-}
-
-static int
-groups16_from_user(struct group_info *group_info, short __user *grouplist)
-{
-	int i;
-	short group;
-
-	for (i = 0; i < group_info->ngroups; i++) {
-		if (get_user(group, grouplist+i))
-			return  -EFAULT;
-		GROUP_AT(group_info, i) = (gid_t)group;
-	}
-
-	return 0;
-}
-
-asmlinkage long
-sys32_getgroups16 (int gidsetsize, short __user *grouplist)
-{
-	int i;
-
-	if (gidsetsize < 0)
-		return -EINVAL;
-
-	get_group_info(current->group_info);
-	i = current->group_info->ngroups;
-	if (gidsetsize) {
-		if (i > gidsetsize) {
-			i = -EINVAL;
-			goto out;
-		}
-		if (groups16_to_user(grouplist, current->group_info)) {
-			i = -EFAULT;
-			goto out;
-		}
-	}
-out:
-	put_group_info(current->group_info);
-	return i;
-}
-
-asmlinkage long
-sys32_setgroups16 (int gidsetsize, short __user *grouplist)
-{
-	struct group_info *group_info;
-	int retval;
-
-	if (!capable(CAP_SETGID))
-		return -EPERM;
-	if ((unsigned)gidsetsize > NGROUPS_MAX)
-		return -EINVAL;
-
-	group_info = groups_alloc(gidsetsize);
-	if (!group_info)
-		return -ENOMEM;
-	retval = groups16_from_user(group_info, grouplist);
-	if (retval) {
-		put_group_info(group_info);
-		return retval;
-	}
-
-	retval = set_current_groups(group_info);
-	put_group_info(group_info);
-
-	return retval;
-}
-
-asmlinkage long
-sys32_truncate64 (unsigned int path, unsigned int len_lo, unsigned int len_hi)
-{
-	return sys_truncate(compat_ptr(path), ((unsigned long) len_hi << 32) | len_lo);
-}
-
-asmlinkage long
-sys32_ftruncate64 (int fd, unsigned int len_lo, unsigned int len_hi)
-{
-	return sys_ftruncate(fd, ((unsigned long) len_hi << 32) | len_lo);
-}
-
-static int
-putstat64 (struct stat64 __user *ubuf, struct kstat *kbuf)
-{
-	int err;
-	u64 hdev;
-
-	if (clear_user(ubuf, sizeof(*ubuf)))
-		return -EFAULT;
-
-	hdev = huge_encode_dev(kbuf->dev);
-	err  = __put_user(hdev, (u32 __user*)&ubuf->st_dev);
-	err |= __put_user(hdev >> 32, ((u32 __user*)&ubuf->st_dev) + 1);
-	err |= __put_user(kbuf->ino, &ubuf->__st_ino);
-	err |= __put_user(kbuf->ino, &ubuf->st_ino_lo);
-	err |= __put_user(kbuf->ino >> 32, &ubuf->st_ino_hi);
-	err |= __put_user(kbuf->mode, &ubuf->st_mode);
-	err |= __put_user(kbuf->nlink, &ubuf->st_nlink);
-	err |= __put_user(kbuf->uid, &ubuf->st_uid);
-	err |= __put_user(kbuf->gid, &ubuf->st_gid);
-	hdev = huge_encode_dev(kbuf->rdev);
-	err  = __put_user(hdev, (u32 __user*)&ubuf->st_rdev);
-	err |= __put_user(hdev >> 32, ((u32 __user*)&ubuf->st_rdev) + 1);
-	err |= __put_user(kbuf->size, &ubuf->st_size_lo);
-	err |= __put_user((kbuf->size >> 32), &ubuf->st_size_hi);
-	err |= __put_user(kbuf->atime.tv_sec, &ubuf->st_atime);
-	err |= __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec);
-	err |= __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime);
-	err |= __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec);
-	err |= __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime);
-	err |= __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec);
-	err |= __put_user(kbuf->blksize, &ubuf->st_blksize);
-	err |= __put_user(kbuf->blocks, &ubuf->st_blocks);
-	return err;
-}
-
-asmlinkage long
-sys32_stat64 (char __user *filename, struct stat64 __user *statbuf)
-{
-	struct kstat s;
-	long ret = vfs_stat(filename, &s);
-	if (!ret)
-		ret = putstat64(statbuf, &s);
-	return ret;
-}
-
-asmlinkage long
-sys32_lstat64 (char __user *filename, struct stat64 __user *statbuf)
-{
-	struct kstat s;
-	long ret = vfs_lstat(filename, &s);
-	if (!ret)
-		ret = putstat64(statbuf, &s);
-	return ret;
-}
-
-asmlinkage long
-sys32_fstat64 (unsigned int fd, struct stat64 __user *statbuf)
-{
-	struct kstat s;
-	long ret = vfs_fstat(fd, &s);
-	if (!ret)
-		ret = putstat64(statbuf, &s);
-	return ret;
-}
-
-struct sysinfo32 {
-	s32 uptime;
-	u32 loads[3];
-	u32 totalram;
-	u32 freeram;
-	u32 sharedram;
-	u32 bufferram;
-	u32 totalswap;
-	u32 freeswap;
-	u16 procs;
-	u16 pad;
-	u32 totalhigh;
-	u32 freehigh;
-	u32 mem_unit;
-	char _f[8];
-};
-
-asmlinkage long
-sys32_sysinfo (struct sysinfo32 __user *info)
-{
-	struct sysinfo s;
-	long ret, err;
-	int bitcount = 0;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_sysinfo((struct sysinfo __user *) &s);
-	set_fs(old_fs);
-	/* Check to see if any memory value is too large for 32-bit and
-	 * scale down if needed.
-	 */
-	if ((s.totalram >> 32) || (s.totalswap >> 32)) {
-		while (s.mem_unit < PAGE_SIZE) {
-			s.mem_unit <<= 1;
-			bitcount++;
-		}
-		s.totalram >>= bitcount;
-		s.freeram >>= bitcount;
-		s.sharedram >>= bitcount;
-		s.bufferram >>= bitcount;
-		s.totalswap >>= bitcount;
-		s.freeswap >>= bitcount;
-		s.totalhigh >>= bitcount;
-		s.freehigh >>= bitcount;
-	}
-
-	if (!access_ok(VERIFY_WRITE, info, sizeof(*info)))
-		return -EFAULT;
-
-	err  = __put_user(s.uptime, &info->uptime);
-	err |= __put_user(s.loads[0], &info->loads[0]);
-	err |= __put_user(s.loads[1], &info->loads[1]);
-	err |= __put_user(s.loads[2], &info->loads[2]);
-	err |= __put_user(s.totalram, &info->totalram);
-	err |= __put_user(s.freeram, &info->freeram);
-	err |= __put_user(s.sharedram, &info->sharedram);
-	err |= __put_user(s.bufferram, &info->bufferram);
-	err |= __put_user(s.totalswap, &info->totalswap);
-	err |= __put_user(s.freeswap, &info->freeswap);
-	err |= __put_user(s.procs, &info->procs);
-	err |= __put_user (s.totalhigh, &info->totalhigh);
-	err |= __put_user (s.freehigh, &info->freehigh);
-	err |= __put_user (s.mem_unit, &info->mem_unit);
-	if (err)
-		return -EFAULT;
-	return ret;
-}
-
-asmlinkage long
-sys32_sched_rr_get_interval (pid_t pid, struct compat_timespec __user *interval)
-{
-	mm_segment_t old_fs = get_fs();
-	struct timespec t;
-	long ret;
-
-	set_fs(KERNEL_DS);
-	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *) &t);
-	set_fs(old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
-}
-
-asmlinkage long
-sys32_pread (unsigned int fd, void __user *buf, unsigned int count, u32 pos_lo, u32 pos_hi)
-{
-	return sys_pread64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo);
-}
-
-asmlinkage long
-sys32_pwrite (unsigned int fd, void __user *buf, unsigned int count, u32 pos_lo, u32 pos_hi)
-{
-	return sys_pwrite64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo);
-}
-
-asmlinkage long
-sys32_sendfile (int out_fd, int in_fd, int __user *offset, unsigned int count)
-{
-	mm_segment_t old_fs = get_fs();
-	long ret;
-	off_t of;
-
-	if (offset && get_user(of, offset))
-		return -EFAULT;
-
-	set_fs(KERNEL_DS);
-	ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *) &of : NULL, count);
-	set_fs(old_fs);
-
-	if (offset && put_user(of, offset))
-		return -EFAULT;
-
-	return ret;
-}
-
-asmlinkage long
-sys32_personality (unsigned int personality)
-{
-	long ret;
-
-	if (current->personality == PER_LINUX32 && personality == PER_LINUX)
-		personality = PER_LINUX32;
-	ret = sys_personality(personality);
-	if (ret == PER_LINUX32)
-		ret = PER_LINUX;
-	return ret;
-}
-
-asmlinkage unsigned long
-sys32_brk (unsigned int brk)
-{
-	unsigned long ret, obrk;
-	struct mm_struct *mm = current->mm;
-
-	obrk = mm->brk;
-	ret = sys_brk(brk);
-	if (ret < obrk)
-		clear_user(compat_ptr(ret), PAGE_ALIGN(ret) - ret);
-	return ret;
-}
-
-/* Structure for ia32 emulation on ia64 */
-struct epoll_event32
-{
-	u32 events;
-	u32 data[2];
-};
-
-asmlinkage long
-sys32_epoll_ctl(int epfd, int op, int fd, struct epoll_event32 __user *event)
-{
-	mm_segment_t old_fs = get_fs();
-	struct epoll_event event64;
-	int error;
-	u32 data_halfword;
-
-	if (!access_ok(VERIFY_READ, event, sizeof(struct epoll_event32)))
-		return -EFAULT;
-
-	__get_user(event64.events, &event->events);
-	__get_user(data_halfword, &event->data[0]);
-	event64.data = data_halfword;
-	__get_user(data_halfword, &event->data[1]);
- 	event64.data |= (u64)data_halfword << 32;
-
-	set_fs(KERNEL_DS);
-	error = sys_epoll_ctl(epfd, op, fd, (struct epoll_event __user *) &event64);
-	set_fs(old_fs);
-
-	return error;
-}
-
-asmlinkage long
-sys32_epoll_wait(int epfd, struct epoll_event32 __user * events, int maxevents,
-		 int timeout)
-{
-	struct epoll_event *events64 = NULL;
-	mm_segment_t old_fs = get_fs();
-	int numevents, size;
-	int evt_idx;
-	int do_free_pages = 0;
-
-	if (maxevents <= 0) {
-		return -EINVAL;
-	}
-
-	/* Verify that the area passed by the user is writeable */
-	if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event32)))
-		return -EFAULT;
-
-	/*
- 	 * Allocate space for the intermediate copy.  If the space needed
-	 * is large enough to cause kmalloc to fail, then try again with
-	 * __get_free_pages.
-	 */
-	size = maxevents * sizeof(struct epoll_event);
-	events64 = kmalloc(size, GFP_KERNEL);
-	if (events64 == NULL) {
-		events64 = (struct epoll_event *)
-				__get_free_pages(GFP_KERNEL, get_order(size));
-		if (events64 == NULL)
-			return -ENOMEM;
-		do_free_pages = 1;
-	}
-
-	/* Do the system call */
-	set_fs(KERNEL_DS); /* copy_to/from_user should work on kernel mem*/
-	numevents = sys_epoll_wait(epfd, (struct epoll_event __user *) events64,
-				   maxevents, timeout);
-	set_fs(old_fs);
-
-	/* Don't modify userspace memory if we're returning an error */
-	if (numevents > 0) {
-		/* Translate the 64-bit structures back into the 32-bit
-		   structures */
-		for (evt_idx = 0; evt_idx < numevents; evt_idx++) {
-			__put_user(events64[evt_idx].events,
-				   &events[evt_idx].events);
-			__put_user((u32)events64[evt_idx].data,
-				   &events[evt_idx].data[0]);
-			__put_user((u32)(events64[evt_idx].data >> 32),
-				   &events[evt_idx].data[1]);
-		}
-	}
-
-	if (do_free_pages)
-		free_pages((unsigned long) events64, get_order(size));
-	else
-		kfree(events64);
-	return numevents;
-}
-
-/*
- * Get a yet unused TLS descriptor index.
- */
-static int
-get_free_idx (void)
-{
-	struct thread_struct *t = &current->thread;
-	int idx;
-
-	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
-		if (desc_empty(t->tls_array + idx))
-			return idx + GDT_ENTRY_TLS_MIN;
-	return -ESRCH;
-}
-
-/*
- * Set a given TLS descriptor:
- */
-asmlinkage int
-sys32_set_thread_area (struct ia32_user_desc __user *u_info)
-{
-	struct thread_struct *t = &current->thread;
-	struct ia32_user_desc info;
-	struct desc_struct *desc;
-	int cpu, idx;
-
-	if (copy_from_user(&info, u_info, sizeof(info)))
-		return -EFAULT;
-	idx = info.entry_number;
-
-	/*
-	 * index -1 means the kernel should try to find and allocate an empty descriptor:
-	 */
-	if (idx == -1) {
-		idx = get_free_idx();
-		if (idx < 0)
-			return idx;
-		if (put_user(idx, &u_info->entry_number))
-			return -EFAULT;
-	}
-
-	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-		return -EINVAL;
-
-	desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
-
-	cpu = smp_processor_id();
-
-	if (LDT_empty(&info)) {
-		desc->a = 0;
-		desc->b = 0;
-	} else {
-		desc->a = LDT_entry_a(&info);
-		desc->b = LDT_entry_b(&info);
-	}
-	load_TLS(t, cpu);
-	return 0;
-}
-
-/*
- * Get the current Thread-Local Storage area:
- */
-
-#define GET_BASE(desc) (			\
-	(((desc)->a >> 16) & 0x0000ffff) |	\
-	(((desc)->b << 16) & 0x00ff0000) |	\
-	( (desc)->b        & 0xff000000)   )
-
-#define GET_LIMIT(desc) (			\
-	((desc)->a & 0x0ffff) |			\
-	 ((desc)->b & 0xf0000) )
-
-#define GET_32BIT(desc)		(((desc)->b >> 22) & 1)
-#define GET_CONTENTS(desc)	(((desc)->b >> 10) & 3)
-#define GET_WRITABLE(desc)	(((desc)->b >>  9) & 1)
-#define GET_LIMIT_PAGES(desc)	(((desc)->b >> 23) & 1)
-#define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
-#define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
-
-asmlinkage int
-sys32_get_thread_area (struct ia32_user_desc __user *u_info)
-{
-	struct ia32_user_desc info;
-	struct desc_struct *desc;
-	int idx;
-
-	if (get_user(idx, &u_info->entry_number))
-		return -EFAULT;
-	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-		return -EINVAL;
-
-	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-
-	info.entry_number = idx;
-	info.base_addr = GET_BASE(desc);
-	info.limit = GET_LIMIT(desc);
-	info.seg_32bit = GET_32BIT(desc);
-	info.contents = GET_CONTENTS(desc);
-	info.read_exec_only = !GET_WRITABLE(desc);
-	info.limit_in_pages = GET_LIMIT_PAGES(desc);
-	info.seg_not_present = !GET_PRESENT(desc);
-	info.useable = GET_USEABLE(desc);
-
-	if (copy_to_user(u_info, &info, sizeof(info)))
-		return -EFAULT;
-	return 0;
-}
-
-long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, 
-			__u32 len_low, __u32 len_high, int advice)
-{ 
-	return sys_fadvise64_64(fd,
-			       (((u64)offset_high)<<32) | offset_low,
-			       (((u64)len_high)<<32) | len_low,
-			       advice); 
-} 
-
-#ifdef	NOTYET  /* UNTESTED FOR IA64 FROM HERE DOWN */
-
-asmlinkage long sys32_setreuid(compat_uid_t ruid, compat_uid_t euid)
-{
-	uid_t sruid, seuid;
-
-	sruid = (ruid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)ruid);
-	seuid = (euid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)euid);
-	return sys_setreuid(sruid, seuid);
-}
-
-asmlinkage long
-sys32_setresuid(compat_uid_t ruid, compat_uid_t euid,
-		compat_uid_t suid)
-{
-	uid_t sruid, seuid, ssuid;
-
-	sruid = (ruid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)ruid);
-	seuid = (euid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)euid);
-	ssuid = (suid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)suid);
-	return sys_setresuid(sruid, seuid, ssuid);
-}
-
-asmlinkage long
-sys32_setregid(compat_gid_t rgid, compat_gid_t egid)
-{
-	gid_t srgid, segid;
-
-	srgid = (rgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)rgid);
-	segid = (egid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)egid);
-	return sys_setregid(srgid, segid);
-}
-
-asmlinkage long
-sys32_setresgid(compat_gid_t rgid, compat_gid_t egid,
-		compat_gid_t sgid)
-{
-	gid_t srgid, segid, ssgid;
-
-	srgid = (rgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)rgid);
-	segid = (egid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)egid);
-	ssgid = (sgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)sgid);
-	return sys_setresgid(srgid, segid, ssgid);
-}
-#endif /* NOTYET */
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
new file mode 100644
index 00000000000..0da4aa2602a
--- /dev/null
+++ b/arch/ia64/include/asm/Kbuild
@@ -0,0 +1,9 @@
+
+generic-y += clkdev.h
+generic-y += exec.h
+generic-y += hash.h
+generic-y += kvm_para.h
+generic-y += mcs_spinlock.h
+generic-y += preempt.h
+generic-y += trace_clock.h
+generic-y += vtime.h
diff --git a/arch/ia64/include/asm/acenv.h b/arch/ia64/include/asm/acenv.h
new file mode 100644
index 00000000000..3f9eaeec987
--- /dev/null
+++ b/arch/ia64/include/asm/acenv.h
@@ -0,0 +1,56 @@
+/*
+ * IA64 specific ACPICA environments and implementation
+ *
+ * Copyright (C) 2014, Intel Corporation
+ *   Author: Lv Zheng <lv.zheng@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_IA64_ACENV_H
+#define _ASM_IA64_ACENV_H
+
+#include <asm/intrinsics.h>
+
+#define COMPILER_DEPENDENT_INT64	long
+#define COMPILER_DEPENDENT_UINT64	unsigned long
+
+/* Asm macros */
+
+#ifdef CONFIG_ACPI
+
+static inline int
+ia64_acpi_acquire_global_lock(unsigned int *lock)
+{
+	unsigned int old, new, val;
+	do {
+		old = *lock;
+		new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1));
+		val = ia64_cmpxchg4_acq(lock, new, old);
+	} while (unlikely (val != old));
+	return (new < 3) ? -1 : 0;
+}
+
+static inline int
+ia64_acpi_release_global_lock(unsigned int *lock)
+{
+	unsigned int old, new, val;
+	do {
+		old = *lock;
+		new = old & ~0x3;
+		val = ia64_cmpxchg4_acq(lock, new, old);
+	} while (unlikely (val != old));
+	return old & 0x1;
+}
+
+#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq)				\
+	((Acq) = ia64_acpi_acquire_global_lock(&facs->global_lock))
+
+#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq)				\
+	((Acq) = ia64_acpi_release_global_lock(&facs->global_lock))
+
+#endif
+
+#endif /* _ASM_IA64_ACENV_H */
diff --git a/arch/ia64/include/asm/acpi-ext.h b/arch/ia64/include/asm/acpi-ext.h
new file mode 100644
index 00000000000..7f8362b379e
--- /dev/null
+++ b/arch/ia64/include/asm/acpi-ext.h
@@ -0,0 +1,20 @@
+/*
+ * (c) Copyright 2003, 2006 Hewlett-Packard Development Company, L.P.
+ *	Alex Williamson <alex.williamson@hp.com>
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Vendor specific extensions to ACPI.
+ */
+
+#ifndef _ASM_IA64_ACPI_EXT_H
+#define _ASM_IA64_ACPI_EXT_H
+
+#include <linux/types.h>
+
+extern acpi_status hp_acpi_csr_space (acpi_handle, u64 *base, u64 *length);
+
+#endif /* _ASM_IA64_ACPI_EXT_H */
diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
new file mode 100644
index 00000000000..75dc59a793d
--- /dev/null
+++ b/arch/ia64/include/asm/acpi.h
@@ -0,0 +1,142 @@
+/*
+ *  Copyright (C) 1999 VA Linux Systems
+ *  Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ *  Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com>
+ *  Copyright (C) 2001,2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#ifndef _ASM_ACPI_H
+#define _ASM_ACPI_H
+
+#ifdef __KERNEL__
+
+#include <acpi/pdc_intel.h>
+
+#include <linux/init.h>
+#include <linux/numa.h>
+#include <asm/numa.h>
+
+#ifdef	CONFIG_ACPI
+extern int acpi_lapic;
+#define acpi_disabled 0	/* ACPI always enabled on IA64 */
+#define acpi_noirq 0	/* ACPI always enabled on IA64 */
+#define acpi_pci_disabled 0 /* ACPI PCI always enabled on IA64 */
+#define acpi_strict 1	/* no ACPI spec workarounds on IA64 */
+#endif
+#define acpi_processor_cstate_check(x) (x) /* no idle limits on IA64 :) */
+static inline void disable_acpi(void) { }
+
+#ifdef CONFIG_IA64_GENERIC
+const char *acpi_get_sysname (void);
+#else
+static inline const char *acpi_get_sysname (void)
+{
+# if defined (CONFIG_IA64_HP_SIM)
+	return "hpsim";
+# elif defined (CONFIG_IA64_HP_ZX1)
+	return "hpzx1";
+# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
+	return "hpzx1_swiotlb";
+# elif defined (CONFIG_IA64_SGI_SN2)
+	return "sn2";
+# elif defined (CONFIG_IA64_SGI_UV)
+	return "uv";
+# elif defined (CONFIG_IA64_DIG)
+	return "dig";
+# elif defined(CONFIG_IA64_DIG_VTD)
+	return "dig_vtd";
+# else
+#	error Unknown platform.  Fix acpi.c.
+# endif
+}
+#endif
+int acpi_request_vector (u32 int_type);
+int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
+
+/* Low-level suspend routine. */
+extern int acpi_suspend_lowlevel(void);
+
+extern unsigned long acpi_wakeup_address;
+
+/*
+ * Record the cpei override flag and current logical cpu. This is
+ * useful for CPU removal.
+ */
+extern unsigned int can_cpei_retarget(void);
+extern unsigned int is_cpu_cpei_target(unsigned int cpu);
+extern void set_cpei_target_cpu(unsigned int cpu);
+extern unsigned int get_cpei_target_cpu(void);
+extern void prefill_possible_map(void);
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+extern int additional_cpus;
+#else
+#define additional_cpus 0
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+#if MAX_NUMNODES > 256
+#define MAX_PXM_DOMAINS MAX_NUMNODES
+#else
+#define MAX_PXM_DOMAINS (256)
+#endif
+extern int pxm_to_nid_map[MAX_PXM_DOMAINS];
+extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
+#endif
+
+static inline bool arch_has_acpi_pdc(void) { return true; }
+static inline void arch_acpi_set_pdc_bits(u32 *buf)
+{
+	buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
+}
+
+#define acpi_unlazy_tlb(x)
+
+#ifdef CONFIG_ACPI_NUMA
+extern cpumask_t early_cpu_possible_map;
+#define for_each_possible_early_cpu(cpu)  \
+	for_each_cpu_mask((cpu), early_cpu_possible_map)
+
+static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus)
+{
+	int low_cpu, high_cpu;
+	int cpu;
+	int next_nid = 0;
+
+	low_cpu = cpus_weight(early_cpu_possible_map);
+
+	high_cpu = max(low_cpu, min_cpus);
+	high_cpu = min(high_cpu + reserve_cpus, NR_CPUS);
+
+	for (cpu = low_cpu; cpu < high_cpu; cpu++) {
+		cpu_set(cpu, early_cpu_possible_map);
+		if (node_cpuid[cpu].nid == NUMA_NO_NODE) {
+			node_cpuid[cpu].nid = next_nid;
+			next_nid++;
+			if (next_nid >= num_online_nodes())
+				next_nid = 0;
+		}
+	}
+}
+#endif /* CONFIG_ACPI_NUMA */
+
+#endif /*__KERNEL__*/
+
+#endif /*_ASM_ACPI_H*/
diff --git a/arch/ia64/include/asm/agp.h b/arch/ia64/include/asm/agp.h
new file mode 100644
index 00000000000..01d09c401c5
--- /dev/null
+++ b/arch/ia64/include/asm/agp.h
@@ -0,0 +1,26 @@
+#ifndef _ASM_IA64_AGP_H
+#define _ASM_IA64_AGP_H
+
+/*
+ * IA-64 specific AGP definitions.
+ *
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * To avoid memory-attribute aliasing issues, we require that the AGPGART engine operate
+ * in coherent mode, which lets us map the AGP memory as normal (write-back) memory
+ * (unlike x86, where it gets mapped "write-coalescing").
+ */
+#define map_page_into_agp(page)		/* nothing */
+#define unmap_page_from_agp(page)	/* nothing */
+#define flush_agp_cache()		mb()
+
+/* GATT allocation. Returns/accepts GATT kernel virtual address. */
+#define alloc_gatt_pages(order)		\
+	((char *)__get_free_pages(GFP_KERNEL, (order)))
+#define free_gatt_pages(table, order)	\
+	free_pages((unsigned long)(table), (order))
+
+#endif /* _ASM_IA64_AGP_H */
diff --git a/arch/ia64/include/asm/asm-offsets.h b/arch/ia64/include/asm/asm-offsets.h
new file mode 100644
index 00000000000..d370ee36a18
--- /dev/null
+++ b/arch/ia64/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/ia64/include/asm/asmmacro.h b/arch/ia64/include/asm/asmmacro.h
new file mode 100644
index 00000000000..3ab6d75aa3d
--- /dev/null
+++ b/arch/ia64/include/asm/asmmacro.h
@@ -0,0 +1,135 @@
+#ifndef _ASM_IA64_ASMMACRO_H
+#define _ASM_IA64_ASMMACRO_H
+
+/*
+ * Copyright (C) 2000-2001, 2003-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#define ENTRY(name)				\
+	.align 32;				\
+	.proc name;				\
+name:
+
+#define ENTRY_MIN_ALIGN(name)			\
+	.align 16;				\
+	.proc name;				\
+name:
+
+#define GLOBAL_ENTRY(name)			\
+	.global name;				\
+	ENTRY(name)
+
+#define END(name)				\
+	.endp name
+
+/*
+ * Helper macros to make unwind directives more readable:
+ */
+
+/* prologue_gr: */
+#define ASM_UNW_PRLG_RP			0x8
+#define ASM_UNW_PRLG_PFS		0x4
+#define ASM_UNW_PRLG_PSP		0x2
+#define ASM_UNW_PRLG_PR			0x1
+#define ASM_UNW_PRLG_GRSAVE(ninputs)	(32+(ninputs))
+
+/*
+ * Helper macros for accessing user memory.
+ *
+ * When adding any new .section/.previous entries here, make sure to
+ * also add it to the DISCARD section in arch/ia64/kernel/gate.lds.S or
+ * unpleasant things will happen.
+ */
+
+	.section "__ex_table", "a"		// declare section & section attributes
+	.previous
+
+# define EX(y,x...)				\
+	.xdata4 "__ex_table", 99f-., y-.;	\
+  [99:]	x
+# define EXCLR(y,x...)				\
+	.xdata4 "__ex_table", 99f-., y-.+4;	\
+  [99:]	x
+
+/*
+ * Tag MCA recoverable instruction ranges.
+ */
+
+	.section "__mca_table", "a"		// declare section & section attributes
+	.previous
+
+# define MCA_RECOVER_RANGE(y)			\
+	.xdata4 "__mca_table", y-., 99f-.;	\
+  [99:]
+
+/*
+ * Mark instructions that need a load of a virtual address patched to be
+ * a load of a physical address.  We use this either in critical performance
+ * path (ivt.S - TLB miss processing) or in places where it might not be
+ * safe to use a "tpa" instruction (mca_asm.S - error recovery).
+ */
+	.section ".data..patch.vtop", "a"	// declare section & section attributes
+	.previous
+
+#define	LOAD_PHYSICAL(pr, reg, obj)		\
+[1:](pr)movl reg = obj;				\
+	.xdata4 ".data..patch.vtop", 1b-.
+
+/*
+ * For now, we always put in the McKinley E9 workaround.  On CPUs that don't need it,
+ * we'll patch out the work-around bundles with NOPs, so their impact is minimal.
+ */
+#define DO_MCKINLEY_E9_WORKAROUND
+
+#ifdef DO_MCKINLEY_E9_WORKAROUND
+	.section ".data..patch.mckinley_e9", "a"
+	.previous
+/* workaround for Itanium 2 Errata 9: */
+# define FSYS_RETURN					\
+	.xdata4 ".data..patch.mckinley_e9", 1f-.;	\
+1:{ .mib;						\
+	nop.m 0;					\
+	mov r16=ar.pfs;					\
+	br.call.sptk.many b7=2f;;			\
+  };							\
+2:{ .mib;						\
+	nop.m 0;					\
+	mov ar.pfs=r16;					\
+	br.ret.sptk.many b6;;				\
+  }
+#else
+# define FSYS_RETURN	br.ret.sptk.many b6
+#endif
+
+/*
+ * If physical stack register size is different from DEF_NUM_STACK_REG,
+ * dynamically patch the kernel for correct size.
+ */
+	.section ".data..patch.phys_stack_reg", "a"
+	.previous
+#define LOAD_PHYS_STACK_REG_SIZE(reg)			\
+[1:]	adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0;	\
+	.xdata4 ".data..patch.phys_stack_reg", 1b-.
+
+/*
+ * Up until early 2004, use of .align within a function caused bad unwind info.
+ * TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing
+ * otherwise.
+ */
+#ifdef HAVE_WORKING_TEXT_ALIGN
+# define TEXT_ALIGN(n)	.align n
+#else
+# define TEXT_ALIGN(n)
+#endif
+
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+# define dv_serialize_data		.serialize.data
+# define dv_serialize_instruction	.serialize.instruction
+#else
+# define dv_serialize_data
+# define dv_serialize_instruction
+#endif
+
+#endif /* _ASM_IA64_ASMMACRO_H */
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
new file mode 100644
index 00000000000..0f8bf48dadf
--- /dev/null
+++ b/arch/ia64/include/asm/atomic.h
@@ -0,0 +1,212 @@
+#ifndef _ASM_IA64_ATOMIC_H
+#define _ASM_IA64_ATOMIC_H
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ *
+ * NOTE: don't mess with the types below!  The "unsigned long" and
+ * "int" types were carefully placed so as to ensure proper operation
+ * of the macros.
+ *
+ * Copyright (C) 1998, 1999, 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/types.h>
+
+#include <asm/intrinsics.h>
+#include <asm/barrier.h>
+
+
+#define ATOMIC_INIT(i)		{ (i) }
+#define ATOMIC64_INIT(i)	{ (i) }
+
+#define atomic_read(v)		(*(volatile int *)&(v)->counter)
+#define atomic64_read(v)	(*(volatile long *)&(v)->counter)
+
+#define atomic_set(v,i)		(((v)->counter) = (i))
+#define atomic64_set(v,i)	(((v)->counter) = (i))
+
+static __inline__ int
+ia64_atomic_add (int i, atomic_t *v)
+{
+	__s32 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic_read(v);
+		new = old + i;
+	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old);
+	return new;
+}
+
+static __inline__ long
+ia64_atomic64_add (__s64 i, atomic64_t *v)
+{
+	__s64 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic64_read(v);
+		new = old + i;
+	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old);
+	return new;
+}
+
+static __inline__ int
+ia64_atomic_sub (int i, atomic_t *v)
+{
+	__s32 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic_read(v);
+		new = old - i;
+	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old);
+	return new;
+}
+
+static __inline__ long
+ia64_atomic64_sub (__s64 i, atomic64_t *v)
+{
+	__s64 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic64_read(v);
+		new = old - i;
+	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old);
+	return new;
+}
+
+#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+#define atomic64_cmpxchg(v, old, new) \
+	(cmpxchg(&((v)->counter), old, new))
+#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+
+static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int c, old;
+	c = atomic_read(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+		old = atomic_cmpxchg((v), c, c + (a));
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c;
+}
+
+
+static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+	long c, old;
+	c = atomic64_read(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+		old = atomic64_cmpxchg((v), c, c + (a));
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != (u);
+}
+
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+
+#define atomic_add_return(i,v)						\
+({									\
+	int __ia64_aar_i = (i);						\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
+	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
+	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
+	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
+		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
+		: ia64_atomic_add(__ia64_aar_i, v);			\
+})
+
+#define atomic64_add_return(i,v)					\
+({									\
+	long __ia64_aar_i = (i);					\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
+	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
+	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
+	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
+		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
+		: ia64_atomic64_add(__ia64_aar_i, v);			\
+})
+
+/*
+ * Atomically add I to V and return TRUE if the resulting value is
+ * negative.
+ */
+static __inline__ int
+atomic_add_negative (int i, atomic_t *v)
+{
+	return atomic_add_return(i, v) < 0;
+}
+
+static __inline__ long
+atomic64_add_negative (__s64 i, atomic64_t *v)
+{
+	return atomic64_add_return(i, v) < 0;
+}
+
+#define atomic_sub_return(i,v)						\
+({									\
+	int __ia64_asr_i = (i);						\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\
+	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\
+	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\
+	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\
+		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
+		: ia64_atomic_sub(__ia64_asr_i, v);			\
+})
+
+#define atomic64_sub_return(i,v)					\
+({									\
+	long __ia64_asr_i = (i);					\
+	(__builtin_constant_p(i)					\
+	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\
+	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\
+	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\
+	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\
+		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
+		: ia64_atomic64_sub(__ia64_asr_i, v);			\
+})
+
+#define atomic_dec_return(v)		atomic_sub_return(1, (v))
+#define atomic_inc_return(v)		atomic_add_return(1, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
+
+#define atomic_sub_and_test(i,v)	(atomic_sub_return((i), (v)) == 0)
+#define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
+#define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) == 0)
+#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i), (v)) == 0)
+#define atomic64_dec_and_test(v)	(atomic64_sub_return(1, (v)) == 0)
+#define atomic64_inc_and_test(v)	(atomic64_add_return(1, (v)) == 0)
+
+#define atomic_add(i,v)			atomic_add_return((i), (v))
+#define atomic_sub(i,v)			atomic_sub_return((i), (v))
+#define atomic_inc(v)			atomic_add(1, (v))
+#define atomic_dec(v)			atomic_sub(1, (v))
+
+#define atomic64_add(i,v)		atomic64_add_return((i), (v))
+#define atomic64_sub(i,v)		atomic64_sub_return((i), (v))
+#define atomic64_inc(v)			atomic64_add(1, (v))
+#define atomic64_dec(v)			atomic64_sub(1, (v))
+
+#endif /* _ASM_IA64_ATOMIC_H */
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
new file mode 100644
index 00000000000..a48957c7b44
--- /dev/null
+++ b/arch/ia64/include/asm/barrier.h
@@ -0,0 +1,94 @@
+/*
+ * Memory barrier definitions.  This is based on information published
+ * in the Processor Abstraction Layer and the System Abstraction Layer
+ * manual.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+#ifndef _ASM_IA64_BARRIER_H
+#define _ASM_IA64_BARRIER_H
+
+#include <linux/compiler.h>
+
+/*
+ * Macros to force memory ordering.  In these descriptions, "previous"
+ * and "subsequent" refer to program order; "visible" means that all
+ * architecturally visible effects of a memory access have occurred
+ * (at a minimum, this means the memory has been read or written).
+ *
+ *   wmb():	Guarantees that all preceding stores to memory-
+ *		like regions are visible before any subsequent
+ *		stores and that all following stores will be
+ *		visible only after all previous stores.
+ *   rmb():	Like wmb(), but for reads.
+ *   mb():	wmb()/rmb() combo, i.e., all previous memory
+ *		accesses are visible before all subsequent
+ *		accesses and vice versa.  This is also known as
+ *		a "fence."
+ *
+ * Note: "mb()" and its variants cannot be used as a fence to order
+ * accesses to memory mapped I/O registers.  For that, mf.a needs to
+ * be used.  However, we don't want to always use mf.a because (a)
+ * it's (presumably) much slower than mf and (b) mf.a is supported for
+ * sequential memory pages only.
+ */
+#define mb()	ia64_mf()
+#define rmb()	mb()
+#define wmb()	mb()
+#define read_barrier_depends()	do { } while(0)
+
+#ifdef CONFIG_SMP
+# define smp_mb()	mb()
+# define smp_rmb()	rmb()
+# define smp_wmb()	wmb()
+# define smp_read_barrier_depends()	read_barrier_depends()
+
+#else
+
+# define smp_mb()	barrier()
+# define smp_rmb()	barrier()
+# define smp_wmb()	barrier()
+# define smp_read_barrier_depends()	do { } while(0)
+
+#endif
+
+#define smp_mb__before_atomic()	barrier()
+#define smp_mb__after_atomic()	barrier()
+
+/*
+ * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no
+ * need for asm trickery!
+ */
+
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
+/*
+ * XXX check on this ---I suspect what Linus really wants here is
+ * acquire vs release semantics but we can't discuss this stuff with
+ * Linus just yet.  Grrr...
+ */
+#define set_mb(var, value)	do { (var) = (value); mb(); } while (0)
+
+/*
+ * The group barrier in front of the rsm & ssm are necessary to ensure
+ * that none of the previous instructions in the same group are
+ * affected by the rsm/ssm.
+ */
+
+#endif /* _ASM_IA64_BARRIER_H */
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
new file mode 100644
index 00000000000..71e8145243e
--- /dev/null
+++ b/arch/ia64/include/asm/bitops.h
@@ -0,0 +1,456 @@
+#ifndef _ASM_IA64_BITOPS_H
+#define _ASM_IA64_BITOPS_H
+
+/*
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64
+ * O(1) scheduler patch
+ */
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/intrinsics.h>
+#include <asm/barrier.h>
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.  See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ *
+ * The address must be (at least) "long" aligned.
+ * Note that there are driver (e.g., eepro100) which use these operations to
+ * operate on hw-defined data-structures, so we can't easily change these
+ * operations to force a bigger alignment.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+static __inline__ void
+set_bit (int nr, volatile void *addr)
+{
+	__u32 bit, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	bit = 1 << (nr & 31);
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old | bit;
+	} while (cmpxchg_acq(m, old, new) != old);
+}
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void
+__set_bit (int nr, volatile void *addr)
+{
+	*((__u32 *) addr + (nr >> 5)) |= (1 << (nr & 31));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void
+clear_bit (int nr, volatile void *addr)
+{
+	__u32 mask, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	mask = ~(1 << (nr & 31));
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old & mask;
+	} while (cmpxchg_acq(m, old, new) != old);
+}
+
+/**
+ * clear_bit_unlock - Clears a bit in memory with release
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit_unlock() is atomic and may not be reordered.  It does
+ * contain a memory barrier suitable for unlock type operations.
+ */
+static __inline__ void
+clear_bit_unlock (int nr, volatile void *addr)
+{
+	__u32 mask, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	mask = ~(1 << (nr & 31));
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old & mask;
+	} while (cmpxchg_rel(m, old, new) != old);
+}
+
+/**
+ * __clear_bit_unlock - Non-atomically clears a bit in memory with release
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * Similarly to clear_bit_unlock, the implementation uses a store
+ * with release semantics. See also arch_spin_unlock().
+ */
+static __inline__ void
+__clear_bit_unlock(int nr, void *addr)
+{
+	__u32 * const m = (__u32 *) addr + (nr >> 5);
+	__u32 const new = *m & ~(1 << (nr & 31));
+
+	ia64_st4_rel_nta(m, new);
+}
+
+/**
+ * __clear_bit - Clears a bit in memory (non-atomic version)
+ * @nr: the bit to clear
+ * @addr: the address to start counting from
+ *
+ * Unlike clear_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void
+__clear_bit (int nr, volatile void *addr)
+{
+	*((__u32 *) addr + (nr >> 5)) &= ~(1 << (nr & 31));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to toggle
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void
+change_bit (int nr, volatile void *addr)
+{
+	__u32 bit, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	bit = (1 << (nr & 31));
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old ^ bit;
+	} while (cmpxchg_acq(m, old, new) != old);
+}
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to toggle
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void
+__change_bit (int nr, volatile void *addr)
+{
+	*((__u32 *) addr + (nr >> 5)) ^= (1 << (nr & 31));
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies the acquisition side of the memory barrier.
+ */
+static __inline__ int
+test_and_set_bit (int nr, volatile void *addr)
+{
+	__u32 bit, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	bit = 1 << (nr & 31);
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old | bit;
+	} while (cmpxchg_acq(m, old, new) != old);
+	return (old & bit) != 0;
+}
+
+/**
+ * test_and_set_bit_lock - Set a bit and return its old value for lock
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This is the same as test_and_set_bit on ia64
+ */
+#define test_and_set_bit_lock test_and_set_bit
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.  
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static __inline__ int
+__test_and_set_bit (int nr, volatile void *addr)
+{
+	__u32 *p = (__u32 *) addr + (nr >> 5);
+	__u32 m = 1 << (nr & 31);
+	int oldbitset = (*p & m) != 0;
+
+	*p |= m;
+	return oldbitset;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies the acquisition side of the memory barrier.
+ */
+static __inline__ int
+test_and_clear_bit (int nr, volatile void *addr)
+{
+	__u32 mask, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	mask = ~(1 << (nr & 31));
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old & mask;
+	} while (cmpxchg_acq(m, old, new) != old);
+	return (old & ~mask) != 0;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.  
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static __inline__ int
+__test_and_clear_bit(int nr, volatile void * addr)
+{
+	__u32 *p = (__u32 *) addr + (nr >> 5);
+	__u32 m = 1 << (nr & 31);
+	int oldbitset = (*p & m) != 0;
+
+	*p &= ~m;
+	return oldbitset;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies the acquisition side of the memory barrier.
+ */
+static __inline__ int
+test_and_change_bit (int nr, volatile void *addr)
+{
+	__u32 bit, old, new;
+	volatile __u32 *m;
+	CMPXCHG_BUGCHECK_DECL
+
+	m = (volatile __u32 *) addr + (nr >> 5);
+	bit = (1 << (nr & 31));
+	do {
+		CMPXCHG_BUGCHECK(m);
+		old = *m;
+		new = old ^ bit;
+	} while (cmpxchg_acq(m, old, new) != old);
+	return (old & bit) != 0;
+}
+
+/**
+ * __test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ */
+static __inline__ int
+__test_and_change_bit (int nr, void *addr)
+{
+	__u32 old, bit = (1 << (nr & 31));
+	__u32 *m = (__u32 *) addr + (nr >> 5);
+
+	old = *m;
+	*m = old ^ bit;
+	return (old & bit) != 0;
+}
+
+static __inline__ int
+test_bit (int nr, const volatile void *addr)
+{
+	return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31));
+}
+
+/**
+ * ffz - find the first zero bit in a long word
+ * @x: The long word to find the bit in
+ *
+ * Returns the bit-number (0..63) of the first (least significant) zero bit.
+ * Undefined if no zero exists, so code should check against ~0UL first...
+ */
+static inline unsigned long
+ffz (unsigned long x)
+{
+	unsigned long result;
+
+	result = ia64_popcnt(x & (~x - 1));
+	return result;
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @x: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static __inline__ unsigned long
+__ffs (unsigned long x)
+{
+	unsigned long result;
+
+	result = ia64_popcnt((x-1) & ~x);
+	return result;
+}
+
+#ifdef __KERNEL__
+
+/*
+ * Return bit number of last (most-significant) bit set.  Undefined
+ * for x==0.  Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3).
+ */
+static inline unsigned long
+ia64_fls (unsigned long x)
+{
+	long double d = x;
+	long exp;
+
+	exp = ia64_getf_exp(d);
+	return exp - 0xffff;
+}
+
+/*
+ * Find the last (most significant) bit set.  Returns 0 for x==0 and
+ * bits are numbered from 1..32 (e.g., fls(9) == 4).
+ */
+static inline int
+fls (int t)
+{
+	unsigned long x = t & 0xffffffffu;
+
+	if (!x)
+		return 0;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	return ia64_popcnt(x);
+}
+
+/*
+ * Find the last (most significant) bit set.  Undefined for x==0.
+ * Bits are numbered from 0..63 (e.g., __fls(9) == 3).
+ */
+static inline unsigned long
+__fls (unsigned long x)
+{
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	x |= x >> 32;
+	return ia64_popcnt(x) - 1;
+}
+
+#include <asm-generic/bitops/fls64.h>
+
+#include <asm-generic/bitops/builtin-ffs.h>
+
+/*
+ * hweightN: returns the hamming weight (i.e. the number
+ * of bits set) of a N-bit word
+ */
+static __inline__ unsigned long __arch_hweight64(unsigned long x)
+{
+	unsigned long result;
+	result = ia64_popcnt(x);
+	return result;
+}
+
+#define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful))
+#define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful))
+#define __arch_hweight8(x)  ((unsigned int) __arch_hweight64((x) & 0xfful))
+
+#include <asm-generic/bitops/const_hweight.h>
+
+#endif /* __KERNEL__ */
+
+#include <asm-generic/bitops/find.h>
+
+#ifdef __KERNEL__
+
+#include <asm-generic/bitops/le.h>
+
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
+
+#include <asm-generic/bitops/sched.h>
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_IA64_BITOPS_H */
diff --git a/arch/ia64/include/asm/bug.h b/arch/ia64/include/asm/bug.h
new file mode 100644
index 00000000000..823616b5020
--- /dev/null
+++ b/arch/ia64/include/asm/bug.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_IA64_BUG_H
+#define _ASM_IA64_BUG_H
+
+#ifdef CONFIG_BUG
+#define ia64_abort()	__builtin_trap()
+#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0)
+
+/* should this BUG be made generic? */
+#define HAVE_ARCH_BUG
+#endif
+
+#include <asm-generic/bug.h>
+
+#endif
diff --git a/arch/ia64/include/asm/bugs.h b/arch/ia64/include/asm/bugs.h
new file mode 100644
index 00000000000..433523e3b2e
--- /dev/null
+++ b/arch/ia64/include/asm/bugs.h
@@ -0,0 +1,19 @@
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ *	void check_bugs(void);
+ *
+ * Based on <asm-alpha/bugs.h>.
+ *
+ * Modified 1998, 1999, 2003
+ *	David Mosberger-Tang <davidm@hpl.hp.com>,  Hewlett-Packard Co.
+ */
+#ifndef _ASM_IA64_BUGS_H
+#define _ASM_IA64_BUGS_H
+
+#include <asm/processor.h>
+
+extern void check_bugs (void);
+
+#endif /* _ASM_IA64_BUGS_H */
diff --git a/arch/ia64/include/asm/cache.h b/arch/ia64/include/asm/cache.h
new file mode 100644
index 00000000000..988254a7d34
--- /dev/null
+++ b/arch/ia64/include/asm/cache.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_IA64_CACHE_H
+#define _ASM_IA64_CACHE_H
+
+
+/*
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/* Bytes per L1 (data) cache line.  */
+#define L1_CACHE_SHIFT		CONFIG_IA64_L1_CACHE_SHIFT
+#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+
+#ifdef CONFIG_SMP
+# define SMP_CACHE_SHIFT	L1_CACHE_SHIFT
+# define SMP_CACHE_BYTES	L1_CACHE_BYTES
+#else
+  /*
+   * The "aligned" directive can only _increase_ alignment, so this is
+   * safe and provides an easy way to avoid wasting space on a
+   * uni-processor:
+   */
+# define SMP_CACHE_SHIFT	3
+# define SMP_CACHE_BYTES	(1 << 3)
+#endif
+
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+#endif /* _ASM_IA64_CACHE_H */
diff --git a/arch/ia64/include/asm/cacheflush.h b/arch/ia64/include/asm/cacheflush.h
new file mode 100644
index 00000000000..429eefc93ee
--- /dev/null
+++ b/arch/ia64/include/asm/cacheflush.h
@@ -0,0 +1,54 @@
+#ifndef _ASM_IA64_CACHEFLUSH_H
+#define _ASM_IA64_CACHEFLUSH_H
+
+/*
+ * Copyright (C) 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/page-flags.h>
+#include <linux/bitops.h>
+
+#include <asm/page.h>
+
+/*
+ * Cache flushing routines.  This is the kind of stuff that can be very expensive, so try
+ * to avoid them whenever possible.
+ */
+
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define flush_icache_page(vma,page)		do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+#define flush_dcache_page(page)			\
+do {						\
+	clear_bit(PG_arch_1, &(page)->flags);	\
+} while (0)
+
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+
+extern void flush_icache_range (unsigned long start, unsigned long end);
+extern void clflush_cache_range(void *addr, int size);
+
+
+#define flush_icache_user_range(vma, page, user_addr, len)					\
+do {												\
+	unsigned long _addr = (unsigned long) page_address(page) + ((user_addr) & ~PAGE_MASK);	\
+	flush_icache_range(_addr, _addr + (len));						\
+} while (0)
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
+do { memcpy(dst, src, len); \
+     flush_icache_user_range(vma, page, vaddr, len); \
+} while (0)
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
+
+#endif /* _ASM_IA64_CACHEFLUSH_H */
diff --git a/arch/ia64/include/asm/checksum.h b/arch/ia64/include/asm/checksum.h
new file mode 100644
index 00000000000..97af155057e
--- /dev/null
+++ b/arch/ia64/include/asm/checksum.h
@@ -0,0 +1,79 @@
+#ifndef _ASM_IA64_CHECKSUM_H
+#define _ASM_IA64_CHECKSUM_H
+
+/*
+ * Modified 1998, 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ */
+extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * Computes the checksum of the TCP/UDP pseudo-header returns a 16-bit
+ * checksum, already complemented
+ */
+extern __sum16 csum_tcpudp_magic (__be32 saddr, __be32 daddr,
+					     unsigned short len,
+					     unsigned short proto,
+					     __wsum sum);
+
+extern __wsum csum_tcpudp_nofold (__be32 saddr, __be32 daddr,
+					unsigned short len,
+					unsigned short proto,
+					__wsum sum);
+
+/*
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ * Same as csum_partial, but copies from src while it checksums.
+ *
+ * Here it is even more important to align src and dst on a 32-bit (or
+ * even better 64-bit) boundary.
+ */
+extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+						 int len, __wsum sum,
+						 int *errp);
+
+extern __wsum csum_partial_copy_nocheck(const void *src, void *dst,
+					       int len, __wsum sum);
+
+/*
+ * This routine is used for miscellaneous IP-like checksums, mainly in
+ * icmp.c
+ */
+extern __sum16 ip_compute_csum(const void *buff, int len);
+
+/*
+ * Fold a partial checksum without adding pseudo headers.
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+	u32 sum = (__force u32)csum;
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+	return (__force __sum16)~sum;
+}
+
+#define _HAVE_ARCH_IPV6_CSUM	1
+struct in6_addr;
+extern __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+	const struct in6_addr *daddr, __u32 len, unsigned short proto,
+	__wsum csum);
+
+#endif /* _ASM_IA64_CHECKSUM_H */
diff --git a/arch/ia64/include/asm/clocksource.h b/arch/ia64/include/asm/clocksource.h
new file mode 100644
index 00000000000..5c8596e4cb0
--- /dev/null
+++ b/arch/ia64/include/asm/clocksource.h
@@ -0,0 +1,10 @@
+/* IA64-specific clocksource additions */
+
+#ifndef _ASM_IA64_CLOCKSOURCE_H
+#define _ASM_IA64_CLOCKSOURCE_H
+
+struct arch_clocksource_data {
+	void *fsys_mmio;        /* used by fsyscall asm code */
+};
+
+#endif /* _ASM_IA64_CLOCKSOURCE_H */
diff --git a/arch/ia64/include/asm/cpu.h b/arch/ia64/include/asm/cpu.h
new file mode 100644
index 00000000000..fcca30b9f11
--- /dev/null
+++ b/arch/ia64/include/asm/cpu.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_IA64_CPU_H_
+#define _ASM_IA64_CPU_H_
+
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/topology.h>
+#include <linux/percpu.h>
+
+struct ia64_cpu {
+	struct cpu cpu;
+};
+
+DECLARE_PER_CPU(struct ia64_cpu, cpu_devices);
+
+DECLARE_PER_CPU(int, cpu_state);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int arch_register_cpu(int num);
+extern void arch_unregister_cpu(int);
+#endif
+
+#endif /* _ASM_IA64_CPU_H_ */
diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
new file mode 100644
index 00000000000..e2d3f5baf26
--- /dev/null
+++ b/arch/ia64/include/asm/cputime.h
@@ -0,0 +1,29 @@
+/*
+ * Definitions for measuring cputime on ia64 machines.
+ *
+ * Based on <asm-powerpc/cputime.h>.
+ *
+ * Copyright (C) 2007 FUJITSU LIMITED
+ * Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec.
+ * Otherwise we measure cpu time in jiffies using the generic definitions.
+ */
+
+#ifndef __IA64_CPUTIME_H
+#define __IA64_CPUTIME_H
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+# include <asm-generic/cputime.h>
+#else
+# include <asm/processor.h>
+# include <asm-generic/cputime_nsecs.h>
+extern void arch_vtime_task_switch(struct task_struct *tsk);
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+#endif /* __IA64_CPUTIME_H */
diff --git a/arch/ia64/include/asm/current.h b/arch/ia64/include/asm/current.h
new file mode 100644
index 00000000000..c659f90fbfd
--- /dev/null
+++ b/arch/ia64/include/asm/current.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_IA64_CURRENT_H
+#define _ASM_IA64_CURRENT_H
+
+/*
+ * Modified 1998-2000
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+#include <asm/intrinsics.h>
+
+/*
+ * In kernel mode, thread pointer (r13) is used to point to the current task
+ * structure.
+ */
+#define current	((struct task_struct *) ia64_getreg(_IA64_REG_TP))
+
+#endif /* _ASM_IA64_CURRENT_H */
diff --git a/arch/ia64/include/asm/cyclone.h b/arch/ia64/include/asm/cyclone.h
new file mode 100644
index 00000000000..88f6500e84a
--- /dev/null
+++ b/arch/ia64/include/asm/cyclone.h
@@ -0,0 +1,15 @@
+#ifndef ASM_IA64_CYCLONE_H
+#define ASM_IA64_CYCLONE_H
+
+#ifdef	CONFIG_IA64_CYCLONE
+extern int use_cyclone;
+extern void __init cyclone_setup(void);
+#else	/* CONFIG_IA64_CYCLONE */
+#define use_cyclone 0
+static inline void cyclone_setup(void)
+{
+	printk(KERN_ERR "Cyclone Counter: System not configured"
+					" w/ CONFIG_IA64_CYCLONE.\n");
+}
+#endif	/* CONFIG_IA64_CYCLONE */
+#endif	/* !ASM_IA64_CYCLONE_H */
diff --git a/arch/ia64/include/asm/delay.h b/arch/ia64/include/asm/delay.h
new file mode 100644
index 00000000000..a30a62f235e
--- /dev/null
+++ b/arch/ia64/include/asm/delay.h
@@ -0,0 +1,88 @@
+#ifndef _ASM_IA64_DELAY_H
+#define _ASM_IA64_DELAY_H
+
+/*
+ * Delay routines using a pre-computed "cycles/usec" value.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/compiler.h>
+
+#include <asm/intrinsics.h>
+#include <asm/processor.h>
+
+static __inline__ void
+ia64_set_itm (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_CR_ITM, val);
+	ia64_srlz_d();
+}
+
+static __inline__ unsigned long
+ia64_get_itm (void)
+{
+	unsigned long result;
+
+	result = ia64_getreg(_IA64_REG_CR_ITM);
+	ia64_srlz_d();
+	return result;
+}
+
+static __inline__ void
+ia64_set_itv (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_CR_ITV, val);
+	ia64_srlz_d();
+}
+
+static __inline__ unsigned long
+ia64_get_itv (void)
+{
+	return ia64_getreg(_IA64_REG_CR_ITV);
+}
+
+static __inline__ void
+ia64_set_itc (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_AR_ITC, val);
+	ia64_srlz_d();
+}
+
+static __inline__ unsigned long
+ia64_get_itc (void)
+{
+	unsigned long result;
+
+	result = ia64_getreg(_IA64_REG_AR_ITC);
+	ia64_barrier();
+#ifdef CONFIG_ITANIUM
+	while (unlikely((__s32) result == -1)) {
+		result = ia64_getreg(_IA64_REG_AR_ITC);
+		ia64_barrier();
+	}
+#endif
+	return result;
+}
+
+extern void ia64_delay_loop (unsigned long loops);
+
+static __inline__ void
+__delay (unsigned long loops)
+{
+	if (unlikely(loops < 1))
+		return;
+
+	ia64_delay_loop (loops - 1);
+}
+
+extern void udelay (unsigned long usecs);
+
+#endif /* _ASM_IA64_DELAY_H */
diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h
new file mode 100644
index 00000000000..f69c32ffbe6
--- /dev/null
+++ b/arch/ia64/include/asm/device.h
@@ -0,0 +1,18 @@
+/*
+ * Arch specific extensions to struct device
+ *
+ * This file is released under the GPLv2
+ */
+#ifndef _ASM_IA64_DEVICE_H
+#define _ASM_IA64_DEVICE_H
+
+struct dev_archdata {
+#ifdef CONFIG_INTEL_IOMMU
+	void *iommu; /* hook for IOMMU specific extension */
+#endif
+};
+
+struct pdev_archdata {
+};
+
+#endif /* _ASM_IA64_DEVICE_H */
diff --git a/arch/ia64/include/asm/div64.h b/arch/ia64/include/asm/div64.h
new file mode 100644
index 00000000000..6cd978cefb2
--- /dev/null
+++ b/arch/ia64/include/asm/div64.h
@@ -0,0 +1 @@
+#include <asm-generic/div64.h>
diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
new file mode 100644
index 00000000000..cf3ab7e784b
--- /dev/null
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -0,0 +1,109 @@
+#ifndef _ASM_IA64_DMA_MAPPING_H
+#define _ASM_IA64_DMA_MAPPING_H
+
+/*
+ * Copyright (C) 2003-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <asm/machvec.h>
+#include <linux/scatterlist.h>
+#include <asm/swiotlb.h>
+#include <linux/dma-debug.h>
+
+#define ARCH_HAS_DMA_GET_REQUIRED_MASK
+
+#define DMA_ERROR_CODE 0
+
+extern struct dma_map_ops *dma_ops;
+extern struct ia64_machine_vector ia64_mv;
+extern void set_iommu_machvec(void);
+
+extern void machvec_dma_sync_single(struct device *, dma_addr_t, size_t,
+				    enum dma_data_direction);
+extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int,
+				enum dma_data_direction);
+
+#define dma_alloc_coherent(d,s,h,f)	dma_alloc_attrs(d,s,h,f,NULL)
+
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+				    dma_addr_t *daddr, gfp_t gfp,
+				    struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = platform_dma_get_ops(dev);
+	void *caddr;
+
+	caddr = ops->alloc(dev, size, daddr, gfp, attrs);
+	debug_dma_alloc_coherent(dev, size, *daddr, caddr);
+	return caddr;
+}
+
+#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+				  void *caddr, dma_addr_t daddr,
+				  struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = platform_dma_get_ops(dev);
+	debug_dma_free_coherent(dev, size, caddr, daddr);
+	ops->free(dev, size, caddr, daddr, attrs);
+}
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+#define get_dma_ops(dev) platform_dma_get_ops(dev)
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr)
+{
+	struct dma_map_ops *ops = platform_dma_get_ops(dev);
+	debug_dma_mapping_error(dev, daddr);
+	return ops->mapping_error(dev, daddr);
+}
+
+static inline int dma_supported(struct device *dev, u64 mask)
+{
+	struct dma_map_ops *ops = platform_dma_get_ops(dev);
+	return ops->dma_supported(dev, mask);
+}
+
+static inline int
+dma_set_mask (struct device *dev, u64 mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, mask))
+		return -EIO;
+	*dev->dma_mask = mask;
+	return 0;
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+	if (!dev->dma_mask)
+		return 0;
+
+	return addr + size - 1 <= *dev->dma_mask;
+}
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	return paddr;
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return daddr;
+}
+
+static inline void
+dma_cache_sync (struct device *dev, void *vaddr, size_t size,
+	enum dma_data_direction dir)
+{
+	/*
+	 * IA-64 is cache-coherent, so this is mostly a no-op.  However, we do need to
+	 * ensure that dma_cache_sync() enforces order, hence the mb().
+	 */
+	mb();
+}
+
+#endif /* _ASM_IA64_DMA_MAPPING_H */
diff --git a/arch/ia64/include/asm/dma.h b/arch/ia64/include/asm/dma.h
new file mode 100644
index 00000000000..4d97f60f1ef
--- /dev/null
+++ b/arch/ia64/include/asm/dma.h
@@ -0,0 +1,24 @@
+#ifndef _ASM_IA64_DMA_H
+#define _ASM_IA64_DMA_H
+
+/*
+ * Copyright (C) 1998-2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#include <asm/io.h>		/* need byte IO */
+
+extern unsigned long MAX_DMA_ADDRESS;
+
+#ifdef CONFIG_PCI
+  extern int isa_dma_bridge_buggy;
+#else
+# define isa_dma_bridge_buggy 	(0)
+#endif
+
+#define free_dma(x)
+
+void dma_mark_clean(void *addr, size_t size);
+
+#endif /* _ASM_IA64_DMA_H */
diff --git a/arch/ia64/include/asm/dmi.h b/arch/ia64/include/asm/dmi.h
new file mode 100644
index 00000000000..f365a61f5c7
--- /dev/null
+++ b/arch/ia64/include/asm/dmi.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_DMI_H
+#define _ASM_DMI_H 1
+
+#include <linux/slab.h>
+#include <asm/io.h>
+
+/* Use normal IO mappings for DMI */
+#define dmi_early_remap		ioremap
+#define dmi_early_unmap(x, l)	iounmap(x)
+#define dmi_remap		ioremap
+#define dmi_unmap		iounmap
+#define dmi_alloc(l)		kzalloc(l, GFP_ATOMIC)
+
+#endif
diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h
new file mode 100644
index 00000000000..5a83c5cc3dc
--- /dev/null
+++ b/arch/ia64/include/asm/elf.h
@@ -0,0 +1,234 @@
+#ifndef _ASM_IA64_ELF_H
+#define _ASM_IA64_ELF_H
+
+/*
+ * ELF-specific definitions.
+ *
+ * Copyright (C) 1998-1999, 2002-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#include <asm/fpu.h>
+#include <asm/page.h>
+#include <asm/auxvec.h>
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ((x)->e_machine == EM_IA_64)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS64
+#define ELF_DATA	ELFDATA2LSB
+#define ELF_ARCH	EM_IA_64
+
+#define CORE_DUMP_USE_REGSET
+
+/* Least-significant four bits of ELF header's e_flags are OS-specific.  The bits are
+   interpreted as follows by Linux: */
+#define EF_IA_64_LINUX_EXECUTABLE_STACK	0x1	/* is stack (& heap) executable by default? */
+
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
+
+/*
+ * This is the location that an ET_DYN program is loaded if exec'ed.
+ * Typical use of this is to invoke "./ld.so someprog" to test out a
+ * new version of the loader.  We need to make sure that it is out of
+ * the way of the program that it will "exec", and that there is
+ * sufficient room for the brk.
+ */
+#define ELF_ET_DYN_BASE		(TASK_UNMAPPED_BASE + 0x800000000UL)
+
+#define PT_IA_64_UNWIND		0x70000001
+
+/* IA-64 relocations: */
+#define R_IA64_NONE		0x00	/* none */
+#define R_IA64_IMM14		0x21	/* symbol + addend, add imm14 */
+#define R_IA64_IMM22		0x22	/* symbol + addend, add imm22 */
+#define R_IA64_IMM64		0x23	/* symbol + addend, mov imm64 */
+#define R_IA64_DIR32MSB		0x24	/* symbol + addend, data4 MSB */
+#define R_IA64_DIR32LSB		0x25	/* symbol + addend, data4 LSB */
+#define R_IA64_DIR64MSB		0x26	/* symbol + addend, data8 MSB */
+#define R_IA64_DIR64LSB		0x27	/* symbol + addend, data8 LSB */
+#define R_IA64_GPREL22		0x2a	/* @gprel(sym+add), add imm22 */
+#define R_IA64_GPREL64I		0x2b	/* @gprel(sym+add), mov imm64 */
+#define R_IA64_GPREL32MSB	0x2c	/* @gprel(sym+add), data4 MSB */
+#define R_IA64_GPREL32LSB	0x2d	/* @gprel(sym+add), data4 LSB */
+#define R_IA64_GPREL64MSB	0x2e	/* @gprel(sym+add), data8 MSB */
+#define R_IA64_GPREL64LSB	0x2f	/* @gprel(sym+add), data8 LSB */
+#define R_IA64_LTOFF22		0x32	/* @ltoff(sym+add), add imm22 */
+#define R_IA64_LTOFF64I		0x33	/* @ltoff(sym+add), mov imm64 */
+#define R_IA64_PLTOFF22		0x3a	/* @pltoff(sym+add), add imm22 */
+#define R_IA64_PLTOFF64I	0x3b	/* @pltoff(sym+add), mov imm64 */
+#define R_IA64_PLTOFF64MSB	0x3e	/* @pltoff(sym+add), data8 MSB */
+#define R_IA64_PLTOFF64LSB	0x3f	/* @pltoff(sym+add), data8 LSB */
+#define R_IA64_FPTR64I		0x43	/* @fptr(sym+add), mov imm64 */
+#define R_IA64_FPTR32MSB	0x44	/* @fptr(sym+add), data4 MSB */
+#define R_IA64_FPTR32LSB	0x45	/* @fptr(sym+add), data4 LSB */
+#define R_IA64_FPTR64MSB	0x46	/* @fptr(sym+add), data8 MSB */
+#define R_IA64_FPTR64LSB	0x47	/* @fptr(sym+add), data8 LSB */
+#define R_IA64_PCREL60B		0x48	/* @pcrel(sym+add), brl */
+#define R_IA64_PCREL21B		0x49	/* @pcrel(sym+add), ptb, call */
+#define R_IA64_PCREL21M		0x4a	/* @pcrel(sym+add), chk.s */
+#define R_IA64_PCREL21F		0x4b	/* @pcrel(sym+add), fchkf */
+#define R_IA64_PCREL32MSB	0x4c	/* @pcrel(sym+add), data4 MSB */
+#define R_IA64_PCREL32LSB	0x4d	/* @pcrel(sym+add), data4 LSB */
+#define R_IA64_PCREL64MSB	0x4e	/* @pcrel(sym+add), data8 MSB */
+#define R_IA64_PCREL64LSB	0x4f	/* @pcrel(sym+add), data8 LSB */
+#define R_IA64_LTOFF_FPTR22	0x52	/* @ltoff(@fptr(s+a)), imm22 */
+#define R_IA64_LTOFF_FPTR64I	0x53	/* @ltoff(@fptr(s+a)), imm64 */
+#define R_IA64_LTOFF_FPTR32MSB	0x54	/* @ltoff(@fptr(s+a)), 4 MSB */
+#define R_IA64_LTOFF_FPTR32LSB	0x55	/* @ltoff(@fptr(s+a)), 4 LSB */
+#define R_IA64_LTOFF_FPTR64MSB	0x56	/* @ltoff(@fptr(s+a)), 8 MSB */
+#define R_IA64_LTOFF_FPTR64LSB	0x57	/* @ltoff(@fptr(s+a)), 8 LSB */
+#define R_IA64_SEGREL32MSB	0x5c	/* @segrel(sym+add), data4 MSB */
+#define R_IA64_SEGREL32LSB	0x5d	/* @segrel(sym+add), data4 LSB */
+#define R_IA64_SEGREL64MSB	0x5e	/* @segrel(sym+add), data8 MSB */
+#define R_IA64_SEGREL64LSB	0x5f	/* @segrel(sym+add), data8 LSB */
+#define R_IA64_SECREL32MSB	0x64	/* @secrel(sym+add), data4 MSB */
+#define R_IA64_SECREL32LSB	0x65	/* @secrel(sym+add), data4 LSB */
+#define R_IA64_SECREL64MSB	0x66	/* @secrel(sym+add), data8 MSB */
+#define R_IA64_SECREL64LSB	0x67	/* @secrel(sym+add), data8 LSB */
+#define R_IA64_REL32MSB		0x6c	/* data 4 + REL */
+#define R_IA64_REL32LSB		0x6d	/* data 4 + REL */
+#define R_IA64_REL64MSB		0x6e	/* data 8 + REL */
+#define R_IA64_REL64LSB		0x6f	/* data 8 + REL */
+#define R_IA64_LTV32MSB		0x74	/* symbol + addend, data4 MSB */
+#define R_IA64_LTV32LSB		0x75	/* symbol + addend, data4 LSB */
+#define R_IA64_LTV64MSB		0x76	/* symbol + addend, data8 MSB */
+#define R_IA64_LTV64LSB		0x77	/* symbol + addend, data8 LSB */
+#define R_IA64_PCREL21BI	0x79	/* @pcrel(sym+add), ptb, call */
+#define R_IA64_PCREL22		0x7a	/* @pcrel(sym+add), imm22 */
+#define R_IA64_PCREL64I		0x7b	/* @pcrel(sym+add), imm64 */
+#define R_IA64_IPLTMSB		0x80	/* dynamic reloc, imported PLT, MSB */
+#define R_IA64_IPLTLSB		0x81	/* dynamic reloc, imported PLT, LSB */
+#define R_IA64_COPY		0x84	/* dynamic reloc, data copy */
+#define R_IA64_SUB		0x85	/* -symbol + addend, add imm22 */
+#define R_IA64_LTOFF22X		0x86	/* LTOFF22, relaxable.  */
+#define R_IA64_LDXMOV		0x87	/* Use of LTOFF22X.  */
+#define R_IA64_TPREL14		0x91	/* @tprel(sym+add), add imm14 */
+#define R_IA64_TPREL22		0x92	/* @tprel(sym+add), add imm22 */
+#define R_IA64_TPREL64I		0x93	/* @tprel(sym+add), add imm64 */
+#define R_IA64_TPREL64MSB	0x96	/* @tprel(sym+add), data8 MSB */
+#define R_IA64_TPREL64LSB	0x97	/* @tprel(sym+add), data8 LSB */
+#define R_IA64_LTOFF_TPREL22	0x9a	/* @ltoff(@tprel(s+a)), add imm22 */
+#define R_IA64_DTPMOD64MSB	0xa6	/* @dtpmod(sym+add), data8 MSB */
+#define R_IA64_DTPMOD64LSB	0xa7	/* @dtpmod(sym+add), data8 LSB */
+#define R_IA64_LTOFF_DTPMOD22	0xaa	/* @ltoff(@dtpmod(s+a)), imm22 */
+#define R_IA64_DTPREL14		0xb1	/* @dtprel(sym+add), imm14 */
+#define R_IA64_DTPREL22		0xb2	/* @dtprel(sym+add), imm22 */
+#define R_IA64_DTPREL64I	0xb3	/* @dtprel(sym+add), imm64 */
+#define R_IA64_DTPREL32MSB	0xb4	/* @dtprel(sym+add), data4 MSB */
+#define R_IA64_DTPREL32LSB	0xb5	/* @dtprel(sym+add), data4 LSB */
+#define R_IA64_DTPREL64MSB	0xb6	/* @dtprel(sym+add), data8 MSB */
+#define R_IA64_DTPREL64LSB	0xb7	/* @dtprel(sym+add), data8 LSB */
+#define R_IA64_LTOFF_DTPREL22	0xba	/* @ltoff(@dtprel(s+a)), imm22 */
+
+/* IA-64 specific section flags: */
+#define SHF_IA_64_SHORT		0x10000000	/* section near gp */
+
+/*
+ * We use (abuse?) this macro to insert the (empty) vm_area that is
+ * used to map the register backing store.  I don't see any better
+ * place to do this, but we should discuss this with Linus once we can
+ * talk to him...
+ */
+extern void ia64_init_addr_space (void);
+#define ELF_PLAT_INIT(_r, load_addr)	ia64_init_addr_space()
+
+/* ELF register definitions.  This is needed for core dump support.  */
+
+/*
+ * elf_gregset_t contains the application-level state in the following order:
+ *	r0-r31
+ *	NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+ *	predicate registers (p0-p63)
+ *	b0-b7
+ *	ip cfm psr
+ *	ar.rsc ar.bsp ar.bspstore ar.rnat
+ *	ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+ */
+#define ELF_NGREG	128	/* we really need just 72 but let's leave some headroom... */
+#define ELF_NFPREG	128	/* f0 and f1 could be omitted, but so what... */
+
+/* elf_gregset_t register offsets */
+#define ELF_GR_0_OFFSET     0
+#define ELF_NAT_OFFSET     (32 * sizeof(elf_greg_t))
+#define ELF_PR_OFFSET      (33 * sizeof(elf_greg_t))
+#define ELF_BR_0_OFFSET    (34 * sizeof(elf_greg_t))
+#define ELF_CR_IIP_OFFSET  (42 * sizeof(elf_greg_t))
+#define ELF_CFM_OFFSET     (43 * sizeof(elf_greg_t))
+#define ELF_CR_IPSR_OFFSET (44 * sizeof(elf_greg_t))
+#define ELF_GR_OFFSET(i)   (ELF_GR_0_OFFSET + i * sizeof(elf_greg_t))
+#define ELF_BR_OFFSET(i)   (ELF_BR_0_OFFSET + i * sizeof(elf_greg_t))
+#define ELF_AR_RSC_OFFSET  (45 * sizeof(elf_greg_t))
+#define ELF_AR_BSP_OFFSET  (46 * sizeof(elf_greg_t))
+#define ELF_AR_BSPSTORE_OFFSET (47 * sizeof(elf_greg_t))
+#define ELF_AR_RNAT_OFFSET (48 * sizeof(elf_greg_t))
+#define ELF_AR_CCV_OFFSET  (49 * sizeof(elf_greg_t))
+#define ELF_AR_UNAT_OFFSET (50 * sizeof(elf_greg_t))
+#define ELF_AR_FPSR_OFFSET (51 * sizeof(elf_greg_t))
+#define ELF_AR_PFS_OFFSET  (52 * sizeof(elf_greg_t))
+#define ELF_AR_LC_OFFSET   (53 * sizeof(elf_greg_t))
+#define ELF_AR_EC_OFFSET   (54 * sizeof(elf_greg_t))
+#define ELF_AR_CSD_OFFSET  (55 * sizeof(elf_greg_t))
+#define ELF_AR_SSD_OFFSET  (56 * sizeof(elf_greg_t))
+#define ELF_AR_END_OFFSET  (57 * sizeof(elf_greg_t))
+
+typedef unsigned long elf_fpxregset_t;
+
+typedef unsigned long elf_greg_t;
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct ia64_fpreg elf_fpreg_t;
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+
+
+
+struct pt_regs;	/* forward declaration... */
+extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst);
+#define ELF_CORE_COPY_REGS(_dest,_regs)	ia64_elf_core_copy_regs(_regs, _dest);
+
+/* This macro yields a bitmask that programs can use to figure out
+   what instruction set this CPU supports.  */
+#define ELF_HWCAP 	0
+
+/* This macro yields a string that ld.so will use to load
+   implementation specific libraries for optimization.  Not terribly
+   relevant until we have real hardware to play with... */
+#define ELF_PLATFORM	NULL
+
+#define elf_read_implies_exec(ex, executable_stack)					\
+	((executable_stack!=EXSTACK_DISABLE_X) && ((ex).e_flags & EF_IA_64_LINUX_EXECUTABLE_STACK) != 0)
+
+struct task_struct;
+
+#define GATE_EHDR	((const struct elfhdr *) GATE_ADDR)
+
+/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
+#define ARCH_DLINFO								\
+do {										\
+	extern char __kernel_syscall_via_epc[];					\
+	NEW_AUX_ENT(AT_SYSINFO, (unsigned long) __kernel_syscall_via_epc);	\
+	NEW_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long) GATE_EHDR);		\
+} while (0)
+
+/*
+ * format for entries in the Global Offset Table
+ */
+struct got_entry {
+	uint64_t val;
+};
+
+/*
+ * Layout of the Function Descriptor
+ */
+struct fdesc {
+	uint64_t ip;
+	uint64_t gp;
+};
+
+#endif /* _ASM_IA64_ELF_H */
diff --git a/arch/ia64/include/asm/emergency-restart.h b/arch/ia64/include/asm/emergency-restart.h
new file mode 100644
index 00000000000..108d8c48e42
--- /dev/null
+++ b/arch/ia64/include/asm/emergency-restart.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_EMERGENCY_RESTART_H
+#define _ASM_EMERGENCY_RESTART_H
+
+#include <asm-generic/emergency-restart.h>
+
+#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/ia64/include/asm/esi.h b/arch/ia64/include/asm/esi.h
new file mode 100644
index 00000000000..40991c6ba64
--- /dev/null
+++ b/arch/ia64/include/asm/esi.h
@@ -0,0 +1,29 @@
+/*
+ * ESI service calls.
+ *
+ * Copyright (c) Copyright 2005-2006 Hewlett-Packard Development Company, L.P.
+ * 	Alex Williamson <alex.williamson@hp.com>
+ */
+#ifndef esi_h
+#define esi_h
+
+#include <linux/efi.h>
+
+#define ESI_QUERY			0x00000001
+#define ESI_OPEN_HANDLE			0x02000000
+#define ESI_CLOSE_HANDLE		0x02000001
+
+enum esi_proc_type {
+	ESI_PROC_SERIALIZED,	/* calls need to be serialized */
+	ESI_PROC_MP_SAFE,	/* MP-safe, but not reentrant */
+	ESI_PROC_REENTRANT	/* MP-safe and reentrant */
+};
+
+extern struct ia64_sal_retval esi_call_phys (void *, u64 *);
+extern int ia64_esi_call(efi_guid_t, struct ia64_sal_retval *,
+			 enum esi_proc_type,
+			 u64, u64, u64, u64, u64, u64, u64, u64);
+extern int ia64_esi_call_phys(efi_guid_t, struct ia64_sal_retval *, u64, u64,
+                              u64, u64, u64, u64, u64, u64);
+
+#endif /* esi_h */
diff --git a/arch/ia64/include/asm/fb.h b/arch/ia64/include/asm/fb.h
new file mode 100644
index 00000000000..89a397cee90
--- /dev/null
+++ b/arch/ia64/include/asm/fb.h
@@ -0,0 +1,23 @@
+#ifndef _ASM_FB_H_
+#define _ASM_FB_H_
+
+#include <linux/fb.h>
+#include <linux/fs.h>
+#include <linux/efi.h>
+#include <asm/page.h>
+
+static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
+				unsigned long off)
+{
+	if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start))
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	else
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+}
+
+static inline int fb_is_primary_device(struct fb_info *info)
+{
+	return 0;
+}
+
+#endif /* _ASM_FB_H_ */
diff --git a/arch/ia64/include/asm/fpswa.h b/arch/ia64/include/asm/fpswa.h
new file mode 100644
index 00000000000..62edfceadaa
--- /dev/null
+++ b/arch/ia64/include/asm/fpswa.h
@@ -0,0 +1,73 @@
+#ifndef _ASM_IA64_FPSWA_H
+#define _ASM_IA64_FPSWA_H
+
+/*
+ * Floating-point Software Assist
+ *
+ * Copyright (C) 1999 Intel Corporation.
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Goutham Rao <goutham.rao@intel.com>
+ */
+
+typedef struct {
+	/* 4 * 128 bits */
+	unsigned long fp_lp[4*2];
+} fp_state_low_preserved_t;
+
+typedef struct {
+	/* 10 * 128 bits */
+	unsigned long fp_lv[10 * 2];
+} fp_state_low_volatile_t;
+
+typedef	struct {
+	/* 16 * 128 bits */
+	unsigned long fp_hp[16 * 2];
+} fp_state_high_preserved_t;
+
+typedef struct {
+	/* 96 * 128 bits */
+	unsigned long fp_hv[96 * 2];
+} fp_state_high_volatile_t;
+
+/**
+ * floating point state to be passed to the FP emulation library by
+ * the trap/fault handler
+ */
+typedef struct {
+	unsigned long			bitmask_low64;
+	unsigned long			bitmask_high64;
+	fp_state_low_preserved_t	*fp_state_low_preserved;
+	fp_state_low_volatile_t		*fp_state_low_volatile;
+	fp_state_high_preserved_t	*fp_state_high_preserved;
+	fp_state_high_volatile_t	*fp_state_high_volatile;
+} fp_state_t;
+
+typedef struct {
+	unsigned long status;
+	unsigned long err0;
+	unsigned long err1;
+	unsigned long err2;
+} fpswa_ret_t;
+
+/**
+ * function header for the Floating Point software assist
+ * library. This function is invoked by the Floating point software
+ * assist trap/fault handler.
+ */
+typedef fpswa_ret_t (*efi_fpswa_t) (unsigned long trap_type, void *bundle, unsigned long *ipsr,
+				    unsigned long *fsr, unsigned long *isr, unsigned long *preds,
+				    unsigned long *ifs, fp_state_t *fp_state);
+
+/**
+ * This is the FPSWA library interface as defined by EFI.  We need to pass a 
+ * pointer to the interface itself on a call to the assist library
+ */
+typedef struct {
+	unsigned int	 revision;
+	unsigned int	 reserved;
+	efi_fpswa_t	 fpswa;
+} fpswa_interface_t;
+
+extern fpswa_interface_t *fpswa_interface;
+
+#endif /* _ASM_IA64_FPSWA_H */
diff --git a/arch/ia64/include/asm/ftrace.h b/arch/ia64/include/asm/ftrace.h
new file mode 100644
index 00000000000..fbd1a2470ca
--- /dev/null
+++ b/arch/ia64/include/asm/ftrace.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_IA64_FTRACE_H
+#define _ASM_IA64_FTRACE_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define MCOUNT_INSN_SIZE        32 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
+#define mcount _mcount
+
+/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */
+#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
+#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/* second bundle, insn 2 */
+	return addr - 0x12;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* _ASM_IA64_FTRACE_H */
diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h
new file mode 100644
index 00000000000..76acbcd5c06
--- /dev/null
+++ b/arch/ia64/include/asm/futex.h
@@ -0,0 +1,126 @@
+#ifndef _ASM_FUTEX_H
+#define _ASM_FUTEX_H
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
+do {									\
+	register unsigned long r8 __asm ("r8") = 0;			\
+	__asm__ __volatile__(						\
+		"	mf;;					\n"	\
+		"[1:] "	insn ";;				\n"	\
+		"	.xdata4 \"__ex_table\", 1b-., 2f-.	\n"	\
+		"[2:]"							\
+		: "+r" (r8), "=r" (oldval)				\
+		: "r" (uaddr), "r" (oparg)				\
+		: "memory");						\
+	ret = r8;							\
+} while (0)
+
+#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
+do {									\
+	register unsigned long r8 __asm ("r8") = 0;			\
+	int val, newval;						\
+	do {								\
+		__asm__ __volatile__(					\
+			"	mf;;				  \n"	\
+			"[1:]	ld4 %3=[%4];;			  \n"	\
+			"	mov %2=%3			  \n"	\
+				insn	";;			  \n"	\
+			"	mov ar.ccv=%2;;			  \n"	\
+			"[2:]	cmpxchg4.acq %1=[%4],%3,ar.ccv;;  \n"	\
+			"	.xdata4 \"__ex_table\", 1b-., 3f-.\n"	\
+			"	.xdata4 \"__ex_table\", 2b-., 3f-.\n"	\
+			"[3:]"						\
+			: "+r" (r8), "=r" (val), "=&r" (oldval),	\
+			   "=&r" (newval)				\
+			: "r" (uaddr), "r" (oparg)			\
+			: "memory");					\
+		if (unlikely (r8))					\
+			break;						\
+	} while (unlikely (val != oldval));				\
+	ret = r8;							\
+} while (0)
+
+static inline int
+futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int oldval = 0, ret;
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	pagefault_disable();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op1("xchg4 %1=[%2],%3", ret, oldval, uaddr,
+				   oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op2("add %3=%3,%5", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op2("or %3=%3,%5", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op2("and %3=%3,%5", ret, oldval, uaddr,
+				   ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op2("xor %3=%3,%5", ret, oldval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	pagefault_enable();
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		default: ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	{
+		register unsigned long r8 __asm ("r8") = 0;
+		unsigned long prev;
+		__asm__ __volatile__(
+			"	mf;;					\n"
+			"	mov ar.ccv=%4;;				\n"
+			"[1:]	cmpxchg4.acq %1=[%2],%3,ar.ccv		\n"
+			"	.xdata4 \"__ex_table\", 1b-., 2f-.	\n"
+			"[2:]"
+			: "+r" (r8), "=&r" (prev)
+			: "r" (uaddr), "r" (newval),
+			  "rO" ((long) (unsigned) oldval)
+			: "memory");
+		*uval = prev;
+		return r8;
+	}
+}
+
+#endif /* _ASM_FUTEX_H */
diff --git a/arch/ia64/include/asm/gcc_intrin.h b/arch/ia64/include/asm/gcc_intrin.h
new file mode 100644
index 00000000000..f9495b1757a
--- /dev/null
+++ b/arch/ia64/include/asm/gcc_intrin.h
@@ -0,0 +1,12 @@
+/*
+ *
+ * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+#ifndef _ASM_IA64_GCC_INTRIN_H
+#define _ASM_IA64_GCC_INTRIN_H
+
+#include <uapi/asm/gcc_intrin.h>
+
+register unsigned long ia64_r13 asm ("r13") __used;
+#endif /* _ASM_IA64_GCC_INTRIN_H */
diff --git a/arch/ia64/include/asm/gpio.h b/arch/ia64/include/asm/gpio.h
new file mode 100644
index 00000000000..b3799d88ffc
--- /dev/null
+++ b/arch/ia64/include/asm/gpio.h
@@ -0,0 +1,4 @@
+#ifndef __LINUX_GPIO_H
+#warning Include linux/gpio.h instead of asm/gpio.h
+#include <linux/gpio.h>
+#endif
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
new file mode 100644
index 00000000000..8fb7d33a661
--- /dev/null
+++ b/arch/ia64/include/asm/hardirq.h
@@ -0,0 +1,26 @@
+#ifndef _ASM_IA64_HARDIRQ_H
+#define _ASM_IA64_HARDIRQ_H
+
+/*
+ * Modified 1998-2002, 2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * No irq_cpustat_t for IA-64.  The data is held in the per-CPU data structure.
+ */
+
+#define __ARCH_IRQ_STAT	1
+
+#define local_softirq_pending()		(local_cpu_data->softirq_pending)
+
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+#include <asm/processor.h>
+
+extern void __iomem *ipi_base_addr;
+
+void ack_bad_irq(unsigned int irq);
+
+#endif /* _ASM_IA64_HARDIRQ_H */
diff --git a/arch/ia64/include/asm/hpsim.h b/arch/ia64/include/asm/hpsim.h
new file mode 100644
index 00000000000..0fe50225daa
--- /dev/null
+++ b/arch/ia64/include/asm/hpsim.h
@@ -0,0 +1,16 @@
+#ifndef _ASMIA64_HPSIM_H
+#define _ASMIA64_HPSIM_H
+
+#ifndef CONFIG_HP_SIMSERIAL_CONSOLE
+static inline int simcons_register(void) { return 1; }
+#else
+int simcons_register(void);
+#endif
+
+struct tty_driver;
+extern struct tty_driver *hp_simserial_driver;
+
+extern int hpsim_get_irq(int intr);
+void ia64_ctl_trace(long on);
+
+#endif
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
new file mode 100644
index 00000000000..aa910054b8e
--- /dev/null
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -0,0 +1,85 @@
+#ifndef _ASM_IA64_HUGETLB_H
+#define _ASM_IA64_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
+
+
+void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
+			    unsigned long end, unsigned long floor,
+			    unsigned long ceiling);
+
+int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len);
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr,
+					 unsigned long len)
+{
+	return (REGION_NUMBER(addr) == RGN_HPAGE ||
+		REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE);
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
+#endif /* _ASM_IA64_HUGETLB_H */
diff --git a/arch/ia64/include/asm/hw_irq.h b/arch/ia64/include/asm/hw_irq.h
new file mode 100644
index 00000000000..029bab36cd9
--- /dev/null
+++ b/arch/ia64/include/asm/hw_irq.h
@@ -0,0 +1,194 @@
+#ifndef _ASM_IA64_HW_IRQ_H
+#define _ASM_IA64_HW_IRQ_H
+
+/*
+ * Copyright (C) 2001-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/profile.h>
+
+#include <asm/machvec.h>
+#include <asm/ptrace.h>
+#include <asm/smp.h>
+
+#ifndef CONFIG_PARAVIRT
+typedef u8 ia64_vector;
+#else
+typedef u16 ia64_vector;
+#endif
+
+/*
+ * 0 special
+ *
+ * 1,3-14 are reserved from firmware
+ *
+ * 16-255 (vectored external interrupts) are available
+ *
+ * 15 spurious interrupt (see IVR)
+ *
+ * 16 lowest priority, 255 highest priority
+ *
+ * 15 classes of 16 interrupts each.
+ */
+#define IA64_MIN_VECTORED_IRQ		 16
+#define IA64_MAX_VECTORED_IRQ		255
+#define IA64_NUM_VECTORS		256
+
+#define AUTO_ASSIGN			-1
+
+#define IA64_SPURIOUS_INT_VECTOR	0x0f
+
+/*
+ * Vectors 0x10-0x1f are used for low priority interrupts, e.g. CMCI.
+ */
+#define IA64_CPEP_VECTOR		0x1c	/* corrected platform error polling vector */
+#define IA64_CMCP_VECTOR		0x1d	/* corrected machine-check polling vector */
+#define IA64_CPE_VECTOR			0x1e	/* corrected platform error interrupt vector */
+#define IA64_CMC_VECTOR			0x1f	/* corrected machine-check interrupt vector */
+/*
+ * Vectors 0x20-0x2f are reserved for legacy ISA IRQs.
+ * Use vectors 0x30-0xe7 as the default device vector range for ia64.
+ * Platforms may choose to reduce this range in platform_irq_setup, but the
+ * platform range must fall within
+ *	[IA64_DEF_FIRST_DEVICE_VECTOR..IA64_DEF_LAST_DEVICE_VECTOR]
+ */
+extern int ia64_first_device_vector;
+extern int ia64_last_device_vector;
+
+#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined (CONFIG_IA64_DIG))
+/* Reserve the lower priority vector than device vectors for "move IRQ" IPI */
+#define IA64_IRQ_MOVE_VECTOR		0x30	/* "move IRQ" IPI */
+#define IA64_DEF_FIRST_DEVICE_VECTOR	0x31
+#else
+#define IA64_DEF_FIRST_DEVICE_VECTOR	0x30
+#endif
+#define IA64_DEF_LAST_DEVICE_VECTOR	0xe7
+#define IA64_FIRST_DEVICE_VECTOR	ia64_first_device_vector
+#define IA64_LAST_DEVICE_VECTOR		ia64_last_device_vector
+#define IA64_MAX_DEVICE_VECTORS		(IA64_DEF_LAST_DEVICE_VECTOR - IA64_DEF_FIRST_DEVICE_VECTOR + 1)
+#define IA64_NUM_DEVICE_VECTORS		(IA64_LAST_DEVICE_VECTOR - IA64_FIRST_DEVICE_VECTOR + 1)
+
+#define IA64_MCA_RENDEZ_VECTOR		0xe8	/* MCA rendez interrupt */
+#define IA64_PERFMON_VECTOR		0xee	/* performance monitor interrupt vector */
+#define IA64_TIMER_VECTOR		0xef	/* use highest-prio group 15 interrupt for timer */
+#define	IA64_MCA_WAKEUP_VECTOR		0xf0	/* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */
+#define IA64_IPI_LOCAL_TLB_FLUSH	0xfc	/* SMP flush local TLB */
+#define IA64_IPI_RESCHEDULE		0xfd	/* SMP reschedule */
+#define IA64_IPI_VECTOR			0xfe	/* inter-processor interrupt vector */
+
+/* Used for encoding redirected irqs */
+
+#define IA64_IRQ_REDIRECTED		(1 << 31)
+
+/* IA64 inter-cpu interrupt related definitions */
+
+#define IA64_IPI_DEFAULT_BASE_ADDR	0xfee00000
+
+/* Delivery modes for inter-cpu interrupts */
+enum {
+        IA64_IPI_DM_INT =       0x0,    /* pend an external interrupt */
+        IA64_IPI_DM_PMI =       0x2,    /* pend a PMI */
+        IA64_IPI_DM_NMI =       0x4,    /* pend an NMI (vector 2) */
+        IA64_IPI_DM_INIT =      0x5,    /* pend an INIT interrupt */
+        IA64_IPI_DM_EXTINT =    0x7,    /* pend an 8259-compatible interrupt. */
+};
+
+extern __u8 isa_irq_to_vector_map[16];
+#define isa_irq_to_vector(x)	isa_irq_to_vector_map[(x)]
+
+struct irq_cfg {
+	ia64_vector vector;
+	cpumask_t domain;
+	cpumask_t old_domain;
+	unsigned move_cleanup_count;
+	u8 move_in_progress : 1;
+};
+extern spinlock_t vector_lock;
+extern struct irq_cfg irq_cfg[NR_IRQS];
+#define irq_to_domain(x)	irq_cfg[(x)].domain
+DECLARE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq);
+
+extern struct irq_chip irq_type_ia64_lsapic;	/* CPU-internal interrupt controller */
+
+#ifdef CONFIG_PARAVIRT_GUEST
+#include <asm/paravirt.h>
+#else
+#define ia64_register_ipi	ia64_native_register_ipi
+#define assign_irq_vector	ia64_native_assign_irq_vector
+#define free_irq_vector		ia64_native_free_irq_vector
+#define register_percpu_irq	ia64_native_register_percpu_irq
+#define ia64_resend_irq		ia64_native_resend_irq
+#endif
+
+extern void ia64_native_register_ipi(void);
+extern int bind_irq_vector(int irq, int vector, cpumask_t domain);
+extern int ia64_native_assign_irq_vector (int irq);	/* allocate a free vector */
+extern void ia64_native_free_irq_vector (int vector);
+extern int reserve_irq_vector (int vector);
+extern void __setup_vector_irq(int cpu);
+extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect);
+extern void ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action);
+extern void destroy_and_reserve_irq (unsigned int irq);
+
+#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG))
+extern int irq_prepare_move(int irq, int cpu);
+extern void irq_complete_move(unsigned int irq);
+#else
+static inline int irq_prepare_move(int irq, int cpu) { return 0; }
+static inline void irq_complete_move(unsigned int irq) {}
+#endif
+
+static inline void ia64_native_resend_irq(unsigned int vector)
+{
+	platform_send_ipi(smp_processor_id(), vector, IA64_IPI_DM_INT, 0);
+}
+
+/*
+ * Default implementations for the irq-descriptor API:
+ */
+#ifndef CONFIG_IA64_GENERIC
+static inline ia64_vector __ia64_irq_to_vector(int irq)
+{
+	return irq_cfg[irq].vector;
+}
+
+static inline unsigned int
+__ia64_local_vector_to_irq (ia64_vector vec)
+{
+	return __get_cpu_var(vector_irq)[vec];
+}
+#endif
+
+/*
+ * Next follows the irq descriptor interface.  On IA-64, each CPU supports 256 interrupt
+ * vectors.  On smaller systems, there is a one-to-one correspondence between interrupt
+ * vectors and the Linux irq numbers.  However, larger systems may have multiple interrupt
+ * domains meaning that the translation from vector number to irq number depends on the
+ * interrupt domain that a CPU belongs to.  This API abstracts such platform-dependent
+ * differences and provides a uniform means to translate between vector and irq numbers
+ * and to obtain the irq descriptor for a given irq number.
+ */
+
+/* Extract the IA-64 vector that corresponds to IRQ.  */
+static inline ia64_vector
+irq_to_vector (int irq)
+{
+	return platform_irq_to_vector(irq);
+}
+
+/*
+ * Convert the local IA-64 vector to the corresponding irq number.  This translation is
+ * done in the context of the interrupt domain that the currently executing CPU belongs
+ * to.
+ */
+static inline unsigned int
+local_vector_to_irq (ia64_vector vec)
+{
+	return platform_local_vector_to_irq(vec);
+}
+
+#endif /* _ASM_IA64_HW_IRQ_H */
diff --git a/arch/ia64/include/asm/idle.h b/arch/ia64/include/asm/idle.h
new file mode 100644
index 00000000000..b7685015a8b
--- /dev/null
+++ b/arch/ia64/include/asm/idle.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_IA64_IDLE_H
+#define _ASM_IA64_IDLE_H
+
+static inline void enter_idle(void) { }
+static inline void exit_idle(void) { }
+
+#endif /* _ASM_IA64_IDLE_H */
diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h
new file mode 100644
index 00000000000..20477ea111b
--- /dev/null
+++ b/arch/ia64/include/asm/intrinsics.h
@@ -0,0 +1,25 @@
+/*
+ * Compiler-dependent intrinsics.
+ *
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#ifndef _ASM_IA64_INTRINSICS_H
+#define _ASM_IA64_INTRINSICS_H
+
+#include <asm/paravirt_privop.h>
+#include <uapi/asm/intrinsics.h>
+
+#ifndef __ASSEMBLY__
+#if defined(CONFIG_PARAVIRT)
+# undef IA64_INTRINSIC_API
+# undef IA64_INTRINSIC_MACRO
+# ifdef ASM_SUPPORTED
+#  define IA64_INTRINSIC_API(name)	paravirt_ ## name
+# else
+#  define IA64_INTRINSIC_API(name)	pv_cpu_ops.name
+# endif
+#define IA64_INTRINSIC_MACRO(name)	paravirt_ ## name
+#endif
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_IA64_INTRINSICS_H */
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
new file mode 100644
index 00000000000..0d2bcb37ec3
--- /dev/null
+++ b/arch/ia64/include/asm/io.h
@@ -0,0 +1,444 @@
+#ifndef _ASM_IA64_IO_H
+#define _ASM_IA64_IO_H
+
+/*
+ * This file contains the definitions for the emulated IO instructions
+ * inb/inw/inl/outb/outw/outl and the "string versions" of the same
+ * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
+ * versions of the single-IO instructions (inb_p/inw_p/..).
+ *
+ * This file is not meant to be obfuscating: it's just complicated to
+ * (a) handle it all in a way that makes gcc able to optimize it as
+ * well as possible and (b) trying to avoid writing the same thing
+ * over and over again with slight variations and possibly making a
+ * mistake somewhere.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+
+#include <asm/unaligned.h>
+
+/* We don't use IO slowdowns on the ia64, but.. */
+#define __SLOW_DOWN_IO	do { } while (0)
+#define SLOW_DOWN_IO	do { } while (0)
+
+#define __IA64_UNCACHED_OFFSET	RGN_BASE(RGN_UNCACHED)
+
+/*
+ * The legacy I/O space defined by the ia64 architecture supports only 65536 ports, but
+ * large machines may have multiple other I/O spaces so we can't place any a priori limit
+ * on IO_SPACE_LIMIT.  These additional spaces are described in ACPI.
+ */
+#define IO_SPACE_LIMIT		0xffffffffffffffffUL
+
+#define MAX_IO_SPACES_BITS		8
+#define MAX_IO_SPACES			(1UL << MAX_IO_SPACES_BITS)
+#define IO_SPACE_BITS			24
+#define IO_SPACE_SIZE			(1UL << IO_SPACE_BITS)
+
+#define IO_SPACE_NR(port)		((port) >> IO_SPACE_BITS)
+#define IO_SPACE_BASE(space)		((space) << IO_SPACE_BITS)
+#define IO_SPACE_PORT(port)		((port) & (IO_SPACE_SIZE - 1))
+
+#define IO_SPACE_SPARSE_ENCODING(p)	((((p) >> 2) << 12) | ((p) & 0xfff))
+
+struct io_space {
+	unsigned long mmio_base;	/* base in MMIO space */
+	int sparse;
+};
+
+extern struct io_space io_space[];
+extern unsigned int num_io_spaces;
+
+# ifdef __KERNEL__
+
+/*
+ * All MMIO iomem cookies are in region 6; anything less is a PIO cookie:
+ *	0xCxxxxxxxxxxxxxxx	MMIO cookie (return from ioremap)
+ *	0x000000001SPPPPPP	PIO cookie (S=space number, P..P=port)
+ *
+ * ioread/writeX() uses the leading 1 in PIO cookies (PIO_OFFSET) to catch
+ * code that uses bare port numbers without the prerequisite pci_iomap().
+ */
+#define PIO_OFFSET		(1UL << (MAX_IO_SPACES_BITS + IO_SPACE_BITS))
+#define PIO_MASK		(PIO_OFFSET - 1)
+#define PIO_RESERVED		__IA64_UNCACHED_OFFSET
+#define HAVE_ARCH_PIO_SIZE
+
+#include <asm/intrinsics.h>
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm-generic/iomap.h>
+
+/*
+ * Change virtual addresses to physical addresses and vv.
+ */
+static inline unsigned long
+virt_to_phys (volatile void *address)
+{
+	return (unsigned long) address - PAGE_OFFSET;
+}
+
+static inline void*
+phys_to_virt (unsigned long address)
+{
+	return (void *) (address + PAGE_OFFSET);
+}
+
+#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+extern u64 kern_mem_attribute (unsigned long phys_addr, unsigned long size);
+extern int valid_phys_addr_range (phys_addr_t addr, size_t count); /* efi.c */
+extern int valid_mmap_phys_addr_range (unsigned long pfn, size_t count);
+
+/*
+ * The following two macros are deprecated and scheduled for removal.
+ * Please use the PCI-DMA interface defined in <asm/pci.h> instead.
+ */
+#define bus_to_virt	phys_to_virt
+#define virt_to_bus	virt_to_phys
+#define page_to_bus	page_to_phys
+
+# endif /* KERNEL */
+
+/*
+ * Memory fence w/accept.  This should never be used in code that is
+ * not IA-64 specific.
+ */
+#define __ia64_mf_a()	ia64_mfa()
+
+/**
+ * ___ia64_mmiowb - I/O write barrier
+ *
+ * Ensure ordering of I/O space writes.  This will make sure that writes
+ * following the barrier will arrive after all previous writes.  For most
+ * ia64 platforms, this is a simple 'mf.a' instruction.
+ *
+ * See Documentation/DocBook/deviceiobook.tmpl for more information.
+ */
+static inline void ___ia64_mmiowb(void)
+{
+	ia64_mfa();
+}
+
+static inline void*
+__ia64_mk_io_addr (unsigned long port)
+{
+	struct io_space *space;
+	unsigned long offset;
+
+	space = &io_space[IO_SPACE_NR(port)];
+	port = IO_SPACE_PORT(port);
+	if (space->sparse)
+		offset = IO_SPACE_SPARSE_ENCODING(port);
+	else
+		offset = port;
+
+	return (void *) (space->mmio_base | offset);
+}
+
+#define __ia64_inb	___ia64_inb
+#define __ia64_inw	___ia64_inw
+#define __ia64_inl	___ia64_inl
+#define __ia64_outb	___ia64_outb
+#define __ia64_outw	___ia64_outw
+#define __ia64_outl	___ia64_outl
+#define __ia64_readb	___ia64_readb
+#define __ia64_readw	___ia64_readw
+#define __ia64_readl	___ia64_readl
+#define __ia64_readq	___ia64_readq
+#define __ia64_readb_relaxed	___ia64_readb
+#define __ia64_readw_relaxed	___ia64_readw
+#define __ia64_readl_relaxed	___ia64_readl
+#define __ia64_readq_relaxed	___ia64_readq
+#define __ia64_writeb	___ia64_writeb
+#define __ia64_writew	___ia64_writew
+#define __ia64_writel	___ia64_writel
+#define __ia64_writeq	___ia64_writeq
+#define __ia64_mmiowb	___ia64_mmiowb
+
+/*
+ * For the in/out routines, we need to do "mf.a" _after_ doing the I/O access to ensure
+ * that the access has completed before executing other I/O accesses.  Since we're doing
+ * the accesses through an uncachable (UC) translation, the CPU will execute them in
+ * program order.  However, we still need to tell the compiler not to shuffle them around
+ * during optimization, which is why we use "volatile" pointers.
+ */
+
+static inline unsigned int
+___ia64_inb (unsigned long port)
+{
+	volatile unsigned char *addr = __ia64_mk_io_addr(port);
+	unsigned char ret;
+
+	ret = *addr;
+	__ia64_mf_a();
+	return ret;
+}
+
+static inline unsigned int
+___ia64_inw (unsigned long port)
+{
+	volatile unsigned short *addr = __ia64_mk_io_addr(port);
+	unsigned short ret;
+
+	ret = *addr;
+	__ia64_mf_a();
+	return ret;
+}
+
+static inline unsigned int
+___ia64_inl (unsigned long port)
+{
+	volatile unsigned int *addr = __ia64_mk_io_addr(port);
+	unsigned int ret;
+
+	ret = *addr;
+	__ia64_mf_a();
+	return ret;
+}
+
+static inline void
+___ia64_outb (unsigned char val, unsigned long port)
+{
+	volatile unsigned char *addr = __ia64_mk_io_addr(port);
+
+	*addr = val;
+	__ia64_mf_a();
+}
+
+static inline void
+___ia64_outw (unsigned short val, unsigned long port)
+{
+	volatile unsigned short *addr = __ia64_mk_io_addr(port);
+
+	*addr = val;
+	__ia64_mf_a();
+}
+
+static inline void
+___ia64_outl (unsigned int val, unsigned long port)
+{
+	volatile unsigned int *addr = __ia64_mk_io_addr(port);
+
+	*addr = val;
+	__ia64_mf_a();
+}
+
+static inline void
+__insb (unsigned long port, void *dst, unsigned long count)
+{
+	unsigned char *dp = dst;
+
+	while (count--)
+		*dp++ = platform_inb(port);
+}
+
+static inline void
+__insw (unsigned long port, void *dst, unsigned long count)
+{
+	unsigned short *dp = dst;
+
+	while (count--)
+		put_unaligned(platform_inw(port), dp++);
+}
+
+static inline void
+__insl (unsigned long port, void *dst, unsigned long count)
+{
+	unsigned int *dp = dst;
+
+	while (count--)
+		put_unaligned(platform_inl(port), dp++);
+}
+
+static inline void
+__outsb (unsigned long port, const void *src, unsigned long count)
+{
+	const unsigned char *sp = src;
+
+	while (count--)
+		platform_outb(*sp++, port);
+}
+
+static inline void
+__outsw (unsigned long port, const void *src, unsigned long count)
+{
+	const unsigned short *sp = src;
+
+	while (count--)
+		platform_outw(get_unaligned(sp++), port);
+}
+
+static inline void
+__outsl (unsigned long port, const void *src, unsigned long count)
+{
+	const unsigned int *sp = src;
+
+	while (count--)
+		platform_outl(get_unaligned(sp++), port);
+}
+
+/*
+ * Unfortunately, some platforms are broken and do not follow the IA-64 architecture
+ * specification regarding legacy I/O support.  Thus, we have to make these operations
+ * platform dependent...
+ */
+#define __inb		platform_inb
+#define __inw		platform_inw
+#define __inl		platform_inl
+#define __outb		platform_outb
+#define __outw		platform_outw
+#define __outl		platform_outl
+#define __mmiowb	platform_mmiowb
+
+#define inb(p)		__inb(p)
+#define inw(p)		__inw(p)
+#define inl(p)		__inl(p)
+#define insb(p,d,c)	__insb(p,d,c)
+#define insw(p,d,c)	__insw(p,d,c)
+#define insl(p,d,c)	__insl(p,d,c)
+#define outb(v,p)	__outb(v,p)
+#define outw(v,p)	__outw(v,p)
+#define outl(v,p)	__outl(v,p)
+#define outsb(p,s,c)	__outsb(p,s,c)
+#define outsw(p,s,c)	__outsw(p,s,c)
+#define outsl(p,s,c)	__outsl(p,s,c)
+#define mmiowb()	__mmiowb()
+
+/*
+ * The address passed to these functions are ioremap()ped already.
+ *
+ * We need these to be machine vectors since some platforms don't provide
+ * DMA coherence via PIO reads (PCI drivers and the spec imply that this is
+ * a good idea).  Writes are ok though for all existing ia64 platforms (and
+ * hopefully it'll stay that way).
+ */
+static inline unsigned char
+___ia64_readb (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned char __force *)addr;
+}
+
+static inline unsigned short
+___ia64_readw (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned short __force *)addr;
+}
+
+static inline unsigned int
+___ia64_readl (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned int __force *) addr;
+}
+
+static inline unsigned long
+___ia64_readq (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned long __force *) addr;
+}
+
+static inline void
+__writeb (unsigned char val, volatile void __iomem *addr)
+{
+	*(volatile unsigned char __force *) addr = val;
+}
+
+static inline void
+__writew (unsigned short val, volatile void __iomem *addr)
+{
+	*(volatile unsigned short __force *) addr = val;
+}
+
+static inline void
+__writel (unsigned int val, volatile void __iomem *addr)
+{
+	*(volatile unsigned int __force *) addr = val;
+}
+
+static inline void
+__writeq (unsigned long val, volatile void __iomem *addr)
+{
+	*(volatile unsigned long __force *) addr = val;
+}
+
+#define __readb		platform_readb
+#define __readw		platform_readw
+#define __readl		platform_readl
+#define __readq		platform_readq
+#define __readb_relaxed	platform_readb_relaxed
+#define __readw_relaxed	platform_readw_relaxed
+#define __readl_relaxed	platform_readl_relaxed
+#define __readq_relaxed	platform_readq_relaxed
+
+#define readb(a)	__readb((a))
+#define readw(a)	__readw((a))
+#define readl(a)	__readl((a))
+#define readq(a)	__readq((a))
+#define readb_relaxed(a)	__readb_relaxed((a))
+#define readw_relaxed(a)	__readw_relaxed((a))
+#define readl_relaxed(a)	__readl_relaxed((a))
+#define readq_relaxed(a)	__readq_relaxed((a))
+#define __raw_readb	readb
+#define __raw_readw	readw
+#define __raw_readl	readl
+#define __raw_readq	readq
+#define __raw_readb_relaxed	readb_relaxed
+#define __raw_readw_relaxed	readw_relaxed
+#define __raw_readl_relaxed	readl_relaxed
+#define __raw_readq_relaxed	readq_relaxed
+#define writeb(v,a)	__writeb((v), (a))
+#define writew(v,a)	__writew((v), (a))
+#define writel(v,a)	__writel((v), (a))
+#define writeq(v,a)	__writeq((v), (a))
+#define __raw_writeb	writeb
+#define __raw_writew	writew
+#define __raw_writel	writel
+#define __raw_writeq	writeq
+
+#ifndef inb_p
+# define inb_p		inb
+#endif
+#ifndef inw_p
+# define inw_p		inw
+#endif
+#ifndef inl_p
+# define inl_p		inl
+#endif
+
+#ifndef outb_p
+# define outb_p		outb
+#endif
+#ifndef outw_p
+# define outw_p		outw
+#endif
+#ifndef outl_p
+# define outl_p		outl
+#endif
+
+# ifdef __KERNEL__
+
+extern void __iomem * ioremap(unsigned long offset, unsigned long size);
+extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
+extern void iounmap (volatile void __iomem *addr);
+extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size);
+#define early_memremap(phys_addr, size)        early_ioremap(phys_addr, size)
+extern void early_iounmap (volatile void __iomem *addr, unsigned long size);
+static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned long size)
+{
+	return ioremap(phys_addr, size);
+}
+
+
+/*
+ * String version of IO memory access ops:
+ */
+extern void memcpy_fromio(void *dst, const volatile void __iomem *src, long n);
+extern void memcpy_toio(volatile void __iomem *dst, const void *src, long n);
+extern void memset_io(volatile void __iomem *s, int c, long n);
+
+# endif /* __KERNEL__ */
+
+#endif /* _ASM_IA64_IO_H */
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
new file mode 100644
index 00000000000..105c93b00b1
--- /dev/null
+++ b/arch/ia64/include/asm/iommu.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_IA64_IOMMU_H
+#define _ASM_IA64_IOMMU_H 1
+
+#define cpu_has_x2apic 0
+/* 10 seconds */
+#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
+
+extern void pci_iommu_shutdown(void);
+extern void no_iommu_init(void);
+#ifdef	CONFIG_INTEL_IOMMU
+extern int force_iommu, no_iommu;
+extern int iommu_pass_through;
+extern int iommu_detected;
+#else
+#define iommu_pass_through	(0)
+#define no_iommu		(1)
+#define iommu_detected		(0)
+#endif
+extern void iommu_dma_init(void);
+extern void machvec_init(const char *name);
+
+#endif
diff --git a/arch/ia64/include/asm/iommu_table.h b/arch/ia64/include/asm/iommu_table.h
new file mode 100644
index 00000000000..92c8d36ae5a
--- /dev/null
+++ b/arch/ia64/include/asm/iommu_table.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_IA64_IOMMU_TABLE_H
+#define _ASM_IA64_IOMMU_TABLE_H
+
+#define IOMMU_INIT_POST(_detect)
+
+#endif /* _ASM_IA64_IOMMU_TABLE_H */
diff --git a/arch/ia64/include/asm/iosapic.h b/arch/ia64/include/asm/iosapic.h
new file mode 100644
index 00000000000..94c89a2d97f
--- /dev/null
+++ b/arch/ia64/include/asm/iosapic.h
@@ -0,0 +1,121 @@
+#ifndef __ASM_IA64_IOSAPIC_H
+#define __ASM_IA64_IOSAPIC_H
+
+#define	IOSAPIC_REG_SELECT	0x0
+#define	IOSAPIC_WINDOW		0x10
+#define	IOSAPIC_EOI		0x40
+
+#define	IOSAPIC_VERSION		0x1
+
+/*
+ * Redirection table entry
+ */
+#define	IOSAPIC_RTE_LOW(i)	(0x10+i*2)
+#define	IOSAPIC_RTE_HIGH(i)	(0x11+i*2)
+
+#define	IOSAPIC_DEST_SHIFT		16
+
+/*
+ * Delivery mode
+ */
+#define	IOSAPIC_DELIVERY_SHIFT		8
+#define	IOSAPIC_FIXED			0x0
+#define	IOSAPIC_LOWEST_PRIORITY	0x1
+#define	IOSAPIC_PMI			0x2
+#define	IOSAPIC_NMI			0x4
+#define	IOSAPIC_INIT			0x5
+#define	IOSAPIC_EXTINT			0x7
+
+/*
+ * Interrupt polarity
+ */
+#define	IOSAPIC_POLARITY_SHIFT		13
+#define	IOSAPIC_POL_HIGH		0
+#define	IOSAPIC_POL_LOW		1
+
+/*
+ * Trigger mode
+ */
+#define	IOSAPIC_TRIGGER_SHIFT		15
+#define	IOSAPIC_EDGE			0
+#define	IOSAPIC_LEVEL			1
+
+/*
+ * Mask bit
+ */
+
+#define	IOSAPIC_MASK_SHIFT		16
+#define	IOSAPIC_MASK			(1<<IOSAPIC_MASK_SHIFT)
+
+#define IOSAPIC_VECTOR_MASK		0xffffff00
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_IOSAPIC
+
+#define NR_IOSAPICS			256
+
+#ifdef CONFIG_PARAVIRT_GUEST
+#include <asm/paravirt.h>
+#else
+#define iosapic_pcat_compat_init	ia64_native_iosapic_pcat_compat_init
+#define __iosapic_read			__ia64_native_iosapic_read
+#define __iosapic_write			__ia64_native_iosapic_write
+#define iosapic_get_irq_chip		ia64_native_iosapic_get_irq_chip
+#endif
+
+extern void __init ia64_native_iosapic_pcat_compat_init(void);
+extern struct irq_chip *ia64_native_iosapic_get_irq_chip(unsigned long trigger);
+
+static inline unsigned int
+__ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg)
+{
+	writel(reg, iosapic + IOSAPIC_REG_SELECT);
+	return readl(iosapic + IOSAPIC_WINDOW);
+}
+
+static inline void
+__ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
+{
+	writel(reg, iosapic + IOSAPIC_REG_SELECT);
+	writel(val, iosapic + IOSAPIC_WINDOW);
+}
+
+static inline void iosapic_eoi(char __iomem *iosapic, u32 vector)
+{
+	writel(vector, iosapic + IOSAPIC_EOI);
+}
+
+extern void __init iosapic_system_init (int pcat_compat);
+extern int iosapic_init (unsigned long address, unsigned int gsi_base);
+extern int iosapic_remove (unsigned int gsi_base);
+extern int gsi_to_irq (unsigned int gsi);
+extern int iosapic_register_intr (unsigned int gsi, unsigned long polarity,
+				  unsigned long trigger);
+extern void iosapic_unregister_intr (unsigned int irq);
+extern void iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
+				      unsigned long polarity,
+				      unsigned long trigger);
+extern int __init iosapic_register_platform_intr (u32 int_type,
+					   unsigned int gsi,
+					   int pmi_vector,
+					   u16 eid, u16 id,
+					   unsigned long polarity,
+					   unsigned long trigger);
+
+#ifdef CONFIG_NUMA
+extern void map_iosapic_to_node (unsigned int, int);
+#endif
+#else
+#define iosapic_system_init(pcat_compat)			do { } while (0)
+#define iosapic_init(address,gsi_base)				(-EINVAL)
+#define iosapic_remove(gsi_base)				(-ENODEV)
+#define iosapic_register_intr(gsi,polarity,trigger)		(gsi)
+#define iosapic_unregister_intr(irq)				do { } while (0)
+#define iosapic_override_isa_irq(isa_irq,gsi,polarity,trigger)	do { } while (0)
+#define iosapic_register_platform_intr(type,gsi,pmi,eid,id, \
+	polarity,trigger)					(gsi)
+#endif
+
+# endif /* !__ASSEMBLY__ */
+#endif /* __ASM_IA64_IOSAPIC_H */
diff --git a/arch/ia64/include/asm/irq.h b/arch/ia64/include/asm/irq.h
new file mode 100644
index 00000000000..820667cbea7
--- /dev/null
+++ b/arch/ia64/include/asm/irq.h
@@ -0,0 +1,37 @@
+#ifndef _ASM_IA64_IRQ_H
+#define _ASM_IA64_IRQ_H
+
+/*
+ * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 11/24/98	S.Eranian 	updated TIMER_IRQ and irq_canonicalize
+ * 01/20/99	S.Eranian	added keyboard interrupt
+ * 02/29/00     D.Mosberger	moved most things into hw_irq.h
+ */
+
+#include <linux/types.h>
+#include <linux/cpumask.h>
+#include <generated/nr-irqs.h>
+
+static __inline__ int
+irq_canonicalize (int irq)
+{
+	/*
+	 * We do the legacy thing here of pretending that irqs < 16
+	 * are 8259 irqs.  This really shouldn't be necessary at all,
+	 * but we keep it here as serial.c still uses it...
+	 */
+	return ((irq == 2) ? 9 : irq);
+}
+
+extern void set_irq_affinity_info (unsigned int irq, int dest, int redir);
+bool is_affinity_mask_valid(const struct cpumask *cpumask);
+
+#define is_affinity_mask_valid is_affinity_mask_valid
+
+int create_irq(void);
+void destroy_irq(unsigned int irq);
+
+#endif /* _ASM_IA64_IRQ_H */
diff --git a/arch/ia64/include/asm/irq_regs.h b/arch/ia64/include/asm/irq_regs.h
new file mode 100644
index 00000000000..3dd9c0b7027
--- /dev/null
+++ b/arch/ia64/include/asm/irq_regs.h
@@ -0,0 +1 @@
+#include <asm-generic/irq_regs.h>
diff --git a/arch/ia64/include/asm/irq_remapping.h b/arch/ia64/include/asm/irq_remapping.h
new file mode 100644
index 00000000000..e3b3556e2e1
--- /dev/null
+++ b/arch/ia64/include/asm/irq_remapping.h
@@ -0,0 +1,6 @@
+#ifndef __IA64_INTR_REMAPPING_H
+#define __IA64_INTR_REMAPPING_H
+#define irq_remapping_enabled 0
+#define dmar_alloc_hwirq	create_irq
+#define dmar_free_hwirq		destroy_irq
+#endif
diff --git a/arch/ia64/include/asm/irqflags.h b/arch/ia64/include/asm/irqflags.h
new file mode 100644
index 00000000000..cec6c06b52c
--- /dev/null
+++ b/arch/ia64/include/asm/irqflags.h
@@ -0,0 +1,98 @@
+/*
+ * IRQ flags defines.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+
+#ifndef _ASM_IA64_IRQFLAGS_H
+#define _ASM_IA64_IRQFLAGS_H
+
+#include <asm/pal.h>
+#include <asm/kregs.h>
+
+#ifdef CONFIG_IA64_DEBUG_IRQ
+extern unsigned long last_cli_ip;
+static inline void arch_maybe_save_ip(unsigned long flags)
+{
+	if (flags & IA64_PSR_I)
+		last_cli_ip = ia64_getreg(_IA64_REG_IP);
+}
+#else
+#define arch_maybe_save_ip(flags) do {} while (0)
+#endif
+
+/*
+ * - clearing psr.i is implicitly serialized (visible by next insn)
+ * - setting psr.i requires data serialization
+ * - we need a stop-bit before reading PSR because we sometimes
+ *   write a floating-point register right before reading the PSR
+ *   and that writes to PSR.mfl
+ */
+
+static inline unsigned long arch_local_save_flags(void)
+{
+	ia64_stop();
+#ifdef CONFIG_PARAVIRT
+	return ia64_get_psr_i();
+#else
+	return ia64_getreg(_IA64_REG_PSR);
+#endif
+}
+
+static inline unsigned long arch_local_irq_save(void)
+{
+	unsigned long flags = arch_local_save_flags();
+
+	ia64_stop();
+	ia64_rsm(IA64_PSR_I);
+	arch_maybe_save_ip(flags);
+	return flags;
+}
+
+static inline void arch_local_irq_disable(void)
+{
+#ifdef CONFIG_IA64_DEBUG_IRQ
+	arch_local_irq_save();
+#else
+	ia64_stop();
+	ia64_rsm(IA64_PSR_I);
+#endif
+}
+
+static inline void arch_local_irq_enable(void)
+{
+	ia64_stop();
+	ia64_ssm(IA64_PSR_I);
+	ia64_srlz_d();
+}
+
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+#ifdef CONFIG_IA64_DEBUG_IRQ
+	unsigned long old_psr = arch_local_save_flags();
+#endif
+	ia64_intrin_local_irq_restore(flags & IA64_PSR_I);
+	arch_maybe_save_ip(old_psr & ~flags);
+}
+
+static inline bool arch_irqs_disabled_flags(unsigned long flags)
+{
+	return (flags & IA64_PSR_I) == 0;
+}
+
+static inline bool arch_irqs_disabled(void)
+{
+	return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+static inline void arch_safe_halt(void)
+{
+	arch_local_irq_enable();
+	ia64_pal_halt_light();	/* PAL_HALT_LIGHT */
+}
+
+
+#endif /* _ASM_IA64_IRQFLAGS_H */
diff --git a/arch/ia64/include/asm/kdebug.h b/arch/ia64/include/asm/kdebug.h
new file mode 100644
index 00000000000..d11a6985503
--- /dev/null
+++ b/arch/ia64/include/asm/kdebug.h
@@ -0,0 +1,57 @@
+#ifndef _IA64_KDEBUG_H
+#define _IA64_KDEBUG_H 1
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Intel Corporation, 2005
+ *
+ * 2005-Apr     Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
+ *              <anil.s.keshavamurthy@intel.com> adopted from
+ *              include/asm-x86_64/kdebug.h
+ *
+ * 2005-Oct	Keith Owens <kaos@sgi.com>.  Expand notify_die to cover more
+ *		events.
+ */
+
+enum die_val {
+	DIE_BREAK = 1,
+	DIE_FAULT,
+	DIE_OOPS,
+	DIE_MACHINE_HALT,
+	DIE_MACHINE_RESTART,
+	DIE_MCA_MONARCH_ENTER,
+	DIE_MCA_MONARCH_PROCESS,
+	DIE_MCA_MONARCH_LEAVE,
+	DIE_MCA_SLAVE_ENTER,
+	DIE_MCA_SLAVE_PROCESS,
+	DIE_MCA_SLAVE_LEAVE,
+	DIE_MCA_RENDZVOUS_ENTER,
+	DIE_MCA_RENDZVOUS_PROCESS,
+	DIE_MCA_RENDZVOUS_LEAVE,
+	DIE_MCA_NEW_TIMEOUT,
+	DIE_INIT_ENTER,
+	DIE_INIT_MONARCH_ENTER,
+	DIE_INIT_MONARCH_PROCESS,
+	DIE_INIT_MONARCH_LEAVE,
+	DIE_INIT_SLAVE_ENTER,
+	DIE_INIT_SLAVE_PROCESS,
+	DIE_INIT_SLAVE_LEAVE,
+	DIE_KDEBUG_ENTER,
+	DIE_KDEBUG_LEAVE,
+	DIE_KDUMP_ENTER,
+	DIE_KDUMP_LEAVE,
+};
+
+#endif
diff --git a/arch/ia64/include/asm/kexec.h b/arch/ia64/include/asm/kexec.h
new file mode 100644
index 00000000000..aea2b81b03a
--- /dev/null
+++ b/arch/ia64/include/asm/kexec.h
@@ -0,0 +1,45 @@
+#ifndef _ASM_IA64_KEXEC_H
+#define _ASM_IA64_KEXEC_H
+
+#include <asm/setup.h>
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_PAGE_SIZE (8192 + 8192 + 4096)
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_IA_64
+
+#define kexec_flush_icache_page(page) do { \
+                unsigned long page_addr = (unsigned long)page_address(page); \
+                flush_icache_range(page_addr, page_addr + PAGE_SIZE); \
+        } while(0)
+
+extern struct kimage *ia64_kimage;
+extern const unsigned int relocate_new_kernel_size;
+extern void relocate_new_kernel(unsigned long, unsigned long,
+		struct ia64_boot_param *, unsigned long);
+static inline void
+crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
+{
+}
+extern struct resource efi_memmap_res;
+extern struct resource boot_param_res;
+extern void kdump_smp_send_stop(void);
+extern void kdump_smp_send_init(void);
+extern void kexec_disable_iosapic(void);
+extern void crash_save_this_cpu(void);
+struct rsvd_region;
+extern unsigned long kdump_find_rsvd_region(unsigned long size,
+		struct rsvd_region *rsvd_regions, int n);
+extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg);
+extern int kdump_status[];
+extern atomic_t kdump_cpu_freezed;
+extern atomic_t kdump_in_progress;
+
+#endif /* _ASM_IA64_KEXEC_H */
diff --git a/arch/ia64/include/asm/kmap_types.h b/arch/ia64/include/asm/kmap_types.h
new file mode 100644
index 00000000000..05d5f999610
--- /dev/null
+++ b/arch/ia64/include/asm/kmap_types.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_IA64_KMAP_TYPES_H
+#define _ASM_IA64_KMAP_TYPES_H
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+#define  __WITH_KM_FENCE
+#endif
+
+#include <asm-generic/kmap_types.h>
+
+#undef __WITH_KM_FENCE
+
+#endif /* _ASM_IA64_KMAP_TYPES_H */
diff --git a/arch/ia64/include/asm/kprobes.h b/arch/ia64/include/asm/kprobes.h
new file mode 100644
index 00000000000..d5505d6f238
--- /dev/null
+++ b/arch/ia64/include/asm/kprobes.h
@@ -0,0 +1,127 @@
+#ifndef _ASM_KPROBES_H
+#define _ASM_KPROBES_H
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) Intel Corporation, 2005
+ *
+ * 2005-Apr     Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
+ *              <anil.s.keshavamurthy@intel.com> adapted from i386
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <asm/break.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE   2	/* last half is for kprobe-booster */
+#define BREAK_INST	(long)(__IA64_BREAK_KPROBE << 6)
+#define NOP_M_INST	(long)(1<<27)
+#define BRL_INST(i1, i2) ((long)((0xcL << 37) |	/* brl */ \
+				(0x1L << 12) |	/* many */ \
+				(((i1) & 1) << 36) | ((i2) << 13))) /* imm */
+
+typedef union cmp_inst {
+	struct {
+	unsigned long long qp : 6;
+	unsigned long long p1 : 6;
+	unsigned long long c  : 1;
+	unsigned long long r2 : 7;
+	unsigned long long r3 : 7;
+	unsigned long long p2 : 6;
+	unsigned long long ta : 1;
+	unsigned long long x2 : 2;
+	unsigned long long tb : 1;
+	unsigned long long opcode : 4;
+	unsigned long long reserved : 23;
+	}f;
+	unsigned long long l;
+} cmp_inst_t;
+
+struct kprobe;
+
+typedef struct _bundle {
+	struct {
+		unsigned long long template : 5;
+		unsigned long long slot0 : 41;
+		unsigned long long slot1_p0 : 64-46;
+	} quad0;
+	struct {
+		unsigned long long slot1_p1 : 41 - (64-46);
+		unsigned long long slot2 : 41;
+	} quad1;
+} __attribute__((__aligned__(16)))  bundle_t;
+
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned long status;
+};
+
+#define	MAX_PARAM_RSE_SIZE	(0x60+0x60/0x3f)
+/* per-cpu kprobe control block */
+#define ARCH_PREV_KPROBE_SZ 2
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	struct pt_regs jprobe_saved_regs;
+	unsigned long jprobes_saved_stacked_regs[MAX_PARAM_RSE_SIZE];
+	unsigned long *bsp;
+	unsigned long cfm;
+	atomic_t prev_kprobe_index;
+	struct prev_kprobe prev_kprobe[ARCH_PREV_KPROBE_SZ];
+};
+
+#define kretprobe_blacklist_size 0
+
+#define SLOT0_OPCODE_SHIFT	(37)
+#define SLOT1_p1_OPCODE_SHIFT	(37 - (64-46))
+#define SLOT2_OPCODE_SHIFT 	(37)
+
+#define INDIRECT_CALL_OPCODE		(1)
+#define IP_RELATIVE_CALL_OPCODE		(5)
+#define IP_RELATIVE_BRANCH_OPCODE	(4)
+#define IP_RELATIVE_PREDICT_OPCODE	(7)
+#define LONG_BRANCH_OPCODE		(0xC)
+#define LONG_CALL_OPCODE		(0xD)
+#define flush_insn_slot(p)		do { } while (0)
+
+typedef struct kprobe_opcode {
+	bundle_t bundle;
+} kprobe_opcode_t;
+
+/* Architecture specific copy of original instruction*/
+struct arch_specific_insn {
+	/* copy of the instruction to be emulated */
+	kprobe_opcode_t *insn;
+ #define INST_FLAG_FIX_RELATIVE_IP_ADDR		1
+ #define INST_FLAG_FIX_BRANCH_REG		2
+ #define INST_FLAG_BREAK_INST			4
+ #define INST_FLAG_BOOSTABLE			8
+ 	unsigned long inst_flag;
+ 	unsigned short target_br_reg;
+	unsigned short slot;
+};
+
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+				    unsigned long val, void *data);
+
+extern void invalidate_stacked_regs(void);
+extern void flush_register_stack(void);
+extern void arch_remove_kprobe(struct kprobe *p);
+
+#endif				/* _ASM_KPROBES_H */
diff --git a/arch/ia64/include/asm/kregs.h b/arch/ia64/include/asm/kregs.h
new file mode 100644
index 00000000000..39e65f6639f
--- /dev/null
+++ b/arch/ia64/include/asm/kregs.h
@@ -0,0 +1,165 @@
+#ifndef _ASM_IA64_KREGS_H
+#define _ASM_IA64_KREGS_H
+
+/*
+ * Copyright (C) 2001-2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+/*
+ * This file defines the kernel register usage convention used by Linux/ia64.
+ */
+
+/*
+ * Kernel registers:
+ */
+#define IA64_KR_IO_BASE		0	/* ar.k0: legacy I/O base address */
+#define IA64_KR_TSSD		1	/* ar.k1: IVE uses this as the TSSD */
+#define IA64_KR_PER_CPU_DATA	3	/* ar.k3: physical per-CPU base */
+#define IA64_KR_CURRENT_STACK	4	/* ar.k4: what's mapped in IA64_TR_CURRENT_STACK */
+#define IA64_KR_FPU_OWNER	5	/* ar.k5: fpu-owner (UP only, at the moment) */
+#define IA64_KR_CURRENT		6	/* ar.k6: "current" task pointer */
+#define IA64_KR_PT_BASE		7	/* ar.k7: page table base address (physical) */
+
+#define _IA64_KR_PASTE(x,y)	x##y
+#define _IA64_KR_PREFIX(n)	_IA64_KR_PASTE(ar.k, n)
+#define IA64_KR(n)		_IA64_KR_PREFIX(IA64_KR_##n)
+
+/*
+ * Translation registers:
+ */
+#define IA64_TR_KERNEL		0	/* itr0, dtr0: maps kernel image (code & data) */
+#define IA64_TR_PALCODE		1	/* itr1: maps PALcode as required by EFI */
+#define IA64_TR_CURRENT_STACK	1	/* dtr1: maps kernel's memory- & register-stacks */
+
+#define IA64_TR_ALLOC_BASE	2 	/* itr&dtr: Base of dynamic TR resource*/
+#define IA64_TR_ALLOC_MAX	64 	/* Max number for dynamic use*/
+
+/* Processor status register bits: */
+#define IA64_PSR_BE_BIT		1
+#define IA64_PSR_UP_BIT		2
+#define IA64_PSR_AC_BIT		3
+#define IA64_PSR_MFL_BIT	4
+#define IA64_PSR_MFH_BIT	5
+#define IA64_PSR_IC_BIT		13
+#define IA64_PSR_I_BIT		14
+#define IA64_PSR_PK_BIT		15
+#define IA64_PSR_DT_BIT		17
+#define IA64_PSR_DFL_BIT	18
+#define IA64_PSR_DFH_BIT	19
+#define IA64_PSR_SP_BIT		20
+#define IA64_PSR_PP_BIT		21
+#define IA64_PSR_DI_BIT		22
+#define IA64_PSR_SI_BIT		23
+#define IA64_PSR_DB_BIT		24
+#define IA64_PSR_LP_BIT		25
+#define IA64_PSR_TB_BIT		26
+#define IA64_PSR_RT_BIT		27
+/* The following are not affected by save_flags()/restore_flags(): */
+#define IA64_PSR_CPL0_BIT	32
+#define IA64_PSR_CPL1_BIT	33
+#define IA64_PSR_IS_BIT		34
+#define IA64_PSR_MC_BIT		35
+#define IA64_PSR_IT_BIT		36
+#define IA64_PSR_ID_BIT		37
+#define IA64_PSR_DA_BIT		38
+#define IA64_PSR_DD_BIT		39
+#define IA64_PSR_SS_BIT		40
+#define IA64_PSR_RI_BIT		41
+#define IA64_PSR_ED_BIT		43
+#define IA64_PSR_BN_BIT		44
+#define IA64_PSR_IA_BIT		45
+
+/* A mask of PSR bits that we generally don't want to inherit across a clone2() or an
+   execve().  Only list flags here that need to be cleared/set for BOTH clone2() and
+   execve().  */
+#define IA64_PSR_BITS_TO_CLEAR	(IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_DB | IA64_PSR_LP | \
+				 IA64_PSR_TB  | IA64_PSR_ID  | IA64_PSR_DA | IA64_PSR_DD | \
+				 IA64_PSR_SS  | IA64_PSR_ED  | IA64_PSR_IA)
+#define IA64_PSR_BITS_TO_SET	(IA64_PSR_DFH | IA64_PSR_SP)
+
+#define IA64_PSR_BE	(__IA64_UL(1) << IA64_PSR_BE_BIT)
+#define IA64_PSR_UP	(__IA64_UL(1) << IA64_PSR_UP_BIT)
+#define IA64_PSR_AC	(__IA64_UL(1) << IA64_PSR_AC_BIT)
+#define IA64_PSR_MFL	(__IA64_UL(1) << IA64_PSR_MFL_BIT)
+#define IA64_PSR_MFH	(__IA64_UL(1) << IA64_PSR_MFH_BIT)
+#define IA64_PSR_IC	(__IA64_UL(1) << IA64_PSR_IC_BIT)
+#define IA64_PSR_I	(__IA64_UL(1) << IA64_PSR_I_BIT)
+#define IA64_PSR_PK	(__IA64_UL(1) << IA64_PSR_PK_BIT)
+#define IA64_PSR_DT	(__IA64_UL(1) << IA64_PSR_DT_BIT)
+#define IA64_PSR_DFL	(__IA64_UL(1) << IA64_PSR_DFL_BIT)
+#define IA64_PSR_DFH	(__IA64_UL(1) << IA64_PSR_DFH_BIT)
+#define IA64_PSR_SP	(__IA64_UL(1) << IA64_PSR_SP_BIT)
+#define IA64_PSR_PP	(__IA64_UL(1) << IA64_PSR_PP_BIT)
+#define IA64_PSR_DI	(__IA64_UL(1) << IA64_PSR_DI_BIT)
+#define IA64_PSR_SI	(__IA64_UL(1) << IA64_PSR_SI_BIT)
+#define IA64_PSR_DB	(__IA64_UL(1) << IA64_PSR_DB_BIT)
+#define IA64_PSR_LP	(__IA64_UL(1) << IA64_PSR_LP_BIT)
+#define IA64_PSR_TB	(__IA64_UL(1) << IA64_PSR_TB_BIT)
+#define IA64_PSR_RT	(__IA64_UL(1) << IA64_PSR_RT_BIT)
+/* The following are not affected by save_flags()/restore_flags(): */
+#define IA64_PSR_CPL	(__IA64_UL(3) << IA64_PSR_CPL0_BIT)
+#define IA64_PSR_IS	(__IA64_UL(1) << IA64_PSR_IS_BIT)
+#define IA64_PSR_MC	(__IA64_UL(1) << IA64_PSR_MC_BIT)
+#define IA64_PSR_IT	(__IA64_UL(1) << IA64_PSR_IT_BIT)
+#define IA64_PSR_ID	(__IA64_UL(1) << IA64_PSR_ID_BIT)
+#define IA64_PSR_DA	(__IA64_UL(1) << IA64_PSR_DA_BIT)
+#define IA64_PSR_DD	(__IA64_UL(1) << IA64_PSR_DD_BIT)
+#define IA64_PSR_SS	(__IA64_UL(1) << IA64_PSR_SS_BIT)
+#define IA64_PSR_RI	(__IA64_UL(3) << IA64_PSR_RI_BIT)
+#define IA64_PSR_ED	(__IA64_UL(1) << IA64_PSR_ED_BIT)
+#define IA64_PSR_BN	(__IA64_UL(1) << IA64_PSR_BN_BIT)
+#define IA64_PSR_IA	(__IA64_UL(1) << IA64_PSR_IA_BIT)
+
+/* User mask bits: */
+#define IA64_PSR_UM	(IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | IA64_PSR_MFH)
+
+/* Default Control Register */
+#define IA64_DCR_PP_BIT		 0	/* privileged performance monitor default */
+#define IA64_DCR_BE_BIT		 1	/* big-endian default */
+#define IA64_DCR_LC_BIT		 2	/* ia32 lock-check enable */
+#define IA64_DCR_DM_BIT		 8	/* defer TLB miss faults */
+#define IA64_DCR_DP_BIT		 9	/* defer page-not-present faults */
+#define IA64_DCR_DK_BIT		10	/* defer key miss faults */
+#define IA64_DCR_DX_BIT		11	/* defer key permission faults */
+#define IA64_DCR_DR_BIT		12	/* defer access right faults */
+#define IA64_DCR_DA_BIT		13	/* defer access bit faults */
+#define IA64_DCR_DD_BIT		14	/* defer debug faults */
+
+#define IA64_DCR_PP	(__IA64_UL(1) << IA64_DCR_PP_BIT)
+#define IA64_DCR_BE	(__IA64_UL(1) << IA64_DCR_BE_BIT)
+#define IA64_DCR_LC	(__IA64_UL(1) << IA64_DCR_LC_BIT)
+#define IA64_DCR_DM	(__IA64_UL(1) << IA64_DCR_DM_BIT)
+#define IA64_DCR_DP	(__IA64_UL(1) << IA64_DCR_DP_BIT)
+#define IA64_DCR_DK	(__IA64_UL(1) << IA64_DCR_DK_BIT)
+#define IA64_DCR_DX	(__IA64_UL(1) << IA64_DCR_DX_BIT)
+#define IA64_DCR_DR	(__IA64_UL(1) << IA64_DCR_DR_BIT)
+#define IA64_DCR_DA	(__IA64_UL(1) << IA64_DCR_DA_BIT)
+#define IA64_DCR_DD	(__IA64_UL(1) << IA64_DCR_DD_BIT)
+
+/* Interrupt Status Register */
+#define IA64_ISR_X_BIT		32	/* execute access */
+#define IA64_ISR_W_BIT		33	/* write access */
+#define IA64_ISR_R_BIT		34	/* read access */
+#define IA64_ISR_NA_BIT		35	/* non-access */
+#define IA64_ISR_SP_BIT		36	/* speculative load exception */
+#define IA64_ISR_RS_BIT		37	/* mandatory register-stack exception */
+#define IA64_ISR_IR_BIT		38	/* invalid register frame exception */
+#define IA64_ISR_CODE_MASK	0xf
+
+#define IA64_ISR_X	(__IA64_UL(1) << IA64_ISR_X_BIT)
+#define IA64_ISR_W	(__IA64_UL(1) << IA64_ISR_W_BIT)
+#define IA64_ISR_R	(__IA64_UL(1) << IA64_ISR_R_BIT)
+#define IA64_ISR_NA	(__IA64_UL(1) << IA64_ISR_NA_BIT)
+#define IA64_ISR_SP	(__IA64_UL(1) << IA64_ISR_SP_BIT)
+#define IA64_ISR_RS	(__IA64_UL(1) << IA64_ISR_RS_BIT)
+#define IA64_ISR_IR	(__IA64_UL(1) << IA64_ISR_IR_BIT)
+
+/* ISR code field for non-access instructions */
+#define IA64_ISR_CODE_TPA	0
+#define IA64_ISR_CODE_FC	1
+#define IA64_ISR_CODE_PROBE	2
+#define IA64_ISR_CODE_TAK	3
+#define IA64_ISR_CODE_LFETCH	4
+#define IA64_ISR_CODE_PROBEF	5
+
+#endif /* _ASM_IA64_kREGS_H */
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
new file mode 100644
index 00000000000..db95f570705
--- /dev/null
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -0,0 +1,600 @@
+/*
+ * kvm_host.h: used for kvm module, and hold ia64-specific sections.
+ *
+ * Copyright (C) 2007, Intel Corporation.
+ *
+ * Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __ASM_KVM_HOST_H
+#define __ASM_KVM_HOST_H
+
+#define KVM_USER_MEM_SLOTS 32
+
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
+
+/* define exit reasons from vmm to kvm*/
+#define EXIT_REASON_VM_PANIC		0
+#define EXIT_REASON_MMIO_INSTRUCTION	1
+#define EXIT_REASON_PAL_CALL		2
+#define EXIT_REASON_SAL_CALL		3
+#define EXIT_REASON_SWITCH_RR6		4
+#define EXIT_REASON_VM_DESTROY		5
+#define EXIT_REASON_EXTERNAL_INTERRUPT	6
+#define EXIT_REASON_IPI			7
+#define EXIT_REASON_PTC_G		8
+#define EXIT_REASON_DEBUG		20
+
+/*Define vmm address space and vm data space.*/
+#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
+#define KVM_VMM_SHIFT 24
+#define KVM_VMM_BASE 0xD000000000000000
+#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
+
+/*
+ * Define vm_buffer, used by PAL Services, base address.
+ * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
+ */
+#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
+#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
+
+/*
+ * kvm guest's data area looks as follow:
+ *
+ *            +----------------------+	-------	KVM_VM_DATA_SIZE
+ *	      |	    vcpu[n]'s data   |	 |     ___________________KVM_STK_OFFSET
+ *     	      |			     |	 |    /			  |
+ *     	      |	       ..........    |	 |   /vcpu's struct&stack |
+ *     	      |	       ..........    |	 |  /---------------------|---- 0
+ *	      |	    vcpu[5]'s data   |	 | /	   vpd		  |
+ *	      |	    vcpu[4]'s data   |	 |/-----------------------|
+ *	      |	    vcpu[3]'s data   |	 /	   vtlb		  |
+ *	      |	    vcpu[2]'s data   |	/|------------------------|
+ *	      |	    vcpu[1]'s data   |/  |	   vhpt		  |
+ *	      |	    vcpu[0]'s data   |____________________________|
+ *            +----------------------+	 |
+ *	      |	   memory dirty log  |	 |
+ *            +----------------------+	 |
+ *	      |	   vm's data struct  |	 |
+ *            +----------------------+	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |			     |	 |
+ *	      |	  vm's p2m table  |	 |
+ *	      |			     |	 |
+ *            |			     |	 |
+ *	      |			     |	 |  |
+ * vm's data->|			     |   |  |
+ *	      +----------------------+ ------- 0
+ * To support large memory, needs to increase the size of p2m.
+ * To support more vcpus, needs to ensure it has enough space to
+ * hold vcpus' data.
+ */
+
+#define KVM_VM_DATA_SHIFT	26
+#define KVM_VM_DATA_SIZE	(__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
+#define KVM_VM_DATA_BASE	(KVM_VMM_BASE + KVM_VM_DATA_SIZE)
+
+#define KVM_P2M_BASE		KVM_VM_DATA_BASE
+#define KVM_P2M_SIZE		(__IA64_UL_CONST(24) << 20)
+
+#define VHPT_SHIFT		16
+#define VHPT_SIZE		(__IA64_UL_CONST(1) << VHPT_SHIFT)
+#define VHPT_NUM_ENTRIES	(__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
+
+#define VTLB_SHIFT		16
+#define VTLB_SIZE		(__IA64_UL_CONST(1) << VTLB_SHIFT)
+#define VTLB_NUM_ENTRIES	(1UL << (VHPT_SHIFT-5))
+
+#define VPD_SHIFT		16
+#define VPD_SIZE		(__IA64_UL_CONST(1) << VPD_SHIFT)
+
+#define VCPU_STRUCT_SHIFT	16
+#define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
+
+/*
+ * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
+ */
+#define KVM_STK_SHIFT		16
+#define KVM_STK_OFFSET		(__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
+
+#define KVM_VM_STRUCT_SHIFT	19
+#define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
+
+#define KVM_MEM_DIRY_LOG_SHIFT	19
+#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+/*Define the max vcpus and memory for Guests.*/
+#define KVM_MAX_VCPUS	(KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
+			KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
+#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
+
+#define VMM_LOG_LEN 256
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/kvm.h>
+#include <linux/kvm_para.h>
+#include <linux/kvm_types.h>
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/page.h>
+
+struct kvm_vcpu_data {
+	char vcpu_vhpt[VHPT_SIZE];
+	char vcpu_vtlb[VTLB_SIZE];
+	char vcpu_vpd[VPD_SIZE];
+	char vcpu_struct[VCPU_STRUCT_SIZE];
+};
+
+struct kvm_vm_data {
+	char kvm_p2m[KVM_P2M_SIZE];
+	char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
+	char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
+	struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
+};
+
+#define VCPU_BASE(n)	(KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, vcpu_data[n]))
+#define KVM_VM_BASE	(KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, kvm_vm_struct))
+#define KVM_MEM_DIRTY_LOG_BASE	KVM_VM_DATA_BASE + \
+				offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
+
+#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
+#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
+#define VPD_BASE(n)  (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
+#define VCPU_STRUCT_BASE(n)	(VCPU_BASE(n) + \
+				offsetof(struct kvm_vcpu_data, vcpu_struct))
+
+/*IO section definitions*/
+#define IOREQ_READ      1
+#define IOREQ_WRITE     0
+
+#define STATE_IOREQ_NONE        0
+#define STATE_IOREQ_READY       1
+#define STATE_IOREQ_INPROCESS   2
+#define STATE_IORESP_READY      3
+
+/*Guest Physical address layout.*/
+#define GPFN_MEM        (0UL << 60) /* Guest pfn is normal mem */
+#define GPFN_FRAME_BUFFER   (1UL << 60) /* VGA framebuffer */
+#define GPFN_LOW_MMIO       (2UL << 60) /* Low MMIO range */
+#define GPFN_PIB        (3UL << 60) /* PIB base */
+#define GPFN_IOSAPIC        (4UL << 60) /* IOSAPIC base */
+#define GPFN_LEGACY_IO      (5UL << 60) /* Legacy I/O base */
+#define GPFN_GFW        (6UL << 60) /* Guest Firmware */
+#define GPFN_PHYS_MMIO      (7UL << 60) /* Directed MMIO Range */
+
+#define GPFN_IO_MASK        (7UL << 60) /* Guest pfn is I/O type */
+#define GPFN_INV_MASK       (1UL << 63) /* Guest pfn is invalid */
+#define INVALID_MFN       (~0UL)
+#define MEM_G   (1UL << 30)
+#define MEM_M   (1UL << 20)
+#define MMIO_START       (3 * MEM_G)
+#define MMIO_SIZE        (512 * MEM_M)
+#define VGA_IO_START     0xA0000UL
+#define VGA_IO_SIZE      0x20000
+#define LEGACY_IO_START  (MMIO_START + MMIO_SIZE)
+#define LEGACY_IO_SIZE   (64 * MEM_M)
+#define IO_SAPIC_START   0xfec00000UL
+#define IO_SAPIC_SIZE    0x100000
+#define PIB_START 0xfee00000UL
+#define PIB_SIZE 0x200000
+#define GFW_START        (4 * MEM_G - 16 * MEM_M)
+#define GFW_SIZE         (16 * MEM_M)
+
+/*Deliver mode, defined for ioapic.c*/
+#define dest_Fixed IOSAPIC_FIXED
+#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY
+
+#define NMI_VECTOR      		2
+#define ExtINT_VECTOR       		0
+#define NULL_VECTOR     		(-1)
+#define IA64_SPURIOUS_INT_VECTOR    	0x0f
+
+#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24)
+
+/*
+ *Delivery mode
+ */
+#define SAPIC_DELIV_SHIFT      8
+#define SAPIC_FIXED            0x0
+#define SAPIC_LOWEST_PRIORITY  0x1
+#define SAPIC_PMI              0x2
+#define SAPIC_NMI              0x4
+#define SAPIC_INIT             0x5
+#define SAPIC_EXTINT           0x7
+
+/*
+ * vcpu->requests bit members for arch
+ */
+#define KVM_REQ_PTC_G		32
+#define KVM_REQ_RESUME		33
+
+struct kvm;
+struct kvm_vcpu;
+
+struct kvm_mmio_req {
+	uint64_t addr;          /*  physical address		*/
+	uint64_t size;          /*  size in bytes		*/
+	uint64_t data;          /*  data (or paddr of data)     */
+	uint8_t state:4;
+	uint8_t dir:1;          /*  1=read, 0=write             */
+};
+
+/*Pal data struct */
+struct kvm_pal_call{
+	/*In area*/
+	uint64_t gr28;
+	uint64_t gr29;
+	uint64_t gr30;
+	uint64_t gr31;
+	/*Out area*/
+	struct ia64_pal_retval ret;
+};
+
+/* Sal data structure */
+struct kvm_sal_call{
+	/*In area*/
+	uint64_t in0;
+	uint64_t in1;
+	uint64_t in2;
+	uint64_t in3;
+	uint64_t in4;
+	uint64_t in5;
+	uint64_t in6;
+	uint64_t in7;
+	struct sal_ret_values ret;
+};
+
+/*Guest change rr6*/
+struct kvm_switch_rr6 {
+	uint64_t old_rr;
+	uint64_t new_rr;
+};
+
+union ia64_ipi_a{
+	unsigned long val;
+	struct {
+		unsigned long rv  : 3;
+		unsigned long ir  : 1;
+		unsigned long eid : 8;
+		unsigned long id  : 8;
+		unsigned long ib_base : 44;
+	};
+};
+
+union ia64_ipi_d {
+	unsigned long val;
+	struct {
+		unsigned long vector : 8;
+		unsigned long dm  : 3;
+		unsigned long ig  : 53;
+	};
+};
+
+/*ipi check exit data*/
+struct kvm_ipi_data{
+	union ia64_ipi_a addr;
+	union ia64_ipi_d data;
+};
+
+/*global purge data*/
+struct kvm_ptc_g {
+	unsigned long vaddr;
+	unsigned long rr;
+	unsigned long ps;
+	struct kvm_vcpu *vcpu;
+};
+
+/*Exit control data */
+struct exit_ctl_data{
+	uint32_t exit_reason;
+	uint32_t vm_status;
+	union {
+		struct kvm_mmio_req	ioreq;
+		struct kvm_pal_call	pal_data;
+		struct kvm_sal_call	sal_data;
+		struct kvm_switch_rr6	rr_data;
+		struct kvm_ipi_data	ipi_data;
+		struct kvm_ptc_g	ptc_g_data;
+	} u;
+};
+
+union pte_flags {
+	unsigned long val;
+	struct {
+		unsigned long p    :  1; /*0      */
+		unsigned long      :  1; /* 1     */
+		unsigned long ma   :  3; /* 2-4   */
+		unsigned long a    :  1; /* 5     */
+		unsigned long d    :  1; /* 6     */
+		unsigned long pl   :  2; /* 7-8   */
+		unsigned long ar   :  3; /* 9-11  */
+		unsigned long ppn  : 38; /* 12-49 */
+		unsigned long      :  2; /* 50-51 */
+		unsigned long ed   :  1; /* 52    */
+	};
+};
+
+union ia64_pta {
+	unsigned long val;
+	struct {
+		unsigned long ve : 1;
+		unsigned long reserved0 : 1;
+		unsigned long size : 6;
+		unsigned long vf : 1;
+		unsigned long reserved1 : 6;
+		unsigned long base : 49;
+	};
+};
+
+struct thash_cb {
+	/* THASH base information */
+	struct thash_data	*hash; /* hash table pointer */
+	union ia64_pta		pta;
+	int           num;
+};
+
+struct kvm_vcpu_stat {
+	u32 halt_wakeup;
+};
+
+struct kvm_vcpu_arch {
+	int launched;
+	int last_exit;
+	int last_run_cpu;
+	int vmm_tr_slot;
+	int vm_tr_slot;
+	int sn_rtc_tr_slot;
+
+#define KVM_MP_STATE_RUNNABLE          0
+#define KVM_MP_STATE_UNINITIALIZED     1
+#define KVM_MP_STATE_INIT_RECEIVED     2
+#define KVM_MP_STATE_HALTED            3
+	int mp_state;
+
+#define MAX_PTC_G_NUM			3
+	int ptc_g_count;
+	struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM];
+
+	/*halt timer to wake up sleepy vcpus*/
+	struct hrtimer hlt_timer;
+	long ht_active;
+
+	struct kvm_lapic *apic;    /* kernel irqchip context */
+	struct vpd *vpd;
+
+	/* Exit data for vmm_transition*/
+	struct exit_ctl_data exit_data;
+
+	cpumask_t cache_coherent_map;
+
+	unsigned long vmm_rr;
+	unsigned long host_rr6;
+	unsigned long psbits[8];
+	unsigned long cr_iipa;
+	unsigned long cr_isr;
+	unsigned long vsa_base;
+	unsigned long dirty_log_lock_pa;
+	unsigned long __gp;
+	/* TR and TC.  */
+	struct thash_data itrs[NITRS];
+	struct thash_data dtrs[NDTRS];
+	/* Bit is set if there is a tr/tc for the region.  */
+	unsigned char itr_regions;
+	unsigned char dtr_regions;
+	unsigned char tc_regions;
+	/* purge all */
+	unsigned long ptce_base;
+	unsigned long ptce_count[2];
+	unsigned long ptce_stride[2];
+	/* itc/itm */
+	unsigned long last_itc;
+	long itc_offset;
+	unsigned long itc_check;
+	unsigned long timer_check;
+	unsigned int timer_pending;
+	unsigned int timer_fired;
+
+	unsigned long vrr[8];
+	unsigned long ibr[8];
+	unsigned long dbr[8];
+	unsigned long insvc[4];		/* Interrupt in service.  */
+	unsigned long xtp;
+
+	unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
+	unsigned long metaphysical_rr4;	/* from kvm_arch (so is pinned) */
+	unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
+	unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
+	unsigned long fp_psr;       /*used for lazy float register */
+	unsigned long saved_gp;
+	/*for phycial  emulation */
+	int mode_flags;
+	struct thash_cb vtlb;
+	struct thash_cb vhpt;
+	char irq_check;
+	char irq_new_pending;
+
+	unsigned long opcode;
+	unsigned long cause;
+	char log_buf[VMM_LOG_LEN];
+	union context host;
+	union context guest;
+
+	char mmio_data[8];
+};
+
+struct kvm_vm_stat {
+	u64 remote_tlb_flush;
+};
+
+struct kvm_sal_data {
+	unsigned long boot_ip;
+	unsigned long boot_gp;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+struct kvm_arch {
+	spinlock_t dirty_log_lock;
+
+	unsigned long	vm_base;
+	unsigned long	metaphysical_rr0;
+	unsigned long	metaphysical_rr4;
+	unsigned long	vmm_init_rr;
+
+	int		is_sn2;
+
+	struct kvm_ioapic *vioapic;
+	struct kvm_vm_stat stat;
+	struct kvm_sal_data rdv_sal_data;
+
+	struct list_head assigned_dev_head;
+	struct iommu_domain *iommu_domain;
+	bool iommu_noncoherent;
+
+	unsigned long irq_sources_bitmap;
+	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
+};
+
+union cpuid3_t {
+	u64 value;
+	struct {
+		u64 number : 8;
+		u64 revision : 8;
+		u64 model : 8;
+		u64 family : 8;
+		u64 archrev : 8;
+		u64 rv : 24;
+	};
+};
+
+struct kvm_pt_regs {
+	/* The following registers are saved by SAVE_MIN: */
+	unsigned long b6;  /* scratch */
+	unsigned long b7;  /* scratch */
+
+	unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */
+	unsigned long ar_ssd; /* reserved for future use (scratch) */
+
+	unsigned long r8;  /* scratch (return value register 0) */
+	unsigned long r9;  /* scratch (return value register 1) */
+	unsigned long r10; /* scratch (return value register 2) */
+	unsigned long r11; /* scratch (return value register 3) */
+
+	unsigned long cr_ipsr; /* interrupted task's psr */
+	unsigned long cr_iip;  /* interrupted task's instruction pointer */
+	unsigned long cr_ifs;  /* interrupted task's function state */
+
+	unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
+	unsigned long ar_pfs;  /* prev function state  */
+	unsigned long ar_rsc;  /* RSE configuration */
+	/* The following two are valid only if cr_ipsr.cpl > 0: */
+	unsigned long ar_rnat;  /* RSE NaT */
+	unsigned long ar_bspstore; /* RSE bspstore */
+
+	unsigned long pr;  /* 64 predicate registers (1 bit each) */
+	unsigned long b0;  /* return pointer (bp) */
+	unsigned long loadrs;  /* size of dirty partition << 16 */
+
+	unsigned long r1;  /* the gp pointer */
+	unsigned long r12; /* interrupted task's memory stack pointer */
+	unsigned long r13; /* thread pointer */
+
+	unsigned long ar_fpsr;  /* floating point status (preserved) */
+	unsigned long r15;  /* scratch */
+
+	/* The remaining registers are NOT saved for system calls.  */
+	unsigned long r14;  /* scratch */
+	unsigned long r2;  /* scratch */
+	unsigned long r3;  /* scratch */
+	unsigned long r16;  /* scratch */
+	unsigned long r17;  /* scratch */
+	unsigned long r18;  /* scratch */
+	unsigned long r19;  /* scratch */
+	unsigned long r20;  /* scratch */
+	unsigned long r21;  /* scratch */
+	unsigned long r22;  /* scratch */
+	unsigned long r23;  /* scratch */
+	unsigned long r24;  /* scratch */
+	unsigned long r25;  /* scratch */
+	unsigned long r26;  /* scratch */
+	unsigned long r27;  /* scratch */
+	unsigned long r28;  /* scratch */
+	unsigned long r29;  /* scratch */
+	unsigned long r30;  /* scratch */
+	unsigned long r31;  /* scratch */
+	unsigned long ar_ccv;  /* compare/exchange value (scratch) */
+
+	/*
+	 * Floating point registers that the kernel considers scratch:
+	 */
+	struct ia64_fpreg f6;  /* scratch */
+	struct ia64_fpreg f7;  /* scratch */
+	struct ia64_fpreg f8;  /* scratch */
+	struct ia64_fpreg f9;  /* scratch */
+	struct ia64_fpreg f10;  /* scratch */
+	struct ia64_fpreg f11;  /* scratch */
+
+	unsigned long r4;  /* preserved */
+	unsigned long r5;  /* preserved */
+	unsigned long r6;  /* preserved */
+	unsigned long r7;  /* preserved */
+	unsigned long eml_unat;    /* used for emulating instruction */
+	unsigned long pad0;     /* alignment pad */
+};
+
+static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
+{
+	return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
+}
+
+typedef int kvm_vmm_entry(void);
+typedef void kvm_tramp_entry(union context *host, union context *guest);
+
+struct kvm_vmm_info{
+	struct module	*module;
+	kvm_vmm_entry 	*vmm_entry;
+	kvm_tramp_entry *tramp_entry;
+	unsigned long 	vmm_ivt;
+	unsigned long	patch_mov_ar;
+	unsigned long	patch_mov_ar_sn2;
+};
+
+int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
+int kvm_emulate_halt(struct kvm_vcpu *vcpu);
+int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
+void kvm_sal_emul(struct kvm_vcpu *vcpu);
+
+#define __KVM_HAVE_ARCH_VM_ALLOC 1
+struct kvm *kvm_arch_alloc_vm(void);
+void kvm_arch_free_vm(struct kvm *kvm);
+
+#endif /* __ASSEMBLY__*/
+
+#endif
diff --git a/arch/ia64/include/asm/libata-portmap.h b/arch/ia64/include/asm/libata-portmap.h
new file mode 100644
index 00000000000..0e00c9a9f41
--- /dev/null
+++ b/arch/ia64/include/asm/libata-portmap.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_IA64_LIBATA_PORTMAP_H
+#define __ASM_IA64_LIBATA_PORTMAP_H
+
+#define ATA_PRIMARY_CMD		0x1F0
+#define ATA_PRIMARY_CTL		0x3F6
+#define ATA_PRIMARY_IRQ(dev)	isa_irq_to_vector(14)
+
+#define ATA_SECONDARY_CMD	0x170
+#define ATA_SECONDARY_CTL	0x376
+#define ATA_SECONDARY_IRQ(dev)	isa_irq_to_vector(15)
+
+#endif
diff --git a/arch/ia64/include/asm/linkage.h b/arch/ia64/include/asm/linkage.h
new file mode 100644
index 00000000000..787575701f1
--- /dev/null
+++ b/arch/ia64/include/asm/linkage.h
@@ -0,0 +1,18 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#ifndef __ASSEMBLY__
+
+#define asmlinkage CPP_ASMLINKAGE __attribute__((syscall_linkage))
+
+#else
+
+#include <asm/asmmacro.h>
+
+#endif
+
+#define cond_syscall(x) asm(".weak\t" #x "#\n" #x "#\t=\tsys_ni_syscall#")
+#define SYSCALL_ALIAS(alias, name)					\
+	asm ( #alias "# = " #name "#\n\t.globl " #alias "#")
+
+#endif
diff --git a/arch/ia64/include/asm/local.h b/arch/ia64/include/asm/local.h
new file mode 100644
index 00000000000..c11c530f74d
--- /dev/null
+++ b/arch/ia64/include/asm/local.h
@@ -0,0 +1 @@
+#include <asm-generic/local.h>
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/ia64/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
new file mode 100644
index 00000000000..9c39bdfc2da
--- /dev/null
+++ b/arch/ia64/include/asm/machvec.h
@@ -0,0 +1,367 @@
+/*
+ * Machine vector for IA-64.
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Srinivasa Thirumalachar <sprasad@engr.sgi.com>
+ * Copyright (C) Vijay Chander <vijay@engr.sgi.com>
+ * Copyright (C) 1999-2001, 2003-2004 Hewlett-Packard Co.
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#ifndef _ASM_IA64_MACHVEC_H
+#define _ASM_IA64_MACHVEC_H
+
+#include <linux/types.h>
+
+/* forward declarations: */
+struct device;
+struct pt_regs;
+struct scatterlist;
+struct page;
+struct mm_struct;
+struct pci_bus;
+struct task_struct;
+struct pci_dev;
+struct msi_desc;
+struct dma_attrs;
+
+typedef void ia64_mv_setup_t (char **);
+typedef void ia64_mv_cpu_init_t (void);
+typedef void ia64_mv_irq_init_t (void);
+typedef void ia64_mv_send_ipi_t (int, int, int, int);
+typedef void ia64_mv_timer_interrupt_t (int, void *);
+typedef void ia64_mv_global_tlb_purge_t (struct mm_struct *, unsigned long, unsigned long, unsigned long);
+typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *);
+typedef u8 ia64_mv_irq_to_vector (int);
+typedef unsigned int ia64_mv_local_vector_to_irq (u8);
+typedef char *ia64_mv_pci_get_legacy_mem_t (struct pci_bus *);
+typedef int ia64_mv_pci_legacy_read_t (struct pci_bus *, u16 port, u32 *val,
+				       u8 size);
+typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val,
+					u8 size);
+typedef void ia64_mv_migrate_t(struct task_struct * task);
+typedef void ia64_mv_pci_fixup_bus_t (struct pci_bus *);
+typedef void ia64_mv_kernel_launch_event_t(void);
+
+/* DMA-mapping interface: */
+typedef void ia64_mv_dma_init (void);
+typedef u64 ia64_mv_dma_get_required_mask (struct device *);
+typedef struct dma_map_ops *ia64_mv_dma_get_ops(struct device *);
+
+/*
+ * WARNING: The legacy I/O space is _architected_.  Platforms are
+ * expected to follow this architected model (see Section 10.7 in the
+ * IA-64 Architecture Software Developer's Manual).  Unfortunately,
+ * some broken machines do not follow that model, which is why we have
+ * to make the inX/outX operations part of the machine vector.
+ * Platform designers should follow the architected model whenever
+ * possible.
+ */
+typedef unsigned int ia64_mv_inb_t (unsigned long);
+typedef unsigned int ia64_mv_inw_t (unsigned long);
+typedef unsigned int ia64_mv_inl_t (unsigned long);
+typedef void ia64_mv_outb_t (unsigned char, unsigned long);
+typedef void ia64_mv_outw_t (unsigned short, unsigned long);
+typedef void ia64_mv_outl_t (unsigned int, unsigned long);
+typedef void ia64_mv_mmiowb_t (void);
+typedef unsigned char ia64_mv_readb_t (const volatile void __iomem *);
+typedef unsigned short ia64_mv_readw_t (const volatile void __iomem *);
+typedef unsigned int ia64_mv_readl_t (const volatile void __iomem *);
+typedef unsigned long ia64_mv_readq_t (const volatile void __iomem *);
+typedef unsigned char ia64_mv_readb_relaxed_t (const volatile void __iomem *);
+typedef unsigned short ia64_mv_readw_relaxed_t (const volatile void __iomem *);
+typedef unsigned int ia64_mv_readl_relaxed_t (const volatile void __iomem *);
+typedef unsigned long ia64_mv_readq_relaxed_t (const volatile void __iomem *);
+
+typedef int ia64_mv_setup_msi_irq_t (struct pci_dev *pdev, struct msi_desc *);
+typedef void ia64_mv_teardown_msi_irq_t (unsigned int irq);
+
+static inline void
+machvec_noop (void)
+{
+}
+
+static inline void
+machvec_noop_mm (struct mm_struct *mm)
+{
+}
+
+static inline void
+machvec_noop_task (struct task_struct *task)
+{
+}
+
+static inline void
+machvec_noop_bus (struct pci_bus *bus)
+{
+}
+
+extern void machvec_setup (char **);
+extern void machvec_timer_interrupt (int, void *);
+extern void machvec_tlb_migrate_finish (struct mm_struct *);
+
+# if defined (CONFIG_IA64_HP_SIM)
+#  include <asm/machvec_hpsim.h>
+# elif defined (CONFIG_IA64_DIG)
+#  include <asm/machvec_dig.h>
+# elif defined(CONFIG_IA64_DIG_VTD)
+#  include <asm/machvec_dig_vtd.h>
+# elif defined (CONFIG_IA64_HP_ZX1)
+#  include <asm/machvec_hpzx1.h>
+# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
+#  include <asm/machvec_hpzx1_swiotlb.h>
+# elif defined (CONFIG_IA64_SGI_SN2)
+#  include <asm/machvec_sn2.h>
+# elif defined (CONFIG_IA64_SGI_UV)
+#  include <asm/machvec_uv.h>
+# elif defined (CONFIG_IA64_GENERIC)
+
+# ifdef MACHVEC_PLATFORM_HEADER
+#  include MACHVEC_PLATFORM_HEADER
+# else
+#  define ia64_platform_name	ia64_mv.name
+#  define platform_setup	ia64_mv.setup
+#  define platform_cpu_init	ia64_mv.cpu_init
+#  define platform_irq_init	ia64_mv.irq_init
+#  define platform_send_ipi	ia64_mv.send_ipi
+#  define platform_timer_interrupt	ia64_mv.timer_interrupt
+#  define platform_global_tlb_purge	ia64_mv.global_tlb_purge
+#  define platform_tlb_migrate_finish	ia64_mv.tlb_migrate_finish
+#  define platform_dma_init		ia64_mv.dma_init
+#  define platform_dma_get_required_mask ia64_mv.dma_get_required_mask
+#  define platform_dma_get_ops		ia64_mv.dma_get_ops
+#  define platform_irq_to_vector	ia64_mv.irq_to_vector
+#  define platform_local_vector_to_irq	ia64_mv.local_vector_to_irq
+#  define platform_pci_get_legacy_mem	ia64_mv.pci_get_legacy_mem
+#  define platform_pci_legacy_read	ia64_mv.pci_legacy_read
+#  define platform_pci_legacy_write	ia64_mv.pci_legacy_write
+#  define platform_inb		ia64_mv.inb
+#  define platform_inw		ia64_mv.inw
+#  define platform_inl		ia64_mv.inl
+#  define platform_outb		ia64_mv.outb
+#  define platform_outw		ia64_mv.outw
+#  define platform_outl		ia64_mv.outl
+#  define platform_mmiowb	ia64_mv.mmiowb
+#  define platform_readb        ia64_mv.readb
+#  define platform_readw        ia64_mv.readw
+#  define platform_readl        ia64_mv.readl
+#  define platform_readq        ia64_mv.readq
+#  define platform_readb_relaxed        ia64_mv.readb_relaxed
+#  define platform_readw_relaxed        ia64_mv.readw_relaxed
+#  define platform_readl_relaxed        ia64_mv.readl_relaxed
+#  define platform_readq_relaxed        ia64_mv.readq_relaxed
+#  define platform_migrate		ia64_mv.migrate
+#  define platform_setup_msi_irq	ia64_mv.setup_msi_irq
+#  define platform_teardown_msi_irq	ia64_mv.teardown_msi_irq
+#  define platform_pci_fixup_bus	ia64_mv.pci_fixup_bus
+#  define platform_kernel_launch_event	ia64_mv.kernel_launch_event
+# endif
+
+/* __attribute__((__aligned__(16))) is required to make size of the
+ * structure multiple of 16 bytes.
+ * This will fillup the holes created because of section 3.3.1 in
+ * Software Conventions guide.
+ */
+struct ia64_machine_vector {
+	const char *name;
+	ia64_mv_setup_t *setup;
+	ia64_mv_cpu_init_t *cpu_init;
+	ia64_mv_irq_init_t *irq_init;
+	ia64_mv_send_ipi_t *send_ipi;
+	ia64_mv_timer_interrupt_t *timer_interrupt;
+	ia64_mv_global_tlb_purge_t *global_tlb_purge;
+	ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish;
+	ia64_mv_dma_init *dma_init;
+	ia64_mv_dma_get_required_mask *dma_get_required_mask;
+	ia64_mv_dma_get_ops *dma_get_ops;
+	ia64_mv_irq_to_vector *irq_to_vector;
+	ia64_mv_local_vector_to_irq *local_vector_to_irq;
+	ia64_mv_pci_get_legacy_mem_t *pci_get_legacy_mem;
+	ia64_mv_pci_legacy_read_t *pci_legacy_read;
+	ia64_mv_pci_legacy_write_t *pci_legacy_write;
+	ia64_mv_inb_t *inb;
+	ia64_mv_inw_t *inw;
+	ia64_mv_inl_t *inl;
+	ia64_mv_outb_t *outb;
+	ia64_mv_outw_t *outw;
+	ia64_mv_outl_t *outl;
+	ia64_mv_mmiowb_t *mmiowb;
+	ia64_mv_readb_t *readb;
+	ia64_mv_readw_t *readw;
+	ia64_mv_readl_t *readl;
+	ia64_mv_readq_t *readq;
+	ia64_mv_readb_relaxed_t *readb_relaxed;
+	ia64_mv_readw_relaxed_t *readw_relaxed;
+	ia64_mv_readl_relaxed_t *readl_relaxed;
+	ia64_mv_readq_relaxed_t *readq_relaxed;
+	ia64_mv_migrate_t *migrate;
+	ia64_mv_setup_msi_irq_t *setup_msi_irq;
+	ia64_mv_teardown_msi_irq_t *teardown_msi_irq;
+	ia64_mv_pci_fixup_bus_t *pci_fixup_bus;
+	ia64_mv_kernel_launch_event_t *kernel_launch_event;
+} __attribute__((__aligned__(16))); /* align attrib? see above comment */
+
+#define MACHVEC_INIT(name)			\
+{						\
+	#name,					\
+	platform_setup,				\
+	platform_cpu_init,			\
+	platform_irq_init,			\
+	platform_send_ipi,			\
+	platform_timer_interrupt,		\
+	platform_global_tlb_purge,		\
+	platform_tlb_migrate_finish,		\
+	platform_dma_init,			\
+	platform_dma_get_required_mask,		\
+	platform_dma_get_ops,			\
+	platform_irq_to_vector,			\
+	platform_local_vector_to_irq,		\
+	platform_pci_get_legacy_mem,		\
+	platform_pci_legacy_read,		\
+	platform_pci_legacy_write,		\
+	platform_inb,				\
+	platform_inw,				\
+	platform_inl,				\
+	platform_outb,				\
+	platform_outw,				\
+	platform_outl,				\
+	platform_mmiowb,			\
+	platform_readb,				\
+	platform_readw,				\
+	platform_readl,				\
+	platform_readq,				\
+	platform_readb_relaxed,			\
+	platform_readw_relaxed,			\
+	platform_readl_relaxed,			\
+	platform_readq_relaxed,			\
+	platform_migrate,			\
+	platform_setup_msi_irq,			\
+	platform_teardown_msi_irq,		\
+	platform_pci_fixup_bus,			\
+	platform_kernel_launch_event            \
+}
+
+extern struct ia64_machine_vector ia64_mv;
+extern void machvec_init (const char *name);
+extern void machvec_init_from_cmdline(const char *cmdline);
+
+# else
+#  error Unknown configuration.  Update arch/ia64/include/asm/machvec.h.
+# endif /* CONFIG_IA64_GENERIC */
+
+extern void swiotlb_dma_init(void);
+extern struct dma_map_ops *dma_get_ops(struct device *);
+
+/*
+ * Define default versions so we can extend machvec for new platforms without having
+ * to update the machvec files for all existing platforms.
+ */
+#ifndef platform_setup
+# define platform_setup			machvec_setup
+#endif
+#ifndef platform_cpu_init
+# define platform_cpu_init		machvec_noop
+#endif
+#ifndef platform_irq_init
+# define platform_irq_init		machvec_noop
+#endif
+
+#ifndef platform_send_ipi
+# define platform_send_ipi		ia64_send_ipi	/* default to architected version */
+#endif
+#ifndef platform_timer_interrupt
+# define platform_timer_interrupt 	machvec_timer_interrupt
+#endif
+#ifndef platform_global_tlb_purge
+# define platform_global_tlb_purge	ia64_global_tlb_purge /* default to architected version */
+#endif
+#ifndef platform_tlb_migrate_finish
+# define platform_tlb_migrate_finish	machvec_noop_mm
+#endif
+#ifndef platform_kernel_launch_event
+# define platform_kernel_launch_event	machvec_noop
+#endif
+#ifndef platform_dma_init
+# define platform_dma_init		swiotlb_dma_init
+#endif
+#ifndef platform_dma_get_ops
+# define platform_dma_get_ops		dma_get_ops
+#endif
+#ifndef platform_dma_get_required_mask
+# define  platform_dma_get_required_mask	ia64_dma_get_required_mask
+#endif
+#ifndef platform_irq_to_vector
+# define platform_irq_to_vector		__ia64_irq_to_vector
+#endif
+#ifndef platform_local_vector_to_irq
+# define platform_local_vector_to_irq	__ia64_local_vector_to_irq
+#endif
+#ifndef platform_pci_get_legacy_mem
+# define platform_pci_get_legacy_mem	ia64_pci_get_legacy_mem
+#endif
+#ifndef platform_pci_legacy_read
+# define platform_pci_legacy_read	ia64_pci_legacy_read
+extern int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size);
+#endif
+#ifndef platform_pci_legacy_write
+# define platform_pci_legacy_write	ia64_pci_legacy_write
+extern int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size);
+#endif
+#ifndef platform_inb
+# define platform_inb		__ia64_inb
+#endif
+#ifndef platform_inw
+# define platform_inw		__ia64_inw
+#endif
+#ifndef platform_inl
+# define platform_inl		__ia64_inl
+#endif
+#ifndef platform_outb
+# define platform_outb		__ia64_outb
+#endif
+#ifndef platform_outw
+# define platform_outw		__ia64_outw
+#endif
+#ifndef platform_outl
+# define platform_outl		__ia64_outl
+#endif
+#ifndef platform_mmiowb
+# define platform_mmiowb	__ia64_mmiowb
+#endif
+#ifndef platform_readb
+# define platform_readb		__ia64_readb
+#endif
+#ifndef platform_readw
+# define platform_readw		__ia64_readw
+#endif
+#ifndef platform_readl
+# define platform_readl		__ia64_readl
+#endif
+#ifndef platform_readq
+# define platform_readq		__ia64_readq
+#endif
+#ifndef platform_readb_relaxed
+# define platform_readb_relaxed	__ia64_readb_relaxed
+#endif
+#ifndef platform_readw_relaxed
+# define platform_readw_relaxed	__ia64_readw_relaxed
+#endif
+#ifndef platform_readl_relaxed
+# define platform_readl_relaxed	__ia64_readl_relaxed
+#endif
+#ifndef platform_readq_relaxed
+# define platform_readq_relaxed	__ia64_readq_relaxed
+#endif
+#ifndef platform_migrate
+# define platform_migrate machvec_noop_task
+#endif
+#ifndef platform_setup_msi_irq
+# define platform_setup_msi_irq		((ia64_mv_setup_msi_irq_t*)NULL)
+#endif
+#ifndef platform_teardown_msi_irq
+# define platform_teardown_msi_irq	((ia64_mv_teardown_msi_irq_t*)NULL)
+#endif
+#ifndef platform_pci_fixup_bus
+# define platform_pci_fixup_bus	machvec_noop_bus
+#endif
+
+#endif /* _ASM_IA64_MACHVEC_H */
diff --git a/arch/ia64/include/asm/machvec_dig.h b/arch/ia64/include/asm/machvec_dig.h
new file mode 100644
index 00000000000..1f7403a2fbe
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_dig.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_IA64_MACHVEC_DIG_h
+#define _ASM_IA64_MACHVEC_DIG_h
+
+extern ia64_mv_setup_t dig_setup;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define ia64_platform_name	"dig"
+#define platform_setup		dig_setup
+
+#endif /* _ASM_IA64_MACHVEC_DIG_h */
diff --git a/arch/ia64/include/asm/machvec_dig_vtd.h b/arch/ia64/include/asm/machvec_dig_vtd.h
new file mode 100644
index 00000000000..44308b4c3f6
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_dig_vtd.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_IA64_MACHVEC_DIG_VTD_h
+#define _ASM_IA64_MACHVEC_DIG_VTD_h
+
+extern ia64_mv_setup_t			dig_setup;
+extern ia64_mv_dma_init			pci_iommu_alloc;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define ia64_platform_name			"dig_vtd"
+#define platform_setup				dig_setup
+#define platform_dma_init			pci_iommu_alloc
+
+#endif /* _ASM_IA64_MACHVEC_DIG_VTD_h */
diff --git a/arch/ia64/include/asm/machvec_hpsim.h b/arch/ia64/include/asm/machvec_hpsim.h
new file mode 100644
index 00000000000..e7571127936
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_hpsim.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_IA64_MACHVEC_HPSIM_h
+#define _ASM_IA64_MACHVEC_HPSIM_h
+
+extern ia64_mv_setup_t hpsim_setup;
+extern ia64_mv_irq_init_t hpsim_irq_init;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define ia64_platform_name	"hpsim"
+#define platform_setup		hpsim_setup
+#define platform_irq_init	hpsim_irq_init
+
+#endif /* _ASM_IA64_MACHVEC_HPSIM_h */
diff --git a/arch/ia64/include/asm/machvec_hpzx1.h b/arch/ia64/include/asm/machvec_hpzx1.h
new file mode 100644
index 00000000000..c74d3159e9e
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_hpzx1.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_IA64_MACHVEC_HPZX1_h
+#define _ASM_IA64_MACHVEC_HPZX1_h
+
+extern ia64_mv_setup_t			dig_setup;
+extern ia64_mv_dma_init			sba_dma_init;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define ia64_platform_name			"hpzx1"
+#define platform_setup				dig_setup
+#define platform_dma_init			sba_dma_init
+
+#endif /* _ASM_IA64_MACHVEC_HPZX1_h */
diff --git a/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h b/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h
new file mode 100644
index 00000000000..906ef621077
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h
@@ -0,0 +1,19 @@
+#ifndef _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h
+#define _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h
+
+extern ia64_mv_setup_t				dig_setup;
+extern ia64_mv_dma_get_ops			hwsw_dma_get_ops;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define ia64_platform_name			"hpzx1_swiotlb"
+#define platform_setup				dig_setup
+#define platform_dma_init			machvec_noop
+#define platform_dma_get_ops			hwsw_dma_get_ops
+
+#endif /* _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h */
diff --git a/arch/ia64/include/asm/machvec_init.h b/arch/ia64/include/asm/machvec_init.h
new file mode 100644
index 00000000000..37a469849ab
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_init.h
@@ -0,0 +1,35 @@
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+
+extern ia64_mv_send_ipi_t ia64_send_ipi;
+extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge;
+extern ia64_mv_dma_get_required_mask ia64_dma_get_required_mask;
+extern ia64_mv_irq_to_vector __ia64_irq_to_vector;
+extern ia64_mv_local_vector_to_irq __ia64_local_vector_to_irq;
+extern ia64_mv_pci_get_legacy_mem_t ia64_pci_get_legacy_mem;
+extern ia64_mv_pci_legacy_read_t ia64_pci_legacy_read;
+extern ia64_mv_pci_legacy_write_t ia64_pci_legacy_write;
+
+extern ia64_mv_inb_t __ia64_inb;
+extern ia64_mv_inw_t __ia64_inw;
+extern ia64_mv_inl_t __ia64_inl;
+extern ia64_mv_outb_t __ia64_outb;
+extern ia64_mv_outw_t __ia64_outw;
+extern ia64_mv_outl_t __ia64_outl;
+extern ia64_mv_mmiowb_t __ia64_mmiowb;
+extern ia64_mv_readb_t __ia64_readb;
+extern ia64_mv_readw_t __ia64_readw;
+extern ia64_mv_readl_t __ia64_readl;
+extern ia64_mv_readq_t __ia64_readq;
+extern ia64_mv_readb_t __ia64_readb_relaxed;
+extern ia64_mv_readw_t __ia64_readw_relaxed;
+extern ia64_mv_readl_t __ia64_readl_relaxed;
+extern ia64_mv_readq_t __ia64_readq_relaxed;
+
+#define MACHVEC_HELPER(name)									\
+ struct ia64_machine_vector machvec_##name __attribute__ ((unused, __section__ (".machvec")))	\
+	= MACHVEC_INIT(name);
+
+#define MACHVEC_DEFINE(name)	MACHVEC_HELPER(name)
+
+MACHVEC_DEFINE(MACHVEC_PLATFORM_NAME)
diff --git a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h
new file mode 100644
index 00000000000..ece9fa85be8
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_sn2.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2002-2003,2006 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it 
+ * under the terms of version 2 of the GNU General Public License 
+ * as published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * 
+ * Further, this software is distributed without any warranty that it is 
+ * free of the rightful claim of any third person regarding infringement 
+ * or the like.  Any license provided herein, whether implied or 
+ * otherwise, applies only to this software file.  Patent licenses, if 
+ * any, provided herein do not apply to combinations of this program with 
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public 
+ * License along with this program; if not, write the Free Software 
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/NoticeExplan
+ */
+
+#ifndef _ASM_IA64_MACHVEC_SN2_H
+#define _ASM_IA64_MACHVEC_SN2_H
+
+extern ia64_mv_setup_t sn_setup;
+extern ia64_mv_cpu_init_t sn_cpu_init;
+extern ia64_mv_irq_init_t sn_irq_init;
+extern ia64_mv_send_ipi_t sn2_send_IPI;
+extern ia64_mv_timer_interrupt_t sn_timer_interrupt;
+extern ia64_mv_global_tlb_purge_t sn2_global_tlb_purge;
+extern ia64_mv_tlb_migrate_finish_t	sn_tlb_migrate_finish;
+extern ia64_mv_irq_to_vector sn_irq_to_vector;
+extern ia64_mv_local_vector_to_irq sn_local_vector_to_irq;
+extern ia64_mv_pci_get_legacy_mem_t sn_pci_get_legacy_mem;
+extern ia64_mv_pci_legacy_read_t sn_pci_legacy_read;
+extern ia64_mv_pci_legacy_write_t sn_pci_legacy_write;
+extern ia64_mv_inb_t __sn_inb;
+extern ia64_mv_inw_t __sn_inw;
+extern ia64_mv_inl_t __sn_inl;
+extern ia64_mv_outb_t __sn_outb;
+extern ia64_mv_outw_t __sn_outw;
+extern ia64_mv_outl_t __sn_outl;
+extern ia64_mv_mmiowb_t __sn_mmiowb;
+extern ia64_mv_readb_t __sn_readb;
+extern ia64_mv_readw_t __sn_readw;
+extern ia64_mv_readl_t __sn_readl;
+extern ia64_mv_readq_t __sn_readq;
+extern ia64_mv_readb_t __sn_readb_relaxed;
+extern ia64_mv_readw_t __sn_readw_relaxed;
+extern ia64_mv_readl_t __sn_readl_relaxed;
+extern ia64_mv_readq_t __sn_readq_relaxed;
+extern ia64_mv_dma_get_required_mask	sn_dma_get_required_mask;
+extern ia64_mv_dma_init			sn_dma_init;
+extern ia64_mv_migrate_t		sn_migrate;
+extern ia64_mv_kernel_launch_event_t	sn_kernel_launch_event;
+extern ia64_mv_setup_msi_irq_t		sn_setup_msi_irq;
+extern ia64_mv_teardown_msi_irq_t	sn_teardown_msi_irq;
+extern ia64_mv_pci_fixup_bus_t		sn_pci_fixup_bus;
+
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define ia64_platform_name		"sn2"
+#define platform_setup			sn_setup
+#define platform_cpu_init		sn_cpu_init
+#define platform_irq_init		sn_irq_init
+#define platform_send_ipi		sn2_send_IPI
+#define platform_timer_interrupt	sn_timer_interrupt
+#define platform_global_tlb_purge       sn2_global_tlb_purge
+#define platform_tlb_migrate_finish	sn_tlb_migrate_finish
+#define platform_pci_fixup		sn_pci_fixup
+#define platform_inb			__sn_inb
+#define platform_inw			__sn_inw
+#define platform_inl			__sn_inl
+#define platform_outb			__sn_outb
+#define platform_outw			__sn_outw
+#define platform_outl			__sn_outl
+#define platform_mmiowb			__sn_mmiowb
+#define platform_readb			__sn_readb
+#define platform_readw			__sn_readw
+#define platform_readl			__sn_readl
+#define platform_readq			__sn_readq
+#define platform_readb_relaxed		__sn_readb_relaxed
+#define platform_readw_relaxed		__sn_readw_relaxed
+#define platform_readl_relaxed		__sn_readl_relaxed
+#define platform_readq_relaxed		__sn_readq_relaxed
+#define platform_irq_to_vector		sn_irq_to_vector
+#define platform_local_vector_to_irq	sn_local_vector_to_irq
+#define platform_pci_get_legacy_mem	sn_pci_get_legacy_mem
+#define platform_pci_legacy_read	sn_pci_legacy_read
+#define platform_pci_legacy_write	sn_pci_legacy_write
+#define platform_dma_get_required_mask	sn_dma_get_required_mask
+#define platform_dma_init		sn_dma_init
+#define platform_migrate		sn_migrate
+#define platform_kernel_launch_event    sn_kernel_launch_event
+#ifdef CONFIG_PCI_MSI
+#define platform_setup_msi_irq		sn_setup_msi_irq
+#define platform_teardown_msi_irq	sn_teardown_msi_irq
+#else
+#define platform_setup_msi_irq		((ia64_mv_setup_msi_irq_t*)NULL)
+#define platform_teardown_msi_irq	((ia64_mv_teardown_msi_irq_t*)NULL)
+#endif
+#define platform_pci_fixup_bus		sn_pci_fixup_bus
+
+#include <asm/sn/io.h>
+
+#endif /* _ASM_IA64_MACHVEC_SN2_H */
diff --git a/arch/ia64/include/asm/machvec_uv.h b/arch/ia64/include/asm/machvec_uv.h
new file mode 100644
index 00000000000..2c50853f35a
--- /dev/null
+++ b/arch/ia64/include/asm/machvec_uv.h
@@ -0,0 +1,26 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV Core Functions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_MACHVEC_UV_H
+#define _ASM_IA64_MACHVEC_UV_H
+
+extern ia64_mv_setup_t uv_setup;
+
+/*
+ * This stuff has dual use!
+ *
+ * For a generic kernel, the macros are used to initialize the
+ * platform's machvec structure.  When compiling a non-generic kernel,
+ * the macros are used directly.
+ */
+#define ia64_platform_name		"uv"
+#define platform_setup			uv_setup
+
+#endif /* _ASM_IA64_MACHVEC_UV_H */
diff --git a/arch/ia64/include/asm/mc146818rtc.h b/arch/ia64/include/asm/mc146818rtc.h
new file mode 100644
index 00000000000..407787a237b
--- /dev/null
+++ b/arch/ia64/include/asm/mc146818rtc.h
@@ -0,0 +1,10 @@
+#ifndef _ASM_IA64_MC146818RTC_H
+#define _ASM_IA64_MC146818RTC_H
+
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+
+/* empty include file to satisfy the include in genrtc.c */
+
+#endif /* _ASM_IA64_MC146818RTC_H */
diff --git a/arch/ia64/include/asm/mca.h b/arch/ia64/include/asm/mca.h
new file mode 100644
index 00000000000..8c709616871
--- /dev/null
+++ b/arch/ia64/include/asm/mca.h
@@ -0,0 +1,187 @@
+/*
+ * File:	mca.h
+ * Purpose:	Machine check handling specific defines
+ *
+ * Copyright (C) 1999, 2004 Silicon Graphics, Inc.
+ * Copyright (C) Vijay Chander <vijay@engr.sgi.com>
+ * Copyright (C) Srinivasa Thirumalachar <sprasad@engr.sgi.com>
+ * Copyright (C) Russ Anderson <rja@sgi.com>
+ */
+
+#ifndef _ASM_IA64_MCA_H
+#define _ASM_IA64_MCA_H
+
+#if !defined(__ASSEMBLY__)
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+
+#include <asm/param.h>
+#include <asm/sal.h>
+#include <asm/processor.h>
+#include <asm/mca_asm.h>
+
+#define IA64_MCA_RENDEZ_TIMEOUT		(20 * 1000)	/* value in milliseconds - 20 seconds */
+
+typedef struct ia64_fptr {
+	unsigned long fp;
+	unsigned long gp;
+} ia64_fptr_t;
+
+typedef union cmcv_reg_u {
+	u64	cmcv_regval;
+	struct	{
+		u64	cmcr_vector		: 8;
+		u64	cmcr_reserved1		: 4;
+		u64	cmcr_ignored1		: 1;
+		u64	cmcr_reserved2		: 3;
+		u64	cmcr_mask		: 1;
+		u64	cmcr_ignored2		: 47;
+	} cmcv_reg_s;
+
+} cmcv_reg_t;
+
+#define cmcv_mask		cmcv_reg_s.cmcr_mask
+#define cmcv_vector		cmcv_reg_s.cmcr_vector
+
+enum {
+	IA64_MCA_RENDEZ_CHECKIN_NOTDONE	=	0x0,
+	IA64_MCA_RENDEZ_CHECKIN_DONE	=	0x1,
+	IA64_MCA_RENDEZ_CHECKIN_INIT	=	0x2,
+	IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA	=	0x3,
+};
+
+/* Information maintained by the MC infrastructure */
+typedef struct ia64_mc_info_s {
+	u64		imi_mca_handler;
+	size_t		imi_mca_handler_size;
+	u64		imi_monarch_init_handler;
+	size_t		imi_monarch_init_handler_size;
+	u64		imi_slave_init_handler;
+	size_t		imi_slave_init_handler_size;
+	u8		imi_rendez_checkin[NR_CPUS];
+
+} ia64_mc_info_t;
+
+/* Handover state from SAL to OS and vice versa, for both MCA and INIT events.
+ * Besides the handover state, it also contains some saved registers from the
+ * time of the event.
+ * Note: mca_asm.S depends on the precise layout of this structure.
+ */
+
+struct ia64_sal_os_state {
+
+	/* SAL to OS */
+	unsigned long		os_gp;			/* GP of the os registered with the SAL, physical */
+	unsigned long		pal_proc;		/* PAL_PROC entry point, physical */
+	unsigned long		sal_proc;		/* SAL_PROC entry point, physical */
+	unsigned long		rv_rc;			/* MCA - Rendezvous state, INIT - reason code */
+	unsigned long		proc_state_param;	/* from R18 */
+	unsigned long		monarch;		/* 1 for a monarch event, 0 for a slave */
+
+	/* common */
+	unsigned long		sal_ra;			/* Return address in SAL, physical */
+	unsigned long		sal_gp;			/* GP of the SAL - physical */
+	pal_min_state_area_t	*pal_min_state;		/* from R17.  physical in asm, virtual in C */
+	/* Previous values of IA64_KR(CURRENT) and IA64_KR(CURRENT_STACK).
+	 * Note: if the MCA/INIT recovery code wants to resume to a new context
+	 * then it must change these values to reflect the new kernel stack.
+	 */
+	unsigned long		prev_IA64_KR_CURRENT;	/* previous value of IA64_KR(CURRENT) */
+	unsigned long		prev_IA64_KR_CURRENT_STACK;
+	struct task_struct	*prev_task;		/* previous task, NULL if it is not useful */
+	/* Some interrupt registers are not saved in minstate, pt_regs or
+	 * switch_stack.  Because MCA/INIT can occur when interrupts are
+	 * disabled, we need to save the additional interrupt registers over
+	 * MCA/INIT and resume.
+	 */
+	unsigned long		isr;
+	unsigned long		ifa;
+	unsigned long		itir;
+	unsigned long		iipa;
+	unsigned long		iim;
+	unsigned long		iha;
+
+	/* OS to SAL */
+	unsigned long		os_status;		/* OS status to SAL, enum below */
+	unsigned long		context;		/* 0 if return to same context
+							   1 if return to new context */
+
+	/* I-resources */
+	unsigned long		iip;
+	unsigned long		ipsr;
+	unsigned long		ifs;
+};
+
+enum {
+	IA64_MCA_CORRECTED	=	0x0,	/* Error has been corrected by OS_MCA */
+	IA64_MCA_WARM_BOOT	=	-1,	/* Warm boot of the system need from SAL */
+	IA64_MCA_COLD_BOOT	=	-2,	/* Cold boot of the system need from SAL */
+	IA64_MCA_HALT		=	-3	/* System to be halted by SAL */
+};
+
+enum {
+	IA64_INIT_RESUME	=	0x0,	/* Resume after return from INIT */
+	IA64_INIT_WARM_BOOT	=	-1,	/* Warm boot of the system need from SAL */
+};
+
+enum {
+	IA64_MCA_SAME_CONTEXT	=	0x0,	/* SAL to return to same context */
+	IA64_MCA_NEW_CONTEXT	=	-1	/* SAL to return to new context */
+};
+
+/* Per-CPU MCA state that is too big for normal per-CPU variables.  */
+
+struct ia64_mca_cpu {
+	u64 mca_stack[KERNEL_STACK_SIZE/8];
+	u64 init_stack[KERNEL_STACK_SIZE/8];
+};
+
+/* Array of physical addresses of each CPU's MCA area.  */
+extern unsigned long __per_cpu_mca[NR_CPUS];
+
+extern int cpe_vector;
+extern int ia64_cpe_irq;
+extern void ia64_mca_init(void);
+extern void ia64_mca_irq_init(void);
+extern void ia64_mca_cpu_init(void *);
+extern void ia64_os_mca_dispatch(void);
+extern void ia64_os_mca_dispatch_end(void);
+extern void ia64_mca_ucmc_handler(struct pt_regs *, struct ia64_sal_os_state *);
+extern void ia64_init_handler(struct pt_regs *,
+			      struct switch_stack *,
+			      struct ia64_sal_os_state *);
+extern void ia64_os_init_on_kdump(void);
+extern void ia64_monarch_init_handler(void);
+extern void ia64_slave_init_handler(void);
+extern void ia64_mca_cmc_vector_setup(void);
+extern int  ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *));
+extern void ia64_unreg_MCA_extension(void);
+extern unsigned long ia64_get_rnat(unsigned long *);
+extern void ia64_set_psr_mc(void);
+extern void ia64_mca_printk(const char * fmt, ...)
+	 __attribute__ ((format (printf, 1, 2)));
+
+struct ia64_mca_notify_die {
+	struct ia64_sal_os_state *sos;
+	int *monarch_cpu;
+	int *data;
+};
+
+DECLARE_PER_CPU(u64, ia64_mca_pal_base);
+
+#else	/* __ASSEMBLY__ */
+
+#define IA64_MCA_CORRECTED	0x0	/* Error has been corrected by OS_MCA */
+#define IA64_MCA_WARM_BOOT	-1	/* Warm boot of the system need from SAL */
+#define IA64_MCA_COLD_BOOT	-2	/* Cold boot of the system need from SAL */
+#define IA64_MCA_HALT		-3	/* System to be halted by SAL */
+
+#define IA64_INIT_RESUME	0x0	/* Resume after return from INIT */
+#define IA64_INIT_WARM_BOOT	-1	/* Warm boot of the system need from SAL */
+
+#define IA64_MCA_SAME_CONTEXT	0x0	/* SAL to return to same context */
+#define IA64_MCA_NEW_CONTEXT	-1	/* SAL to return to new context */
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_IA64_MCA_H */
diff --git a/arch/ia64/include/asm/mca_asm.h b/arch/ia64/include/asm/mca_asm.h
new file mode 100644
index 00000000000..13c1d4994d4
--- /dev/null
+++ b/arch/ia64/include/asm/mca_asm.h
@@ -0,0 +1,244 @@
+/*
+ * File:	mca_asm.h
+ * Purpose:	Machine check handling specific defines
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Vijay Chander <vijay@engr.sgi.com>
+ * Copyright (C) Srinivasa Thirumalachar <sprasad@engr.sgi.com>
+ * Copyright (C) 2000 Hewlett-Packard Co.
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2002 Intel Corp.
+ * Copyright (C) 2002 Jenna Hall <jenna.s.hall@intel.com>
+ * Copyright (C) 2005 Silicon Graphics, Inc
+ * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
+ */
+#ifndef _ASM_IA64_MCA_ASM_H
+#define _ASM_IA64_MCA_ASM_H
+
+#include <asm/percpu.h>
+
+#define PSR_IC		13
+#define PSR_I		14
+#define	PSR_DT		17
+#define PSR_RT		27
+#define PSR_MC		35
+#define PSR_IT		36
+#define PSR_BN		44
+
+/*
+ * This macro converts a instruction virtual address to a physical address
+ * Right now for simulation purposes the virtual addresses are
+ * direct mapped to physical addresses.
+ *	1. Lop off bits 61 thru 63 in the virtual address
+ */
+#define INST_VA_TO_PA(addr)							\
+	dep	addr	= 0, addr, 61, 3
+/*
+ * This macro converts a data virtual address to a physical address
+ * Right now for simulation purposes the virtual addresses are
+ * direct mapped to physical addresses.
+ *	1. Lop off bits 61 thru 63 in the virtual address
+ */
+#define DATA_VA_TO_PA(addr)							\
+	tpa	addr	= addr
+/*
+ * This macro converts a data physical address to a virtual address
+ * Right now for simulation purposes the virtual addresses are
+ * direct mapped to physical addresses.
+ *	1. Put 0x7 in bits 61 thru 63.
+ */
+#define DATA_PA_TO_VA(addr,temp)							\
+	mov	temp	= 0x7	;;							\
+	dep	addr	= temp, addr, 61, 3
+
+#define GET_THIS_PADDR(reg, var)		\
+	mov	reg = IA64_KR(PER_CPU_DATA);;	\
+        addl	reg = THIS_CPU(var), reg
+
+/*
+ * This macro jumps to the instruction at the given virtual address
+ * and starts execution in physical mode with all the address
+ * translations turned off.
+ *	1.	Save the current psr
+ *	2.	Make sure that all the upper 32 bits are off
+ *
+ *	3.	Clear the interrupt enable and interrupt state collection bits
+ *		in the psr before updating the ipsr and iip.
+ *
+ *	4.	Turn off the instruction, data and rse translation bits of the psr
+ *		and store the new value into ipsr
+ *		Also make sure that the interrupts are disabled.
+ *		Ensure that we are in little endian mode.
+ *		[psr.{rt, it, dt, i, be} = 0]
+ *
+ *	5.	Get the physical address corresponding to the virtual address
+ *		of the next instruction bundle and put it in iip.
+ *		(Using magic numbers 24 and 40 in the deposint instruction since
+ *		 the IA64_SDK code directly maps to lower 24bits as physical address
+ *		 from a virtual address).
+ *
+ *	6.	Do an rfi to move the values from ipsr to psr and iip to ip.
+ */
+#define  PHYSICAL_MODE_ENTER(temp1, temp2, start_addr, old_psr)				\
+	mov	old_psr = psr;								\
+	;;										\
+	dep	old_psr = 0, old_psr, 32, 32;						\
+											\
+	mov	ar.rsc = 0 ;								\
+	;;										\
+	srlz.d;										\
+	mov	temp2 = ar.bspstore;							\
+	;;										\
+	DATA_VA_TO_PA(temp2);								\
+	;;										\
+	mov	temp1 = ar.rnat;							\
+	;;										\
+	mov	ar.bspstore = temp2;							\
+	;;										\
+	mov	ar.rnat = temp1;							\
+	mov	temp1 = psr;								\
+	mov	temp2 = psr;								\
+	;;										\
+											\
+	dep	temp2 = 0, temp2, PSR_IC, 2;						\
+	;;										\
+	mov	psr.l = temp2;								\
+	;;										\
+	srlz.d;										\
+	dep	temp1 = 0, temp1, 32, 32;						\
+	;;										\
+	dep	temp1 = 0, temp1, PSR_IT, 1;						\
+	;;										\
+	dep	temp1 = 0, temp1, PSR_DT, 1;						\
+	;;										\
+	dep	temp1 = 0, temp1, PSR_RT, 1;						\
+	;;										\
+	dep	temp1 = 0, temp1, PSR_I, 1;						\
+	;;										\
+	dep	temp1 = 0, temp1, PSR_IC, 1;						\
+	;;										\
+	dep	temp1 = -1, temp1, PSR_MC, 1;						\
+	;;										\
+	mov	cr.ipsr = temp1;							\
+	;;										\
+	LOAD_PHYSICAL(p0, temp2, start_addr);						\
+	;;										\
+	mov	cr.iip = temp2;								\
+	mov	cr.ifs = r0;								\
+	DATA_VA_TO_PA(sp);								\
+	DATA_VA_TO_PA(gp);								\
+	;;										\
+	srlz.i;										\
+	;;										\
+	nop	1;									\
+	nop	2;									\
+	nop	1;									\
+	nop	2;									\
+	rfi;										\
+	;;
+
+/*
+ * This macro jumps to the instruction at the given virtual address
+ * and starts execution in virtual mode with all the address
+ * translations turned on.
+ *	1.	Get the old saved psr
+ *
+ *	2.	Clear the interrupt state collection bit in the current psr.
+ *
+ *	3.	Set the instruction translation bit back in the old psr
+ *		Note we have to do this since we are right now saving only the
+ *		lower 32-bits of old psr.(Also the old psr has the data and
+ *		rse translation bits on)
+ *
+ *	4.	Set ipsr to this old_psr with "it" bit set and "bn" = 1.
+ *
+ *	5.	Reset the current thread pointer (r13).
+ *
+ *	6.	Set iip to the virtual address of the next instruction bundle.
+ *
+ *	7.	Do an rfi to move ipsr to psr and iip to ip.
+ */
+
+#define VIRTUAL_MODE_ENTER(temp1, temp2, start_addr, old_psr)	\
+	mov	temp2 = psr;					\
+	;;							\
+	mov	old_psr = temp2;				\
+	;;							\
+	dep	temp2 = 0, temp2, PSR_IC, 2;			\
+	;;							\
+	mov	psr.l = temp2;					\
+	mov	ar.rsc = 0;					\
+	;;							\
+	srlz.d;							\
+	mov	r13 = ar.k6;					\
+	mov	temp2 = ar.bspstore;				\
+	;;							\
+	DATA_PA_TO_VA(temp2,temp1);				\
+	;;							\
+	mov	temp1 = ar.rnat;				\
+	;;							\
+	mov	ar.bspstore = temp2;				\
+	;;							\
+	mov	ar.rnat = temp1;				\
+	;;							\
+	mov	temp1 = old_psr;				\
+	;;							\
+	mov	temp2 = 1;					\
+	;;							\
+	dep	temp1 = temp2, temp1, PSR_IC, 1;		\
+	;;							\
+	dep	temp1 = temp2, temp1, PSR_IT, 1;		\
+	;;							\
+	dep	temp1 = temp2, temp1, PSR_DT, 1;		\
+	;;							\
+	dep	temp1 = temp2, temp1, PSR_RT, 1;		\
+	;;							\
+	dep	temp1 = temp2, temp1, PSR_BN, 1;		\
+	;;							\
+								\
+	mov     cr.ipsr = temp1;				\
+	movl	temp2 = start_addr;				\
+	;;							\
+	mov	cr.iip = temp2;					\
+	movl	gp = __gp					\
+	;;							\
+	DATA_PA_TO_VA(sp, temp1);				\
+	srlz.i;							\
+	;;							\
+	nop	1;						\
+	nop	2;						\
+	nop	1;						\
+	rfi							\
+	;;
+
+/*
+ * The MCA and INIT stacks in struct ia64_mca_cpu look like normal kernel
+ * stacks, except that the SAL/OS state and a switch_stack are stored near the
+ * top of the MCA/INIT stack.  To support concurrent entry to MCA or INIT, as
+ * well as MCA over INIT, each event needs its own SAL/OS state.  All entries
+ * are 16 byte aligned.
+ *
+ *      +---------------------------+
+ *      |          pt_regs          |
+ *      +---------------------------+
+ *      |        switch_stack       |
+ *      +---------------------------+
+ *      |        SAL/OS state       |
+ *      +---------------------------+
+ *      |    16 byte scratch area   |
+ *      +---------------------------+ <-------- SP at start of C MCA handler
+ *      |           .....           |
+ *      +---------------------------+
+ *      | RBS for MCA/INIT handler  |
+ *      +---------------------------+
+ *      | struct task for MCA/INIT  |
+ *      +---------------------------+ <-------- Bottom of MCA/INIT stack
+ */
+
+#define ALIGN16(x)			((x)&~15)
+#define MCA_PT_REGS_OFFSET		ALIGN16(KERNEL_STACK_SIZE-IA64_PT_REGS_SIZE)
+#define MCA_SWITCH_STACK_OFFSET		ALIGN16(MCA_PT_REGS_OFFSET-IA64_SWITCH_STACK_SIZE)
+#define MCA_SOS_OFFSET			ALIGN16(MCA_SWITCH_STACK_OFFSET-IA64_SAL_OS_STATE_SIZE)
+#define MCA_SP_OFFSET			ALIGN16(MCA_SOS_OFFSET-16)
+
+#endif /* _ASM_IA64_MCA_ASM_H */
diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h
new file mode 100644
index 00000000000..092f1c91b36
--- /dev/null
+++ b/arch/ia64/include/asm/meminit.h
@@ -0,0 +1,74 @@
+#ifndef meminit_h
+#define meminit_h
+
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+
+/*
+ * Entries defined so far:
+ * 	- boot param structure itself
+ * 	- memory map
+ * 	- initrd (optional)
+ * 	- command line string
+ * 	- kernel code & data
+ * 	- crash dumping code reserved region
+ * 	- Kernel memory map built from EFI memory map
+ * 	- ELF core header
+ *
+ * More could be added if necessary
+ */
+#define IA64_MAX_RSVD_REGIONS 9
+
+struct rsvd_region {
+	u64 start;	/* virtual address of beginning of element */
+	u64 end;	/* virtual address of end of element + 1 */
+};
+
+extern struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
+extern int num_rsvd_regions;
+
+extern void find_memory (void);
+extern void reserve_memory (void);
+extern void find_initrd (void);
+extern int filter_rsvd_memory (u64 start, u64 end, void *arg);
+extern int filter_memory (u64 start, u64 end, void *arg);
+extern unsigned long efi_memmap_init(u64 *s, u64 *e);
+extern int find_max_min_low_pfn (u64, u64, void *);
+
+extern unsigned long vmcore_find_descriptor_size(unsigned long address);
+extern int reserve_elfcorehdr(u64 *start, u64 *end);
+
+/*
+ * For rounding an address to the next IA64_GRANULE_SIZE or order
+ */
+#define GRANULEROUNDDOWN(n)	((n) & ~(IA64_GRANULE_SIZE-1))
+#define GRANULEROUNDUP(n)	(((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1))
+
+#ifdef CONFIG_NUMA
+  extern void call_pernode_memory (unsigned long start, unsigned long len, void *func);
+#else
+# define call_pernode_memory(start, len, func)	(*func)(start, len, 0)
+#endif
+
+#define IGNORE_PFN0	1	/* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
+
+extern int register_active_ranges(u64 start, u64 len, int nid);
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+# define LARGE_GAP	0x40000000 /* Use virtual mem map if hole is > than this */
+  extern unsigned long VMALLOC_END;
+  extern struct page *vmem_map;
+  extern int find_largest_hole(u64 start, u64 end, void *arg);
+  extern int create_mem_map_page_table(u64 start, u64 end, void *arg);
+  extern int vmemmap_find_next_valid_pfn(int, int);
+#else
+static inline int vmemmap_find_next_valid_pfn(int node, int i)
+{
+	return i + 1;
+}
+#endif
+#endif /* meminit_h */
diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h
new file mode 100644
index 00000000000..fdd5f5229f7
--- /dev/null
+++ b/arch/ia64/include/asm/mman.h
@@ -0,0 +1,17 @@
+/*
+ * Based on <asm-i386/mman.h>.
+ *
+ * Modified 1998-2000, 2002
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+#ifndef _ASM_IA64_MMAN_H
+#define _ASM_IA64_MMAN_H
+
+#include <uapi/asm/mman.h>
+
+#ifndef __ASSEMBLY__
+#define arch_mmap_check	ia64_mmap_check
+int ia64_mmap_check(unsigned long addr, unsigned long len,
+		unsigned long flags);
+#endif
+#endif /* _ASM_IA64_MMAN_H */
diff --git a/arch/ia64/include/asm/mmu.h b/arch/ia64/include/asm/mmu.h
new file mode 100644
index 00000000000..611432ba579
--- /dev/null
+++ b/arch/ia64/include/asm/mmu.h
@@ -0,0 +1,13 @@
+#ifndef __MMU_H
+#define __MMU_H
+
+/*
+ * Type for a context number.  We declare it volatile to ensure proper
+ * ordering when it's accessed outside of spinlock'd critical sections
+ * (e.g., as done in activate_mm() and init_new_context()).
+ */
+typedef volatile unsigned long mm_context_t;
+
+typedef unsigned long nv_mm_context_t;
+
+#endif
diff --git a/arch/ia64/include/asm/mmu_context.h b/arch/ia64/include/asm/mmu_context.h
new file mode 100644
index 00000000000..7f2a456603c
--- /dev/null
+++ b/arch/ia64/include/asm/mmu_context.h
@@ -0,0 +1,198 @@
+#ifndef _ASM_IA64_MMU_CONTEXT_H
+#define _ASM_IA64_MMU_CONTEXT_H
+
+/*
+ * Copyright (C) 1998-2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * Routines to manage the allocation of task context numbers.  Task context
+ * numbers are used to reduce or eliminate the need to perform TLB flushes
+ * due to context switches.  Context numbers are implemented using ia-64
+ * region ids.  Since the IA-64 TLB does not consider the region number when
+ * performing a TLB lookup, we need to assign a unique region id to each
+ * region in a process.  We use the least significant three bits in aregion
+ * id for this purpose.
+ */
+
+#define IA64_REGION_ID_KERNEL	0 /* the kernel's region id (tlb.c depends on this being 0) */
+
+#define ia64_rid(ctx,addr)	(((ctx) << 3) | (addr >> 61))
+
+# include <asm/page.h>
+# ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#include <asm/processor.h>
+#include <asm-generic/mm_hooks.h>
+
+struct ia64_ctx {
+	spinlock_t lock;
+	unsigned int next;	/* next context number to use */
+	unsigned int limit;     /* available free range */
+	unsigned int max_ctx;   /* max. context value supported by all CPUs */
+				/* call wrap_mmu_context when next >= max */
+	unsigned long *bitmap;  /* bitmap size is max_ctx+1 */
+	unsigned long *flushmap;/* pending rid to be flushed */
+};
+
+extern struct ia64_ctx ia64_ctx;
+DECLARE_PER_CPU(u8, ia64_need_tlb_flush);
+
+extern void mmu_context_init (void);
+extern void wrap_mmu_context (struct mm_struct *mm);
+
+static inline void
+enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+/*
+ * When the context counter wraps around all TLBs need to be flushed because
+ * an old context number might have been reused. This is signalled by the
+ * ia64_need_tlb_flush per-CPU variable, which is checked in the routine
+ * below. Called by activate_mm(). <efocht@ess.nec.de>
+ */
+static inline void
+delayed_tlb_flush (void)
+{
+	extern void local_flush_tlb_all (void);
+	unsigned long flags;
+
+	if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
+		spin_lock_irqsave(&ia64_ctx.lock, flags);
+		if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
+			local_flush_tlb_all();
+			__ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
+		}
+		spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+	}
+}
+
+static inline nv_mm_context_t
+get_mmu_context (struct mm_struct *mm)
+{
+	unsigned long flags;
+	nv_mm_context_t context = mm->context;
+
+	if (likely(context))
+		goto out;
+
+	spin_lock_irqsave(&ia64_ctx.lock, flags);
+	/* re-check, now that we've got the lock: */
+	context = mm->context;
+	if (context == 0) {
+		cpumask_clear(mm_cpumask(mm));
+		if (ia64_ctx.next >= ia64_ctx.limit) {
+			ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+					ia64_ctx.max_ctx, ia64_ctx.next);
+			ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+					ia64_ctx.max_ctx, ia64_ctx.next);
+			if (ia64_ctx.next >= ia64_ctx.max_ctx)
+				wrap_mmu_context(mm);
+		}
+		mm->context = context = ia64_ctx.next++;
+		__set_bit(context, ia64_ctx.bitmap);
+	}
+	spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+out:
+	/*
+	 * Ensure we're not starting to use "context" before any old
+	 * uses of it are gone from our TLB.
+	 */
+	delayed_tlb_flush();
+
+	return context;
+}
+
+/*
+ * Initialize context number to some sane value.  MM is guaranteed to be a
+ * brand-new address-space, so no TLB flushing is needed, ever.
+ */
+static inline int
+init_new_context (struct task_struct *p, struct mm_struct *mm)
+{
+	mm->context = 0;
+	return 0;
+}
+
+static inline void
+destroy_context (struct mm_struct *mm)
+{
+	/* Nothing to do.  */
+}
+
+static inline void
+reload_context (nv_mm_context_t context)
+{
+	unsigned long rid;
+	unsigned long rid_incr = 0;
+	unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4;
+
+	old_rr4 = ia64_get_rr(RGN_BASE(RGN_HPAGE));
+	rid = context << 3;	/* make space for encoding the region number */
+	rid_incr = 1 << 8;
+
+	/* encode the region id, preferred page size, and VHPT enable bit: */
+	rr0 = (rid << 8) | (PAGE_SHIFT << 2) | 1;
+	rr1 = rr0 + 1*rid_incr;
+	rr2 = rr0 + 2*rid_incr;
+	rr3 = rr0 + 3*rid_incr;
+	rr4 = rr0 + 4*rid_incr;
+#ifdef  CONFIG_HUGETLB_PAGE
+	rr4 = (rr4 & (~(0xfcUL))) | (old_rr4 & 0xfc);
+
+#  if RGN_HPAGE != 4
+#    error "reload_context assumes RGN_HPAGE is 4"
+#  endif
+#endif
+
+	ia64_set_rr0_to_rr4(rr0, rr1, rr2, rr3, rr4);
+	ia64_srlz_i();			/* srlz.i implies srlz.d */
+}
+
+/*
+ * Must be called with preemption off
+ */
+static inline void
+activate_context (struct mm_struct *mm)
+{
+	nv_mm_context_t context;
+
+	do {
+		context = get_mmu_context(mm);
+		if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
+			cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+		reload_context(context);
+		/*
+		 * in the unlikely event of a TLB-flush by another thread,
+		 * redo the load.
+		 */
+	} while (unlikely(context != mm->context));
+}
+
+#define deactivate_mm(tsk,mm)	do { } while (0)
+
+/*
+ * Switch from address space PREV to address space NEXT.
+ */
+static inline void
+activate_mm (struct mm_struct *prev, struct mm_struct *next)
+{
+	/*
+	 * We may get interrupts here, but that's OK because interrupt
+	 * handlers cannot touch user-space.
+	 */
+	ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd));
+	activate_context(next);
+}
+
+#define switch_mm(prev_mm,next_mm,next_task)	activate_mm(prev_mm, next_mm)
+
+# endif /* ! __ASSEMBLY__ */
+#endif /* _ASM_IA64_MMU_CONTEXT_H */
diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h
new file mode 100644
index 00000000000..e0de61709cf
--- /dev/null
+++ b/arch/ia64/include/asm/mmzone.h
@@ -0,0 +1,42 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000,2003 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2002 NEC Corp.
+ * Copyright (c) 2002 Erich Focht <efocht@ess.nec.de>
+ * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
+ */
+#ifndef _ASM_IA64_MMZONE_H
+#define _ASM_IA64_MMZONE_H
+
+#include <linux/numa.h>
+#include <asm/page.h>
+#include <asm/meminit.h>
+
+#ifdef CONFIG_NUMA
+
+static inline int pfn_to_nid(unsigned long pfn)
+{
+	extern int paddr_to_nid(unsigned long);
+	int nid = paddr_to_nid(pfn << PAGE_SHIFT);
+	if (nid < 0)
+		return 0;
+	else
+		return nid;
+}
+
+#ifdef CONFIG_IA64_DIG /* DIG systems are small */
+# define MAX_PHYSNODE_ID	8
+# define NR_NODE_MEMBLKS	(MAX_NUMNODES * 8)
+#else /* sn2 is the biggest case, so we use that if !DIG */
+# define MAX_PHYSNODE_ID	2048
+# define NR_NODE_MEMBLKS	(MAX_NUMNODES * 4)
+#endif
+
+#else /* CONFIG_NUMA */
+# define NR_NODE_MEMBLKS	(MAX_NUMNODES * 4)
+#endif /* CONFIG_NUMA */
+
+#endif /* _ASM_IA64_MMZONE_H */
diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h
new file mode 100644
index 00000000000..dfba22a872c
--- /dev/null
+++ b/arch/ia64/include/asm/module.h
@@ -0,0 +1,40 @@
+#ifndef _ASM_IA64_MODULE_H
+#define _ASM_IA64_MODULE_H
+
+#include <asm-generic/module.h>
+
+/*
+ * IA-64-specific support for kernel module loader.
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+struct elf64_shdr;			/* forward declration */
+
+struct mod_arch_specific {
+	struct elf64_shdr *core_plt;	/* core PLT section */
+	struct elf64_shdr *init_plt;	/* init PLT section */
+	struct elf64_shdr *got;		/* global offset table */
+	struct elf64_shdr *opd;		/* official procedure descriptors */
+	struct elf64_shdr *unwind;	/* unwind-table section */
+#ifdef CONFIG_PARAVIRT
+	struct elf64_shdr *paravirt_bundles;
+					/* paravirt_alt_bundle_patch table */
+	struct elf64_shdr *paravirt_insts;
+					/* paravirt_alt_inst_patch table */
+#endif
+	unsigned long gp;		/* global-pointer for module */
+
+	void *core_unw_table;		/* core unwind-table cookie returned by unwinder */
+	void *init_unw_table;		/* init unwind-table cookie returned by unwinder */
+	unsigned int next_got_entry;	/* index of next available got entry */
+};
+
+#define MODULE_PROC_FAMILY	"ia64"
+#define MODULE_ARCH_VERMAGIC	MODULE_PROC_FAMILY \
+	"gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__)
+
+#define ARCH_SHF_SMALL	SHF_IA_64_SHORT
+
+#endif /* _ASM_IA64_MODULE_H */
diff --git a/arch/ia64/include/asm/msidef.h b/arch/ia64/include/asm/msidef.h
new file mode 100644
index 00000000000..592c1047a0c
--- /dev/null
+++ b/arch/ia64/include/asm/msidef.h
@@ -0,0 +1,42 @@
+#ifndef _IA64_MSI_DEF_H
+#define _IA64_MSI_DEF_H
+
+/*
+ * Shifts for APIC-based data
+ */
+
+#define     MSI_DATA_VECTOR_SHIFT	0
+#define	    MSI_DATA_VECTOR(v)		(((u8)v) << MSI_DATA_VECTOR_SHIFT)
+#define     MSI_DATA_VECTOR_MASK	0xffffff00
+
+#define     MSI_DATA_DELIVERY_MODE_SHIFT	8
+#define     MSI_DATA_DELIVERY_FIXED	(0 << MSI_DATA_DELIVERY_MODE_SHIFT)
+#define     MSI_DATA_DELIVERY_LOWPRI	(1 << MSI_DATA_DELIVERY_MODE_SHIFT)
+
+#define     MSI_DATA_LEVEL_SHIFT	14
+#define     MSI_DATA_LEVEL_DEASSERT	(0 << MSI_DATA_LEVEL_SHIFT)
+#define     MSI_DATA_LEVEL_ASSERT	(1 << MSI_DATA_LEVEL_SHIFT)
+
+#define     MSI_DATA_TRIGGER_SHIFT	15
+#define     MSI_DATA_TRIGGER_EDGE	(0 << MSI_DATA_TRIGGER_SHIFT)
+#define     MSI_DATA_TRIGGER_LEVEL	(1 << MSI_DATA_TRIGGER_SHIFT)
+
+/*
+ * Shift/mask fields for APIC-based bus address
+ */
+
+#define     MSI_ADDR_DEST_ID_SHIFT	4
+#define     MSI_ADDR_HEADER		0xfee00000
+
+#define     MSI_ADDR_DEST_ID_MASK	0xfff0000f
+#define     MSI_ADDR_DEST_ID_CPU(cpu)	((cpu) << MSI_ADDR_DEST_ID_SHIFT)
+
+#define     MSI_ADDR_DEST_MODE_SHIFT	2
+#define     MSI_ADDR_DEST_MODE_PHYS	(0 << MSI_ADDR_DEST_MODE_SHIFT)
+#define	    MSI_ADDR_DEST_MODE_LOGIC	(1 << MSI_ADDR_DEST_MODE_SHIFT)
+
+#define     MSI_ADDR_REDIRECTION_SHIFT	3
+#define     MSI_ADDR_REDIRECTION_CPU	(0 << MSI_ADDR_REDIRECTION_SHIFT)
+#define     MSI_ADDR_REDIRECTION_LOWPRI	(1 << MSI_ADDR_REDIRECTION_SHIFT)
+
+#endif/* _IA64_MSI_DEF_H */
diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h
new file mode 100644
index 00000000000..f41e66d65e3
--- /dev/null
+++ b/arch/ia64/include/asm/mutex.h
@@ -0,0 +1,90 @@
+/*
+ * ia64 implementation of the mutex fastpath.
+ *
+ * Copyright (C) 2006 Ken Chen <kenneth.w.chen@intel.com>
+ *
+ */
+
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+
+/**
+ *  __mutex_fastpath_lock - try to take the lock by moving the count
+ *                          from 1 to a 0 value
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function MUST leave the value lower than
+ * 1 even when the "1" assertion wasn't true.
+ */
+static inline void
+__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
+{
+	if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
+		fail_fn(count);
+}
+
+/**
+ *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
+ *                                 from 1 to a 0 value
+ *  @count: pointer of type atomic_t
+ *
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
+ */
+static inline int
+__mutex_fastpath_lock_retval(atomic_t *count)
+{
+	if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
+		return -1;
+	return 0;
+}
+
+/**
+ *  __mutex_fastpath_unlock - try to promote the count from 0 to 1
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 0
+ *
+ * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
+ * In the failure case, this function is allowed to either set the value to
+ * 1, or to set it to a value lower than 1.
+ *
+ * If the implementation sets it to a value of lower than 1, then the
+ * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
+ * to return 0 otherwise.
+ */
+static inline void
+__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
+{
+	int ret = ia64_fetchadd4_rel(count, 1);
+	if (unlikely(ret < 0))
+		fail_fn(count);
+}
+
+#define __mutex_slowpath_needs_to_unlock()		1
+
+/**
+ * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
+ *
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: fallback function
+ *
+ * Change the count from 1 to a value lower than 1, and return 0 (failure)
+ * if it wasn't 1 originally, or return 1 (success) otherwise. This function
+ * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
+ * Additionally, if the value was < 0 originally, this function must not leave
+ * it to 0 on failure.
+ *
+ * If the architecture has no effective trylock variant, it should call the
+ * <fail_fn> spinlock-based trylock variant unconditionally.
+ */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	if (cmpxchg_acq(count, 1, 0) == 1)
+		return 1;
+	return 0;
+}
+
+#endif
diff --git a/arch/ia64/include/asm/native/inst.h b/arch/ia64/include/asm/native/inst.h
new file mode 100644
index 00000000000..d2d46efb3e6
--- /dev/null
+++ b/arch/ia64/include/asm/native/inst.h
@@ -0,0 +1,194 @@
+/******************************************************************************
+ * arch/ia64/include/asm/native/inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define DO_SAVE_MIN		IA64_NATIVE_DO_SAVE_MIN
+
+#define __paravirt_switch_to			ia64_native_switch_to
+#define __paravirt_leave_syscall		ia64_native_leave_syscall
+#define __paravirt_work_processed_syscall	ia64_native_work_processed_syscall
+#define __paravirt_leave_kernel			ia64_native_leave_kernel
+#define __paravirt_pending_syscall_end		ia64_work_pending_syscall_end
+#define __paravirt_work_processed_syscall_target \
+						ia64_work_processed_syscall
+
+#define paravirt_fsyscall_table			ia64_native_fsyscall_table
+#define paravirt_fsys_bubble_down		ia64_native_fsys_bubble_down
+
+#ifdef CONFIG_PARAVIRT_GUEST_ASM_CLOBBER_CHECK
+# define PARAVIRT_POISON	0xdeadbeefbaadf00d
+# define CLOBBER(clob)				\
+	;;					\
+	movl clob = PARAVIRT_POISON;		\
+	;;
+# define CLOBBER_PRED(pred_clob)		\
+	;;					\
+	cmp.eq pred_clob, p0 = r0, r0		\
+	;;
+#else
+# define CLOBBER(clob)			/* nothing */
+# define CLOBBER_PRED(pred_clob)	/* nothing */
+#endif
+
+#define MOV_FROM_IFA(reg)	\
+	mov reg = cr.ifa
+
+#define MOV_FROM_ITIR(reg)	\
+	mov reg = cr.itir
+
+#define MOV_FROM_ISR(reg)	\
+	mov reg = cr.isr
+
+#define MOV_FROM_IHA(reg)	\
+	mov reg = cr.iha
+
+#define MOV_FROM_IPSR(pred, reg)	\
+(pred)	mov reg = cr.ipsr
+
+#define MOV_FROM_IIM(reg)	\
+	mov reg = cr.iim
+
+#define MOV_FROM_IIP(reg)	\
+	mov reg = cr.iip
+
+#define MOV_FROM_IVR(reg, clob)	\
+	mov reg = cr.ivr	\
+	CLOBBER(clob)
+
+#define MOV_FROM_PSR(pred, reg, clob)	\
+(pred)	mov reg = psr			\
+	CLOBBER(clob)
+
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob)	\
+(pred)	mov reg = ar.itc				\
+	CLOBBER(clob)					\
+	CLOBBER_PRED(pred_clob)
+
+#define MOV_TO_IFA(reg, clob)	\
+	mov cr.ifa = reg	\
+	CLOBBER(clob)
+
+#define MOV_TO_ITIR(pred, reg, clob)	\
+(pred)	mov cr.itir = reg		\
+	CLOBBER(clob)
+
+#define MOV_TO_IHA(pred, reg, clob)	\
+(pred)	mov cr.iha = reg		\
+	CLOBBER(clob)
+
+#define MOV_TO_IPSR(pred, reg, clob)		\
+(pred)	mov cr.ipsr = reg			\
+	CLOBBER(clob)
+
+#define MOV_TO_IFS(pred, reg, clob)	\
+(pred)	mov cr.ifs = reg		\
+	CLOBBER(clob)
+
+#define MOV_TO_IIP(reg, clob)	\
+	mov cr.iip = reg	\
+	CLOBBER(clob)
+
+#define MOV_TO_KR(kr, reg, clob0, clob1)	\
+	mov IA64_KR(kr) = reg			\
+	CLOBBER(clob0)				\
+	CLOBBER(clob1)
+
+#define ITC_I(pred, reg, clob)	\
+(pred)	itc.i reg		\
+	CLOBBER(clob)
+
+#define ITC_D(pred, reg, clob)	\
+(pred)	itc.d reg		\
+	CLOBBER(clob)
+
+#define ITC_I_AND_D(pred_i, pred_d, reg, clob)	\
+(pred_i) itc.i reg;				\
+(pred_d) itc.d reg				\
+	CLOBBER(clob)
+
+#define THASH(pred, reg0, reg1, clob)		\
+(pred)	thash reg0 = reg1			\
+	CLOBBER(clob)
+
+#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1)		\
+	ssm psr.ic | PSR_DEFAULT_BITS					\
+	CLOBBER(clob0)							\
+	CLOBBER(clob1)							\
+	;;								\
+	srlz.i /* guarantee that interruption collectin is on */	\
+	;;
+
+#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1)	\
+	ssm psr.ic				\
+	CLOBBER(clob0)				\
+	CLOBBER(clob1)				\
+	;;					\
+	srlz.d
+
+#define RSM_PSR_IC(clob)	\
+	rsm psr.ic		\
+	CLOBBER(clob)
+
+#define SSM_PSR_I(pred, pred_clob, clob)	\
+(pred)	ssm psr.i				\
+	CLOBBER(clob)				\
+	CLOBBER_PRED(pred_clob)
+
+#define RSM_PSR_I(pred, clob0, clob1)	\
+(pred)	rsm psr.i			\
+	CLOBBER(clob0)			\
+	CLOBBER(clob1)
+
+#define RSM_PSR_I_IC(clob0, clob1, clob2)	\
+	rsm psr.i | psr.ic			\
+	CLOBBER(clob0)				\
+	CLOBBER(clob1)				\
+	CLOBBER(clob2)
+
+#define RSM_PSR_DT		\
+	rsm psr.dt
+
+#define RSM_PSR_BE_I(clob0, clob1)	\
+	rsm psr.be | psr.i		\
+	CLOBBER(clob0)			\
+	CLOBBER(clob1)
+
+#define SSM_PSR_DT_AND_SRLZ_I	\
+	ssm psr.dt		\
+	;;			\
+	srlz.i
+
+#define BSW_0(clob0, clob1, clob2)	\
+	bsw.0				\
+	CLOBBER(clob0)			\
+	CLOBBER(clob1)			\
+	CLOBBER(clob2)
+
+#define BSW_1(clob0, clob1)	\
+	bsw.1			\
+	CLOBBER(clob0)		\
+	CLOBBER(clob1)
+
+#define COVER	\
+	cover
+
+#define RFI	\
+	rfi
diff --git a/arch/ia64/include/asm/native/irq.h b/arch/ia64/include/asm/native/irq.h
new file mode 100644
index 00000000000..887a228e2ed
--- /dev/null
+++ b/arch/ia64/include/asm/native/irq.h
@@ -0,0 +1,33 @@
+/******************************************************************************
+ * arch/ia64/include/asm/native/irq.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _ASM_IA64_NATIVE_IRQ_H
+#define _ASM_IA64_NATIVE_IRQ_H
+
+#define NR_VECTORS	256
+
+#if (NR_VECTORS + 32 * NR_CPUS) < 1024
+#define IA64_NATIVE_NR_IRQS (NR_VECTORS + 32 * NR_CPUS)
+#else
+#define IA64_NATIVE_NR_IRQS 1024
+#endif
+
+#endif /* _ASM_IA64_NATIVE_IRQ_H */
diff --git a/arch/ia64/include/asm/native/patchlist.h b/arch/ia64/include/asm/native/patchlist.h
new file mode 100644
index 00000000000..be16ca9311b
--- /dev/null
+++ b/arch/ia64/include/asm/native/patchlist.h
@@ -0,0 +1,38 @@
+/******************************************************************************
+ * arch/ia64/include/asm/native/inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define __paravirt_start_gate_fsyscall_patchlist		\
+	__ia64_native_start_gate_fsyscall_patchlist
+#define __paravirt_end_gate_fsyscall_patchlist			\
+	__ia64_native_end_gate_fsyscall_patchlist
+#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist	\
+	__ia64_native_start_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist	\
+	__ia64_native_end_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_start_gate_vtop_patchlist			\
+	__ia64_native_start_gate_vtop_patchlist
+#define __paravirt_end_gate_vtop_patchlist			\
+	__ia64_native_end_gate_vtop_patchlist
+#define __paravirt_start_gate_mckinley_e9_patchlist		\
+	__ia64_native_start_gate_mckinley_e9_patchlist
+#define __paravirt_end_gate_mckinley_e9_patchlist		\
+	__ia64_native_end_gate_mckinley_e9_patchlist
diff --git a/arch/ia64/include/asm/native/pvchk_inst.h b/arch/ia64/include/asm/native/pvchk_inst.h
new file mode 100644
index 00000000000..8d72962ec83
--- /dev/null
+++ b/arch/ia64/include/asm/native/pvchk_inst.h
@@ -0,0 +1,271 @@
+#ifndef _ASM_NATIVE_PVCHK_INST_H
+#define _ASM_NATIVE_PVCHK_INST_H
+
+/******************************************************************************
+ * arch/ia64/include/asm/native/pvchk_inst.h
+ * Checker for paravirtualizations of privileged operations.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *      Dan Magenheimer <dan.magenheimer@hp.com>
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/**********************************************
+ * Instructions paravirtualized for correctness
+ **********************************************/
+
+/* "fc" and "thash" are privilege-sensitive instructions, meaning they
+ *  may have different semantics depending on whether they are executed
+ *  at PL0 vs PL!=0.  When paravirtualized, these instructions mustn't
+ *  be allowed to execute directly, lest incorrect semantics result.
+ */
+
+#define fc	.error "fc should not be used directly."
+#define thash	.error "thash should not be used directly."
+
+/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
+ * is not currently used (though it may be in a long-format VHPT system!)
+ * and the semantics of cover only change if psr.ic is off which is very
+ * rare (and currently non-existent outside of assembly code
+ */
+#define ttag	.error "ttag should not be used directly."
+#define cover	.error "cover should not be used directly."
+
+/* There are also privilege-sensitive registers.  These registers are
+ * readable at any privilege level but only writable at PL0.
+ */
+#define cpuid	.error "cpuid should not be used directly."
+#define pmd	.error "pmd should not be used directly."
+
+/*
+ * mov ar.eflag =
+ * mov = ar.eflag
+ */
+
+/**********************************************
+ * Instructions paravirtualized for performance
+ **********************************************/
+/*
+ * Those instructions include '.' which can't be handled by cpp.
+ * or can't be handled by cpp easily.
+ * They are handled by sed instead of cpp.
+ */
+
+/* for .S
+ * itc.i
+ * itc.d
+ *
+ * bsw.0
+ * bsw.1
+ *
+ * ssm psr.ic | PSR_DEFAULT_BITS
+ * ssm psr.ic
+ * rsm psr.ic
+ * ssm psr.i
+ * rsm psr.i
+ * rsm psr.i | psr.ic
+ * rsm psr.dt
+ * ssm psr.dt
+ *
+ * mov = cr.ifa
+ * mov = cr.itir
+ * mov = cr.isr
+ * mov = cr.iha
+ * mov = cr.ipsr
+ * mov = cr.iim
+ * mov = cr.iip
+ * mov = cr.ivr
+ * mov = psr
+ *
+ * mov cr.ifa =
+ * mov cr.itir =
+ * mov cr.iha =
+ * mov cr.ipsr =
+ * mov cr.ifs =
+ * mov cr.iip =
+ * mov cr.kr =
+ */
+
+/* for intrinsics
+ * ssm psr.i
+ * rsm psr.i
+ * mov = psr
+ * mov = ivr
+ * mov = tpr
+ * mov cr.itm =
+ * mov eoi =
+ * mov rr[] =
+ * mov = rr[]
+ * mov = kr
+ * mov kr =
+ * ptc.ga
+ */
+
+/*************************************************************
+ * define paravirtualized instrcution macros as nop to ingore.
+ * and check whether arguments are appropriate.
+ *************************************************************/
+
+/* check whether reg is a regular register */
+.macro is_rreg_in reg
+	.ifc "\reg", "r0"
+		nop 0
+		.exitm
+	.endif
+	;;
+	mov \reg = r0
+	;;
+.endm
+#define IS_RREG_IN(reg)	is_rreg_in reg ;
+
+#define IS_RREG_OUT(reg)			\
+	;;					\
+	mov reg = r0				\
+	;;
+
+#define IS_RREG_CLOB(reg)	IS_RREG_OUT(reg)
+
+/* check whether pred is a predicate register */
+#define IS_PRED_IN(pred)			\
+	;;					\
+	(pred)	nop 0				\
+	;;
+
+#define IS_PRED_OUT(pred)			\
+	;;					\
+	cmp.eq pred, p0 = r0, r0		\
+	;;
+
+#define IS_PRED_CLOB(pred)	IS_PRED_OUT(pred)
+
+
+#define DO_SAVE_MIN(__COVER, SAVE_IFS, EXTRA, WORKAROUND)	\
+	nop 0
+#define MOV_FROM_IFA(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_ITIR(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_ISR(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IHA(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IPSR(pred, reg)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IIM(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IIP(reg)			\
+	IS_RREG_OUT(reg)
+#define MOV_FROM_IVR(reg, clob)			\
+	IS_RREG_OUT(reg)			\
+	IS_RREG_CLOB(clob)
+#define MOV_FROM_PSR(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_OUT(reg)			\
+	IS_RREG_CLOB(clob)
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob)	\
+	IS_PRED_IN(pred)				\
+	IS_PRED_CLOB(pred_clob)				\
+	IS_RREG_OUT(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IFA(reg, clob)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_ITIR(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IHA(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IPSR(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IFS(pred, reg, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_IIP(reg, clob)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define MOV_TO_KR(kr, reg, clob0, clob1)	\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define ITC_I(pred, reg, clob)			\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define ITC_D(pred, reg, clob)			\
+	IS_PRED_IN(pred)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define ITC_I_AND_D(pred_i, pred_d, reg, clob)	\
+	IS_PRED_IN(pred_i)			\
+	IS_PRED_IN(pred_d)			\
+	IS_RREG_IN(reg)				\
+	IS_RREG_CLOB(clob)
+#define THASH(pred, reg0, reg1, clob)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_OUT(reg0)			\
+	IS_RREG_IN(reg1)			\
+	IS_RREG_CLOB(clob)
+#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1)	\
+	IS_RREG_CLOB(clob0)					\
+	IS_RREG_CLOB(clob1)
+#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1)	\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define RSM_PSR_IC(clob)			\
+	IS_RREG_CLOB(clob)
+#define SSM_PSR_I(pred, pred_clob, clob)	\
+	IS_PRED_IN(pred)			\
+	IS_PRED_CLOB(pred_clob)			\
+	IS_RREG_CLOB(clob)
+#define RSM_PSR_I(pred, clob0, clob1)		\
+	IS_PRED_IN(pred)			\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define RSM_PSR_I_IC(clob0, clob1, clob2)	\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)			\
+	IS_RREG_CLOB(clob2)
+#define RSM_PSR_DT				\
+	nop 0
+#define RSM_PSR_BE_I(clob0, clob1)		\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define SSM_PSR_DT_AND_SRLZ_I			\
+	nop 0
+#define BSW_0(clob0, clob1, clob2)		\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)			\
+	IS_RREG_CLOB(clob2)
+#define BSW_1(clob0, clob1)			\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
+#define COVER					\
+	nop 0
+#define RFI					\
+	br.ret.sptk.many rp /* defining nop causes dependency error */
+
+#endif /* _ASM_NATIVE_PVCHK_INST_H */
diff --git a/arch/ia64/include/asm/nodedata.h b/arch/ia64/include/asm/nodedata.h
new file mode 100644
index 00000000000..2fb337b0e9b
--- /dev/null
+++ b/arch/ia64/include/asm/nodedata.h
@@ -0,0 +1,63 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2002 NEC Corp.
+ * Copyright (c) 2002 Erich Focht <efocht@ess.nec.de>
+ * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
+ */
+#ifndef _ASM_IA64_NODEDATA_H
+#define _ASM_IA64_NODEDATA_H
+
+#include <linux/numa.h>
+
+#include <asm/percpu.h>
+#include <asm/mmzone.h>
+
+#ifdef CONFIG_NUMA
+
+/*
+ * Node Data. One of these structures is located on each node of a NUMA system.
+ */
+
+struct pglist_data;
+struct ia64_node_data {
+	short			active_cpu_count;
+	short			node;
+	struct pglist_data	*pg_data_ptrs[MAX_NUMNODES];
+};
+
+
+/*
+ * Return a pointer to the node_data structure for the executing cpu.
+ */
+#define local_node_data		(local_cpu_data->node_data)
+
+/*
+ * Given a node id, return a pointer to the pg_data_t for the node.
+ *
+ * NODE_DATA 	- should be used in all code not related to system
+ *		  initialization. It uses pernode data structures to minimize
+ *		  offnode memory references. However, these structure are not 
+ *		  present during boot. This macro can be used once cpu_init
+ *		  completes.
+ */
+#define NODE_DATA(nid)		(local_node_data->pg_data_ptrs[nid])
+
+/*
+ * LOCAL_DATA_ADDR - This is to calculate the address of other node's
+ *		     "local_node_data" at hot-plug phase. The local_node_data
+ *		     is pointed by per_cpu_page. Kernel usually use it for
+ *		     just executing cpu. However, when new node is hot-added,
+ *		     the addresses of local data for other nodes are necessary
+ *		     to update all of them.
+ */
+#define LOCAL_DATA_ADDR(pgdat)  			\
+	((struct ia64_node_data *)((u64)(pgdat) + 	\
+				   L1_CACHE_ALIGN(sizeof(struct pglist_data))))
+
+#endif /* CONFIG_NUMA */
+
+#endif /* _ASM_IA64_NODEDATA_H */
diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h
new file mode 100644
index 00000000000..2db0a6c6daa
--- /dev/null
+++ b/arch/ia64/include/asm/numa.h
@@ -0,0 +1,79 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * This file contains NUMA specific prototypes and definitions.
+ *
+ * 2002/08/05 Erich Focht <efocht@ess.nec.de>
+ *
+ */
+#ifndef _ASM_IA64_NUMA_H
+#define _ASM_IA64_NUMA_H
+
+
+#ifdef CONFIG_NUMA
+
+#include <linux/cache.h>
+#include <linux/cpumask.h>
+#include <linux/numa.h>
+#include <linux/smp.h>
+#include <linux/threads.h>
+
+#include <asm/mmzone.h>
+
+extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+extern pg_data_t *pgdat_list[MAX_NUMNODES];
+
+/* Stuff below this line could be architecture independent */
+
+extern int num_node_memblks;		/* total number of memory chunks */
+
+/*
+ * List of node memory chunks. Filled when parsing SRAT table to
+ * obtain information about memory nodes.
+*/
+
+struct node_memblk_s {
+	unsigned long start_paddr;
+	unsigned long size;
+	int nid;		/* which logical node contains this chunk? */
+	int bank;		/* which mem bank on this node */
+};
+
+struct node_cpuid_s {
+	u16	phys_id;	/* id << 8 | eid */
+	int	nid;		/* logical node containing this CPU */
+};
+
+extern struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
+extern struct node_cpuid_s node_cpuid[NR_CPUS];
+
+/*
+ * ACPI 2.0 SLIT (System Locality Information Table)
+ * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
+ *
+ * This is a matrix with "distances" between nodes, they should be
+ * proportional to the memory access latency ratios.
+ */
+
+extern u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
+#define node_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)])
+
+extern int paddr_to_nid(unsigned long paddr);
+
+#define local_nodeid (cpu_to_node_map[smp_processor_id()])
+
+extern void map_cpu_to_node(int cpu, int nid);
+extern void unmap_cpu_from_node(int cpu, int nid);
+extern void numa_clear_node(int cpu);
+
+#else /* !CONFIG_NUMA */
+#define map_cpu_to_node(cpu, nid)	do{}while(0)
+#define unmap_cpu_from_node(cpu, nid)	do{}while(0)
+#define paddr_to_nid(addr)	0
+#define numa_clear_node(cpu)	do { } while (0)
+#endif /* CONFIG_NUMA */
+
+#endif /* _ASM_IA64_NUMA_H */
diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h
new file mode 100644
index 00000000000..f1e1b2e3cdb
--- /dev/null
+++ b/arch/ia64/include/asm/page.h
@@ -0,0 +1,234 @@
+#ifndef _ASM_IA64_PAGE_H
+#define _ASM_IA64_PAGE_H
+/*
+ * Pagetable related stuff.
+ *
+ * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/intrinsics.h>
+#include <asm/types.h>
+
+/*
+ * The top three bits of an IA64 address are its Region Number.
+ * Different regions are assigned to different purposes.
+ */
+#define RGN_SHIFT	(61)
+#define RGN_BASE(r)	(__IA64_UL_CONST(r)<<RGN_SHIFT)
+#define RGN_BITS	(RGN_BASE(-1))
+
+#define RGN_KERNEL	7	/* Identity mapped region */
+#define RGN_UNCACHED    6	/* Identity mapped I/O region */
+#define RGN_GATE	5	/* Gate page, Kernel text, etc */
+#define RGN_HPAGE	4	/* For Huge TLB pages */
+
+/*
+ * PAGE_SHIFT determines the actual kernel page size.
+ */
+#if defined(CONFIG_IA64_PAGE_SIZE_4KB)
+# define PAGE_SHIFT	12
+#elif defined(CONFIG_IA64_PAGE_SIZE_8KB)
+# define PAGE_SHIFT	13
+#elif defined(CONFIG_IA64_PAGE_SIZE_16KB)
+# define PAGE_SHIFT	14
+#elif defined(CONFIG_IA64_PAGE_SIZE_64KB)
+# define PAGE_SHIFT	16
+#else
+# error Unsupported page size!
+#endif
+
+#define PAGE_SIZE		(__IA64_UL_CONST(1) << PAGE_SHIFT)
+#define PAGE_MASK		(~(PAGE_SIZE - 1))
+
+#define PERCPU_PAGE_SHIFT	18	/* log2() of max. size of per-CPU area */
+#define PERCPU_PAGE_SIZE	(__IA64_UL_CONST(1) << PERCPU_PAGE_SHIFT)
+
+
+#ifdef CONFIG_HUGETLB_PAGE
+# define HPAGE_REGION_BASE	RGN_BASE(RGN_HPAGE)
+# define HPAGE_SHIFT		hpage_shift
+# define HPAGE_SHIFT_DEFAULT	28	/* check ia64 SDM for architecture supported size */
+# define HPAGE_SIZE		(__IA64_UL_CONST(1) << HPAGE_SHIFT)
+# define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+
+# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif /* CONFIG_HUGETLB_PAGE */
+
+#ifdef __ASSEMBLY__
+# define __pa(x)		((x) - PAGE_OFFSET)
+# define __va(x)		((x) + PAGE_OFFSET)
+#else /* !__ASSEMBLY */
+#  define STRICT_MM_TYPECHECKS
+
+extern void clear_page (void *page);
+extern void copy_page (void *to, void *from);
+
+/*
+ * clear_user_page() and copy_user_page() can't be inline functions because
+ * flush_dcache_page() can't be defined until later...
+ */
+#define clear_user_page(addr, vaddr, page)	\
+do {						\
+	clear_page(addr);			\
+	flush_dcache_page(page);		\
+} while (0)
+
+#define copy_user_page(to, from, vaddr, page)	\
+do {						\
+	copy_page((to), (from));		\
+	flush_dcache_page(page);		\
+} while (0)
+
+
+#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr)		\
+({									\
+	struct page *page = alloc_page_vma(				\
+		GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr);	\
+	if (page)							\
+ 		flush_dcache_page(page);				\
+	page;								\
+})
+
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+extern int ia64_pfn_valid (unsigned long pfn);
+#else
+# define ia64_pfn_valid(pfn) 1
+#endif
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+extern struct page *vmem_map;
+#ifdef CONFIG_DISCONTIGMEM
+# define page_to_pfn(page)	((unsigned long) (page - vmem_map))
+# define pfn_to_page(pfn)	(vmem_map + (pfn))
+#else
+# include <asm-generic/memory_model.h>
+#endif
+#else
+# include <asm-generic/memory_model.h>
+#endif
+
+#ifdef CONFIG_FLATMEM
+# define pfn_valid(pfn)		(((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
+#elif defined(CONFIG_DISCONTIGMEM)
+extern unsigned long min_low_pfn;
+extern unsigned long max_low_pfn;
+# define pfn_valid(pfn)		(((pfn) >= min_low_pfn) && ((pfn) < max_low_pfn) && ia64_pfn_valid(pfn))
+#endif
+
+#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
+#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+
+typedef union ia64_va {
+	struct {
+		unsigned long off : 61;		/* intra-region offset */
+		unsigned long reg :  3;		/* region number */
+	} f;
+	unsigned long l;
+	void *p;
+} ia64_va;
+
+/*
+ * Note: These macros depend on the fact that PAGE_OFFSET has all
+ * region bits set to 1 and all other bits set to zero.  They are
+ * expressed in this way to ensure they result in a single "dep"
+ * instruction.
+ */
+#define __pa(x)		({ia64_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;})
+#define __va(x)		({ia64_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;})
+
+#define REGION_NUMBER(x)	({ia64_va _v; _v.l = (long) (x); _v.f.reg;})
+#define REGION_OFFSET(x)	({ia64_va _v; _v.l = (long) (x); _v.f.off;})
+
+#ifdef CONFIG_HUGETLB_PAGE
+# define htlbpage_to_page(x)	(((unsigned long) REGION_NUMBER(x) << 61)			\
+				 | (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT)))
+# define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+extern unsigned int hpage_shift;
+#endif
+
+static __inline__ int
+get_order (unsigned long size)
+{
+	long double d = size - 1;
+	long order;
+
+	order = ia64_getf_exp(d);
+	order = order - PAGE_SHIFT - 0xffff + 1;
+	if (order < 0)
+		order = 0;
+	return order;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#ifdef STRICT_MM_TYPECHECKS
+  /*
+   * These are used to make use of C type-checking..
+   */
+  typedef struct { unsigned long pte; } pte_t;
+  typedef struct { unsigned long pmd; } pmd_t;
+#ifdef CONFIG_PGTABLE_4
+  typedef struct { unsigned long pud; } pud_t;
+#endif
+  typedef struct { unsigned long pgd; } pgd_t;
+  typedef struct { unsigned long pgprot; } pgprot_t;
+  typedef struct page *pgtable_t;
+
+# define pte_val(x)	((x).pte)
+# define pmd_val(x)	((x).pmd)
+#ifdef CONFIG_PGTABLE_4
+# define pud_val(x)	((x).pud)
+#endif
+# define pgd_val(x)	((x).pgd)
+# define pgprot_val(x)	((x).pgprot)
+
+# define __pte(x)	((pte_t) { (x) } )
+# define __pmd(x)	((pmd_t) { (x) } )
+# define __pgprot(x)	((pgprot_t) { (x) } )
+
+#else /* !STRICT_MM_TYPECHECKS */
+  /*
+   * .. while these make it easier on the compiler
+   */
+# ifndef __ASSEMBLY__
+    typedef unsigned long pte_t;
+    typedef unsigned long pmd_t;
+    typedef unsigned long pgd_t;
+    typedef unsigned long pgprot_t;
+    typedef struct page *pgtable_t;
+# endif
+
+# define pte_val(x)	(x)
+# define pmd_val(x)	(x)
+# define pgd_val(x)	(x)
+# define pgprot_val(x)	(x)
+
+# define __pte(x)	(x)
+# define __pgd(x)	(x)
+# define __pgprot(x)	(x)
+#endif /* !STRICT_MM_TYPECHECKS */
+
+#define PAGE_OFFSET			RGN_BASE(RGN_KERNEL)
+
+#define VM_DATA_DEFAULT_FLAGS		(VM_READ | VM_WRITE |					\
+					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC |		\
+					 (((current->personality & READ_IMPLIES_EXEC) != 0)	\
+					  ? VM_EXEC : 0))
+
+#define GATE_ADDR		RGN_BASE(RGN_GATE)
+
+/*
+ * 0xa000000000000000+2*PERCPU_PAGE_SIZE
+ * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page)
+ */
+#define KERNEL_START		 (GATE_ADDR+__IA64_UL_CONST(0x100000000))
+#define PERCPU_ADDR		(-PERCPU_PAGE_SIZE)
+#define LOAD_OFFSET		(KERNEL_START - KERNEL_TR_PAGE_SIZE)
+
+#endif /* _ASM_IA64_PAGE_H */
diff --git a/arch/ia64/include/asm/pal.h b/arch/ia64/include/asm/pal.h
new file mode 100644
index 00000000000..2e69284df8e
--- /dev/null
+++ b/arch/ia64/include/asm/pal.h
@@ -0,0 +1,1825 @@
+#ifndef _ASM_IA64_PAL_H
+#define _ASM_IA64_PAL_H
+
+/*
+ * Processor Abstraction Layer definitions.
+ *
+ * This is based on Intel IA-64 Architecture Software Developer's Manual rev 1.0
+ * chapter 11 IA-64 Processor Abstraction Layer
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Srinivasa Prasad Thirumalachar <sprasad@sprasad.engr.sgi.com>
+ * Copyright (C) 2008 Silicon Graphics, Inc. (SGI)
+ *
+ * 99/10/01	davidm	Make sure we pass zero for reserved parameters.
+ * 00/03/07	davidm	Updated pal_cache_flush() to be in sync with PAL v2.6.
+ * 00/03/23     cfleck  Modified processor min-state save area to match updated PAL & SAL info
+ * 00/05/24     eranian Updated to latest PAL spec, fix structures bugs, added
+ * 00/05/25	eranian Support for stack calls, and static physical calls
+ * 00/06/18	eranian Support for stacked physical calls
+ * 06/10/26	rja	Support for Intel Itanium Architecture Software Developer's
+ *			Manual Rev 2.2 (Jan 2006)
+ */
+
+/*
+ * Note that some of these calls use a static-register only calling
+ * convention which has nothing to do with the regular calling
+ * convention.
+ */
+#define PAL_CACHE_FLUSH		1	/* flush i/d cache */
+#define PAL_CACHE_INFO		2	/* get detailed i/d cache info */
+#define PAL_CACHE_INIT		3	/* initialize i/d cache */
+#define PAL_CACHE_SUMMARY	4	/* get summary of cache hierarchy */
+#define PAL_MEM_ATTRIB		5	/* list supported memory attributes */
+#define PAL_PTCE_INFO		6	/* purge TLB info */
+#define PAL_VM_INFO		7	/* return supported virtual memory features */
+#define PAL_VM_SUMMARY		8	/* return summary on supported vm features */
+#define PAL_BUS_GET_FEATURES	9	/* return processor bus interface features settings */
+#define PAL_BUS_SET_FEATURES	10	/* set processor bus features */
+#define PAL_DEBUG_INFO		11	/* get number of debug registers */
+#define PAL_FIXED_ADDR		12	/* get fixed component of processors's directed address */
+#define PAL_FREQ_BASE		13	/* base frequency of the platform */
+#define PAL_FREQ_RATIOS		14	/* ratio of processor, bus and ITC frequency */
+#define PAL_PERF_MON_INFO	15	/* return performance monitor info */
+#define PAL_PLATFORM_ADDR	16	/* set processor interrupt block and IO port space addr */
+#define PAL_PROC_GET_FEATURES	17	/* get configurable processor features & settings */
+#define PAL_PROC_SET_FEATURES	18	/* enable/disable configurable processor features */
+#define PAL_RSE_INFO		19	/* return rse information */
+#define PAL_VERSION		20	/* return version of PAL code */
+#define PAL_MC_CLEAR_LOG	21	/* clear all processor log info */
+#define PAL_MC_DRAIN		22	/* drain operations which could result in an MCA */
+#define PAL_MC_EXPECTED		23	/* set/reset expected MCA indicator */
+#define PAL_MC_DYNAMIC_STATE	24	/* get processor dynamic state */
+#define PAL_MC_ERROR_INFO	25	/* get processor MCA info and static state */
+#define PAL_MC_RESUME		26	/* Return to interrupted process */
+#define PAL_MC_REGISTER_MEM	27	/* Register memory for PAL to use during MCAs and inits */
+#define PAL_HALT		28	/* enter the low power HALT state */
+#define PAL_HALT_LIGHT		29	/* enter the low power light halt state*/
+#define PAL_COPY_INFO		30	/* returns info needed to relocate PAL */
+#define PAL_CACHE_LINE_INIT	31	/* init tags & data of cache line */
+#define PAL_PMI_ENTRYPOINT	32	/* register PMI memory entry points with the processor */
+#define PAL_ENTER_IA_32_ENV	33	/* enter IA-32 system environment */
+#define PAL_VM_PAGE_SIZE	34	/* return vm TC and page walker page sizes */
+
+#define PAL_MEM_FOR_TEST	37	/* get amount of memory needed for late processor test */
+#define PAL_CACHE_PROT_INFO	38	/* get i/d cache protection info */
+#define PAL_REGISTER_INFO	39	/* return AR and CR register information*/
+#define PAL_SHUTDOWN		40	/* enter processor shutdown state */
+#define PAL_PREFETCH_VISIBILITY	41	/* Make Processor Prefetches Visible */
+#define PAL_LOGICAL_TO_PHYSICAL 42	/* returns information on logical to physical processor mapping */
+#define PAL_CACHE_SHARED_INFO	43	/* returns information on caches shared by logical processor */
+#define PAL_GET_HW_POLICY	48	/* Get current hardware resource sharing policy */
+#define PAL_SET_HW_POLICY	49	/* Set current hardware resource sharing policy */
+#define PAL_VP_INFO		50	/* Information about virtual processor features */
+#define PAL_MC_HW_TRACKING	51	/* Hardware tracking status */
+
+#define PAL_COPY_PAL		256	/* relocate PAL procedures and PAL PMI */
+#define PAL_HALT_INFO		257	/* return the low power capabilities of processor */
+#define PAL_TEST_PROC		258	/* perform late processor self-test */
+#define PAL_CACHE_READ		259	/* read tag & data of cacheline for diagnostic testing */
+#define PAL_CACHE_WRITE		260	/* write tag & data of cacheline for diagnostic testing */
+#define PAL_VM_TR_READ		261	/* read contents of translation register */
+#define PAL_GET_PSTATE		262	/* get the current P-state */
+#define PAL_SET_PSTATE		263	/* set the P-state */
+#define PAL_BRAND_INFO		274	/* Processor branding information */
+
+#define PAL_GET_PSTATE_TYPE_LASTSET	0
+#define PAL_GET_PSTATE_TYPE_AVGANDRESET	1
+#define PAL_GET_PSTATE_TYPE_AVGNORESET	2
+#define PAL_GET_PSTATE_TYPE_INSTANT	3
+
+#define PAL_MC_ERROR_INJECT	276	/* Injects processor error or returns injection capabilities */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/fpu.h>
+
+/*
+ * Data types needed to pass information into PAL procedures and
+ * interpret information returned by them.
+ */
+
+/* Return status from the PAL procedure */
+typedef s64				pal_status_t;
+
+#define PAL_STATUS_SUCCESS		0	/* No error */
+#define PAL_STATUS_UNIMPLEMENTED	(-1)	/* Unimplemented procedure */
+#define PAL_STATUS_EINVAL		(-2)	/* Invalid argument */
+#define PAL_STATUS_ERROR		(-3)	/* Error */
+#define PAL_STATUS_CACHE_INIT_FAIL	(-4)	/* Could not initialize the
+						 * specified level and type of
+						 * cache without sideeffects
+						 * and "restrict" was 1
+						 */
+#define PAL_STATUS_REQUIRES_MEMORY	(-9)	/* Call requires PAL memory buffer */
+
+/* Processor cache level in the hierarchy */
+typedef u64				pal_cache_level_t;
+#define PAL_CACHE_LEVEL_L0		0	/* L0 */
+#define PAL_CACHE_LEVEL_L1		1	/* L1 */
+#define PAL_CACHE_LEVEL_L2		2	/* L2 */
+
+
+/* Processor cache type at a particular level in the hierarchy */
+
+typedef u64				pal_cache_type_t;
+#define PAL_CACHE_TYPE_INSTRUCTION	1	/* Instruction cache */
+#define PAL_CACHE_TYPE_DATA		2	/* Data or unified cache */
+#define PAL_CACHE_TYPE_INSTRUCTION_DATA	3	/* Both Data & Instruction */
+
+
+#define PAL_CACHE_FLUSH_INVALIDATE	1	/* Invalidate clean lines */
+#define PAL_CACHE_FLUSH_CHK_INTRS	2	/* check for interrupts/mc while flushing */
+
+/* Processor cache line size in bytes  */
+typedef int				pal_cache_line_size_t;
+
+/* Processor cache line state */
+typedef u64				pal_cache_line_state_t;
+#define PAL_CACHE_LINE_STATE_INVALID	0	/* Invalid */
+#define PAL_CACHE_LINE_STATE_SHARED	1	/* Shared */
+#define PAL_CACHE_LINE_STATE_EXCLUSIVE	2	/* Exclusive */
+#define PAL_CACHE_LINE_STATE_MODIFIED	3	/* Modified */
+
+typedef struct pal_freq_ratio {
+	u32 den, num;		/* numerator & denominator */
+} itc_ratio, proc_ratio;
+
+typedef	union  pal_cache_config_info_1_s {
+	struct {
+		u64		u		: 1,	/* 0 Unified cache ? */
+				at		: 2,	/* 2-1 Cache mem attr*/
+				reserved	: 5,	/* 7-3 Reserved */
+				associativity	: 8,	/* 16-8 Associativity*/
+				line_size	: 8,	/* 23-17 Line size */
+				stride		: 8,	/* 31-24 Stride */
+				store_latency	: 8,	/*39-32 Store latency*/
+				load_latency	: 8,	/* 47-40 Load latency*/
+				store_hints	: 8,	/* 55-48 Store hints*/
+				load_hints	: 8;	/* 63-56 Load hints */
+	} pcci1_bits;
+	u64			pcci1_data;
+} pal_cache_config_info_1_t;
+
+typedef	union  pal_cache_config_info_2_s {
+	struct {
+		u32		cache_size;		/*cache size in bytes*/
+
+
+		u32		alias_boundary	: 8,	/* 39-32 aliased addr
+							 * separation for max
+							 * performance.
+							 */
+				tag_ls_bit	: 8,	/* 47-40 LSb of addr*/
+				tag_ms_bit	: 8,	/* 55-48 MSb of addr*/
+				reserved	: 8;	/* 63-56 Reserved */
+	} pcci2_bits;
+	u64			pcci2_data;
+} pal_cache_config_info_2_t;
+
+
+typedef struct pal_cache_config_info_s {
+	pal_status_t			pcci_status;
+	pal_cache_config_info_1_t	pcci_info_1;
+	pal_cache_config_info_2_t	pcci_info_2;
+	u64				pcci_reserved;
+} pal_cache_config_info_t;
+
+#define pcci_ld_hints		pcci_info_1.pcci1_bits.load_hints
+#define pcci_st_hints		pcci_info_1.pcci1_bits.store_hints
+#define pcci_ld_latency		pcci_info_1.pcci1_bits.load_latency
+#define pcci_st_latency		pcci_info_1.pcci1_bits.store_latency
+#define pcci_stride		pcci_info_1.pcci1_bits.stride
+#define pcci_line_size		pcci_info_1.pcci1_bits.line_size
+#define pcci_assoc		pcci_info_1.pcci1_bits.associativity
+#define pcci_cache_attr		pcci_info_1.pcci1_bits.at
+#define pcci_unified		pcci_info_1.pcci1_bits.u
+#define pcci_tag_msb		pcci_info_2.pcci2_bits.tag_ms_bit
+#define pcci_tag_lsb		pcci_info_2.pcci2_bits.tag_ls_bit
+#define pcci_alias_boundary	pcci_info_2.pcci2_bits.alias_boundary
+#define pcci_cache_size		pcci_info_2.pcci2_bits.cache_size
+
+
+
+/* Possible values for cache attributes */
+
+#define PAL_CACHE_ATTR_WT		0	/* Write through cache */
+#define PAL_CACHE_ATTR_WB		1	/* Write back cache */
+#define PAL_CACHE_ATTR_WT_OR_WB		2	/* Either write thru or write
+						 * back depending on TLB
+						 * memory attributes
+						 */
+
+
+/* Possible values for cache hints */
+
+#define PAL_CACHE_HINT_TEMP_1		0	/* Temporal level 1 */
+#define PAL_CACHE_HINT_NTEMP_1		1	/* Non-temporal level 1 */
+#define PAL_CACHE_HINT_NTEMP_ALL	3	/* Non-temporal all levels */
+
+/* Processor cache protection  information */
+typedef union pal_cache_protection_element_u {
+	u32			pcpi_data;
+	struct {
+		u32		data_bits	: 8, /* # data bits covered by
+						      * each unit of protection
+						      */
+
+				tagprot_lsb	: 6, /* Least -do- */
+				tagprot_msb	: 6, /* Most Sig. tag address
+						      * bit that this
+						      * protection covers.
+						      */
+				prot_bits	: 6, /* # of protection bits */
+				method		: 4, /* Protection method */
+				t_d		: 2; /* Indicates which part
+						      * of the cache this
+						      * protection encoding
+						      * applies.
+						      */
+	} pcp_info;
+} pal_cache_protection_element_t;
+
+#define pcpi_cache_prot_part	pcp_info.t_d
+#define pcpi_prot_method	pcp_info.method
+#define pcpi_prot_bits		pcp_info.prot_bits
+#define pcpi_tagprot_msb	pcp_info.tagprot_msb
+#define pcpi_tagprot_lsb	pcp_info.tagprot_lsb
+#define pcpi_data_bits		pcp_info.data_bits
+
+/* Processor cache part encodings */
+#define PAL_CACHE_PROT_PART_DATA	0	/* Data protection  */
+#define PAL_CACHE_PROT_PART_TAG		1	/* Tag  protection */
+#define PAL_CACHE_PROT_PART_TAG_DATA	2	/* Tag+data protection (tag is
+						 * more significant )
+						 */
+#define PAL_CACHE_PROT_PART_DATA_TAG	3	/* Data+tag protection (data is
+						 * more significant )
+						 */
+#define PAL_CACHE_PROT_PART_MAX		6
+
+
+typedef struct pal_cache_protection_info_s {
+	pal_status_t			pcpi_status;
+	pal_cache_protection_element_t	pcp_info[PAL_CACHE_PROT_PART_MAX];
+} pal_cache_protection_info_t;
+
+
+/* Processor cache protection method encodings */
+#define PAL_CACHE_PROT_METHOD_NONE		0	/* No protection */
+#define PAL_CACHE_PROT_METHOD_ODD_PARITY	1	/* Odd parity */
+#define PAL_CACHE_PROT_METHOD_EVEN_PARITY	2	/* Even parity */
+#define PAL_CACHE_PROT_METHOD_ECC		3	/* ECC protection */
+
+
+/* Processor cache line identification in the hierarchy */
+typedef union pal_cache_line_id_u {
+	u64			pclid_data;
+	struct {
+		u64		cache_type	: 8,	/* 7-0 cache type */
+				level		: 8,	/* 15-8 level of the
+							 * cache in the
+							 * hierarchy.
+							 */
+				way		: 8,	/* 23-16 way in the set
+							 */
+				part		: 8,	/* 31-24 part of the
+							 * cache
+							 */
+				reserved	: 32;	/* 63-32 is reserved*/
+	} pclid_info_read;
+	struct {
+		u64		cache_type	: 8,	/* 7-0 cache type */
+				level		: 8,	/* 15-8 level of the
+							 * cache in the
+							 * hierarchy.
+							 */
+				way		: 8,	/* 23-16 way in the set
+							 */
+				part		: 8,	/* 31-24 part of the
+							 * cache
+							 */
+				mesi		: 8,	/* 39-32 cache line
+							 * state
+							 */
+				start		: 8,	/* 47-40 lsb of data to
+							 * invert
+							 */
+				length		: 8,	/* 55-48 #bits to
+							 * invert
+							 */
+				trigger		: 8;	/* 63-56 Trigger error
+							 * by doing a load
+							 * after the write
+							 */
+
+	} pclid_info_write;
+} pal_cache_line_id_u_t;
+
+#define pclid_read_part		pclid_info_read.part
+#define pclid_read_way		pclid_info_read.way
+#define pclid_read_level	pclid_info_read.level
+#define pclid_read_cache_type	pclid_info_read.cache_type
+
+#define pclid_write_trigger	pclid_info_write.trigger
+#define pclid_write_length	pclid_info_write.length
+#define pclid_write_start	pclid_info_write.start
+#define pclid_write_mesi	pclid_info_write.mesi
+#define pclid_write_part	pclid_info_write.part
+#define pclid_write_way		pclid_info_write.way
+#define pclid_write_level	pclid_info_write.level
+#define pclid_write_cache_type	pclid_info_write.cache_type
+
+/* Processor cache line part encodings */
+#define PAL_CACHE_LINE_ID_PART_DATA		0	/* Data */
+#define PAL_CACHE_LINE_ID_PART_TAG		1	/* Tag */
+#define PAL_CACHE_LINE_ID_PART_DATA_PROT	2	/* Data protection */
+#define PAL_CACHE_LINE_ID_PART_TAG_PROT		3	/* Tag protection */
+#define PAL_CACHE_LINE_ID_PART_DATA_TAG_PROT	4	/* Data+tag
+							 * protection
+							 */
+typedef struct pal_cache_line_info_s {
+	pal_status_t		pcli_status;		/* Return status of the read cache line
+							 * info call.
+							 */
+	u64			pcli_data;		/* 64-bit data, tag, protection bits .. */
+	u64			pcli_data_len;		/* data length in bits */
+	pal_cache_line_state_t	pcli_cache_line_state;	/* mesi state */
+
+} pal_cache_line_info_t;
+
+
+/* Machine Check related crap */
+
+/* Pending event status bits  */
+typedef u64					pal_mc_pending_events_t;
+
+#define PAL_MC_PENDING_MCA			(1 << 0)
+#define PAL_MC_PENDING_INIT			(1 << 1)
+
+/* Error information type */
+typedef u64					pal_mc_info_index_t;
+
+#define PAL_MC_INFO_PROCESSOR			0	/* Processor */
+#define PAL_MC_INFO_CACHE_CHECK			1	/* Cache check */
+#define PAL_MC_INFO_TLB_CHECK			2	/* Tlb check */
+#define PAL_MC_INFO_BUS_CHECK			3	/* Bus check */
+#define PAL_MC_INFO_REQ_ADDR			4	/* Requestor address */
+#define PAL_MC_INFO_RESP_ADDR			5	/* Responder address */
+#define PAL_MC_INFO_TARGET_ADDR			6	/* Target address */
+#define PAL_MC_INFO_IMPL_DEP			7	/* Implementation
+							 * dependent
+							 */
+
+#define PAL_TLB_CHECK_OP_PURGE			8
+
+typedef struct pal_process_state_info_s {
+	u64		reserved1	: 2,
+			rz		: 1,	/* PAL_CHECK processor
+						 * rendezvous
+						 * successful.
+						 */
+
+			ra		: 1,	/* PAL_CHECK attempted
+						 * a rendezvous.
+						 */
+			me		: 1,	/* Distinct multiple
+						 * errors occurred
+						 */
+
+			mn		: 1,	/* Min. state save
+						 * area has been
+						 * registered with PAL
+						 */
+
+			sy		: 1,	/* Storage integrity
+						 * synched
+						 */
+
+
+			co		: 1,	/* Continuable */
+			ci		: 1,	/* MC isolated */
+			us		: 1,	/* Uncontained storage
+						 * damage.
+						 */
+
+
+			hd		: 1,	/* Non-essential hw
+						 * lost (no loss of
+						 * functionality)
+						 * causing the
+						 * processor to run in
+						 * degraded mode.
+						 */
+
+			tl		: 1,	/* 1 => MC occurred
+						 * after an instr was
+						 * executed but before
+						 * the trap that
+						 * resulted from instr
+						 * execution was
+						 * generated.
+						 * (Trap Lost )
+						 */
+			mi		: 1,	/* More information available
+						 * call PAL_MC_ERROR_INFO
+						 */
+			pi		: 1,	/* Precise instruction pointer */
+			pm		: 1,	/* Precise min-state save area */
+
+			dy		: 1,	/* Processor dynamic
+						 * state valid
+						 */
+
+
+			in		: 1,	/* 0 = MC, 1 = INIT */
+			rs		: 1,	/* RSE valid */
+			cm		: 1,	/* MC corrected */
+			ex		: 1,	/* MC is expected */
+			cr		: 1,	/* Control regs valid*/
+			pc		: 1,	/* Perf cntrs valid */
+			dr		: 1,	/* Debug regs valid */
+			tr		: 1,	/* Translation regs
+						 * valid
+						 */
+			rr		: 1,	/* Region regs valid */
+			ar		: 1,	/* App regs valid */
+			br		: 1,	/* Branch regs valid */
+			pr		: 1,	/* Predicate registers
+						 * valid
+						 */
+
+			fp		: 1,	/* fp registers valid*/
+			b1		: 1,	/* Preserved bank one
+						 * general registers
+						 * are valid
+						 */
+			b0		: 1,	/* Preserved bank zero
+						 * general registers
+						 * are valid
+						 */
+			gr		: 1,	/* General registers
+						 * are valid
+						 * (excl. banked regs)
+						 */
+			dsize		: 16,	/* size of dynamic
+						 * state returned
+						 * by the processor
+						 */
+
+			se		: 1,	/* Shared error.  MCA in a
+						   shared structure */
+			reserved2	: 10,
+			cc		: 1,	/* Cache check */
+			tc		: 1,	/* TLB check */
+			bc		: 1,	/* Bus check */
+			rc		: 1,	/* Register file check */
+			uc		: 1;	/* Uarch check */
+
+} pal_processor_state_info_t;
+
+typedef struct pal_cache_check_info_s {
+	u64		op		: 4,	/* Type of cache
+						 * operation that
+						 * caused the machine
+						 * check.
+						 */
+			level		: 2,	/* Cache level */
+			reserved1	: 2,
+			dl		: 1,	/* Failure in data part
+						 * of cache line
+						 */
+			tl		: 1,	/* Failure in tag part
+						 * of cache line
+						 */
+			dc		: 1,	/* Failure in dcache */
+			ic		: 1,	/* Failure in icache */
+			mesi		: 3,	/* Cache line state */
+			mv		: 1,	/* mesi valid */
+			way		: 5,	/* Way in which the
+						 * error occurred
+						 */
+			wiv		: 1,	/* Way field valid */
+			reserved2	: 1,
+			dp		: 1,	/* Data poisoned on MBE */
+			reserved3	: 6,
+			hlth		: 2,	/* Health indicator */
+
+			index		: 20,	/* Cache line index */
+			reserved4	: 2,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_cache_check_info_t;
+
+typedef struct pal_tlb_check_info_s {
+
+	u64		tr_slot		: 8,	/* Slot# of TR where
+						 * error occurred
+						 */
+			trv		: 1,	/* tr_slot field is valid */
+			reserved1	: 1,
+			level		: 2,	/* TLB level where failure occurred */
+			reserved2	: 4,
+			dtr		: 1,	/* Fail in data TR */
+			itr		: 1,	/* Fail in inst TR */
+			dtc		: 1,	/* Fail in data TC */
+			itc		: 1,	/* Fail in inst. TC */
+			op		: 4,	/* Cache operation */
+			reserved3	: 6,
+			hlth		: 2,	/* Health indicator */
+			reserved4	: 22,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_tlb_check_info_t;
+
+typedef struct pal_bus_check_info_s {
+	u64		size		: 5,	/* Xaction size */
+			ib		: 1,	/* Internal bus error */
+			eb		: 1,	/* External bus error */
+			cc		: 1,	/* Error occurred
+						 * during cache-cache
+						 * transfer.
+						 */
+			type		: 8,	/* Bus xaction type*/
+			sev		: 5,	/* Bus error severity*/
+			hier		: 2,	/* Bus hierarchy level */
+			dp		: 1,	/* Data poisoned on MBE */
+			bsi		: 8,	/* Bus error status
+						 * info
+						 */
+			reserved2	: 22,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_bus_check_info_t;
+
+typedef struct pal_reg_file_check_info_s {
+	u64		id		: 4,	/* Register file identifier */
+			op		: 4,	/* Type of register
+						 * operation that
+						 * caused the machine
+						 * check.
+						 */
+			reg_num		: 7,	/* Register number */
+			rnv		: 1,	/* reg_num valid */
+			reserved2	: 38,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			reserved3	: 3,
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_reg_file_check_info_t;
+
+typedef struct pal_uarch_check_info_s {
+	u64		sid		: 5,	/* Structure identification */
+			level		: 3,	/* Level of failure */
+			array_id	: 4,	/* Array identification */
+			op		: 4,	/* Type of
+						 * operation that
+						 * caused the machine
+						 * check.
+						 */
+			way		: 6,	/* Way of structure */
+			wv		: 1,	/* way valid */
+			xv		: 1,	/* index valid */
+			reserved1	: 6,
+			hlth		: 2,	/* Health indicator */
+			index		: 8,	/* Index or set of the uarch
+						 * structure that failed.
+						 */
+			reserved2	: 24,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_uarch_check_info_t;
+
+typedef union pal_mc_error_info_u {
+	u64				pmei_data;
+	pal_processor_state_info_t	pme_processor;
+	pal_cache_check_info_t		pme_cache;
+	pal_tlb_check_info_t		pme_tlb;
+	pal_bus_check_info_t		pme_bus;
+	pal_reg_file_check_info_t	pme_reg_file;
+	pal_uarch_check_info_t		pme_uarch;
+} pal_mc_error_info_t;
+
+#define pmci_proc_unknown_check			pme_processor.uc
+#define pmci_proc_bus_check			pme_processor.bc
+#define pmci_proc_tlb_check			pme_processor.tc
+#define pmci_proc_cache_check			pme_processor.cc
+#define pmci_proc_dynamic_state_size		pme_processor.dsize
+#define pmci_proc_gpr_valid			pme_processor.gr
+#define pmci_proc_preserved_bank0_gpr_valid	pme_processor.b0
+#define pmci_proc_preserved_bank1_gpr_valid	pme_processor.b1
+#define pmci_proc_fp_valid			pme_processor.fp
+#define pmci_proc_predicate_regs_valid		pme_processor.pr
+#define pmci_proc_branch_regs_valid		pme_processor.br
+#define pmci_proc_app_regs_valid		pme_processor.ar
+#define pmci_proc_region_regs_valid		pme_processor.rr
+#define pmci_proc_translation_regs_valid	pme_processor.tr
+#define pmci_proc_debug_regs_valid		pme_processor.dr
+#define pmci_proc_perf_counters_valid		pme_processor.pc
+#define pmci_proc_control_regs_valid		pme_processor.cr
+#define pmci_proc_machine_check_expected	pme_processor.ex
+#define pmci_proc_machine_check_corrected	pme_processor.cm
+#define pmci_proc_rse_valid			pme_processor.rs
+#define pmci_proc_machine_check_or_init		pme_processor.in
+#define pmci_proc_dynamic_state_valid		pme_processor.dy
+#define pmci_proc_operation			pme_processor.op
+#define pmci_proc_trap_lost			pme_processor.tl
+#define pmci_proc_hardware_damage		pme_processor.hd
+#define pmci_proc_uncontained_storage_damage	pme_processor.us
+#define pmci_proc_machine_check_isolated	pme_processor.ci
+#define pmci_proc_continuable			pme_processor.co
+#define pmci_proc_storage_intergrity_synced	pme_processor.sy
+#define pmci_proc_min_state_save_area_regd	pme_processor.mn
+#define	pmci_proc_distinct_multiple_errors	pme_processor.me
+#define pmci_proc_pal_attempted_rendezvous	pme_processor.ra
+#define pmci_proc_pal_rendezvous_complete	pme_processor.rz
+
+
+#define pmci_cache_level			pme_cache.level
+#define pmci_cache_line_state			pme_cache.mesi
+#define pmci_cache_line_state_valid		pme_cache.mv
+#define pmci_cache_line_index			pme_cache.index
+#define pmci_cache_instr_cache_fail		pme_cache.ic
+#define pmci_cache_data_cache_fail		pme_cache.dc
+#define pmci_cache_line_tag_fail		pme_cache.tl
+#define pmci_cache_line_data_fail		pme_cache.dl
+#define pmci_cache_operation			pme_cache.op
+#define pmci_cache_way_valid			pme_cache.wv
+#define pmci_cache_target_address_valid		pme_cache.tv
+#define pmci_cache_way				pme_cache.way
+#define pmci_cache_mc				pme_cache.mc
+
+#define pmci_tlb_instr_translation_cache_fail	pme_tlb.itc
+#define pmci_tlb_data_translation_cache_fail	pme_tlb.dtc
+#define pmci_tlb_instr_translation_reg_fail	pme_tlb.itr
+#define pmci_tlb_data_translation_reg_fail	pme_tlb.dtr
+#define pmci_tlb_translation_reg_slot		pme_tlb.tr_slot
+#define pmci_tlb_mc				pme_tlb.mc
+
+#define pmci_bus_status_info			pme_bus.bsi
+#define pmci_bus_req_address_valid		pme_bus.rq
+#define pmci_bus_resp_address_valid		pme_bus.rp
+#define pmci_bus_target_address_valid		pme_bus.tv
+#define pmci_bus_error_severity			pme_bus.sev
+#define pmci_bus_transaction_type		pme_bus.type
+#define pmci_bus_cache_cache_transfer		pme_bus.cc
+#define pmci_bus_transaction_size		pme_bus.size
+#define pmci_bus_internal_error			pme_bus.ib
+#define pmci_bus_external_error			pme_bus.eb
+#define pmci_bus_mc				pme_bus.mc
+
+/*
+ * NOTE: this min_state_save area struct only includes the 1KB
+ * architectural state save area.  The other 3 KB is scratch space
+ * for PAL.
+ */
+
+typedef struct pal_min_state_area_s {
+	u64	pmsa_nat_bits;		/* nat bits for saved GRs  */
+	u64	pmsa_gr[15];		/* GR1	- GR15		   */
+	u64	pmsa_bank0_gr[16];	/* GR16 - GR31		   */
+	u64	pmsa_bank1_gr[16];	/* GR16 - GR31		   */
+	u64	pmsa_pr;		/* predicate registers	   */
+	u64	pmsa_br0;		/* branch register 0	   */
+	u64	pmsa_rsc;		/* ar.rsc		   */
+	u64	pmsa_iip;		/* cr.iip		   */
+	u64	pmsa_ipsr;		/* cr.ipsr		   */
+	u64	pmsa_ifs;		/* cr.ifs		   */
+	u64	pmsa_xip;		/* previous iip		   */
+	u64	pmsa_xpsr;		/* previous psr		   */
+	u64	pmsa_xfs;		/* previous ifs		   */
+	u64	pmsa_br1;		/* branch register 1	   */
+	u64	pmsa_reserved[70];	/* pal_min_state_area should total to 1KB */
+} pal_min_state_area_t;
+
+
+struct ia64_pal_retval {
+	/*
+	 * A zero status value indicates call completed without error.
+	 * A negative status value indicates reason of call failure.
+	 * A positive status value indicates success but an
+	 * informational value should be printed (e.g., "reboot for
+	 * change to take effect").
+	 */
+	s64 status;
+	u64 v0;
+	u64 v1;
+	u64 v2;
+};
+
+/*
+ * Note: Currently unused PAL arguments are generally labeled
+ * "reserved" so the value specified in the PAL documentation
+ * (generally 0) MUST be passed.  Reserved parameters are not optional
+ * parameters.
+ */
+extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64);
+extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64);
+extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64);
+extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64);
+extern void ia64_save_scratch_fpregs (struct ia64_fpreg *);
+extern void ia64_load_scratch_fpregs (struct ia64_fpreg *);
+
+#define PAL_CALL(iprv,a0,a1,a2,a3) do {			\
+	struct ia64_fpreg fr[6];			\
+	ia64_save_scratch_fpregs(fr);			\
+	iprv = ia64_pal_call_static(a0, a1, a2, a3);	\
+	ia64_load_scratch_fpregs(fr);			\
+} while (0)
+
+#define PAL_CALL_STK(iprv,a0,a1,a2,a3) do {		\
+	struct ia64_fpreg fr[6];			\
+	ia64_save_scratch_fpregs(fr);			\
+	iprv = ia64_pal_call_stacked(a0, a1, a2, a3);	\
+	ia64_load_scratch_fpregs(fr);			\
+} while (0)
+
+#define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) do {			\
+	struct ia64_fpreg fr[6];				\
+	ia64_save_scratch_fpregs(fr);				\
+	iprv = ia64_pal_call_phys_static(a0, a1, a2, a3);	\
+	ia64_load_scratch_fpregs(fr);				\
+} while (0)
+
+#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) do {		\
+	struct ia64_fpreg fr[6];				\
+	ia64_save_scratch_fpregs(fr);				\
+	iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3);	\
+	ia64_load_scratch_fpregs(fr);				\
+} while (0)
+
+typedef int (*ia64_pal_handler) (u64, ...);
+extern ia64_pal_handler ia64_pal;
+extern void ia64_pal_handler_init (void *);
+
+extern ia64_pal_handler ia64_pal;
+
+extern pal_cache_config_info_t		l0d_cache_config_info;
+extern pal_cache_config_info_t		l0i_cache_config_info;
+extern pal_cache_config_info_t		l1_cache_config_info;
+extern pal_cache_config_info_t		l2_cache_config_info;
+
+extern pal_cache_protection_info_t	l0d_cache_protection_info;
+extern pal_cache_protection_info_t	l0i_cache_protection_info;
+extern pal_cache_protection_info_t	l1_cache_protection_info;
+extern pal_cache_protection_info_t	l2_cache_protection_info;
+
+extern pal_cache_config_info_t		pal_cache_config_info_get(pal_cache_level_t,
+								  pal_cache_type_t);
+
+extern pal_cache_protection_info_t	pal_cache_protection_info_get(pal_cache_level_t,
+								      pal_cache_type_t);
+
+
+extern void				pal_error(int);
+
+
+/* Useful wrappers for the current list of pal procedures */
+
+typedef union pal_bus_features_u {
+	u64	pal_bus_features_val;
+	struct {
+		u64	pbf_reserved1				:	29;
+		u64	pbf_req_bus_parking			:	1;
+		u64	pbf_bus_lock_mask			:	1;
+		u64	pbf_enable_half_xfer_rate		:	1;
+		u64	pbf_reserved2				:	20;
+		u64	pbf_enable_shared_line_replace		:	1;
+		u64	pbf_enable_exclusive_line_replace	:	1;
+		u64	pbf_disable_xaction_queueing		:	1;
+		u64	pbf_disable_resp_err_check		:	1;
+		u64	pbf_disable_berr_check			:	1;
+		u64	pbf_disable_bus_req_internal_err_signal	:	1;
+		u64	pbf_disable_bus_req_berr_signal		:	1;
+		u64	pbf_disable_bus_init_event_check	:	1;
+		u64	pbf_disable_bus_init_event_signal	:	1;
+		u64	pbf_disable_bus_addr_err_check		:	1;
+		u64	pbf_disable_bus_addr_err_signal		:	1;
+		u64	pbf_disable_bus_data_err_check		:	1;
+	} pal_bus_features_s;
+} pal_bus_features_u_t;
+
+extern void pal_bus_features_print (u64);
+
+/* Provide information about configurable processor bus features */
+static inline s64
+ia64_pal_bus_get_features (pal_bus_features_u_t *features_avail,
+			   pal_bus_features_u_t *features_status,
+			   pal_bus_features_u_t *features_control)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_BUS_GET_FEATURES, 0, 0, 0);
+	if (features_avail)
+		features_avail->pal_bus_features_val = iprv.v0;
+	if (features_status)
+		features_status->pal_bus_features_val = iprv.v1;
+	if (features_control)
+		features_control->pal_bus_features_val = iprv.v2;
+	return iprv.status;
+}
+
+/* Enables/disables specific processor bus features */
+static inline s64
+ia64_pal_bus_set_features (pal_bus_features_u_t feature_select)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_BUS_SET_FEATURES, feature_select.pal_bus_features_val, 0, 0);
+	return iprv.status;
+}
+
+/* Get detailed cache information */
+static inline s64
+ia64_pal_cache_config_info (u64 cache_level, u64 cache_type, pal_cache_config_info_t *conf)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_CACHE_INFO, cache_level, cache_type, 0);
+
+	if (iprv.status == 0) {
+		conf->pcci_status                 = iprv.status;
+		conf->pcci_info_1.pcci1_data      = iprv.v0;
+		conf->pcci_info_2.pcci2_data      = iprv.v1;
+		conf->pcci_reserved               = iprv.v2;
+	}
+	return iprv.status;
+
+}
+
+/* Get detailed cche protection information */
+static inline s64
+ia64_pal_cache_prot_info (u64 cache_level, u64 cache_type, pal_cache_protection_info_t *prot)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_CACHE_PROT_INFO, cache_level, cache_type, 0);
+
+	if (iprv.status == 0) {
+		prot->pcpi_status           = iprv.status;
+		prot->pcp_info[0].pcpi_data = iprv.v0 & 0xffffffff;
+		prot->pcp_info[1].pcpi_data = iprv.v0 >> 32;
+		prot->pcp_info[2].pcpi_data = iprv.v1 & 0xffffffff;
+		prot->pcp_info[3].pcpi_data = iprv.v1 >> 32;
+		prot->pcp_info[4].pcpi_data = iprv.v2 & 0xffffffff;
+		prot->pcp_info[5].pcpi_data = iprv.v2 >> 32;
+	}
+	return iprv.status;
+}
+
+/*
+ * Flush the processor instruction or data caches.  *PROGRESS must be
+ * initialized to zero before calling this for the first time..
+ */
+static inline s64
+ia64_pal_cache_flush (u64 cache_type, u64 invalidate, u64 *progress, u64 *vector)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress);
+	if (vector)
+		*vector = iprv.v0;
+	*progress = iprv.v1;
+	return iprv.status;
+}
+
+
+/* Initialize the processor controlled caches */
+static inline s64
+ia64_pal_cache_init (u64 level, u64 cache_type, u64 rest)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_INIT, level, cache_type, rest);
+	return iprv.status;
+}
+
+/* Initialize the tags and data of a data or unified cache line of
+ * processor controlled cache to known values without the availability
+ * of backing memory.
+ */
+static inline s64
+ia64_pal_cache_line_init (u64 physical_addr, u64 data_value)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_LINE_INIT, physical_addr, data_value, 0);
+	return iprv.status;
+}
+
+
+/* Read the data and tag of a processor controlled cache line for diags */
+static inline s64
+ia64_pal_cache_read (pal_cache_line_id_u_t line_id, u64 physical_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS_STK(iprv, PAL_CACHE_READ, line_id.pclid_data,
+				physical_addr, 0);
+	return iprv.status;
+}
+
+/* Return summary information about the hierarchy of caches controlled by the processor */
+static inline long ia64_pal_cache_summary(unsigned long *cache_levels,
+						unsigned long *unique_caches)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_SUMMARY, 0, 0, 0);
+	if (cache_levels)
+		*cache_levels = iprv.v0;
+	if (unique_caches)
+		*unique_caches = iprv.v1;
+	return iprv.status;
+}
+
+/* Write the data and tag of a processor-controlled cache line for diags */
+static inline s64
+ia64_pal_cache_write (pal_cache_line_id_u_t line_id, u64 physical_addr, u64 data)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS_STK(iprv, PAL_CACHE_WRITE, line_id.pclid_data,
+				physical_addr, data);
+	return iprv.status;
+}
+
+
+/* Return the parameters needed to copy relocatable PAL procedures from ROM to memory */
+static inline s64
+ia64_pal_copy_info (u64 copy_type, u64 num_procs, u64 num_iopics,
+		    u64 *buffer_size, u64 *buffer_align)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_COPY_INFO, copy_type, num_procs, num_iopics);
+	if (buffer_size)
+		*buffer_size = iprv.v0;
+	if (buffer_align)
+		*buffer_align = iprv.v1;
+	return iprv.status;
+}
+
+/* Copy relocatable PAL procedures from ROM to memory */
+static inline s64
+ia64_pal_copy_pal (u64 target_addr, u64 alloc_size, u64 processor, u64 *pal_proc_offset)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_COPY_PAL, target_addr, alloc_size, processor);
+	if (pal_proc_offset)
+		*pal_proc_offset = iprv.v0;
+	return iprv.status;
+}
+
+/* Return the number of instruction and data debug register pairs */
+static inline long ia64_pal_debug_info(unsigned long *inst_regs,
+						unsigned long *data_regs)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_DEBUG_INFO, 0, 0, 0);
+	if (inst_regs)
+		*inst_regs = iprv.v0;
+	if (data_regs)
+		*data_regs = iprv.v1;
+
+	return iprv.status;
+}
+
+#ifdef TBD
+/* Switch from IA64-system environment to IA-32 system environment */
+static inline s64
+ia64_pal_enter_ia32_env (ia32_env1, ia32_env2, ia32_env3)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_ENTER_IA_32_ENV, ia32_env1, ia32_env2, ia32_env3);
+	return iprv.status;
+}
+#endif
+
+/* Get unique geographical address of this processor on its bus */
+static inline s64
+ia64_pal_fixed_addr (u64 *global_unique_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FIXED_ADDR, 0, 0, 0);
+	if (global_unique_addr)
+		*global_unique_addr = iprv.v0;
+	return iprv.status;
+}
+
+/* Get base frequency of the platform if generated by the processor */
+static inline long ia64_pal_freq_base(unsigned long *platform_base_freq)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FREQ_BASE, 0, 0, 0);
+	if (platform_base_freq)
+		*platform_base_freq = iprv.v0;
+	return iprv.status;
+}
+
+/*
+ * Get the ratios for processor frequency, bus frequency and interval timer to
+ * to base frequency of the platform
+ */
+static inline s64
+ia64_pal_freq_ratios (struct pal_freq_ratio *proc_ratio, struct pal_freq_ratio *bus_ratio,
+		      struct pal_freq_ratio *itc_ratio)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FREQ_RATIOS, 0, 0, 0);
+	if (proc_ratio)
+		*(u64 *)proc_ratio = iprv.v0;
+	if (bus_ratio)
+		*(u64 *)bus_ratio = iprv.v1;
+	if (itc_ratio)
+		*(u64 *)itc_ratio = iprv.v2;
+	return iprv.status;
+}
+
+/*
+ * Get the current hardware resource sharing policy of the processor
+ */
+static inline s64
+ia64_pal_get_hw_policy (u64 proc_num, u64 *cur_policy, u64 *num_impacted,
+			u64 *la)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_GET_HW_POLICY, proc_num, 0, 0);
+	if (cur_policy)
+		*cur_policy = iprv.v0;
+	if (num_impacted)
+		*num_impacted = iprv.v1;
+	if (la)
+		*la = iprv.v2;
+	return iprv.status;
+}
+
+/* Make the processor enter HALT or one of the implementation dependent low
+ * power states where prefetching and execution are suspended and cache and
+ * TLB coherency is not maintained.
+ */
+static inline s64
+ia64_pal_halt (u64 halt_state)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_HALT, halt_state, 0, 0);
+	return iprv.status;
+}
+
+typedef union pal_power_mgmt_info_u {
+	u64			ppmi_data;
+	struct {
+	       u64		exit_latency		: 16,
+				entry_latency		: 16,
+				power_consumption	: 28,
+				im			: 1,
+				co			: 1,
+				reserved		: 2;
+	} pal_power_mgmt_info_s;
+} pal_power_mgmt_info_u_t;
+
+/* Return information about processor's optional power management capabilities. */
+static inline s64
+ia64_pal_halt_info (pal_power_mgmt_info_u_t *power_buf)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_HALT_INFO, (unsigned long) power_buf, 0, 0);
+	return iprv.status;
+}
+
+/* Get the current P-state information */
+static inline s64
+ia64_pal_get_pstate (u64 *pstate_index, unsigned long type)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_GET_PSTATE, type, 0, 0);
+	*pstate_index = iprv.v0;
+	return iprv.status;
+}
+
+/* Set the P-state */
+static inline s64
+ia64_pal_set_pstate (u64 pstate_index)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_SET_PSTATE, pstate_index, 0, 0);
+	return iprv.status;
+}
+
+/* Processor branding information*/
+static inline s64
+ia64_pal_get_brand_info (char *brand_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_BRAND_INFO, 0, (u64)brand_info, 0);
+	return iprv.status;
+}
+
+/* Cause the processor to enter LIGHT HALT state, where prefetching and execution are
+ * suspended, but cache and TLB coherency is maintained.
+ */
+static inline s64
+ia64_pal_halt_light (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_HALT_LIGHT, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Clear all the processor error logging   registers and reset the indicator that allows
+ * the error logging registers to be written. This procedure also checks the pending
+ * machine check bit and pending INIT bit and reports their states.
+ */
+static inline s64
+ia64_pal_mc_clear_log (u64 *pending_vector)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_CLEAR_LOG, 0, 0, 0);
+	if (pending_vector)
+		*pending_vector = iprv.v0;
+	return iprv.status;
+}
+
+/* Ensure that all outstanding transactions in a processor are completed or that any
+ * MCA due to thes outstanding transaction is taken.
+ */
+static inline s64
+ia64_pal_mc_drain (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_DRAIN, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Return the machine check dynamic processor state */
+static inline s64
+ia64_pal_mc_dynamic_state (u64 info_type, u64 dy_buffer, u64 *size)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_DYNAMIC_STATE, info_type, dy_buffer, 0);
+	if (size)
+		*size = iprv.v0;
+	return iprv.status;
+}
+
+/* Return processor machine check information */
+static inline s64
+ia64_pal_mc_error_info (u64 info_index, u64 type_index, u64 *size, u64 *error_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_ERROR_INFO, info_index, type_index, 0);
+	if (size)
+		*size = iprv.v0;
+	if (error_info)
+		*error_info = iprv.v1;
+	return iprv.status;
+}
+
+/* Injects the requested processor error or returns info on
+ * supported injection capabilities for current processor implementation
+ */
+static inline s64
+ia64_pal_mc_error_inject_phys (u64 err_type_info, u64 err_struct_info,
+			u64 err_data_buffer, u64 *capabilities, u64 *resources)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info,
+			  err_struct_info, err_data_buffer);
+	if (capabilities)
+		*capabilities= iprv.v0;
+	if (resources)
+		*resources= iprv.v1;
+	return iprv.status;
+}
+
+static inline s64
+ia64_pal_mc_error_inject_virt (u64 err_type_info, u64 err_struct_info,
+			u64 err_data_buffer, u64 *capabilities, u64 *resources)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info,
+			  err_struct_info, err_data_buffer);
+	if (capabilities)
+		*capabilities= iprv.v0;
+	if (resources)
+		*resources= iprv.v1;
+	return iprv.status;
+}
+
+/* Inform PALE_CHECK whether a machine check is expected so that PALE_CHECK willnot
+ * attempt to correct any expected machine checks.
+ */
+static inline s64
+ia64_pal_mc_expected (u64 expected, u64 *previous)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_EXPECTED, expected, 0, 0);
+	if (previous)
+		*previous = iprv.v0;
+	return iprv.status;
+}
+
+typedef union pal_hw_tracking_u {
+	u64			pht_data;
+	struct {
+		u64		itc	:4,	/* Instruction cache tracking */
+				dct	:4,	/* Date cache tracking */
+				itt	:4,	/* Instruction TLB tracking */
+				ddt	:4,	/* Data TLB tracking */
+				reserved:48;
+	} pal_hw_tracking_s;
+} pal_hw_tracking_u_t;
+
+/*
+ * Hardware tracking status.
+ */
+static inline s64
+ia64_pal_mc_hw_tracking (u64 *status)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_HW_TRACKING, 0, 0, 0);
+	if (status)
+		*status = iprv.v0;
+	return iprv.status;
+}
+
+/* Register a platform dependent location with PAL to which it can save
+ * minimal processor state in the event of a machine check or initialization
+ * event.
+ */
+static inline s64
+ia64_pal_mc_register_mem (u64 physical_addr, u64 size, u64 *req_size)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_REGISTER_MEM, physical_addr, size, 0);
+	if (req_size)
+		*req_size = iprv.v0;
+	return iprv.status;
+}
+
+/* Restore minimal architectural processor state, set CMC interrupt if necessary
+ * and resume execution
+ */
+static inline s64
+ia64_pal_mc_resume (u64 set_cmci, u64 save_ptr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_RESUME, set_cmci, save_ptr, 0);
+	return iprv.status;
+}
+
+/* Return the memory attributes implemented by the processor */
+static inline s64
+ia64_pal_mem_attrib (u64 *mem_attrib)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MEM_ATTRIB, 0, 0, 0);
+	if (mem_attrib)
+		*mem_attrib = iprv.v0 & 0xff;
+	return iprv.status;
+}
+
+/* Return the amount of memory needed for second phase of processor
+ * self-test and the required alignment of memory.
+ */
+static inline s64
+ia64_pal_mem_for_test (u64 *bytes_needed, u64 *alignment)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MEM_FOR_TEST, 0, 0, 0);
+	if (bytes_needed)
+		*bytes_needed = iprv.v0;
+	if (alignment)
+		*alignment = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_perf_mon_info_u {
+	u64			  ppmi_data;
+	struct {
+	       u64		generic		: 8,
+				width		: 8,
+				cycles		: 8,
+				retired		: 8,
+				reserved	: 32;
+	} pal_perf_mon_info_s;
+} pal_perf_mon_info_u_t;
+
+/* Return the performance monitor information about what can be counted
+ * and how to configure the monitors to count the desired events.
+ */
+static inline s64
+ia64_pal_perf_mon_info (u64 *pm_buffer, pal_perf_mon_info_u_t *pm_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PERF_MON_INFO, (unsigned long) pm_buffer, 0, 0);
+	if (pm_info)
+		pm_info->ppmi_data = iprv.v0;
+	return iprv.status;
+}
+
+/* Specifies the physical address of the processor interrupt block
+ * and I/O port space.
+ */
+static inline s64
+ia64_pal_platform_addr (u64 type, u64 physical_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PLATFORM_ADDR, type, physical_addr, 0);
+	return iprv.status;
+}
+
+/* Set the SAL PMI entrypoint in memory */
+static inline s64
+ia64_pal_pmi_entrypoint (u64 sal_pmi_entry_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PMI_ENTRYPOINT, sal_pmi_entry_addr, 0, 0);
+	return iprv.status;
+}
+
+struct pal_features_s;
+/* Provide information about configurable processor features */
+static inline s64
+ia64_pal_proc_get_features (u64 *features_avail,
+			    u64 *features_status,
+			    u64 *features_control,
+			    u64 features_set)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, features_set, 0);
+	if (iprv.status == 0) {
+		*features_avail   = iprv.v0;
+		*features_status  = iprv.v1;
+		*features_control = iprv.v2;
+	}
+	return iprv.status;
+}
+
+/* Enable/disable processor dependent features */
+static inline s64
+ia64_pal_proc_set_features (u64 feature_select)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, feature_select, 0, 0);
+	return iprv.status;
+}
+
+/*
+ * Put everything in a struct so we avoid the global offset table whenever
+ * possible.
+ */
+typedef struct ia64_ptce_info_s {
+	unsigned long	base;
+	u32		count[2];
+	u32		stride[2];
+} ia64_ptce_info_t;
+
+/* Return the information required for the architected loop used to purge
+ * (initialize) the entire TC
+ */
+static inline s64
+ia64_get_ptce (ia64_ptce_info_t *ptce)
+{
+	struct ia64_pal_retval iprv;
+
+	if (!ptce)
+		return -1;
+
+	PAL_CALL(iprv, PAL_PTCE_INFO, 0, 0, 0);
+	if (iprv.status == 0) {
+		ptce->base = iprv.v0;
+		ptce->count[0] = iprv.v1 >> 32;
+		ptce->count[1] = iprv.v1 & 0xffffffff;
+		ptce->stride[0] = iprv.v2 >> 32;
+		ptce->stride[1] = iprv.v2 & 0xffffffff;
+	}
+	return iprv.status;
+}
+
+/* Return info about implemented application and control registers. */
+static inline s64
+ia64_pal_register_info (u64 info_request, u64 *reg_info_1, u64 *reg_info_2)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_REGISTER_INFO, info_request, 0, 0);
+	if (reg_info_1)
+		*reg_info_1 = iprv.v0;
+	if (reg_info_2)
+		*reg_info_2 = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_hints_u {
+	unsigned long		ph_data;
+	struct {
+	       unsigned long	si		: 1,
+				li		: 1,
+				reserved	: 62;
+	} pal_hints_s;
+} pal_hints_u_t;
+
+/* Return information about the register stack and RSE for this processor
+ * implementation.
+ */
+static inline long ia64_pal_rse_info(unsigned long *num_phys_stacked,
+							pal_hints_u_t *hints)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_RSE_INFO, 0, 0, 0);
+	if (num_phys_stacked)
+		*num_phys_stacked = iprv.v0;
+	if (hints)
+		hints->ph_data = iprv.v1;
+	return iprv.status;
+}
+
+/*
+ * Set the current hardware resource sharing policy of the processor
+ */
+static inline s64
+ia64_pal_set_hw_policy (u64 policy)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_SET_HW_POLICY, policy, 0, 0);
+	return iprv.status;
+}
+
+/* Cause the processor to enter	SHUTDOWN state, where prefetching and execution are
+ * suspended, but cause cache and TLB coherency to be maintained.
+ * This is usually called in IA-32 mode.
+ */
+static inline s64
+ia64_pal_shutdown (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_SHUTDOWN, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Perform the second phase of processor self-test. */
+static inline s64
+ia64_pal_test_proc (u64 test_addr, u64 test_size, u64 attributes, u64 *self_test_state)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_TEST_PROC, test_addr, test_size, attributes);
+	if (self_test_state)
+		*self_test_state = iprv.v0;
+	return iprv.status;
+}
+
+typedef union  pal_version_u {
+	u64	pal_version_val;
+	struct {
+		u64	pv_pal_b_rev		:	8;
+		u64	pv_pal_b_model		:	8;
+		u64	pv_reserved1		:	8;
+		u64	pv_pal_vendor		:	8;
+		u64	pv_pal_a_rev		:	8;
+		u64	pv_pal_a_model		:	8;
+		u64	pv_reserved2		:	16;
+	} pal_version_s;
+} pal_version_u_t;
+
+
+/*
+ * Return PAL version information.  While the documentation states that
+ * PAL_VERSION can be called in either physical or virtual mode, some
+ * implementations only allow physical calls.  We don't call it very often,
+ * so the overhead isn't worth eliminating.
+ */
+static inline s64
+ia64_pal_version (pal_version_u_t *pal_min_version, pal_version_u_t *pal_cur_version)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_VERSION, 0, 0, 0);
+	if (pal_min_version)
+		pal_min_version->pal_version_val = iprv.v0;
+
+	if (pal_cur_version)
+		pal_cur_version->pal_version_val = iprv.v1;
+
+	return iprv.status;
+}
+
+typedef union pal_tc_info_u {
+	u64			pti_val;
+	struct {
+	       u64		num_sets	:	8,
+				associativity	:	8,
+				num_entries	:	16,
+				pf		:	1,
+				unified		:	1,
+				reduce_tr	:	1,
+				reserved	:	29;
+	} pal_tc_info_s;
+} pal_tc_info_u_t;
+
+#define tc_reduce_tr		pal_tc_info_s.reduce_tr
+#define tc_unified		pal_tc_info_s.unified
+#define tc_pf			pal_tc_info_s.pf
+#define tc_num_entries		pal_tc_info_s.num_entries
+#define tc_associativity	pal_tc_info_s.associativity
+#define tc_num_sets		pal_tc_info_s.num_sets
+
+
+/* Return information about the virtual memory characteristics of the processor
+ * implementation.
+ */
+static inline s64
+ia64_pal_vm_info (u64 tc_level, u64 tc_type,  pal_tc_info_u_t *tc_info, u64 *tc_pages)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_VM_INFO, tc_level, tc_type, 0);
+	if (tc_info)
+		tc_info->pti_val = iprv.v0;
+	if (tc_pages)
+		*tc_pages = iprv.v1;
+	return iprv.status;
+}
+
+/* Get page size information about the virtual memory characteristics of the processor
+ * implementation.
+ */
+static inline s64 ia64_pal_vm_page_size(u64 *tr_pages, u64 *vw_pages)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_VM_PAGE_SIZE, 0, 0, 0);
+	if (tr_pages)
+		*tr_pages = iprv.v0;
+	if (vw_pages)
+		*vw_pages = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_vm_info_1_u {
+	u64			pvi1_val;
+	struct {
+		u64		vw		: 1,
+				phys_add_size	: 7,
+				key_size	: 8,
+				max_pkr		: 8,
+				hash_tag_id	: 8,
+				max_dtr_entry	: 8,
+				max_itr_entry	: 8,
+				max_unique_tcs	: 8,
+				num_tc_levels	: 8;
+	} pal_vm_info_1_s;
+} pal_vm_info_1_u_t;
+
+#define PAL_MAX_PURGES		0xFFFF		/* all ones is means unlimited */
+
+typedef union pal_vm_info_2_u {
+	u64			pvi2_val;
+	struct {
+		u64		impl_va_msb	: 8,
+				rid_size	: 8,
+				max_purges	: 16,
+				reserved	: 32;
+	} pal_vm_info_2_s;
+} pal_vm_info_2_u_t;
+
+/* Get summary information about the virtual memory characteristics of the processor
+ * implementation.
+ */
+static inline s64
+ia64_pal_vm_summary (pal_vm_info_1_u_t *vm_info_1, pal_vm_info_2_u_t *vm_info_2)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_VM_SUMMARY, 0, 0, 0);
+	if (vm_info_1)
+		vm_info_1->pvi1_val = iprv.v0;
+	if (vm_info_2)
+		vm_info_2->pvi2_val = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_vp_info_u {
+	u64			pvi_val;
+	struct {
+		u64		index:		48,	/* virtual feature set info */
+				vmm_id:		16;	/* feature set id */
+	} pal_vp_info_s;
+} pal_vp_info_u_t;
+
+/*
+ * Returns information about virtual processor features
+ */
+static inline s64
+ia64_pal_vp_info (u64 feature_set, u64 vp_buffer, u64 *vp_info, u64 *vmm_id)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_VP_INFO, feature_set, vp_buffer, 0);
+	if (vp_info)
+		*vp_info = iprv.v0;
+	if (vmm_id)
+		*vmm_id = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_itr_valid_u {
+	u64			piv_val;
+	struct {
+	       u64		access_rights_valid	: 1,
+				priv_level_valid	: 1,
+				dirty_bit_valid		: 1,
+				mem_attr_valid		: 1,
+				reserved		: 60;
+	} pal_tr_valid_s;
+} pal_tr_valid_u_t;
+
+/* Read a translation register */
+static inline s64
+ia64_pal_tr_read (u64 reg_num, u64 tr_type, u64 *tr_buffer, pal_tr_valid_u_t *tr_valid)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS_STK(iprv, PAL_VM_TR_READ, reg_num, tr_type,(u64)ia64_tpa(tr_buffer));
+	if (tr_valid)
+		tr_valid->piv_val = iprv.v0;
+	return iprv.status;
+}
+
+/*
+ * PAL_PREFETCH_VISIBILITY transaction types
+ */
+#define PAL_VISIBILITY_VIRTUAL		0
+#define PAL_VISIBILITY_PHYSICAL		1
+
+/*
+ * PAL_PREFETCH_VISIBILITY return codes
+ */
+#define PAL_VISIBILITY_OK		1
+#define PAL_VISIBILITY_OK_REMOTE_NEEDED	0
+#define PAL_VISIBILITY_INVAL_ARG	-2
+#define PAL_VISIBILITY_ERROR		-3
+
+static inline s64
+ia64_pal_prefetch_visibility (s64 trans_type)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PREFETCH_VISIBILITY, trans_type, 0, 0);
+	return iprv.status;
+}
+
+/* data structure for getting information on logical to physical mappings */
+typedef union pal_log_overview_u {
+	struct {
+		u64	num_log		:16,	/* Total number of logical
+						 * processors on this die
+						 */
+			tpc		:8,	/* Threads per core */
+			reserved3	:8,	/* Reserved */
+			cpp		:8,	/* Cores per processor */
+			reserved2	:8,	/* Reserved */
+			ppid		:8,	/* Physical processor ID */
+			reserved1	:8;	/* Reserved */
+	} overview_bits;
+	u64 overview_data;
+} pal_log_overview_t;
+
+typedef union pal_proc_n_log_info1_u{
+	struct {
+		u64	tid		:16,	/* Thread id */
+			reserved2	:16,	/* Reserved */
+			cid		:16,	/* Core id */
+			reserved1	:16;	/* Reserved */
+	} ppli1_bits;
+	u64	ppli1_data;
+} pal_proc_n_log_info1_t;
+
+typedef union pal_proc_n_log_info2_u {
+	struct {
+		u64	la		:16,	/* Logical address */
+			reserved	:48;	/* Reserved */
+	} ppli2_bits;
+	u64	ppli2_data;
+} pal_proc_n_log_info2_t;
+
+typedef struct pal_logical_to_physical_s
+{
+	pal_log_overview_t overview;
+	pal_proc_n_log_info1_t ppli1;
+	pal_proc_n_log_info2_t ppli2;
+} pal_logical_to_physical_t;
+
+#define overview_num_log	overview.overview_bits.num_log
+#define overview_tpc		overview.overview_bits.tpc
+#define overview_cpp		overview.overview_bits.cpp
+#define overview_ppid		overview.overview_bits.ppid
+#define log1_tid		ppli1.ppli1_bits.tid
+#define log1_cid		ppli1.ppli1_bits.cid
+#define log2_la			ppli2.ppli2_bits.la
+
+/* Get information on logical to physical processor mappings. */
+static inline s64
+ia64_pal_logical_to_phys(u64 proc_number, pal_logical_to_physical_t *mapping)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_LOGICAL_TO_PHYSICAL, proc_number, 0, 0);
+
+	if (iprv.status == PAL_STATUS_SUCCESS)
+	{
+		mapping->overview.overview_data = iprv.v0;
+		mapping->ppli1.ppli1_data = iprv.v1;
+		mapping->ppli2.ppli2_data = iprv.v2;
+	}
+
+	return iprv.status;
+}
+
+typedef struct pal_cache_shared_info_s
+{
+	u64 num_shared;
+	pal_proc_n_log_info1_t ppli1;
+	pal_proc_n_log_info2_t ppli2;
+} pal_cache_shared_info_t;
+
+/* Get information on logical to physical processor mappings. */
+static inline s64
+ia64_pal_cache_shared_info(u64 level,
+		u64 type,
+		u64 proc_number,
+		pal_cache_shared_info_t *info)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_CACHE_SHARED_INFO, level, type, proc_number);
+
+	if (iprv.status == PAL_STATUS_SUCCESS) {
+		info->num_shared = iprv.v0;
+		info->ppli1.ppli1_data = iprv.v1;
+		info->ppli2.ppli2_data = iprv.v2;
+	}
+
+	return iprv.status;
+}
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IA64_PAL_H */
diff --git a/arch/ia64/include/asm/param.h b/arch/ia64/include/asm/param.h
new file mode 100644
index 00000000000..1295913d6a8
--- /dev/null
+++ b/arch/ia64/include/asm/param.h
@@ -0,0 +1,17 @@
+/*
+ * Fundamental kernel parameters.
+ *
+ * Based on <asm-i386/param.h>.
+ *
+ * Modified 1998, 1999, 2002-2003
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+#ifndef _ASM_IA64_PARAM_H
+#define _ASM_IA64_PARAM_H
+
+#include <uapi/asm/param.h>
+
+# define HZ		CONFIG_HZ
+# define USER_HZ	HZ
+# define CLOCKS_PER_SEC	HZ	/* frequency at which times() counts */
+#endif /* _ASM_IA64_PARAM_H */
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
new file mode 100644
index 00000000000..b53518a9802
--- /dev/null
+++ b/arch/ia64/include/asm/paravirt.h
@@ -0,0 +1,321 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+
+#ifndef __ASM_PARAVIRT_H
+#define __ASM_PARAVIRT_H
+
+#ifndef __ASSEMBLY__
+/******************************************************************************
+ * fsys related addresses
+ */
+struct pv_fsys_data {
+	unsigned long *fsyscall_table;
+	void *fsys_bubble_down;
+};
+
+extern struct pv_fsys_data pv_fsys_data;
+
+unsigned long *paravirt_get_fsyscall_table(void);
+char *paravirt_get_fsys_bubble_down(void);
+
+/******************************************************************************
+ * patchlist addresses for gate page
+ */
+enum pv_gate_patchlist {
+	PV_GATE_START_FSYSCALL,
+	PV_GATE_END_FSYSCALL,
+
+	PV_GATE_START_BRL_FSYS_BUBBLE_DOWN,
+	PV_GATE_END_BRL_FSYS_BUBBLE_DOWN,
+
+	PV_GATE_START_VTOP,
+	PV_GATE_END_VTOP,
+
+	PV_GATE_START_MCKINLEY_E9,
+	PV_GATE_END_MCKINLEY_E9,
+};
+
+struct pv_patchdata {
+	unsigned long start_fsyscall_patchlist;
+	unsigned long end_fsyscall_patchlist;
+	unsigned long start_brl_fsys_bubble_down_patchlist;
+	unsigned long end_brl_fsys_bubble_down_patchlist;
+	unsigned long start_vtop_patchlist;
+	unsigned long end_vtop_patchlist;
+	unsigned long start_mckinley_e9_patchlist;
+	unsigned long end_mckinley_e9_patchlist;
+
+	void *gate_section;
+};
+
+extern struct pv_patchdata pv_patchdata;
+
+unsigned long paravirt_get_gate_patchlist(enum pv_gate_patchlist type);
+void *paravirt_get_gate_section(void);
+#endif
+
+#ifdef CONFIG_PARAVIRT_GUEST
+
+#define PARAVIRT_HYPERVISOR_TYPE_DEFAULT	0
+
+#ifndef __ASSEMBLY__
+
+#include <asm/hw_irq.h>
+#include <asm/meminit.h>
+
+/******************************************************************************
+ * general info
+ */
+struct pv_info {
+	unsigned int kernel_rpl;
+	int paravirt_enabled;
+	const char *name;
+};
+
+extern struct pv_info pv_info;
+
+static inline int paravirt_enabled(void)
+{
+	return pv_info.paravirt_enabled;
+}
+
+static inline unsigned int get_kernel_rpl(void)
+{
+	return pv_info.kernel_rpl;
+}
+
+/******************************************************************************
+ * initialization hooks.
+ */
+struct rsvd_region;
+
+struct pv_init_ops {
+	void (*banner)(void);
+
+	int (*reserve_memory)(struct rsvd_region *region);
+
+	void (*arch_setup_early)(void);
+	void (*arch_setup_console)(char **cmdline_p);
+	int (*arch_setup_nomca)(void);
+
+	void (*post_smp_prepare_boot_cpu)(void);
+
+#ifdef ASM_SUPPORTED
+	unsigned long (*patch_bundle)(void *sbundle, void *ebundle,
+				      unsigned long type);
+	unsigned long (*patch_inst)(unsigned long stag, unsigned long etag,
+				    unsigned long type);
+#endif
+	void (*patch_branch)(unsigned long tag, unsigned long type);
+};
+
+extern struct pv_init_ops pv_init_ops;
+
+static inline void paravirt_banner(void)
+{
+	if (pv_init_ops.banner)
+		pv_init_ops.banner();
+}
+
+static inline int paravirt_reserve_memory(struct rsvd_region *region)
+{
+	if (pv_init_ops.reserve_memory)
+		return pv_init_ops.reserve_memory(region);
+	return 0;
+}
+
+static inline void paravirt_arch_setup_early(void)
+{
+	if (pv_init_ops.arch_setup_early)
+		pv_init_ops.arch_setup_early();
+}
+
+static inline void paravirt_arch_setup_console(char **cmdline_p)
+{
+	if (pv_init_ops.arch_setup_console)
+		pv_init_ops.arch_setup_console(cmdline_p);
+}
+
+static inline int paravirt_arch_setup_nomca(void)
+{
+	if (pv_init_ops.arch_setup_nomca)
+		return pv_init_ops.arch_setup_nomca();
+	return 0;
+}
+
+static inline void paravirt_post_smp_prepare_boot_cpu(void)
+{
+	if (pv_init_ops.post_smp_prepare_boot_cpu)
+		pv_init_ops.post_smp_prepare_boot_cpu();
+}
+
+/******************************************************************************
+ * replacement of iosapic operations.
+ */
+
+struct pv_iosapic_ops {
+	void (*pcat_compat_init)(void);
+
+	struct irq_chip *(*__get_irq_chip)(unsigned long trigger);
+
+	unsigned int (*__read)(char __iomem *iosapic, unsigned int reg);
+	void (*__write)(char __iomem *iosapic, unsigned int reg, u32 val);
+};
+
+extern struct pv_iosapic_ops pv_iosapic_ops;
+
+static inline void
+iosapic_pcat_compat_init(void)
+{
+	if (pv_iosapic_ops.pcat_compat_init)
+		pv_iosapic_ops.pcat_compat_init();
+}
+
+static inline struct irq_chip*
+iosapic_get_irq_chip(unsigned long trigger)
+{
+	return pv_iosapic_ops.__get_irq_chip(trigger);
+}
+
+static inline unsigned int
+__iosapic_read(char __iomem *iosapic, unsigned int reg)
+{
+	return pv_iosapic_ops.__read(iosapic, reg);
+}
+
+static inline void
+__iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
+{
+	return pv_iosapic_ops.__write(iosapic, reg, val);
+}
+
+/******************************************************************************
+ * replacement of irq operations.
+ */
+
+struct pv_irq_ops {
+	void (*register_ipi)(void);
+
+	int (*assign_irq_vector)(int irq);
+	void (*free_irq_vector)(int vector);
+
+	void (*register_percpu_irq)(ia64_vector vec,
+				    struct irqaction *action);
+
+	void (*resend_irq)(unsigned int vector);
+};
+
+extern struct pv_irq_ops pv_irq_ops;
+
+static inline void
+ia64_register_ipi(void)
+{
+	pv_irq_ops.register_ipi();
+}
+
+static inline int
+assign_irq_vector(int irq)
+{
+	return pv_irq_ops.assign_irq_vector(irq);
+}
+
+static inline void
+free_irq_vector(int vector)
+{
+	return pv_irq_ops.free_irq_vector(vector);
+}
+
+static inline void
+register_percpu_irq(ia64_vector vec, struct irqaction *action)
+{
+	pv_irq_ops.register_percpu_irq(vec, action);
+}
+
+static inline void
+ia64_resend_irq(unsigned int vector)
+{
+	pv_irq_ops.resend_irq(vector);
+}
+
+/******************************************************************************
+ * replacement of time operations.
+ */
+
+extern struct itc_jitter_data_t itc_jitter_data;
+extern volatile int time_keeper_id;
+
+struct pv_time_ops {
+	void (*init_missing_ticks_accounting)(int cpu);
+	int (*do_steal_accounting)(unsigned long *new_itm);
+
+	void (*clocksource_resume)(void);
+
+	unsigned long long (*sched_clock)(void);
+};
+
+extern struct pv_time_ops pv_time_ops;
+
+static inline void
+paravirt_init_missing_ticks_accounting(int cpu)
+{
+	if (pv_time_ops.init_missing_ticks_accounting)
+		pv_time_ops.init_missing_ticks_accounting(cpu);
+}
+
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+static inline int
+paravirt_do_steal_accounting(unsigned long *new_itm)
+{
+	return pv_time_ops.do_steal_accounting(new_itm);
+}
+
+static inline unsigned long long paravirt_sched_clock(void)
+{
+	return pv_time_ops.sched_clock();
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#else
+/* fallback for native case */
+
+#ifndef __ASSEMBLY__
+
+#define paravirt_banner()				do { } while (0)
+#define paravirt_reserve_memory(region)			0
+
+#define paravirt_arch_setup_early()			do { } while (0)
+#define paravirt_arch_setup_console(cmdline_p)		do { } while (0)
+#define paravirt_arch_setup_nomca()			0
+#define paravirt_post_smp_prepare_boot_cpu()		do { } while (0)
+
+#define paravirt_init_missing_ticks_accounting(cpu)	do { } while (0)
+#define paravirt_do_steal_accounting(new_itm)		0
+
+#endif /* __ASSEMBLY__ */
+
+
+#endif /* CONFIG_PARAVIRT_GUEST */
+
+#endif /* __ASM_PARAVIRT_H */
diff --git a/arch/ia64/include/asm/paravirt_patch.h b/arch/ia64/include/asm/paravirt_patch.h
new file mode 100644
index 00000000000..128ff5db6e6
--- /dev/null
+++ b/arch/ia64/include/asm/paravirt_patch.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef __ASM_PARAVIRT_PATCH_H
+#define __ASM_PARAVIRT_PATCH_H
+
+#ifdef __ASSEMBLY__
+
+	.section .paravirt_branches, "a"
+	.previous
+#define PARAVIRT_PATCH_SITE_BR(type)		\
+	{					\
+	[1:] ;					\
+	br.cond.sptk.many 2f ;			\
+	nop.b 0 ;				\
+	nop.b 0;; ;				\
+	} ;					\
+	2:					\
+	.xdata8 ".paravirt_branches", 1b, type
+
+#else
+
+#include <linux/stringify.h>
+#include <asm/intrinsics.h>
+
+/* for binary patch */
+struct paravirt_patch_site_bundle {
+	void		*sbundle;
+	void		*ebundle;
+	unsigned long	type;
+};
+
+/* label means the beginning of new bundle */
+#define paravirt_alt_bundle(instr, privop)				\
+	"\t998:\n"							\
+	"\t" instr "\n"							\
+	"\t999:\n"							\
+	"\t.pushsection .paravirt_bundles, \"a\"\n"			\
+	"\t.popsection\n"						\
+	"\t.xdata8 \".paravirt_bundles\", 998b, 999b, "			\
+	__stringify(privop) "\n"
+
+
+struct paravirt_patch_bundle_elem {
+	const void	*sbundle;
+	const void	*ebundle;
+	unsigned long	type;
+};
+
+
+struct paravirt_patch_site_inst {
+	unsigned long	stag;
+	unsigned long	etag;
+	unsigned long	type;
+};
+
+#define paravirt_alt_inst(instr, privop)				\
+	"\t[998:]\n"							\
+	"\t" instr "\n"							\
+	"\t[999:]\n"							\
+	"\t.pushsection .paravirt_insts, \"a\"\n"			\
+	"\t.popsection\n"						\
+	"\t.xdata8 \".paravirt_insts\", 998b, 999b, "			\
+	__stringify(privop) "\n"
+
+struct paravirt_patch_site_branch {
+	unsigned long	tag;
+	unsigned long	type;
+};
+
+struct paravirt_patch_branch_target {
+	const void	*entry;
+	unsigned long	type;
+};
+
+void
+__paravirt_patch_apply_branch(
+	unsigned long tag, unsigned long type,
+	const struct paravirt_patch_branch_target *entries,
+	unsigned int nr_entries);
+
+void
+paravirt_patch_reloc_br(unsigned long tag, const void *target);
+
+void
+paravirt_patch_reloc_brl(unsigned long tag, const void *target);
+
+
+#if defined(ASM_SUPPORTED) && defined(CONFIG_PARAVIRT)
+unsigned long
+ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type);
+
+unsigned long
+__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type,
+			      const struct paravirt_patch_bundle_elem *elems,
+			      unsigned long nelems,
+			      const struct paravirt_patch_bundle_elem **found);
+
+void
+paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
+			    const struct paravirt_patch_site_bundle *end);
+
+void
+paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start,
+			  const struct paravirt_patch_site_inst *end);
+
+void paravirt_patch_apply(void);
+#else
+#define paravirt_patch_apply_bundle(start, end)	do { } while (0)
+#define paravirt_patch_apply_inst(start, end)	do { } while (0)
+#define paravirt_patch_apply()			do { } while (0)
+#endif
+
+#endif /* !__ASSEMBLEY__ */
+
+#endif /* __ASM_PARAVIRT_PATCH_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "linux"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/arch/ia64/include/asm/paravirt_privop.h b/arch/ia64/include/asm/paravirt_privop.h
new file mode 100644
index 00000000000..8f6cb11c9fa
--- /dev/null
+++ b/arch/ia64/include/asm/paravirt_privop.h
@@ -0,0 +1,479 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _ASM_IA64_PARAVIRT_PRIVOP_H
+#define _ASM_IA64_PARAVIRT_PRIVOP_H
+
+#ifdef CONFIG_PARAVIRT
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/kregs.h> /* for IA64_PSR_I */
+
+/******************************************************************************
+ * replacement of intrinsics operations.
+ */
+
+struct pv_cpu_ops {
+	void (*fc)(void *addr);
+	unsigned long (*thash)(unsigned long addr);
+	unsigned long (*get_cpuid)(int index);
+	unsigned long (*get_pmd)(int index);
+	unsigned long (*getreg)(int reg);
+	void (*setreg)(int reg, unsigned long val);
+	void (*ptcga)(unsigned long addr, unsigned long size);
+	unsigned long (*get_rr)(unsigned long index);
+	void (*set_rr)(unsigned long index, unsigned long val);
+	void (*set_rr0_to_rr4)(unsigned long val0, unsigned long val1,
+			       unsigned long val2, unsigned long val3,
+			       unsigned long val4);
+	void (*ssm_i)(void);
+	void (*rsm_i)(void);
+	unsigned long (*get_psr_i)(void);
+	void (*intrin_local_irq_restore)(unsigned long flags);
+};
+
+extern struct pv_cpu_ops pv_cpu_ops;
+
+extern void ia64_native_setreg_func(int regnum, unsigned long val);
+extern unsigned long ia64_native_getreg_func(int regnum);
+
+/************************************************/
+/* Instructions paravirtualized for performance */
+/************************************************/
+
+#ifndef ASM_SUPPORTED
+#define paravirt_ssm_i()	pv_cpu_ops.ssm_i()
+#define paravirt_rsm_i()	pv_cpu_ops.rsm_i()
+#define __paravirt_getreg()	pv_cpu_ops.getreg()
+#endif
+
+/* mask for ia64_native_ssm/rsm() must be constant.("i" constraing).
+ * static inline function doesn't satisfy it. */
+#define paravirt_ssm(mask)			\
+	do {					\
+		if ((mask) == IA64_PSR_I)	\
+			paravirt_ssm_i();	\
+		else				\
+			ia64_native_ssm(mask);	\
+	} while (0)
+
+#define paravirt_rsm(mask)			\
+	do {					\
+		if ((mask) == IA64_PSR_I)	\
+			paravirt_rsm_i();	\
+		else				\
+			ia64_native_rsm(mask);	\
+	} while (0)
+
+/* returned ip value should be the one in the caller,
+ * not in __paravirt_getreg() */
+#define paravirt_getreg(reg)					\
+	({							\
+		unsigned long res;				\
+		if ((reg) == _IA64_REG_IP)			\
+			res = ia64_native_getreg(_IA64_REG_IP); \
+		else						\
+			res = __paravirt_getreg(reg);		\
+		res;						\
+	})
+
+/******************************************************************************
+ * replacement of hand written assembly codes.
+ */
+struct pv_cpu_asm_switch {
+	unsigned long switch_to;
+	unsigned long leave_syscall;
+	unsigned long work_processed_syscall;
+	unsigned long leave_kernel;
+};
+void paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch);
+
+#endif /* __ASSEMBLY__ */
+
+#define IA64_PARAVIRT_ASM_FUNC(name)	paravirt_ ## name
+
+#else
+
+/* fallback for native case */
+#define IA64_PARAVIRT_ASM_FUNC(name)	ia64_native_ ## name
+
+#endif /* CONFIG_PARAVIRT */
+
+#if defined(CONFIG_PARAVIRT) && defined(ASM_SUPPORTED)
+#define paravirt_dv_serialize_data()	ia64_dv_serialize_data()
+#else
+#define paravirt_dv_serialize_data()	/* nothing */
+#endif
+
+/* these routines utilize privilege-sensitive or performance-sensitive
+ * privileged instructions so the code must be replaced with
+ * paravirtualized versions */
+#define ia64_switch_to			IA64_PARAVIRT_ASM_FUNC(switch_to)
+#define ia64_leave_syscall		IA64_PARAVIRT_ASM_FUNC(leave_syscall)
+#define ia64_work_processed_syscall	\
+	IA64_PARAVIRT_ASM_FUNC(work_processed_syscall)
+#define ia64_leave_kernel		IA64_PARAVIRT_ASM_FUNC(leave_kernel)
+
+
+#if defined(CONFIG_PARAVIRT)
+/******************************************************************************
+ * binary patching infrastructure
+ */
+#define PARAVIRT_PATCH_TYPE_FC				1
+#define PARAVIRT_PATCH_TYPE_THASH			2
+#define PARAVIRT_PATCH_TYPE_GET_CPUID			3
+#define PARAVIRT_PATCH_TYPE_GET_PMD			4
+#define PARAVIRT_PATCH_TYPE_PTCGA			5
+#define PARAVIRT_PATCH_TYPE_GET_RR			6
+#define PARAVIRT_PATCH_TYPE_SET_RR			7
+#define PARAVIRT_PATCH_TYPE_SET_RR0_TO_RR4		8
+#define PARAVIRT_PATCH_TYPE_SSM_I			9
+#define PARAVIRT_PATCH_TYPE_RSM_I			10
+#define PARAVIRT_PATCH_TYPE_GET_PSR_I			11
+#define PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE	12
+
+/* PARAVIRT_PATY_TYPE_[GS]ETREG + _IA64_REG_xxx */
+#define PARAVIRT_PATCH_TYPE_GETREG			0x10000000
+#define PARAVIRT_PATCH_TYPE_SETREG			0x20000000
+
+/*
+ * struct task_struct* (*ia64_switch_to)(void* next_task);
+ * void *ia64_leave_syscall;
+ * void *ia64_work_processed_syscall
+ * void *ia64_leave_kernel;
+ */
+
+#define PARAVIRT_PATCH_TYPE_BR_START			0x30000000
+#define PARAVIRT_PATCH_TYPE_BR_SWITCH_TO		\
+	(PARAVIRT_PATCH_TYPE_BR_START + 0)
+#define PARAVIRT_PATCH_TYPE_BR_LEAVE_SYSCALL		\
+	(PARAVIRT_PATCH_TYPE_BR_START + 1)
+#define PARAVIRT_PATCH_TYPE_BR_WORK_PROCESSED_SYSCALL	\
+	(PARAVIRT_PATCH_TYPE_BR_START + 2)
+#define PARAVIRT_PATCH_TYPE_BR_LEAVE_KERNEL		\
+	(PARAVIRT_PATCH_TYPE_BR_START + 3)
+
+#ifdef ASM_SUPPORTED
+#include <asm/paravirt_patch.h>
+
+/*
+ * pv_cpu_ops calling stub.
+ * normal function call convension can't be written by gcc
+ * inline assembly.
+ *
+ * from the caller's point of view,
+ * the following registers will be clobbered.
+ * r2, r3
+ * r8-r15
+ * r16, r17
+ * b6, b7
+ * p6-p15
+ * ar.ccv
+ *
+ * from the callee's point of view ,
+ * the following registers can be used.
+ * r2, r3: scratch
+ * r8: scratch, input argument0 and return value
+ * r0-r15: scratch, input argument1-5
+ * b6: return pointer
+ * b7: scratch
+ * p6-p15: scratch
+ * ar.ccv: scratch
+ *
+ * other registers must not be changed. especially
+ * b0: rp: preserved. gcc ignores b0 in clobbered register.
+ * r16: saved gp
+ */
+/* 5 bundles */
+#define __PARAVIRT_BR							\
+	";;\n"								\
+	"{ .mlx\n"							\
+	"nop 0\n"							\
+	"movl r2 = %[op_addr]\n"/* get function pointer address */	\
+	";;\n"								\
+	"}\n"								\
+	"1:\n"								\
+	"{ .mii\n"							\
+	"ld8 r2 = [r2]\n"	/* load function descriptor address */	\
+	"mov r17 = ip\n"	/* get ip to calc return address */	\
+	"mov r16 = gp\n"	/* save gp */				\
+	";;\n"								\
+	"}\n"								\
+	"{ .mii\n"							\
+	"ld8 r3 = [r2], 8\n"	/* load entry address */		\
+	"adds r17 =  1f - 1b, r17\n"	/* calculate return address */	\
+	";;\n"								\
+	"mov b7 = r3\n"		/* set entry address */			\
+	"}\n"								\
+	"{ .mib\n"							\
+	"ld8 gp = [r2]\n"	/* load gp value */			\
+	"mov b6 = r17\n"	/* set return address */		\
+	"br.cond.sptk.few b7\n"	/* intrinsics are very short isns */	\
+	"}\n"								\
+	"1:\n"								\
+	"{ .mii\n"							\
+	"mov gp = r16\n"	/* restore gp value */			\
+	"nop 0\n"							\
+	"nop 0\n"							\
+	";;\n"								\
+	"}\n"
+
+#define PARAVIRT_OP(op)				\
+	[op_addr] "i"(&pv_cpu_ops.op)
+
+#define PARAVIRT_TYPE(type)			\
+	PARAVIRT_PATCH_TYPE_ ## type
+
+#define PARAVIRT_REG_CLOBBERS0					\
+	"r2", "r3", /*"r8",*/ "r9", "r10", "r11", "r14",	\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS1					\
+	"r2","r3", /*"r8",*/ "r9", "r10", "r11", "r14",		\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS2					\
+	"r2", "r3", /*"r8", "r9",*/ "r10", "r11", "r14",	\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS5					\
+	"r2", "r3", /*"r8", "r9", "r10", "r11", "r14",*/	\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_BR_CLOBBERS			\
+	"b6", "b7"
+
+#define PARAVIRT_PR_CLOBBERS						\
+	"p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"
+
+#define PARAVIRT_AR_CLOBBERS			\
+	"ar.ccv"
+
+#define PARAVIRT_CLOBBERS0			\
+		PARAVIRT_REG_CLOBBERS0,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_CLOBBERS1			\
+		PARAVIRT_REG_CLOBBERS1,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_CLOBBERS2			\
+		PARAVIRT_REG_CLOBBERS2,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_CLOBBERS5			\
+		PARAVIRT_REG_CLOBBERS5,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_BR0(op, type)					\
+	register unsigned long ia64_clobber asm ("r8");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      :	"=r"(ia64_clobber)			\
+		      : PARAVIRT_OP(op)				\
+		      : PARAVIRT_CLOBBERS0)
+
+#define PARAVIRT_BR0_RET(op, type)				\
+	register unsigned long ia64_intri_res asm ("r8");	\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      : "=r"(ia64_intri_res)			\
+		      : PARAVIRT_OP(op)				\
+		      : PARAVIRT_CLOBBERS0)
+
+#define PARAVIRT_BR1(op, type, arg1)				\
+	register unsigned long __##arg1 asm ("r8") = arg1;	\
+	register unsigned long ia64_clobber asm ("r8");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      :	"=r"(ia64_clobber)			\
+		      : PARAVIRT_OP(op), "0"(__##arg1)		\
+		      : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR1_RET(op, type, arg1)			\
+	register unsigned long ia64_intri_res asm ("r8");	\
+	register unsigned long __##arg1 asm ("r8") = arg1;	\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      : "=r"(ia64_intri_res)			\
+		      : PARAVIRT_OP(op), "0"(__##arg1)		\
+		      : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR1_VOID(op, type, arg1)			\
+	register void *__##arg1 asm ("r8") = arg1;		\
+	register unsigned long ia64_clobber asm ("r8");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      :	"=r"(ia64_clobber)			\
+		      : PARAVIRT_OP(op), "0"(__##arg1)		\
+		      : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR2(op, type, arg1, arg2)				\
+	register unsigned long __##arg1 asm ("r8") = arg1;		\
+	register unsigned long __##arg2 asm ("r9") = arg2;		\
+	register unsigned long ia64_clobber1 asm ("r8");		\
+	register unsigned long ia64_clobber2 asm ("r9");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,		\
+					  PARAVIRT_TYPE(type))		\
+		      : "=r"(ia64_clobber1), "=r"(ia64_clobber2)	\
+		      : PARAVIRT_OP(op), "0"(__##arg1), "1"(__##arg2)	\
+		      : PARAVIRT_CLOBBERS2)
+
+
+#define PARAVIRT_DEFINE_CPU_OP0(op, type)		\
+	static inline void				\
+	paravirt_ ## op (void)				\
+	{						\
+		PARAVIRT_BR0(op, type);			\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP0_RET(op, type)		\
+	static inline unsigned long			\
+	paravirt_ ## op (void)				\
+	{						\
+		PARAVIRT_BR0_RET(op, type);		\
+		return ia64_intri_res;			\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP1_VOID(op, type)		\
+	static inline void				\
+	paravirt_ ## op (void *arg1)			\
+	{						\
+		PARAVIRT_BR1_VOID(op, type, arg1);	\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP1(op, type)		\
+	static inline void				\
+	paravirt_ ## op (unsigned long arg1)		\
+	{						\
+		PARAVIRT_BR1(op, type, arg1);		\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP1_RET(op, type)		\
+	static inline unsigned long			\
+	paravirt_ ## op (unsigned long arg1)		\
+	{						\
+		PARAVIRT_BR1_RET(op, type, arg1);	\
+		return ia64_intri_res;			\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP2(op, type)		\
+	static inline void				\
+	paravirt_ ## op (unsigned long arg1,		\
+			 unsigned long arg2)		\
+	{						\
+		PARAVIRT_BR2(op, type, arg1, arg2);	\
+	}
+
+
+PARAVIRT_DEFINE_CPU_OP1_VOID(fc, FC);
+PARAVIRT_DEFINE_CPU_OP1_RET(thash, THASH)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_cpuid, GET_CPUID)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_pmd, GET_PMD)
+PARAVIRT_DEFINE_CPU_OP2(ptcga, PTCGA)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_rr, GET_RR)
+PARAVIRT_DEFINE_CPU_OP2(set_rr, SET_RR)
+PARAVIRT_DEFINE_CPU_OP0(ssm_i, SSM_I)
+PARAVIRT_DEFINE_CPU_OP0(rsm_i, RSM_I)
+PARAVIRT_DEFINE_CPU_OP0_RET(get_psr_i, GET_PSR_I)
+PARAVIRT_DEFINE_CPU_OP1(intrin_local_irq_restore, INTRIN_LOCAL_IRQ_RESTORE)
+
+static inline void
+paravirt_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
+			unsigned long val2, unsigned long val3,
+			unsigned long val4)
+{
+	register unsigned long __val0 asm ("r8") = val0;
+	register unsigned long __val1 asm ("r9") = val1;
+	register unsigned long __val2 asm ("r10") = val2;
+	register unsigned long __val3 asm ("r11") = val3;
+	register unsigned long __val4 asm ("r14") = val4;
+
+	register unsigned long ia64_clobber0 asm ("r8");
+	register unsigned long ia64_clobber1 asm ("r9");
+	register unsigned long ia64_clobber2 asm ("r10");
+	register unsigned long ia64_clobber3 asm ("r11");
+	register unsigned long ia64_clobber4 asm ("r14");
+
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,
+					  PARAVIRT_TYPE(SET_RR0_TO_RR4))
+		      : "=r"(ia64_clobber0),
+			"=r"(ia64_clobber1),
+			"=r"(ia64_clobber2),
+			"=r"(ia64_clobber3),
+			"=r"(ia64_clobber4)
+		      : PARAVIRT_OP(set_rr0_to_rr4),
+			"0"(__val0), "1"(__val1), "2"(__val2),
+			"3"(__val3), "4"(__val4)
+		      : PARAVIRT_CLOBBERS5);
+}
+
+/* unsigned long paravirt_getreg(int reg) */
+#define __paravirt_getreg(reg)						\
+	({								\
+		register unsigned long ia64_intri_res asm ("r8");	\
+		register unsigned long __reg asm ("r8") = (reg);	\
+									\
+		asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+						  PARAVIRT_TYPE(GETREG) \
+						  + (reg))		\
+			      : "=r"(ia64_intri_res)			\
+			      : PARAVIRT_OP(getreg), "0"(__reg)		\
+			      : PARAVIRT_CLOBBERS1);			\
+									\
+		ia64_intri_res;						\
+	})
+
+/* void paravirt_setreg(int reg, unsigned long val) */
+#define paravirt_setreg(reg, val)					\
+	do {								\
+		register unsigned long __val asm ("r8") = val;		\
+		register unsigned long __reg asm ("r9") = reg;		\
+		register unsigned long ia64_clobber1 asm ("r8");	\
+		register unsigned long ia64_clobber2 asm ("r9");	\
+									\
+		asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+						  PARAVIRT_TYPE(SETREG) \
+						  + (reg))		\
+			      : "=r"(ia64_clobber1),			\
+				"=r"(ia64_clobber2)			\
+			      : PARAVIRT_OP(setreg),			\
+				"1"(__reg), "0"(__val)			\
+			      : PARAVIRT_CLOBBERS2);			\
+	} while (0)
+
+#endif /* ASM_SUPPORTED */
+#endif /* CONFIG_PARAVIRT && ASM_SUPPOTED */
+
+#endif /* _ASM_IA64_PARAVIRT_PRIVOP_H */
diff --git a/arch/ia64/include/asm/parport.h b/arch/ia64/include/asm/parport.h
new file mode 100644
index 00000000000..638b4d271b9
--- /dev/null
+++ b/arch/ia64/include/asm/parport.h
@@ -0,0 +1,19 @@
+/*
+ * parport.h: platform-specific PC-style parport initialisation
+ *
+ * Copyright (C) 1999, 2000  Tim Waugh <tim@cyberelk.demon.co.uk>
+ *
+ * This file should only be included by drivers/parport/parport_pc.c.
+ */
+
+#ifndef _ASM_IA64_PARPORT_H
+#define _ASM_IA64_PARPORT_H 1
+
+static int parport_pc_find_isa_ports(int autoirq, int autodma);
+
+static int parport_pc_find_nonpci_ports(int autoirq, int autodma)
+{
+	return parport_pc_find_isa_ports(autoirq, autodma);
+}
+
+#endif /* _ASM_IA64_PARPORT_H */
diff --git a/arch/ia64/include/asm/patch.h b/arch/ia64/include/asm/patch.h
new file mode 100644
index 00000000000..295fe6ab458
--- /dev/null
+++ b/arch/ia64/include/asm/patch.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_IA64_PATCH_H
+#define _ASM_IA64_PATCH_H
+
+/*
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * There are a number of reasons for patching instructions.  Rather than duplicating code
+ * all over the place, we put the common stuff here.  Reasons for patching: in-kernel
+ * module-loader, virtual-to-physical patch-list, McKinley Errata 9 workaround, and gate
+ * shared library.  Undoubtedly, some of these reasons will disappear and others will
+ * be added over time.
+ */
+#include <linux/elf.h>
+#include <linux/types.h>
+
+extern void ia64_patch (u64 insn_addr, u64 mask, u64 val);	/* patch any insn slot */
+extern void ia64_patch_imm64 (u64 insn_addr, u64 val);		/* patch "movl" w/abs. value*/
+extern void ia64_patch_imm60 (u64 insn_addr, u64 val);		/* patch "brl" w/ip-rel value */
+
+extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end);
+extern void ia64_patch_vtop (unsigned long start, unsigned long end);
+extern void ia64_patch_phys_stack_reg(unsigned long val);
+extern void ia64_patch_rse (unsigned long start, unsigned long end);
+extern void ia64_patch_gate (void);
+
+#endif /* _ASM_IA64_PATCH_H */
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
new file mode 100644
index 00000000000..52af5ed9f60
--- /dev/null
+++ b/arch/ia64/include/asm/pci.h
@@ -0,0 +1,133 @@
+#ifndef _ASM_IA64_PCI_H
+#define _ASM_IA64_PCI_H
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/io.h>
+#include <asm/scatterlist.h>
+#include <asm/hw_irq.h>
+
+struct pci_vector_struct {
+	__u16 segment;	/* PCI Segment number */
+	__u16 bus;	/* PCI Bus number */
+	__u32 pci_id;	/* ACPI split 16 bits device, 16 bits function (see section 6.1.1) */
+	__u8 pin;	/* PCI PIN (0 = A, 1 = B, 2 = C, 3 = D) */
+	__u32 irq;	/* IRQ assigned */
+};
+
+/*
+ * Can be used to override the logic in pci_scan_bus for skipping already-configured bus
+ * numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the
+ * loader.
+ */
+#define pcibios_assign_all_busses()     0
+
+#define PCIBIOS_MIN_IO		0x1000
+#define PCIBIOS_MIN_MEM		0x10000000
+
+void pcibios_config_init(void);
+
+struct pci_dev;
+
+/*
+ * PCI_DMA_BUS_IS_PHYS should be set to 1 if there is _necessarily_ a direct
+ * correspondence between device bus addresses and CPU physical addresses.
+ * Platforms with a hardware I/O MMU _must_ turn this off to suppress the
+ * bounce buffer handling code in the block and network device layers.
+ * Platforms with separate bus address spaces _must_ turn this off and provide
+ * a device DMA mapping implementation that takes care of the necessary
+ * address translation.
+ *
+ * For now, the ia64 platforms which may have separate/multiple bus address
+ * spaces all have I/O MMUs which support the merging of physically
+ * discontiguous buffers, so we can use that as the sole factor to determine
+ * the setting of PCI_DMA_BUS_IS_PHYS.
+ */
+extern unsigned long ia64_max_iommu_merge_mask;
+#define PCI_DMA_BUS_IS_PHYS	(ia64_max_iommu_merge_mask == ~0UL)
+
+#include <asm-generic/pci-dma-compat.h>
+
+#ifdef CONFIG_PCI
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	unsigned long cacheline_size;
+	u8 byte;
+
+	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+	if (byte == 0)
+		cacheline_size = 1024;
+	else
+		cacheline_size = (int) byte * 4;
+
+	*strat = PCI_DMA_BURST_MULTIPLE;
+	*strategy_parameter = cacheline_size;
+}
+#endif
+
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
+				enum pci_mmap_state mmap_state, int write_combine);
+#define HAVE_PCI_LEGACY
+extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
+				      struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state);
+
+#define pci_get_legacy_mem platform_pci_get_legacy_mem
+#define pci_legacy_read platform_pci_legacy_read
+#define pci_legacy_write platform_pci_legacy_write
+
+struct iospace_resource {
+	struct list_head list;
+	struct resource res;
+};
+
+struct pci_controller {
+	struct acpi_device *companion;
+	void *iommu;
+	int segment;
+	int node;		/* nearest node with memory or NUMA_NO_NODE for global allocation */
+
+	void *platform_data;
+};
+
+
+#define PCI_CONTROLLER(busdev) ((struct pci_controller *) busdev->sysdata)
+#define pci_domain_nr(busdev)    (PCI_CONTROLLER(busdev)->segment)
+
+extern struct pci_ops pci_root_ops;
+
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+	return (pci_domain_nr(bus) != 0);
+}
+
+static inline struct resource *
+pcibios_select_root(struct pci_dev *pdev, struct resource *res)
+{
+	struct resource *root = NULL;
+
+	if (res->flags & IORESOURCE_IO)
+		root = &ioport_resource;
+	if (res->flags & IORESOURCE_MEM)
+		root = &iomem_resource;
+
+	return root;
+}
+
+#define HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+	return channel ? isa_irq_to_vector(15) : isa_irq_to_vector(14);
+}
+
+#ifdef CONFIG_INTEL_IOMMU
+extern void pci_iommu_alloc(void);
+#endif
+#endif /* _ASM_IA64_PCI_H */
diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
new file mode 100644
index 00000000000..14aa1c58912
--- /dev/null
+++ b/arch/ia64/include/asm/percpu.h
@@ -0,0 +1,54 @@
+#ifndef _ASM_IA64_PERCPU_H
+#define _ASM_IA64_PERCPU_H
+
+/*
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE
+
+#ifdef __ASSEMBLY__
+# define THIS_CPU(var)	(var)  /* use this to mark accesses to per-CPU variables... */
+#else /* !__ASSEMBLY__ */
+
+
+#include <linux/threads.h>
+
+#ifdef CONFIG_SMP
+
+#ifdef HAVE_MODEL_SMALL_ATTRIBUTE
+# define PER_CPU_ATTRIBUTES	__attribute__((__model__ (__small__)))
+#endif
+
+#define __my_cpu_offset	__ia64_per_cpu_var(local_per_cpu_offset)
+
+extern void *per_cpu_init(void);
+
+#else /* ! SMP */
+
+#define per_cpu_init()				(__phys_per_cpu_start)
+
+#endif	/* SMP */
+
+#define PER_CPU_BASE_SECTION ".data..percpu"
+
+/*
+ * Be extremely careful when taking the address of this variable!  Due to virtual
+ * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
+ * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
+ * more efficient.
+ */
+#define __ia64_per_cpu_var(var) (*({					\
+	__verify_pcpu_ptr(&(var));					\
+	((typeof(var) __kernel __force *)&(var));			\
+}))
+
+#include <asm-generic/percpu.h>
+
+/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */
+DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_PERCPU_H */
diff --git a/arch/ia64/include/asm/perfmon.h b/arch/ia64/include/asm/perfmon.h
new file mode 100644
index 00000000000..15476dd3a8b
--- /dev/null
+++ b/arch/ia64/include/asm/perfmon.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2001-2003 Hewlett-Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ */
+#ifndef _ASM_IA64_PERFMON_H
+#define _ASM_IA64_PERFMON_H
+
+#include <uapi/asm/perfmon.h>
+
+
+extern long perfmonctl(int fd, int cmd, void *arg, int narg);
+
+typedef struct {
+	void (*handler)(int irq, void *arg, struct pt_regs *regs);
+} pfm_intr_handler_desc_t;
+
+extern void pfm_save_regs (struct task_struct *);
+extern void pfm_load_regs (struct task_struct *);
+
+extern void pfm_exit_thread(struct task_struct *);
+extern int  pfm_use_debug_registers(struct task_struct *);
+extern int  pfm_release_debug_registers(struct task_struct *);
+extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin);
+extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs);
+extern void pfm_init_percpu(void);
+extern void pfm_handle_work(void);
+extern int  pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
+extern int  pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
+
+
+
+/*
+ * Reset PMD register flags
+ */
+#define PFM_PMD_SHORT_RESET	0
+#define PFM_PMD_LONG_RESET	1
+
+typedef union {
+	unsigned int val;
+	struct {
+		unsigned int notify_user:1;	/* notify user program of overflow */
+		unsigned int reset_ovfl_pmds:1;	/* reset overflowed PMDs */
+		unsigned int block_task:1;	/* block monitored task on kernel exit */
+		unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */
+		unsigned int reserved:28;	/* for future use */
+	} bits;
+} pfm_ovfl_ctrl_t;
+
+typedef struct {
+	unsigned char	ovfl_pmd;			/* index of overflowed PMD  */
+	unsigned char   ovfl_notify;			/* =1 if monitor requested overflow notification */
+	unsigned short  active_set;			/* event set active at the time of the overflow */
+	pfm_ovfl_ctrl_t ovfl_ctrl;			/* return: perfmon controls to set by handler */
+
+	unsigned long   pmd_last_reset;			/* last reset value of of the PMD */
+	unsigned long	smpl_pmds[4];			/* bitmask of other PMD of interest on overflow */
+	unsigned long   smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */
+	unsigned long   pmd_value;			/* current 64-bit value of the PMD */
+	unsigned long	pmd_eventid;			/* eventid associated with PMD */
+} pfm_ovfl_arg_t;
+
+
+typedef struct {
+	char		*fmt_name;
+	pfm_uuid_t	fmt_uuid;
+	size_t		fmt_arg_size;
+	unsigned long	fmt_flags;
+
+	int		(*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg);
+	int		(*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size);
+	int 		(*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg);
+	int		(*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp);
+	int		(*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
+	int		(*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
+	int		(*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs);
+
+	struct list_head fmt_list;
+} pfm_buffer_fmt_t;
+
+extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt);
+extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid);
+
+/*
+ * perfmon interface exported to modules
+ */
+extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
+extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
+
+/*
+ * describe the content of the local_cpu_date->pfm_syst_info field
+ */
+#define PFM_CPUINFO_SYST_WIDE	0x1	/* if set a system wide session exists */
+#define PFM_CPUINFO_DCR_PP	0x2	/* if set the system wide session has started */
+#define PFM_CPUINFO_EXCL_IDLE	0x4	/* the system wide session excludes the idle task */
+
+/*
+ * sysctl control structure. visible to sampling formats
+ */
+typedef struct {
+	int	debug;		/* turn on/off debugging via syslog */
+	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
+	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
+	int	expert_mode;	/* turn on/off value checking */
+} pfm_sysctl_t;
+extern pfm_sysctl_t pfm_sysctl;
+
+
+#endif /* _ASM_IA64_PERFMON_H */
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
new file mode 100644
index 00000000000..5767cdfc08d
--- /dev/null
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -0,0 +1,125 @@
+#ifndef _ASM_IA64_PGALLOC_H
+#define _ASM_IA64_PGALLOC_H
+
+/*
+ * This file contains the functions and defines necessary to allocate
+ * page tables.
+ *
+ * This hopefully works with any (fixed) ia-64 page-size, as defined
+ * in <asm/page.h> (currently 8192).
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2000, Goutham Rao <goutham.rao@intel.com>
+ */
+
+
+#include <linux/compiler.h>
+#include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/threads.h>
+#include <linux/quicklist.h>
+
+#include <asm/mmu_context.h>
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	return quicklist_alloc(0, GFP_KERNEL, NULL);
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	quicklist_free(0, NULL, pgd);
+}
+
+#ifdef CONFIG_PGTABLE_4
+static inline void
+pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
+{
+	pgd_val(*pgd_entry) = __pa(pud);
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return quicklist_alloc(0, GFP_KERNEL, NULL);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	quicklist_free(0, NULL, pud);
+}
+#define __pud_free_tlb(tlb, pud, address)	pud_free((tlb)->mm, pud)
+#endif /* CONFIG_PGTABLE_4 */
+
+static inline void
+pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
+{
+	pud_val(*pud_entry) = __pa(pmd);
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return quicklist_alloc(0, GFP_KERNEL, NULL);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	quicklist_free(0, NULL, pmd);
+}
+
+#define __pmd_free_tlb(tlb, pmd, address)	pmd_free((tlb)->mm, pmd)
+
+static inline void
+pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte)
+{
+	pmd_val(*pmd_entry) = page_to_phys(pte);
+}
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+static inline void
+pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
+{
+	pmd_val(*pmd_entry) = __pa(pte);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	struct page *page;
+	void *pg;
+
+	pg = quicklist_alloc(0, GFP_KERNEL, NULL);
+	if (!pg)
+		return NULL;
+	page = virt_to_page(pg);
+	if (!pgtable_page_ctor(page)) {
+		quicklist_free(0, NULL, pg);
+		return NULL;
+	}
+	return page;
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long addr)
+{
+	return quicklist_alloc(0, GFP_KERNEL, NULL);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+	pgtable_page_dtor(pte);
+	quicklist_free_page(0, NULL, pte);
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	quicklist_free(0, NULL, pte);
+}
+
+static inline void check_pgt_cache(void)
+{
+	quicklist_trim(0, NULL, 25, 16);
+}
+
+#define __pte_free_tlb(tlb, pte, address)	pte_free((tlb)->mm, pte)
+
+#endif				/* _ASM_IA64_PGALLOC_H */
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
new file mode 100644
index 00000000000..7935115398a
--- /dev/null
+++ b/arch/ia64/include/asm/pgtable.h
@@ -0,0 +1,609 @@
+#ifndef _ASM_IA64_PGTABLE_H
+#define _ASM_IA64_PGTABLE_H
+
+/*
+ * This file contains the functions and defines necessary to modify and use
+ * the IA-64 page table tree.
+ *
+ * This hopefully works with any (fixed) IA-64 page-size, as defined
+ * in <asm/page.h>.
+ *
+ * Copyright (C) 1998-2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#include <asm/mman.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/types.h>
+
+#define IA64_MAX_PHYS_BITS	50	/* max. number of physical address bits (architected) */
+
+/*
+ * First, define the various bits in a PTE.  Note that the PTE format
+ * matches the VHPT short format, the firt doubleword of the VHPD long
+ * format, and the first doubleword of the TLB insertion format.
+ */
+#define _PAGE_P_BIT		0
+#define _PAGE_A_BIT		5
+#define _PAGE_D_BIT		6
+
+#define _PAGE_P			(1 << _PAGE_P_BIT)	/* page present bit */
+#define _PAGE_MA_WB		(0x0 <<  2)	/* write back memory attribute */
+#define _PAGE_MA_UC		(0x4 <<  2)	/* uncacheable memory attribute */
+#define _PAGE_MA_UCE		(0x5 <<  2)	/* UC exported attribute */
+#define _PAGE_MA_WC		(0x6 <<  2)	/* write coalescing memory attribute */
+#define _PAGE_MA_NAT		(0x7 <<  2)	/* not-a-thing attribute */
+#define _PAGE_MA_MASK		(0x7 <<  2)
+#define _PAGE_PL_0		(0 <<  7)	/* privilege level 0 (kernel) */
+#define _PAGE_PL_1		(1 <<  7)	/* privilege level 1 (unused) */
+#define _PAGE_PL_2		(2 <<  7)	/* privilege level 2 (unused) */
+#define _PAGE_PL_3		(3 <<  7)	/* privilege level 3 (user) */
+#define _PAGE_PL_MASK		(3 <<  7)
+#define _PAGE_AR_R		(0 <<  9)	/* read only */
+#define _PAGE_AR_RX		(1 <<  9)	/* read & execute */
+#define _PAGE_AR_RW		(2 <<  9)	/* read & write */
+#define _PAGE_AR_RWX		(3 <<  9)	/* read, write & execute */
+#define _PAGE_AR_R_RW		(4 <<  9)	/* read / read & write */
+#define _PAGE_AR_RX_RWX		(5 <<  9)	/* read & exec / read, write & exec */
+#define _PAGE_AR_RWX_RW		(6 <<  9)	/* read, write & exec / read & write */
+#define _PAGE_AR_X_RX		(7 <<  9)	/* exec & promote / read & exec */
+#define _PAGE_AR_MASK		(7 <<  9)
+#define _PAGE_AR_SHIFT		9
+#define _PAGE_A			(1 << _PAGE_A_BIT)	/* page accessed bit */
+#define _PAGE_D			(1 << _PAGE_D_BIT)	/* page dirty bit */
+#define _PAGE_PPN_MASK		(((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL)
+#define _PAGE_ED		(__IA64_UL(1) << 52)	/* exception deferral */
+#define _PAGE_PROTNONE		(__IA64_UL(1) << 63)
+
+/* Valid only for a PTE with the present bit cleared: */
+#define _PAGE_FILE		(1 << 1)		/* see swap & file pte remarks below */
+
+#define _PFN_MASK		_PAGE_PPN_MASK
+/* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */
+#define _PAGE_CHG_MASK	(_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED)
+
+#define _PAGE_SIZE_4K	12
+#define _PAGE_SIZE_8K	13
+#define _PAGE_SIZE_16K	14
+#define _PAGE_SIZE_64K	16
+#define _PAGE_SIZE_256K	18
+#define _PAGE_SIZE_1M	20
+#define _PAGE_SIZE_4M	22
+#define _PAGE_SIZE_16M	24
+#define _PAGE_SIZE_64M	26
+#define _PAGE_SIZE_256M	28
+#define _PAGE_SIZE_1G	30
+#define _PAGE_SIZE_4G	32
+
+#define __ACCESS_BITS		_PAGE_ED | _PAGE_A | _PAGE_P | _PAGE_MA_WB
+#define __DIRTY_BITS_NO_ED	_PAGE_A | _PAGE_P | _PAGE_D | _PAGE_MA_WB
+#define __DIRTY_BITS		_PAGE_ED | __DIRTY_BITS_NO_ED
+
+/*
+ * How many pointers will a page table level hold expressed in shift
+ */
+#define PTRS_PER_PTD_SHIFT	(PAGE_SHIFT-3)
+
+/*
+ * Definitions for fourth level:
+ */
+#define PTRS_PER_PTE	(__IA64_UL(1) << (PTRS_PER_PTD_SHIFT))
+
+/*
+ * Definitions for third level:
+ *
+ * PMD_SHIFT determines the size of the area a third-level page table
+ * can map.
+ */
+#define PMD_SHIFT	(PAGE_SHIFT + (PTRS_PER_PTD_SHIFT))
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+#define PTRS_PER_PMD	(1UL << (PTRS_PER_PTD_SHIFT))
+
+#ifdef CONFIG_PGTABLE_4
+/*
+ * Definitions for second level:
+ *
+ * PUD_SHIFT determines the size of the area a second-level page table
+ * can map.
+ */
+#define PUD_SHIFT	(PMD_SHIFT + (PTRS_PER_PTD_SHIFT))
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+#define PTRS_PER_PUD	(1UL << (PTRS_PER_PTD_SHIFT))
+#endif
+
+/*
+ * Definitions for first level:
+ *
+ * PGDIR_SHIFT determines what a first-level page table entry can map.
+ */
+#ifdef CONFIG_PGTABLE_4
+#define PGDIR_SHIFT		(PUD_SHIFT + (PTRS_PER_PTD_SHIFT))
+#else
+#define PGDIR_SHIFT		(PMD_SHIFT + (PTRS_PER_PTD_SHIFT))
+#endif
+#define PGDIR_SIZE		(__IA64_UL(1) << PGDIR_SHIFT)
+#define PGDIR_MASK		(~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD_SHIFT	PTRS_PER_PTD_SHIFT
+#define PTRS_PER_PGD		(1UL << PTRS_PER_PGD_SHIFT)
+#define USER_PTRS_PER_PGD	(5*PTRS_PER_PGD/8)	/* regions 0-4 are user regions */
+#define FIRST_USER_ADDRESS	0
+
+/*
+ * All the normal masks have the "page accessed" bits on, as any time
+ * they are used, the page is accessed. They are cleared only by the
+ * page-out routines.
+ */
+#define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_A)
+#define PAGE_SHARED	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW)
+#define PAGE_READONLY	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+#define PAGE_COPY	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+#define PAGE_COPY_EXEC	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
+#define PAGE_GATE	__pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
+#define PAGE_KERNEL	__pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX)
+#define PAGE_KERNELRX	__pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
+#define PAGE_KERNEL_UC	__pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX | \
+				 _PAGE_MA_UC)
+
+# ifndef __ASSEMBLY__
+
+#include <linux/sched.h>	/* for mm_struct */
+#include <linux/bitops.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+
+/*
+ * Next come the mappings that determine how mmap() protection bits
+ * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented.  The
+ * _P version gets used for a private shared memory segment, the _S
+ * version gets used for a shared memory segment with MAP_SHARED on.
+ * In a private shared memory segment, we do a copy-on-write if a task
+ * attempts to write to the page.
+ */
+	/* xwr */
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY
+#define __P010	PAGE_READONLY	/* write to priv pg -> copy & make writable */
+#define __P011	PAGE_READONLY	/* ditto */
+#define __P100	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX)
+#define __P101	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
+#define __P110	PAGE_COPY_EXEC
+#define __P111	PAGE_COPY_EXEC
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED	/* we don't have (and don't need) write-only */
+#define __S011	PAGE_SHARED
+#define __S100	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX)
+#define __S101	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
+#define __S110	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)
+#define __S111	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)
+
+#define pgd_ERROR(e)	printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
+#ifdef CONFIG_PGTABLE_4
+#define pud_ERROR(e)	printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+#endif
+#define pmd_ERROR(e)	printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pte_ERROR(e)	printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
+
+
+/*
+ * Some definitions to translate between mem_map, PTEs, and page addresses:
+ */
+
+
+/* Quick test to see if ADDR is a (potentially) valid physical address. */
+static inline long
+ia64_phys_addr_valid (unsigned long addr)
+{
+	return (addr & (local_cpu_data->unimpl_pa_mask)) == 0;
+}
+
+/*
+ * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
+ * memory.  For the return value to be meaningful, ADDR must be >=
+ * PAGE_OFFSET.  This operation can be relatively expensive (e.g.,
+ * require a hash-, or multi-level tree-lookup or something of that
+ * sort) but it guarantees to return TRUE only if accessing the page
+ * at that address does not cause an error.  Note that there may be
+ * addresses for which kern_addr_valid() returns FALSE even though an
+ * access would not cause an error (e.g., this is typically true for
+ * memory mapped I/O regions.
+ *
+ * XXX Need to implement this for IA-64.
+ */
+#define kern_addr_valid(addr)	(1)
+
+
+/*
+ * Now come the defines and routines to manage and access the three-level
+ * page table.
+ */
+
+
+#define VMALLOC_START		(RGN_BASE(RGN_GATE) + 0x200000000UL)
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+# define VMALLOC_END_INIT	(RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9)))
+extern unsigned long VMALLOC_END;
+#else
+#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+/* SPARSEMEM_VMEMMAP uses half of vmalloc... */
+# define VMALLOC_END		(RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 10)))
+# define vmemmap		((struct page *)VMALLOC_END)
+#else
+# define VMALLOC_END		(RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9)))
+#endif
+#endif
+
+/* fs/proc/kcore.c */
+#define	kc_vaddr_to_offset(v) ((v) - RGN_BASE(RGN_GATE))
+#define	kc_offset_to_vaddr(o) ((o) + RGN_BASE(RGN_GATE))
+
+#define RGN_MAP_SHIFT (PGDIR_SHIFT + PTRS_PER_PGD_SHIFT - 3)
+#define RGN_MAP_LIMIT	((1UL << RGN_MAP_SHIFT) - PAGE_SIZE)	/* per region addr limit */
+
+/*
+ * Conversion functions: convert page frame number (pfn) and a protection value to a page
+ * table entry (pte).
+ */
+#define pfn_pte(pfn, pgprot) \
+({ pte_t __pte; pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); __pte; })
+
+/* Extract pfn from pte.  */
+#define pte_pfn(_pte)		((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT)
+
+#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+
+/* This takes a physical page address that is used by the remapping functions */
+#define mk_pte_phys(physpage, pgprot) \
+({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; })
+
+#define pte_modify(_pte, newprot) \
+	(__pte((pte_val(_pte) & ~_PAGE_CHG_MASK) | (pgprot_val(newprot) & _PAGE_CHG_MASK)))
+
+#define pte_none(pte) 			(!pte_val(pte))
+#define pte_present(pte)		(pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE))
+#define pte_clear(mm,addr,pte)		(pte_val(*(pte)) = 0UL)
+/* pte_page() returns the "struct page *" corresponding to the PTE: */
+#define pte_page(pte)			virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET))
+
+#define pmd_none(pmd)			(!pmd_val(pmd))
+#define pmd_bad(pmd)			(!ia64_phys_addr_valid(pmd_val(pmd)))
+#define pmd_present(pmd)		(pmd_val(pmd) != 0UL)
+#define pmd_clear(pmdp)			(pmd_val(*(pmdp)) = 0UL)
+#define pmd_page_vaddr(pmd)		((unsigned long) __va(pmd_val(pmd) & _PFN_MASK))
+#define pmd_page(pmd)			virt_to_page((pmd_val(pmd) + PAGE_OFFSET))
+
+#define pud_none(pud)			(!pud_val(pud))
+#define pud_bad(pud)			(!ia64_phys_addr_valid(pud_val(pud)))
+#define pud_present(pud)		(pud_val(pud) != 0UL)
+#define pud_clear(pudp)			(pud_val(*(pudp)) = 0UL)
+#define pud_page_vaddr(pud)		((unsigned long) __va(pud_val(pud) & _PFN_MASK))
+#define pud_page(pud)			virt_to_page((pud_val(pud) + PAGE_OFFSET))
+
+#ifdef CONFIG_PGTABLE_4
+#define pgd_none(pgd)			(!pgd_val(pgd))
+#define pgd_bad(pgd)			(!ia64_phys_addr_valid(pgd_val(pgd)))
+#define pgd_present(pgd)		(pgd_val(pgd) != 0UL)
+#define pgd_clear(pgdp)			(pgd_val(*(pgdp)) = 0UL)
+#define pgd_page_vaddr(pgd)		((unsigned long) __va(pgd_val(pgd) & _PFN_MASK))
+#define pgd_page(pgd)			virt_to_page((pgd_val(pgd) + PAGE_OFFSET))
+#endif
+
+/*
+ * The following have defined behavior only work if pte_present() is true.
+ */
+#define pte_write(pte)	((unsigned) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) - 2) <= 4)
+#define pte_exec(pte)		((pte_val(pte) & _PAGE_AR_RX) != 0)
+#define pte_dirty(pte)		((pte_val(pte) & _PAGE_D) != 0)
+#define pte_young(pte)		((pte_val(pte) & _PAGE_A) != 0)
+#define pte_file(pte)		((pte_val(pte) & _PAGE_FILE) != 0)
+#define pte_special(pte)	0
+
+/*
+ * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the
+ * access rights:
+ */
+#define pte_wrprotect(pte)	(__pte(pte_val(pte) & ~_PAGE_AR_RW))
+#define pte_mkwrite(pte)	(__pte(pte_val(pte) | _PAGE_AR_RW))
+#define pte_mkold(pte)		(__pte(pte_val(pte) & ~_PAGE_A))
+#define pte_mkyoung(pte)	(__pte(pte_val(pte) | _PAGE_A))
+#define pte_mkclean(pte)	(__pte(pte_val(pte) & ~_PAGE_D))
+#define pte_mkdirty(pte)	(__pte(pte_val(pte) | _PAGE_D))
+#define pte_mkhuge(pte)		(__pte(pte_val(pte)))
+#define pte_mkspecial(pte)	(pte)
+
+/*
+ * Because ia64's Icache and Dcache is not coherent (on a cpu), we need to
+ * sync icache and dcache when we insert *new* executable page.
+ *  __ia64_sync_icache_dcache() check Pg_arch_1 bit and flush icache
+ * if necessary.
+ *
+ *  set_pte() is also called by the kernel, but we can expect that the kernel
+ *  flushes icache explicitly if necessary.
+ */
+#define pte_present_exec_user(pte)\
+	((pte_val(pte) & (_PAGE_P | _PAGE_PL_MASK | _PAGE_AR_RX)) == \
+		(_PAGE_P | _PAGE_PL_3 | _PAGE_AR_RX))
+
+extern void __ia64_sync_icache_dcache(pte_t pteval);
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+	/* page is present && page is user  && page is executable
+	 * && (page swapin or new page or page migraton
+	 *	|| copy_on_write with page copying.)
+	 */
+	if (pte_present_exec_user(pteval) &&
+	    (!pte_present(*ptep) ||
+		pte_pfn(*ptep) != pte_pfn(pteval)))
+		/* load_module() calles flush_icache_range() explicitly*/
+		__ia64_sync_icache_dcache(pteval);
+	*ptep = pteval;
+}
+
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+/*
+ * Make page protection values cacheable, uncacheable, or write-
+ * combining.  Note that "protection" is really a misnomer here as the
+ * protection value contains the memory attribute bits, dirty bits, and
+ * various other bits as well.
+ */
+#define pgprot_cacheable(prot)		__pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WB)
+#define pgprot_noncached(prot)		__pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC)
+#define pgprot_writecombine(prot)	__pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC)
+
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				     unsigned long size, pgprot_t vma_prot);
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+
+static inline unsigned long
+pgd_index (unsigned long address)
+{
+	unsigned long region = address >> 61;
+	unsigned long l1index = (address >> PGDIR_SHIFT) & ((PTRS_PER_PGD >> 3) - 1);
+
+	return (region << (PAGE_SHIFT - 6)) | l1index;
+}
+
+/* The offset in the 1-level directory is given by the 3 region bits
+   (61..63) and the level-1 bits.  */
+static inline pgd_t*
+pgd_offset (const struct mm_struct *mm, unsigned long address)
+{
+	return mm->pgd + pgd_index(address);
+}
+
+/* In the kernel's mapped region we completely ignore the region number
+   (since we know it's in region number 5). */
+#define pgd_offset_k(addr) \
+	(init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)))
+
+/* Look up a pgd entry in the gate area.  On IA-64, the gate-area
+   resides in the kernel-mapped segment, hence we use pgd_offset_k()
+   here.  */
+#define pgd_offset_gate(mm, addr)	pgd_offset_k(addr)
+
+#ifdef CONFIG_PGTABLE_4
+/* Find an entry in the second-level page table.. */
+#define pud_offset(dir,addr) \
+	((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
+#endif
+
+/* Find an entry in the third-level page table.. */
+#define pmd_offset(dir,addr) \
+	((pmd_t *) pud_page_vaddr(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+
+/*
+ * Find an entry in the third-level page table.  This looks more complicated than it
+ * should be because some platforms place page tables in high memory.
+ */
+#define pte_index(addr)	 	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset_kernel(dir,addr)	((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr))
+#define pte_offset_map(dir,addr)	pte_offset_kernel(dir, addr)
+#define pte_unmap(pte)			do { } while (0)
+
+/* atomic versions of the some PTE manipulations: */
+
+static inline int
+ptep_test_and_clear_young (struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	if (!pte_young(*ptep))
+		return 0;
+	return test_and_clear_bit(_PAGE_A_BIT, ptep);
+#else
+	pte_t pte = *ptep;
+	if (!pte_young(pte))
+		return 0;
+	set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
+	return 1;
+#endif
+}
+
+static inline pte_t
+ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	return __pte(xchg((long *) ptep, 0));
+#else
+	pte_t pte = *ptep;
+	pte_clear(mm, addr, ptep);
+	return pte;
+#endif
+}
+
+static inline void
+ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	unsigned long new, old;
+
+	do {
+		old = pte_val(*ptep);
+		new = pte_val(pte_wrprotect(__pte (old)));
+	} while (cmpxchg((unsigned long *) ptep, old, new) != old);
+#else
+	pte_t old_pte = *ptep;
+	set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
+#endif
+}
+
+static inline int
+pte_same (pte_t a, pte_t b)
+{
+	return pte_val(a) == pte_val(b);
+}
+
+#define update_mmu_cache(vma, address, ptep) do { } while (0)
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern void paging_init (void);
+
+/*
+ * Note: The macros below rely on the fact that MAX_SWAPFILES_SHIFT <= number of
+ *	 bits in the swap-type field of the swap pte.  It would be nice to
+ *	 enforce that, but we can't easily include <linux/swap.h> here.
+ *	 (Of course, better still would be to define MAX_SWAPFILES_SHIFT here...).
+ *
+ * Format of swap pte:
+ *	bit   0   : present bit (must be zero)
+ *	bit   1   : _PAGE_FILE (must be zero)
+ *	bits  2- 8: swap-type
+ *	bits  9-62: swap offset
+ *	bit  63   : _PAGE_PROTNONE bit
+ *
+ * Format of file pte:
+ *	bit   0   : present bit (must be zero)
+ *	bit   1   : _PAGE_FILE (must be one)
+ *	bits  2-62: file_offset/PAGE_SIZE
+ *	bit  63   : _PAGE_PROTNONE bit
+ */
+#define __swp_type(entry)		(((entry).val >> 2) & 0x7f)
+#define __swp_offset(entry)		(((entry).val << 1) >> 10)
+#define __swp_entry(type,offset)	((swp_entry_t) { ((type) << 2) | ((long) (offset) << 9) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+
+#define PTE_FILE_MAX_BITS		61
+#define pte_to_pgoff(pte)		((pte_val(pte) << 1) >> 3)
+#define pgoff_to_pte(off)		((pte_t) { ((off) << 2) | _PAGE_FILE })
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+extern struct page *zero_page_memmap_ptr;
+#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr)
+
+/* We provide our own get_unmapped_area to cope with VA holes for userland */
+#define HAVE_ARCH_UNMAPPED_AREA
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HUGETLB_PGDIR_SHIFT	(HPAGE_SHIFT + 2*(PAGE_SHIFT-3))
+#define HUGETLB_PGDIR_SIZE	(__IA64_UL(1) << HUGETLB_PGDIR_SHIFT)
+#define HUGETLB_PGDIR_MASK	(~(HUGETLB_PGDIR_SIZE-1))
+#endif
+
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+/*
+ * Update PTEP with ENTRY, which is guaranteed to be a less
+ * restrictive PTE.  That is, ENTRY may have the ACCESSED, DIRTY, and
+ * WRITABLE bits turned on, when the value at PTEP did not.  The
+ * WRITABLE bit may only be turned if SAFELY_WRITABLE is TRUE.
+ *
+ * SAFELY_WRITABLE is TRUE if we can update the value at PTEP without
+ * having to worry about races.  On SMP machines, there are only two
+ * cases where this is true:
+ *
+ *	(1) *PTEP has the PRESENT bit turned OFF
+ *	(2) ENTRY has the DIRTY bit turned ON
+ *
+ * On ia64, we could implement this routine with a cmpxchg()-loop
+ * which ORs in the _PAGE_A/_PAGE_D bit if they're set in ENTRY.
+ * However, like on x86, we can get a more streamlined version by
+ * observing that it is OK to drop ACCESSED bit updates when
+ * SAFELY_WRITABLE is FALSE.  Besides being rare, all that would do is
+ * result in an extra Access-bit fault, which would then turn on the
+ * ACCESSED bit in the low-level fault handler (iaccess_bit or
+ * daccess_bit in ivt.S).
+ */
+#ifdef CONFIG_SMP
+# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \
+({									\
+	int __changed = !pte_same(*(__ptep), __entry);			\
+	if (__changed && __safely_writable) {				\
+		set_pte(__ptep, __entry);				\
+		flush_tlb_page(__vma, __addr);				\
+	}								\
+	__changed;							\
+})
+#else
+# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \
+({									\
+	int __changed = !pte_same(*(__ptep), __entry);			\
+	if (__changed) {						\
+		set_pte_at((__vma)->vm_mm, (__addr), __ptep, __entry);	\
+		flush_tlb_page(__vma, __addr);				\
+	}								\
+	__changed;							\
+})
+#endif
+
+#  ifdef CONFIG_VIRTUAL_MEM_MAP
+  /* arch mem_map init routine is needed due to holes in a virtual mem_map */
+#   define __HAVE_ARCH_MEMMAP_INIT
+    extern void memmap_init (unsigned long size, int nid, unsigned long zone,
+			     unsigned long start_pfn);
+#  endif /* CONFIG_VIRTUAL_MEM_MAP */
+# endif /* !__ASSEMBLY__ */
+
+/*
+ * Identity-mapped regions use a large page size.  We'll call such large pages
+ * "granules".  If you can think of a better name that's unambiguous, let me
+ * know...
+ */
+#if defined(CONFIG_IA64_GRANULE_64MB)
+# define IA64_GRANULE_SHIFT	_PAGE_SIZE_64M
+#elif defined(CONFIG_IA64_GRANULE_16MB)
+# define IA64_GRANULE_SHIFT	_PAGE_SIZE_16M
+#endif
+#define IA64_GRANULE_SIZE	(1 << IA64_GRANULE_SHIFT)
+/*
+ * log2() of the page size we use to map the kernel image (IA64_TR_KERNEL):
+ */
+#define KERNEL_TR_PAGE_SHIFT	_PAGE_SIZE_64M
+#define KERNEL_TR_PAGE_SIZE	(1 << KERNEL_TR_PAGE_SHIFT)
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()	do { } while (0)
+
+/* These tell get_user_pages() that the first gate page is accessible from user-level.  */
+#define FIXADDR_USER_START	GATE_ADDR
+#ifdef HAVE_BUGGY_SEGREL
+# define FIXADDR_USER_END	(GATE_ADDR + 2*PAGE_SIZE)
+#else
+# define FIXADDR_USER_END	(GATE_ADDR + 2*PERCPU_PAGE_SIZE)
+#endif
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define __HAVE_ARCH_PTE_SAME
+#define __HAVE_ARCH_PGD_OFFSET_GATE
+
+
+#ifndef CONFIG_PGTABLE_4
+#include <asm-generic/pgtable-nopud.h>
+#endif
+#include <asm-generic/pgtable.h>
+
+#endif /* _ASM_IA64_PGTABLE_H */
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
new file mode 100644
index 00000000000..efd1b927ccb
--- /dev/null
+++ b/arch/ia64/include/asm/processor.h
@@ -0,0 +1,711 @@
+#ifndef _ASM_IA64_PROCESSOR_H
+#define _ASM_IA64_PROCESSOR_H
+
+/*
+ * Copyright (C) 1998-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ *
+ * 11/24/98	S.Eranian	added ia64_set_iva()
+ * 12/03/99	D. Mosberger	implement thread_saved_pc() via kernel unwind API
+ * 06/16/00	A. Mallick	added csd/ssd/tssd for ia32 support
+ */
+
+
+#include <asm/intrinsics.h>
+#include <asm/kregs.h>
+#include <asm/ptrace.h>
+#include <asm/ustack.h>
+
+#define __ARCH_WANT_UNLOCKED_CTXSW
+#define ARCH_HAS_PREFETCH_SWITCH_STACK
+
+#define IA64_NUM_PHYS_STACK_REG	96
+#define IA64_NUM_DBG_REGS	8
+
+#define DEFAULT_MAP_BASE	__IA64_UL_CONST(0x2000000000000000)
+#define DEFAULT_TASK_SIZE	__IA64_UL_CONST(0xa000000000000000)
+
+/*
+ * TASK_SIZE really is a mis-named.  It really is the maximum user
+ * space address (plus one).  On IA-64, there are five regions of 2TB
+ * each (assuming 8KB page size), for a total of 8TB of user virtual
+ * address space.
+ */
+#define TASK_SIZE       	DEFAULT_TASK_SIZE
+
+/*
+ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE	(current->thread.map_base)
+
+#define IA64_THREAD_FPH_VALID	(__IA64_UL(1) << 0)	/* floating-point high state valid? */
+#define IA64_THREAD_DBG_VALID	(__IA64_UL(1) << 1)	/* debug registers valid? */
+#define IA64_THREAD_PM_VALID	(__IA64_UL(1) << 2)	/* performance registers valid? */
+#define IA64_THREAD_UAC_NOPRINT	(__IA64_UL(1) << 3)	/* don't log unaligned accesses */
+#define IA64_THREAD_UAC_SIGBUS	(__IA64_UL(1) << 4)	/* generate SIGBUS on unaligned acc. */
+#define IA64_THREAD_MIGRATION	(__IA64_UL(1) << 5)	/* require migration
+							   sync at ctx sw */
+#define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6)	/* don't log any fpswa faults */
+#define IA64_THREAD_FPEMU_SIGFPE  (__IA64_UL(1) << 7)	/* send a SIGFPE for fpswa faults */
+
+#define IA64_THREAD_UAC_SHIFT	3
+#define IA64_THREAD_UAC_MASK	(IA64_THREAD_UAC_NOPRINT | IA64_THREAD_UAC_SIGBUS)
+#define IA64_THREAD_FPEMU_SHIFT	6
+#define IA64_THREAD_FPEMU_MASK	(IA64_THREAD_FPEMU_NOPRINT | IA64_THREAD_FPEMU_SIGFPE)
+
+
+/*
+ * This shift should be large enough to be able to represent 1000000000/itc_freq with good
+ * accuracy while being small enough to fit 10*1000000000<<IA64_NSEC_PER_CYC_SHIFT in 64 bits
+ * (this will give enough slack to represent 10 seconds worth of time as a scaled number).
+ */
+#define IA64_NSEC_PER_CYC_SHIFT	30
+
+#ifndef __ASSEMBLY__
+
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/threads.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include <asm/fpu.h>
+#include <asm/page.h>
+#include <asm/percpu.h>
+#include <asm/rse.h>
+#include <asm/unwind.h>
+#include <linux/atomic.h>
+#ifdef CONFIG_NUMA
+#include <asm/nodedata.h>
+#endif
+
+/* like above but expressed as bitfields for more efficient access: */
+struct ia64_psr {
+	__u64 reserved0 : 1;
+	__u64 be : 1;
+	__u64 up : 1;
+	__u64 ac : 1;
+	__u64 mfl : 1;
+	__u64 mfh : 1;
+	__u64 reserved1 : 7;
+	__u64 ic : 1;
+	__u64 i : 1;
+	__u64 pk : 1;
+	__u64 reserved2 : 1;
+	__u64 dt : 1;
+	__u64 dfl : 1;
+	__u64 dfh : 1;
+	__u64 sp : 1;
+	__u64 pp : 1;
+	__u64 di : 1;
+	__u64 si : 1;
+	__u64 db : 1;
+	__u64 lp : 1;
+	__u64 tb : 1;
+	__u64 rt : 1;
+	__u64 reserved3 : 4;
+	__u64 cpl : 2;
+	__u64 is : 1;
+	__u64 mc : 1;
+	__u64 it : 1;
+	__u64 id : 1;
+	__u64 da : 1;
+	__u64 dd : 1;
+	__u64 ss : 1;
+	__u64 ri : 2;
+	__u64 ed : 1;
+	__u64 bn : 1;
+	__u64 reserved4 : 19;
+};
+
+union ia64_isr {
+	__u64  val;
+	struct {
+		__u64 code : 16;
+		__u64 vector : 8;
+		__u64 reserved1 : 8;
+		__u64 x : 1;
+		__u64 w : 1;
+		__u64 r : 1;
+		__u64 na : 1;
+		__u64 sp : 1;
+		__u64 rs : 1;
+		__u64 ir : 1;
+		__u64 ni : 1;
+		__u64 so : 1;
+		__u64 ei : 2;
+		__u64 ed : 1;
+		__u64 reserved2 : 20;
+	};
+};
+
+union ia64_lid {
+	__u64 val;
+	struct {
+		__u64  rv  : 16;
+		__u64  eid : 8;
+		__u64  id  : 8;
+		__u64  ig  : 32;
+	};
+};
+
+union ia64_tpr {
+	__u64 val;
+	struct {
+		__u64 ig0 : 4;
+		__u64 mic : 4;
+		__u64 rsv : 8;
+		__u64 mmi : 1;
+		__u64 ig1 : 47;
+	};
+};
+
+union ia64_itir {
+	__u64 val;
+	struct {
+		__u64 rv3  :  2; /* 0-1 */
+		__u64 ps   :  6; /* 2-7 */
+		__u64 key  : 24; /* 8-31 */
+		__u64 rv4  : 32; /* 32-63 */
+	};
+};
+
+union  ia64_rr {
+	__u64 val;
+	struct {
+		__u64  ve	:  1;  /* enable hw walker */
+		__u64  reserved0:  1;  /* reserved */
+		__u64  ps	:  6;  /* log page size */
+		__u64  rid	: 24;  /* region id */
+		__u64  reserved1: 32;  /* reserved */
+	};
+};
+
+/*
+ * CPU type, hardware bug flags, and per-CPU state.  Frequently used
+ * state comes earlier:
+ */
+struct cpuinfo_ia64 {
+	unsigned int softirq_pending;
+	unsigned long itm_delta;	/* # of clock cycles between clock ticks */
+	unsigned long itm_next;		/* interval timer mask value to use for next clock tick */
+	unsigned long nsec_per_cyc;	/* (1000000000<<IA64_NSEC_PER_CYC_SHIFT)/itc_freq */
+	unsigned long unimpl_va_mask;	/* mask of unimplemented virtual address bits (from PAL) */
+	unsigned long unimpl_pa_mask;	/* mask of unimplemented physical address bits (from PAL) */
+	unsigned long itc_freq;		/* frequency of ITC counter */
+	unsigned long proc_freq;	/* frequency of processor */
+	unsigned long cyc_per_usec;	/* itc_freq/1000000 */
+	unsigned long ptce_base;
+	unsigned int ptce_count[2];
+	unsigned int ptce_stride[2];
+	struct task_struct *ksoftirqd;	/* kernel softirq daemon for this CPU */
+
+#ifdef CONFIG_SMP
+	unsigned long loops_per_jiffy;
+	int cpu;
+	unsigned int socket_id;	/* physical processor socket id */
+	unsigned short core_id;	/* core id */
+	unsigned short thread_id; /* thread id */
+	unsigned short num_log;	/* Total number of logical processors on
+				 * this socket that were successfully booted */
+	unsigned char cores_per_socket;	/* Cores per processor socket */
+	unsigned char threads_per_core;	/* Threads per core */
+#endif
+
+	/* CPUID-derived information: */
+	unsigned long ppn;
+	unsigned long features;
+	unsigned char number;
+	unsigned char revision;
+	unsigned char model;
+	unsigned char family;
+	unsigned char archrev;
+	char vendor[16];
+	char *model_name;
+
+#ifdef CONFIG_NUMA
+	struct ia64_node_data *node_data;
+#endif
+};
+
+DECLARE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info);
+
+/*
+ * The "local" data variable.  It refers to the per-CPU data of the currently executing
+ * CPU, much like "current" points to the per-task data of the currently executing task.
+ * Do not use the address of local_cpu_data, since it will be different from
+ * cpu_data(smp_processor_id())!
+ */
+#define local_cpu_data		(&__ia64_per_cpu_var(ia64_cpu_info))
+#define cpu_data(cpu)		(&per_cpu(ia64_cpu_info, cpu))
+
+extern void print_cpu_info (struct cpuinfo_ia64 *);
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+#define SET_UNALIGN_CTL(task,value)								\
+({												\
+	(task)->thread.flags = (((task)->thread.flags & ~IA64_THREAD_UAC_MASK)			\
+				| (((value) << IA64_THREAD_UAC_SHIFT) & IA64_THREAD_UAC_MASK));	\
+	0;											\
+})
+#define GET_UNALIGN_CTL(task,addr)								\
+({												\
+	put_user(((task)->thread.flags & IA64_THREAD_UAC_MASK) >> IA64_THREAD_UAC_SHIFT,	\
+		 (int __user *) (addr));							\
+})
+
+#define SET_FPEMU_CTL(task,value)								\
+({												\
+	(task)->thread.flags = (((task)->thread.flags & ~IA64_THREAD_FPEMU_MASK)		\
+			  | (((value) << IA64_THREAD_FPEMU_SHIFT) & IA64_THREAD_FPEMU_MASK));	\
+	0;											\
+})
+#define GET_FPEMU_CTL(task,addr)								\
+({												\
+	put_user(((task)->thread.flags & IA64_THREAD_FPEMU_MASK) >> IA64_THREAD_FPEMU_SHIFT,	\
+		 (int __user *) (addr));							\
+})
+
+struct thread_struct {
+	__u32 flags;			/* various thread flags (see IA64_THREAD_*) */
+	/* writing on_ustack is performance-critical, so it's worth spending 8 bits on it... */
+	__u8 on_ustack;			/* executing on user-stacks? */
+	__u8 pad[3];
+	__u64 ksp;			/* kernel stack pointer */
+	__u64 map_base;			/* base address for get_unmapped_area() */
+	__u64 rbs_bot;			/* the base address for the RBS */
+	int last_fph_cpu;		/* CPU that may hold the contents of f32-f127 */
+
+#ifdef CONFIG_PERFMON
+	void *pfm_context;		     /* pointer to detailed PMU context */
+	unsigned long pfm_needs_checking;    /* when >0, pending perfmon work on kernel exit */
+# define INIT_THREAD_PM		.pfm_context =		NULL,     \
+				.pfm_needs_checking =	0UL,
+#else
+# define INIT_THREAD_PM
+#endif
+	unsigned long dbr[IA64_NUM_DBG_REGS];
+	unsigned long ibr[IA64_NUM_DBG_REGS];
+	struct ia64_fpreg fph[96];	/* saved/loaded on demand */
+};
+
+#define INIT_THREAD {						\
+	.flags =	0,					\
+	.on_ustack =	0,					\
+	.ksp =		0,					\
+	.map_base =	DEFAULT_MAP_BASE,			\
+	.rbs_bot =	STACK_TOP - DEFAULT_USER_STACK_SIZE,	\
+	.last_fph_cpu =  -1,					\
+	INIT_THREAD_PM						\
+	.dbr =		{0, },					\
+	.ibr =		{0, },					\
+	.fph =		{{{{0}}}, }				\
+}
+
+#define start_thread(regs,new_ip,new_sp) do {							\
+	regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL))		\
+			 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS));		\
+	regs->cr_iip = new_ip;									\
+	regs->ar_rsc = 0xf;		/* eager mode, privilege level 3 */			\
+	regs->ar_rnat = 0;									\
+	regs->ar_bspstore = current->thread.rbs_bot;						\
+	regs->ar_fpsr = FPSR_DEFAULT;								\
+	regs->loadrs = 0;									\
+	regs->r8 = get_dumpable(current->mm);	/* set "don't zap registers" flag */		\
+	regs->r12 = new_sp - 16;	/* allocate 16 byte scratch area */			\
+	if (unlikely(get_dumpable(current->mm) != SUID_DUMP_USER)) {	\
+		/*										\
+		 * Zap scratch regs to avoid leaking bits between processes with different	\
+		 * uid/privileges.								\
+		 */										\
+		regs->ar_pfs = 0; regs->b0 = 0; regs->pr = 0;					\
+		regs->r1 = 0; regs->r9  = 0; regs->r11 = 0; regs->r13 = 0; regs->r15 = 0;	\
+	}											\
+} while (0)
+
+/* Forward declarations, a strange C thing... */
+struct mm_struct;
+struct task_struct;
+
+/*
+ * Free all resources held by a thread. This is called after the
+ * parent of DEAD_TASK has collected the exit status of the task via
+ * wait().
+ */
+#define release_thread(dead_task)
+
+/* Get wait channel for task P.  */
+extern unsigned long get_wchan (struct task_struct *p);
+
+/* Return instruction pointer of blocked task TSK.  */
+#define KSTK_EIP(tsk)					\
+  ({							\
+	struct pt_regs *_regs = task_pt_regs(tsk);	\
+	_regs->cr_iip + ia64_psr(_regs)->ri;		\
+  })
+
+/* Return stack pointer of blocked task TSK.  */
+#define KSTK_ESP(tsk)  ((tsk)->thread.ksp)
+
+extern void ia64_getreg_unknown_kr (void);
+extern void ia64_setreg_unknown_kr (void);
+
+#define ia64_get_kr(regnum)					\
+({								\
+	unsigned long r = 0;					\
+								\
+	switch (regnum) {					\
+	    case 0: r = ia64_getreg(_IA64_REG_AR_KR0); break;	\
+	    case 1: r = ia64_getreg(_IA64_REG_AR_KR1); break;	\
+	    case 2: r = ia64_getreg(_IA64_REG_AR_KR2); break;	\
+	    case 3: r = ia64_getreg(_IA64_REG_AR_KR3); break;	\
+	    case 4: r = ia64_getreg(_IA64_REG_AR_KR4); break;	\
+	    case 5: r = ia64_getreg(_IA64_REG_AR_KR5); break;	\
+	    case 6: r = ia64_getreg(_IA64_REG_AR_KR6); break;	\
+	    case 7: r = ia64_getreg(_IA64_REG_AR_KR7); break;	\
+	    default: ia64_getreg_unknown_kr(); break;		\
+	}							\
+	r;							\
+})
+
+#define ia64_set_kr(regnum, r) 					\
+({								\
+	switch (regnum) {					\
+	    case 0: ia64_setreg(_IA64_REG_AR_KR0, r); break;	\
+	    case 1: ia64_setreg(_IA64_REG_AR_KR1, r); break;	\
+	    case 2: ia64_setreg(_IA64_REG_AR_KR2, r); break;	\
+	    case 3: ia64_setreg(_IA64_REG_AR_KR3, r); break;	\
+	    case 4: ia64_setreg(_IA64_REG_AR_KR4, r); break;	\
+	    case 5: ia64_setreg(_IA64_REG_AR_KR5, r); break;	\
+	    case 6: ia64_setreg(_IA64_REG_AR_KR6, r); break;	\
+	    case 7: ia64_setreg(_IA64_REG_AR_KR7, r); break;	\
+	    default: ia64_setreg_unknown_kr(); break;		\
+	}							\
+})
+
+/*
+ * The following three macros can't be inline functions because we don't have struct
+ * task_struct at this point.
+ */
+
+/*
+ * Return TRUE if task T owns the fph partition of the CPU we're running on.
+ * Must be called from code that has preemption disabled.
+ */
+#define ia64_is_local_fpu_owner(t)								\
+({												\
+	struct task_struct *__ia64_islfo_task = (t);						\
+	(__ia64_islfo_task->thread.last_fph_cpu == smp_processor_id()				\
+	 && __ia64_islfo_task == (struct task_struct *) ia64_get_kr(IA64_KR_FPU_OWNER));	\
+})
+
+/*
+ * Mark task T as owning the fph partition of the CPU we're running on.
+ * Must be called from code that has preemption disabled.
+ */
+#define ia64_set_local_fpu_owner(t) do {						\
+	struct task_struct *__ia64_slfo_task = (t);					\
+	__ia64_slfo_task->thread.last_fph_cpu = smp_processor_id();			\
+	ia64_set_kr(IA64_KR_FPU_OWNER, (unsigned long) __ia64_slfo_task);		\
+} while (0)
+
+/* Mark the fph partition of task T as being invalid on all CPUs.  */
+#define ia64_drop_fpu(t)	((t)->thread.last_fph_cpu = -1)
+
+extern void __ia64_init_fpu (void);
+extern void __ia64_save_fpu (struct ia64_fpreg *fph);
+extern void __ia64_load_fpu (struct ia64_fpreg *fph);
+extern void ia64_save_debug_regs (unsigned long *save_area);
+extern void ia64_load_debug_regs (unsigned long *save_area);
+
+#define ia64_fph_enable()	do { ia64_rsm(IA64_PSR_DFH); ia64_srlz_d(); } while (0)
+#define ia64_fph_disable()	do { ia64_ssm(IA64_PSR_DFH); ia64_srlz_d(); } while (0)
+
+/* load fp 0.0 into fph */
+static inline void
+ia64_init_fpu (void) {
+	ia64_fph_enable();
+	__ia64_init_fpu();
+	ia64_fph_disable();
+}
+
+/* save f32-f127 at FPH */
+static inline void
+ia64_save_fpu (struct ia64_fpreg *fph) {
+	ia64_fph_enable();
+	__ia64_save_fpu(fph);
+	ia64_fph_disable();
+}
+
+/* load f32-f127 from FPH */
+static inline void
+ia64_load_fpu (struct ia64_fpreg *fph) {
+	ia64_fph_enable();
+	__ia64_load_fpu(fph);
+	ia64_fph_disable();
+}
+
+static inline __u64
+ia64_clear_ic (void)
+{
+	__u64 psr;
+	psr = ia64_getreg(_IA64_REG_PSR);
+	ia64_stop();
+	ia64_rsm(IA64_PSR_I | IA64_PSR_IC);
+	ia64_srlz_i();
+	return psr;
+}
+
+/*
+ * Restore the psr.
+ */
+static inline void
+ia64_set_psr (__u64 psr)
+{
+	ia64_stop();
+	ia64_setreg(_IA64_REG_PSR_L, psr);
+	ia64_srlz_i();
+}
+
+/*
+ * Insert a translation into an instruction and/or data translation
+ * register.
+ */
+static inline void
+ia64_itr (__u64 target_mask, __u64 tr_num,
+	  __u64 vmaddr, __u64 pte,
+	  __u64 log_page_size)
+{
+	ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2));
+	ia64_setreg(_IA64_REG_CR_IFA, vmaddr);
+	ia64_stop();
+	if (target_mask & 0x1)
+		ia64_itri(tr_num, pte);
+	if (target_mask & 0x2)
+		ia64_itrd(tr_num, pte);
+}
+
+/*
+ * Insert a translation into the instruction and/or data translation
+ * cache.
+ */
+static inline void
+ia64_itc (__u64 target_mask, __u64 vmaddr, __u64 pte,
+	  __u64 log_page_size)
+{
+	ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2));
+	ia64_setreg(_IA64_REG_CR_IFA, vmaddr);
+	ia64_stop();
+	/* as per EAS2.6, itc must be the last instruction in an instruction group */
+	if (target_mask & 0x1)
+		ia64_itci(pte);
+	if (target_mask & 0x2)
+		ia64_itcd(pte);
+}
+
+/*
+ * Purge a range of addresses from instruction and/or data translation
+ * register(s).
+ */
+static inline void
+ia64_ptr (__u64 target_mask, __u64 vmaddr, __u64 log_size)
+{
+	if (target_mask & 0x1)
+		ia64_ptri(vmaddr, (log_size << 2));
+	if (target_mask & 0x2)
+		ia64_ptrd(vmaddr, (log_size << 2));
+}
+
+/* Set the interrupt vector address.  The address must be suitably aligned (32KB).  */
+static inline void
+ia64_set_iva (void *ivt_addr)
+{
+	ia64_setreg(_IA64_REG_CR_IVA, (__u64) ivt_addr);
+	ia64_srlz_i();
+}
+
+/* Set the page table address and control bits.  */
+static inline void
+ia64_set_pta (__u64 pta)
+{
+	/* Note: srlz.i implies srlz.d */
+	ia64_setreg(_IA64_REG_CR_PTA, pta);
+	ia64_srlz_i();
+}
+
+static inline void
+ia64_eoi (void)
+{
+	ia64_setreg(_IA64_REG_CR_EOI, 0);
+	ia64_srlz_d();
+}
+
+#define cpu_relax()	ia64_hint(ia64_hint_pause)
+
+static inline int
+ia64_get_irr(unsigned int vector)
+{
+	unsigned int reg = vector / 64;
+	unsigned int bit = vector % 64;
+	u64 irr;
+
+	switch (reg) {
+	case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break;
+	case 1: irr = ia64_getreg(_IA64_REG_CR_IRR1); break;
+	case 2: irr = ia64_getreg(_IA64_REG_CR_IRR2); break;
+	case 3: irr = ia64_getreg(_IA64_REG_CR_IRR3); break;
+	}
+
+	return test_bit(bit, &irr);
+}
+
+static inline void
+ia64_set_lrr0 (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_CR_LRR0, val);
+	ia64_srlz_d();
+}
+
+static inline void
+ia64_set_lrr1 (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_CR_LRR1, val);
+	ia64_srlz_d();
+}
+
+
+/*
+ * Given the address to which a spill occurred, return the unat bit
+ * number that corresponds to this address.
+ */
+static inline __u64
+ia64_unat_pos (void *spill_addr)
+{
+	return ((__u64) spill_addr >> 3) & 0x3f;
+}
+
+/*
+ * Set the NaT bit of an integer register which was spilled at address
+ * SPILL_ADDR.  UNAT is the mask to be updated.
+ */
+static inline void
+ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat)
+{
+	__u64 bit = ia64_unat_pos(spill_addr);
+	__u64 mask = 1UL << bit;
+
+	*unat = (*unat & ~mask) | (nat << bit);
+}
+
+/*
+ * Return saved PC of a blocked thread.
+ * Note that the only way T can block is through a call to schedule() -> switch_to().
+ */
+static inline unsigned long
+thread_saved_pc (struct task_struct *t)
+{
+	struct unw_frame_info info;
+	unsigned long ip;
+
+	unw_init_from_blocked_task(&info, t);
+	if (unw_unwind(&info) < 0)
+		return 0;
+	unw_get_ip(&info, &ip);
+	return ip;
+}
+
+/*
+ * Get the current instruction/program counter value.
+ */
+#define current_text_addr() \
+	({ void *_pc; _pc = (void *)ia64_getreg(_IA64_REG_IP); _pc; })
+
+static inline __u64
+ia64_get_ivr (void)
+{
+	__u64 r;
+	ia64_srlz_d();
+	r = ia64_getreg(_IA64_REG_CR_IVR);
+	ia64_srlz_d();
+	return r;
+}
+
+static inline void
+ia64_set_dbr (__u64 regnum, __u64 value)
+{
+	__ia64_set_dbr(regnum, value);
+#ifdef CONFIG_ITANIUM
+	ia64_srlz_d();
+#endif
+}
+
+static inline __u64
+ia64_get_dbr (__u64 regnum)
+{
+	__u64 retval;
+
+	retval = __ia64_get_dbr(regnum);
+#ifdef CONFIG_ITANIUM
+	ia64_srlz_d();
+#endif
+	return retval;
+}
+
+static inline __u64
+ia64_rotr (__u64 w, __u64 n)
+{
+	return (w >> n) | (w << (64 - n));
+}
+
+#define ia64_rotl(w,n)	ia64_rotr((w), (64) - (n))
+
+/*
+ * Take a mapped kernel address and return the equivalent address
+ * in the region 7 identity mapped virtual area.
+ */
+static inline void *
+ia64_imva (void *addr)
+{
+	void *result;
+	result = (void *) ia64_tpa(addr);
+	return __va(result);
+}
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+#define PREFETCH_STRIDE			L1_CACHE_BYTES
+
+static inline void
+prefetch (const void *x)
+{
+	 ia64_lfetch(ia64_lfhint_none, x);
+}
+
+static inline void
+prefetchw (const void *x)
+{
+	ia64_lfetch_excl(ia64_lfhint_none, x);
+}
+
+#define spin_lock_prefetch(x)	prefetchw(x)
+
+extern unsigned long boot_option_idle_override;
+
+enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT,
+			 IDLE_NOMWAIT, IDLE_POLL};
+
+void default_idle(void);
+
+#define ia64_platform_is(x) (strcmp(x, ia64_platform_name) == 0)
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_PROCESSOR_H */
diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
new file mode 100644
index 00000000000..845143990a1
--- /dev/null
+++ b/arch/ia64/include/asm/ptrace.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 1998-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2003 Intel Co
+ *	Suresh Siddha <suresh.b.siddha@intel.com>
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *	Arun Sharma <arun.sharma@intel.com>
+ *
+ * 12/07/98	S. Eranian	added pt_regs & switch_stack
+ * 12/21/98	D. Mosberger	updated to match latest code
+ *  6/17/99	D. Mosberger	added second unat member to "struct switch_stack"
+ *
+ */
+#ifndef _ASM_IA64_PTRACE_H
+#define _ASM_IA64_PTRACE_H
+
+#ifndef ASM_OFFSETS_C
+#include <asm/asm-offsets.h>
+#endif
+#include <uapi/asm/ptrace.h>
+
+/*
+ * Base-2 logarithm of number of pages to allocate per task structure
+ * (including register backing store and memory stack):
+ */
+#if defined(CONFIG_IA64_PAGE_SIZE_4KB)
+# define KERNEL_STACK_SIZE_ORDER		3
+#elif defined(CONFIG_IA64_PAGE_SIZE_8KB)
+# define KERNEL_STACK_SIZE_ORDER		2
+#elif defined(CONFIG_IA64_PAGE_SIZE_16KB)
+# define KERNEL_STACK_SIZE_ORDER		1
+#else
+# define KERNEL_STACK_SIZE_ORDER		0
+#endif
+
+#define IA64_RBS_OFFSET			((IA64_TASK_SIZE + IA64_THREAD_INFO_SIZE + 31) & ~31)
+#define IA64_STK_OFFSET			((1 << KERNEL_STACK_SIZE_ORDER)*PAGE_SIZE)
+
+#define KERNEL_STACK_SIZE		IA64_STK_OFFSET
+
+#ifndef __ASSEMBLY__
+
+#include <asm/current.h>
+#include <asm/page.h>
+
+/*
+ * We use the ia64_psr(regs)->ri to determine which of the three
+ * instructions in bundle (16 bytes) took the sample. Generate
+ * the canonical representation by adding to instruction pointer.
+ */
+# define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
+
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+	/* FIXME: should this be bspstore + nr_dirty regs? */
+	return regs->ar_bspstore;
+}
+
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return regs->r10 != -1;
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (is_syscall_success(regs))
+		return regs->r8;
+	else
+		return -regs->r8;
+}
+
+/* Conserve space in histogram by encoding slot bits in address
+ * bits 2 and 3 rather than bits 0 and 1.
+ */
+#define profile_pc(regs)						\
+({									\
+	unsigned long __ip = instruction_pointer(regs);			\
+	(__ip & ~3UL) + ((__ip & 3UL) << 2);				\
+})
+/*
+ * Why not default?  Because user_stack_pointer() on ia64 gives register
+ * stack backing store instead...
+ */
+#define current_user_stack_pointer() (current_pt_regs()->r12)
+
+  /* given a pointer to a task_struct, return the user's pt_regs */
+# define task_pt_regs(t)		(((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1)
+# define ia64_psr(regs)			((struct ia64_psr *) &(regs)->cr_ipsr)
+# define user_mode(regs)		(((struct ia64_psr *) &(regs)->cr_ipsr)->cpl != 0)
+# define user_stack(task,regs)	((long) regs - (long) task == IA64_STK_OFFSET - sizeof(*regs))
+# define fsys_mode(task,regs)					\
+  ({								\
+	  struct task_struct *_task = (task);			\
+	  struct pt_regs *_regs = (regs);			\
+	  !user_mode(_regs) && user_stack(_task, _regs);	\
+  })
+
+  /*
+   * System call handlers that, upon successful completion, need to return a negative value
+   * should call force_successful_syscall_return() right before returning.  On architectures
+   * where the syscall convention provides for a separate error flag (e.g., alpha, ia64,
+   * ppc{,64}, sparc{,64}, possibly others), this macro can be used to ensure that the error
+   * flag will not get set.  On architectures which do not support a separate error flag,
+   * the macro is a no-op and the spurious error condition needs to be filtered out by some
+   * other means (e.g., in user-level, by passing an extra argument to the syscall handler,
+   * or something along those lines).
+   *
+   * On ia64, we can clear the user's pt_regs->r8 to force a successful syscall.
+   */
+# define force_successful_syscall_return()	(task_pt_regs(current)->r8 = 0)
+
+  struct task_struct;			/* forward decl */
+  struct unw_frame_info;		/* forward decl */
+
+  extern void ia64_do_show_stack (struct unw_frame_info *, void *);
+  extern unsigned long ia64_get_user_rbs_end (struct task_struct *, struct pt_regs *,
+					      unsigned long *);
+  extern long ia64_peek (struct task_struct *, struct switch_stack *, unsigned long,
+			 unsigned long, long *);
+  extern long ia64_poke (struct task_struct *, struct switch_stack *, unsigned long,
+			 unsigned long, long);
+  extern void ia64_flush_fph (struct task_struct *);
+  extern void ia64_sync_fph (struct task_struct *);
+  extern void ia64_sync_krbs(void);
+  extern long ia64_sync_user_rbs (struct task_struct *, struct switch_stack *,
+				  unsigned long, unsigned long);
+
+  /* get nat bits for scratch registers such that bit N==1 iff scratch register rN is a NaT */
+  extern unsigned long ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat);
+  /* put nat bits for scratch registers such that scratch register rN is a NaT iff bit N==1 */
+  extern unsigned long ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat);
+
+  extern void ia64_increment_ip (struct pt_regs *pt);
+  extern void ia64_decrement_ip (struct pt_regs *pt);
+
+  extern void ia64_ptrace_stop(void);
+  #define arch_ptrace_stop(code, info) \
+	ia64_ptrace_stop()
+  #define arch_ptrace_stop_needed(code, info) \
+	(!test_thread_flag(TIF_RESTORE_RSE))
+
+  extern void ptrace_attach_sync_user_rbs (struct task_struct *);
+  #define arch_ptrace_attach(child) \
+	ptrace_attach_sync_user_rbs(child)
+
+  #define arch_has_single_step()  (1)
+  #define arch_has_block_step()   (1)
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_IA64_PTRACE_H */
diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h
new file mode 100644
index 00000000000..42b233bedeb
--- /dev/null
+++ b/arch/ia64/include/asm/pvclock-abi.h
@@ -0,0 +1,48 @@
+/*
+ * same structure to x86's
+ * Hopefully asm-x86/pvclock-abi.h would be moved to somewhere more generic.
+ * For now, define same duplicated definitions.
+ */
+
+#ifndef _ASM_IA64__PVCLOCK_ABI_H
+#define _ASM_IA64__PVCLOCK_ABI_H
+#ifndef __ASSEMBLY__
+
+/*
+ * These structs MUST NOT be changed.
+ * They are the ABI between hypervisor and guest OS.
+ * KVM is using this.
+ *
+ * pvclock_vcpu_time_info holds the system time and the tsc timestamp
+ * of the last update. So the guest can use the tsc delta to get a
+ * more precise system time.  There is one per virtual cpu.
+ *
+ * pvclock_wall_clock references the point in time when the system
+ * time was zero (usually boot time), thus the guest calculates the
+ * current wall clock by adding the system time.
+ *
+ * Protocol for the "version" fields is: hypervisor raises it (making
+ * it uneven) before it starts updating the fields and raises it again
+ * (making it even) when it is done.  Thus the guest can make sure the
+ * time values it got are consistent by checking the version before
+ * and after reading them.
+ */
+
+struct pvclock_vcpu_time_info {
+	u32   version;
+	u32   pad0;
+	u64   tsc_timestamp;
+	u64   system_time;
+	u32   tsc_to_system_mul;
+	s8    tsc_shift;
+	u8    pad[3];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+	u32   version;
+	u32   sec;
+	u32   nsec;
+} __attribute__((__packed__));
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_IA64__PVCLOCK_ABI_H */
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
new file mode 100644
index 00000000000..3027e7516d8
--- /dev/null
+++ b/arch/ia64/include/asm/rwsem.h
@@ -0,0 +1,145 @@
+/*
+ * R/W semaphores for ia64
+ *
+ * Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com>
+ * Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 2005 Christoph Lameter <clameter@sgi.com>
+ *
+ * Based on asm-i386/rwsem.h and other architecture implementation.
+ *
+ * The MSW of the count is the negated number of active writers and
+ * waiting lockers, and the LSW is the total number of active locks.
+ *
+ * The lock count is initialized to 0 (no active and no waiting lockers).
+ *
+ * When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for
+ * the case of an uncontended lock. Readers increment by 1 and see a positive
+ * value when uncontended, negative if there are writers (and maybe) readers
+ * waiting (in which case it goes to sleep).
+ */
+
+#ifndef _ASM_IA64_RWSEM_H
+#define _ASM_IA64_RWSEM_H
+
+#ifndef _LINUX_RWSEM_H
+#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
+#endif
+
+#include <asm/intrinsics.h>
+
+#define RWSEM_UNLOCKED_VALUE		__IA64_UL_CONST(0x0000000000000000)
+#define RWSEM_ACTIVE_BIAS		(1L)
+#define RWSEM_ACTIVE_MASK		(0xffffffffL)
+#define RWSEM_WAITING_BIAS		(-0x100000000L)
+#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
+#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+
+/*
+ * lock for reading
+ */
+static inline void
+__down_read (struct rw_semaphore *sem)
+{
+	long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1);
+
+	if (result < 0)
+		rwsem_down_read_failed(sem);
+}
+
+/*
+ * lock for writing
+ */
+static inline void
+__down_write (struct rw_semaphore *sem)
+{
+	long old, new;
+
+	do {
+		old = sem->count;
+		new = old + RWSEM_ACTIVE_WRITE_BIAS;
+	} while (cmpxchg_acq(&sem->count, old, new) != old);
+
+	if (old != 0)
+		rwsem_down_write_failed(sem);
+}
+
+/*
+ * unlock after reading
+ */
+static inline void
+__up_read (struct rw_semaphore *sem)
+{
+	long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1);
+
+	if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
+		rwsem_wake(sem);
+}
+
+/*
+ * unlock after writing
+ */
+static inline void
+__up_write (struct rw_semaphore *sem)
+{
+	long old, new;
+
+	do {
+		old = sem->count;
+		new = old - RWSEM_ACTIVE_WRITE_BIAS;
+	} while (cmpxchg_rel(&sem->count, old, new) != old);
+
+	if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
+		rwsem_wake(sem);
+}
+
+/*
+ * trylock for reading -- returns 1 if successful, 0 if contention
+ */
+static inline int
+__down_read_trylock (struct rw_semaphore *sem)
+{
+	long tmp;
+	while ((tmp = sem->count) >= 0) {
+		if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * trylock for writing -- returns 1 if successful, 0 if contention
+ */
+static inline int
+__down_write_trylock (struct rw_semaphore *sem)
+{
+	long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE,
+			      RWSEM_ACTIVE_WRITE_BIAS);
+	return tmp == RWSEM_UNLOCKED_VALUE;
+}
+
+/*
+ * downgrade write lock to read lock
+ */
+static inline void
+__downgrade_write (struct rw_semaphore *sem)
+{
+	long old, new;
+
+	do {
+		old = sem->count;
+		new = old - RWSEM_WAITING_BIAS;
+	} while (cmpxchg_rel(&sem->count, old, new) != old);
+
+	if (old < 0)
+		rwsem_downgrade_wake(sem);
+}
+
+/*
+ * Implement atomic add functionality.  These used to be "inline" functions, but GCC v3.1
+ * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd.
+ */
+#define rwsem_atomic_add(delta, sem)	atomic64_add(delta, (atomic64_t *)(&(sem)->count))
+#define rwsem_atomic_update(delta, sem)	atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
+
+#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/include/asm/sal.h b/arch/ia64/include/asm/sal.h
new file mode 100644
index 00000000000..e504f382115
--- /dev/null
+++ b/arch/ia64/include/asm/sal.h
@@ -0,0 +1,917 @@
+#ifndef _ASM_IA64_SAL_H
+#define _ASM_IA64_SAL_H
+
+/*
+ * System Abstraction Layer definitions.
+ *
+ * This is based on version 2.5 of the manual "IA-64 System
+ * Abstraction Layer".
+ *
+ * Copyright (C) 2001 Intel
+ * Copyright (C) 2002 Jenna Hall <jenna.s.hall@intel.com>
+ * Copyright (C) 2001 Fred Lewis <frederick.v.lewis@intel.com>
+ * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Srinivasa Prasad Thirumalachar <sprasad@sprasad.engr.sgi.com>
+ *
+ * 02/01/04 J. Hall Updated Error Record Structures to conform to July 2001
+ *		    revision of the SAL spec.
+ * 01/01/03 fvlewis Updated Error Record Structures to conform with Nov. 2000
+ *                  revision of the SAL spec.
+ * 99/09/29 davidm	Updated for SAL 2.6.
+ * 00/03/29 cfleck      Updated SAL Error Logging info for processor (SAL 2.6)
+ *                      (plus examples of platform error info structures from smariset @ Intel)
+ */
+
+#define IA64_SAL_PLATFORM_FEATURE_BUS_LOCK_BIT		0
+#define IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT_BIT	1
+#define IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT_BIT	2
+#define IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT	 	3
+
+#define IA64_SAL_PLATFORM_FEATURE_BUS_LOCK	  (1<<IA64_SAL_PLATFORM_FEATURE_BUS_LOCK_BIT)
+#define IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT (1<<IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT_BIT)
+#define IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT (1<<IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT_BIT)
+#define IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT	  (1<<IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bcd.h>
+#include <linux/spinlock.h>
+#include <linux/efi.h>
+
+#include <asm/pal.h>
+#include <asm/fpu.h>
+
+extern spinlock_t sal_lock;
+
+/* SAL spec _requires_ eight args for each call. */
+#define __IA64_FW_CALL(entry,result,a0,a1,a2,a3,a4,a5,a6,a7)	\
+	result = (*entry)(a0,a1,a2,a3,a4,a5,a6,a7)
+
+# define IA64_FW_CALL(entry,result,args...) do {		\
+	unsigned long __ia64_sc_flags;				\
+	struct ia64_fpreg __ia64_sc_fr[6];			\
+	ia64_save_scratch_fpregs(__ia64_sc_fr);			\
+	spin_lock_irqsave(&sal_lock, __ia64_sc_flags);		\
+	__IA64_FW_CALL(entry, result, args);			\
+	spin_unlock_irqrestore(&sal_lock, __ia64_sc_flags);	\
+	ia64_load_scratch_fpregs(__ia64_sc_fr);			\
+} while (0)
+
+# define SAL_CALL(result,args...)			\
+	IA64_FW_CALL(ia64_sal, result, args);
+
+# define SAL_CALL_NOLOCK(result,args...) do {		\
+	unsigned long __ia64_scn_flags;			\
+	struct ia64_fpreg __ia64_scn_fr[6];		\
+	ia64_save_scratch_fpregs(__ia64_scn_fr);	\
+	local_irq_save(__ia64_scn_flags);		\
+	__IA64_FW_CALL(ia64_sal, result, args);		\
+	local_irq_restore(__ia64_scn_flags);		\
+	ia64_load_scratch_fpregs(__ia64_scn_fr);	\
+} while (0)
+
+# define SAL_CALL_REENTRANT(result,args...) do {	\
+	struct ia64_fpreg __ia64_scs_fr[6];		\
+	ia64_save_scratch_fpregs(__ia64_scs_fr);	\
+	preempt_disable();				\
+	__IA64_FW_CALL(ia64_sal, result, args);		\
+	preempt_enable();				\
+	ia64_load_scratch_fpregs(__ia64_scs_fr);	\
+} while (0)
+
+#define SAL_SET_VECTORS			0x01000000
+#define SAL_GET_STATE_INFO		0x01000001
+#define SAL_GET_STATE_INFO_SIZE		0x01000002
+#define SAL_CLEAR_STATE_INFO		0x01000003
+#define SAL_MC_RENDEZ			0x01000004
+#define SAL_MC_SET_PARAMS		0x01000005
+#define SAL_REGISTER_PHYSICAL_ADDR	0x01000006
+
+#define SAL_CACHE_FLUSH			0x01000008
+#define SAL_CACHE_INIT			0x01000009
+#define SAL_PCI_CONFIG_READ		0x01000010
+#define SAL_PCI_CONFIG_WRITE		0x01000011
+#define SAL_FREQ_BASE			0x01000012
+#define SAL_PHYSICAL_ID_INFO		0x01000013
+
+#define SAL_UPDATE_PAL			0x01000020
+
+struct ia64_sal_retval {
+	/*
+	 * A zero status value indicates call completed without error.
+	 * A negative status value indicates reason of call failure.
+	 * A positive status value indicates success but an
+	 * informational value should be printed (e.g., "reboot for
+	 * change to take effect").
+	 */
+	long status;
+	unsigned long v0;
+	unsigned long v1;
+	unsigned long v2;
+};
+
+typedef struct ia64_sal_retval (*ia64_sal_handler) (u64, ...);
+
+enum {
+	SAL_FREQ_BASE_PLATFORM = 0,
+	SAL_FREQ_BASE_INTERVAL_TIMER = 1,
+	SAL_FREQ_BASE_REALTIME_CLOCK = 2
+};
+
+/*
+ * The SAL system table is followed by a variable number of variable
+ * length descriptors.  The structure of these descriptors follows
+ * below.
+ * The defininition follows SAL specs from July 2000
+ */
+struct ia64_sal_systab {
+	u8 signature[4];	/* should be "SST_" */
+	u32 size;		/* size of this table in bytes */
+	u8 sal_rev_minor;
+	u8 sal_rev_major;
+	u16 entry_count;	/* # of entries in variable portion */
+	u8 checksum;
+	u8 reserved1[7];
+	u8 sal_a_rev_minor;
+	u8 sal_a_rev_major;
+	u8 sal_b_rev_minor;
+	u8 sal_b_rev_major;
+	/* oem_id & product_id: terminating NUL is missing if string is exactly 32 bytes long. */
+	u8 oem_id[32];
+	u8 product_id[32];	/* ASCII product id  */
+	u8 reserved2[8];
+};
+
+enum sal_systab_entry_type {
+	SAL_DESC_ENTRY_POINT = 0,
+	SAL_DESC_MEMORY = 1,
+	SAL_DESC_PLATFORM_FEATURE = 2,
+	SAL_DESC_TR = 3,
+	SAL_DESC_PTC = 4,
+	SAL_DESC_AP_WAKEUP = 5
+};
+
+/*
+ * Entry type:	Size:
+ *	0	48
+ *	1	32
+ *	2	16
+ *	3	32
+ *	4	16
+ *	5	16
+ */
+#define SAL_DESC_SIZE(type)	"\060\040\020\040\020\020"[(unsigned) type]
+
+typedef struct ia64_sal_desc_entry_point {
+	u8 type;
+	u8 reserved1[7];
+	u64 pal_proc;
+	u64 sal_proc;
+	u64 gp;
+	u8 reserved2[16];
+}ia64_sal_desc_entry_point_t;
+
+typedef struct ia64_sal_desc_memory {
+	u8 type;
+	u8 used_by_sal;	/* needs to be mapped for SAL? */
+	u8 mem_attr;		/* current memory attribute setting */
+	u8 access_rights;	/* access rights set up by SAL */
+	u8 mem_attr_mask;	/* mask of supported memory attributes */
+	u8 reserved1;
+	u8 mem_type;		/* memory type */
+	u8 mem_usage;		/* memory usage */
+	u64 addr;		/* physical address of memory */
+	u32 length;	/* length (multiple of 4KB pages) */
+	u32 reserved2;
+	u8 oem_reserved[8];
+} ia64_sal_desc_memory_t;
+
+typedef struct ia64_sal_desc_platform_feature {
+	u8 type;
+	u8 feature_mask;
+	u8 reserved1[14];
+} ia64_sal_desc_platform_feature_t;
+
+typedef struct ia64_sal_desc_tr {
+	u8 type;
+	u8 tr_type;		/* 0 == instruction, 1 == data */
+	u8 regnum;		/* translation register number */
+	u8 reserved1[5];
+	u64 addr;		/* virtual address of area covered */
+	u64 page_size;		/* encoded page size */
+	u8 reserved2[8];
+} ia64_sal_desc_tr_t;
+
+typedef struct ia64_sal_desc_ptc {
+	u8 type;
+	u8 reserved1[3];
+	u32 num_domains;	/* # of coherence domains */
+	u64 domain_info;	/* physical address of domain info table */
+} ia64_sal_desc_ptc_t;
+
+typedef struct ia64_sal_ptc_domain_info {
+	u64 proc_count;		/* number of processors in domain */
+	u64 proc_list;		/* physical address of LID array */
+} ia64_sal_ptc_domain_info_t;
+
+typedef struct ia64_sal_ptc_domain_proc_entry {
+	u64 id  : 8;		/* id of processor */
+	u64 eid : 8;		/* eid of processor */
+} ia64_sal_ptc_domain_proc_entry_t;
+
+
+#define IA64_SAL_AP_EXTERNAL_INT 0
+
+typedef struct ia64_sal_desc_ap_wakeup {
+	u8 type;
+	u8 mechanism;		/* 0 == external interrupt */
+	u8 reserved1[6];
+	u64 vector;		/* interrupt vector in range 0x10-0xff */
+} ia64_sal_desc_ap_wakeup_t ;
+
+extern ia64_sal_handler ia64_sal;
+extern struct ia64_sal_desc_ptc *ia64_ptc_domain_info;
+
+extern unsigned short sal_revision;	/* supported SAL spec revision */
+extern unsigned short sal_version;	/* SAL version; OEM dependent */
+#define SAL_VERSION_CODE(major, minor) ((bin2bcd(major) << 8) | bin2bcd(minor))
+
+extern const char *ia64_sal_strerror (long status);
+extern void ia64_sal_init (struct ia64_sal_systab *sal_systab);
+
+/* SAL information type encodings */
+enum {
+	SAL_INFO_TYPE_MCA  = 0,		/* Machine check abort information */
+        SAL_INFO_TYPE_INIT = 1,		/* Init information */
+        SAL_INFO_TYPE_CMC  = 2,		/* Corrected machine check information */
+        SAL_INFO_TYPE_CPE  = 3		/* Corrected platform error information */
+};
+
+/* Encodings for machine check parameter types */
+enum {
+	SAL_MC_PARAM_RENDEZ_INT    = 1,	/* Rendezvous interrupt */
+	SAL_MC_PARAM_RENDEZ_WAKEUP = 2,	/* Wakeup */
+	SAL_MC_PARAM_CPE_INT	   = 3	/* Corrected Platform Error Int */
+};
+
+/* Encodings for rendezvous mechanisms */
+enum {
+	SAL_MC_PARAM_MECHANISM_INT = 1,	/* Use interrupt */
+	SAL_MC_PARAM_MECHANISM_MEM = 2	/* Use memory synchronization variable*/
+};
+
+/* Encodings for vectors which can be registered by the OS with SAL */
+enum {
+	SAL_VECTOR_OS_MCA	  = 0,
+	SAL_VECTOR_OS_INIT	  = 1,
+	SAL_VECTOR_OS_BOOT_RENDEZ = 2
+};
+
+/* Encodings for mca_opt parameter sent to SAL_MC_SET_PARAMS */
+#define	SAL_MC_PARAM_RZ_ALWAYS		0x1
+#define	SAL_MC_PARAM_BINIT_ESCALATE	0x10
+
+/*
+ * Definition of the SAL Error Log from the SAL spec
+ */
+
+/* SAL Error Record Section GUID Definitions */
+#define SAL_PROC_DEV_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf1, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_MEM_DEV_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf2, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_SEL_DEV_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf3, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_PCI_BUS_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf4, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf5, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_PCI_COMP_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf6, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_SPECIFIC_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf7, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_HOST_CTLR_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf8, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define SAL_PLAT_BUS_ERR_SECT_GUID  \
+    EFI_GUID(0xe429faf9, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81)
+#define PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID \
+    EFI_GUID(0x6cb0a200, 0x893a, 0x11da, 0x96, 0xd2, 0x0, 0x10, 0x83, 0xff, \
+		0xca, 0x4d)
+
+#define MAX_CACHE_ERRORS	6
+#define MAX_TLB_ERRORS		6
+#define MAX_BUS_ERRORS		1
+
+/* Definition of version  according to SAL spec for logging purposes */
+typedef struct sal_log_revision {
+	u8 minor;		/* BCD (0..99) */
+	u8 major;		/* BCD (0..99) */
+} sal_log_revision_t;
+
+/* Definition of timestamp according to SAL spec for logging purposes */
+typedef struct sal_log_timestamp {
+	u8 slh_second;		/* Second (0..59) */
+	u8 slh_minute;		/* Minute (0..59) */
+	u8 slh_hour;		/* Hour (0..23) */
+	u8 slh_reserved;
+	u8 slh_day;		/* Day (1..31) */
+	u8 slh_month;		/* Month (1..12) */
+	u8 slh_year;		/* Year (00..99) */
+	u8 slh_century;		/* Century (19, 20, 21, ...) */
+} sal_log_timestamp_t;
+
+/* Definition of log record  header structures */
+typedef struct sal_log_record_header {
+	u64 id;				/* Unique monotonically increasing ID */
+	sal_log_revision_t revision;	/* Major and Minor revision of header */
+	u8 severity;			/* Error Severity */
+	u8 validation_bits;		/* 0: platform_guid, 1: !timestamp */
+	u32 len;			/* Length of this error log in bytes */
+	sal_log_timestamp_t timestamp;	/* Timestamp */
+	efi_guid_t platform_guid;	/* Unique OEM Platform ID */
+} sal_log_record_header_t;
+
+#define sal_log_severity_recoverable	0
+#define sal_log_severity_fatal		1
+#define sal_log_severity_corrected	2
+
+/*
+ * Error Recovery Info (ERI) bit decode.  From SAL Spec section B.2.2 Table B-3
+ * Error Section Error_Recovery_Info Field Definition.
+ */
+#define ERI_NOT_VALID		0x0	/* Error Recovery Field is not valid */
+#define ERI_NOT_ACCESSIBLE	0x30	/* Resource not accessible */
+#define ERI_CONTAINMENT_WARN	0x22	/* Corrupt data propagated */
+#define ERI_UNCORRECTED_ERROR	0x20	/* Uncorrected error */
+#define ERI_COMPONENT_RESET	0x24	/* Component must be reset */
+#define ERI_CORR_ERROR_LOG	0x21	/* Corrected error, needs logging */
+#define ERI_CORR_ERROR_THRESH	0x29	/* Corrected error threshold exceeded */
+
+/* Definition of log section header structures */
+typedef struct sal_log_sec_header {
+    efi_guid_t guid;			/* Unique Section ID */
+    sal_log_revision_t revision;	/* Major and Minor revision of Section */
+    u8 error_recovery_info;		/* Platform error recovery status */
+    u8 reserved;
+    u32 len;				/* Section length */
+} sal_log_section_hdr_t;
+
+typedef struct sal_log_mod_error_info {
+	struct {
+		u64 check_info              : 1,
+		    requestor_identifier    : 1,
+		    responder_identifier    : 1,
+		    target_identifier       : 1,
+		    precise_ip              : 1,
+		    reserved                : 59;
+	} valid;
+	u64 check_info;
+	u64 requestor_identifier;
+	u64 responder_identifier;
+	u64 target_identifier;
+	u64 precise_ip;
+} sal_log_mod_error_info_t;
+
+typedef struct sal_processor_static_info {
+	struct {
+		u64 minstate        : 1,
+		    br              : 1,
+		    cr              : 1,
+		    ar              : 1,
+		    rr              : 1,
+		    fr              : 1,
+		    reserved        : 58;
+	} valid;
+	pal_min_state_area_t min_state_area;
+	u64 br[8];
+	u64 cr[128];
+	u64 ar[128];
+	u64 rr[8];
+	struct ia64_fpreg __attribute__ ((packed)) fr[128];
+} sal_processor_static_info_t;
+
+struct sal_cpuid_info {
+	u64 regs[5];
+	u64 reserved;
+};
+
+typedef struct sal_log_processor_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 proc_error_map      : 1,
+		    proc_state_param    : 1,
+		    proc_cr_lid         : 1,
+		    psi_static_struct   : 1,
+		    num_cache_check     : 4,
+		    num_tlb_check       : 4,
+		    num_bus_check       : 4,
+		    num_reg_file_check  : 4,
+		    num_ms_check        : 4,
+		    cpuid_info          : 1,
+		    reserved1           : 39;
+	} valid;
+	u64 proc_error_map;
+	u64 proc_state_parameter;
+	u64 proc_cr_lid;
+	/*
+	 * The rest of this structure consists of variable-length arrays, which can't be
+	 * expressed in C.
+	 */
+	sal_log_mod_error_info_t info[0];
+	/*
+	 * This is what the rest looked like if C supported variable-length arrays:
+	 *
+	 * sal_log_mod_error_info_t cache_check_info[.valid.num_cache_check];
+	 * sal_log_mod_error_info_t tlb_check_info[.valid.num_tlb_check];
+	 * sal_log_mod_error_info_t bus_check_info[.valid.num_bus_check];
+	 * sal_log_mod_error_info_t reg_file_check_info[.valid.num_reg_file_check];
+	 * sal_log_mod_error_info_t ms_check_info[.valid.num_ms_check];
+	 * struct sal_cpuid_info cpuid_info;
+	 * sal_processor_static_info_t processor_static_info;
+	 */
+} sal_log_processor_info_t;
+
+/* Given a sal_log_processor_info_t pointer, return a pointer to the processor_static_info: */
+#define SAL_LPI_PSI_INFO(l)									\
+({	sal_log_processor_info_t *_l = (l);							\
+	((sal_processor_static_info_t *)							\
+	 ((char *) _l->info + ((_l->valid.num_cache_check + _l->valid.num_tlb_check		\
+				+ _l->valid.num_bus_check + _l->valid.num_reg_file_check	\
+				+ _l->valid.num_ms_check) * sizeof(sal_log_mod_error_info_t)	\
+			       + sizeof(struct sal_cpuid_info))));				\
+})
+
+/* platform error log structures */
+
+typedef struct sal_log_mem_dev_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 error_status    : 1,
+		    physical_addr   : 1,
+		    addr_mask       : 1,
+		    node            : 1,
+		    card            : 1,
+		    module          : 1,
+		    bank            : 1,
+		    device          : 1,
+		    row             : 1,
+		    column          : 1,
+		    bit_position    : 1,
+		    requestor_id    : 1,
+		    responder_id    : 1,
+		    target_id       : 1,
+		    bus_spec_data   : 1,
+		    oem_id          : 1,
+		    oem_data        : 1,
+		    reserved        : 47;
+	} valid;
+	u64 error_status;
+	u64 physical_addr;
+	u64 addr_mask;
+	u16 node;
+	u16 card;
+	u16 module;
+	u16 bank;
+	u16 device;
+	u16 row;
+	u16 column;
+	u16 bit_position;
+	u64 requestor_id;
+	u64 responder_id;
+	u64 target_id;
+	u64 bus_spec_data;
+	u8 oem_id[16];
+	u8 oem_data[1];			/* Variable length data */
+} sal_log_mem_dev_err_info_t;
+
+typedef struct sal_log_sel_dev_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 record_id       : 1,
+		    record_type     : 1,
+		    generator_id    : 1,
+		    evm_rev         : 1,
+		    sensor_type     : 1,
+		    sensor_num      : 1,
+		    event_dir       : 1,
+		    event_data1     : 1,
+		    event_data2     : 1,
+		    event_data3     : 1,
+		    reserved        : 54;
+	} valid;
+	u16 record_id;
+	u8 record_type;
+	u8 timestamp[4];
+	u16 generator_id;
+	u8 evm_rev;
+	u8 sensor_type;
+	u8 sensor_num;
+	u8 event_dir;
+	u8 event_data1;
+	u8 event_data2;
+	u8 event_data3;
+} sal_log_sel_dev_err_info_t;
+
+typedef struct sal_log_pci_bus_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 err_status      : 1,
+		    err_type        : 1,
+		    bus_id          : 1,
+		    bus_address     : 1,
+		    bus_data        : 1,
+		    bus_cmd         : 1,
+		    requestor_id    : 1,
+		    responder_id    : 1,
+		    target_id       : 1,
+		    oem_data        : 1,
+		    reserved        : 54;
+	} valid;
+	u64 err_status;
+	u16 err_type;
+	u16 bus_id;
+	u32 reserved;
+	u64 bus_address;
+	u64 bus_data;
+	u64 bus_cmd;
+	u64 requestor_id;
+	u64 responder_id;
+	u64 target_id;
+	u8 oem_data[1];			/* Variable length data */
+} sal_log_pci_bus_err_info_t;
+
+typedef struct sal_log_smbios_dev_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 event_type      : 1,
+		    length          : 1,
+		    time_stamp      : 1,
+		    data            : 1,
+		    reserved1       : 60;
+	} valid;
+	u8 event_type;
+	u8 length;
+	u8 time_stamp[6];
+	u8 data[1];			/* data of variable length, length == slsmb_length */
+} sal_log_smbios_dev_err_info_t;
+
+typedef struct sal_log_pci_comp_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 err_status      : 1,
+		    comp_info       : 1,
+		    num_mem_regs    : 1,
+		    num_io_regs     : 1,
+		    reg_data_pairs  : 1,
+		    oem_data        : 1,
+		    reserved        : 58;
+	} valid;
+	u64 err_status;
+	struct {
+		u16 vendor_id;
+		u16 device_id;
+		u8 class_code[3];
+		u8 func_num;
+		u8 dev_num;
+		u8 bus_num;
+		u8 seg_num;
+		u8 reserved[5];
+	} comp_info;
+	u32 num_mem_regs;
+	u32 num_io_regs;
+	u64 reg_data_pairs[1];
+	/*
+	 * array of address/data register pairs is num_mem_regs + num_io_regs elements
+	 * long.  Each array element consists of a u64 address followed by a u64 data
+	 * value.  The oem_data array immediately follows the reg_data_pairs array
+	 */
+	u8 oem_data[1];			/* Variable length data */
+} sal_log_pci_comp_err_info_t;
+
+typedef struct sal_log_plat_specific_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 err_status      : 1,
+		    guid            : 1,
+		    oem_data        : 1,
+		    reserved        : 61;
+	} valid;
+	u64 err_status;
+	efi_guid_t guid;
+	u8 oem_data[1];			/* platform specific variable length data */
+} sal_log_plat_specific_err_info_t;
+
+typedef struct sal_log_host_ctlr_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 err_status      : 1,
+		    requestor_id    : 1,
+		    responder_id    : 1,
+		    target_id       : 1,
+		    bus_spec_data   : 1,
+		    oem_data        : 1,
+		    reserved        : 58;
+	} valid;
+	u64 err_status;
+	u64 requestor_id;
+	u64 responder_id;
+	u64 target_id;
+	u64 bus_spec_data;
+	u8 oem_data[1];			/* Variable length OEM data */
+} sal_log_host_ctlr_err_info_t;
+
+typedef struct sal_log_plat_bus_err_info {
+	sal_log_section_hdr_t header;
+	struct {
+		u64 err_status      : 1,
+		    requestor_id    : 1,
+		    responder_id    : 1,
+		    target_id       : 1,
+		    bus_spec_data   : 1,
+		    oem_data        : 1,
+		    reserved        : 58;
+	} valid;
+	u64 err_status;
+	u64 requestor_id;
+	u64 responder_id;
+	u64 target_id;
+	u64 bus_spec_data;
+	u8 oem_data[1];			/* Variable length OEM data */
+} sal_log_plat_bus_err_info_t;
+
+/* Overall platform error section structure */
+typedef union sal_log_platform_err_info {
+	sal_log_mem_dev_err_info_t mem_dev_err;
+	sal_log_sel_dev_err_info_t sel_dev_err;
+	sal_log_pci_bus_err_info_t pci_bus_err;
+	sal_log_smbios_dev_err_info_t smbios_dev_err;
+	sal_log_pci_comp_err_info_t pci_comp_err;
+	sal_log_plat_specific_err_info_t plat_specific_err;
+	sal_log_host_ctlr_err_info_t host_ctlr_err;
+	sal_log_plat_bus_err_info_t plat_bus_err;
+} sal_log_platform_err_info_t;
+
+/* SAL log over-all, multi-section error record structure (processor+platform) */
+typedef struct err_rec {
+	sal_log_record_header_t sal_elog_header;
+	sal_log_processor_info_t proc_err;
+	sal_log_platform_err_info_t plat_err;
+	u8 oem_data_pad[1024];
+} ia64_err_rec_t;
+
+/*
+ * Now define a couple of inline functions for improved type checking
+ * and convenience.
+ */
+
+extern s64 ia64_sal_cache_flush (u64 cache_type);
+extern void __init check_sal_cache_flush (void);
+
+/* Initialize all the processor and platform level instruction and data caches */
+static inline s64
+ia64_sal_cache_init (void)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_CACHE_INIT, 0, 0, 0, 0, 0, 0, 0);
+	return isrv.status;
+}
+
+/*
+ * Clear the processor and platform information logged by SAL with respect to the machine
+ * state at the time of MCA's, INITs, CMCs, or CPEs.
+ */
+static inline s64
+ia64_sal_clear_state_info (u64 sal_info_type)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_REENTRANT(isrv, SAL_CLEAR_STATE_INFO, sal_info_type, 0,
+	              0, 0, 0, 0, 0);
+	return isrv.status;
+}
+
+
+/* Get the processor and platform information logged by SAL with respect to the machine
+ * state at the time of the MCAs, INITs, CMCs, or CPEs.
+ */
+static inline u64
+ia64_sal_get_state_info (u64 sal_info_type, u64 *sal_info)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_REENTRANT(isrv, SAL_GET_STATE_INFO, sal_info_type, 0,
+	              sal_info, 0, 0, 0, 0);
+	if (isrv.status)
+		return 0;
+
+	return isrv.v0;
+}
+
+/*
+ * Get the maximum size of the information logged by SAL with respect to the machine state
+ * at the time of MCAs, INITs, CMCs, or CPEs.
+ */
+static inline u64
+ia64_sal_get_state_info_size (u64 sal_info_type)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_REENTRANT(isrv, SAL_GET_STATE_INFO_SIZE, sal_info_type, 0,
+	              0, 0, 0, 0, 0);
+	if (isrv.status)
+		return 0;
+	return isrv.v0;
+}
+
+/*
+ * Causes the processor to go into a spin loop within SAL where SAL awaits a wakeup from
+ * the monarch processor.  Must not lock, because it will not return on any cpu until the
+ * monarch processor sends a wake up.
+ */
+static inline s64
+ia64_sal_mc_rendez (void)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_NOLOCK(isrv, SAL_MC_RENDEZ, 0, 0, 0, 0, 0, 0, 0);
+	return isrv.status;
+}
+
+/*
+ * Allow the OS to specify the interrupt number to be used by SAL to interrupt OS during
+ * the machine check rendezvous sequence as well as the mechanism to wake up the
+ * non-monarch processor at the end of machine check processing.
+ * Returns the complete ia64_sal_retval because some calls return more than just a status
+ * value.
+ */
+static inline struct ia64_sal_retval
+ia64_sal_mc_set_params (u64 param_type, u64 i_or_m, u64 i_or_m_val, u64 timeout, u64 rz_always)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_MC_SET_PARAMS, param_type, i_or_m, i_or_m_val,
+		 timeout, rz_always, 0, 0);
+	return isrv;
+}
+
+/* Read from PCI configuration space */
+static inline s64
+ia64_sal_pci_config_read (u64 pci_config_addr, int type, u64 size, u64 *value)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_PCI_CONFIG_READ, pci_config_addr, size, type, 0, 0, 0, 0);
+	if (value)
+		*value = isrv.v0;
+	return isrv.status;
+}
+
+/* Write to PCI configuration space */
+static inline s64
+ia64_sal_pci_config_write (u64 pci_config_addr, int type, u64 size, u64 value)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_PCI_CONFIG_WRITE, pci_config_addr, size, value,
+	         type, 0, 0, 0);
+	return isrv.status;
+}
+
+/*
+ * Register physical addresses of locations needed by SAL when SAL procedures are invoked
+ * in virtual mode.
+ */
+static inline s64
+ia64_sal_register_physical_addr (u64 phys_entry, u64 phys_addr)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_REGISTER_PHYSICAL_ADDR, phys_entry, phys_addr,
+	         0, 0, 0, 0, 0);
+	return isrv.status;
+}
+
+/*
+ * Register software dependent code locations within SAL. These locations are handlers or
+ * entry points where SAL will pass control for the specified event. These event handlers
+ * are for the bott rendezvous, MCAs and INIT scenarios.
+ */
+static inline s64
+ia64_sal_set_vectors (u64 vector_type,
+		      u64 handler_addr1, u64 gp1, u64 handler_len1,
+		      u64 handler_addr2, u64 gp2, u64 handler_len2)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_SET_VECTORS, vector_type,
+			handler_addr1, gp1, handler_len1,
+			handler_addr2, gp2, handler_len2);
+
+	return isrv.status;
+}
+
+/* Update the contents of PAL block in the non-volatile storage device */
+static inline s64
+ia64_sal_update_pal (u64 param_buf, u64 scratch_buf, u64 scratch_buf_size,
+		     u64 *error_code, u64 *scratch_buf_size_needed)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL(isrv, SAL_UPDATE_PAL, param_buf, scratch_buf, scratch_buf_size,
+	         0, 0, 0, 0);
+	if (error_code)
+		*error_code = isrv.v0;
+	if (scratch_buf_size_needed)
+		*scratch_buf_size_needed = isrv.v1;
+	return isrv.status;
+}
+
+/* Get physical processor die mapping in the platform. */
+static inline s64
+ia64_sal_physical_id_info(u16 *splid)
+{
+	struct ia64_sal_retval isrv;
+
+	if (sal_revision < SAL_VERSION_CODE(3,2))
+		return -1;
+
+	SAL_CALL(isrv, SAL_PHYSICAL_ID_INFO, 0, 0, 0, 0, 0, 0, 0);
+	if (splid)
+		*splid = isrv.v0;
+	return isrv.status;
+}
+
+extern unsigned long sal_platform_features;
+
+extern int (*salinfo_platform_oemdata)(const u8 *, u8 **, u64 *);
+
+struct sal_ret_values {
+	long r8; long r9; long r10; long r11;
+};
+
+#define IA64_SAL_OEMFUNC_MIN		0x02000000
+#define IA64_SAL_OEMFUNC_MAX		0x03ffffff
+
+extern int ia64_sal_oemcall(struct ia64_sal_retval *, u64, u64, u64, u64, u64,
+			    u64, u64, u64);
+extern int ia64_sal_oemcall_nolock(struct ia64_sal_retval *, u64, u64, u64,
+				   u64, u64, u64, u64, u64);
+extern int ia64_sal_oemcall_reentrant(struct ia64_sal_retval *, u64, u64, u64,
+				      u64, u64, u64, u64, u64);
+extern long
+ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second,
+		    unsigned long *drift_info);
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * System Abstraction Layer Specification
+ * Section 3.2.5.1: OS_BOOT_RENDEZ to SAL return State.
+ * Note: region regs are stored first in head.S _start. Hence they must
+ * stay up front.
+ */
+struct sal_to_os_boot {
+	u64 rr[8];		/* Region Registers */
+	u64 br[6];		/* br0:
+				 * return addr into SAL boot rendez routine */
+	u64 gr1;		/* SAL:GP */
+	u64 gr12;		/* SAL:SP */
+	u64 gr13;		/* SAL: Task Pointer */
+	u64 fpsr;
+	u64 pfs;
+	u64 rnat;
+	u64 unat;
+	u64 bspstore;
+	u64 dcr;		/* Default Control Register */
+	u64 iva;
+	u64 pta;
+	u64 itv;
+	u64 pmv;
+	u64 cmcv;
+	u64 lrr[2];
+	u64 gr[4];
+	u64 pr;			/* Predicate registers */
+	u64 lc;			/* Loop Count */
+	struct ia64_fpreg fp[20];
+};
+
+/*
+ * Global array allocated for NR_CPUS at boot time
+ */
+extern struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS];
+
+extern void ia64_jump_to_sal(struct sal_to_os_boot *);
+#endif
+
+extern void ia64_sal_handler_init(void *entry_point, void *gpval);
+
+#define PALO_MAX_TLB_PURGES	0xFFFF
+#define PALO_SIG	"PALO"
+
+struct palo_table {
+	u8  signature[4];	/* Should be "PALO" */
+	u32 length;
+	u8  minor_revision;
+	u8  major_revision;
+	u8  checksum;
+	u8  reserved1[5];
+	u16 max_tlb_purges;
+	u8  reserved2[6];
+};
+
+#define NPTCG_FROM_PAL			0
+#define NPTCG_FROM_PALO			1
+#define NPTCG_FROM_KERNEL_PARAMETER	2
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IA64_SAL_H */
diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h
new file mode 100644
index 00000000000..08fd93bff1d
--- /dev/null
+++ b/arch/ia64/include/asm/scatterlist.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_IA64_SCATTERLIST_H
+#define _ASM_IA64_SCATTERLIST_H
+
+#include <asm-generic/scatterlist.h>
+#define ARCH_HAS_SG_CHAIN
+
+#endif /* _ASM_IA64_SCATTERLIST_H */
diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h
new file mode 100644
index 00000000000..1a873b36a4a
--- /dev/null
+++ b/arch/ia64/include/asm/sections.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_IA64_SECTIONS_H
+#define _ASM_IA64_SECTIONS_H
+
+/*
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/elf.h>
+#include <linux/uaccess.h>
+#include <asm-generic/sections.h>
+
+extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
+#ifdef	CONFIG_SMP
+extern char __cpu0_per_cpu[];
+#endif
+extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
+extern char __start___rse_patchlist[], __end___rse_patchlist[];
+extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
+extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[];
+extern char __start_gate_section[];
+extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[];
+extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[];
+extern char __start_gate_fsyscall_patchlist[], __end_gate_fsyscall_patchlist[];
+extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_bubble_down_patchlist[];
+extern char __start_unwind[], __end_unwind[];
+extern char __start_ivt_text[], __end_ivt_text[];
+
+#undef dereference_function_descriptor
+static inline void *dereference_function_descriptor(void *ptr)
+{
+	struct fdesc *desc = ptr;
+	void *p;
+
+	if (!probe_kernel_address(&desc->ip, p))
+		ptr = p;
+	return ptr;
+}
+
+
+#endif /* _ASM_IA64_SECTIONS_H */
+
diff --git a/arch/ia64/include/asm/segment.h b/arch/ia64/include/asm/segment.h
new file mode 100644
index 00000000000..b89e2b3d648
--- /dev/null
+++ b/arch/ia64/include/asm/segment.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_IA64_SEGMENT_H
+#define _ASM_IA64_SEGMENT_H
+
+/* Only here because we have some old header files that expect it.. */
+
+#endif /* _ASM_IA64_SEGMENT_H */
diff --git a/arch/ia64/include/asm/serial.h b/arch/ia64/include/asm/serial.h
new file mode 100644
index 00000000000..068be11583d
--- /dev/null
+++ b/arch/ia64/include/asm/serial.h
@@ -0,0 +1,17 @@
+/*
+ * Derived from the i386 version.
+ */
+
+/*
+ * This assumes you have a 1.8432 MHz clock for your UART.
+ *
+ * It'd be nice if someone built a serial card with a 24.576 MHz
+ * clock, since the 16550A is capable of handling a top speed of 1.5
+ * megabits/second; but this requires the faster clock.
+ */
+#define BASE_BAUD ( 1843200 / 16 )
+
+/*
+ * All legacy serial ports should be enumerated via ACPI namespace, so
+ * we need not list them here.
+ */
diff --git a/arch/ia64/include/asm/shmparam.h b/arch/ia64/include/asm/shmparam.h
new file mode 100644
index 00000000000..d07508dc54a
--- /dev/null
+++ b/arch/ia64/include/asm/shmparam.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_IA64_SHMPARAM_H
+#define _ASM_IA64_SHMPARAM_H
+
+/*
+ * SHMLBA controls minimum alignment at which shared memory segments
+ * get attached.  The IA-64 architecture says that there may be a
+ * performance degradation when there are virtual aliases within 1MB.
+ * To reduce the chance of this, we set SHMLBA to 1MB. --davidm 00/12/20
+ */
+#define	SHMLBA	(1024*1024)
+
+#endif /* _ASM_IA64_SHMPARAM_H */
diff --git a/arch/ia64/include/asm/siginfo.h b/arch/ia64/include/asm/siginfo.h
new file mode 100644
index 00000000000..6f2e2dd0f28
--- /dev/null
+++ b/arch/ia64/include/asm/siginfo.h
@@ -0,0 +1,23 @@
+/*
+ * Based on <asm-i386/siginfo.h>.
+ *
+ * Modified 1998-2002
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+#ifndef _ASM_IA64_SIGINFO_H
+#define _ASM_IA64_SIGINFO_H
+
+#include <linux/string.h>
+#include <uapi/asm/siginfo.h>
+
+static inline void
+copy_siginfo (siginfo_t *to, siginfo_t *from)
+{
+	if (from->si_code < 0)
+		memcpy(to, from, sizeof(siginfo_t));
+	else
+		/* _sigchld is currently the largest know union member */
+		memcpy(to, from, 4*sizeof(int) + sizeof(from->_sifields._sigchld));
+}
+
+#endif /* _ASM_IA64_SIGINFO_H */
diff --git a/arch/ia64/include/asm/signal.h b/arch/ia64/include/asm/signal.h
new file mode 100644
index 00000000000..c62afa4a0dc
--- /dev/null
+++ b/arch/ia64/include/asm/signal.h
@@ -0,0 +1,32 @@
+/*
+ * Modified 1998-2001, 2003
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ *
+ * Unfortunately, this file is being included by bits/signal.h in
+ * glibc-2.x.  Hence the #ifdef __KERNEL__ ugliness.
+ */
+#ifndef _ASM_IA64_SIGNAL_H
+#define _ASM_IA64_SIGNAL_H
+
+#include <uapi/asm/signal.h>
+
+
+#define _NSIG		64
+#define _NSIG_BPW	64
+#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
+
+# ifndef __ASSEMBLY__
+
+/* Most things should be clean enough to redefine this at will, if care
+   is taken to make libc match.  */
+
+typedef unsigned long old_sigset_t;
+
+typedef struct {
+	unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+#  include <asm/sigcontext.h>
+
+# endif /* !__ASSEMBLY__ */
+#endif /* _ASM_IA64_SIGNAL_H */
diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h
new file mode 100644
index 00000000000..fea21e98602
--- /dev/null
+++ b/arch/ia64/include/asm/smp.h
@@ -0,0 +1,137 @@
+/*
+ * SMP Support
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * (c) Copyright 2001-2003, 2005 Hewlett-Packard Development Company, L.P.
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ */
+#ifndef _ASM_IA64_SMP_H
+#define _ASM_IA64_SMP_H
+
+#include <linux/init.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/bitops.h>
+#include <linux/irqreturn.h>
+
+#include <asm/io.h>
+#include <asm/param.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+
+static inline unsigned int
+ia64_get_lid (void)
+{
+	union {
+		struct {
+			unsigned long reserved : 16;
+			unsigned long eid : 8;
+			unsigned long id : 8;
+			unsigned long ignored : 32;
+		} f;
+		unsigned long bits;
+	} lid;
+
+	lid.bits = ia64_getreg(_IA64_REG_CR_LID);
+	return lid.f.id << 8 | lid.f.eid;
+}
+
+#define hard_smp_processor_id()		ia64_get_lid()
+
+#ifdef CONFIG_SMP
+
+#define XTP_OFFSET		0x1e0008
+
+#define SMP_IRQ_REDIRECTION	(1 << 0)
+#define SMP_IPI_REDIRECTION	(1 << 1)
+
+#define raw_smp_processor_id() (current_thread_info()->cpu)
+
+extern struct smp_boot_data {
+	int cpu_count;
+	int cpu_phys_id[NR_CPUS];
+} smp_boot_data __initdata;
+
+extern char no_int_routing;
+
+extern cpumask_t cpu_core_map[NR_CPUS];
+DECLARE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
+extern int smp_num_siblings;
+extern void __iomem *ipi_base_addr;
+extern unsigned char smp_int_redirect;
+
+extern volatile int ia64_cpu_to_sapicid[];
+#define cpu_physical_id(i)	ia64_cpu_to_sapicid[i]
+
+extern unsigned long ap_wakeup_vector;
+
+/*
+ * Function to map hard smp processor id to logical id.  Slow, so don't use this in
+ * performance-critical code.
+ */
+static inline int
+cpu_logical_id (int cpuid)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; ++i)
+		if (cpu_physical_id(i) == cpuid)
+			break;
+	return i;
+}
+
+/*
+ * XTP control functions:
+ *	min_xtp   : route all interrupts to this CPU
+ *	normal_xtp: nominal XTP value
+ *	max_xtp   : never deliver interrupts to this CPU.
+ */
+
+static inline void
+min_xtp (void)
+{
+	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
+		writeb(0x00, ipi_base_addr + XTP_OFFSET); /* XTP to min */
+}
+
+static inline void
+normal_xtp (void)
+{
+	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
+		writeb(0x08, ipi_base_addr + XTP_OFFSET); /* XTP normal */
+}
+
+static inline void
+max_xtp (void)
+{
+	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
+		writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */
+}
+
+/* Upping and downing of CPUs */
+extern int __cpu_disable (void);
+extern void __cpu_die (unsigned int cpu);
+extern void cpu_die (void) __attribute__ ((noreturn));
+extern void __init smp_build_cpu_map(void);
+
+extern void __init init_smp_config (void);
+extern void smp_do_timer (struct pt_regs *regs);
+
+extern irqreturn_t handle_IPI(int irq, void *dev_id);
+extern void smp_send_reschedule (int cpu);
+extern void identify_siblings (struct cpuinfo_ia64 *);
+extern int is_multithreading_enabled(void);
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+#else /* CONFIG_SMP */
+
+#define cpu_logical_id(i)		0
+#define cpu_physical_id(i)		ia64_get_lid()
+
+#endif /* CONFIG_SMP */
+#endif /* _ASM_IA64_SMP_H */
diff --git a/arch/ia64/include/asm/sn/acpi.h b/arch/ia64/include/asm/sn/acpi.h
new file mode 100644
index 00000000000..fd480db2556
--- /dev/null
+++ b/arch/ia64/include/asm/sn/acpi.h
@@ -0,0 +1,15 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_ACPI_H
+#define _ASM_IA64_SN_ACPI_H
+
+extern int sn_acpi_rev;
+#define SN_ACPI_BASE_SUPPORT()   (sn_acpi_rev >= 0x20101)
+
+#endif /* _ASM_IA64_SN_ACPI_H */
diff --git a/arch/ia64/include/asm/sn/addrs.h b/arch/ia64/include/asm/sn/addrs.h
new file mode 100644
index 00000000000..e715c794b18
--- /dev/null
+++ b/arch/ia64/include/asm/sn/addrs.h
@@ -0,0 +1,299 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1992-1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_ADDRS_H
+#define _ASM_IA64_SN_ADDRS_H
+
+#include <asm/percpu.h>
+#include <asm/sn/types.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/pda.h>
+
+/*
+ *  Memory/SHUB Address Format:
+ *  +-+---------+--+--------------+
+ *  |0|  NASID  |AS| NodeOffset   |
+ *  +-+---------+--+--------------+
+ *
+ *  NASID: (low NASID bit is 0) Memory and SHUB MMRs
+ *   AS: 2-bit Address Space Identifier. Used only if low NASID bit is 0
+ *     00: Local Resources and MMR space
+ *           Top bit of NodeOffset
+ *               0: Local resources space
+ *                  node id:
+ *                        0: IA64/NT compatibility space
+ *                        2: Local MMR Space
+ *                        4: Local memory, regardless of local node id
+ *               1: Global MMR space
+ *     01: GET space.
+ *     10: AMO space.
+ *     11: Cacheable memory space.
+ *
+ *   NodeOffset: byte offset
+ *
+ *
+ *  TIO address format:
+ *  +-+----------+--+--------------+
+ *  |0|  NASID   |AS| Nodeoffset   |
+ *  +-+----------+--+--------------+
+ *
+ *  NASID: (low NASID bit is 1) TIO
+ *   AS: 2-bit Chiplet Identifier
+ *     00: TIO LB (Indicates TIO MMR access.)
+ *     01: TIO ICE (indicates coretalk space access.)
+ * 
+ *   NodeOffset: top bit must be set.
+ *
+ *
+ * Note that in both of the above address formats, the low
+ * NASID bit indicates if the reference is to the SHUB or TIO MMRs.
+ */
+
+
+/*
+ * Define basic shift & mask constants for manipulating NASIDs and AS values.
+ */
+#define NASID_BITMASK		(sn_hub_info->nasid_bitmask)
+#define NASID_SHIFT		(sn_hub_info->nasid_shift)
+#define AS_SHIFT		(sn_hub_info->as_shift)
+#define AS_BITMASK		0x3UL
+
+#define NASID_MASK              ((u64)NASID_BITMASK << NASID_SHIFT)
+#define AS_MASK			((u64)AS_BITMASK << AS_SHIFT)
+
+
+/*
+ * AS values. These are the same on both SHUB1 & SHUB2.
+ */
+#define AS_GET_VAL		1UL
+#define AS_AMO_VAL		2UL
+#define AS_CAC_VAL		3UL
+#define AS_GET_SPACE		(AS_GET_VAL << AS_SHIFT)
+#define AS_AMO_SPACE		(AS_AMO_VAL << AS_SHIFT)
+#define AS_CAC_SPACE		(AS_CAC_VAL << AS_SHIFT)
+
+
+/* 
+ * Virtual Mode Local & Global MMR space.  
+ */
+#define SH1_LOCAL_MMR_OFFSET	0x8000000000UL
+#define SH2_LOCAL_MMR_OFFSET	0x0200000000UL
+#define LOCAL_MMR_OFFSET	(is_shub2() ? SH2_LOCAL_MMR_OFFSET : SH1_LOCAL_MMR_OFFSET)
+#define LOCAL_MMR_SPACE		(__IA64_UNCACHED_OFFSET | LOCAL_MMR_OFFSET)
+#define LOCAL_PHYS_MMR_SPACE	(RGN_BASE(RGN_HPAGE) | LOCAL_MMR_OFFSET)
+
+#define SH1_GLOBAL_MMR_OFFSET	0x0800000000UL
+#define SH2_GLOBAL_MMR_OFFSET	0x0300000000UL
+#define GLOBAL_MMR_OFFSET	(is_shub2() ? SH2_GLOBAL_MMR_OFFSET : SH1_GLOBAL_MMR_OFFSET)
+#define GLOBAL_MMR_SPACE	(__IA64_UNCACHED_OFFSET | GLOBAL_MMR_OFFSET)
+
+/*
+ * Physical mode addresses
+ */
+#define GLOBAL_PHYS_MMR_SPACE	(RGN_BASE(RGN_HPAGE) | GLOBAL_MMR_OFFSET)
+
+
+/*
+ * Clear region & AS bits.
+ */
+#define TO_PHYS_MASK		(~(RGN_BITS | AS_MASK))
+
+
+/*
+ * Misc NASID manipulation.
+ */
+#define NASID_SPACE(n)		((u64)(n) << NASID_SHIFT)
+#define REMOTE_ADDR(n,a)	(NASID_SPACE(n) | (a))
+#define NODE_OFFSET(x)		((x) & (NODE_ADDRSPACE_SIZE - 1))
+#define NODE_ADDRSPACE_SIZE     (1UL << AS_SHIFT)
+#define NASID_GET(x)		(int) (((u64) (x) >> NASID_SHIFT) & NASID_BITMASK)
+#define LOCAL_MMR_ADDR(a)	(LOCAL_MMR_SPACE | (a))
+#define GLOBAL_MMR_ADDR(n,a)	(GLOBAL_MMR_SPACE | REMOTE_ADDR(n,a))
+#define GLOBAL_MMR_PHYS_ADDR(n,a) (GLOBAL_PHYS_MMR_SPACE | REMOTE_ADDR(n,a))
+#define GLOBAL_CAC_ADDR(n,a)	(CAC_BASE | REMOTE_ADDR(n,a))
+#define CHANGE_NASID(n,x)	((void *)(((u64)(x) & ~NASID_MASK) | NASID_SPACE(n)))
+#define IS_TIO_NASID(n)		((n) & 1)
+
+
+/* non-II mmr's start at top of big window space (4G) */
+#define BWIN_TOP		0x0000000100000000UL
+
+/*
+ * general address defines
+ */
+#define CAC_BASE		(PAGE_OFFSET | AS_CAC_SPACE)
+#define AMO_BASE		(__IA64_UNCACHED_OFFSET | AS_AMO_SPACE)
+#define AMO_PHYS_BASE		(RGN_BASE(RGN_HPAGE) | AS_AMO_SPACE)
+#define GET_BASE		(PAGE_OFFSET | AS_GET_SPACE)
+
+/*
+ * Convert Memory addresses between various addressing modes.
+ */
+#define TO_PHYS(x)		(TO_PHYS_MASK & (x))
+#define TO_CAC(x)		(CAC_BASE     | TO_PHYS(x))
+#ifdef CONFIG_SGI_SN
+#define TO_AMO(x)		(AMO_BASE     | TO_PHYS(x))
+#define TO_GET(x)		(GET_BASE     | TO_PHYS(x))
+#else
+#define TO_AMO(x)		({ BUG(); x; })
+#define TO_GET(x)		({ BUG(); x; })
+#endif
+
+/*
+ * Covert from processor physical address to II/TIO physical address:
+ *	II - squeeze out the AS bits
+ *	TIO- requires a chiplet id in bits 38-39.  For DMA to memory,
+ *           the chiplet id is zero.  If we implement TIO-TIO dma, we might need
+ *           to insert a chiplet id into this macro.  However, it is our belief
+ *           right now that this chiplet id will be ICE, which is also zero.
+ */
+#define SH1_TIO_PHYS_TO_DMA(x) 						\
+	((((u64)(NASID_GET(x))) << 40) | NODE_OFFSET(x))
+
+#define SH2_NETWORK_BANK_OFFSET(x) 					\
+        ((u64)(x) & ((1UL << (sn_hub_info->nasid_shift - 4)) -1))
+
+#define SH2_NETWORK_BANK_SELECT(x) 					\
+        ((((u64)(x) & (0x3UL << (sn_hub_info->nasid_shift - 4)))	\
+        	>> (sn_hub_info->nasid_shift - 4)) << 36)
+
+#define SH2_NETWORK_ADDRESS(x) 						\
+	(SH2_NETWORK_BANK_OFFSET(x) | SH2_NETWORK_BANK_SELECT(x))
+
+#define SH2_TIO_PHYS_TO_DMA(x) 						\
+        (((u64)(NASID_GET(x)) << 40) | 	SH2_NETWORK_ADDRESS(x))
+
+#define PHYS_TO_TIODMA(x)						\
+	(is_shub1() ? SH1_TIO_PHYS_TO_DMA(x) : SH2_TIO_PHYS_TO_DMA(x))
+
+#define PHYS_TO_DMA(x)							\
+	((((u64)(x) & NASID_MASK) >> 2) | NODE_OFFSET(x))
+
+
+/*
+ * Macros to test for address type.
+ */
+#define IS_AMO_ADDRESS(x)	(((u64)(x) & (RGN_BITS | AS_MASK)) == AMO_BASE)
+#define IS_AMO_PHYS_ADDRESS(x)	(((u64)(x) & (RGN_BITS | AS_MASK)) == AMO_PHYS_BASE)
+
+
+/*
+ * The following definitions pertain to the IO special address
+ * space.  They define the location of the big and little windows
+ * of any given node.
+ */
+#define BWIN_SIZE_BITS			29	/* big window size: 512M */
+#define TIO_BWIN_SIZE_BITS		30	/* big window size: 1G */
+#define NODE_SWIN_BASE(n, w)		((w == 0) ? NODE_BWIN_BASE((n), SWIN0_BIGWIN) \
+		: RAW_NODE_SWIN_BASE(n, w))
+#define TIO_SWIN_BASE(n, w) 		(TIO_IO_BASE(n) + \
+					    ((u64) (w) << TIO_SWIN_SIZE_BITS))
+#define NODE_IO_BASE(n)			(GLOBAL_MMR_SPACE | NASID_SPACE(n))
+#define TIO_IO_BASE(n)                  (__IA64_UNCACHED_OFFSET | NASID_SPACE(n))
+#define BWIN_SIZE			(1UL << BWIN_SIZE_BITS)
+#define NODE_BWIN_BASE0(n)		(NODE_IO_BASE(n) + BWIN_SIZE)
+#define NODE_BWIN_BASE(n, w)		(NODE_BWIN_BASE0(n) + ((u64) (w) << BWIN_SIZE_BITS))
+#define RAW_NODE_SWIN_BASE(n, w)	(NODE_IO_BASE(n) + ((u64) (w) << SWIN_SIZE_BITS))
+#define BWIN_WIDGET_MASK		0x7
+#define BWIN_WINDOWNUM(x)		(((x) >> BWIN_SIZE_BITS) & BWIN_WIDGET_MASK)
+#define SH1_IS_BIG_WINDOW_ADDR(x)	((x) & BWIN_TOP)
+
+#define TIO_BWIN_WINDOW_SELECT_MASK	0x7
+#define TIO_BWIN_WINDOWNUM(x)		(((x) >> TIO_BWIN_SIZE_BITS) & TIO_BWIN_WINDOW_SELECT_MASK)
+
+#define TIO_HWIN_SHIFT_BITS		33
+#define TIO_HWIN(x)			(NODE_OFFSET(x) >> TIO_HWIN_SHIFT_BITS)
+
+/*
+ * The following definitions pertain to the IO special address
+ * space.  They define the location of the big and little windows
+ * of any given node.
+ */
+
+#define SWIN_SIZE_BITS			24
+#define	SWIN_WIDGET_MASK		0xF
+
+#define TIO_SWIN_SIZE_BITS		28
+#define TIO_SWIN_SIZE			(1UL << TIO_SWIN_SIZE_BITS)
+#define TIO_SWIN_WIDGET_MASK		0x3
+
+/*
+ * Convert smallwindow address to xtalk address.
+ *
+ * 'addr' can be physical or virtual address, but will be converted
+ * to Xtalk address in the range 0 -> SWINZ_SIZEMASK
+ */
+#define	SWIN_WIDGETNUM(x)		(((x)  >> SWIN_SIZE_BITS) & SWIN_WIDGET_MASK)
+#define TIO_SWIN_WIDGETNUM(x)		(((x)  >> TIO_SWIN_SIZE_BITS) & TIO_SWIN_WIDGET_MASK)
+
+
+/*
+ * The following macros produce the correct base virtual address for
+ * the hub registers. The REMOTE_HUB_* macro produce
+ * the address for the specified hub's registers.  The intent is
+ * that the appropriate PI, MD, NI, or II register would be substituted
+ * for x.
+ *
+ *   WARNING:
+ *	When certain Hub chip workaround are defined, it's not sufficient
+ *	to dereference the *_HUB_ADDR() macros.  You should instead use
+ *	HUB_L() and HUB_S() if you must deal with pointers to hub registers.
+ *	Otherwise, the recommended approach is to use *_HUB_L() and *_HUB_S().
+ *	They're always safe.
+ */
+/* Shub1 TIO & MMR addressing macros */
+#define SH1_TIO_IOSPACE_ADDR(n,x)					\
+	GLOBAL_MMR_ADDR(n,x)
+
+#define SH1_REMOTE_BWIN_MMR(n,x)					\
+	GLOBAL_MMR_ADDR(n,x)
+
+#define SH1_REMOTE_SWIN_MMR(n,x)					\
+	(NODE_SWIN_BASE(n,1) + 0x800000UL + (x))
+
+#define SH1_REMOTE_MMR(n,x)						\
+	(SH1_IS_BIG_WINDOW_ADDR(x) ? SH1_REMOTE_BWIN_MMR(n,x) :		\
+	 	SH1_REMOTE_SWIN_MMR(n,x))
+
+/* Shub1 TIO & MMR addressing macros */
+#define SH2_TIO_IOSPACE_ADDR(n,x)					\
+	((__IA64_UNCACHED_OFFSET | REMOTE_ADDR(n,x) | 1UL << (NASID_SHIFT - 2)))
+
+#define SH2_REMOTE_MMR(n,x)						\
+	GLOBAL_MMR_ADDR(n,x)
+
+
+/* TIO & MMR addressing macros that work on both shub1 & shub2 */
+#define TIO_IOSPACE_ADDR(n,x)						\
+	((u64 *)(is_shub1() ? SH1_TIO_IOSPACE_ADDR(n,x) :		\
+		 SH2_TIO_IOSPACE_ADDR(n,x)))
+
+#define SH_REMOTE_MMR(n,x)						\
+	(is_shub1() ? SH1_REMOTE_MMR(n,x) : SH2_REMOTE_MMR(n,x))
+
+#define REMOTE_HUB_ADDR(n,x)						\
+	(IS_TIO_NASID(n) ?  ((volatile u64*)TIO_IOSPACE_ADDR(n,x)) :	\
+	 ((volatile u64*)SH_REMOTE_MMR(n,x)))
+
+
+#define HUB_L(x)			(*((volatile typeof(*x) *)x))
+#define	HUB_S(x,d)			(*((volatile typeof(*x) *)x) = (d))
+
+#define REMOTE_HUB_L(n, a)		HUB_L(REMOTE_HUB_ADDR((n), (a)))
+#define REMOTE_HUB_S(n, a, d)		HUB_S(REMOTE_HUB_ADDR((n), (a)), (d))
+
+/*
+ * Coretalk address breakdown
+ */
+#define CTALK_NASID_SHFT		40
+#define CTALK_NASID_MASK		(0x3FFFULL << CTALK_NASID_SHFT)
+#define CTALK_CID_SHFT			38
+#define CTALK_CID_MASK			(0x3ULL << CTALK_CID_SHFT)
+#define CTALK_NODE_OFFSET		0x3FFFFFFFFF
+
+#endif /* _ASM_IA64_SN_ADDRS_H */
diff --git a/arch/ia64/include/asm/sn/arch.h b/arch/ia64/include/asm/sn/arch.h
new file mode 100644
index 00000000000..7caa1f44cd9
--- /dev/null
+++ b/arch/ia64/include/asm/sn/arch.h
@@ -0,0 +1,86 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI specific setup.
+ *
+ * Copyright (C) 1995-1997,1999,2001-2005 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (C) 1999 Ralf Baechle (ralf@gnu.org)
+ */
+#ifndef _ASM_IA64_SN_ARCH_H
+#define _ASM_IA64_SN_ARCH_H
+
+#include <linux/numa.h>
+#include <asm/types.h>
+#include <asm/percpu.h>
+#include <asm/sn/types.h>
+#include <asm/sn/sn_cpuid.h>
+
+/*
+ * This is the maximum number of NUMALINK nodes that can be part of a single
+ * SSI kernel. This number includes C-brick, M-bricks, and TIOs. Nodes in
+ * remote partitions are NOT included in this number.
+ * The number of compact nodes cannot exceed size of a coherency domain.
+ * The purpose of this define is to specify a node count that includes
+ * all C/M/TIO nodes in an SSI system.
+ *
+ * SGI system can currently support up to 256 C/M nodes plus additional TIO nodes.
+ *
+ * 	Note: ACPI20 has an architectural limit of 256 nodes. When we upgrade
+ * 	to ACPI3.0, this limit will be removed. The notion of "compact nodes"
+ * 	should be deleted and TIOs should be included in MAX_NUMNODES.
+ */
+#define MAX_TIO_NODES		MAX_NUMNODES
+#define MAX_COMPACT_NODES	(MAX_NUMNODES + MAX_TIO_NODES)
+
+/*
+ * Maximum number of nodes in all partitions and in all coherency domains.
+ * This is the total number of nodes accessible in the numalink fabric. It
+ * includes all C & M bricks, plus all TIOs.
+ *
+ * This value is also the value of the maximum number of NASIDs in the numalink
+ * fabric.
+ */
+#define MAX_NUMALINK_NODES	16384
+
+/*
+ * The following defines attributes of the HUB chip. These attributes are
+ * frequently referenced. They are kept in the per-cpu data areas of each cpu.
+ * They are kept together in a struct to minimize cache misses.
+ */
+struct sn_hub_info_s {
+	u8 shub2;
+	u8 nasid_shift;
+	u8 as_shift;
+	u8 shub_1_1_found;
+	u16 nasid_bitmask;
+};
+DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
+#define sn_hub_info 	(&__get_cpu_var(__sn_hub_info))
+#define is_shub2()	(sn_hub_info->shub2)
+#define is_shub1()	(sn_hub_info->shub2 == 0)
+
+/*
+ * Use this macro to test if shub 1.1 wars should be enabled
+ */
+#define enable_shub_wars_1_1()	(sn_hub_info->shub_1_1_found)
+
+
+/*
+ * Compact node ID to nasid mappings kept in the per-cpu data areas of each
+ * cpu.
+ */
+DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
+#define sn_cnodeid_to_nasid	(&__get_cpu_var(__sn_cnodeid_to_nasid[0]))
+
+
+extern u8 sn_partition_id;
+extern u8 sn_system_size;
+extern u8 sn_sharing_domain_size;
+extern u8 sn_region_size;
+
+extern void sn_flush_all_caches(long addr, long bytes);
+extern bool sn_cpu_disable_allowed(int cpu);
+
+#endif /* _ASM_IA64_SN_ARCH_H */
diff --git a/arch/ia64/include/asm/sn/bte.h b/arch/ia64/include/asm/sn/bte.h
new file mode 100644
index 00000000000..cc6c4dbf53a
--- /dev/null
+++ b/arch/ia64/include/asm/sn/bte.h
@@ -0,0 +1,234 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2007 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+
+#ifndef _ASM_IA64_SN_BTE_H
+#define _ASM_IA64_SN_BTE_H
+
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/types.h>
+#include <asm/sn/shub_mmr.h>
+
+#define IBCT_NOTIFY             (0x1UL << 4)
+#define IBCT_ZFIL_MODE          (0x1UL << 0)
+
+/* #define BTE_DEBUG */
+/* #define BTE_DEBUG_VERBOSE */
+
+#ifdef BTE_DEBUG
+#  define BTE_PRINTK(x) printk x	/* Terse */
+#  ifdef BTE_DEBUG_VERBOSE
+#    define BTE_PRINTKV(x) printk x	/* Verbose */
+#  else
+#    define BTE_PRINTKV(x)
+#  endif /* BTE_DEBUG_VERBOSE */
+#else
+#  define BTE_PRINTK(x)
+#  define BTE_PRINTKV(x)
+#endif	/* BTE_DEBUG */
+
+
+/* BTE status register only supports 16 bits for length field */
+#define BTE_LEN_BITS (16)
+#define BTE_LEN_MASK ((1 << BTE_LEN_BITS) - 1)
+#define BTE_MAX_XFER (BTE_LEN_MASK << L1_CACHE_SHIFT)
+
+
+/* Define hardware */
+#define BTES_PER_NODE (is_shub2() ? 4 : 2)
+#define MAX_BTES_PER_NODE 4
+
+#define BTE2OFF_CTRL	0
+#define BTE2OFF_SRC	(SH2_BT_ENG_SRC_ADDR_0 - SH2_BT_ENG_CSR_0)
+#define BTE2OFF_DEST	(SH2_BT_ENG_DEST_ADDR_0 - SH2_BT_ENG_CSR_0)
+#define BTE2OFF_NOTIFY	(SH2_BT_ENG_NOTIF_ADDR_0 - SH2_BT_ENG_CSR_0)
+
+#define BTE_BASE_ADDR(interface) 				\
+    (is_shub2() ? (interface == 0) ? SH2_BT_ENG_CSR_0 :		\
+		  (interface == 1) ? SH2_BT_ENG_CSR_1 :		\
+		  (interface == 2) ? SH2_BT_ENG_CSR_2 :		\
+		  		     SH2_BT_ENG_CSR_3 		\
+		: (interface == 0) ? IIO_IBLS0 : IIO_IBLS1)
+
+#define BTE_SOURCE_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_SRC/8) 			\
+		: base + (BTEOFF_SRC/8))
+
+#define BTE_DEST_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_DEST/8) 			\
+		: base + (BTEOFF_DEST/8))
+
+#define BTE_CTRL_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_CTRL/8) 			\
+		: base + (BTEOFF_CTRL/8))
+
+#define BTE_NOTIF_ADDR(base)					\
+    (is_shub2() ? base + (BTE2OFF_NOTIFY/8) 			\
+		: base + (BTEOFF_NOTIFY/8))
+
+/* Define hardware modes */
+#define BTE_NOTIFY IBCT_NOTIFY
+#define BTE_NORMAL BTE_NOTIFY
+#define BTE_ZERO_FILL (BTE_NOTIFY | IBCT_ZFIL_MODE)
+/* Use a reserved bit to let the caller specify a wait for any BTE */
+#define BTE_WACQUIRE 0x4000
+/* Use the BTE on the node with the destination memory */
+#define BTE_USE_DEST (BTE_WACQUIRE << 1)
+/* Use any available BTE interface on any node for the transfer */
+#define BTE_USE_ANY (BTE_USE_DEST << 1)
+/* macro to force the IBCT0 value valid */
+#define BTE_VALID_MODE(x) ((x) & (IBCT_NOTIFY | IBCT_ZFIL_MODE))
+
+#define BTE_ACTIVE		(IBLS_BUSY | IBLS_ERROR)
+#define BTE_WORD_AVAILABLE	(IBLS_BUSY << 1)
+#define BTE_WORD_BUSY		(~BTE_WORD_AVAILABLE)
+
+/*
+ * Some macros to simplify reading.
+ * Start with macros to locate the BTE control registers.
+ */
+#define BTE_LNSTAT_LOAD(_bte)						\
+			HUB_L(_bte->bte_base_addr)
+#define BTE_LNSTAT_STORE(_bte, _x)					\
+			HUB_S(_bte->bte_base_addr, (_x))
+#define BTE_SRC_STORE(_bte, _x)						\
+({									\
+		u64 __addr = ((_x) & ~AS_MASK);				\
+		if (is_shub2()) 					\
+			__addr = SH2_TIO_PHYS_TO_DMA(__addr);		\
+		HUB_S(_bte->bte_source_addr, __addr);			\
+})
+#define BTE_DEST_STORE(_bte, _x)					\
+({									\
+		u64 __addr = ((_x) & ~AS_MASK);				\
+		if (is_shub2()) 					\
+			__addr = SH2_TIO_PHYS_TO_DMA(__addr);		\
+		HUB_S(_bte->bte_destination_addr, __addr);		\
+})
+#define BTE_CTRL_STORE(_bte, _x)					\
+			HUB_S(_bte->bte_control_addr, (_x))
+#define BTE_NOTIF_STORE(_bte, _x)					\
+({									\
+		u64 __addr = ia64_tpa((_x) & ~AS_MASK);			\
+		if (is_shub2()) 					\
+			__addr = SH2_TIO_PHYS_TO_DMA(__addr);		\
+		HUB_S(_bte->bte_notify_addr, __addr);			\
+})
+
+#define BTE_START_TRANSFER(_bte, _len, _mode)				\
+	is_shub2() ? BTE_CTRL_STORE(_bte, IBLS_BUSY | (_mode << 24) | _len) \
+		: BTE_LNSTAT_STORE(_bte, _len);				\
+		  BTE_CTRL_STORE(_bte, _mode)
+
+/* Possible results from bte_copy and bte_unaligned_copy */
+/* The following error codes map into the BTE hardware codes
+ * IIO_ICRB_ECODE_* (in shubio.h). The hardware uses
+ * an error code of 0 (IIO_ICRB_ECODE_DERR), but we want zero
+ * to mean BTE_SUCCESS, so add one (BTEFAIL_OFFSET) to the error
+ * codes to give the following error codes.
+ */
+#define BTEFAIL_OFFSET	1
+
+typedef enum {
+	BTE_SUCCESS,		/* 0 is success */
+	BTEFAIL_DIR,		/* Directory error due to IIO access*/
+	BTEFAIL_POISON,		/* poison error on IO access (write to poison page) */
+	BTEFAIL_WERR,		/* Write error (ie WINV to a Read only line) */
+	BTEFAIL_ACCESS,		/* access error (protection violation) */
+	BTEFAIL_PWERR,		/* Partial Write Error */
+	BTEFAIL_PRERR,		/* Partial Read Error */
+	BTEFAIL_TOUT,		/* CRB Time out */
+	BTEFAIL_XTERR,		/* Incoming xtalk pkt had error bit */
+	BTEFAIL_NOTAVAIL,	/* BTE not available */
+} bte_result_t;
+
+#define BTEFAIL_SH2_RESP_SHORT	0x1	/* bit 000001 */
+#define BTEFAIL_SH2_RESP_LONG	0x2	/* bit 000010 */
+#define BTEFAIL_SH2_RESP_DSP	0x4	/* bit 000100 */
+#define BTEFAIL_SH2_RESP_ACCESS	0x8	/* bit 001000 */
+#define BTEFAIL_SH2_CRB_TO	0x10	/* bit 010000 */
+#define BTEFAIL_SH2_NACK_LIMIT	0x20	/* bit 100000 */
+#define BTEFAIL_SH2_ALL		0x3F	/* bit 111111 */
+
+#define	BTE_ERR_BITS	0x3FUL
+#define	BTE_ERR_SHIFT	36
+#define BTE_ERR_MASK	(BTE_ERR_BITS << BTE_ERR_SHIFT)
+
+#define BTE_ERROR_RETRY(value)						\
+	(is_shub2() ? (value != BTEFAIL_SH2_CRB_TO)			\
+		: (value != BTEFAIL_TOUT))
+
+/*
+ * On shub1 BTE_ERR_MASK will always be false, so no need for is_shub2()
+ */
+#define BTE_SHUB2_ERROR(_status)					\
+	((_status & BTE_ERR_MASK) 					\
+	   ? (((_status >> BTE_ERR_SHIFT) & BTE_ERR_BITS) | IBLS_ERROR) \
+	   : _status)
+
+#define BTE_GET_ERROR_STATUS(_status)					\
+	(BTE_SHUB2_ERROR(_status) & ~IBLS_ERROR)
+
+#define BTE_VALID_SH2_ERROR(value)					\
+	((value >= BTEFAIL_SH2_RESP_SHORT) && (value <= BTEFAIL_SH2_ALL))
+
+/*
+ * Structure defining a bte.  An instance of this
+ * structure is created in the nodepda for each
+ * bte on that node (as defined by BTES_PER_NODE)
+ * This structure contains everything necessary
+ * to work with a BTE.
+ */
+struct bteinfo_s {
+	volatile u64 notify ____cacheline_aligned;
+	u64 *bte_base_addr ____cacheline_aligned;
+	u64 *bte_source_addr;
+	u64 *bte_destination_addr;
+	u64 *bte_control_addr;
+	u64 *bte_notify_addr;
+	spinlock_t spinlock;
+	cnodeid_t bte_cnode;	/* cnode                            */
+	int bte_error_count;	/* Number of errors encountered     */
+	int bte_num;		/* 0 --> BTE0, 1 --> BTE1           */
+	int cleanup_active;	/* Interface is locked for cleanup  */
+	volatile bte_result_t bh_error;	/* error while processing   */
+	volatile u64 *most_rcnt_na;
+	struct bteinfo_s *btes_to_try[MAX_BTES_PER_NODE];
+};
+
+
+/*
+ * Function prototypes (functions defined in bte.c, used elsewhere)
+ */
+extern bte_result_t bte_copy(u64, u64, u64, u64, void *);
+extern bte_result_t bte_unaligned_copy(u64, u64, u64, u64);
+extern void bte_error_handler(unsigned long);
+
+#define bte_zero(dest, len, mode, notification) \
+	bte_copy(0, dest, len, ((mode) | BTE_ZERO_FILL), notification)
+
+/*
+ * The following is the preferred way of calling bte_unaligned_copy
+ * If the copy is fully cache line aligned, then bte_copy is
+ * used instead.  Since bte_copy is inlined, this saves a call
+ * stack.  NOTE: bte_copy is called synchronously and does block
+ * until the transfer is complete.  In order to get the asynch
+ * version of bte_copy, you must perform this check yourself.
+ */
+#define BTE_UNALIGNED_COPY(src, dest, len, mode)			\
+	(((len & (L1_CACHE_BYTES - 1)) ||				\
+	  (src & (L1_CACHE_BYTES - 1)) ||				\
+	  (dest & (L1_CACHE_BYTES - 1))) ?				\
+	 bte_unaligned_copy(src, dest, len, mode) :			\
+	 bte_copy(src, dest, len, mode, NULL))
+
+
+#endif	/* _ASM_IA64_SN_BTE_H */
diff --git a/arch/ia64/include/asm/sn/clksupport.h b/arch/ia64/include/asm/sn/clksupport.h
new file mode 100644
index 00000000000..d340c365a82
--- /dev/null
+++ b/arch/ia64/include/asm/sn/clksupport.h
@@ -0,0 +1,28 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * This file contains definitions for accessing a platform supported high resolution
+ * clock. The clock is monitonically increasing and can be accessed from any node
+ * in the system. The clock is synchronized across nodes - all nodes see the
+ * same value.
+ * 
+ *	RTC_COUNTER_ADDR - contains the address of the counter 
+ *
+ */
+
+#ifndef _ASM_IA64_SN_CLKSUPPORT_H
+#define _ASM_IA64_SN_CLKSUPPORT_H
+
+extern unsigned long sn_rtc_cycles_per_second;
+
+#define RTC_COUNTER_ADDR	((long *)LOCAL_MMR_ADDR(SH_RTC))
+
+#define rtc_time()		(*RTC_COUNTER_ADDR)
+
+#endif /* _ASM_IA64_SN_CLKSUPPORT_H */
diff --git a/arch/ia64/include/asm/sn/geo.h b/arch/ia64/include/asm/sn/geo.h
new file mode 100644
index 00000000000..f083c943406
--- /dev/null
+++ b/arch/ia64/include/asm/sn/geo.h
@@ -0,0 +1,132 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_GEO_H
+#define _ASM_IA64_SN_GEO_H
+
+/* The geoid_t implementation below is based loosely on the pcfg_t
+   implementation in sys/SN/promcfg.h. */
+
+/* Type declaractions */
+
+/* Size of a geoid_t structure (must be before decl. of geoid_u) */
+#define GEOID_SIZE	8	/* Would 16 be better?  The size can
+				   be different on different platforms. */
+
+#define MAX_SLOTS	0xf	/* slots per module */
+#define MAX_SLABS	0xf	/* slabs per slot */
+
+typedef unsigned char	geo_type_t;
+
+/* Fields common to all substructures */
+typedef struct geo_common_s {
+    moduleid_t	module;		/* The module (box) this h/w lives in */
+    geo_type_t	type;		/* What type of h/w is named by this geoid_t */
+    slabid_t	slab:4;		/* slab (ASIC), 0 .. 15 within slot */
+    slotid_t	slot:4;		/* slot (Blade), 0 .. 15 within module */
+} geo_common_t;
+
+/* Additional fields for particular types of hardware */
+typedef struct geo_node_s {
+    geo_common_t	common;		/* No additional fields needed */
+} geo_node_t;
+
+typedef struct geo_rtr_s {
+    geo_common_t	common;		/* No additional fields needed */
+} geo_rtr_t;
+
+typedef struct geo_iocntl_s {
+    geo_common_t	common;		/* No additional fields needed */
+} geo_iocntl_t;
+
+typedef struct geo_pcicard_s {
+    geo_iocntl_t	common;
+    char		bus;	/* Bus/widget number */
+    char		slot;	/* PCI slot number */
+} geo_pcicard_t;
+
+/* Subcomponents of a node */
+typedef struct geo_cpu_s {
+    geo_node_t	node;
+    char	slice;		/* Which CPU on the node */
+} geo_cpu_t;
+
+typedef struct geo_mem_s {
+    geo_node_t	node;
+    char	membus;		/* The memory bus on the node */
+    char	memslot;	/* The memory slot on the bus */
+} geo_mem_t;
+
+
+typedef union geoid_u {
+    geo_common_t	common;
+    geo_node_t		node;
+    geo_iocntl_t	iocntl;
+    geo_pcicard_t	pcicard;
+    geo_rtr_t		rtr;
+    geo_cpu_t		cpu;
+    geo_mem_t		mem;
+    char		padsize[GEOID_SIZE];
+} geoid_t;
+
+
+/* Preprocessor macros */
+
+#define GEO_MAX_LEN	48	/* max. formatted length, plus some pad:
+				   module/001c07/slab/5/node/memory/2/slot/4 */
+
+/* Values for geo_type_t */
+#define GEO_TYPE_INVALID	0
+#define GEO_TYPE_MODULE		1
+#define GEO_TYPE_NODE		2
+#define GEO_TYPE_RTR		3
+#define GEO_TYPE_IOCNTL		4
+#define GEO_TYPE_IOCARD		5
+#define GEO_TYPE_CPU		6
+#define GEO_TYPE_MEM		7
+#define GEO_TYPE_MAX		(GEO_TYPE_MEM+1)
+
+/* Parameter for hwcfg_format_geoid_compt() */
+#define GEO_COMPT_MODULE	1
+#define GEO_COMPT_SLAB		2
+#define GEO_COMPT_IOBUS		3
+#define GEO_COMPT_IOSLOT	4
+#define GEO_COMPT_CPU		5
+#define GEO_COMPT_MEMBUS	6
+#define GEO_COMPT_MEMSLOT	7
+
+#define GEO_INVALID_STR		"<invalid>"
+
+#define INVALID_NASID           ((nasid_t)-1)
+#define INVALID_CNODEID         ((cnodeid_t)-1)
+#define INVALID_PNODEID         ((pnodeid_t)-1)
+#define INVALID_SLAB            (slabid_t)-1
+#define INVALID_SLOT            (slotid_t)-1
+#define INVALID_MODULE          ((moduleid_t)-1)
+
+static inline slabid_t geo_slab(geoid_t g)
+{
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_SLAB : g.common.slab;
+}
+
+static inline slotid_t geo_slot(geoid_t g)
+{
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_SLOT : g.common.slot;
+}
+
+static inline moduleid_t geo_module(geoid_t g)
+{
+	return (g.common.type == GEO_TYPE_INVALID) ?
+		INVALID_MODULE : g.common.module;
+}
+
+extern geoid_t cnodeid_get_geoid(cnodeid_t cnode);
+
+#endif /* _ASM_IA64_SN_GEO_H */
diff --git a/arch/ia64/include/asm/sn/intr.h b/arch/ia64/include/asm/sn/intr.h
new file mode 100644
index 00000000000..e0487aa9741
--- /dev/null
+++ b/arch/ia64/include/asm/sn/intr.h
@@ -0,0 +1,68 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_INTR_H
+#define _ASM_IA64_SN_INTR_H
+
+#include <linux/rcupdate.h>
+#include <asm/sn/types.h>
+
+#define SGI_UART_VECTOR		0xe9
+
+/* Reserved IRQs : Note, not to exceed IA64_SN2_FIRST_DEVICE_VECTOR */
+#define SGI_XPC_ACTIVATE	0x30
+#define SGI_II_ERROR		0x31
+#define SGI_XBOW_ERROR		0x32
+#define SGI_PCIASIC_ERROR	0x33
+#define SGI_ACPI_SCI_INT	0x34
+#define SGI_TIOCA_ERROR		0x35
+#define SGI_TIO_ERROR		0x36
+#define SGI_TIOCX_ERROR		0x37
+#define SGI_MMTIMER_VECTOR	0x38
+#define SGI_XPC_NOTIFY		0xe7
+
+#define IA64_SN2_FIRST_DEVICE_VECTOR	0x3c
+#define IA64_SN2_LAST_DEVICE_VECTOR	0xe6
+
+#define SN2_IRQ_RESERVED	0x1
+#define SN2_IRQ_CONNECTED	0x2
+#define SN2_IRQ_SHARED		0x4
+
+// The SN PROM irq struct
+struct sn_irq_info {
+	struct sn_irq_info *irq_next;	/* deprecated DO NOT USE     */
+	short		irq_nasid;	/* Nasid IRQ is assigned to  */
+	int		irq_slice;	/* slice IRQ is assigned to  */
+	int		irq_cpuid;	/* kernel logical cpuid	     */
+	int		irq_irq;	/* the IRQ number */
+	int		irq_int_bit;	/* Bridge interrupt pin */
+					/* <0 means MSI */
+	u64	irq_xtalkaddr;	/* xtalkaddr IRQ is sent to  */
+	int		irq_bridge_type;/* pciio asic type (pciio.h) */
+	void	       *irq_bridge;	/* bridge generating irq     */
+	void	       *irq_pciioinfo;	/* associated pciio_info_t   */
+	int		irq_last_intr;	/* For Shub lb lost intr WAR */
+	int		irq_cookie;	/* unique cookie 	     */
+	int		irq_flags;	/* flags */
+	int		irq_share_cnt;	/* num devices sharing IRQ   */
+	struct list_head	list;	/* list of sn_irq_info structs */
+	struct rcu_head		rcu;	/* rcu callback list */
+};
+
+extern void sn_send_IPI_phys(int, long, int, int);
+extern u64 sn_intr_alloc(nasid_t, int,
+			      struct sn_irq_info *,
+			      int, nasid_t, int);
+extern void sn_intr_free(nasid_t, int, struct sn_irq_info *);
+extern struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *, nasid_t, int);
+extern void sn_set_err_irq_affinity(unsigned int);
+extern struct list_head **sn_irq_lh;
+
+#define CPU_VECTOR_TO_IRQ(cpuid,vector) (vector)
+
+#endif /* _ASM_IA64_SN_INTR_H */
diff --git a/arch/ia64/include/asm/sn/io.h b/arch/ia64/include/asm/sn/io.h
new file mode 100644
index 00000000000..41c73a73562
--- /dev/null
+++ b/arch/ia64/include/asm/sn/io.h
@@ -0,0 +1,274 @@
+/* 
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_SN_IO_H
+#define _ASM_SN_IO_H
+#include <linux/compiler.h>
+#include <asm/intrinsics.h>
+
+extern void * sn_io_addr(unsigned long port) __attribute_const__; /* Forward definition */
+extern void __sn_mmiowb(void); /* Forward definition */
+
+extern int num_cnodes;
+
+#define __sn_mf_a()   ia64_mfa()
+
+extern void sn_dma_flush(unsigned long);
+
+#define __sn_inb ___sn_inb
+#define __sn_inw ___sn_inw
+#define __sn_inl ___sn_inl
+#define __sn_outb ___sn_outb
+#define __sn_outw ___sn_outw
+#define __sn_outl ___sn_outl
+#define __sn_readb ___sn_readb
+#define __sn_readw ___sn_readw
+#define __sn_readl ___sn_readl
+#define __sn_readq ___sn_readq
+#define __sn_readb_relaxed ___sn_readb_relaxed
+#define __sn_readw_relaxed ___sn_readw_relaxed
+#define __sn_readl_relaxed ___sn_readl_relaxed
+#define __sn_readq_relaxed ___sn_readq_relaxed
+
+/*
+ * Convenience macros for setting/clearing bits using the above accessors
+ */
+
+#define __sn_setq_relaxed(addr, val) \
+	writeq((__sn_readq_relaxed(addr) | (val)), (addr))
+#define __sn_clrq_relaxed(addr, val) \
+	writeq((__sn_readq_relaxed(addr) & ~(val)), (addr))
+
+/*
+ * The following routines are SN Platform specific, called when
+ * a reference is made to inX/outX set macros.  SN Platform
+ * inX set of macros ensures that Posted DMA writes on the
+ * Bridge is flushed.
+ *
+ * The routines should be self explainatory.
+ */
+
+static inline unsigned int
+___sn_inb (unsigned long port)
+{
+	volatile unsigned char *addr;
+	unsigned char ret = -1;
+
+	if ((addr = sn_io_addr(port))) {
+		ret = *addr;
+		__sn_mf_a();
+		sn_dma_flush((unsigned long)addr);
+	}
+	return ret;
+}
+
+static inline unsigned int
+___sn_inw (unsigned long port)
+{
+	volatile unsigned short *addr;
+	unsigned short ret = -1;
+
+	if ((addr = sn_io_addr(port))) {
+		ret = *addr;
+		__sn_mf_a();
+		sn_dma_flush((unsigned long)addr);
+	}
+	return ret;
+}
+
+static inline unsigned int
+___sn_inl (unsigned long port)
+{
+	volatile unsigned int *addr;
+	unsigned int ret = -1;
+
+	if ((addr = sn_io_addr(port))) {
+		ret = *addr;
+		__sn_mf_a();
+		sn_dma_flush((unsigned long)addr);
+	}
+	return ret;
+}
+
+static inline void
+___sn_outb (unsigned char val, unsigned long port)
+{
+	volatile unsigned char *addr;
+
+	if ((addr = sn_io_addr(port))) {
+		*addr = val;
+		__sn_mmiowb();
+	}
+}
+
+static inline void
+___sn_outw (unsigned short val, unsigned long port)
+{
+	volatile unsigned short *addr;
+
+	if ((addr = sn_io_addr(port))) {
+		*addr = val;
+		__sn_mmiowb();
+	}
+}
+
+static inline void
+___sn_outl (unsigned int val, unsigned long port)
+{
+	volatile unsigned int *addr;
+
+	if ((addr = sn_io_addr(port))) {
+		*addr = val;
+		__sn_mmiowb();
+	}
+}
+
+/*
+ * The following routines are SN Platform specific, called when 
+ * a reference is made to readX/writeX set macros.  SN Platform 
+ * readX set of macros ensures that Posted DMA writes on the 
+ * Bridge is flushed.
+ * 
+ * The routines should be self explainatory.
+ */
+
+static inline unsigned char
+___sn_readb (const volatile void __iomem *addr)
+{
+	unsigned char val;
+
+	val = *(volatile unsigned char __force *)addr;
+	__sn_mf_a();
+	sn_dma_flush((unsigned long)addr);
+        return val;
+}
+
+static inline unsigned short
+___sn_readw (const volatile void __iomem *addr)
+{
+	unsigned short val;
+
+	val = *(volatile unsigned short __force *)addr;
+	__sn_mf_a();
+	sn_dma_flush((unsigned long)addr);
+        return val;
+}
+
+static inline unsigned int
+___sn_readl (const volatile void __iomem *addr)
+{
+	unsigned int val;
+
+	val = *(volatile unsigned int __force *)addr;
+	__sn_mf_a();
+	sn_dma_flush((unsigned long)addr);
+        return val;
+}
+
+static inline unsigned long
+___sn_readq (const volatile void __iomem *addr)
+{
+	unsigned long val;
+
+	val = *(volatile unsigned long __force *)addr;
+	__sn_mf_a();
+	sn_dma_flush((unsigned long)addr);
+        return val;
+}
+
+/*
+ * For generic and SN2 kernels, we have a set of fast access
+ * PIO macros.	These macros are provided on SN Platform
+ * because the normal inX and readX macros perform an
+ * additional task of flushing Post DMA request on the Bridge.
+ *
+ * These routines should be self explainatory.
+ */
+
+static inline unsigned int
+sn_inb_fast (unsigned long port)
+{
+	volatile unsigned char *addr = (unsigned char *)port;
+	unsigned char ret;
+
+	ret = *addr;
+	__sn_mf_a();
+	return ret;
+}
+
+static inline unsigned int
+sn_inw_fast (unsigned long port)
+{
+	volatile unsigned short *addr = (unsigned short *)port;
+	unsigned short ret;
+
+	ret = *addr;
+	__sn_mf_a();
+	return ret;
+}
+
+static inline unsigned int
+sn_inl_fast (unsigned long port)
+{
+	volatile unsigned int *addr = (unsigned int *)port;
+	unsigned int ret;
+
+	ret = *addr;
+	__sn_mf_a();
+	return ret;
+}
+
+static inline unsigned char
+___sn_readb_relaxed (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned char __force *)addr;
+}
+
+static inline unsigned short
+___sn_readw_relaxed (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned short __force *)addr;
+}
+
+static inline unsigned int
+___sn_readl_relaxed (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned int __force *) addr;
+}
+
+static inline unsigned long
+___sn_readq_relaxed (const volatile void __iomem *addr)
+{
+	return *(volatile unsigned long __force *) addr;
+}
+
+struct pci_dev;
+
+static inline int
+sn_pci_set_vchan(struct pci_dev *pci_dev, unsigned long *addr, int vchan)
+{
+
+	if (vchan > 1) {
+		return -1;
+	}
+
+	if (!(*addr >> 32))	/* Using a mask here would be cleaner */
+		return 0;	/* but this generates better code */
+
+	if (vchan == 1) {
+		/* Set Bit 57 */
+		*addr |= (1UL << 57);
+	} else {
+		/* Clear Bit 57 */
+		*addr &= ~(1UL << 57);
+	}
+
+	return 0;
+}
+
+#endif	/* _ASM_SN_IO_H */
diff --git a/arch/ia64/include/asm/sn/ioc3.h b/arch/ia64/include/asm/sn/ioc3.h
new file mode 100644
index 00000000000..95ed6cc83cf
--- /dev/null
+++ b/arch/ia64/include/asm/sn/ioc3.h
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2005 Silicon Graphics, Inc.
+ */
+#ifndef IA64_SN_IOC3_H
+#define IA64_SN_IOC3_H
+
+/* serial port register map */
+struct ioc3_serialregs {
+	uint32_t sscr;
+	uint32_t stpir;
+	uint32_t stcir;
+	uint32_t srpir;
+	uint32_t srcir;
+	uint32_t srtr;
+	uint32_t shadow;
+};
+
+/* SUPERIO uart register map */
+struct ioc3_uartregs {
+	char iu_lcr;
+	union {
+		char iir;	/* read only */
+		char fcr;	/* write only */
+	} u3;
+	union {
+		char ier;	/* DLAB == 0 */
+		char dlm;	/* DLAB == 1 */
+	} u2;
+	union {
+		char rbr;	/* read only, DLAB == 0 */
+		char thr;	/* write only, DLAB == 0 */
+		char dll;	/* DLAB == 1 */
+	} u1;
+	char iu_scr;
+	char iu_msr;
+	char iu_lsr;
+	char iu_mcr;
+};
+
+#define iu_rbr u1.rbr
+#define iu_thr u1.thr
+#define iu_dll u1.dll
+#define iu_ier u2.ier
+#define iu_dlm u2.dlm
+#define iu_iir u3.iir
+#define iu_fcr u3.fcr
+
+struct ioc3_sioregs {
+	char fill[0x170];
+	struct ioc3_uartregs uartb;
+	struct ioc3_uartregs uarta;
+};
+
+/* PCI IO/mem space register map */
+struct ioc3 {
+	uint32_t pci_id;
+	uint32_t pci_scr;
+	uint32_t pci_rev;
+	uint32_t pci_lat;
+	uint32_t pci_addr;
+	uint32_t pci_err_addr_l;
+	uint32_t pci_err_addr_h;
+
+	uint32_t sio_ir;
+	/* these registers are read-only for general kernel code. To
+	 * modify them use the functions in ioc3.c
+	 */
+	uint32_t sio_ies;
+	uint32_t sio_iec;
+	uint32_t sio_cr;
+	uint32_t int_out;
+	uint32_t mcr;
+	uint32_t gpcr_s;
+	uint32_t gpcr_c;
+	uint32_t gpdr;
+	uint32_t gppr[9];
+	char fill[0x4c];
+
+	/* serial port registers */
+	uint32_t sbbr_h;
+	uint32_t sbbr_l;
+
+	struct ioc3_serialregs port_a;
+	struct ioc3_serialregs port_b;
+	char fill1[0x1ff10];
+	/* superio registers */
+	struct ioc3_sioregs sregs;
+};
+
+/* These don't exist on the ioc3 serial card... */
+#define eier	fill1[8]
+#define eisr	fill1[4]
+
+#define PCI_LAT			0xc	/* Latency Timer */
+#define PCI_SCR_DROP_MODE_EN	0x00008000 /* drop pios on parity err */
+#define UARTA_BASE		0x178
+#define UARTB_BASE		0x170
+
+
+/* bitmasks for serial RX status byte */
+#define RXSB_OVERRUN		0x01	/* char(s) lost */
+#define RXSB_PAR_ERR		0x02	/* parity error */
+#define RXSB_FRAME_ERR		0x04	/* framing error */
+#define RXSB_BREAK		0x08	/* break character */
+#define RXSB_CTS		0x10	/* state of CTS */
+#define RXSB_DCD		0x20	/* state of DCD */
+#define RXSB_MODEM_VALID	0x40	/* DCD, CTS and OVERRUN are valid */
+#define RXSB_DATA_VALID		0x80	/* FRAME_ERR PAR_ERR & BREAK valid */
+
+/* bitmasks for serial TX control byte */
+#define TXCB_INT_WHEN_DONE	0x20	/* interrupt after this byte is sent */
+#define TXCB_INVALID		0x00	/* byte is invalid */
+#define TXCB_VALID		0x40	/* byte is valid */
+#define TXCB_MCR		0x80	/* data<7:0> to modem cntrl register */
+#define TXCB_DELAY		0xc0	/* delay data<7:0> mSec */
+
+/* bitmasks for SBBR_L */
+#define SBBR_L_SIZE		0x00000001	/* 0 1KB rings, 1 4KB rings */
+
+/* bitmasks for SSCR_<A:B> */
+#define SSCR_RX_THRESHOLD	0x000001ff	/* hiwater mark */
+#define SSCR_TX_TIMER_BUSY	0x00010000	/* TX timer in progress */
+#define SSCR_HFC_EN		0x00020000	/* h/w flow cntrl enabled */
+#define SSCR_RX_RING_DCD	0x00040000	/* postRX record on delta-DCD */
+#define SSCR_RX_RING_CTS	0x00080000	/* postRX record on delta-CTS */
+#define SSCR_HIGH_SPD		0x00100000	/* 4X speed */
+#define SSCR_DIAG		0x00200000	/* bypass clock divider */
+#define SSCR_RX_DRAIN		0x08000000	/* drain RX buffer to memory */
+#define SSCR_DMA_EN		0x10000000	/* enable ring buffer DMA */
+#define SSCR_DMA_PAUSE		0x20000000	/* pause DMA */
+#define SSCR_PAUSE_STATE	0x40000000	/* set when PAUSE takes effect*/
+#define SSCR_RESET		0x80000000	/* reset DMA channels */
+
+/* all producer/comsumer pointers are the same bitfield */
+#define PROD_CONS_PTR_4K	0x00000ff8	/* for 4K buffers */
+#define PROD_CONS_PTR_1K	0x000003f8	/* for 1K buffers */
+#define PROD_CONS_PTR_OFF	3
+
+/* bitmasks for SRCIR_<A:B> */
+#define SRCIR_ARM		0x80000000	/* arm RX timer */
+
+/* bitmasks for SHADOW_<A:B> */
+#define SHADOW_DR		0x00000001	/* data ready */
+#define SHADOW_OE		0x00000002	/* overrun error */
+#define SHADOW_PE		0x00000004	/* parity error */
+#define SHADOW_FE		0x00000008	/* framing error */
+#define SHADOW_BI		0x00000010	/* break interrupt */
+#define SHADOW_THRE		0x00000020	/* transmit holding reg empty */
+#define SHADOW_TEMT		0x00000040	/* transmit shift reg empty */
+#define SHADOW_RFCE		0x00000080	/* char in RX fifo has error */
+#define SHADOW_DCTS		0x00010000	/* delta clear to send */
+#define SHADOW_DDCD		0x00080000	/* delta data carrier detect */
+#define SHADOW_CTS		0x00100000	/* clear to send */
+#define SHADOW_DCD		0x00800000	/* data carrier detect */
+#define SHADOW_DTR		0x01000000	/* data terminal ready */
+#define SHADOW_RTS		0x02000000	/* request to send */
+#define SHADOW_OUT1		0x04000000	/* 16550 OUT1 bit */
+#define SHADOW_OUT2		0x08000000	/* 16550 OUT2 bit */
+#define SHADOW_LOOP		0x10000000	/* loopback enabled */
+
+/* bitmasks for SRTR_<A:B> */
+#define SRTR_CNT		0x00000fff	/* reload value for RX timer */
+#define SRTR_CNT_VAL		0x0fff0000	/* current value of RX timer */
+#define SRTR_CNT_VAL_SHIFT	16
+#define SRTR_HZ			16000		/* SRTR clock frequency */
+
+/* bitmasks for SIO_IR, SIO_IEC and SIO_IES  */
+#define SIO_IR_SA_TX_MT		0x00000001	/* Serial port A TX empty */
+#define SIO_IR_SA_RX_FULL	0x00000002	/* port A RX buf full */
+#define SIO_IR_SA_RX_HIGH	0x00000004	/* port A RX hiwat */
+#define SIO_IR_SA_RX_TIMER	0x00000008	/* port A RX timeout */
+#define SIO_IR_SA_DELTA_DCD	0x00000010	/* port A delta DCD */
+#define SIO_IR_SA_DELTA_CTS	0x00000020	/* port A delta CTS */
+#define SIO_IR_SA_INT		0x00000040	/* port A pass-thru intr */
+#define SIO_IR_SA_TX_EXPLICIT	0x00000080	/* port A explicit TX thru */
+#define SIO_IR_SA_MEMERR	0x00000100	/* port A PCI error */
+#define SIO_IR_SB_TX_MT		0x00000200
+#define SIO_IR_SB_RX_FULL	0x00000400
+#define SIO_IR_SB_RX_HIGH	0x00000800
+#define SIO_IR_SB_RX_TIMER	0x00001000
+#define SIO_IR_SB_DELTA_DCD	0x00002000
+#define SIO_IR_SB_DELTA_CTS	0x00004000
+#define SIO_IR_SB_INT		0x00008000
+#define SIO_IR_SB_TX_EXPLICIT	0x00010000
+#define SIO_IR_SB_MEMERR	0x00020000
+#define SIO_IR_PP_INT		0x00040000	/* P port pass-thru intr */
+#define SIO_IR_PP_INTA		0x00080000	/* PP context A thru */
+#define SIO_IR_PP_INTB		0x00100000	/* PP context B thru */
+#define SIO_IR_PP_MEMERR	0x00200000	/* PP PCI error */
+#define SIO_IR_KBD_INT		0x00400000	/* kbd/mouse intr */
+#define SIO_IR_RT_INT		0x08000000	/* RT output pulse */
+#define SIO_IR_GEN_INT1		0x10000000	/* RT input pulse */
+#define SIO_IR_GEN_INT_SHIFT	28
+
+/* per device interrupt masks */
+#define SIO_IR_SA		(SIO_IR_SA_TX_MT | \
+				 SIO_IR_SA_RX_FULL | \
+				 SIO_IR_SA_RX_HIGH | \
+				 SIO_IR_SA_RX_TIMER | \
+				 SIO_IR_SA_DELTA_DCD | \
+				 SIO_IR_SA_DELTA_CTS | \
+				 SIO_IR_SA_INT | \
+				 SIO_IR_SA_TX_EXPLICIT | \
+				 SIO_IR_SA_MEMERR)
+
+#define SIO_IR_SB		(SIO_IR_SB_TX_MT | \
+				 SIO_IR_SB_RX_FULL | \
+				 SIO_IR_SB_RX_HIGH | \
+				 SIO_IR_SB_RX_TIMER | \
+				 SIO_IR_SB_DELTA_DCD | \
+				 SIO_IR_SB_DELTA_CTS | \
+				 SIO_IR_SB_INT | \
+				 SIO_IR_SB_TX_EXPLICIT | \
+				 SIO_IR_SB_MEMERR)
+
+#define SIO_IR_PP		(SIO_IR_PP_INT | SIO_IR_PP_INTA | \
+				 SIO_IR_PP_INTB | SIO_IR_PP_MEMERR)
+#define SIO_IR_RT		(SIO_IR_RT_INT | SIO_IR_GEN_INT1)
+
+/* bitmasks for SIO_CR */
+#define SIO_CR_CMD_PULSE_SHIFT 15
+#define SIO_CR_SER_A_BASE_SHIFT 1
+#define SIO_CR_SER_B_BASE_SHIFT 8
+#define SIO_CR_ARB_DIAG		0x00380000	/* cur !enet PCI requet (ro) */
+#define SIO_CR_ARB_DIAG_TXA	0x00000000
+#define SIO_CR_ARB_DIAG_RXA	0x00080000
+#define SIO_CR_ARB_DIAG_TXB	0x00100000
+#define SIO_CR_ARB_DIAG_RXB	0x00180000
+#define SIO_CR_ARB_DIAG_PP	0x00200000
+#define SIO_CR_ARB_DIAG_IDLE	0x00400000	/* 0 -> active request (ro) */
+
+/* defs for some of the generic I/O pins */
+#define GPCR_PHY_RESET		0x20	/* pin is output to PHY reset */
+#define GPCR_UARTB_MODESEL	0x40	/* pin is output to port B mode sel */
+#define GPCR_UARTA_MODESEL	0x80	/* pin is output to port A mode sel */
+
+#define GPPR_PHY_RESET_PIN	5	/* GIO pin controlling phy reset */
+#define GPPR_UARTB_MODESEL_PIN	6	/* GIO pin cntrling uartb modeselect */
+#define GPPR_UARTA_MODESEL_PIN	7	/* GIO pin cntrling uarta modeselect */
+
+#endif /* IA64_SN_IOC3_H */
diff --git a/arch/ia64/include/asm/sn/klconfig.h b/arch/ia64/include/asm/sn/klconfig.h
new file mode 100644
index 00000000000..bcbf209d63b
--- /dev/null
+++ b/arch/ia64/include/asm/sn/klconfig.h
@@ -0,0 +1,246 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Derived from IRIX <sys/SN/klconfig.h>.
+ *
+ * Copyright (C) 1992-1997,1999,2001-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (C) 1999 by Ralf Baechle
+ */
+#ifndef _ASM_IA64_SN_KLCONFIG_H
+#define _ASM_IA64_SN_KLCONFIG_H
+
+/*
+ * The KLCONFIG structures store info about the various BOARDs found
+ * during Hardware Discovery. In addition, it stores info about the
+ * components found on the BOARDs.
+ */
+
+typedef s32 klconf_off_t;
+
+
+/* Functions/macros needed to use this structure */
+
+typedef struct kl_config_hdr {
+	char		pad[20];
+	klconf_off_t	ch_board_info;	/* the link list of boards */
+	char		pad0[88];
+} kl_config_hdr_t;
+
+
+#define NODE_OFFSET_TO_LBOARD(nasid,off)        (lboard_t*)(GLOBAL_CAC_ADDR((nasid), (off)))
+
+/*
+ * The KLCONFIG area is organized as a LINKED LIST of BOARDs. A BOARD
+ * can be either 'LOCAL' or 'REMOTE'. LOCAL means it is attached to 
+ * the LOCAL/current NODE. REMOTE means it is attached to a different
+ * node.(TBD - Need a way to treat ROUTER boards.)
+ *
+ * There are 2 different structures to represent these boards -
+ * lboard - Local board, rboard - remote board. These 2 structures
+ * can be arbitrarily mixed in the LINKED LIST of BOARDs. (Refer
+ * Figure below). The first byte of the rboard or lboard structure
+ * is used to find out its type - no unions are used.
+ * If it is a lboard, then the config info of this board will be found
+ * on the local node. (LOCAL NODE BASE + offset value gives pointer to 
+ * the structure.
+ * If it is a rboard, the local structure contains the node number
+ * and the offset of the beginning of the LINKED LIST on the remote node.
+ * The details of the hardware on a remote node can be built locally,
+ * if required, by reading the LINKED LIST on the remote node and 
+ * ignoring all the rboards on that node.
+ *
+ * The local node uses the REMOTE NODE NUMBER + OFFSET to point to the 
+ * First board info on the remote node. The remote node list is 
+ * traversed as the local list, using the REMOTE BASE ADDRESS and not
+ * the local base address and ignoring all rboard values.
+ *
+ * 
+ KLCONFIG
+
+ +------------+      +------------+      +------------+      +------------+
+ |  lboard    |  +-->|   lboard   |  +-->|   rboard   |  +-->|   lboard   |
+ +------------+  |   +------------+  |   +------------+  |   +------------+
+ | board info |  |   | board info |  |   |errinfo,bptr|  |   | board info |
+ +------------+  |   +------------+  |   +------------+  |   +------------+
+ | offset     |--+   |  offset    |--+   |  offset    |--+   |offset=NULL |
+ +------------+      +------------+      +------------+      +------------+
+
+
+ +------------+
+ | board info |
+ +------------+       +--------------------------------+
+ | compt 1    |------>| type, rev, diaginfo, size ...  |  (CPU)
+ +------------+       +--------------------------------+
+ | compt 2    |--+
+ +------------+  |    +--------------------------------+
+ |  ...       |  +--->| type, rev, diaginfo, size ...  |  (MEM_BANK)
+ +------------+       +--------------------------------+
+ | errinfo    |--+
+ +------------+  |    +--------------------------------+
+                 +--->|r/l brd errinfo,compt err flags |
+                      +--------------------------------+
+
+ *
+ * Each BOARD consists of COMPONENTs and the BOARD structure has 
+ * pointers (offsets) to its COMPONENT structure.
+ * The COMPONENT structure has version info, size and speed info, revision,
+ * error info and the NIC info. This structure can accommodate any
+ * BOARD with arbitrary COMPONENT composition.
+ *
+ * The ERRORINFO part of each BOARD has error information
+ * that describes errors about the BOARD itself. It also has flags to
+ * indicate the COMPONENT(s) on the board that have errors. The error 
+ * information specific to the COMPONENT is present in the respective 
+ * COMPONENT structure.
+ *
+ * The ERRORINFO structure is also treated like a COMPONENT, ie. the 
+ * BOARD has pointers(offset) to the ERRORINFO structure. The rboard
+ * structure also has a pointer to the ERRORINFO structure. This is 
+ * the place to store ERRORINFO about a REMOTE NODE, if the HUB on
+ * that NODE is not working or if the REMOTE MEMORY is BAD. In cases where 
+ * only the CPU of the REMOTE NODE is disabled, the ERRORINFO pointer can
+ * be a NODE NUMBER, REMOTE OFFSET combination, pointing to error info 
+ * which is present on the REMOTE NODE.(TBD)
+ * REMOTE ERRINFO can be stored on any of the nearest nodes 
+ * or on all the nearest nodes.(TBD)
+ * Like BOARD structures, REMOTE ERRINFO structures can be built locally
+ * using the rboard errinfo pointer.
+ *
+ * In order to get useful information from this Data organization, a set of
+ * interface routines are provided (TBD). The important thing to remember while
+ * manipulating the structures, is that, the NODE number information should
+ * be used. If the NODE is non-zero (remote) then each offset should
+ * be added to the REMOTE BASE ADDR else it should be added to the LOCAL BASE ADDR. 
+ * This includes offsets for BOARDS, COMPONENTS and ERRORINFO.
+ * 
+ * Note that these structures do not provide much info about connectivity.
+ * That info will be part of HWGRAPH, which is an extension of the cfg_t
+ * data structure. (ref IP27prom/cfg.h) It has to be extended to include
+ * the IO part of the Network(TBD).
+ *
+ * The data structures below define the above concepts.
+ */
+
+
+/*
+ * BOARD classes
+ */
+
+#define KLCLASS_MASK	0xf0   
+#define KLCLASS_NONE	0x00
+#define KLCLASS_NODE	0x10             /* CPU, Memory and HUB board */
+#define KLCLASS_CPU	KLCLASS_NODE	
+#define KLCLASS_IO	0x20             /* BaseIO, 4 ch SCSI, ethernet, FDDI 
+					    and the non-graphics widget boards */
+#define KLCLASS_ROUTER	0x30             /* Router board */
+#define KLCLASS_MIDPLANE 0x40            /* We need to treat this as a board
+                                            so that we can record error info */
+#define KLCLASS_IOBRICK	0x70		/* IP35 iobrick */
+#define KLCLASS_MAX	8		/* Bump this if a new CLASS is added */
+
+#define KLCLASS(_x) ((_x) & KLCLASS_MASK)
+
+
+/*
+ * board types
+ */
+
+#define KLTYPE_MASK	0x0f
+#define KLTYPE(_x)      ((_x) & KLTYPE_MASK)
+
+#define KLTYPE_SNIA	(KLCLASS_CPU | 0x1)
+#define KLTYPE_TIO	(KLCLASS_CPU | 0x2)
+
+#define KLTYPE_ROUTER     (KLCLASS_ROUTER | 0x1)
+#define KLTYPE_META_ROUTER (KLCLASS_ROUTER | 0x3)
+#define KLTYPE_REPEATER_ROUTER (KLCLASS_ROUTER | 0x4)
+
+#define KLTYPE_IOBRICK_XBOW	(KLCLASS_MIDPLANE | 0x2)
+
+#define KLTYPE_IOBRICK		(KLCLASS_IOBRICK | 0x0)
+#define KLTYPE_NBRICK		(KLCLASS_IOBRICK | 0x4)
+#define KLTYPE_PXBRICK		(KLCLASS_IOBRICK | 0x6)
+#define KLTYPE_IXBRICK		(KLCLASS_IOBRICK | 0x7)
+#define KLTYPE_CGBRICK		(KLCLASS_IOBRICK | 0x8)
+#define KLTYPE_OPUSBRICK	(KLCLASS_IOBRICK | 0x9)
+#define KLTYPE_SABRICK          (KLCLASS_IOBRICK | 0xa)
+#define KLTYPE_IABRICK		(KLCLASS_IOBRICK | 0xb)
+#define KLTYPE_PABRICK          (KLCLASS_IOBRICK | 0xc)
+#define KLTYPE_GABRICK		(KLCLASS_IOBRICK | 0xd)
+
+
+/* 
+ * board structures
+ */
+
+#define MAX_COMPTS_PER_BRD 24
+
+typedef struct lboard_s {
+	klconf_off_t 	brd_next_any;     /* Next BOARD */
+	unsigned char 	struct_type;      /* type of structure, local or remote */
+	unsigned char 	brd_type;         /* type+class */
+	unsigned char 	brd_sversion;     /* version of this structure */
+        unsigned char 	brd_brevision;    /* board revision */
+        unsigned char 	brd_promver;      /* board prom version, if any */
+ 	unsigned char 	brd_flags;        /* Enabled, Disabled etc */
+	unsigned char 	brd_slot;         /* slot number */
+	unsigned short	brd_debugsw;      /* Debug switches */
+	geoid_t		brd_geoid;	  /* geo id */
+	partid_t 	brd_partition;    /* Partition number */
+        unsigned short 	brd_diagval;      /* diagnostic value */
+        unsigned short 	brd_diagparm;     /* diagnostic parameter */
+        unsigned char 	brd_inventory;    /* inventory history */
+        unsigned char 	brd_numcompts;    /* Number of components */
+        nic_t         	brd_nic;          /* Number in CAN */
+	nasid_t		brd_nasid;        /* passed parameter */
+	klconf_off_t 	brd_compts[MAX_COMPTS_PER_BRD]; /* pointers to COMPONENTS */
+	klconf_off_t 	brd_errinfo;      /* Board's error information */
+	struct lboard_s *brd_parent;	  /* Logical parent for this brd */
+	char            pad0[4];
+	unsigned char	brd_confidence;	  /* confidence that the board is bad */
+	nasid_t		brd_owner;        /* who owns this board */
+	unsigned char 	brd_nic_flags;    /* To handle 8 more NICs */
+	char		pad1[24];	  /* future expansion */
+	char		brd_name[32];
+	nasid_t		brd_next_same_host; /* host of next brd w/same nasid */
+	klconf_off_t	brd_next_same;    /* Next BOARD with same nasid */
+} lboard_t;
+
+/*
+ * Generic info structure. This stores common info about a 
+ * component.
+ */
+ 
+typedef struct klinfo_s {                  /* Generic info */
+        unsigned char   struct_type;       /* type of this structure */
+        unsigned char   struct_version;    /* version of this structure */
+        unsigned char   flags;            /* Enabled, disabled etc */
+        unsigned char   revision;         /* component revision */
+        unsigned short  diagval;          /* result of diagnostics */
+        unsigned short  diagparm;         /* diagnostic parameter */
+        unsigned char   inventory;        /* previous inventory status */
+        unsigned short  partid;		   /* widget part number */
+	nic_t 		nic;              /* MUst be aligned properly */
+        unsigned char   physid;           /* physical id of component */
+        unsigned int    virtid;           /* virtual id as seen by system */
+	unsigned char	widid;	          /* Widget id - if applicable */
+	nasid_t		nasid;            /* node number - from parent */
+	char		pad1;		  /* pad out structure. */
+	char		pad2;		  /* pad out structure. */
+	void		*data;
+        klconf_off_t	errinfo;          /* component specific errors */
+        unsigned short  pad3;             /* pci fields have moved over to */
+        unsigned short  pad4;             /* klbri_t */
+} klinfo_t ;
+
+
+static inline lboard_t *find_lboard_next(lboard_t * brd)
+{
+	if (brd && brd->brd_next_any)
+		return NODE_OFFSET_TO_LBOARD(NASID_GET(brd), brd->brd_next_any);
+        return NULL;
+}
+
+#endif /* _ASM_IA64_SN_KLCONFIG_H */
diff --git a/arch/ia64/include/asm/sn/l1.h b/arch/ia64/include/asm/sn/l1.h
new file mode 100644
index 00000000000..344bf44bb35
--- /dev/null
+++ b/arch/ia64/include/asm/sn/l1.h
@@ -0,0 +1,51 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#ifndef _ASM_IA64_SN_L1_H
+#define _ASM_IA64_SN_L1_H
+
+/* brick type response codes */
+#define L1_BRICKTYPE_PX         0x23            /* # */
+#define L1_BRICKTYPE_PE         0x25            /* % */
+#define L1_BRICKTYPE_N_p0       0x26            /* & */
+#define L1_BRICKTYPE_IP45       0x34            /* 4 */
+#define L1_BRICKTYPE_IP41       0x35            /* 5 */
+#define L1_BRICKTYPE_TWISTER    0x36            /* 6 */ /* IP53 & ROUTER */
+#define L1_BRICKTYPE_IX         0x3d            /* = */
+#define L1_BRICKTYPE_IP34       0x61            /* a */
+#define L1_BRICKTYPE_GA		0x62            /* b */
+#define L1_BRICKTYPE_C          0x63            /* c */
+#define L1_BRICKTYPE_OPUS_TIO	0x66		/* f */
+#define L1_BRICKTYPE_I          0x69            /* i */
+#define L1_BRICKTYPE_N          0x6e            /* n */
+#define L1_BRICKTYPE_OPUS       0x6f		/* o */
+#define L1_BRICKTYPE_P          0x70            /* p */
+#define L1_BRICKTYPE_R          0x72            /* r */
+#define L1_BRICKTYPE_CHI_CG     0x76            /* v */
+#define L1_BRICKTYPE_X          0x78            /* x */
+#define L1_BRICKTYPE_X2         0x79            /* y */
+#define L1_BRICKTYPE_SA		0x5e            /* ^ */
+#define L1_BRICKTYPE_PA		0x6a            /* j */
+#define L1_BRICKTYPE_IA		0x6b            /* k */
+#define L1_BRICKTYPE_ATHENA	0x2b            /* + */
+#define L1_BRICKTYPE_DAYTONA	0x7a            /* z */
+#define L1_BRICKTYPE_1932	0x2c		/* . */
+#define L1_BRICKTYPE_191010	0x2e		/* , */
+
+/* board type response codes */
+#define L1_BOARDTYPE_IP69       0x0100          /* CA */
+#define L1_BOARDTYPE_IP63       0x0200          /* CB */
+#define L1_BOARDTYPE_BASEIO     0x0300          /* IB */
+#define L1_BOARDTYPE_PCIE2SLOT  0x0400          /* IC */
+#define L1_BOARDTYPE_PCIX3SLOT  0x0500          /* ID */
+#define L1_BOARDTYPE_PCIXPCIE4SLOT 0x0600       /* IE */
+#define L1_BOARDTYPE_ABACUS     0x0700          /* AB */
+#define L1_BOARDTYPE_DAYTONA    0x0800          /* AD */
+#define L1_BOARDTYPE_INVAL      (-1)            /* invalid brick type */
+
+#endif /* _ASM_IA64_SN_L1_H */
diff --git a/arch/ia64/include/asm/sn/leds.h b/arch/ia64/include/asm/sn/leds.h
new file mode 100644
index 00000000000..66cf8c4d92c
--- /dev/null
+++ b/arch/ia64/include/asm/sn/leds.h
@@ -0,0 +1,33 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_LEDS_H
+#define _ASM_IA64_SN_LEDS_H
+
+#include <asm/sn/addrs.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/shub_mmr.h>
+
+#define LED0		(LOCAL_MMR_ADDR(SH_REAL_JUNK_BUS_LED0))
+#define LED_CPU_SHIFT	16
+
+#define LED_CPU_HEARTBEAT	0x01
+#define LED_CPU_ACTIVITY	0x02
+#define LED_ALWAYS_SET		0x00
+
+/*
+ * Basic macros for flashing the LEDS on an SGI SN.
+ */
+
+static __inline__ void
+set_led_bits(u8 value, u8 mask)
+{
+	pda->led_state = (pda->led_state & ~mask) | (value & mask);
+	*pda->led_address = (short) pda->led_state;
+}
+
+#endif /* _ASM_IA64_SN_LEDS_H */
+
diff --git a/arch/ia64/include/asm/sn/module.h b/arch/ia64/include/asm/sn/module.h
new file mode 100644
index 00000000000..734e980ece2
--- /dev/null
+++ b/arch/ia64/include/asm/sn/module.h
@@ -0,0 +1,127 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_MODULE_H
+#define _ASM_IA64_SN_MODULE_H
+
+/* parameter for format_module_id() */
+#define MODULE_FORMAT_BRIEF	1
+#define MODULE_FORMAT_LONG	2
+#define MODULE_FORMAT_LCD	3
+
+/*
+ *	Module id format
+ *
+ *	31-16	Rack ID (encoded class, group, number - 16-bit unsigned int)
+ *	 15-8	Brick type (8-bit ascii character)
+ *	  7-0	Bay (brick position in rack (0-63) - 8-bit unsigned int)
+ *
+ */
+
+/*
+ * Macros for getting the brick type
+ */
+#define MODULE_BTYPE_MASK	0xff00
+#define MODULE_BTYPE_SHFT	8
+#define MODULE_GET_BTYPE(_m)	(((_m) & MODULE_BTYPE_MASK) >> MODULE_BTYPE_SHFT)
+#define MODULE_BT_TO_CHAR(_b)	((char)(_b))
+#define MODULE_GET_BTCHAR(_m)	(MODULE_BT_TO_CHAR(MODULE_GET_BTYPE(_m)))
+
+/*
+ * Macros for getting the rack ID.
+ */
+#define MODULE_RACK_MASK	0xffff0000
+#define MODULE_RACK_SHFT	16
+#define MODULE_GET_RACK(_m)	(((_m) & MODULE_RACK_MASK) >> MODULE_RACK_SHFT)
+
+/*
+ * Macros for getting the brick position
+ */
+#define MODULE_BPOS_MASK	0x00ff
+#define MODULE_BPOS_SHFT	0
+#define MODULE_GET_BPOS(_m)	(((_m) & MODULE_BPOS_MASK) >> MODULE_BPOS_SHFT)
+
+/*
+ * Macros for encoding and decoding rack IDs
+ * A rack number consists of three parts:
+ *   class (0==CPU/mixed, 1==I/O), group, number
+ *
+ * Rack number is stored just as it is displayed on the screen:
+ * a 3-decimal-digit number.
+ */
+#define RACK_CLASS_DVDR         100
+#define RACK_GROUP_DVDR         10
+#define RACK_NUM_DVDR           1
+
+#define RACK_CREATE_RACKID(_c, _g, _n)  ((_c) * RACK_CLASS_DVDR +       \
+        (_g) * RACK_GROUP_DVDR + (_n) * RACK_NUM_DVDR)
+
+#define RACK_GET_CLASS(_r)              ((_r) / RACK_CLASS_DVDR)
+#define RACK_GET_GROUP(_r)              (((_r) - RACK_GET_CLASS(_r) *   \
+            RACK_CLASS_DVDR) / RACK_GROUP_DVDR)
+#define RACK_GET_NUM(_r)                (((_r) - RACK_GET_CLASS(_r) *   \
+            RACK_CLASS_DVDR - RACK_GET_GROUP(_r) *      \
+            RACK_GROUP_DVDR) / RACK_NUM_DVDR)
+
+/*
+ * Macros for encoding and decoding rack IDs
+ * A rack number consists of three parts:
+ *   class      1 bit, 0==CPU/mixed, 1==I/O
+ *   group      2 bits for CPU/mixed, 3 bits for I/O
+ *   number     3 bits for CPU/mixed, 2 bits for I/O (1 based)
+ */
+#define RACK_GROUP_BITS(_r)     (RACK_GET_CLASS(_r) ? 3 : 2)
+#define RACK_NUM_BITS(_r)       (RACK_GET_CLASS(_r) ? 2 : 3)
+
+#define RACK_CLASS_MASK(_r)     0x20
+#define RACK_CLASS_SHFT(_r)     5
+#define RACK_ADD_CLASS(_r, _c)  \
+        ((_r) |= (_c) << RACK_CLASS_SHFT(_r) & RACK_CLASS_MASK(_r))
+
+#define RACK_GROUP_SHFT(_r)     RACK_NUM_BITS(_r)
+#define RACK_GROUP_MASK(_r)     \
+        ( (((unsigned)1<<RACK_GROUP_BITS(_r)) - 1) << RACK_GROUP_SHFT(_r) )
+#define RACK_ADD_GROUP(_r, _g)  \
+        ((_r) |= (_g) << RACK_GROUP_SHFT(_r) & RACK_GROUP_MASK(_r))
+
+#define RACK_NUM_SHFT(_r)       0
+#define RACK_NUM_MASK(_r)       \
+        ( (((unsigned)1<<RACK_NUM_BITS(_r)) - 1) << RACK_NUM_SHFT(_r) )
+#define RACK_ADD_NUM(_r, _n)    \
+        ((_r) |= ((_n) - 1) << RACK_NUM_SHFT(_r) & RACK_NUM_MASK(_r))
+
+
+/*
+ * Brick type definitions
+ */
+#define MAX_BRICK_TYPES         256 /* brick type is stored as uchar */
+
+extern char brick_types[];
+
+#define MODULE_CBRICK           0
+#define MODULE_RBRICK           1
+#define MODULE_IBRICK           2
+#define MODULE_KBRICK           3
+#define MODULE_XBRICK           4
+#define MODULE_DBRICK           5
+#define MODULE_PBRICK           6
+#define MODULE_NBRICK           7
+#define MODULE_PEBRICK          8
+#define MODULE_PXBRICK          9
+#define MODULE_IXBRICK          10
+#define MODULE_CGBRICK		11
+#define MODULE_OPUSBRICK        12
+#define MODULE_SABRICK		13	/* TIO BringUp Brick */
+#define MODULE_IABRICK		14
+#define MODULE_PABRICK		15
+#define MODULE_GABRICK		16
+#define MODULE_OPUS_TIO		17	/* OPUS TIO Riser */
+
+extern char brick_types[];
+extern void format_module_id(char *, moduleid_t, int);
+
+#endif /* _ASM_IA64_SN_MODULE_H */
diff --git a/arch/ia64/include/asm/sn/mspec.h b/arch/ia64/include/asm/sn/mspec.h
new file mode 100644
index 00000000000..c1d3c50c322
--- /dev/null
+++ b/arch/ia64/include/asm/sn/mspec.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2001-2008 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_MSPEC_H
+#define _ASM_IA64_SN_MSPEC_H
+
+#define FETCHOP_VAR_SIZE 64 /* 64 byte per fetchop variable */
+
+#define FETCHOP_LOAD		0
+#define FETCHOP_INCREMENT	8
+#define FETCHOP_DECREMENT	16
+#define FETCHOP_CLEAR		24
+
+#define FETCHOP_STORE		0
+#define FETCHOP_AND		24
+#define FETCHOP_OR		32
+
+#define FETCHOP_CLEAR_CACHE	56
+
+#define FETCHOP_LOAD_OP(addr, op) ( \
+         *(volatile long *)((char*) (addr) + (op)))
+
+#define FETCHOP_STORE_OP(addr, op, x) ( \
+         *(volatile long *)((char*) (addr) + (op)) = (long) (x))
+
+#ifdef __KERNEL__
+
+/*
+ * Each Atomic Memory Operation (amo, formerly known as fetchop)
+ * variable is 64 bytes long.  The first 8 bytes are used.  The
+ * remaining 56 bytes are unaddressable due to the operation taking
+ * that portion of the address.
+ *
+ * NOTE: The amo structure _MUST_ be placed in either the first or second
+ * half of the cache line.  The cache line _MUST NOT_ be used for anything
+ * other than additional amo entries.  This is because there are two
+ * addresses which reference the same physical cache line.  One will
+ * be a cached entry with the memory type bits all set.  This address
+ * may be loaded into processor cache.  The amo will be referenced
+ * uncached via the memory special memory type.  If any portion of the
+ * cached cache-line is modified, when that line is flushed, it will
+ * overwrite the uncached value in physical memory and lead to
+ * inconsistency.
+ */
+struct amo {
+        u64 variable;
+        u64 unused[7];
+};
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_IA64_SN_MSPEC_H */
diff --git a/arch/ia64/include/asm/sn/nodepda.h b/arch/ia64/include/asm/sn/nodepda.h
new file mode 100644
index 00000000000..ee118b901de
--- /dev/null
+++ b/arch/ia64/include/asm/sn/nodepda.h
@@ -0,0 +1,82 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_NODEPDA_H
+#define _ASM_IA64_SN_NODEPDA_H
+
+
+#include <asm/irq.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/bte.h>
+
+/*
+ * NUMA Node-Specific Data structures are defined in this file.
+ * In particular, this is the location of the node PDA.
+ * A pointer to the right node PDA is saved in each CPU PDA.
+ */
+
+/*
+ * Node-specific data structure.
+ *
+ * One of these structures is allocated on each node of a NUMA system.
+ *
+ * This structure provides a convenient way of keeping together 
+ * all per-node data structures. 
+ */
+struct phys_cpuid {
+	short			nasid;
+	char			subnode;
+	char			slice;
+};
+
+struct nodepda_s {
+	void 		*pdinfo;	/* Platform-dependent per-node info */
+
+	/*
+	 * The BTEs on this node are shared by the local cpus
+	 */
+	struct bteinfo_s	bte_if[MAX_BTES_PER_NODE];	/* Virtual Interface */
+	struct timer_list	bte_recovery_timer;
+	spinlock_t		bte_recovery_lock;
+
+	/* 
+	 * Array of pointers to the nodepdas for each node.
+	 */
+	struct nodepda_s	*pernode_pdaindr[MAX_COMPACT_NODES]; 
+
+	/*
+	 * Array of physical cpu identifiers. Indexed by cpuid.
+	 */
+	struct phys_cpuid	phys_cpuid[NR_CPUS];
+	spinlock_t		ptc_lock ____cacheline_aligned_in_smp;
+};
+
+typedef struct nodepda_s nodepda_t;
+
+/*
+ * Access Functions for node PDA.
+ * Since there is one nodepda for each node, we need a convenient mechanism
+ * to access these nodepdas without cluttering code with #ifdefs.
+ * The next set of definitions provides this.
+ * Routines are expected to use 
+ *
+ *	sn_nodepda   - to access node PDA for the node on which code is running
+ *	NODEPDA(cnodeid)   - to access node PDA for cnodeid
+ */
+
+DECLARE_PER_CPU(struct nodepda_s *, __sn_nodepda);
+#define sn_nodepda		(__get_cpu_var(__sn_nodepda))
+#define	NODEPDA(cnodeid)	(sn_nodepda->pernode_pdaindr[cnodeid])
+
+/*
+ * Check if given a compact node id the corresponding node has all the
+ * cpus disabled. 
+ */
+#define is_headless_node(cnodeid)	(nr_cpus_node(cnodeid) == 0)
+
+#endif /* _ASM_IA64_SN_NODEPDA_H */
diff --git a/arch/ia64/include/asm/sn/pcibr_provider.h b/arch/ia64/include/asm/sn/pcibr_provider.h
new file mode 100644
index 00000000000..da205b7cdaa
--- /dev/null
+++ b/arch/ia64/include/asm/sn/pcibr_provider.h
@@ -0,0 +1,150 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992-1997,2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
+#define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
+
+#include <asm/sn/intr.h>
+#include <asm/sn/pcibus_provider_defs.h>
+
+/* Workarounds */
+#define PV907516 (1 << 1) /* TIOCP: Don't write the write buffer flush reg */
+
+#define BUSTYPE_MASK                    0x1
+
+/* Macros given a pcibus structure */
+#define IS_PCIX(ps)     ((ps)->pbi_bridge_mode & BUSTYPE_MASK)
+#define IS_PCI_BRIDGE_ASIC(asic) (asic == PCIIO_ASIC_TYPE_PIC || \
+                asic == PCIIO_ASIC_TYPE_TIOCP)
+#define IS_PIC_SOFT(ps)     (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_PIC)
+#define IS_TIOCP_SOFT(ps)   (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_TIOCP)
+
+
+/*
+ * The different PCI Bridge types supported on the SGI Altix platforms
+ */
+#define PCIBR_BRIDGETYPE_UNKNOWN       -1
+#define PCIBR_BRIDGETYPE_PIC            2
+#define PCIBR_BRIDGETYPE_TIOCP          3
+
+/*
+ * Bridge 64bit Direct Map Attributes
+ */
+#define PCI64_ATTR_PREF                 (1ull << 59)
+#define PCI64_ATTR_PREC                 (1ull << 58)
+#define PCI64_ATTR_VIRTUAL              (1ull << 57)
+#define PCI64_ATTR_BAR                  (1ull << 56)
+#define PCI64_ATTR_SWAP                 (1ull << 55)
+#define PCI64_ATTR_VIRTUAL1             (1ull << 54)
+
+#define PCI32_LOCAL_BASE                0
+#define PCI32_MAPPED_BASE               0x40000000
+#define PCI32_DIRECT_BASE               0x80000000
+
+#define IS_PCI32_MAPPED(x)              ((u64)(x) < PCI32_DIRECT_BASE && \
+                                         (u64)(x) >= PCI32_MAPPED_BASE)
+#define IS_PCI32_DIRECT(x)              ((u64)(x) >= PCI32_MAPPED_BASE)
+
+
+/*
+ * Bridge PMU Address Transaltion Entry Attibutes
+ */
+#define PCI32_ATE_V                     (0x1 << 0)
+#define PCI32_ATE_CO                    (0x1 << 1)	/* PIC ASIC ONLY */
+#define PCI32_ATE_PIO                   (0x1 << 1)	/* TIOCP ASIC ONLY */
+#define PCI32_ATE_MSI                   (0x1 << 2)
+#define PCI32_ATE_PREF                  (0x1 << 3)
+#define PCI32_ATE_BAR                   (0x1 << 4)
+#define PCI32_ATE_ADDR_SHFT             12
+
+#define MINIMAL_ATES_REQUIRED(addr, size) \
+	(IOPG(IOPGOFF(addr) + (size) - 1) == IOPG((size) - 1))
+
+#define MINIMAL_ATE_FLAG(addr, size) \
+	(MINIMAL_ATES_REQUIRED((u64)addr, size) ? 1 : 0)
+
+/* bit 29 of the pci address is the SWAP bit */
+#define ATE_SWAPSHIFT                   29
+#define ATE_SWAP_ON(x)                  ((x) |= (1 << ATE_SWAPSHIFT))
+#define ATE_SWAP_OFF(x)                 ((x) &= ~(1 << ATE_SWAPSHIFT))
+
+/*
+ * I/O page size
+ */
+#if PAGE_SIZE < 16384
+#define IOPFNSHIFT                      12      /* 4K per mapped page */
+#else
+#define IOPFNSHIFT                      14      /* 16K per mapped page */
+#endif
+
+#define IOPGSIZE                        (1 << IOPFNSHIFT)
+#define IOPG(x)                         ((x) >> IOPFNSHIFT)
+#define IOPGOFF(x)                      ((x) & (IOPGSIZE-1))
+
+#define PCIBR_DEV_SWAP_DIR              (1ull << 19)
+#define PCIBR_CTRL_PAGE_SIZE            (0x1 << 21)
+
+/*
+ * PMU resources.
+ */
+struct ate_resource{
+	u64 *ate;
+	u64 num_ate;
+	u64 lowest_free_index;
+};
+
+struct pcibus_info {
+	struct pcibus_bussoft	pbi_buscommon;   /* common header */
+	u32                pbi_moduleid;
+	short                   pbi_bridge_type;
+	short                   pbi_bridge_mode;
+
+	struct ate_resource     pbi_int_ate_resource;
+	u64                pbi_int_ate_size;
+
+	u64                pbi_dir_xbase;
+	char                    pbi_hub_xid;
+
+	u64                pbi_devreg[8];
+
+	u32		pbi_valid_devices;
+	u32		pbi_enabled_devices;
+
+	spinlock_t              pbi_lock;
+};
+
+extern int  pcibr_init_provider(void);
+extern void *pcibr_bus_fixup(struct pcibus_bussoft *, struct pci_controller *);
+extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t, int type);
+extern dma_addr_t pcibr_dma_map_consistent(struct pci_dev *, unsigned long, size_t, int type);
+extern void pcibr_dma_unmap(struct pci_dev *, dma_addr_t, int);
+
+/*
+ * prototypes for the bridge asic register access routines in pcibr_reg.c
+ */
+extern void             pcireg_control_bit_clr(struct pcibus_info *, u64);
+extern void             pcireg_control_bit_set(struct pcibus_info *, u64);
+extern u64         pcireg_tflush_get(struct pcibus_info *);
+extern u64         pcireg_intr_status_get(struct pcibus_info *);
+extern void             pcireg_intr_enable_bit_clr(struct pcibus_info *, u64);
+extern void             pcireg_intr_enable_bit_set(struct pcibus_info *, u64);
+extern void             pcireg_intr_addr_addr_set(struct pcibus_info *, int, u64);
+extern void             pcireg_force_intr_set(struct pcibus_info *, int);
+extern u64         pcireg_wrb_flush_get(struct pcibus_info *, int);
+extern void             pcireg_int_ate_set(struct pcibus_info *, int, u64);
+extern u64 __iomem *	pcireg_int_ate_addr(struct pcibus_info *, int);
+extern void 		pcibr_force_interrupt(struct sn_irq_info *sn_irq_info);
+extern void 		pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info);
+extern int 		pcibr_ate_alloc(struct pcibus_info *, int);
+extern void 		pcibr_ate_free(struct pcibus_info *, int);
+extern void 		ate_write(struct pcibus_info *, int, int, u64);
+extern int sal_pcibr_slot_enable(struct pcibus_info *soft, int device,
+				 void *resp, char **ssdt);
+extern int sal_pcibr_slot_disable(struct pcibus_info *soft, int device,
+				  int action, void *resp);
+extern u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus);
+#endif
diff --git a/arch/ia64/include/asm/sn/pcibus_provider_defs.h b/arch/ia64/include/asm/sn/pcibus_provider_defs.h
new file mode 100644
index 00000000000..8f7c83d0f6d
--- /dev/null
+++ b/arch/ia64/include/asm/sn/pcibus_provider_defs.h
@@ -0,0 +1,68 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
+#define _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
+
+/*
+ * SN pci asic types.  Do not ever renumber these or reuse values.  The
+ * values must agree with what prom thinks they are.
+ */
+
+#define PCIIO_ASIC_TYPE_UNKNOWN	0
+#define PCIIO_ASIC_TYPE_PPB	1
+#define PCIIO_ASIC_TYPE_PIC	2
+#define PCIIO_ASIC_TYPE_TIOCP	3
+#define PCIIO_ASIC_TYPE_TIOCA	4
+#define PCIIO_ASIC_TYPE_TIOCE	5
+
+#define PCIIO_ASIC_MAX_TYPES	6
+
+/*
+ * Common pciio bus provider data.  There should be one of these as the
+ * first field in any pciio based provider soft structure (e.g. pcibr_soft
+ * tioca_soft, etc).
+ */
+
+struct pcibus_bussoft {
+	u32		bs_asic_type;	/* chipset type */
+	u32		bs_xid;		/* xwidget id */
+	u32		bs_persist_busnum; /* Persistent Bus Number */
+	u32		bs_persist_segment; /* Segment Number */
+	u64		bs_legacy_io;	/* legacy io pio addr */
+	u64		bs_legacy_mem;	/* legacy mem pio addr */
+	u64		bs_base;	/* widget base */
+	struct xwidget_info	*bs_xwidget_info;
+};
+
+struct pci_controller;
+/*
+ * SN pci bus indirection
+ */
+
+struct sn_pcibus_provider {
+	dma_addr_t	(*dma_map)(struct pci_dev *, unsigned long, size_t, int flags);
+	dma_addr_t	(*dma_map_consistent)(struct pci_dev *, unsigned long, size_t, int flags);
+	void		(*dma_unmap)(struct pci_dev *, dma_addr_t, int);
+	void *		(*bus_fixup)(struct pcibus_bussoft *, struct pci_controller *);
+ 	void		(*force_interrupt)(struct sn_irq_info *);
+ 	void		(*target_interrupt)(struct sn_irq_info *);
+};
+
+/*
+ * Flags used by the map interfaces
+ * bits 3:0 specifies format of passed in address
+ * bit  4   specifies that address is to be used for MSI
+ */
+
+#define SN_DMA_ADDRTYPE(x)	((x) & 0xf)
+#define     SN_DMA_ADDR_PHYS	1	/* address is an xio address. */
+#define     SN_DMA_ADDR_XIO	2	/* address is phys memory */
+#define SN_DMA_MSI		0x10	/* Bus address is to be used for MSI */
+
+extern struct sn_pcibus_provider *sn_pci_provider[];
+#endif				/* _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H */
diff --git a/arch/ia64/include/asm/sn/pcidev.h b/arch/ia64/include/asm/sn/pcidev.h
new file mode 100644
index 00000000000..1c2382cea80
--- /dev/null
+++ b/arch/ia64/include/asm/sn/pcidev.h
@@ -0,0 +1,85 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PCIDEV_H
+#define _ASM_IA64_SN_PCI_PCIDEV_H
+
+#include <linux/pci.h>
+
+/*
+ * In ia64, pci_dev->sysdata must be a *pci_controller. To provide access to
+ * the pcidev_info structs for all devices under a controller, we keep a
+ * list of pcidev_info under pci_controller->platform_data.
+ */
+struct sn_platform_data {
+	void *provider_soft;
+	struct list_head pcidev_info;
+};
+
+#define SN_PLATFORM_DATA(busdev) \
+	((struct sn_platform_data *)(PCI_CONTROLLER(busdev)->platform_data))
+
+#define SN_PCIDEV_INFO(dev)	sn_pcidev_info_get(dev)
+
+/*
+ * Given a pci_bus, return the sn pcibus_bussoft struct.  Note that
+ * this only works for root busses, not for busses represented by PPB's.
+ */
+
+#define SN_PCIBUS_BUSSOFT(pci_bus) \
+	((struct pcibus_bussoft *)(SN_PLATFORM_DATA(pci_bus)->provider_soft))
+
+#define SN_PCIBUS_BUSSOFT_INFO(pci_bus) \
+	((struct pcibus_info *)(SN_PLATFORM_DATA(pci_bus)->provider_soft))
+/*
+ * Given a struct pci_dev, return the sn pcibus_bussoft struct.  Note
+ * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) due
+ * due to possible PPB's in the path.
+ */
+
+#define SN_PCIDEV_BUSSOFT(pci_dev) \
+	(SN_PCIDEV_INFO(pci_dev)->pdi_host_pcidev_info->pdi_pcibus_info)
+
+#define SN_PCIDEV_BUSPROVIDER(pci_dev) \
+	(SN_PCIDEV_INFO(pci_dev)->pdi_provider)
+
+#define PCIIO_BUS_NONE	255      /* bus 255 reserved */
+#define PCIIO_SLOT_NONE 255
+#define PCIIO_FUNC_NONE 255
+#define PCIIO_VENDOR_ID_NONE	(-1)
+
+struct pcidev_info {
+	u64		pdi_pio_mapped_addr[7]; /* 6 BARs PLUS 1 ROM */
+	u64		pdi_slot_host_handle;	/* Bus and devfn Host pci_dev */
+
+	struct pcibus_bussoft	*pdi_pcibus_info;	/* Kernel common bus soft */
+	struct pcidev_info	*pdi_host_pcidev_info;	/* Kernel Host pci_dev */
+	struct pci_dev		*pdi_linux_pcidev;	/* Kernel pci_dev */
+
+	struct sn_irq_info	*pdi_sn_irq_info;
+	struct sn_pcibus_provider *pdi_provider;	/* sn pci ops */
+	struct pci_dev 		*host_pci_dev;		/* host bus link */
+	struct list_head	pdi_list;		/* List of pcidev_info */
+};
+
+extern void sn_irq_fixup(struct pci_dev *pci_dev,
+			 struct sn_irq_info *sn_irq_info);
+extern void sn_irq_unfixup(struct pci_dev *pci_dev);
+extern struct pcidev_info * sn_pcidev_info_get(struct pci_dev *);
+extern void sn_bus_fixup(struct pci_bus *);
+extern void sn_acpi_bus_fixup(struct pci_bus *);
+extern void sn_common_bus_fixup(struct pci_bus *, struct pcibus_bussoft *);
+extern void sn_bus_store_sysdata(struct pci_dev *dev);
+extern void sn_bus_free_sysdata(void);
+extern void sn_generate_path(struct pci_bus *pci_bus, char *address);
+extern void sn_io_slot_fixup(struct pci_dev *);
+extern void sn_acpi_slot_fixup(struct pci_dev *);
+extern void sn_pci_fixup_slot(struct pci_dev *dev, struct pcidev_info *,
+			      struct sn_irq_info *);
+extern void sn_pci_unfixup_slot(struct pci_dev *dev);
+extern void sn_irq_lh_init(void);
+#endif				/* _ASM_IA64_SN_PCI_PCIDEV_H */
diff --git a/arch/ia64/include/asm/sn/pda.h b/arch/ia64/include/asm/sn/pda.h
new file mode 100644
index 00000000000..22ae358c8d1
--- /dev/null
+++ b/arch/ia64/include/asm/sn/pda.h
@@ -0,0 +1,68 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PDA_H
+#define _ASM_IA64_SN_PDA_H
+
+#include <linux/cache.h>
+#include <asm/percpu.h>
+
+
+/*
+ * CPU-specific data structure.
+ *
+ * One of these structures is allocated for each cpu of a NUMA system.
+ *
+ * This structure provides a convenient way of keeping together 
+ * all SN per-cpu data structures. 
+ */
+
+typedef struct pda_s {
+
+	/*
+	 * Support for SN LEDs
+	 */
+	volatile short	*led_address;
+	u8		led_state;
+	u8		hb_state;	/* supports blinking heartbeat leds */
+	unsigned int	hb_count;
+
+	unsigned int	idle_flag;
+	
+	volatile unsigned long *bedrock_rev_id;
+	volatile unsigned long *pio_write_status_addr;
+	unsigned long pio_write_status_val;
+	volatile unsigned long *pio_shub_war_cam_addr;
+
+	unsigned long	sn_in_service_ivecs[4];
+	int		sn_lb_int_war_ticks;
+	int		sn_last_irq;
+	int		sn_first_irq;
+} pda_t;
+
+
+#define CACHE_ALIGN(x)	(((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+
+/*
+ * PDA
+ * Per-cpu private data area for each cpu. The PDA is located immediately after
+ * the IA64 cpu_data area. A full page is allocated for the cp_data area for each
+ * cpu but only a small amout of the page is actually used. We put the SNIA PDA
+ * in the same page as the cpu_data area. Note that there is a check in the setup
+ * code to verify that we don't overflow the page.
+ *
+ * Seems like we should should cache-line align the pda so that any changes in the
+ * size of the cpu_data area don't change cache layout. Should we align to 32, 64, 128
+ * or 512 boundary. Each has merits. For now, pick 128 but should be revisited later.
+ */
+DECLARE_PER_CPU(struct pda_s, pda_percpu);
+
+#define pda		(&__ia64_per_cpu_var(pda_percpu))
+
+#define pdacpu(cpu)	(&per_cpu(pda_percpu, cpu))
+
+#endif /* _ASM_IA64_SN_PDA_H */
diff --git a/arch/ia64/include/asm/sn/pic.h b/arch/ia64/include/asm/sn/pic.h
new file mode 100644
index 00000000000..5f9da5fd6e5
--- /dev/null
+++ b/arch/ia64/include/asm/sn/pic.h
@@ -0,0 +1,261 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_PIC_H
+#define _ASM_IA64_SN_PCI_PIC_H
+
+/*
+ * PIC AS DEVICE ZERO
+ * ------------------
+ *
+ * PIC handles PCI/X busses.  PCI/X requires that the 'bridge' (i.e. PIC)
+ * be designated as 'device 0'.   That is a departure from earlier SGI
+ * PCI bridges.  Because of that we use config space 1 to access the
+ * config space of the first actual PCI device on the bus.
+ * Here's what the PIC manual says:
+ *
+ *     The current PCI-X bus specification now defines that the parent
+ *     hosts bus bridge (PIC for example) must be device 0 on bus 0. PIC
+ *     reduced the total number of devices from 8 to 4 and removed the
+ *     device registers and windows, now only supporting devices 0,1,2, and
+ *     3. PIC did leave all 8 configuration space windows. The reason was
+ *     there was nothing to gain by removing them. Here in lies the problem.
+ *     The device numbering we do using 0 through 3 is unrelated to the device
+ *     numbering which PCI-X requires in configuration space. In the past we
+ *     correlated Configs pace and our device space 0 <-> 0, 1 <-> 1, etc.
+ *     PCI-X requires we start a 1, not 0 and currently the PX brick
+ *     does associate our:
+ *
+ *         device 0 with configuration space window 1,
+ *         device 1 with configuration space window 2,
+ *         device 2 with configuration space window 3,
+ *         device 3 with configuration space window 4.
+ *
+ * The net effect is that all config space access are off-by-one with
+ * relation to other per-slot accesses on the PIC.
+ * Here is a table that shows some of that:
+ *
+ *                               Internal Slot#
+ *           |
+ *           |     0         1        2         3
+ * ----------|---------------------------------------
+ * config    |  0x21000   0x22000  0x23000   0x24000
+ *           |
+ * even rrb  |  0[0]      n/a      1[0]      n/a	[] == implied even/odd
+ *           |
+ * odd rrb   |  n/a       0[1]     n/a       1[1]
+ *           |
+ * int dev   |  00       01        10        11
+ *           |
+ * ext slot# |  1        2         3         4
+ * ----------|---------------------------------------
+ */
+
+#define PIC_ATE_TARGETID_SHFT           8
+#define PIC_HOST_INTR_ADDR              0x0000FFFFFFFFFFFFUL
+#define PIC_PCI64_ATTR_TARG_SHFT        60
+
+
+/*****************************************************************************
+ *********************** PIC MMR structure mapping ***************************
+ *****************************************************************************/
+
+/* NOTE: PIC WAR. PV#854697.  PIC does not allow writes just to [31:0]
+ * of a 64-bit register.  When writing PIC registers, always write the
+ * entire 64 bits.
+ */
+
+struct pic {
+
+    /* 0x000000-0x00FFFF -- Local Registers */
+
+    /* 0x000000-0x000057 -- Standard Widget Configuration */
+    u64		p_wid_id;			/* 0x000000 */
+    u64		p_wid_stat;			/* 0x000008 */
+    u64		p_wid_err_upper;		/* 0x000010 */
+    u64		p_wid_err_lower;		/* 0x000018 */
+    #define p_wid_err p_wid_err_lower
+    u64		p_wid_control;			/* 0x000020 */
+    u64		p_wid_req_timeout;		/* 0x000028 */
+    u64		p_wid_int_upper;		/* 0x000030 */
+    u64		p_wid_int_lower;		/* 0x000038 */
+    #define p_wid_int p_wid_int_lower
+    u64		p_wid_err_cmdword;		/* 0x000040 */
+    u64		p_wid_llp;			/* 0x000048 */
+    u64		p_wid_tflush;			/* 0x000050 */
+
+    /* 0x000058-0x00007F -- Bridge-specific Widget Configuration */
+    u64		p_wid_aux_err;			/* 0x000058 */
+    u64		p_wid_resp_upper;		/* 0x000060 */
+    u64		p_wid_resp_lower;		/* 0x000068 */
+    #define p_wid_resp p_wid_resp_lower
+    u64		p_wid_tst_pin_ctrl;		/* 0x000070 */
+    u64		p_wid_addr_lkerr;		/* 0x000078 */
+
+    /* 0x000080-0x00008F -- PMU & MAP */
+    u64		p_dir_map;			/* 0x000080 */
+    u64		_pad_000088;			/* 0x000088 */
+
+    /* 0x000090-0x00009F -- SSRAM */
+    u64		p_map_fault;			/* 0x000090 */
+    u64		_pad_000098;			/* 0x000098 */
+
+    /* 0x0000A0-0x0000AF -- Arbitration */
+    u64		p_arb;				/* 0x0000A0 */
+    u64		_pad_0000A8;			/* 0x0000A8 */
+
+    /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */
+    u64		p_ate_parity_err;		/* 0x0000B0 */
+    u64		_pad_0000B8;			/* 0x0000B8 */
+
+    /* 0x0000C0-0x0000FF -- PCI/GIO */
+    u64		p_bus_timeout;			/* 0x0000C0 */
+    u64		p_pci_cfg;			/* 0x0000C8 */
+    u64		p_pci_err_upper;		/* 0x0000D0 */
+    u64		p_pci_err_lower;		/* 0x0000D8 */
+    #define p_pci_err p_pci_err_lower
+    u64		_pad_0000E0[4];			/* 0x0000{E0..F8} */
+
+    /* 0x000100-0x0001FF -- Interrupt */
+    u64		p_int_status;			/* 0x000100 */
+    u64		p_int_enable;			/* 0x000108 */
+    u64		p_int_rst_stat;			/* 0x000110 */
+    u64		p_int_mode;			/* 0x000118 */
+    u64		p_int_device;			/* 0x000120 */
+    u64		p_int_host_err;			/* 0x000128 */
+    u64		p_int_addr[8];			/* 0x0001{30,,,68} */
+    u64		p_err_int_view;			/* 0x000170 */
+    u64		p_mult_int;			/* 0x000178 */
+    u64		p_force_always[8];		/* 0x0001{80,,,B8} */
+    u64		p_force_pin[8];			/* 0x0001{C0,,,F8} */
+
+    /* 0x000200-0x000298 -- Device */
+    u64		p_device[4];			/* 0x0002{00,,,18} */
+    u64		_pad_000220[4];			/* 0x0002{20,,,38} */
+    u64		p_wr_req_buf[4];		/* 0x0002{40,,,58} */
+    u64		_pad_000260[4];			/* 0x0002{60,,,78} */
+    u64		p_rrb_map[2];			/* 0x0002{80,,,88} */
+    #define p_even_resp p_rrb_map[0]			/* 0x000280 */
+    #define p_odd_resp  p_rrb_map[1]			/* 0x000288 */
+    u64		p_resp_status;			/* 0x000290 */
+    u64		p_resp_clear;			/* 0x000298 */
+
+    u64		_pad_0002A0[12];		/* 0x0002{A0..F8} */
+
+    /* 0x000300-0x0003F8 -- Buffer Address Match Registers */
+    struct {
+	u64	upper;				/* 0x0003{00,,,F0} */
+	u64	lower;				/* 0x0003{08,,,F8} */
+    } p_buf_addr_match[16];
+
+    /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */
+    struct {
+	u64	flush_w_touch;			/* 0x000{400,,,5C0} */
+	u64	flush_wo_touch;			/* 0x000{408,,,5C8} */
+	u64	inflight;			/* 0x000{410,,,5D0} */
+	u64	prefetch;			/* 0x000{418,,,5D8} */
+	u64	total_pci_retry;		/* 0x000{420,,,5E0} */
+	u64	max_pci_retry;			/* 0x000{428,,,5E8} */
+	u64	max_latency;			/* 0x000{430,,,5F0} */
+	u64	clear_all;			/* 0x000{438,,,5F8} */
+    } p_buf_count[8];
+
+
+    /* 0x000600-0x0009FF -- PCI/X registers */
+    u64		p_pcix_bus_err_addr;		/* 0x000600 */
+    u64		p_pcix_bus_err_attr;		/* 0x000608 */
+    u64		p_pcix_bus_err_data;		/* 0x000610 */
+    u64		p_pcix_pio_split_addr;		/* 0x000618 */
+    u64		p_pcix_pio_split_attr;		/* 0x000620 */
+    u64		p_pcix_dma_req_err_attr;	/* 0x000628 */
+    u64		p_pcix_dma_req_err_addr;	/* 0x000630 */
+    u64		p_pcix_timeout;			/* 0x000638 */
+
+    u64		_pad_000640[120];		/* 0x000{640,,,9F8} */
+
+    /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */
+    struct {
+	u64	p_buf_addr;			/* 0x000{A00,,,AF0} */
+	u64	p_buf_attr;			/* 0X000{A08,,,AF8} */
+    } p_pcix_read_buf_64[16];
+
+    struct {
+	u64	p_buf_addr;			/* 0x000{B00,,,BE0} */
+	u64	p_buf_attr;			/* 0x000{B08,,,BE8} */
+	u64	p_buf_valid;			/* 0x000{B10,,,BF0} */
+	u64	__pad1;				/* 0x000{B18,,,BF8} */
+    } p_pcix_write_buf_64[8];
+
+    /* End of Local Registers -- Start of Address Map space */
+
+    char		_pad_000c00[0x010000 - 0x000c00];
+
+    /* 0x010000-0x011fff -- Internal ATE RAM (Auto Parity Generation) */
+    u64		p_int_ate_ram[1024];		/* 0x010000-0x011fff */
+
+    /* 0x012000-0x013fff -- Internal ATE RAM (Manual Parity Generation) */
+    u64		p_int_ate_ram_mp[1024];		/* 0x012000-0x013fff */
+
+    char		_pad_014000[0x18000 - 0x014000];
+
+    /* 0x18000-0x197F8 -- PIC Write Request Ram */
+    u64		p_wr_req_lower[256];		/* 0x18000 - 0x187F8 */
+    u64		p_wr_req_upper[256];		/* 0x18800 - 0x18FF8 */
+    u64		p_wr_req_parity[256];		/* 0x19000 - 0x197F8 */
+
+    char		_pad_019800[0x20000 - 0x019800];
+
+    /* 0x020000-0x027FFF -- PCI Device Configuration Spaces */
+    union {
+	u8		c[0x1000 / 1];			/* 0x02{0000,,,7FFF} */
+	u16	s[0x1000 / 2];			/* 0x02{0000,,,7FFF} */
+	u32	l[0x1000 / 4];			/* 0x02{0000,,,7FFF} */
+	u64	d[0x1000 / 8];			/* 0x02{0000,,,7FFF} */
+	union {
+	    u8	c[0x100 / 1];
+	    u16	s[0x100 / 2];
+	    u32	l[0x100 / 4];
+	    u64	d[0x100 / 8];
+	} f[8];
+    } p_type0_cfg_dev[8];				/* 0x02{0000,,,7FFF} */
+
+    /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */
+    union {
+	u8		c[0x1000 / 1];			/* 0x028000-0x029000 */
+	u16	s[0x1000 / 2];			/* 0x028000-0x029000 */
+	u32	l[0x1000 / 4];			/* 0x028000-0x029000 */
+	u64	d[0x1000 / 8];			/* 0x028000-0x029000 */
+	union {
+	    u8	c[0x100 / 1];
+	    u16	s[0x100 / 2];
+	    u32	l[0x100 / 4];
+	    u64	d[0x100 / 8];
+	} f[8];
+    } p_type1_cfg;					/* 0x028000-0x029000 */
+
+    char		_pad_029000[0x030000-0x029000];
+
+    /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */
+    union {
+	u8		c[8 / 1];
+	u16	s[8 / 2];
+	u32	l[8 / 4];
+	u64	d[8 / 8];
+    } p_pci_iack;					/* 0x030000-0x030007 */
+
+    char		_pad_030007[0x040000-0x030008];
+
+    /* 0x040000-0x030007 -- PCIX Special Cycle */
+    union {
+	u8		c[8 / 1];
+	u16	s[8 / 2];
+	u32	l[8 / 4];
+	u64	d[8 / 8];
+    } p_pcix_cycle;					/* 0x040000-0x040007 */
+};
+
+#endif                          /* _ASM_IA64_SN_PCI_PIC_H */
diff --git a/arch/ia64/include/asm/sn/rw_mmr.h b/arch/ia64/include/asm/sn/rw_mmr.h
new file mode 100644
index 00000000000..2d78f4c5a45
--- /dev/null
+++ b/arch/ia64/include/asm/sn/rw_mmr.h
@@ -0,0 +1,28 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2002-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+#ifndef _ASM_IA64_SN_RW_MMR_H
+#define _ASM_IA64_SN_RW_MMR_H
+
+
+/*
+ * This file that access MMRs via uncached physical addresses.
+ * 	pio_phys_read_mmr  - read an MMR
+ * 	pio_phys_write_mmr - write an MMR
+ * 	pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0
+ *		Second MMR will be skipped if address is NULL
+ *
+ * Addresses passed to these routines should be uncached physical addresses
+ * ie., 0x80000....
+ */
+
+
+extern long pio_phys_read_mmr(volatile long *mmr); 
+extern void pio_phys_write_mmr(volatile long *mmr, long val);
+extern void pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1, volatile long *mmr2, long val2); 
+
+#endif /* _ASM_IA64_SN_RW_MMR_H */
diff --git a/arch/ia64/include/asm/sn/shub_mmr.h b/arch/ia64/include/asm/sn/shub_mmr.h
new file mode 100644
index 00000000000..a84d870f429
--- /dev/null
+++ b/arch/ia64/include/asm/sn/shub_mmr.h
@@ -0,0 +1,502 @@
+/*
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_SHUB_MMR_H
+#define _ASM_IA64_SN_SHUB_MMR_H
+
+/* ==================================================================== */
+/*                        Register "SH_IPI_INT"                         */
+/*               SHub Inter-Processor Interrupt Registers               */
+/* ==================================================================== */
+#define SH1_IPI_INT			__IA64_UL_CONST(0x0000000110000380)
+#define SH2_IPI_INT			__IA64_UL_CONST(0x0000000010000380)
+
+/*   SH_IPI_INT_TYPE                                                    */
+/*   Description:  Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT       */
+#define SH_IPI_INT_TYPE_SHFT				0
+#define SH_IPI_INT_TYPE_MASK		__IA64_UL_CONST(0x0000000000000007)
+
+/*   SH_IPI_INT_AGT                                                     */
+/*   Description:  Agent, must be 0 for SHub                            */
+#define SH_IPI_INT_AGT_SHFT				3
+#define SH_IPI_INT_AGT_MASK		__IA64_UL_CONST(0x0000000000000008)
+
+/*   SH_IPI_INT_PID                                                     */
+/*   Description:  Processor ID, same setting as on targeted McKinley  */
+#define SH_IPI_INT_PID_SHFT                      	4
+#define SH_IPI_INT_PID_MASK		__IA64_UL_CONST(0x00000000000ffff0)
+
+/*   SH_IPI_INT_BASE                                                    */
+/*   Description:  Optional interrupt vector area, 2MB aligned          */
+#define SH_IPI_INT_BASE_SHFT				21
+#define SH_IPI_INT_BASE_MASK 		__IA64_UL_CONST(0x0003ffffffe00000)
+
+/*   SH_IPI_INT_IDX                                                     */
+/*   Description:  Targeted McKinley interrupt vector                   */
+#define SH_IPI_INT_IDX_SHFT				52
+#define SH_IPI_INT_IDX_MASK		__IA64_UL_CONST(0x0ff0000000000000)
+
+/*   SH_IPI_INT_SEND                                                    */
+/*   Description:  Send Interrupt Message to PI, This generates a puls  */
+#define SH_IPI_INT_SEND_SHFT				63
+#define SH_IPI_INT_SEND_MASK		__IA64_UL_CONST(0x8000000000000000)
+
+/* ==================================================================== */
+/*                     Register "SH_EVENT_OCCURRED"                     */
+/*                    SHub Interrupt Event Occurred                     */
+/* ==================================================================== */
+#define SH1_EVENT_OCCURRED		__IA64_UL_CONST(0x0000000110010000)
+#define SH1_EVENT_OCCURRED_ALIAS	__IA64_UL_CONST(0x0000000110010008)
+#define SH2_EVENT_OCCURRED		__IA64_UL_CONST(0x0000000010010000)
+#define SH2_EVENT_OCCURRED_ALIAS 	__IA64_UL_CONST(0x0000000010010008)
+
+/* ==================================================================== */
+/*                     Register "SH_PI_CAM_CONTROL"                     */
+/*                      CRB CAM MMR Access Control                      */
+/* ==================================================================== */
+#define SH1_PI_CAM_CONTROL		__IA64_UL_CONST(0x0000000120050300)
+
+/* ==================================================================== */
+/*                        Register "SH_SHUB_ID"                         */
+/*                            SHub ID Number                            */
+/* ==================================================================== */
+#define SH1_SHUB_ID			__IA64_UL_CONST(0x0000000110060580)
+#define SH1_SHUB_ID_REVISION_SHFT			28
+#define SH1_SHUB_ID_REVISION_MASK	__IA64_UL_CONST(0x00000000f0000000)
+
+/* ==================================================================== */
+/*                          Register "SH_RTC"                           */
+/*                           Real-time Clock                            */
+/* ==================================================================== */
+#define SH1_RTC				__IA64_UL_CONST(0x00000001101c0000)
+#define SH2_RTC				__IA64_UL_CONST(0x00000002101c0000)
+#define SH_RTC_MASK			__IA64_UL_CONST(0x007fffffffffffff)
+
+/* ==================================================================== */
+/*                   Register "SH_PIO_WRITE_STATUS_0|1"                 */
+/*                      PIO Write Status for CPU 0 & 1                  */
+/* ==================================================================== */
+#define SH1_PIO_WRITE_STATUS_0		__IA64_UL_CONST(0x0000000120070200)
+#define SH1_PIO_WRITE_STATUS_1		__IA64_UL_CONST(0x0000000120070280)
+#define SH2_PIO_WRITE_STATUS_0		__IA64_UL_CONST(0x0000000020070200)
+#define SH2_PIO_WRITE_STATUS_1		__IA64_UL_CONST(0x0000000020070280)
+#define SH2_PIO_WRITE_STATUS_2		__IA64_UL_CONST(0x0000000020070300)
+#define SH2_PIO_WRITE_STATUS_3		__IA64_UL_CONST(0x0000000020070380)
+
+/*   SH_PIO_WRITE_STATUS_0_WRITE_DEADLOCK                               */
+/*   Description:  Deadlock response detected                           */
+#define SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT		1
+#define SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK \
+					__IA64_UL_CONST(0x0000000000000002)
+
+/*   SH_PIO_WRITE_STATUS_0_PENDING_WRITE_COUNT                          */
+/*   Description:  Count of currently pending PIO writes                */
+#define SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_SHFT	56
+#define SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK \
+					__IA64_UL_CONST(0x3f00000000000000)
+
+/* ==================================================================== */
+/*                Register "SH_PIO_WRITE_STATUS_0_ALIAS"                */
+/* ==================================================================== */
+#define SH1_PIO_WRITE_STATUS_0_ALIAS	__IA64_UL_CONST(0x0000000120070208)
+#define SH2_PIO_WRITE_STATUS_0_ALIAS	__IA64_UL_CONST(0x0000000020070208)
+
+/* ==================================================================== */
+/*                     Register "SH_EVENT_OCCURRED"                     */
+/*                    SHub Interrupt Event Occurred                     */
+/* ==================================================================== */
+/*   SH_EVENT_OCCURRED_UART_INT                                         */
+/*   Description:  Pending Junk Bus UART Interrupt                      */
+#define SH_EVENT_OCCURRED_UART_INT_SHFT			20
+#define SH_EVENT_OCCURRED_UART_INT_MASK	__IA64_UL_CONST(0x0000000000100000)
+
+/*   SH_EVENT_OCCURRED_IPI_INT                                          */
+/*   Description:  Pending IPI Interrupt                                */
+#define SH_EVENT_OCCURRED_IPI_INT_SHFT			28
+#define SH_EVENT_OCCURRED_IPI_INT_MASK	__IA64_UL_CONST(0x0000000010000000)
+
+/*   SH_EVENT_OCCURRED_II_INT0                                          */
+/*   Description:  Pending II 0 Interrupt                               */
+#define SH_EVENT_OCCURRED_II_INT0_SHFT			29
+#define SH_EVENT_OCCURRED_II_INT0_MASK	__IA64_UL_CONST(0x0000000020000000)
+
+/*   SH_EVENT_OCCURRED_II_INT1                                          */
+/*   Description:  Pending II 1 Interrupt                               */
+#define SH_EVENT_OCCURRED_II_INT1_SHFT			30
+#define SH_EVENT_OCCURRED_II_INT1_MASK	__IA64_UL_CONST(0x0000000040000000)
+
+/*   SH2_EVENT_OCCURRED_EXTIO_INT2                                      */
+/*   Description:  Pending SHUB 2 EXT IO INT2                           */
+#define SH2_EVENT_OCCURRED_EXTIO_INT2_SHFT		33
+#define SH2_EVENT_OCCURRED_EXTIO_INT2_MASK __IA64_UL_CONST(0x0000000200000000)
+
+/*   SH2_EVENT_OCCURRED_EXTIO_INT3                                      */
+/*   Description:  Pending SHUB 2 EXT IO INT3                           */
+#define SH2_EVENT_OCCURRED_EXTIO_INT3_SHFT		34
+#define SH2_EVENT_OCCURRED_EXTIO_INT3_MASK __IA64_UL_CONST(0x0000000400000000)
+
+#define SH_ALL_INT_MASK \
+	(SH_EVENT_OCCURRED_UART_INT_MASK | SH_EVENT_OCCURRED_IPI_INT_MASK | \
+	 SH_EVENT_OCCURRED_II_INT0_MASK | SH_EVENT_OCCURRED_II_INT1_MASK | \
+	 SH_EVENT_OCCURRED_II_INT1_MASK | SH2_EVENT_OCCURRED_EXTIO_INT2_MASK | \
+	 SH2_EVENT_OCCURRED_EXTIO_INT3_MASK)
+
+
+/* ==================================================================== */
+/*                         LEDS                                         */
+/* ==================================================================== */
+#define SH1_REAL_JUNK_BUS_LED0			0x7fed00000UL
+#define SH1_REAL_JUNK_BUS_LED1			0x7fed10000UL
+#define SH1_REAL_JUNK_BUS_LED2			0x7fed20000UL
+#define SH1_REAL_JUNK_BUS_LED3			0x7fed30000UL
+
+#define SH2_REAL_JUNK_BUS_LED0			0xf0000000UL
+#define SH2_REAL_JUNK_BUS_LED1			0xf0010000UL
+#define SH2_REAL_JUNK_BUS_LED2			0xf0020000UL
+#define SH2_REAL_JUNK_BUS_LED3			0xf0030000UL
+
+/* ==================================================================== */
+/*                         Register "SH1_PTC_0"                         */
+/*       Puge Translation Cache Message Configuration Information       */
+/* ==================================================================== */
+#define SH1_PTC_0			__IA64_UL_CONST(0x00000001101a0000)
+
+/*   SH1_PTC_0_A                                                        */
+/*   Description:  Type                                                 */
+#define SH1_PTC_0_A_SHFT				0
+
+/*   SH1_PTC_0_PS                                                       */
+/*   Description:  Page Size                                            */
+#define SH1_PTC_0_PS_SHFT				2
+
+/*   SH1_PTC_0_RID                                                      */
+/*   Description:  Region ID                                            */
+#define SH1_PTC_0_RID_SHFT				8
+
+/*   SH1_PTC_0_START                                                    */
+/*   Description:  Start                                                */
+#define SH1_PTC_0_START_SHFT				63
+
+/* ==================================================================== */
+/*                         Register "SH1_PTC_1"                         */
+/*       Puge Translation Cache Message Configuration Information       */
+/* ==================================================================== */
+#define SH1_PTC_1			__IA64_UL_CONST(0x00000001101a0080)
+
+/*   SH1_PTC_1_START                                                    */
+/*   Description:  PTC_1 Start                                          */
+#define SH1_PTC_1_START_SHFT				63
+
+/* ==================================================================== */
+/*                         Register "SH2_PTC"                           */
+/*       Puge Translation Cache Message Configuration Information       */
+/* ==================================================================== */
+#define SH2_PTC				__IA64_UL_CONST(0x0000000170000000)
+
+/*   SH2_PTC_A                                                          */
+/*   Description:  Type                                                 */
+#define SH2_PTC_A_SHFT					0
+
+/*   SH2_PTC_PS                                                         */
+/*   Description:  Page Size                                            */
+#define SH2_PTC_PS_SHFT					2
+
+/*   SH2_PTC_RID                                                      */
+/*   Description:  Region ID                                            */
+#define SH2_PTC_RID_SHFT				4
+
+/*   SH2_PTC_START                                                      */
+/*   Description:  Start                                                */
+#define SH2_PTC_START_SHFT				63
+
+/*   SH2_PTC_ADDR_RID                                                   */
+/*   Description:  Region ID                                            */
+#define SH2_PTC_ADDR_SHFT				4
+#define SH2_PTC_ADDR_MASK		__IA64_UL_CONST(0x1ffffffffffff000)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC1_INT_CONFIG"                     */
+/*                SHub RTC 1 Interrupt Config Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC1_INT_CONFIG		__IA64_UL_CONST(0x0000000110001480)
+#define SH2_RTC1_INT_CONFIG		__IA64_UL_CONST(0x0000000010001480)
+#define SH_RTC1_INT_CONFIG_MASK		__IA64_UL_CONST(0x0ff3ffffffefffff)
+#define SH_RTC1_INT_CONFIG_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC1_INT_CONFIG_TYPE                                            */
+/*   Description:  Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT       */
+#define SH_RTC1_INT_CONFIG_TYPE_SHFT			0
+#define SH_RTC1_INT_CONFIG_TYPE_MASK	__IA64_UL_CONST(0x0000000000000007)
+
+/*   SH_RTC1_INT_CONFIG_AGT                                             */
+/*   Description:  Agent, must be 0 for SHub                            */
+#define SH_RTC1_INT_CONFIG_AGT_SHFT			3
+#define SH_RTC1_INT_CONFIG_AGT_MASK	__IA64_UL_CONST(0x0000000000000008)
+
+/*   SH_RTC1_INT_CONFIG_PID                                             */
+/*   Description:  Processor ID, same setting as on targeted McKinley  */
+#define SH_RTC1_INT_CONFIG_PID_SHFT			4
+#define SH_RTC1_INT_CONFIG_PID_MASK	__IA64_UL_CONST(0x00000000000ffff0)
+
+/*   SH_RTC1_INT_CONFIG_BASE                                            */
+/*   Description:  Optional interrupt vector area, 2MB aligned          */
+#define SH_RTC1_INT_CONFIG_BASE_SHFT			21
+#define SH_RTC1_INT_CONFIG_BASE_MASK	__IA64_UL_CONST(0x0003ffffffe00000)
+
+/*   SH_RTC1_INT_CONFIG_IDX                                             */
+/*   Description:  Targeted McKinley interrupt vector                   */
+#define SH_RTC1_INT_CONFIG_IDX_SHFT			52
+#define SH_RTC1_INT_CONFIG_IDX_MASK	__IA64_UL_CONST(0x0ff0000000000000)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC1_INT_ENABLE"                     */
+/*                SHub RTC 1 Interrupt Enable Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC1_INT_ENABLE		__IA64_UL_CONST(0x0000000110001500)
+#define SH2_RTC1_INT_ENABLE		__IA64_UL_CONST(0x0000000010001500)
+#define SH_RTC1_INT_ENABLE_MASK		__IA64_UL_CONST(0x0000000000000001)
+#define SH_RTC1_INT_ENABLE_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC1_INT_ENABLE_RTC1_ENABLE                                     */
+/*   Description:  Enable RTC 1 Interrupt                               */
+#define SH_RTC1_INT_ENABLE_RTC1_ENABLE_SHFT		0
+#define SH_RTC1_INT_ENABLE_RTC1_ENABLE_MASK \
+					__IA64_UL_CONST(0x0000000000000001)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC2_INT_CONFIG"                     */
+/*                SHub RTC 2 Interrupt Config Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC2_INT_CONFIG		__IA64_UL_CONST(0x0000000110001580)
+#define SH2_RTC2_INT_CONFIG		__IA64_UL_CONST(0x0000000010001580)
+#define SH_RTC2_INT_CONFIG_MASK		__IA64_UL_CONST(0x0ff3ffffffefffff)
+#define SH_RTC2_INT_CONFIG_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC2_INT_CONFIG_TYPE                                            */
+/*   Description:  Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT       */
+#define SH_RTC2_INT_CONFIG_TYPE_SHFT			0
+#define SH_RTC2_INT_CONFIG_TYPE_MASK	__IA64_UL_CONST(0x0000000000000007)
+
+/*   SH_RTC2_INT_CONFIG_AGT                                             */
+/*   Description:  Agent, must be 0 for SHub                            */
+#define SH_RTC2_INT_CONFIG_AGT_SHFT			3
+#define SH_RTC2_INT_CONFIG_AGT_MASK	__IA64_UL_CONST(0x0000000000000008)
+
+/*   SH_RTC2_INT_CONFIG_PID                                             */
+/*   Description:  Processor ID, same setting as on targeted McKinley  */
+#define SH_RTC2_INT_CONFIG_PID_SHFT			4
+#define SH_RTC2_INT_CONFIG_PID_MASK	__IA64_UL_CONST(0x00000000000ffff0)
+
+/*   SH_RTC2_INT_CONFIG_BASE                                            */
+/*   Description:  Optional interrupt vector area, 2MB aligned          */
+#define SH_RTC2_INT_CONFIG_BASE_SHFT			21
+#define SH_RTC2_INT_CONFIG_BASE_MASK	__IA64_UL_CONST(0x0003ffffffe00000)
+
+/*   SH_RTC2_INT_CONFIG_IDX                                             */
+/*   Description:  Targeted McKinley interrupt vector                   */
+#define SH_RTC2_INT_CONFIG_IDX_SHFT			52
+#define SH_RTC2_INT_CONFIG_IDX_MASK	__IA64_UL_CONST(0x0ff0000000000000)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC2_INT_ENABLE"                     */
+/*                SHub RTC 2 Interrupt Enable Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC2_INT_ENABLE		__IA64_UL_CONST(0x0000000110001600)
+#define SH2_RTC2_INT_ENABLE		__IA64_UL_CONST(0x0000000010001600)
+#define SH_RTC2_INT_ENABLE_MASK		__IA64_UL_CONST(0x0000000000000001)
+#define SH_RTC2_INT_ENABLE_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC2_INT_ENABLE_RTC2_ENABLE                                     */
+/*   Description:  Enable RTC 2 Interrupt                               */
+#define SH_RTC2_INT_ENABLE_RTC2_ENABLE_SHFT		0
+#define SH_RTC2_INT_ENABLE_RTC2_ENABLE_MASK \
+					__IA64_UL_CONST(0x0000000000000001)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC3_INT_CONFIG"                     */
+/*                SHub RTC 3 Interrupt Config Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC3_INT_CONFIG		__IA64_UL_CONST(0x0000000110001680)
+#define SH2_RTC3_INT_CONFIG		__IA64_UL_CONST(0x0000000010001680)
+#define SH_RTC3_INT_CONFIG_MASK		__IA64_UL_CONST(0x0ff3ffffffefffff)
+#define SH_RTC3_INT_CONFIG_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC3_INT_CONFIG_TYPE                                            */
+/*   Description:  Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT       */
+#define SH_RTC3_INT_CONFIG_TYPE_SHFT			0
+#define SH_RTC3_INT_CONFIG_TYPE_MASK	__IA64_UL_CONST(0x0000000000000007)
+
+/*   SH_RTC3_INT_CONFIG_AGT                                             */
+/*   Description:  Agent, must be 0 for SHub                            */
+#define SH_RTC3_INT_CONFIG_AGT_SHFT			3
+#define SH_RTC3_INT_CONFIG_AGT_MASK	__IA64_UL_CONST(0x0000000000000008)
+
+/*   SH_RTC3_INT_CONFIG_PID                                             */
+/*   Description:  Processor ID, same setting as on targeted McKinley  */
+#define SH_RTC3_INT_CONFIG_PID_SHFT			4
+#define SH_RTC3_INT_CONFIG_PID_MASK	__IA64_UL_CONST(0x00000000000ffff0)
+
+/*   SH_RTC3_INT_CONFIG_BASE                                            */
+/*   Description:  Optional interrupt vector area, 2MB aligned          */
+#define SH_RTC3_INT_CONFIG_BASE_SHFT			21
+#define SH_RTC3_INT_CONFIG_BASE_MASK	__IA64_UL_CONST(0x0003ffffffe00000)
+
+/*   SH_RTC3_INT_CONFIG_IDX                                             */
+/*   Description:  Targeted McKinley interrupt vector                   */
+#define SH_RTC3_INT_CONFIG_IDX_SHFT			52
+#define SH_RTC3_INT_CONFIG_IDX_MASK	__IA64_UL_CONST(0x0ff0000000000000)
+
+/* ==================================================================== */
+/*                    Register "SH_RTC3_INT_ENABLE"                     */
+/*                SHub RTC 3 Interrupt Enable Registers                 */
+/* ==================================================================== */
+
+#define SH1_RTC3_INT_ENABLE		__IA64_UL_CONST(0x0000000110001700)
+#define SH2_RTC3_INT_ENABLE		__IA64_UL_CONST(0x0000000010001700)
+#define SH_RTC3_INT_ENABLE_MASK		__IA64_UL_CONST(0x0000000000000001)
+#define SH_RTC3_INT_ENABLE_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_RTC3_INT_ENABLE_RTC3_ENABLE                                     */
+/*   Description:  Enable RTC 3 Interrupt                               */
+#define SH_RTC3_INT_ENABLE_RTC3_ENABLE_SHFT		0
+#define SH_RTC3_INT_ENABLE_RTC3_ENABLE_MASK \
+					__IA64_UL_CONST(0x0000000000000001)
+
+/*   SH_EVENT_OCCURRED_RTC1_INT                                         */
+/*   Description:  Pending RTC 1 Interrupt                              */
+#define SH_EVENT_OCCURRED_RTC1_INT_SHFT			24
+#define SH_EVENT_OCCURRED_RTC1_INT_MASK	__IA64_UL_CONST(0x0000000001000000)
+
+/*   SH_EVENT_OCCURRED_RTC2_INT                                         */
+/*   Description:  Pending RTC 2 Interrupt                              */
+#define SH_EVENT_OCCURRED_RTC2_INT_SHFT			25
+#define SH_EVENT_OCCURRED_RTC2_INT_MASK	__IA64_UL_CONST(0x0000000002000000)
+
+/*   SH_EVENT_OCCURRED_RTC3_INT                                         */
+/*   Description:  Pending RTC 3 Interrupt                              */
+#define SH_EVENT_OCCURRED_RTC3_INT_SHFT			26
+#define SH_EVENT_OCCURRED_RTC3_INT_MASK	__IA64_UL_CONST(0x0000000004000000)
+
+/* ==================================================================== */
+/*                       Register "SH_IPI_ACCESS"                       */
+/*                 CPU interrupt Access Permission Bits                 */
+/* ==================================================================== */
+
+#define SH1_IPI_ACCESS			__IA64_UL_CONST(0x0000000110060480)
+#define SH2_IPI_ACCESS0			__IA64_UL_CONST(0x0000000010060c00)
+#define SH2_IPI_ACCESS1			__IA64_UL_CONST(0x0000000010060c80)
+#define SH2_IPI_ACCESS2			__IA64_UL_CONST(0x0000000010060d00)
+#define SH2_IPI_ACCESS3			__IA64_UL_CONST(0x0000000010060d80)
+
+/* ==================================================================== */
+/*                        Register "SH_INT_CMPB"                        */
+/*                  RTC Compare Value for Processor B                   */
+/* ==================================================================== */
+
+#define SH1_INT_CMPB			__IA64_UL_CONST(0x00000001101b0080)
+#define SH2_INT_CMPB			__IA64_UL_CONST(0x00000000101b0080)
+#define SH_INT_CMPB_MASK		__IA64_UL_CONST(0x007fffffffffffff)
+#define SH_INT_CMPB_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_INT_CMPB_REAL_TIME_CMPB                                         */
+/*   Description:  Real Time Clock Compare                              */
+#define SH_INT_CMPB_REAL_TIME_CMPB_SHFT			0
+#define SH_INT_CMPB_REAL_TIME_CMPB_MASK	__IA64_UL_CONST(0x007fffffffffffff)
+
+/* ==================================================================== */
+/*                        Register "SH_INT_CMPC"                        */
+/*                  RTC Compare Value for Processor C                   */
+/* ==================================================================== */
+
+#define SH1_INT_CMPC			__IA64_UL_CONST(0x00000001101b0100)
+#define SH2_INT_CMPC			__IA64_UL_CONST(0x00000000101b0100)
+#define SH_INT_CMPC_MASK		__IA64_UL_CONST(0x007fffffffffffff)
+#define SH_INT_CMPC_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_INT_CMPC_REAL_TIME_CMPC                                         */
+/*   Description:  Real Time Clock Compare                              */
+#define SH_INT_CMPC_REAL_TIME_CMPC_SHFT			0
+#define SH_INT_CMPC_REAL_TIME_CMPC_MASK	__IA64_UL_CONST(0x007fffffffffffff)
+
+/* ==================================================================== */
+/*                        Register "SH_INT_CMPD"                        */
+/*                  RTC Compare Value for Processor D                   */
+/* ==================================================================== */
+
+#define SH1_INT_CMPD			__IA64_UL_CONST(0x00000001101b0180)
+#define SH2_INT_CMPD			__IA64_UL_CONST(0x00000000101b0180)
+#define SH_INT_CMPD_MASK		__IA64_UL_CONST(0x007fffffffffffff)
+#define SH_INT_CMPD_INIT		__IA64_UL_CONST(0x0000000000000000)
+
+/*   SH_INT_CMPD_REAL_TIME_CMPD                                         */
+/*   Description:  Real Time Clock Compare                              */
+#define SH_INT_CMPD_REAL_TIME_CMPD_SHFT			0
+#define SH_INT_CMPD_REAL_TIME_CMPD_MASK	__IA64_UL_CONST(0x007fffffffffffff)
+
+/* ==================================================================== */
+/*                Register "SH_MD_DQLP_MMR_DIR_PRIVEC0"                 */
+/*                      privilege vector for acc=0                      */
+/* ==================================================================== */
+#define SH1_MD_DQLP_MMR_DIR_PRIVEC0	__IA64_UL_CONST(0x0000000100030300)
+
+/* ==================================================================== */
+/*                Register "SH_MD_DQRP_MMR_DIR_PRIVEC0"                 */
+/*                      privilege vector for acc=0                      */
+/* ==================================================================== */
+#define SH1_MD_DQRP_MMR_DIR_PRIVEC0	__IA64_UL_CONST(0x0000000100050300)
+
+/* ==================================================================== */
+/* Some MMRs are functionally identical (or close enough) on both SHUB1 */
+/* and SHUB2 that it makes sense to define a geberic name for the MMR.  */
+/* It is acceptable to use (for example) SH_IPI_INT to reference the    */
+/* the IPI MMR. The value of SH_IPI_INT is determined at runtime based  */
+/* on the type of the SHUB. Do not use these #defines in performance    */
+/* critical code  or loops - there is a small performance penalty.      */
+/* ==================================================================== */
+#define shubmmr(a,b) 		(is_shub2() ? a##2_##b : a##1_##b)
+
+#define SH_REAL_JUNK_BUS_LED0	shubmmr(SH, REAL_JUNK_BUS_LED0)
+#define SH_IPI_INT		shubmmr(SH, IPI_INT)
+#define SH_EVENT_OCCURRED	shubmmr(SH, EVENT_OCCURRED)
+#define SH_EVENT_OCCURRED_ALIAS	shubmmr(SH, EVENT_OCCURRED_ALIAS)
+#define SH_RTC			shubmmr(SH, RTC)
+#define SH_RTC1_INT_CONFIG	shubmmr(SH, RTC1_INT_CONFIG)
+#define SH_RTC1_INT_ENABLE	shubmmr(SH, RTC1_INT_ENABLE)
+#define SH_RTC2_INT_CONFIG	shubmmr(SH, RTC2_INT_CONFIG)
+#define SH_RTC2_INT_ENABLE	shubmmr(SH, RTC2_INT_ENABLE)
+#define SH_RTC3_INT_CONFIG	shubmmr(SH, RTC3_INT_CONFIG)
+#define SH_RTC3_INT_ENABLE	shubmmr(SH, RTC3_INT_ENABLE)
+#define SH_INT_CMPB		shubmmr(SH, INT_CMPB)
+#define SH_INT_CMPC		shubmmr(SH, INT_CMPC)
+#define SH_INT_CMPD		shubmmr(SH, INT_CMPD)
+
+/* ========================================================================== */
+/*                        Register "SH2_BT_ENG_CSR_0"                         */
+/*                    Engine 0 Control and Status Register                    */
+/* ========================================================================== */
+
+#define SH2_BT_ENG_CSR_0		__IA64_UL_CONST(0x0000000030040000)
+#define SH2_BT_ENG_SRC_ADDR_0		__IA64_UL_CONST(0x0000000030040080)
+#define SH2_BT_ENG_DEST_ADDR_0		__IA64_UL_CONST(0x0000000030040100)
+#define SH2_BT_ENG_NOTIF_ADDR_0		__IA64_UL_CONST(0x0000000030040180)
+
+/* ========================================================================== */
+/*                       BTE interfaces 1-3                                   */
+/* ========================================================================== */
+
+#define SH2_BT_ENG_CSR_1		__IA64_UL_CONST(0x0000000030050000)
+#define SH2_BT_ENG_CSR_2		__IA64_UL_CONST(0x0000000030060000)
+#define SH2_BT_ENG_CSR_3		__IA64_UL_CONST(0x0000000030070000)
+
+#endif /* _ASM_IA64_SN_SHUB_MMR_H */
diff --git a/arch/ia64/include/asm/sn/shubio.h b/arch/ia64/include/asm/sn/shubio.h
new file mode 100644
index 00000000000..ecb8a49476b
--- /dev/null
+++ b/arch/ia64/include/asm/sn/shubio.h
@@ -0,0 +1,3358 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_SHUBIO_H
+#define _ASM_IA64_SN_SHUBIO_H
+
+#define HUB_WIDGET_ID_MAX	0xf
+#define IIO_NUM_ITTES		7
+#define HUB_NUM_BIG_WINDOW	(IIO_NUM_ITTES - 1)
+
+#define		IIO_WID			0x00400000	/* Crosstalk Widget Identification */
+							/* This register is also accessible from
+							 * Crosstalk at address 0x0.  */
+#define		IIO_WSTAT		0x00400008	/* Crosstalk Widget Status */
+#define		IIO_WCR			0x00400020	/* Crosstalk Widget Control Register */
+#define		IIO_ILAPR		0x00400100	/* IO Local Access Protection Register */
+#define		IIO_ILAPO		0x00400108	/* IO Local Access Protection Override */
+#define		IIO_IOWA		0x00400110	/* IO Outbound Widget Access */
+#define		IIO_IIWA		0x00400118	/* IO Inbound Widget Access */
+#define		IIO_IIDEM		0x00400120	/* IO Inbound Device Error Mask */
+#define		IIO_ILCSR		0x00400128	/* IO LLP Control and Status Register */
+#define		IIO_ILLR		0x00400130	/* IO LLP Log Register    */
+#define		IIO_IIDSR		0x00400138	/* IO Interrupt Destination */
+
+#define		IIO_IGFX0		0x00400140	/* IO Graphics Node-Widget Map 0 */
+#define		IIO_IGFX1		0x00400148	/* IO Graphics Node-Widget Map 1 */
+
+#define		IIO_ISCR0		0x00400150	/* IO Scratch Register 0 */
+#define		IIO_ISCR1		0x00400158	/* IO Scratch Register 1 */
+
+#define		IIO_ITTE1		0x00400160	/* IO Translation Table Entry 1 */
+#define		IIO_ITTE2		0x00400168	/* IO Translation Table Entry 2 */
+#define		IIO_ITTE3		0x00400170	/* IO Translation Table Entry 3 */
+#define		IIO_ITTE4		0x00400178	/* IO Translation Table Entry 4 */
+#define		IIO_ITTE5		0x00400180	/* IO Translation Table Entry 5 */
+#define		IIO_ITTE6		0x00400188	/* IO Translation Table Entry 6 */
+#define		IIO_ITTE7		0x00400190	/* IO Translation Table Entry 7 */
+
+#define		IIO_IPRB0		0x00400198	/* IO PRB Entry 0   */
+#define		IIO_IPRB8		0x004001A0	/* IO PRB Entry 8   */
+#define		IIO_IPRB9		0x004001A8	/* IO PRB Entry 9   */
+#define		IIO_IPRBA		0x004001B0	/* IO PRB Entry A   */
+#define		IIO_IPRBB		0x004001B8	/* IO PRB Entry B   */
+#define		IIO_IPRBC		0x004001C0	/* IO PRB Entry C   */
+#define		IIO_IPRBD		0x004001C8	/* IO PRB Entry D   */
+#define		IIO_IPRBE		0x004001D0	/* IO PRB Entry E   */
+#define		IIO_IPRBF		0x004001D8	/* IO PRB Entry F   */
+
+#define		IIO_IXCC		0x004001E0	/* IO Crosstalk Credit Count Timeout */
+#define		IIO_IMEM		0x004001E8	/* IO Miscellaneous Error Mask */
+#define		IIO_IXTT		0x004001F0	/* IO Crosstalk Timeout Threshold */
+#define		IIO_IECLR		0x004001F8	/* IO Error Clear Register */
+#define		IIO_IBCR		0x00400200	/* IO BTE Control Register */
+
+#define		IIO_IXSM		0x00400208	/* IO Crosstalk Spurious Message */
+#define		IIO_IXSS		0x00400210	/* IO Crosstalk Spurious Sideband */
+
+#define		IIO_ILCT		0x00400218	/* IO LLP Channel Test    */
+
+#define		IIO_IIEPH1 		0x00400220	/* IO Incoming Error Packet Header, Part 1 */
+#define		IIO_IIEPH2 		0x00400228	/* IO Incoming Error Packet Header, Part 2 */
+
+#define		IIO_ISLAPR 		0x00400230	/* IO SXB Local Access Protection Regster */
+#define		IIO_ISLAPO 		0x00400238	/* IO SXB Local Access Protection Override */
+
+#define		IIO_IWI			0x00400240	/* IO Wrapper Interrupt Register */
+#define		IIO_IWEL		0x00400248	/* IO Wrapper Error Log Register */
+#define		IIO_IWC			0x00400250	/* IO Wrapper Control Register */
+#define		IIO_IWS			0x00400258	/* IO Wrapper Status Register */
+#define		IIO_IWEIM		0x00400260	/* IO Wrapper Error Interrupt Masking Register */
+
+#define		IIO_IPCA		0x00400300	/* IO PRB Counter Adjust */
+
+#define		IIO_IPRTE0_A		0x00400308	/* IO PIO Read Address Table Entry 0, Part A */
+#define		IIO_IPRTE1_A		0x00400310	/* IO PIO Read Address Table Entry 1, Part A */
+#define		IIO_IPRTE2_A		0x00400318	/* IO PIO Read Address Table Entry 2, Part A */
+#define		IIO_IPRTE3_A		0x00400320	/* IO PIO Read Address Table Entry 3, Part A */
+#define		IIO_IPRTE4_A		0x00400328	/* IO PIO Read Address Table Entry 4, Part A */
+#define		IIO_IPRTE5_A		0x00400330	/* IO PIO Read Address Table Entry 5, Part A */
+#define		IIO_IPRTE6_A		0x00400338	/* IO PIO Read Address Table Entry 6, Part A */
+#define		IIO_IPRTE7_A		0x00400340	/* IO PIO Read Address Table Entry 7, Part A */
+
+#define		IIO_IPRTE0_B		0x00400348	/* IO PIO Read Address Table Entry 0, Part B */
+#define		IIO_IPRTE1_B		0x00400350	/* IO PIO Read Address Table Entry 1, Part B */
+#define		IIO_IPRTE2_B		0x00400358	/* IO PIO Read Address Table Entry 2, Part B */
+#define		IIO_IPRTE3_B		0x00400360	/* IO PIO Read Address Table Entry 3, Part B */
+#define		IIO_IPRTE4_B		0x00400368	/* IO PIO Read Address Table Entry 4, Part B */
+#define		IIO_IPRTE5_B		0x00400370	/* IO PIO Read Address Table Entry 5, Part B */
+#define		IIO_IPRTE6_B		0x00400378	/* IO PIO Read Address Table Entry 6, Part B */
+#define		IIO_IPRTE7_B		0x00400380	/* IO PIO Read Address Table Entry 7, Part B */
+
+#define		IIO_IPDR		0x00400388	/* IO PIO Deallocation Register */
+#define		IIO_ICDR		0x00400390	/* IO CRB Entry Deallocation Register */
+#define		IIO_IFDR		0x00400398	/* IO IOQ FIFO Depth Register */
+#define		IIO_IIAP		0x004003A0	/* IO IIQ Arbitration Parameters */
+#define		IIO_ICMR		0x004003A8	/* IO CRB Management Register */
+#define		IIO_ICCR		0x004003B0	/* IO CRB Control Register */
+#define		IIO_ICTO		0x004003B8	/* IO CRB Timeout   */
+#define		IIO_ICTP		0x004003C0	/* IO CRB Timeout Prescalar */
+
+#define		IIO_ICRB0_A		0x00400400	/* IO CRB Entry 0_A */
+#define		IIO_ICRB0_B		0x00400408	/* IO CRB Entry 0_B */
+#define		IIO_ICRB0_C		0x00400410	/* IO CRB Entry 0_C */
+#define		IIO_ICRB0_D		0x00400418	/* IO CRB Entry 0_D */
+#define		IIO_ICRB0_E		0x00400420	/* IO CRB Entry 0_E */
+
+#define		IIO_ICRB1_A		0x00400430	/* IO CRB Entry 1_A */
+#define		IIO_ICRB1_B		0x00400438	/* IO CRB Entry 1_B */
+#define		IIO_ICRB1_C		0x00400440	/* IO CRB Entry 1_C */
+#define		IIO_ICRB1_D		0x00400448	/* IO CRB Entry 1_D */
+#define		IIO_ICRB1_E		0x00400450	/* IO CRB Entry 1_E */
+
+#define		IIO_ICRB2_A		0x00400460	/* IO CRB Entry 2_A */
+#define		IIO_ICRB2_B		0x00400468	/* IO CRB Entry 2_B */
+#define		IIO_ICRB2_C		0x00400470	/* IO CRB Entry 2_C */
+#define		IIO_ICRB2_D		0x00400478	/* IO CRB Entry 2_D */
+#define		IIO_ICRB2_E		0x00400480	/* IO CRB Entry 2_E */
+
+#define		IIO_ICRB3_A		0x00400490	/* IO CRB Entry 3_A */
+#define		IIO_ICRB3_B		0x00400498	/* IO CRB Entry 3_B */
+#define		IIO_ICRB3_C		0x004004a0	/* IO CRB Entry 3_C */
+#define		IIO_ICRB3_D		0x004004a8	/* IO CRB Entry 3_D */
+#define		IIO_ICRB3_E		0x004004b0	/* IO CRB Entry 3_E */
+
+#define		IIO_ICRB4_A		0x004004c0	/* IO CRB Entry 4_A */
+#define		IIO_ICRB4_B		0x004004c8	/* IO CRB Entry 4_B */
+#define		IIO_ICRB4_C		0x004004d0	/* IO CRB Entry 4_C */
+#define		IIO_ICRB4_D		0x004004d8	/* IO CRB Entry 4_D */
+#define		IIO_ICRB4_E		0x004004e0	/* IO CRB Entry 4_E */
+
+#define		IIO_ICRB5_A		0x004004f0	/* IO CRB Entry 5_A */
+#define		IIO_ICRB5_B		0x004004f8	/* IO CRB Entry 5_B */
+#define		IIO_ICRB5_C		0x00400500	/* IO CRB Entry 5_C */
+#define		IIO_ICRB5_D		0x00400508	/* IO CRB Entry 5_D */
+#define		IIO_ICRB5_E		0x00400510	/* IO CRB Entry 5_E */
+
+#define		IIO_ICRB6_A		0x00400520	/* IO CRB Entry 6_A */
+#define		IIO_ICRB6_B		0x00400528	/* IO CRB Entry 6_B */
+#define		IIO_ICRB6_C		0x00400530	/* IO CRB Entry 6_C */
+#define		IIO_ICRB6_D		0x00400538	/* IO CRB Entry 6_D */
+#define		IIO_ICRB6_E		0x00400540	/* IO CRB Entry 6_E */
+
+#define		IIO_ICRB7_A		0x00400550	/* IO CRB Entry 7_A */
+#define		IIO_ICRB7_B		0x00400558	/* IO CRB Entry 7_B */
+#define		IIO_ICRB7_C		0x00400560	/* IO CRB Entry 7_C */
+#define		IIO_ICRB7_D		0x00400568	/* IO CRB Entry 7_D */
+#define		IIO_ICRB7_E		0x00400570	/* IO CRB Entry 7_E */
+
+#define		IIO_ICRB8_A		0x00400580	/* IO CRB Entry 8_A */
+#define		IIO_ICRB8_B		0x00400588	/* IO CRB Entry 8_B */
+#define		IIO_ICRB8_C		0x00400590	/* IO CRB Entry 8_C */
+#define		IIO_ICRB8_D		0x00400598	/* IO CRB Entry 8_D */
+#define		IIO_ICRB8_E		0x004005a0	/* IO CRB Entry 8_E */
+
+#define		IIO_ICRB9_A		0x004005b0	/* IO CRB Entry 9_A */
+#define		IIO_ICRB9_B		0x004005b8	/* IO CRB Entry 9_B */
+#define		IIO_ICRB9_C		0x004005c0	/* IO CRB Entry 9_C */
+#define		IIO_ICRB9_D		0x004005c8	/* IO CRB Entry 9_D */
+#define		IIO_ICRB9_E		0x004005d0	/* IO CRB Entry 9_E */
+
+#define		IIO_ICRBA_A		0x004005e0	/* IO CRB Entry A_A */
+#define		IIO_ICRBA_B		0x004005e8	/* IO CRB Entry A_B */
+#define		IIO_ICRBA_C		0x004005f0	/* IO CRB Entry A_C */
+#define		IIO_ICRBA_D		0x004005f8	/* IO CRB Entry A_D */
+#define		IIO_ICRBA_E		0x00400600	/* IO CRB Entry A_E */
+
+#define		IIO_ICRBB_A		0x00400610	/* IO CRB Entry B_A */
+#define		IIO_ICRBB_B		0x00400618	/* IO CRB Entry B_B */
+#define		IIO_ICRBB_C		0x00400620	/* IO CRB Entry B_C */
+#define		IIO_ICRBB_D		0x00400628	/* IO CRB Entry B_D */
+#define		IIO_ICRBB_E		0x00400630	/* IO CRB Entry B_E */
+
+#define		IIO_ICRBC_A		0x00400640	/* IO CRB Entry C_A */
+#define		IIO_ICRBC_B		0x00400648	/* IO CRB Entry C_B */
+#define		IIO_ICRBC_C		0x00400650	/* IO CRB Entry C_C */
+#define		IIO_ICRBC_D		0x00400658	/* IO CRB Entry C_D */
+#define		IIO_ICRBC_E		0x00400660	/* IO CRB Entry C_E */
+
+#define		IIO_ICRBD_A		0x00400670	/* IO CRB Entry D_A */
+#define		IIO_ICRBD_B		0x00400678	/* IO CRB Entry D_B */
+#define		IIO_ICRBD_C		0x00400680	/* IO CRB Entry D_C */
+#define		IIO_ICRBD_D		0x00400688	/* IO CRB Entry D_D */
+#define		IIO_ICRBD_E		0x00400690	/* IO CRB Entry D_E */
+
+#define		IIO_ICRBE_A		0x004006a0	/* IO CRB Entry E_A */
+#define		IIO_ICRBE_B		0x004006a8	/* IO CRB Entry E_B */
+#define		IIO_ICRBE_C		0x004006b0	/* IO CRB Entry E_C */
+#define		IIO_ICRBE_D		0x004006b8	/* IO CRB Entry E_D */
+#define		IIO_ICRBE_E		0x004006c0	/* IO CRB Entry E_E */
+
+#define		IIO_ICSML		0x00400700	/* IO CRB Spurious Message Low */
+#define		IIO_ICSMM		0x00400708	/* IO CRB Spurious Message Middle */
+#define		IIO_ICSMH		0x00400710	/* IO CRB Spurious Message High */
+
+#define		IIO_IDBSS		0x00400718	/* IO Debug Submenu Select */
+
+#define		IIO_IBLS0		0x00410000	/* IO BTE Length Status 0 */
+#define		IIO_IBSA0		0x00410008	/* IO BTE Source Address 0 */
+#define		IIO_IBDA0		0x00410010	/* IO BTE Destination Address 0 */
+#define		IIO_IBCT0		0x00410018	/* IO BTE Control Terminate 0 */
+#define		IIO_IBNA0		0x00410020	/* IO BTE Notification Address 0 */
+#define		IIO_IBIA0		0x00410028	/* IO BTE Interrupt Address 0 */
+#define		IIO_IBLS1		0x00420000	/* IO BTE Length Status 1 */
+#define		IIO_IBSA1		0x00420008	/* IO BTE Source Address 1 */
+#define		IIO_IBDA1		0x00420010	/* IO BTE Destination Address 1 */
+#define		IIO_IBCT1		0x00420018	/* IO BTE Control Terminate 1 */
+#define		IIO_IBNA1		0x00420020	/* IO BTE Notification Address 1 */
+#define		IIO_IBIA1		0x00420028	/* IO BTE Interrupt Address 1 */
+
+#define		IIO_IPCR		0x00430000	/* IO Performance Control */
+#define		IIO_IPPR		0x00430008	/* IO Performance Profiling */
+
+/************************************************************************
+ *									*
+ * Description:  This register echoes some information from the         *
+ * LB_REV_ID register. It is available through Crosstalk as described   *
+ * above. The REV_NUM and MFG_NUM fields receive their values from      *
+ * the REVISION and MANUFACTURER fields in the LB_REV_ID register.      *
+ * The PART_NUM field's value is the Crosstalk device ID number that    *
+ * Steve Miller assigned to the SHub chip.                              *
+ *									*
+ ************************************************************************/
+
+typedef union ii_wid_u {
+	u64 ii_wid_regval;
+	struct {
+		u64 w_rsvd_1:1;
+		u64 w_mfg_num:11;
+		u64 w_part_num:16;
+		u64 w_rev_num:4;
+		u64 w_rsvd:32;
+	} ii_wid_fld_s;
+} ii_wid_u_t;
+
+/************************************************************************
+ *									*
+ *  The fields in this register are set upon detection of an error      *
+ * and cleared by various mechanisms, as explained in the               *
+ * description.                                                         *
+ *									*
+ ************************************************************************/
+
+typedef union ii_wstat_u {
+	u64 ii_wstat_regval;
+	struct {
+		u64 w_pending:4;
+		u64 w_xt_crd_to:1;
+		u64 w_xt_tail_to:1;
+		u64 w_rsvd_3:3;
+		u64 w_tx_mx_rty:1;
+		u64 w_rsvd_2:6;
+		u64 w_llp_tx_cnt:8;
+		u64 w_rsvd_1:8;
+		u64 w_crazy:1;
+		u64 w_rsvd:31;
+	} ii_wstat_fld_s;
+} ii_wstat_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This is a read-write enabled register. It controls     *
+ * various aspects of the Crosstalk flow control.                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_wcr_u {
+	u64 ii_wcr_regval;
+	struct {
+		u64 w_wid:4;
+		u64 w_tag:1;
+		u64 w_rsvd_1:8;
+		u64 w_dst_crd:3;
+		u64 w_f_bad_pkt:1;
+		u64 w_dir_con:1;
+		u64 w_e_thresh:5;
+		u64 w_rsvd:41;
+	} ii_wcr_fld_s;
+} ii_wcr_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register's value is a bit vector that guards      *
+ * access to local registers within the II as well as to external       *
+ * Crosstalk widgets. Each bit in the register corresponds to a         *
+ * particular region in the system; a region consists of one, two or    *
+ * four nodes (depending on the value of the REGION_SIZE field in the   *
+ * LB_REV_ID register, which is documented in Section 8.3.1.1). The     *
+ * protection provided by this register applies to PIO read             *
+ * operations as well as PIO write operations. The II will perform a    *
+ * PIO read or write request only if the bit for the requestor's        *
+ * region is set; otherwise, the II will not perform the requested      *
+ * operation and will return an error response. When a PIO read or      *
+ * write request targets an external Crosstalk widget, then not only    *
+ * must the bit for the requestor's region be set in the ILAPR, but     *
+ * also the target widget's bit in the IOWA register must be set in     *
+ * order for the II to perform the requested operation; otherwise,      *
+ * the II will return an error response. Hence, the protection          *
+ * provided by the IOWA register supplements the protection provided    *
+ * by the ILAPR for requests that target external Crosstalk widgets.    *
+ * This register itself can be accessed only by the nodes whose         *
+ * region ID bits are enabled in this same register. It can also be     *
+ * accessed through the IAlias space by the local processors.           *
+ * The reset value of this register allows access by all nodes.         *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ilapr_u {
+	u64 ii_ilapr_regval;
+	struct {
+		u64 i_region:64;
+	} ii_ilapr_fld_s;
+} ii_ilapr_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  A write to this register of the 64-bit value           *
+ * "SGIrules" in ASCII, will cause the bit in the ILAPR register        *
+ * corresponding to the region of the requestor to be set (allow        *
+ * access). A write of any other value will be ignored. Access          *
+ * protection for this register is "SGIrules".                          *
+ * This register can also be accessed through the IAlias space.         *
+ * However, this access will not change the access permissions in the   *
+ * ILAPR.                                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ilapo_u {
+	u64 ii_ilapo_regval;
+	struct {
+		u64 i_io_ovrride:64;
+	} ii_ilapo_fld_s;
+} ii_ilapo_u_t;
+
+/************************************************************************
+ *									*
+ *  This register qualifies all the PIO and Graphics writes launched    *
+ * from the SHUB towards a widget.                                      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iowa_u {
+	u64 ii_iowa_regval;
+	struct {
+		u64 i_w0_oac:1;
+		u64 i_rsvd_1:7;
+		u64 i_wx_oac:8;
+		u64 i_rsvd:48;
+	} ii_iowa_fld_s;
+} ii_iowa_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register qualifies all the requests launched      *
+ * from a widget towards the Shub. This register is intended to be      *
+ * used by software in case of misbehaving widgets.                     *
+ *									*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iiwa_u {
+	u64 ii_iiwa_regval;
+	struct {
+		u64 i_w0_iac:1;
+		u64 i_rsvd_1:7;
+		u64 i_wx_iac:8;
+		u64 i_rsvd:48;
+	} ii_iiwa_fld_s;
+} ii_iiwa_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register qualifies all the operations launched    *
+ * from a widget towards the SHub. It allows individual access          *
+ * control for up to 8 devices per widget. A device refers to           *
+ * individual DMA master hosted by a widget.                            *
+ * The bits in each field of this register are cleared by the Shub      *
+ * upon detection of an error which requires the device to be           *
+ * disabled. These fields assume that 0=TNUM=7 (i.e., Bridge-centric    *
+ * Crosstalk). Whether or not a device has access rights to this        *
+ * Shub is determined by an AND of the device enable bit in the         *
+ * appropriate field of this register and the corresponding bit in      *
+ * the Wx_IAC field (for the widget which this device belongs to).      *
+ * The bits in this field are set by writing a 1 to them. Incoming      *
+ * replies from Crosstalk are not subject to this access control        *
+ * mechanism.                                                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iidem_u {
+	u64 ii_iidem_regval;
+	struct {
+		u64 i_w8_dxs:8;
+		u64 i_w9_dxs:8;
+		u64 i_wa_dxs:8;
+		u64 i_wb_dxs:8;
+		u64 i_wc_dxs:8;
+		u64 i_wd_dxs:8;
+		u64 i_we_dxs:8;
+		u64 i_wf_dxs:8;
+	} ii_iidem_fld_s;
+} ii_iidem_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the various programmable fields necessary    *
+ * for controlling and observing the LLP signals.                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ilcsr_u {
+	u64 ii_ilcsr_regval;
+	struct {
+		u64 i_nullto:6;
+		u64 i_rsvd_4:2;
+		u64 i_wrmrst:1;
+		u64 i_rsvd_3:1;
+		u64 i_llp_en:1;
+		u64 i_bm8:1;
+		u64 i_llp_stat:2;
+		u64 i_remote_power:1;
+		u64 i_rsvd_2:1;
+		u64 i_maxrtry:10;
+		u64 i_d_avail_sel:2;
+		u64 i_rsvd_1:4;
+		u64 i_maxbrst:10;
+		u64 i_rsvd:22;
+
+	} ii_ilcsr_fld_s;
+} ii_ilcsr_u_t;
+
+/************************************************************************
+ *									*
+ *  This is simply a status registers that monitors the LLP error       *
+ * rate.								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_illr_u {
+	u64 ii_illr_regval;
+	struct {
+		u64 i_sn_cnt:16;
+		u64 i_cb_cnt:16;
+		u64 i_rsvd:32;
+	} ii_illr_fld_s;
+} ii_illr_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  All II-detected non-BTE error interrupts are           *
+ * specified via this register.                                         *
+ * NOTE: The PI interrupt register address is hardcoded in the II. If   *
+ * PI_ID==0, then the II sends an interrupt request (Duplonet PWRI      *
+ * packet) to address offset 0x0180_0090 within the local register      *
+ * address space of PI0 on the node specified by the NODE field. If     *
+ * PI_ID==1, then the II sends the interrupt request to address         *
+ * offset 0x01A0_0090 within the local register address space of PI1    *
+ * on the node specified by the NODE field.                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iidsr_u {
+	u64 ii_iidsr_regval;
+	struct {
+		u64 i_level:8;
+		u64 i_pi_id:1;
+		u64 i_node:11;
+		u64 i_rsvd_3:4;
+		u64 i_enable:1;
+		u64 i_rsvd_2:3;
+		u64 i_int_sent:2;
+		u64 i_rsvd_1:2;
+		u64 i_pi0_forward_int:1;
+		u64 i_pi1_forward_int:1;
+		u64 i_rsvd:30;
+	} ii_iidsr_fld_s;
+} ii_iidsr_u_t;
+
+/************************************************************************
+ *									*
+ *  There are two instances of this register. This register is used     *
+ * for matching up the incoming responses from the graphics widget to   *
+ * the processor that initiated the graphics operation. The             *
+ * write-responses are converted to graphics credits and returned to    *
+ * the processor so that the processor interface can manage the flow    *
+ * control.                                                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_igfx0_u {
+	u64 ii_igfx0_regval;
+	struct {
+		u64 i_w_num:4;
+		u64 i_pi_id:1;
+		u64 i_n_num:12;
+		u64 i_p_num:1;
+		u64 i_rsvd:46;
+	} ii_igfx0_fld_s;
+} ii_igfx0_u_t;
+
+/************************************************************************
+ *									*
+ *  There are two instances of this register. This register is used     *
+ * for matching up the incoming responses from the graphics widget to   *
+ * the processor that initiated the graphics operation. The             *
+ * write-responses are converted to graphics credits and returned to    *
+ * the processor so that the processor interface can manage the flow    *
+ * control.                                                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_igfx1_u {
+	u64 ii_igfx1_regval;
+	struct {
+		u64 i_w_num:4;
+		u64 i_pi_id:1;
+		u64 i_n_num:12;
+		u64 i_p_num:1;
+		u64 i_rsvd:46;
+	} ii_igfx1_fld_s;
+} ii_igfx1_u_t;
+
+/************************************************************************
+ *									*
+ *  There are two instances of this registers. These registers are      *
+ * used as scratch registers for software use.                          *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iscr0_u {
+	u64 ii_iscr0_regval;
+	struct {
+		u64 i_scratch:64;
+	} ii_iscr0_fld_s;
+} ii_iscr0_u_t;
+
+/************************************************************************
+ *									*
+ *  There are two instances of this registers. These registers are      *
+ * used as scratch registers for software use.                          *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iscr1_u {
+	u64 ii_iscr1_regval;
+	struct {
+		u64 i_scratch:64;
+	} ii_iscr1_fld_s;
+} ii_iscr1_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a Shub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the SHub is thus the lower 16 GBytes per widget       * 
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the Shub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte1_u {
+	u64 ii_itte1_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte1_fld_s;
+} ii_itte1_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a Shub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the Shub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the Shub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte2_u {
+	u64 ii_itte2_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte2_fld_s;
+} ii_itte2_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a Shub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the Shub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the SHub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte3_u {
+	u64 ii_itte3_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte3_fld_s;
+} ii_itte3_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a SHub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the SHub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the SHub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte4_u {
+	u64 ii_itte4_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte4_fld_s;
+} ii_itte4_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a SHub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the Shub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the Shub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte5_u {
+	u64 ii_itte5_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte5_fld_s;
+} ii_itte5_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a Shub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the Shub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the Shub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte6_u {
+	u64 ii_itte6_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte6_fld_s;
+} ii_itte6_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are seven instances of translation table entry   *
+ * registers. Each register maps a Shub Big Window to a 48-bit          *
+ * address on Crosstalk.                                                *
+ * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window      *
+ * number) are used to select one of these 7 registers. The Widget      *
+ * number field is then derived from the W_NUM field for synthesizing   *
+ * a Crosstalk packet. The 5 bits of OFFSET are concatenated with       *
+ * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34]      *
+ * are padded with zeros. Although the maximum Crosstalk space          *
+ * addressable by the Shub is thus the lower 16 GBytes per widget       *
+ * (M-mode), however only <SUP >7</SUP>/<SUB >32nds</SUB> of this       *
+ * space can be accessed.                                               *
+ * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big         *
+ * Window number) are used to select one of these 7 registers. The      *
+ * Widget number field is then derived from the W_NUM field for         *
+ * synthesizing a Crosstalk packet. The 5 bits of OFFSET are            *
+ * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP      *
+ * field is used as Crosstalk[47], and remainder of the Crosstalk       *
+ * address bits (Crosstalk[46:34]) are always zero. While the maximum   *
+ * Crosstalk space addressable by the SHub is thus the lower            *
+ * 8-GBytes per widget (N-mode), only <SUP >7</SUP>/<SUB >32nds</SUB>   *
+ * of this space can be accessed.                                       *
+ *									*
+ ************************************************************************/
+
+typedef union ii_itte7_u {
+	u64 ii_itte7_regval;
+	struct {
+		u64 i_offset:5;
+		u64 i_rsvd_1:3;
+		u64 i_w_num:4;
+		u64 i_iosp:1;
+		u64 i_rsvd:51;
+	} ii_itte7_fld_s;
+} ii_itte7_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprb0_u {
+	u64 ii_iprb0_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprb0_fld_s;
+} ii_iprb0_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprb8_u {
+	u64 ii_iprb8_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprb8_fld_s;
+} ii_iprb8_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprb9_u {
+	u64 ii_iprb9_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprb9_fld_s;
+} ii_iprb9_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.        *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ *									*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprba_u {
+	u64 ii_iprba_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprba_fld_s;
+} ii_iprba_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprbb_u {
+	u64 ii_iprbb_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprbb_fld_s;
+} ii_iprbb_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprbc_u {
+	u64 ii_iprbc_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprbc_fld_s;
+} ii_iprbc_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprbd_u {
+	u64 ii_iprbd_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprbd_fld_s;
+} ii_iprbd_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of SHub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprbe_u {
+	u64 ii_iprbe_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprbe_fld_s;
+} ii_iprbe_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 9 instances of this register, one per        *
+ * actual widget in this implementation of Shub and Crossbow.           *
+ * Note: Crossbow only has ports for Widgets 8 through F, widget 0      *
+ * refers to Crossbow's internal space.                                 *
+ * This register contains the state elements per widget that are        *
+ * necessary to manage the PIO flow control on Crosstalk and on the     *
+ * Router Network. See the PIO Flow Control chapter for a complete      *
+ * description of this register                                         *
+ * The SPUR_WR bit requires some explanation. When this register is     *
+ * written, the new value of the C field is captured in an internal     *
+ * register so the hardware can remember what the programmer wrote      *
+ * into the credit counter. The SPUR_WR bit sets whenever the C field   *
+ * increments above this stored value, which indicates that there       *
+ * have been more responses received than requests sent. The SPUR_WR    *
+ * bit cannot be cleared until a value is written to the IPRBx          *
+ * register; the write will correct the C field and capture its new     *
+ * value in the internal register. Even if IECLR[E_PRB_x] is set, the   *
+ * SPUR_WR bit will persist if IPRBx hasn't yet been written.           *
+ * .    								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprbf_u {
+	u64 ii_iprbf_regval;
+	struct {
+		u64 i_c:8;
+		u64 i_na:14;
+		u64 i_rsvd_2:2;
+		u64 i_nb:14;
+		u64 i_rsvd_1:2;
+		u64 i_m:2;
+		u64 i_f:1;
+		u64 i_of_cnt:5;
+		u64 i_error:1;
+		u64 i_rd_to:1;
+		u64 i_spur_wr:1;
+		u64 i_spur_rd:1;
+		u64 i_rsvd:11;
+		u64 i_mult_err:1;
+	} ii_iprbe_fld_s;
+} ii_iprbf_u_t;
+
+/************************************************************************
+ *									*
+ *  This register specifies the timeout value to use for monitoring     *
+ * Crosstalk credits which are used outbound to Crosstalk. An           *
+ * internal counter called the Crosstalk Credit Timeout Counter         *
+ * increments every 128 II clocks. The counter starts counting          *
+ * anytime the credit count drops below a threshold, and resets to      *
+ * zero (stops counting) anytime the credit count is at or above the    *
+ * threshold. The threshold is 1 credit in direct connect mode and 2    *
+ * in Crossbow connect mode. When the internal Crosstalk Credit         *
+ * Timeout Counter reaches the value programmed in this register, a     *
+ * Crosstalk Credit Timeout has occurred. The internal counter is not   *
+ * readable from software, and stops counting at its maximum value,     *
+ * so it cannot cause more than one interrupt.                          *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ixcc_u {
+	u64 ii_ixcc_regval;
+	struct {
+		u64 i_time_out:26;
+		u64 i_rsvd:38;
+	} ii_ixcc_fld_s;
+} ii_ixcc_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register qualifies all the PIO and DMA            *
+ * operations launched from widget 0 towards the SHub. In               *
+ * addition, it also qualifies accesses by the BTE streams.             *
+ * The bits in each field of this register are cleared by the SHub      *
+ * upon detection of an error which requires widget 0 or the BTE        *
+ * streams to be terminated. Whether or not widget x has access         *
+ * rights to this SHub is determined by an AND of the device            *
+ * enable bit in the appropriate field of this register and bit 0 in    *
+ * the Wx_IAC field. The bits in this field are set by writing a 1 to   *
+ * them. Incoming replies from Crosstalk are not subject to this        *
+ * access control mechanism.                                            *
+ *									*
+ ************************************************************************/
+
+typedef union ii_imem_u {
+	u64 ii_imem_regval;
+	struct {
+		u64 i_w0_esd:1;
+		u64 i_rsvd_3:3;
+		u64 i_b0_esd:1;
+		u64 i_rsvd_2:3;
+		u64 i_b1_esd:1;
+		u64 i_rsvd_1:3;
+		u64 i_clr_precise:1;
+		u64 i_rsvd:51;
+	} ii_imem_fld_s;
+} ii_imem_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register specifies the timeout value to use for   *
+ * monitoring Crosstalk tail flits coming into the Shub in the          *
+ * TAIL_TO field. An internal counter associated with this register     *
+ * is incremented every 128 II internal clocks (7 bits). The counter    *
+ * starts counting anytime a header micropacket is received and stops   *
+ * counting (and resets to zero) any time a micropacket with a Tail     *
+ * bit is received. Once the counter reaches the threshold value        *
+ * programmed in this register, it generates an interrupt to the        *
+ * processor that is programmed into the IIDSR. The counter saturates   *
+ * (does not roll over) at its maximum value, so it cannot cause        *
+ * another interrupt until after it is cleared.                         *
+ * The register also contains the Read Response Timeout values. The     *
+ * Prescalar is 23 bits, and counts II clocks. An internal counter      *
+ * increments on every II clock and when it reaches the value in the    *
+ * Prescalar field, all IPRTE registers with their valid bits set       *
+ * have their Read Response timers bumped. Whenever any of them match   *
+ * the value in the RRSP_TO field, a Read Response Timeout has          *
+ * occurred, and error handling occurs as described in the Error        *
+ * Handling section of this document.                                   *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ixtt_u {
+	u64 ii_ixtt_regval;
+	struct {
+		u64 i_tail_to:26;
+		u64 i_rsvd_1:6;
+		u64 i_rrsp_ps:23;
+		u64 i_rrsp_to:5;
+		u64 i_rsvd:4;
+	} ii_ixtt_fld_s;
+} ii_ixtt_u_t;
+
+/************************************************************************
+ *									*
+ *  Writing a 1 to the fields of this register clears the appropriate   *
+ * error bits in other areas of SHub. Note that when the                *
+ * E_PRB_x bits are used to clear error bits in PRB registers,          *
+ * SPUR_RD and SPUR_WR may persist, because they require additional     *
+ * action to clear them. See the IPRBx and IXSS Register                *
+ * specifications.                                                      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ieclr_u {
+	u64 ii_ieclr_regval;
+	struct {
+		u64 i_e_prb_0:1;
+		u64 i_rsvd:7;
+		u64 i_e_prb_8:1;
+		u64 i_e_prb_9:1;
+		u64 i_e_prb_a:1;
+		u64 i_e_prb_b:1;
+		u64 i_e_prb_c:1;
+		u64 i_e_prb_d:1;
+		u64 i_e_prb_e:1;
+		u64 i_e_prb_f:1;
+		u64 i_e_crazy:1;
+		u64 i_e_bte_0:1;
+		u64 i_e_bte_1:1;
+		u64 i_reserved_1:10;
+		u64 i_spur_rd_hdr:1;
+		u64 i_cam_intr_to:1;
+		u64 i_cam_overflow:1;
+		u64 i_cam_read_miss:1;
+		u64 i_ioq_rep_underflow:1;
+		u64 i_ioq_req_underflow:1;
+		u64 i_ioq_rep_overflow:1;
+		u64 i_ioq_req_overflow:1;
+		u64 i_iiq_rep_overflow:1;
+		u64 i_iiq_req_overflow:1;
+		u64 i_ii_xn_rep_cred_overflow:1;
+		u64 i_ii_xn_req_cred_overflow:1;
+		u64 i_ii_xn_invalid_cmd:1;
+		u64 i_xn_ii_invalid_cmd:1;
+		u64 i_reserved_2:21;
+	} ii_ieclr_fld_s;
+} ii_ieclr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register controls both BTEs. SOFT_RESET is intended for        *
+ * recovery after an error. COUNT controls the total number of CRBs     *
+ * that both BTEs (combined) can use, which affects total BTE           *
+ * bandwidth.                                                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibcr_u {
+	u64 ii_ibcr_regval;
+	struct {
+		u64 i_count:4;
+		u64 i_rsvd_1:4;
+		u64 i_soft_reset:1;
+		u64 i_rsvd:55;
+	} ii_ibcr_fld_s;
+} ii_ibcr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the header of a spurious read response       *
+ * received from Crosstalk. A spurious read response is defined as a    *
+ * read response received by II from a widget for which (1) the SIDN    *
+ * has a value between 1 and 7, inclusive (II never sends requests to   *
+ * these widgets (2) there is no valid IPRTE register which             *
+ * corresponds to the TNUM, or (3) the widget indicated in SIDN is      *
+ * not the same as the widget recorded in the IPRTE register            *
+ * referenced by the TNUM. If this condition is true, and if the        *
+ * IXSS[VALID] bit is clear, then the header of the spurious read       *
+ * response is capture in IXSM and IXSS, and IXSS[VALID] is set. The    *
+ * errant header is thereby captured, and no further spurious read      *
+ * respones are captured until IXSS[VALID] is cleared by setting the    *
+ * appropriate bit in IECLR. Every time a spurious read response is     *
+ * detected, the SPUR_RD bit of the PRB corresponding to the incoming   *
+ * message's SIDN field is set. This always happens, regarless of       *
+ * whether a header is captured. The programmer should check            *
+ * IXSM[SIDN] to determine which widget sent the spurious response,     *
+ * because there may be more than one SPUR_RD bit set in the PRB        *
+ * registers. The widget indicated by IXSM[SIDN] was the first          *
+ * spurious read response to be received since the last time            *
+ * IXSS[VALID] was clear. The SPUR_RD bit of the corresponding PRB      *
+ * will be set. Any SPUR_RD bits in any other PRB registers indicate    *
+ * spurious messages from other widets which were detected after the    *
+ * header was captured..                                                *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ixsm_u {
+	u64 ii_ixsm_regval;
+	struct {
+		u64 i_byte_en:32;
+		u64 i_reserved:1;
+		u64 i_tag:3;
+		u64 i_alt_pactyp:4;
+		u64 i_bo:1;
+		u64 i_error:1;
+		u64 i_vbpm:1;
+		u64 i_gbr:1;
+		u64 i_ds:2;
+		u64 i_ct:1;
+		u64 i_tnum:5;
+		u64 i_pactyp:4;
+		u64 i_sidn:4;
+		u64 i_didn:4;
+	} ii_ixsm_fld_s;
+} ii_ixsm_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the sideband bits of a spurious read         *
+ * response received from Crosstalk.                                    *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ixss_u {
+	u64 ii_ixss_regval;
+	struct {
+		u64 i_sideband:8;
+		u64 i_rsvd:55;
+		u64 i_valid:1;
+	} ii_ixss_fld_s;
+} ii_ixss_u_t;
+
+/************************************************************************
+ *									*
+ *  This register enables software to access the II LLP's test port.    *
+ * Refer to the LLP 2.5 documentation for an explanation of the test    *
+ * port. Software can write to this register to program the values      *
+ * for the control fields (TestErrCapture, TestClear, TestFlit,         *
+ * TestMask and TestSeed). Similarly, software can read from this       *
+ * register to obtain the values of the test port's status outputs      *
+ * (TestCBerr, TestValid and TestData).                                 *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ilct_u {
+	u64 ii_ilct_regval;
+	struct {
+		u64 i_test_seed:20;
+		u64 i_test_mask:8;
+		u64 i_test_data:20;
+		u64 i_test_valid:1;
+		u64 i_test_cberr:1;
+		u64 i_test_flit:3;
+		u64 i_test_clear:1;
+		u64 i_test_err_capture:1;
+		u64 i_rsvd:9;
+	} ii_ilct_fld_s;
+} ii_ilct_u_t;
+
+/************************************************************************
+ *									*
+ *  If the II detects an illegal incoming Duplonet packet (request or   *
+ * reply) when VALID==0 in the IIEPH1 register, then it saves the       *
+ * contents of the packet's header flit in the IIEPH1 and IIEPH2        *
+ * registers, sets the VALID bit in IIEPH1, clears the OVERRUN bit,     *
+ * and assigns a value to the ERR_TYPE field which indicates the        *
+ * specific nature of the error. The II recognizes four different       *
+ * types of errors: short request packets (ERR_TYPE==2), short reply    *
+ * packets (ERR_TYPE==3), long request packets (ERR_TYPE==4) and long   *
+ * reply packets (ERR_TYPE==5). The encodings for these types of        *
+ * errors were chosen to be consistent with the same types of errors    *
+ * indicated by the ERR_TYPE field in the LB_ERROR_HDR1 register (in    *
+ * the LB unit). If the II detects an illegal incoming Duplonet         *
+ * packet when VALID==1 in the IIEPH1 register, then it merely sets     *
+ * the OVERRUN bit to indicate that a subsequent error has happened,    *
+ * and does nothing further.                                            *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iieph1_u {
+	u64 ii_iieph1_regval;
+	struct {
+		u64 i_command:7;
+		u64 i_rsvd_5:1;
+		u64 i_suppl:14;
+		u64 i_rsvd_4:1;
+		u64 i_source:14;
+		u64 i_rsvd_3:1;
+		u64 i_err_type:4;
+		u64 i_rsvd_2:4;
+		u64 i_overrun:1;
+		u64 i_rsvd_1:3;
+		u64 i_valid:1;
+		u64 i_rsvd:13;
+	} ii_iieph1_fld_s;
+} ii_iieph1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register holds the Address field from the header flit of an    *
+ * incoming erroneous Duplonet packet, along with the tail bit which    *
+ * accompanied this header flit. This register is essentially an        *
+ * extension of IIEPH1. Two registers were necessary because the 64     *
+ * bits available in only a single register were insufficient to        *
+ * capture the entire header flit of an erroneous packet.               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iieph2_u {
+	u64 ii_iieph2_regval;
+	struct {
+		u64 i_rsvd_0:3;
+		u64 i_address:47;
+		u64 i_rsvd_1:10;
+		u64 i_tail:1;
+		u64 i_rsvd:3;
+	} ii_iieph2_fld_s;
+} ii_iieph2_u_t;
+
+/******************************/
+
+/************************************************************************
+ *									*
+ *  This register's value is a bit vector that guards access from SXBs  *
+ * to local registers within the II as well as to external Crosstalk    *
+ * widgets								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_islapr_u {
+	u64 ii_islapr_regval;
+	struct {
+		u64 i_region:64;
+	} ii_islapr_fld_s;
+} ii_islapr_u_t;
+
+/************************************************************************
+ *									*
+ *  A write to this register of the 56-bit value "Pup+Bun" will cause	*
+ * the bit in the ISLAPR register corresponding to the region of the	*
+ * requestor to be set (access allowed).				(
+ *									*
+ ************************************************************************/
+
+typedef union ii_islapo_u {
+	u64 ii_islapo_regval;
+	struct {
+		u64 i_io_sbx_ovrride:56;
+		u64 i_rsvd:8;
+	} ii_islapo_fld_s;
+} ii_islapo_u_t;
+
+/************************************************************************
+ *									*
+ *  Determines how long the wrapper will wait aftr an interrupt is	*
+ * initially issued from the II before it times out the outstanding	*
+ * interrupt and drops it from the interrupt queue.			* 
+ *									*
+ ************************************************************************/
+
+typedef union ii_iwi_u {
+	u64 ii_iwi_regval;
+	struct {
+		u64 i_prescale:24;
+		u64 i_rsvd:8;
+		u64 i_timeout:8;
+		u64 i_rsvd1:8;
+		u64 i_intrpt_retry_period:8;
+		u64 i_rsvd2:8;
+	} ii_iwi_fld_s;
+} ii_iwi_u_t;
+
+/************************************************************************
+ *									*
+ *  Log errors which have occurred in the II wrapper. The errors are	*
+ * cleared by writing to the IECLR register.				* 
+ *									*
+ ************************************************************************/
+
+typedef union ii_iwel_u {
+	u64 ii_iwel_regval;
+	struct {
+		u64 i_intr_timed_out:1;
+		u64 i_rsvd:7;
+		u64 i_cam_overflow:1;
+		u64 i_cam_read_miss:1;
+		u64 i_rsvd1:2;
+		u64 i_ioq_rep_underflow:1;
+		u64 i_ioq_req_underflow:1;
+		u64 i_ioq_rep_overflow:1;
+		u64 i_ioq_req_overflow:1;
+		u64 i_iiq_rep_overflow:1;
+		u64 i_iiq_req_overflow:1;
+		u64 i_rsvd2:6;
+		u64 i_ii_xn_rep_cred_over_under:1;
+		u64 i_ii_xn_req_cred_over_under:1;
+		u64 i_rsvd3:6;
+		u64 i_ii_xn_invalid_cmd:1;
+		u64 i_xn_ii_invalid_cmd:1;
+		u64 i_rsvd4:30;
+	} ii_iwel_fld_s;
+} ii_iwel_u_t;
+
+/************************************************************************
+ *									*
+ *  Controls the II wrapper.						* 
+ *									*
+ ************************************************************************/
+
+typedef union ii_iwc_u {
+	u64 ii_iwc_regval;
+	struct {
+		u64 i_dma_byte_swap:1;
+		u64 i_rsvd:3;
+		u64 i_cam_read_lines_reset:1;
+		u64 i_rsvd1:3;
+		u64 i_ii_xn_cred_over_under_log:1;
+		u64 i_rsvd2:19;
+		u64 i_xn_rep_iq_depth:5;
+		u64 i_rsvd3:3;
+		u64 i_xn_req_iq_depth:5;
+		u64 i_rsvd4:3;
+		u64 i_iiq_depth:6;
+		u64 i_rsvd5:12;
+		u64 i_force_rep_cred:1;
+		u64 i_force_req_cred:1;
+	} ii_iwc_fld_s;
+} ii_iwc_u_t;
+
+/************************************************************************
+ *									*
+ *  Status in the II wrapper.						* 
+ *									*
+ ************************************************************************/
+
+typedef union ii_iws_u {
+	u64 ii_iws_regval;
+	struct {
+		u64 i_xn_rep_iq_credits:5;
+		u64 i_rsvd:3;
+		u64 i_xn_req_iq_credits:5;
+		u64 i_rsvd1:51;
+	} ii_iws_fld_s;
+} ii_iws_u_t;
+
+/************************************************************************
+ *									*
+ *  Masks errors in the IWEL register.					*
+ *									*
+ ************************************************************************/
+
+typedef union ii_iweim_u {
+	u64 ii_iweim_regval;
+	struct {
+		u64 i_intr_timed_out:1;
+		u64 i_rsvd:7;
+		u64 i_cam_overflow:1;
+		u64 i_cam_read_miss:1;
+		u64 i_rsvd1:2;
+		u64 i_ioq_rep_underflow:1;
+		u64 i_ioq_req_underflow:1;
+		u64 i_ioq_rep_overflow:1;
+		u64 i_ioq_req_overflow:1;
+		u64 i_iiq_rep_overflow:1;
+		u64 i_iiq_req_overflow:1;
+		u64 i_rsvd2:6;
+		u64 i_ii_xn_rep_cred_overflow:1;
+		u64 i_ii_xn_req_cred_overflow:1;
+		u64 i_rsvd3:6;
+		u64 i_ii_xn_invalid_cmd:1;
+		u64 i_xn_ii_invalid_cmd:1;
+		u64 i_rsvd4:30;
+	} ii_iweim_fld_s;
+} ii_iweim_u_t;
+
+/************************************************************************
+ *									*
+ *  A write to this register causes a particular field in the           *
+ * corresponding widget's PRB entry to be adjusted up or down by 1.     *
+ * This counter should be used when recovering from error and reset     *
+ * conditions. Note that software would be capable of causing           *
+ * inadvertent overflow or underflow of these counters.                 *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ipca_u {
+	u64 ii_ipca_regval;
+	struct {
+		u64 i_wid:4;
+		u64 i_adjust:1;
+		u64 i_rsvd_1:3;
+		u64 i_field:2;
+		u64 i_rsvd:54;
+	} ii_ipca_fld_s;
+} ii_ipca_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte0a_u {
+	u64 ii_iprte0a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte0a_fld_s;
+} ii_iprte0a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte1a_u {
+	u64 ii_iprte1a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte1a_fld_s;
+} ii_iprte1a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte2a_u {
+	u64 ii_iprte2a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte2a_fld_s;
+} ii_iprte2a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte3a_u {
+	u64 ii_iprte3a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte3a_fld_s;
+} ii_iprte3a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte4a_u {
+	u64 ii_iprte4a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte4a_fld_s;
+} ii_iprte4a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte5a_u {
+	u64 ii_iprte5a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte5a_fld_s;
+} ii_iprte5a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte6a_u {
+	u64 ii_iprte6a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprte6a_fld_s;
+} ii_iprte6a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte7a_u {
+	u64 ii_iprte7a_regval;
+	struct {
+		u64 i_rsvd_1:54;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} ii_iprtea7_fld_s;
+} ii_iprte7a_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte0b_u {
+	u64 ii_iprte0b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte0b_fld_s;
+} ii_iprte0b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte1b_u {
+	u64 ii_iprte1b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte1b_fld_s;
+} ii_iprte1b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte2b_u {
+	u64 ii_iprte2b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte2b_fld_s;
+} ii_iprte2b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte3b_u {
+	u64 ii_iprte3b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte3b_fld_s;
+} ii_iprte3b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte4b_u {
+	u64 ii_iprte4b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte4b_fld_s;
+} ii_iprte4b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte5b_u {
+	u64 ii_iprte5b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte5b_fld_s;
+} ii_iprte5b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte6b_u {
+	u64 ii_iprte6b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+
+	} ii_iprte6b_fld_s;
+} ii_iprte6b_u_t;
+
+/************************************************************************
+ *									*
+ *  There are 8 instances of this register. This register contains      *
+ * the information that the II has to remember once it has launched a   *
+ * PIO Read operation. The contents are used to form the correct        *
+ * Router Network packet and direct the Crosstalk reply to the          *
+ * appropriate processor.                                               *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iprte7b_u {
+	u64 ii_iprte7b_regval;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_address:47;
+		u64 i_init:3;
+		u64 i_source:11;
+	} ii_iprte7b_fld_s;
+} ii_iprte7b_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  SHub II contains a feature which did not exist in      *
+ * the Hub which automatically cleans up after a Read Response          *
+ * timeout, including deallocation of the IPRTE and recovery of IBuf    *
+ * space. The inclusion of this register in SHub is for backward        *
+ * compatibility                                                        *
+ * A write to this register causes an entry from the table of           *
+ * outstanding PIO Read Requests to be freed and returned to the        *
+ * stack of free entries. This register is used in handling the         *
+ * timeout errors that result in a PIO Reply never returning from       *
+ * Crosstalk.                                                           *
+ * Note that this register does not affect the contents of the IPRTE    *
+ * registers. The Valid bits in those registers have to be              *
+ * specifically turned off by software.                                 *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ipdr_u {
+	u64 ii_ipdr_regval;
+	struct {
+		u64 i_te:3;
+		u64 i_rsvd_1:1;
+		u64 i_pnd:1;
+		u64 i_init_rpcnt:1;
+		u64 i_rsvd:58;
+	} ii_ipdr_fld_s;
+} ii_ipdr_u_t;
+
+/************************************************************************
+ *									*
+ *  A write to this register causes a CRB entry to be returned to the   *
+ * queue of free CRBs. The entry should have previously been cleared    *
+ * (mark bit) via backdoor access to the pertinent CRB entry. This      *
+ * register is used in the last step of handling the errors that are    *
+ * captured and marked in CRB entries.  Briefly: 1) first error for     *
+ * DMA write from a particular device, and first error for a            *
+ * particular BTE stream, lead to a marked CRB entry, and processor     *
+ * interrupt, 2) software reads the error information captured in the   *
+ * CRB entry, and presumably takes some corrective action, 3)           *
+ * software clears the mark bit, and finally 4) software writes to      *
+ * the ICDR register to return the CRB entry to the list of free CRB    *
+ * entries.                                                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icdr_u {
+	u64 ii_icdr_regval;
+	struct {
+		u64 i_crb_num:4;
+		u64 i_pnd:1;
+		u64 i_rsvd:59;
+	} ii_icdr_fld_s;
+} ii_icdr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register provides debug access to two FIFOs inside of II.      *
+ * Both IOQ_MAX* fields of this register contain the instantaneous      *
+ * depth (in units of the number of available entries) of the           *
+ * associated IOQ FIFO.  A read of this register will return the        *
+ * number of free entries on each FIFO at the time of the read.  So     *
+ * when a FIFO is idle, the associated field contains the maximum       *
+ * depth of the FIFO.  This register is writable for debug reasons      *
+ * and is intended to be written with the maximum desired FIFO depth    *
+ * while the FIFO is idle. Software must assure that II is idle when    *
+ * this register is written. If there are any active entries in any     *
+ * of these FIFOs when this register is written, the results are        *
+ * undefined.                                                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ifdr_u {
+	u64 ii_ifdr_regval;
+	struct {
+		u64 i_ioq_max_rq:7;
+		u64 i_set_ioq_rq:1;
+		u64 i_ioq_max_rp:7;
+		u64 i_set_ioq_rp:1;
+		u64 i_rsvd:48;
+	} ii_ifdr_fld_s;
+} ii_ifdr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register allows the II to become sluggish in removing          *
+ * messages from its inbound queue (IIQ). This will cause messages to   *
+ * back up in either virtual channel. Disabling the "molasses" mode     *
+ * subsequently allows the II to be tested under stress. In the         *
+ * sluggish ("Molasses") mode, the localized effects of congestion      *
+ * can be observed.                                                     *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iiap_u {
+	u64 ii_iiap_regval;
+	struct {
+		u64 i_rq_mls:6;
+		u64 i_rsvd_1:2;
+		u64 i_rp_mls:6;
+		u64 i_rsvd:50;
+	} ii_iiap_fld_s;
+} ii_iiap_u_t;
+
+/************************************************************************
+ *									*
+ *  This register allows several parameters of CRB operation to be      *
+ * set. Note that writing to this register can have catastrophic side   *
+ * effects, if the CRB is not quiescent, i.e. if the CRB is             *
+ * processing protocol messages when the write occurs.                  *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icmr_u {
+	u64 ii_icmr_regval;
+	struct {
+		u64 i_sp_msg:1;
+		u64 i_rd_hdr:1;
+		u64 i_rsvd_4:2;
+		u64 i_c_cnt:4;
+		u64 i_rsvd_3:4;
+		u64 i_clr_rqpd:1;
+		u64 i_clr_rppd:1;
+		u64 i_rsvd_2:2;
+		u64 i_fc_cnt:4;
+		u64 i_crb_vld:15;
+		u64 i_crb_mark:15;
+		u64 i_rsvd_1:2;
+		u64 i_precise:1;
+		u64 i_rsvd:11;
+	} ii_icmr_fld_s;
+} ii_icmr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register allows control of the table portion of the CRB        *
+ * logic via software. Control operations from this register have       *
+ * priority over all incoming Crosstalk or BTE requests.                *
+ *									*
+ ************************************************************************/
+
+typedef union ii_iccr_u {
+	u64 ii_iccr_regval;
+	struct {
+		u64 i_crb_num:4;
+		u64 i_rsvd_1:4;
+		u64 i_cmd:8;
+		u64 i_pending:1;
+		u64 i_rsvd:47;
+	} ii_iccr_fld_s;
+} ii_iccr_u_t;
+
+/************************************************************************
+ *									*
+ *  This register allows the maximum timeout value to be programmed.    *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icto_u {
+	u64 ii_icto_regval;
+	struct {
+		u64 i_timeout:8;
+		u64 i_rsvd:56;
+	} ii_icto_fld_s;
+} ii_icto_u_t;
+
+/************************************************************************
+ *									*
+ *  This register allows the timeout prescalar to be programmed. An     *
+ * internal counter is associated with this register. When the          *
+ * internal counter reaches the value of the PRESCALE field, the        *
+ * timer registers in all valid CRBs are incremented (CRBx_D[TIMEOUT]   *
+ * field). The internal counter resets to zero, and then continues      *
+ * counting.                                                            *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ictp_u {
+	u64 ii_ictp_regval;
+	struct {
+		u64 i_prescale:24;
+		u64 i_rsvd:40;
+	} ii_ictp_fld_s;
+} ii_ictp_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 15 CRB Entries (ICRB0 to ICRBE) that are     *
+ * used for Crosstalk operations (both cacheline and partial            *
+ * operations) or BTE/IO. Because the CRB entries are very wide, five   *
+ * registers (_A to _E) are required to read and write each entry.      *
+ * The CRB Entry registers can be conceptualized as rows and columns    *
+ * (illustrated in the table above). Each row contains the 4            *
+ * registers required for a single CRB Entry. The first doubleword      *
+ * (column) for each entry is labeled A, and the second doubleword      *
+ * (higher address) is labeled B, the third doubleword is labeled C,    *
+ * the fourth doubleword is labeled D and the fifth doubleword is       *
+ * labeled E. All CRB entries have their addresses on a quarter         *
+ * cacheline aligned boundary.                   *
+ * Upon reset, only the following fields are initialized: valid         *
+ * (VLD), priority count, timeout, timeout valid, and context valid.    *
+ * All other bits should be cleared by software before use (after       *
+ * recovering any potential error state from before the reset).         *
+ * The following four tables summarize the format for the four          *
+ * registers that are used for each ICRB# Entry.                        *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icrb0_a_u {
+	u64 ii_icrb0_a_regval;
+	struct {
+		u64 ia_iow:1;
+		u64 ia_vld:1;
+		u64 ia_addr:47;
+		u64 ia_tnum:5;
+		u64 ia_sidn:4;
+		u64 ia_rsvd:6;
+	} ii_icrb0_a_fld_s;
+} ii_icrb0_a_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 15 CRB Entries (ICRB0 to ICRBE) that are     *
+ * used for Crosstalk operations (both cacheline and partial            *
+ * operations) or BTE/IO. Because the CRB entries are very wide, five   *
+ * registers (_A to _E) are required to read and write each entry.      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icrb0_b_u {
+	u64 ii_icrb0_b_regval;
+	struct {
+		u64 ib_xt_err:1;
+		u64 ib_mark:1;
+		u64 ib_ln_uce:1;
+		u64 ib_errcode:3;
+		u64 ib_error:1;
+		u64 ib_stall__bte_1:1;
+		u64 ib_stall__bte_0:1;
+		u64 ib_stall__intr:1;
+		u64 ib_stall_ib:1;
+		u64 ib_intvn:1;
+		u64 ib_wb:1;
+		u64 ib_hold:1;
+		u64 ib_ack:1;
+		u64 ib_resp:1;
+		u64 ib_ack_cnt:11;
+		u64 ib_rsvd:7;
+		u64 ib_exc:5;
+		u64 ib_init:3;
+		u64 ib_imsg:8;
+		u64 ib_imsgtype:2;
+		u64 ib_use_old:1;
+		u64 ib_rsvd_1:11;
+	} ii_icrb0_b_fld_s;
+} ii_icrb0_b_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 15 CRB Entries (ICRB0 to ICRBE) that are     *
+ * used for Crosstalk operations (both cacheline and partial            *
+ * operations) or BTE/IO. Because the CRB entries are very wide, five   *
+ * registers (_A to _E) are required to read and write each entry.      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icrb0_c_u {
+	u64 ii_icrb0_c_regval;
+	struct {
+		u64 ic_source:15;
+		u64 ic_size:2;
+		u64 ic_ct:1;
+		u64 ic_bte_num:1;
+		u64 ic_gbr:1;
+		u64 ic_resprqd:1;
+		u64 ic_bo:1;
+		u64 ic_suppl:15;
+		u64 ic_rsvd:27;
+	} ii_icrb0_c_fld_s;
+} ii_icrb0_c_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 15 CRB Entries (ICRB0 to ICRBE) that are     *
+ * used for Crosstalk operations (both cacheline and partial            *
+ * operations) or BTE/IO. Because the CRB entries are very wide, five   *
+ * registers (_A to _E) are required to read and write each entry.      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icrb0_d_u {
+	u64 ii_icrb0_d_regval;
+	struct {
+		u64 id_pa_be:43;
+		u64 id_bte_op:1;
+		u64 id_pr_psc:4;
+		u64 id_pr_cnt:4;
+		u64 id_sleep:1;
+		u64 id_rsvd:11;
+	} ii_icrb0_d_fld_s;
+} ii_icrb0_d_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  There are 15 CRB Entries (ICRB0 to ICRBE) that are     *
+ * used for Crosstalk operations (both cacheline and partial            *
+ * operations) or BTE/IO. Because the CRB entries are very wide, five   *
+ * registers (_A to _E) are required to read and write each entry.      *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icrb0_e_u {
+	u64 ii_icrb0_e_regval;
+	struct {
+		u64 ie_timeout:8;
+		u64 ie_context:15;
+		u64 ie_rsvd:1;
+		u64 ie_tvld:1;
+		u64 ie_cvld:1;
+		u64 ie_rsvd_0:38;
+	} ii_icrb0_e_fld_s;
+} ii_icrb0_e_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the lower 64 bits of the header of the       *
+ * spurious message captured by II. Valid when the SP_MSG bit in ICMR   *
+ * register is set.                                                     *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icsml_u {
+	u64 ii_icsml_regval;
+	struct {
+		u64 i_tt_addr:47;
+		u64 i_newsuppl_ex:14;
+		u64 i_reserved:2;
+		u64 i_overflow:1;
+	} ii_icsml_fld_s;
+} ii_icsml_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the middle 64 bits of the header of the      *
+ * spurious message captured by II. Valid when the SP_MSG bit in ICMR   *
+ * register is set.                                                     *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icsmm_u {
+	u64 ii_icsmm_regval;
+	struct {
+		u64 i_tt_ack_cnt:11;
+		u64 i_reserved:53;
+	} ii_icsmm_fld_s;
+} ii_icsmm_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the microscopic state, all the inputs to     *
+ * the protocol table, captured with the spurious message. Valid when   *
+ * the SP_MSG bit in the ICMR register is set.                          *
+ *									*
+ ************************************************************************/
+
+typedef union ii_icsmh_u {
+	u64 ii_icsmh_regval;
+	struct {
+		u64 i_tt_vld:1;
+		u64 i_xerr:1;
+		u64 i_ft_cwact_o:1;
+		u64 i_ft_wact_o:1;
+		u64 i_ft_active_o:1;
+		u64 i_sync:1;
+		u64 i_mnusg:1;
+		u64 i_mnusz:1;
+		u64 i_plusz:1;
+		u64 i_plusg:1;
+		u64 i_tt_exc:5;
+		u64 i_tt_wb:1;
+		u64 i_tt_hold:1;
+		u64 i_tt_ack:1;
+		u64 i_tt_resp:1;
+		u64 i_tt_intvn:1;
+		u64 i_g_stall_bte1:1;
+		u64 i_g_stall_bte0:1;
+		u64 i_g_stall_il:1;
+		u64 i_g_stall_ib:1;
+		u64 i_tt_imsg:8;
+		u64 i_tt_imsgtype:2;
+		u64 i_tt_use_old:1;
+		u64 i_tt_respreqd:1;
+		u64 i_tt_bte_num:1;
+		u64 i_cbn:1;
+		u64 i_match:1;
+		u64 i_rpcnt_lt_34:1;
+		u64 i_rpcnt_ge_34:1;
+		u64 i_rpcnt_lt_18:1;
+		u64 i_rpcnt_ge_18:1;
+		u64 i_rpcnt_lt_2:1;
+		u64 i_rpcnt_ge_2:1;
+		u64 i_rqcnt_lt_18:1;
+		u64 i_rqcnt_ge_18:1;
+		u64 i_rqcnt_lt_2:1;
+		u64 i_rqcnt_ge_2:1;
+		u64 i_tt_device:7;
+		u64 i_tt_init:3;
+		u64 i_reserved:5;
+	} ii_icsmh_fld_s;
+} ii_icsmh_u_t;
+
+/************************************************************************
+ *									*
+ *  The Shub DEBUG unit provides a 3-bit selection signal to the        *
+ * II core and a 3-bit selection signal to the fsbclk domain in the II  *
+ * wrapper.                                                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_idbss_u {
+	u64 ii_idbss_regval;
+	struct {
+		u64 i_iioclk_core_submenu:3;
+		u64 i_rsvd:5;
+		u64 i_fsbclk_wrapper_submenu:3;
+		u64 i_rsvd_1:5;
+		u64 i_iioclk_menu:5;
+		u64 i_rsvd_2:43;
+	} ii_idbss_fld_s;
+} ii_idbss_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register is used to set up the length for a       *
+ * transfer and then to monitor the progress of that transfer. This     *
+ * register needs to be initialized before a transfer is started. A     *
+ * legitimate write to this register will set the Busy bit, clear the   *
+ * Error bit, and initialize the length to the value desired.           *
+ * While the transfer is in progress, hardware will decrement the       *
+ * length field with each successful block that is copied. Once the     *
+ * transfer completes, hardware will clear the Busy bit. The length     *
+ * field will also contain the number of cache lines left to be         *
+ * transferred.                                                         *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibls0_u {
+	u64 ii_ibls0_regval;
+	struct {
+		u64 i_length:16;
+		u64 i_error:1;
+		u64 i_rsvd_1:3;
+		u64 i_busy:1;
+		u64 i_rsvd:43;
+	} ii_ibls0_fld_s;
+} ii_ibls0_u_t;
+
+/************************************************************************
+ *									*
+ *  This register should be loaded before a transfer is started. The    *
+ * address to be loaded in bits 39:0 is the 40-bit TRex+ physical       *
+ * address as described in Section 1.3, Figure2 and Figure3. Since      *
+ * the bottom 7 bits of the address are always taken to be zero, BTE    *
+ * transfers are always cacheline-aligned.                              *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibsa0_u {
+	u64 ii_ibsa0_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:42;
+		u64 i_rsvd:15;
+	} ii_ibsa0_fld_s;
+} ii_ibsa0_u_t;
+
+/************************************************************************
+ *									*
+ *  This register should be loaded before a transfer is started. The    *
+ * address to be loaded in bits 39:0 is the 40-bit TRex+ physical       *
+ * address as described in Section 1.3, Figure2 and Figure3. Since      *
+ * the bottom 7 bits of the address are always taken to be zero, BTE    *
+ * transfers are always cacheline-aligned.                              *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibda0_u {
+	u64 ii_ibda0_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:42;
+		u64 i_rsvd:15;
+	} ii_ibda0_fld_s;
+} ii_ibda0_u_t;
+
+/************************************************************************
+ *									*
+ *  Writing to this register sets up the attributes of the transfer     *
+ * and initiates the transfer operation. Reading this register has      *
+ * the side effect of terminating any transfer in progress. Note:       *
+ * stopping a transfer midstream could have an adverse impact on the    *
+ * other BTE. If a BTE stream has to be stopped (due to error           *
+ * handling for example), both BTE streams should be stopped and        *
+ * their transfers discarded.                                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibct0_u {
+	u64 ii_ibct0_regval;
+	struct {
+		u64 i_zerofill:1;
+		u64 i_rsvd_2:3;
+		u64 i_notify:1;
+		u64 i_rsvd_1:3;
+		u64 i_poison:1;
+		u64 i_rsvd:55;
+	} ii_ibct0_fld_s;
+} ii_ibct0_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the address to which the WINV is sent.       *
+ * This address has to be cache line aligned.                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibna0_u {
+	u64 ii_ibna0_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:42;
+		u64 i_rsvd:15;
+	} ii_ibna0_fld_s;
+} ii_ibna0_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the programmable level as well as the node   *
+ * ID and PI unit of the processor to which the interrupt will be       *
+ * sent.								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibia0_u {
+	u64 ii_ibia0_regval;
+	struct {
+		u64 i_rsvd_2:1;
+		u64 i_node_id:11;
+		u64 i_rsvd_1:4;
+		u64 i_level:7;
+		u64 i_rsvd:41;
+	} ii_ibia0_fld_s;
+} ii_ibia0_u_t;
+
+/************************************************************************
+ *									*
+ * Description:  This register is used to set up the length for a       *
+ * transfer and then to monitor the progress of that transfer. This     *
+ * register needs to be initialized before a transfer is started. A     *
+ * legitimate write to this register will set the Busy bit, clear the   *
+ * Error bit, and initialize the length to the value desired.           *
+ * While the transfer is in progress, hardware will decrement the       *
+ * length field with each successful block that is copied. Once the     *
+ * transfer completes, hardware will clear the Busy bit. The length     *
+ * field will also contain the number of cache lines left to be         *
+ * transferred.                                                         *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibls1_u {
+	u64 ii_ibls1_regval;
+	struct {
+		u64 i_length:16;
+		u64 i_error:1;
+		u64 i_rsvd_1:3;
+		u64 i_busy:1;
+		u64 i_rsvd:43;
+	} ii_ibls1_fld_s;
+} ii_ibls1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register should be loaded before a transfer is started. The    *
+ * address to be loaded in bits 39:0 is the 40-bit TRex+ physical       *
+ * address as described in Section 1.3, Figure2 and Figure3. Since      *
+ * the bottom 7 bits of the address are always taken to be zero, BTE    *
+ * transfers are always cacheline-aligned.                              *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibsa1_u {
+	u64 ii_ibsa1_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:33;
+		u64 i_rsvd:24;
+	} ii_ibsa1_fld_s;
+} ii_ibsa1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register should be loaded before a transfer is started. The    *
+ * address to be loaded in bits 39:0 is the 40-bit TRex+ physical       *
+ * address as described in Section 1.3, Figure2 and Figure3. Since      *
+ * the bottom 7 bits of the address are always taken to be zero, BTE    *
+ * transfers are always cacheline-aligned.                              *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibda1_u {
+	u64 ii_ibda1_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:33;
+		u64 i_rsvd:24;
+	} ii_ibda1_fld_s;
+} ii_ibda1_u_t;
+
+/************************************************************************
+ *									*
+ *  Writing to this register sets up the attributes of the transfer     *
+ * and initiates the transfer operation. Reading this register has      *
+ * the side effect of terminating any transfer in progress. Note:       *
+ * stopping a transfer midstream could have an adverse impact on the    *
+ * other BTE. If a BTE stream has to be stopped (due to error           *
+ * handling for example), both BTE streams should be stopped and        *
+ * their transfers discarded.                                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibct1_u {
+	u64 ii_ibct1_regval;
+	struct {
+		u64 i_zerofill:1;
+		u64 i_rsvd_2:3;
+		u64 i_notify:1;
+		u64 i_rsvd_1:3;
+		u64 i_poison:1;
+		u64 i_rsvd:55;
+	} ii_ibct1_fld_s;
+} ii_ibct1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the address to which the WINV is sent.       *
+ * This address has to be cache line aligned.                           *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibna1_u {
+	u64 ii_ibna1_regval;
+	struct {
+		u64 i_rsvd_1:7;
+		u64 i_addr:33;
+		u64 i_rsvd:24;
+	} ii_ibna1_fld_s;
+} ii_ibna1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register contains the programmable level as well as the node   *
+ * ID and PI unit of the processor to which the interrupt will be       *
+ * sent.								*
+ *									*
+ ************************************************************************/
+
+typedef union ii_ibia1_u {
+	u64 ii_ibia1_regval;
+	struct {
+		u64 i_pi_id:1;
+		u64 i_node_id:8;
+		u64 i_rsvd_1:7;
+		u64 i_level:7;
+		u64 i_rsvd:41;
+	} ii_ibia1_fld_s;
+} ii_ibia1_u_t;
+
+/************************************************************************
+ *									*
+ *  This register defines the resources that feed information into      *
+ * the two performance counters located in the IO Performance           *
+ * Profiling Register. There are 17 different quantities that can be    *
+ * measured. Given these 17 different options, the two performance      *
+ * counters have 15 of them in common; menu selections 0 through 0xE    *
+ * are identical for each performance counter. As for the other two     *
+ * options, one is available from one performance counter and the       *
+ * other is available from the other performance counter. Hence, the    *
+ * II supports all 17*16=272 possible combinations of quantities to     *
+ * measure.                                                             *
+ *									*
+ ************************************************************************/
+
+typedef union ii_ipcr_u {
+	u64 ii_ipcr_regval;
+	struct {
+		u64 i_ippr0_c:4;
+		u64 i_ippr1_c:4;
+		u64 i_icct:8;
+		u64 i_rsvd:48;
+	} ii_ipcr_fld_s;
+} ii_ipcr_u_t;
+
+/************************************************************************
+ *									*
+ *									*
+ *									*
+ ************************************************************************/
+
+typedef union ii_ippr_u {
+	u64 ii_ippr_regval;
+	struct {
+		u64 i_ippr0:32;
+		u64 i_ippr1:32;
+	} ii_ippr_fld_s;
+} ii_ippr_u_t;
+
+/************************************************************************
+ *									*
+ * The following defines which were not formed into structures are	*
+ * probably identical to another register, and the name of the		*
+ * register is provided against each of these registers. This		*
+ * information needs to be checked carefully				*
+ *									*
+ *		IIO_ICRB1_A		IIO_ICRB0_A			*
+ *		IIO_ICRB1_B		IIO_ICRB0_B			*
+ *		IIO_ICRB1_C		IIO_ICRB0_C			*
+ *		IIO_ICRB1_D		IIO_ICRB0_D			*
+ *		IIO_ICRB1_E		IIO_ICRB0_E			*
+ *		IIO_ICRB2_A		IIO_ICRB0_A			*
+ *		IIO_ICRB2_B		IIO_ICRB0_B			*
+ *		IIO_ICRB2_C		IIO_ICRB0_C			*
+ *		IIO_ICRB2_D		IIO_ICRB0_D			*
+ *		IIO_ICRB2_E		IIO_ICRB0_E			*
+ *		IIO_ICRB3_A		IIO_ICRB0_A			*
+ *		IIO_ICRB3_B		IIO_ICRB0_B			*
+ *		IIO_ICRB3_C		IIO_ICRB0_C			*
+ *		IIO_ICRB3_D		IIO_ICRB0_D			*
+ *		IIO_ICRB3_E		IIO_ICRB0_E			*
+ *		IIO_ICRB4_A		IIO_ICRB0_A			*
+ *		IIO_ICRB4_B		IIO_ICRB0_B			*
+ *		IIO_ICRB4_C		IIO_ICRB0_C			*
+ *		IIO_ICRB4_D		IIO_ICRB0_D			*
+ *		IIO_ICRB4_E		IIO_ICRB0_E			*
+ *		IIO_ICRB5_A		IIO_ICRB0_A			*
+ *		IIO_ICRB5_B		IIO_ICRB0_B			*
+ *		IIO_ICRB5_C		IIO_ICRB0_C			*
+ *		IIO_ICRB5_D		IIO_ICRB0_D			*
+ *		IIO_ICRB5_E		IIO_ICRB0_E			*
+ *		IIO_ICRB6_A		IIO_ICRB0_A			*
+ *		IIO_ICRB6_B		IIO_ICRB0_B			*
+ *		IIO_ICRB6_C		IIO_ICRB0_C			*
+ *		IIO_ICRB6_D		IIO_ICRB0_D			*
+ *		IIO_ICRB6_E		IIO_ICRB0_E			*
+ *		IIO_ICRB7_A		IIO_ICRB0_A			*
+ *		IIO_ICRB7_B		IIO_ICRB0_B			*
+ *		IIO_ICRB7_C		IIO_ICRB0_C			*
+ *		IIO_ICRB7_D		IIO_ICRB0_D			*
+ *		IIO_ICRB7_E		IIO_ICRB0_E			*
+ *		IIO_ICRB8_A		IIO_ICRB0_A			*
+ *		IIO_ICRB8_B		IIO_ICRB0_B			*
+ *		IIO_ICRB8_C		IIO_ICRB0_C			*
+ *		IIO_ICRB8_D		IIO_ICRB0_D			*
+ *		IIO_ICRB8_E		IIO_ICRB0_E			*
+ *		IIO_ICRB9_A		IIO_ICRB0_A			*
+ *		IIO_ICRB9_B		IIO_ICRB0_B			*
+ *		IIO_ICRB9_C		IIO_ICRB0_C			*
+ *		IIO_ICRB9_D		IIO_ICRB0_D			*
+ *		IIO_ICRB9_E		IIO_ICRB0_E			*
+ *		IIO_ICRBA_A		IIO_ICRB0_A			*
+ *		IIO_ICRBA_B		IIO_ICRB0_B			*
+ *		IIO_ICRBA_C		IIO_ICRB0_C			*
+ *		IIO_ICRBA_D		IIO_ICRB0_D			*
+ *		IIO_ICRBA_E		IIO_ICRB0_E			*
+ *		IIO_ICRBB_A		IIO_ICRB0_A			*
+ *		IIO_ICRBB_B		IIO_ICRB0_B			*
+ *		IIO_ICRBB_C		IIO_ICRB0_C			*
+ *		IIO_ICRBB_D		IIO_ICRB0_D			*
+ *		IIO_ICRBB_E		IIO_ICRB0_E			*
+ *		IIO_ICRBC_A		IIO_ICRB0_A			*
+ *		IIO_ICRBC_B		IIO_ICRB0_B			*
+ *		IIO_ICRBC_C		IIO_ICRB0_C			*
+ *		IIO_ICRBC_D		IIO_ICRB0_D			*
+ *		IIO_ICRBC_E		IIO_ICRB0_E			*
+ *		IIO_ICRBD_A		IIO_ICRB0_A			*
+ *		IIO_ICRBD_B		IIO_ICRB0_B			*
+ *		IIO_ICRBD_C		IIO_ICRB0_C			*
+ *		IIO_ICRBD_D		IIO_ICRB0_D			*
+ *		IIO_ICRBD_E		IIO_ICRB0_E			*
+ *		IIO_ICRBE_A		IIO_ICRB0_A			*
+ *		IIO_ICRBE_B		IIO_ICRB0_B			*
+ *		IIO_ICRBE_C		IIO_ICRB0_C			*
+ *		IIO_ICRBE_D		IIO_ICRB0_D			*
+ *		IIO_ICRBE_E		IIO_ICRB0_E			*
+ *									*
+ ************************************************************************/
+
+/*
+ * Slightly friendlier names for some common registers.
+ */
+#define IIO_WIDGET              IIO_WID		/* Widget identification */
+#define IIO_WIDGET_STAT         IIO_WSTAT	/* Widget status register */
+#define IIO_WIDGET_CTRL         IIO_WCR		/* Widget control register */
+#define IIO_PROTECT             IIO_ILAPR	/* IO interface protection */
+#define IIO_PROTECT_OVRRD       IIO_ILAPO	/* IO protect override */
+#define IIO_OUTWIDGET_ACCESS    IIO_IOWA	/* Outbound widget access */
+#define IIO_INWIDGET_ACCESS     IIO_IIWA	/* Inbound widget access */
+#define IIO_INDEV_ERR_MASK      IIO_IIDEM	/* Inbound device error mask */
+#define IIO_LLP_CSR             IIO_ILCSR	/* LLP control and status */
+#define IIO_LLP_LOG             IIO_ILLR	/* LLP log */
+#define IIO_XTALKCC_TOUT        IIO_IXCC	/* Xtalk credit count timeout */
+#define IIO_XTALKTT_TOUT        IIO_IXTT	/* Xtalk tail timeout */
+#define IIO_IO_ERR_CLR          IIO_IECLR	/* IO error clear */
+#define IIO_IGFX_0 		IIO_IGFX0
+#define IIO_IGFX_1 		IIO_IGFX1
+#define IIO_IBCT_0		IIO_IBCT0
+#define IIO_IBCT_1		IIO_IBCT1
+#define IIO_IBLS_0		IIO_IBLS0
+#define IIO_IBLS_1		IIO_IBLS1
+#define IIO_IBSA_0		IIO_IBSA0
+#define IIO_IBSA_1		IIO_IBSA1
+#define IIO_IBDA_0		IIO_IBDA0
+#define IIO_IBDA_1		IIO_IBDA1
+#define IIO_IBNA_0		IIO_IBNA0
+#define IIO_IBNA_1		IIO_IBNA1
+#define IIO_IBIA_0		IIO_IBIA0
+#define IIO_IBIA_1		IIO_IBIA1
+#define IIO_IOPRB_0		IIO_IPRB0
+
+#define IIO_PRTE_A(_x)		(IIO_IPRTE0_A + (8 * (_x)))
+#define IIO_PRTE_B(_x)		(IIO_IPRTE0_B + (8 * (_x)))
+#define IIO_NUM_PRTES		8	/* Total number of PRB table entries */
+#define IIO_WIDPRTE_A(x)	IIO_PRTE_A(((x) - 8))	/* widget ID to its PRTE num */
+#define IIO_WIDPRTE_B(x)	IIO_PRTE_B(((x) - 8))	/* widget ID to its PRTE num */
+
+#define IIO_NUM_IPRBS 		9
+
+#define IIO_LLP_CSR_IS_UP		0x00002000
+#define IIO_LLP_CSR_LLP_STAT_MASK       0x00003000
+#define IIO_LLP_CSR_LLP_STAT_SHFT       12
+
+#define IIO_LLP_CB_MAX  0xffff	/* in ILLR CB_CNT, Max Check Bit errors */
+#define IIO_LLP_SN_MAX  0xffff	/* in ILLR SN_CNT, Max Sequence Number errors */
+
+/* key to IIO_PROTECT_OVRRD */
+#define IIO_PROTECT_OVRRD_KEY   0x53474972756c6573ull	/* "SGIrules" */
+
+/* BTE register names */
+#define IIO_BTE_STAT_0          IIO_IBLS_0	/* Also BTE length/status 0 */
+#define IIO_BTE_SRC_0           IIO_IBSA_0	/* Also BTE source address  0 */
+#define IIO_BTE_DEST_0          IIO_IBDA_0	/* Also BTE dest. address 0 */
+#define IIO_BTE_CTRL_0          IIO_IBCT_0	/* Also BTE control/terminate 0 */
+#define IIO_BTE_NOTIFY_0        IIO_IBNA_0	/* Also BTE notification 0 */
+#define IIO_BTE_INT_0           IIO_IBIA_0	/* Also BTE interrupt 0 */
+#define IIO_BTE_OFF_0           0	/* Base offset from BTE 0 regs. */
+#define IIO_BTE_OFF_1   	(IIO_IBLS_1 - IIO_IBLS_0)	/* Offset from base to BTE 1 */
+
+/* BTE register offsets from base */
+#define BTEOFF_STAT             0
+#define BTEOFF_SRC      	(IIO_BTE_SRC_0 - IIO_BTE_STAT_0)
+#define BTEOFF_DEST     	(IIO_BTE_DEST_0 - IIO_BTE_STAT_0)
+#define BTEOFF_CTRL     	(IIO_BTE_CTRL_0 - IIO_BTE_STAT_0)
+#define BTEOFF_NOTIFY   	(IIO_BTE_NOTIFY_0 - IIO_BTE_STAT_0)
+#define BTEOFF_INT      	(IIO_BTE_INT_0 - IIO_BTE_STAT_0)
+
+/* names used in shub diags */
+#define IIO_BASE_BTE0   IIO_IBLS_0
+#define IIO_BASE_BTE1   IIO_IBLS_1
+
+/*
+ * Macro which takes the widget number, and returns the
+ * IO PRB address of that widget.
+ * value _x is expected to be a widget number in the range
+ * 0, 8 - 0xF
+ */
+#define IIO_IOPRB(_x)	(IIO_IOPRB_0 + ( ( (_x) < HUB_WIDGET_ID_MIN ? \
+                	(_x) : \
+                	(_x) - (HUB_WIDGET_ID_MIN-1)) << 3) )
+
+/* GFX Flow Control Node/Widget Register */
+#define IIO_IGFX_W_NUM_BITS	4	/* size of widget num field */
+#define IIO_IGFX_W_NUM_MASK	((1<<IIO_IGFX_W_NUM_BITS)-1)
+#define IIO_IGFX_W_NUM_SHIFT	0
+#define IIO_IGFX_PI_NUM_BITS	1	/* size of PI num field */
+#define IIO_IGFX_PI_NUM_MASK	((1<<IIO_IGFX_PI_NUM_BITS)-1)
+#define IIO_IGFX_PI_NUM_SHIFT	4
+#define IIO_IGFX_N_NUM_BITS	8	/* size of node num field */
+#define IIO_IGFX_N_NUM_MASK	((1<<IIO_IGFX_N_NUM_BITS)-1)
+#define IIO_IGFX_N_NUM_SHIFT	5
+#define IIO_IGFX_P_NUM_BITS	1	/* size of processor num field */
+#define IIO_IGFX_P_NUM_MASK	((1<<IIO_IGFX_P_NUM_BITS)-1)
+#define IIO_IGFX_P_NUM_SHIFT	16
+#define IIO_IGFX_INIT(widget, pi, node, cpu)				(\
+	(((widget) & IIO_IGFX_W_NUM_MASK) << IIO_IGFX_W_NUM_SHIFT) |	 \
+	(((pi)     & IIO_IGFX_PI_NUM_MASK)<< IIO_IGFX_PI_NUM_SHIFT)|	 \
+	(((node)   & IIO_IGFX_N_NUM_MASK) << IIO_IGFX_N_NUM_SHIFT) |	 \
+	(((cpu)    & IIO_IGFX_P_NUM_MASK) << IIO_IGFX_P_NUM_SHIFT))
+
+/* Scratch registers (all bits available) */
+#define IIO_SCRATCH_REG0        IIO_ISCR0
+#define IIO_SCRATCH_REG1        IIO_ISCR1
+#define IIO_SCRATCH_MASK        0xffffffffffffffffUL
+
+#define IIO_SCRATCH_BIT0_0      0x0000000000000001UL
+#define IIO_SCRATCH_BIT0_1      0x0000000000000002UL
+#define IIO_SCRATCH_BIT0_2      0x0000000000000004UL
+#define IIO_SCRATCH_BIT0_3      0x0000000000000008UL
+#define IIO_SCRATCH_BIT0_4      0x0000000000000010UL
+#define IIO_SCRATCH_BIT0_5      0x0000000000000020UL
+#define IIO_SCRATCH_BIT0_6      0x0000000000000040UL
+#define IIO_SCRATCH_BIT0_7      0x0000000000000080UL
+#define IIO_SCRATCH_BIT0_8      0x0000000000000100UL
+#define IIO_SCRATCH_BIT0_9      0x0000000000000200UL
+#define IIO_SCRATCH_BIT0_A      0x0000000000000400UL
+
+#define IIO_SCRATCH_BIT1_0      0x0000000000000001UL
+#define IIO_SCRATCH_BIT1_1      0x0000000000000002UL
+/* IO Translation Table Entries */
+#define IIO_NUM_ITTES   7	/* ITTEs numbered 0..6 */
+					/* Hw manuals number them 1..7! */
+/*
+ * IIO_IMEM Register fields.
+ */
+#define IIO_IMEM_W0ESD  0x1UL	/* Widget 0 shut down due to error */
+#define IIO_IMEM_B0ESD	(1UL << 4)	/* BTE 0 shut down due to error */
+#define IIO_IMEM_B1ESD	(1UL << 8)	/* BTE 1 Shut down due to error */
+
+/*
+ * As a permanent workaround for a bug in the PI side of the shub, we've
+ * redefined big window 7 as small window 0.
+ XXX does this still apply for SN1??
+ */
+#define HUB_NUM_BIG_WINDOW	(IIO_NUM_ITTES - 1)
+
+/*
+ * Use the top big window as a surrogate for the first small window
+ */
+#define SWIN0_BIGWIN            HUB_NUM_BIG_WINDOW
+
+#define ILCSR_WARM_RESET        0x100
+
+/*
+ * CRB manipulation macros
+ *	The CRB macros are slightly complicated, since there are up to
+ *	four registers associated with each CRB entry.
+ */
+#define IIO_NUM_CRBS            15	/* Number of CRBs */
+#define IIO_NUM_PC_CRBS         4	/* Number of partial cache CRBs */
+#define IIO_ICRB_OFFSET         8
+#define IIO_ICRB_0              IIO_ICRB0_A
+#define IIO_ICRB_ADDR_SHFT	2	/* Shift to get proper address */
+/* XXX - This is now tuneable:
+        #define IIO_FIRST_PC_ENTRY 12
+ */
+
+#define IIO_ICRB_A(_x)	((u64)(IIO_ICRB_0 + (6 * IIO_ICRB_OFFSET * (_x))))
+#define IIO_ICRB_B(_x)	((u64)((char *)IIO_ICRB_A(_x) + 1*IIO_ICRB_OFFSET))
+#define IIO_ICRB_C(_x)	((u64)((char *)IIO_ICRB_A(_x) + 2*IIO_ICRB_OFFSET))
+#define IIO_ICRB_D(_x)	((u64)((char *)IIO_ICRB_A(_x) + 3*IIO_ICRB_OFFSET))
+#define IIO_ICRB_E(_x)	((u64)((char *)IIO_ICRB_A(_x) + 4*IIO_ICRB_OFFSET))
+
+#define TNUM_TO_WIDGET_DEV(_tnum)	(_tnum & 0x7)
+
+/*
+ * values for "ecode" field
+ */
+#define IIO_ICRB_ECODE_DERR     0	/* Directory error due to IIO access */
+#define IIO_ICRB_ECODE_PERR     1	/* Poison error on IO access */
+#define IIO_ICRB_ECODE_WERR     2	/* Write error by IIO access
+					 * e.g. WINV to a Read only line. */
+#define IIO_ICRB_ECODE_AERR     3	/* Access error caused by IIO access */
+#define IIO_ICRB_ECODE_PWERR    4	/* Error on partial write */
+#define IIO_ICRB_ECODE_PRERR    5	/* Error on partial read  */
+#define IIO_ICRB_ECODE_TOUT     6	/* CRB timeout before deallocating */
+#define IIO_ICRB_ECODE_XTERR    7	/* Incoming xtalk pkt had error bit */
+
+/*
+ * Values for field imsgtype
+ */
+#define IIO_ICRB_IMSGT_XTALK    0	/* Incoming Meessage from Xtalk */
+#define IIO_ICRB_IMSGT_BTE      1	/* Incoming message from BTE    */
+#define IIO_ICRB_IMSGT_SN1NET   2	/* Incoming message from SN1 net */
+#define IIO_ICRB_IMSGT_CRB      3	/* Incoming message from CRB ???  */
+
+/*
+ * values for field initiator.
+ */
+#define IIO_ICRB_INIT_XTALK     0	/* Message originated in xtalk  */
+#define IIO_ICRB_INIT_BTE0      0x1	/* Message originated in BTE 0  */
+#define IIO_ICRB_INIT_SN1NET    0x2	/* Message originated in SN1net */
+#define IIO_ICRB_INIT_CRB       0x3	/* Message originated in CRB ?  */
+#define IIO_ICRB_INIT_BTE1      0x5	/* MEssage originated in BTE 1  */
+
+/*
+ * Number of credits Hub widget has while sending req/response to
+ * xbow.
+ * Value of 3 is required by Xbow 1.1
+ * We may be able to increase this to 4 with Xbow 1.2.
+ */
+#define		   HUBII_XBOW_CREDIT       3
+#define		   HUBII_XBOW_REV2_CREDIT  4
+
+/*
+ * Number of credits that xtalk devices should use when communicating
+ * with a SHub (depth of SHub's queue).
+ */
+#define HUB_CREDIT 4
+
+/*
+ * Some IIO_PRB fields
+ */
+#define IIO_PRB_MULTI_ERR	(1LL << 63)
+#define IIO_PRB_SPUR_RD		(1LL << 51)
+#define IIO_PRB_SPUR_WR		(1LL << 50)
+#define IIO_PRB_RD_TO		(1LL << 49)
+#define IIO_PRB_ERROR		(1LL << 48)
+
+/*************************************************************************
+
+ Some of the IIO field masks and shifts are defined here.
+ This is in order to maintain compatibility in SN0 and SN1 code
+ 
+**************************************************************************/
+
+/*
+ * ICMR register fields
+ * (Note: the IIO_ICMR_P_CNT and IIO_ICMR_PC_VLD from Hub are not
+ * present in SHub)
+ */
+
+#define IIO_ICMR_CRB_VLD_SHFT   20
+#define IIO_ICMR_CRB_VLD_MASK	(0x7fffUL << IIO_ICMR_CRB_VLD_SHFT)
+
+#define IIO_ICMR_FC_CNT_SHFT    16
+#define IIO_ICMR_FC_CNT_MASK	(0xf << IIO_ICMR_FC_CNT_SHFT)
+
+#define IIO_ICMR_C_CNT_SHFT     4
+#define IIO_ICMR_C_CNT_MASK	(0xf << IIO_ICMR_C_CNT_SHFT)
+
+#define IIO_ICMR_PRECISE	(1UL << 52)
+#define IIO_ICMR_CLR_RPPD	(1UL << 13)
+#define IIO_ICMR_CLR_RQPD	(1UL << 12)
+
+/*
+ * IIO PIO Deallocation register field masks : (IIO_IPDR)
+ XXX present but not needed in bedrock?  See the manual.
+ */
+#define IIO_IPDR_PND    	(1 << 4)
+
+/*
+ * IIO CRB deallocation register field masks: (IIO_ICDR)
+ */
+#define IIO_ICDR_PND    	(1 << 4)
+
+/* 
+ * IO BTE Length/Status (IIO_IBLS) register bit field definitions
+ */
+#define IBLS_BUSY		(0x1UL << 20)
+#define IBLS_ERROR_SHFT		16
+#define IBLS_ERROR		(0x1UL << IBLS_ERROR_SHFT)
+#define IBLS_LENGTH_MASK	0xffff
+
+/*
+ * IO BTE Control/Terminate register (IBCT) register bit field definitions
+ */
+#define IBCT_POISON		(0x1UL << 8)
+#define IBCT_NOTIFY		(0x1UL << 4)
+#define IBCT_ZFIL_MODE		(0x1UL << 0)
+
+/*
+ * IIO Incoming Error Packet Header (IIO_IIEPH1/IIO_IIEPH2)
+ */
+#define IIEPH1_VALID		(1UL << 44)
+#define IIEPH1_OVERRUN		(1UL << 40)
+#define IIEPH1_ERR_TYPE_SHFT	32
+#define IIEPH1_ERR_TYPE_MASK	0xf
+#define IIEPH1_SOURCE_SHFT	20
+#define IIEPH1_SOURCE_MASK	11
+#define IIEPH1_SUPPL_SHFT	8
+#define IIEPH1_SUPPL_MASK	11
+#define IIEPH1_CMD_SHFT		0
+#define IIEPH1_CMD_MASK		7
+
+#define IIEPH2_TAIL		(1UL << 40)
+#define IIEPH2_ADDRESS_SHFT	0
+#define IIEPH2_ADDRESS_MASK	38
+
+#define IIEPH1_ERR_SHORT_REQ	2
+#define IIEPH1_ERR_SHORT_REPLY	3
+#define IIEPH1_ERR_LONG_REQ	4
+#define IIEPH1_ERR_LONG_REPLY	5
+
+/*
+ * IO Error Clear register bit field definitions
+ */
+#define IECLR_PI1_FWD_INT	(1UL << 31)	/* clear PI1_FORWARD_INT in iidsr */
+#define IECLR_PI0_FWD_INT	(1UL << 30)	/* clear PI0_FORWARD_INT in iidsr */
+#define IECLR_SPUR_RD_HDR	(1UL << 29)	/* clear valid bit in ixss reg */
+#define IECLR_BTE1		(1UL << 18)	/* clear bte error 1 */
+#define IECLR_BTE0		(1UL << 17)	/* clear bte error 0 */
+#define IECLR_CRAZY		(1UL << 16)	/* clear crazy bit in wstat reg */
+#define IECLR_PRB_F		(1UL << 15)	/* clear err bit in PRB_F reg */
+#define IECLR_PRB_E		(1UL << 14)	/* clear err bit in PRB_E reg */
+#define IECLR_PRB_D		(1UL << 13)	/* clear err bit in PRB_D reg */
+#define IECLR_PRB_C		(1UL << 12)	/* clear err bit in PRB_C reg */
+#define IECLR_PRB_B		(1UL << 11)	/* clear err bit in PRB_B reg */
+#define IECLR_PRB_A		(1UL << 10)	/* clear err bit in PRB_A reg */
+#define IECLR_PRB_9		(1UL << 9)	/* clear err bit in PRB_9 reg */
+#define IECLR_PRB_8		(1UL << 8)	/* clear err bit in PRB_8 reg */
+#define IECLR_PRB_0		(1UL << 0)	/* clear err bit in PRB_0 reg */
+
+/*
+ * IIO CRB control register Fields: IIO_ICCR 
+ */
+#define	IIO_ICCR_PENDING	0x10000
+#define	IIO_ICCR_CMD_MASK	0xFF
+#define	IIO_ICCR_CMD_SHFT	7
+#define	IIO_ICCR_CMD_NOP	0x0	/* No Op */
+#define	IIO_ICCR_CMD_WAKE	0x100	/* Reactivate CRB entry and process */
+#define	IIO_ICCR_CMD_TIMEOUT	0x200	/* Make CRB timeout & mark invalid */
+#define	IIO_ICCR_CMD_EJECT	0x400	/* Contents of entry written to memory
+					 * via a WB
+					 */
+#define	IIO_ICCR_CMD_FLUSH	0x800
+
+/*
+ *
+ * CRB Register description.
+ *
+ * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING
+ * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING
+ * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING
+ * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING
+ * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING * WARNING
+ *
+ * Many of the fields in CRB are status bits used by hardware
+ * for implementation of the protocol. It's very dangerous to
+ * mess around with the CRB registers.
+ *
+ * It's OK to read the CRB registers and try to make sense out of the
+ * fields in CRB.
+ *
+ * Updating CRB requires all activities in Hub IIO to be quiesced.
+ * otherwise, a write to CRB could corrupt other CRB entries.
+ * CRBs are here only as a back door peek to shub IIO's status.
+ * Quiescing implies  no dmas no PIOs
+ * either directly from the cpu or from sn0net.
+ * this is not something that can be done easily. So, AVOID updating
+ * CRBs.
+ */
+
+/*
+ * Easy access macros for CRBs, all 5 registers (A-E)
+ */
+typedef ii_icrb0_a_u_t icrba_t;
+#define a_sidn		ii_icrb0_a_fld_s.ia_sidn
+#define a_tnum		ii_icrb0_a_fld_s.ia_tnum
+#define a_addr          ii_icrb0_a_fld_s.ia_addr
+#define a_valid         ii_icrb0_a_fld_s.ia_vld
+#define a_iow           ii_icrb0_a_fld_s.ia_iow
+#define a_regvalue	ii_icrb0_a_regval
+
+typedef ii_icrb0_b_u_t icrbb_t;
+#define b_use_old       ii_icrb0_b_fld_s.ib_use_old
+#define b_imsgtype      ii_icrb0_b_fld_s.ib_imsgtype
+#define b_imsg          ii_icrb0_b_fld_s.ib_imsg
+#define b_initiator     ii_icrb0_b_fld_s.ib_init
+#define b_exc           ii_icrb0_b_fld_s.ib_exc
+#define b_ackcnt        ii_icrb0_b_fld_s.ib_ack_cnt
+#define b_resp          ii_icrb0_b_fld_s.ib_resp
+#define b_ack           ii_icrb0_b_fld_s.ib_ack
+#define b_hold          ii_icrb0_b_fld_s.ib_hold
+#define b_wb            ii_icrb0_b_fld_s.ib_wb
+#define b_intvn         ii_icrb0_b_fld_s.ib_intvn
+#define b_stall_ib      ii_icrb0_b_fld_s.ib_stall_ib
+#define b_stall_int     ii_icrb0_b_fld_s.ib_stall__intr
+#define b_stall_bte_0   ii_icrb0_b_fld_s.ib_stall__bte_0
+#define b_stall_bte_1   ii_icrb0_b_fld_s.ib_stall__bte_1
+#define b_error         ii_icrb0_b_fld_s.ib_error
+#define b_ecode         ii_icrb0_b_fld_s.ib_errcode
+#define b_lnetuce       ii_icrb0_b_fld_s.ib_ln_uce
+#define b_mark          ii_icrb0_b_fld_s.ib_mark
+#define b_xerr          ii_icrb0_b_fld_s.ib_xt_err
+#define b_regvalue	ii_icrb0_b_regval
+
+typedef ii_icrb0_c_u_t icrbc_t;
+#define c_suppl         ii_icrb0_c_fld_s.ic_suppl
+#define c_barrop        ii_icrb0_c_fld_s.ic_bo
+#define c_doresp        ii_icrb0_c_fld_s.ic_resprqd
+#define c_gbr           ii_icrb0_c_fld_s.ic_gbr
+#define c_btenum        ii_icrb0_c_fld_s.ic_bte_num
+#define c_cohtrans      ii_icrb0_c_fld_s.ic_ct
+#define c_xtsize        ii_icrb0_c_fld_s.ic_size
+#define c_source        ii_icrb0_c_fld_s.ic_source
+#define c_regvalue	ii_icrb0_c_regval
+
+typedef ii_icrb0_d_u_t icrbd_t;
+#define d_sleep         ii_icrb0_d_fld_s.id_sleep
+#define d_pricnt        ii_icrb0_d_fld_s.id_pr_cnt
+#define d_pripsc        ii_icrb0_d_fld_s.id_pr_psc
+#define d_bteop         ii_icrb0_d_fld_s.id_bte_op
+#define d_bteaddr       ii_icrb0_d_fld_s.id_pa_be	/* ic_pa_be fld has 2 names */
+#define d_benable       ii_icrb0_d_fld_s.id_pa_be	/* ic_pa_be fld has 2 names */
+#define d_regvalue	ii_icrb0_d_regval
+
+typedef ii_icrb0_e_u_t icrbe_t;
+#define icrbe_ctxtvld   ii_icrb0_e_fld_s.ie_cvld
+#define icrbe_toutvld   ii_icrb0_e_fld_s.ie_tvld
+#define icrbe_context   ii_icrb0_e_fld_s.ie_context
+#define icrbe_timeout   ii_icrb0_e_fld_s.ie_timeout
+#define e_regvalue	ii_icrb0_e_regval
+
+/* Number of widgets supported by shub */
+#define HUB_NUM_WIDGET          9
+#define HUB_WIDGET_ID_MIN       0x8
+#define HUB_WIDGET_ID_MAX       0xf
+
+#define HUB_WIDGET_PART_NUM     0xc120
+#define MAX_HUBS_PER_XBOW       2
+
+/* A few more #defines for backwards compatibility */
+#define iprb_t          ii_iprb0_u_t
+#define iprb_regval     ii_iprb0_regval
+#define iprb_mult_err	ii_iprb0_fld_s.i_mult_err
+#define iprb_spur_rd	ii_iprb0_fld_s.i_spur_rd
+#define iprb_spur_wr	ii_iprb0_fld_s.i_spur_wr
+#define iprb_rd_to	ii_iprb0_fld_s.i_rd_to
+#define iprb_ovflow     ii_iprb0_fld_s.i_of_cnt
+#define iprb_error      ii_iprb0_fld_s.i_error
+#define iprb_ff         ii_iprb0_fld_s.i_f
+#define iprb_mode       ii_iprb0_fld_s.i_m
+#define iprb_bnakctr    ii_iprb0_fld_s.i_nb
+#define iprb_anakctr    ii_iprb0_fld_s.i_na
+#define iprb_xtalkctr   ii_iprb0_fld_s.i_c
+
+#define LNK_STAT_WORKING        0x2		/* LLP is working */
+
+#define IIO_WSTAT_ECRAZY	(1ULL << 32)	/* Hub gone crazy */
+#define IIO_WSTAT_TXRETRY	(1ULL << 9)	/* Hub Tx Retry timeout */
+#define IIO_WSTAT_TXRETRY_MASK  0x7F		/* should be 0xFF?? */
+#define IIO_WSTAT_TXRETRY_SHFT  16
+#define IIO_WSTAT_TXRETRY_CNT(w)	(((w) >> IIO_WSTAT_TXRETRY_SHFT) & \
+                          		IIO_WSTAT_TXRETRY_MASK)
+
+/* Number of II perf. counters we can multiplex at once */
+
+#define IO_PERF_SETS	32
+
+/* Bit for the widget in inbound access register */
+#define IIO_IIWA_WIDGET(_w)	((u64)(1ULL << _w))
+/* Bit for the widget in outbound access register */
+#define IIO_IOWA_WIDGET(_w)	((u64)(1ULL << _w))
+
+/* NOTE: The following define assumes that we are going to get
+ * widget numbers from 8 thru F and the device numbers within
+ * widget from 0 thru 7.
+ */
+#define IIO_IIDEM_WIDGETDEV_MASK(w, d)	((u64)(1ULL << (8 * ((w) - 8) + (d))))
+
+/* IO Interrupt Destination Register */
+#define IIO_IIDSR_SENT_SHIFT    28
+#define IIO_IIDSR_SENT_MASK     0x30000000
+#define IIO_IIDSR_ENB_SHIFT     24
+#define IIO_IIDSR_ENB_MASK      0x01000000
+#define IIO_IIDSR_NODE_SHIFT    9
+#define IIO_IIDSR_NODE_MASK     0x000ff700
+#define IIO_IIDSR_PI_ID_SHIFT   8
+#define IIO_IIDSR_PI_ID_MASK    0x00000100
+#define IIO_IIDSR_LVL_SHIFT     0
+#define IIO_IIDSR_LVL_MASK      0x000000ff
+
+/* Xtalk timeout threshold register (IIO_IXTT) */
+#define IXTT_RRSP_TO_SHFT	55	/* read response timeout */
+#define IXTT_RRSP_TO_MASK	(0x1FULL << IXTT_RRSP_TO_SHFT)
+#define IXTT_RRSP_PS_SHFT	32	/* read responsed TO prescalar */
+#define IXTT_RRSP_PS_MASK	(0x7FFFFFULL << IXTT_RRSP_PS_SHFT)
+#define IXTT_TAIL_TO_SHFT	0	/* tail timeout counter threshold */
+#define IXTT_TAIL_TO_MASK	(0x3FFFFFFULL << IXTT_TAIL_TO_SHFT)
+
+/*
+ * The IO LLP control status register and widget control register
+ */
+
+typedef union hubii_wcr_u {
+	u64 wcr_reg_value;
+	struct {
+		u64 wcr_widget_id:4,	/* LLP crossbar credit */
+		 wcr_tag_mode:1,	/* Tag mode */
+		 wcr_rsvd1:8,	/* Reserved */
+		 wcr_xbar_crd:3,	/* LLP crossbar credit */
+		 wcr_f_bad_pkt:1,	/* Force bad llp pkt enable */
+		 wcr_dir_con:1,	/* widget direct connect */
+		 wcr_e_thresh:5,	/* elasticity threshold */
+		 wcr_rsvd:41;	/* unused */
+	} wcr_fields_s;
+} hubii_wcr_t;
+
+#define iwcr_dir_con    wcr_fields_s.wcr_dir_con
+
+/* The structures below are defined to extract and modify the ii
+performance registers */
+
+/* io_perf_sel allows the caller to specify what tests will be
+   performed */
+
+typedef union io_perf_sel {
+	u64 perf_sel_reg;
+	struct {
+		u64 perf_ippr0:4, perf_ippr1:4, perf_icct:8, perf_rsvd:48;
+	} perf_sel_bits;
+} io_perf_sel_t;
+
+/* io_perf_cnt is to extract the count from the shub registers. Due to
+   hardware problems there is only one counter, not two. */
+
+typedef union io_perf_cnt {
+	u64 perf_cnt;
+	struct {
+		u64 perf_cnt:20, perf_rsvd2:12, perf_rsvd1:32;
+	} perf_cnt_bits;
+
+} io_perf_cnt_t;
+
+typedef union iprte_a {
+	u64 entry;
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_addr:38;
+		u64 i_init:3;
+		u64 i_source:8;
+		u64 i_rsvd:2;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} iprte_fields;
+} iprte_a_t;
+
+#endif				/* _ASM_IA64_SN_SHUBIO_H */
diff --git a/arch/ia64/include/asm/sn/simulator.h b/arch/ia64/include/asm/sn/simulator.h
new file mode 100644
index 00000000000..c2611f6cfe3
--- /dev/null
+++ b/arch/ia64/include/asm/sn/simulator.h
@@ -0,0 +1,25 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_SIMULATOR_H
+#define _ASM_IA64_SN_SIMULATOR_H
+
+#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_SGI_UV)
+#define SNMAGIC 0xaeeeeeee8badbeefL
+#define IS_MEDUSA()			({long sn; asm("mov %0=cpuid[%1]" : "=r"(sn) : "r"(2)); sn == SNMAGIC;})
+
+#define SIMULATOR_SLEEP()		asm("nop.i 0x8beef")
+#define IS_RUNNING_ON_SIMULATOR()	(sn_prom_type)
+#define IS_RUNNING_ON_FAKE_PROM()	(sn_prom_type == 2)
+extern int sn_prom_type;		/* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
+#else
+#define IS_MEDUSA()			0
+#define SIMULATOR_SLEEP()
+#define IS_RUNNING_ON_SIMULATOR()	0
+#endif
+
+#endif /* _ASM_IA64_SN_SIMULATOR_H */
diff --git a/arch/ia64/include/asm/sn/sn2/sn_hwperf.h b/arch/ia64/include/asm/sn/sn2/sn_hwperf.h
new file mode 100644
index 00000000000..e61ebac38cd
--- /dev/null
+++ b/arch/ia64/include/asm/sn/sn2/sn_hwperf.h
@@ -0,0 +1,242 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ *
+ * Data types used by the SN_SAL_HWPERF_OP SAL call for monitoring
+ * SGI Altix node and router hardware
+ *
+ * Mark Goodwin <markgw@sgi.com> Mon Aug 30 12:23:46 EST 2004
+ */
+
+#ifndef SN_HWPERF_H
+#define SN_HWPERF_H
+
+/*
+ * object structure. SN_HWPERF_ENUM_OBJECTS and SN_HWPERF_GET_CPU_INFO
+ * return an array of these. Do not change this without also
+ * changing the corresponding SAL code.
+ */
+#define SN_HWPERF_MAXSTRING		128
+struct sn_hwperf_object_info {
+	u32 id;
+	union {
+		struct {
+			u64 this_part:1;
+			u64 is_shared:1;
+		} fields;
+		struct {
+			u64 flags;
+			u64 reserved;
+		} b;
+	} f;
+	char name[SN_HWPERF_MAXSTRING];
+	char location[SN_HWPERF_MAXSTRING];
+	u32 ports;
+};
+
+#define sn_hwp_this_part	f.fields.this_part
+#define sn_hwp_is_shared	f.fields.is_shared
+#define sn_hwp_flags		f.b.flags
+
+/* macros for object classification */
+#define SN_HWPERF_IS_NODE(x)		((x) && strstr((x)->name, "SHub"))
+#define SN_HWPERF_IS_NODE_SHUB2(x)	((x) && strstr((x)->name, "SHub 2."))
+#define SN_HWPERF_IS_IONODE(x)		((x) && strstr((x)->name, "TIO"))
+#define SN_HWPERF_IS_NL3ROUTER(x)	((x) && strstr((x)->name, "NL3Router"))
+#define SN_HWPERF_IS_NL4ROUTER(x)	((x) && strstr((x)->name, "NL4Router"))
+#define SN_HWPERF_IS_OLDROUTER(x)	((x) && strstr((x)->name, "Router"))
+#define SN_HWPERF_IS_ROUTER(x)		(SN_HWPERF_IS_NL3ROUTER(x) || 		\
+					 	SN_HWPERF_IS_NL4ROUTER(x) || 	\
+					 	SN_HWPERF_IS_OLDROUTER(x))
+#define SN_HWPERF_FOREIGN(x)		((x) && !(x)->sn_hwp_this_part && !(x)->sn_hwp_is_shared)
+#define SN_HWPERF_SAME_OBJTYPE(x,y)	((SN_HWPERF_IS_NODE(x) && SN_HWPERF_IS_NODE(y)) ||\
+					(SN_HWPERF_IS_IONODE(x) && SN_HWPERF_IS_IONODE(y)) ||\
+					(SN_HWPERF_IS_ROUTER(x) && SN_HWPERF_IS_ROUTER(y)))
+
+/* numa port structure, SN_HWPERF_ENUM_PORTS returns an array of these */
+struct sn_hwperf_port_info {
+	u32 port;
+	u32 conn_id;
+	u32 conn_port;
+};
+
+/* for HWPERF_{GET,SET}_MMRS */
+struct sn_hwperf_data {
+	u64 addr;
+	u64 data;
+};
+
+/* user ioctl() argument, see below */
+struct sn_hwperf_ioctl_args {
+        u64 arg;		/* argument, usually an object id */
+        u64 sz;                 /* size of transfer */
+        void *ptr;              /* pointer to source/target */
+        u32 v0;			/* second return value */
+};
+
+/*
+ * For SN_HWPERF_{GET,SET}_MMRS and SN_HWPERF_OBJECT_DISTANCE,
+ * sn_hwperf_ioctl_args.arg can be used to specify a CPU on which
+ * to call SAL, and whether to use an interprocessor interrupt
+ * or task migration in order to do so. If the CPU specified is
+ * SN_HWPERF_ARG_ANY_CPU, then the current CPU will be used.
+ */
+#define SN_HWPERF_ARG_ANY_CPU		0x7fffffffUL
+#define SN_HWPERF_ARG_CPU_MASK		0x7fffffff00000000ULL
+#define SN_HWPERF_ARG_USE_IPI_MASK	0x8000000000000000ULL
+#define SN_HWPERF_ARG_OBJID_MASK	0x00000000ffffffffULL
+
+/* 
+ * ioctl requests on the "sn_hwperf" misc device that call SAL.
+ */
+#define SN_HWPERF_OP_MEM_COPYIN		0x1000
+#define SN_HWPERF_OP_MEM_COPYOUT	0x2000
+#define SN_HWPERF_OP_MASK		0x0fff
+
+/*
+ * Determine mem requirement.
+ * arg	don't care
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define	SN_HWPERF_GET_HEAPSIZE		1
+
+/*
+ * Install mem for SAL drvr
+ * arg	don't care
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to buffer for scratch area
+ */
+#define SN_HWPERF_INSTALL_HEAP		2
+
+/*
+ * Determine number of objects
+ * arg	don't care
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_OBJECT_COUNT		(10|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Determine object "distance", relative to a cpu. This operation can
+ * execute on a designated logical cpu number, using either an IPI or
+ * via task migration. If the cpu number is SN_HWPERF_ANY_CPU, then
+ * the current CPU is used. See the SN_HWPERF_ARG_* macros above.
+ *
+ * arg	bitmap of IPI flag, cpu number and object id
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_OBJECT_DISTANCE	(11|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Enumerate objects. Special case if sz == 8, returns the required
+ * buffer size.
+ * arg	don't care
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_object_info
+ */
+#define SN_HWPERF_ENUM_OBJECTS		(12|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Enumerate NumaLink ports for an object. Special case if sz == 8,
+ * returns the required buffer size.
+ * arg	object id
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_port_info
+ */
+#define SN_HWPERF_ENUM_PORTS		(13|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * SET/GET memory mapped registers. These operations can execute
+ * on a designated logical cpu number, using either an IPI or via
+ * task migration. If the cpu number is SN_HWPERF_ANY_CPU, then
+ * the current CPU is used. See the SN_HWPERF_ARG_* macros above.
+ *
+ * arg	bitmap of ipi flag, cpu number and object id
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_data
+ */
+#define SN_HWPERF_SET_MMRS		(14|SN_HWPERF_OP_MEM_COPYIN)
+#define SN_HWPERF_GET_MMRS		(15|SN_HWPERF_OP_MEM_COPYOUT| \
+					    SN_HWPERF_OP_MEM_COPYIN)
+/*
+ * Lock a shared object
+ * arg	object id
+ * sz	don't care
+ * p	don't care
+ */
+#define SN_HWPERF_ACQUIRE		16
+
+/*
+ * Unlock a shared object
+ * arg	object id
+ * sz	don't care
+ * p	don't care
+ */
+#define SN_HWPERF_RELEASE		17
+
+/*
+ * Break a lock on a shared object
+ * arg	object id
+ * sz	don't care
+ * p	don't care
+ */
+#define SN_HWPERF_FORCE_RELEASE		18
+
+/*
+ * ioctl requests on "sn_hwperf" that do not call SAL
+ */
+
+/*
+ * get cpu info as an array of hwperf_object_info_t. 
+ * id is logical CPU number, name is description, location
+ * is geoid (e.g. 001c04#1c). Special case if sz == 8,
+ * returns the required buffer size.
+ *
+ * arg	don't care
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_object_info
+ */
+#define SN_HWPERF_GET_CPU_INFO		(100|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Given an object id, return it's node number (aka cnode).
+ * arg	object id
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_GET_OBJ_NODE		(101|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Given a node number (cnode), return it's nasid.
+ * arg	ordinal node number (aka cnodeid)
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_GET_NODE_NASID	(102|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Given a node id, determine the id of the nearest node with CPUs
+ * and the id of the nearest node that has memory. The argument
+ * node would normally be a "headless" node, e.g. an "IO node".
+ * Return 0 on success.
+ */
+extern int sn_hwperf_get_nearest_node(cnodeid_t node,
+	cnodeid_t *near_mem, cnodeid_t *near_cpu);
+
+/* return codes */
+#define SN_HWPERF_OP_OK			0
+#define SN_HWPERF_OP_NOMEM		1
+#define SN_HWPERF_OP_NO_PERM		2
+#define SN_HWPERF_OP_IO_ERROR		3
+#define SN_HWPERF_OP_BUSY		4
+#define SN_HWPERF_OP_RECONFIGURE	253
+#define SN_HWPERF_OP_INVAL		254
+
+int sn_topology_open(struct inode *inode, struct file *file);
+int sn_topology_release(struct inode *inode, struct file *file);
+#endif				/* SN_HWPERF_H */
diff --git a/arch/ia64/include/asm/sn/sn_cpuid.h b/arch/ia64/include/asm/sn/sn_cpuid.h
new file mode 100644
index 00000000000..a676dd9ace3
--- /dev/null
+++ b/arch/ia64/include/asm/sn/sn_cpuid.h
@@ -0,0 +1,132 @@
+/* 
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+#ifndef _ASM_IA64_SN_SN_CPUID_H
+#define _ASM_IA64_SN_SN_CPUID_H
+
+#include <linux/smp.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/pda.h>
+#include <asm/intrinsics.h>
+
+
+/*
+ * Functions for converting between cpuids, nodeids and NASIDs.
+ * 
+ * These are for SGI platforms only.
+ *
+ */
+
+
+
+
+/*
+ *  Definitions of terms (these definitions are for IA64 ONLY. Other architectures
+ *  use cpuid/cpunum quite defferently):
+ *
+ *	   CPUID - a number in range of 0..NR_CPUS-1 that uniquely identifies
+ *		the cpu. The value cpuid has no significance on IA64 other than
+ *		the boot cpu is 0.
+ *			smp_processor_id() returns the cpuid of the current cpu.
+ *
+ * 	   CPU_PHYSICAL_ID (also known as HARD_PROCESSOR_ID)
+ *		This is the same as 31:24 of the processor LID register
+ *			hard_smp_processor_id()- cpu_physical_id of current processor
+ *			cpu_physical_id(cpuid) - convert a <cpuid> to a <physical_cpuid>
+ *			cpu_logical_id(phy_id) - convert a <physical_cpuid> to a <cpuid> 
+ *				* not real efficient - don't use in perf critical code
+ *
+ *         SLICE - a number in the range of 0 - 3 (typically) that represents the
+ *		cpu number on a brick.
+ *
+ *	   SUBNODE - (almost obsolete) the number of the FSB that a cpu is
+ *		connected to. This is also the same as the PI number. Usually 0 or 1.
+ *
+ *	NOTE!!!: the value of the bits in the cpu physical id (SAPICid or LID) of a cpu has no 
+ *	significance. The SAPIC id (LID) is a 16-bit cookie that has meaning only to the PROM.
+ *
+ *
+ * The macros convert between cpu physical ids & slice/nasid/cnodeid.
+ * These terms are described below:
+ *
+ *
+ * Brick
+ *          -----   -----           -----   -----       CPU
+ *          | 0 |   | 1 |           | 0 |   | 1 |       SLICE
+ *          -----   -----           -----   -----
+ *            |       |               |       |
+ *            |       |               |       |
+ *          0 |       | 2           0 |       | 2       FSB SLOT
+ *             -------                 -------  
+ *                |                       |
+ *                |                       |
+ *                |                       |
+ *             ------------      -------------
+ *             |          |      |           |
+ *             |    SHUB  |      |   SHUB    |        NASID   (0..MAX_NASIDS)
+ *             |          |----- |           |        CNODEID (0..num_compact_nodes-1)
+ *             |          |      |           |
+ *             |          |      |           |
+ *             ------------      -------------
+ *                   |                 |
+ *                           
+ *
+ */
+
+#define get_node_number(addr)			NASID_GET(addr)
+
+/*
+ * NOTE: on non-MP systems, only cpuid 0 exists
+ */
+
+extern short physical_node_map[];	/* indexed by nasid to get cnode */
+
+/*
+ * Macros for retrieving info about current cpu
+ */
+#define get_nasid()	(sn_nodepda->phys_cpuid[smp_processor_id()].nasid)
+#define get_subnode()	(sn_nodepda->phys_cpuid[smp_processor_id()].subnode)
+#define get_slice()	(sn_nodepda->phys_cpuid[smp_processor_id()].slice)
+#define get_cnode()	(sn_nodepda->phys_cpuid[smp_processor_id()].cnode)
+#define get_sapicid()	((ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff)
+
+/*
+ * Macros for retrieving info about an arbitrary cpu
+ *	cpuid - logical cpu id
+ */
+#define cpuid_to_nasid(cpuid)		(sn_nodepda->phys_cpuid[cpuid].nasid)
+#define cpuid_to_subnode(cpuid)		(sn_nodepda->phys_cpuid[cpuid].subnode)
+#define cpuid_to_slice(cpuid)		(sn_nodepda->phys_cpuid[cpuid].slice)
+
+
+/*
+ * Dont use the following in performance critical code. They require scans
+ * of potentially large tables.
+ */
+extern int nasid_slice_to_cpuid(int, int);
+
+/*
+ * cnodeid_to_nasid - convert a cnodeid to a NASID
+ */
+#define cnodeid_to_nasid(cnodeid)	(sn_cnodeid_to_nasid[cnodeid])
+ 
+/*
+ * nasid_to_cnodeid - convert a NASID to a cnodeid
+ */
+#define nasid_to_cnodeid(nasid)		(physical_node_map[nasid])
+
+/*
+ * partition_coherence_id - get the coherence ID of the current partition
+ */
+extern u8 sn_coherency_id;
+#define partition_coherence_id()	(sn_coherency_id)
+
+#endif /* _ASM_IA64_SN_SN_CPUID_H */
+
diff --git a/arch/ia64/include/asm/sn/sn_feature_sets.h b/arch/ia64/include/asm/sn/sn_feature_sets.h
new file mode 100644
index 00000000000..8e83ac117ac
--- /dev/null
+++ b/arch/ia64/include/asm/sn/sn_feature_sets.h
@@ -0,0 +1,58 @@
+#ifndef _ASM_IA64_SN_FEATURE_SETS_H
+#define _ASM_IA64_SN_FEATURE_SETS_H
+
+/*
+ * SN PROM Features
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005-2006 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+
+/* --------------------- PROM Features -----------------------------*/
+extern int sn_prom_feature_available(int id);
+
+#define MAX_PROM_FEATURE_SETS			2
+
+/*
+ * The following defines features that may or may not be supported by the
+ * current PROM. The OS uses sn_prom_feature_available(feature) to test for
+ * the presence of a PROM feature. Down rev (old) PROMs will always test
+ * "false" for new features.
+ *
+ * Use:
+ * 		if (sn_prom_feature_available(PRF_XXX))
+ * 			...
+ */
+
+#define PRF_PAL_CACHE_FLUSH_SAFE	0
+#define PRF_DEVICE_FLUSH_LIST		1
+#define PRF_HOTPLUG_SUPPORT		2
+#define PRF_CPU_DISABLE_SUPPORT		3
+
+/* --------------------- OS Features -------------------------------*/
+
+/*
+ * The following defines OS features that are optionally present in
+ * the operating system.
+ * During boot, PROM is notified of these features via a series of calls:
+ *
+ * 		ia64_sn_set_os_feature(feature1);
+ *
+ * Once enabled, a feature cannot be disabled.
+ *
+ * By default, features are disabled unless explicitly enabled.
+ *
+ * These defines must be kept in sync with the corresponding
+ * PROM definitions in feature_sets.h.
+ */
+#define  OSF_MCA_SLV_TO_OS_INIT_SLV	0
+#define  OSF_FEAT_LOG_SBES		1
+#define  OSF_ACPI_ENABLE		2
+#define  OSF_PCISEGMENT_ENABLE		3
+
+
+#endif /* _ASM_IA64_SN_FEATURE_SETS_H */
diff --git a/arch/ia64/include/asm/sn/sn_sal.h b/arch/ia64/include/asm/sn/sn_sal.h
new file mode 100644
index 00000000000..1f5ff470a5a
--- /dev/null
+++ b/arch/ia64/include/asm/sn/sn_sal.h
@@ -0,0 +1,1233 @@
+#ifndef _ASM_IA64_SN_SN_SAL_H
+#define _ASM_IA64_SN_SN_SAL_H
+
+/*
+ * System Abstraction Layer definitions for IA64
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All rights reserved.
+ */
+
+
+#include <asm/sal.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/shub_mmr.h>
+
+// SGI Specific Calls
+#define  SN_SAL_POD_MODE                           0x02000001
+#define  SN_SAL_SYSTEM_RESET                       0x02000002
+#define  SN_SAL_PROBE                              0x02000003
+#define  SN_SAL_GET_MASTER_NASID                   0x02000004
+#define	 SN_SAL_GET_KLCONFIG_ADDR		   0x02000005
+#define  SN_SAL_LOG_CE				   0x02000006
+#define  SN_SAL_REGISTER_CE			   0x02000007
+#define  SN_SAL_GET_PARTITION_ADDR		   0x02000009
+#define  SN_SAL_XP_ADDR_REGION			   0x0200000f
+#define  SN_SAL_NO_FAULT_ZONE_VIRTUAL		   0x02000010
+#define  SN_SAL_NO_FAULT_ZONE_PHYSICAL		   0x02000011
+#define  SN_SAL_PRINT_ERROR			   0x02000012
+#define  SN_SAL_REGISTER_PMI_HANDLER		   0x02000014
+#define  SN_SAL_SET_ERROR_HANDLING_FEATURES	   0x0200001a	// reentrant
+#define  SN_SAL_GET_FIT_COMPT			   0x0200001b	// reentrant
+#define  SN_SAL_GET_SAPIC_INFO                     0x0200001d
+#define  SN_SAL_GET_SN_INFO                        0x0200001e
+#define  SN_SAL_CONSOLE_PUTC                       0x02000021
+#define  SN_SAL_CONSOLE_GETC                       0x02000022
+#define  SN_SAL_CONSOLE_PUTS                       0x02000023
+#define  SN_SAL_CONSOLE_GETS                       0x02000024
+#define  SN_SAL_CONSOLE_GETS_TIMEOUT               0x02000025
+#define  SN_SAL_CONSOLE_POLL                       0x02000026
+#define  SN_SAL_CONSOLE_INTR                       0x02000027
+#define  SN_SAL_CONSOLE_PUTB			   0x02000028
+#define  SN_SAL_CONSOLE_XMIT_CHARS		   0x0200002a
+#define  SN_SAL_CONSOLE_READC			   0x0200002b
+#define  SN_SAL_SYSCTL_OP			   0x02000030
+#define  SN_SAL_SYSCTL_MODID_GET	           0x02000031
+#define  SN_SAL_SYSCTL_GET                         0x02000032
+#define  SN_SAL_SYSCTL_IOBRICK_MODULE_GET          0x02000033
+#define  SN_SAL_SYSCTL_IO_PORTSPEED_GET            0x02000035
+#define  SN_SAL_SYSCTL_SLAB_GET                    0x02000036
+#define  SN_SAL_BUS_CONFIG		   	   0x02000037
+#define  SN_SAL_SYS_SERIAL_GET			   0x02000038
+#define  SN_SAL_PARTITION_SERIAL_GET		   0x02000039
+#define  SN_SAL_SYSCTL_PARTITION_GET               0x0200003a
+#define  SN_SAL_SYSTEM_POWER_DOWN		   0x0200003b
+#define  SN_SAL_GET_MASTER_BASEIO_NASID		   0x0200003c
+#define  SN_SAL_COHERENCE                          0x0200003d
+#define  SN_SAL_MEMPROTECT                         0x0200003e
+#define  SN_SAL_SYSCTL_FRU_CAPTURE		   0x0200003f
+
+#define  SN_SAL_SYSCTL_IOBRICK_PCI_OP		   0x02000042	// reentrant
+#define	 SN_SAL_IROUTER_OP			   0x02000043
+#define  SN_SAL_SYSCTL_EVENT                       0x02000044
+#define  SN_SAL_IOIF_INTERRUPT			   0x0200004a
+#define  SN_SAL_HWPERF_OP			   0x02000050   // lock
+#define  SN_SAL_IOIF_ERROR_INTERRUPT		   0x02000051
+#define  SN_SAL_IOIF_PCI_SAFE			   0x02000052
+#define  SN_SAL_IOIF_SLOT_ENABLE		   0x02000053
+#define  SN_SAL_IOIF_SLOT_DISABLE		   0x02000054
+#define  SN_SAL_IOIF_GET_HUBDEV_INFO		   0x02000055
+#define  SN_SAL_IOIF_GET_PCIBUS_INFO		   0x02000056
+#define  SN_SAL_IOIF_GET_PCIDEV_INFO		   0x02000057
+#define  SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST	   0x02000058	// deprecated
+#define  SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST	   0x0200005a
+
+#define SN_SAL_IOIF_INIT			   0x0200005f
+#define SN_SAL_HUB_ERROR_INTERRUPT		   0x02000060
+#define SN_SAL_BTE_RECOVER			   0x02000061
+#define SN_SAL_RESERVED_DO_NOT_USE		   0x02000062
+#define SN_SAL_IOIF_GET_PCI_TOPOLOGY		   0x02000064
+
+#define  SN_SAL_GET_PROM_FEATURE_SET		   0x02000065
+#define  SN_SAL_SET_OS_FEATURE_SET		   0x02000066
+#define  SN_SAL_INJECT_ERROR			   0x02000067
+#define  SN_SAL_SET_CPU_NUMBER			   0x02000068
+
+#define  SN_SAL_KERNEL_LAUNCH_EVENT		   0x02000069
+#define  SN_SAL_WATCHLIST_ALLOC			   0x02000070
+#define  SN_SAL_WATCHLIST_FREE			   0x02000071
+
+/*
+ * Service-specific constants
+ */
+
+/* Console interrupt manipulation */
+	/* action codes */
+#define SAL_CONSOLE_INTR_OFF    0       /* turn the interrupt off */
+#define SAL_CONSOLE_INTR_ON     1       /* turn the interrupt on */
+#define SAL_CONSOLE_INTR_STATUS 2	/* retrieve the interrupt status */
+	/* interrupt specification & status return codes */
+#define SAL_CONSOLE_INTR_XMIT	1	/* output interrupt */
+#define SAL_CONSOLE_INTR_RECV	2	/* input interrupt */
+
+/* interrupt handling */
+#define SAL_INTR_ALLOC		1
+#define SAL_INTR_FREE		2
+#define SAL_INTR_REDIRECT	3
+
+/*
+ * operations available on the generic SN_SAL_SYSCTL_OP
+ * runtime service
+ */
+#define SAL_SYSCTL_OP_IOBOARD		0x0001  /*  retrieve board type */
+#define SAL_SYSCTL_OP_TIO_JLCK_RST      0x0002  /* issue TIO clock reset */
+
+/*
+ * IRouter (i.e. generalized system controller) operations
+ */
+#define SAL_IROUTER_OPEN	0	/* open a subchannel */
+#define SAL_IROUTER_CLOSE	1	/* close a subchannel */
+#define SAL_IROUTER_SEND	2	/* send part of an IRouter packet */
+#define SAL_IROUTER_RECV	3	/* receive part of an IRouter packet */
+#define SAL_IROUTER_INTR_STATUS	4	/* check the interrupt status for
+					 * an open subchannel
+					 */
+#define SAL_IROUTER_INTR_ON	5	/* enable an interrupt */
+#define SAL_IROUTER_INTR_OFF	6	/* disable an interrupt */
+#define SAL_IROUTER_INIT	7	/* initialize IRouter driver */
+
+/* IRouter interrupt mask bits */
+#define SAL_IROUTER_INTR_XMIT	SAL_CONSOLE_INTR_XMIT
+#define SAL_IROUTER_INTR_RECV	SAL_CONSOLE_INTR_RECV
+
+/*
+ * Error Handling Features
+ */
+#define SAL_ERR_FEAT_MCA_SLV_TO_OS_INIT_SLV	0x1	// obsolete
+#define SAL_ERR_FEAT_LOG_SBES			0x2	// obsolete
+#define SAL_ERR_FEAT_MFR_OVERRIDE		0x4
+#define SAL_ERR_FEAT_SBE_THRESHOLD		0xffff0000
+
+/*
+ * SAL Error Codes
+ */
+#define SALRET_MORE_PASSES	1
+#define SALRET_OK		0
+#define SALRET_NOT_IMPLEMENTED	(-1)
+#define SALRET_INVALID_ARG	(-2)
+#define SALRET_ERROR		(-3)
+
+#define SN_SAL_FAKE_PROM			   0x02009999
+
+/**
+  * sn_sal_revision - get the SGI SAL revision number
+  *
+  * The SGI PROM stores its version in the sal_[ab]_rev_(major|minor).
+  * This routine simply extracts the major and minor values and
+  * presents them in a u32 format.
+  *
+  * For example, version 4.05 would be represented at 0x0405.
+  */
+static inline u32
+sn_sal_rev(void)
+{
+	struct ia64_sal_systab *systab = __va(efi.sal_systab);
+
+	return (u32)(systab->sal_b_rev_major << 8 | systab->sal_b_rev_minor);
+}
+
+/*
+ * Returns the master console nasid, if the call fails, return an illegal
+ * value.
+ */
+static inline u64
+ia64_sn_get_console_nasid(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL(ret_stuff, SN_SAL_GET_MASTER_NASID, 0, 0, 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status < 0)
+		return ret_stuff.status;
+
+	/* Master console nasid is in 'v0' */
+	return ret_stuff.v0;
+}
+
+/*
+ * Returns the master baseio nasid, if the call fails, return an illegal
+ * value.
+ */
+static inline u64
+ia64_sn_get_master_baseio_nasid(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL(ret_stuff, SN_SAL_GET_MASTER_BASEIO_NASID, 0, 0, 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status < 0)
+		return ret_stuff.status;
+
+	/* Master baseio nasid is in 'v0' */
+	return ret_stuff.v0;
+}
+
+static inline void *
+ia64_sn_get_klconfig_addr(nasid_t nasid)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL(ret_stuff, SN_SAL_GET_KLCONFIG_ADDR, (u64)nasid, 0, 0, 0, 0, 0, 0);
+	return ret_stuff.v0 ? __va(ret_stuff.v0) : NULL;
+}
+
+/*
+ * Returns the next console character.
+ */
+static inline u64
+ia64_sn_console_getc(int *ch)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_GETC, 0, 0, 0, 0, 0, 0, 0);
+
+	/* character is in 'v0' */
+	*ch = (int)ret_stuff.v0;
+
+	return ret_stuff.status;
+}
+
+/*
+ * Read a character from the SAL console device, after a previous interrupt
+ * or poll operation has given us to know that a character is available
+ * to be read.
+ */
+static inline u64
+ia64_sn_console_readc(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_READC, 0, 0, 0, 0, 0, 0, 0);
+
+	/* character is in 'v0' */
+	return ret_stuff.v0;
+}
+
+/*
+ * Sends the given character to the console.
+ */
+static inline u64
+ia64_sn_console_putc(char ch)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_PUTC, (u64)ch, 0, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Sends the given buffer to the console.
+ */
+static inline u64
+ia64_sn_console_putb(const char *buf, int len)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0; 
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_PUTB, (u64)buf, (u64)len, 0, 0, 0, 0, 0);
+
+	if ( ret_stuff.status == 0 ) {
+		return ret_stuff.v0;
+	}
+	return (u64)0;
+}
+
+/*
+ * Print a platform error record
+ */
+static inline u64
+ia64_sn_plat_specific_err_print(int (*hook)(const char*, ...), char *rec)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_REENTRANT(ret_stuff, SN_SAL_PRINT_ERROR, (u64)hook, (u64)rec, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Check for Platform errors
+ */
+static inline u64
+ia64_sn_plat_cpei_handler(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_LOG_CE, 0, 0, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Set Error Handling Features	(Obsolete)
+ */
+static inline u64
+ia64_sn_plat_set_error_handling_features(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_REENTRANT(ret_stuff, SN_SAL_SET_ERROR_HANDLING_FEATURES,
+		SAL_ERR_FEAT_LOG_SBES,
+		0, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Checks for console input.
+ */
+static inline u64
+ia64_sn_console_check(int *result)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_POLL, 0, 0, 0, 0, 0, 0, 0);
+
+	/* result is in 'v0' */
+	*result = (int)ret_stuff.v0;
+
+	return ret_stuff.status;
+}
+
+/*
+ * Checks console interrupt status
+ */
+static inline u64
+ia64_sn_console_intr_status(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_INTR, 
+		 0, SAL_CONSOLE_INTR_STATUS,
+		 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status == 0) {
+	    return ret_stuff.v0;
+	}
+	
+	return 0;
+}
+
+/*
+ * Enable an interrupt on the SAL console device.
+ */
+static inline void
+ia64_sn_console_intr_enable(u64 intr)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_INTR, 
+		 intr, SAL_CONSOLE_INTR_ON,
+		 0, 0, 0, 0, 0);
+}
+
+/*
+ * Disable an interrupt on the SAL console device.
+ */
+static inline void
+ia64_sn_console_intr_disable(u64 intr)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_INTR, 
+		 intr, SAL_CONSOLE_INTR_OFF,
+		 0, 0, 0, 0, 0);
+}
+
+/*
+ * Sends a character buffer to the console asynchronously.
+ */
+static inline u64
+ia64_sn_console_xmit_chars(char *buf, int len)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_XMIT_CHARS,
+		 (u64)buf, (u64)len,
+		 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status == 0) {
+	    return ret_stuff.v0;
+	}
+
+	return 0;
+}
+
+/*
+ * Returns the iobrick module Id
+ */
+static inline u64
+ia64_sn_sysctl_iobrick_module_get(nasid_t nasid, int *result)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_SYSCTL_IOBRICK_MODULE_GET, nasid, 0, 0, 0, 0, 0, 0);
+
+	/* result is in 'v0' */
+	*result = (int)ret_stuff.v0;
+
+	return ret_stuff.status;
+}
+
+/**
+ * ia64_sn_pod_mode - call the SN_SAL_POD_MODE function
+ *
+ * SN_SAL_POD_MODE actually takes an argument, but it's always
+ * 0 when we call it from the kernel, so we don't have to expose
+ * it to the caller.
+ */
+static inline u64
+ia64_sn_pod_mode(void)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_REENTRANT(isrv, SN_SAL_POD_MODE, 0, 0, 0, 0, 0, 0, 0);
+	if (isrv.status)
+		return 0;
+	return isrv.v0;
+}
+
+/**
+ * ia64_sn_probe_mem - read from memory safely
+ * @addr: address to probe
+ * @size: number bytes to read (1,2,4,8)
+ * @data_ptr: address to store value read by probe (-1 returned if probe fails)
+ *
+ * Call into the SAL to do a memory read.  If the read generates a machine
+ * check, this routine will recover gracefully and return -1 to the caller.
+ * @addr is usually a kernel virtual address in uncached space (i.e. the
+ * address starts with 0xc), but if called in physical mode, @addr should
+ * be a physical address.
+ *
+ * Return values:
+ *  0 - probe successful
+ *  1 - probe failed (generated MCA)
+ *  2 - Bad arg
+ * <0 - PAL error
+ */
+static inline u64
+ia64_sn_probe_mem(long addr, long size, void *data_ptr)
+{
+	struct ia64_sal_retval isrv;
+
+	SAL_CALL(isrv, SN_SAL_PROBE, addr, size, 0, 0, 0, 0, 0);
+
+	if (data_ptr) {
+		switch (size) {
+		case 1:
+			*((u8*)data_ptr) = (u8)isrv.v0;
+			break;
+		case 2:
+			*((u16*)data_ptr) = (u16)isrv.v0;
+			break;
+		case 4:
+			*((u32*)data_ptr) = (u32)isrv.v0;
+			break;
+		case 8:
+			*((u64*)data_ptr) = (u64)isrv.v0;
+			break;
+		default:
+			isrv.status = 2;
+		}
+	}
+	return isrv.status;
+}
+
+/*
+ * Retrieve the system serial number as an ASCII string.
+ */
+static inline u64
+ia64_sn_sys_serial_get(char *buf)
+{
+	struct ia64_sal_retval ret_stuff;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_SYS_SERIAL_GET, buf, 0, 0, 0, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+extern char sn_system_serial_number_string[];
+extern u64 sn_partition_serial_number;
+
+static inline char *
+sn_system_serial_number(void) {
+	if (sn_system_serial_number_string[0]) {
+		return(sn_system_serial_number_string);
+	} else {
+		ia64_sn_sys_serial_get(sn_system_serial_number_string);
+		return(sn_system_serial_number_string);
+	}
+}
+	
+
+/*
+ * Returns a unique id number for this system and partition (suitable for
+ * use with license managers), based in part on the system serial number.
+ */
+static inline u64
+ia64_sn_partition_serial_get(void)
+{
+	struct ia64_sal_retval ret_stuff;
+	ia64_sal_oemcall_reentrant(&ret_stuff, SN_SAL_PARTITION_SERIAL_GET, 0,
+				   0, 0, 0, 0, 0, 0);
+	if (ret_stuff.status != 0)
+	    return 0;
+	return ret_stuff.v0;
+}
+
+static inline u64
+sn_partition_serial_number_val(void) {
+	if (unlikely(sn_partition_serial_number == 0)) {
+		sn_partition_serial_number = ia64_sn_partition_serial_get();
+	}
+	return sn_partition_serial_number;
+}
+
+/*
+ * Returns the partition id of the nasid passed in as an argument,
+ * or INVALID_PARTID if the partition id cannot be retrieved.
+ */
+static inline partid_t
+ia64_sn_sysctl_partition_get(nasid_t nasid)
+{
+	struct ia64_sal_retval ret_stuff;
+	SAL_CALL(ret_stuff, SN_SAL_SYSCTL_PARTITION_GET, nasid,
+		0, 0, 0, 0, 0, 0);
+	if (ret_stuff.status != 0)
+	    return -1;
+	return ((partid_t)ret_stuff.v0);
+}
+
+/*
+ * Returns the physical address of the partition's reserved page through
+ * an iterative number of calls.
+ *
+ * On first call, 'cookie' and 'len' should be set to 0, and 'addr'
+ * set to the nasid of the partition whose reserved page's address is
+ * being sought.
+ * On subsequent calls, pass the values, that were passed back on the
+ * previous call.
+ *
+ * While the return status equals SALRET_MORE_PASSES, keep calling
+ * this function after first copying 'len' bytes starting at 'addr'
+ * into 'buf'. Once the return status equals SALRET_OK, 'addr' will
+ * be the physical address of the partition's reserved page. If the
+ * return status equals neither of these, an error as occurred.
+ */
+static inline s64
+sn_partition_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
+{
+	struct ia64_sal_retval rv;
+	ia64_sal_oemcall_reentrant(&rv, SN_SAL_GET_PARTITION_ADDR, *cookie,
+				   *addr, buf, *len, 0, 0, 0);
+	*cookie = rv.v0;
+	*addr = rv.v1;
+	*len = rv.v2;
+	return rv.status;
+}
+
+/*
+ * Register or unregister a physical address range being referenced across
+ * a partition boundary for which certain SAL errors should be scanned for,
+ * cleaned up and ignored.  This is of value for kernel partitioning code only.
+ * Values for the operation argument:
+ *	1 = register this address range with SAL
+ *	0 = unregister this address range with SAL
+ * 
+ * SAL maintains a reference count on an address range in case it is registered
+ * multiple times.
+ * 
+ * On success, returns the reference count of the address range after the SAL
+ * call has performed the current registration/unregistration.  Returns a
+ * negative value if an error occurred.
+ */
+static inline int
+sn_register_xp_addr_region(u64 paddr, u64 len, int operation)
+{
+	struct ia64_sal_retval ret_stuff;
+	ia64_sal_oemcall(&ret_stuff, SN_SAL_XP_ADDR_REGION, paddr, len,
+			 (u64)operation, 0, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+/*
+ * Register or unregister an instruction range for which SAL errors should
+ * be ignored.  If an error occurs while in the registered range, SAL jumps
+ * to return_addr after ignoring the error.  Values for the operation argument:
+ *	1 = register this instruction range with SAL
+ *	0 = unregister this instruction range with SAL
+ *
+ * Returns 0 on success, or a negative value if an error occurred.
+ */
+static inline int
+sn_register_nofault_code(u64 start_addr, u64 end_addr, u64 return_addr,
+			 int virtual, int operation)
+{
+	struct ia64_sal_retval ret_stuff;
+	u64 call;
+	if (virtual) {
+		call = SN_SAL_NO_FAULT_ZONE_VIRTUAL;
+	} else {
+		call = SN_SAL_NO_FAULT_ZONE_PHYSICAL;
+	}
+	ia64_sal_oemcall(&ret_stuff, call, start_addr, end_addr, return_addr,
+			 (u64)1, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+/*
+ * Register or unregister a function to handle a PMI received by a CPU.
+ * Before calling the registered handler, SAL sets r1 to the value that
+ * was passed in as the global_pointer.
+ *
+ * If the handler pointer is NULL, then the currently registered handler
+ * will be unregistered.
+ *
+ * Returns 0 on success, or a negative value if an error occurred.
+ */
+static inline int
+sn_register_pmi_handler(u64 handler, u64 global_pointer)
+{
+	struct ia64_sal_retval ret_stuff;
+	ia64_sal_oemcall(&ret_stuff, SN_SAL_REGISTER_PMI_HANDLER, handler,
+			 global_pointer, 0, 0, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+/*
+ * Change or query the coherence domain for this partition. Each cpu-based
+ * nasid is represented by a bit in an array of 64-bit words:
+ *      0 = not in this partition's coherency domain
+ *      1 = in this partition's coherency domain
+ *
+ * It is not possible for the local system's nasids to be removed from
+ * the coherency domain.  Purpose of the domain arguments:
+ *      new_domain = set the coherence domain to the given nasids
+ *      old_domain = return the current coherence domain
+ *
+ * Returns 0 on success, or a negative value if an error occurred.
+ */
+static inline int
+sn_change_coherence(u64 *new_domain, u64 *old_domain)
+{
+	struct ia64_sal_retval ret_stuff;
+	ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_COHERENCE, (u64)new_domain,
+				(u64)old_domain, 0, 0, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+/*
+ * Change memory access protections for a physical address range.
+ * nasid_array is not used on Altix, but may be in future architectures.
+ * Available memory protection access classes are defined after the function.
+ */
+static inline int
+sn_change_memprotect(u64 paddr, u64 len, u64 perms, u64 *nasid_array)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_MEMPROTECT, paddr, len,
+				(u64)nasid_array, perms, 0, 0, 0);
+	return ret_stuff.status;
+}
+#define SN_MEMPROT_ACCESS_CLASS_0		0x14a080
+#define SN_MEMPROT_ACCESS_CLASS_1		0x2520c2
+#define SN_MEMPROT_ACCESS_CLASS_2		0x14a1ca
+#define SN_MEMPROT_ACCESS_CLASS_3		0x14a290
+#define SN_MEMPROT_ACCESS_CLASS_6		0x084080
+#define SN_MEMPROT_ACCESS_CLASS_7		0x021080
+
+/*
+ * Turns off system power.
+ */
+static inline void
+ia64_sn_power_down(void)
+{
+	struct ia64_sal_retval ret_stuff;
+	SAL_CALL(ret_stuff, SN_SAL_SYSTEM_POWER_DOWN, 0, 0, 0, 0, 0, 0, 0);
+	while(1)
+		cpu_relax();
+	/* never returns */
+}
+
+/**
+ * ia64_sn_fru_capture - tell the system controller to capture hw state
+ *
+ * This routine will call the SAL which will tell the system controller(s)
+ * to capture hw mmr information from each SHub in the system.
+ */
+static inline u64
+ia64_sn_fru_capture(void)
+{
+        struct ia64_sal_retval isrv;
+        SAL_CALL(isrv, SN_SAL_SYSCTL_FRU_CAPTURE, 0, 0, 0, 0, 0, 0, 0);
+        if (isrv.status)
+                return 0;
+        return isrv.v0;
+}
+
+/*
+ * Performs an operation on a PCI bus or slot -- power up, power down
+ * or reset.
+ */
+static inline u64
+ia64_sn_sysctl_iobrick_pci_op(nasid_t n, u64 connection_type, 
+			      u64 bus, char slot, 
+			      u64 action)
+{
+	struct ia64_sal_retval rv = {0, 0, 0, 0};
+
+	SAL_CALL_NOLOCK(rv, SN_SAL_SYSCTL_IOBRICK_PCI_OP, connection_type, n, action,
+		 bus, (u64) slot, 0, 0);
+	if (rv.status)
+	    	return rv.v0;
+	return 0;
+}
+
+
+/*
+ * Open a subchannel for sending arbitrary data to the system
+ * controller network via the system controller device associated with
+ * 'nasid'.  Return the subchannel number or a negative error code.
+ */
+static inline int
+ia64_sn_irtr_open(nasid_t nasid)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_OPEN, nasid,
+			   0, 0, 0, 0, 0);
+	return (int) rv.v0;
+}
+
+/*
+ * Close system controller subchannel 'subch' previously opened on 'nasid'.
+ */
+static inline int
+ia64_sn_irtr_close(nasid_t nasid, int subch)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_CLOSE,
+			   (u64) nasid, (u64) subch, 0, 0, 0, 0);
+	return (int) rv.status;
+}
+
+/*
+ * Read data from system controller associated with 'nasid' on
+ * subchannel 'subch'.  The buffer to be filled is pointed to by
+ * 'buf', and its capacity is in the integer pointed to by 'len'.  The
+ * referent of 'len' is set to the number of bytes read by the SAL
+ * call.  The return value is either SALRET_OK (for bytes read) or
+ * SALRET_ERROR (for error or "no data available").
+ */
+static inline int
+ia64_sn_irtr_recv(nasid_t nasid, int subch, char *buf, int *len)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_RECV,
+			   (u64) nasid, (u64) subch, (u64) buf, (u64) len,
+			   0, 0);
+	return (int) rv.status;
+}
+
+/*
+ * Write data to the system controller network via the system
+ * controller associated with 'nasid' on suchannel 'subch'.  The
+ * buffer to be written out is pointed to by 'buf', and 'len' is the
+ * number of bytes to be written.  The return value is either the
+ * number of bytes written (which could be zero) or a negative error
+ * code.
+ */
+static inline int
+ia64_sn_irtr_send(nasid_t nasid, int subch, char *buf, int len)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_SEND,
+			   (u64) nasid, (u64) subch, (u64) buf, (u64) len,
+			   0, 0);
+	return (int) rv.v0;
+}
+
+/*
+ * Check whether any interrupts are pending for the system controller
+ * associated with 'nasid' and its subchannel 'subch'.  The return
+ * value is a mask of pending interrupts (SAL_IROUTER_INTR_XMIT and/or
+ * SAL_IROUTER_INTR_RECV).
+ */
+static inline int
+ia64_sn_irtr_intr(nasid_t nasid, int subch)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INTR_STATUS,
+			   (u64) nasid, (u64) subch, 0, 0, 0, 0);
+	return (int) rv.v0;
+}
+
+/*
+ * Enable the interrupt indicated by the intr parameter (either
+ * SAL_IROUTER_INTR_XMIT or SAL_IROUTER_INTR_RECV).
+ */
+static inline int
+ia64_sn_irtr_intr_enable(nasid_t nasid, int subch, u64 intr)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INTR_ON,
+			   (u64) nasid, (u64) subch, intr, 0, 0, 0);
+	return (int) rv.v0;
+}
+
+/*
+ * Disable the interrupt indicated by the intr parameter (either
+ * SAL_IROUTER_INTR_XMIT or SAL_IROUTER_INTR_RECV).
+ */
+static inline int
+ia64_sn_irtr_intr_disable(nasid_t nasid, int subch, u64 intr)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INTR_OFF,
+			   (u64) nasid, (u64) subch, intr, 0, 0, 0);
+	return (int) rv.v0;
+}
+
+/*
+ * Set up a node as the point of contact for system controller
+ * environmental event delivery.
+ */
+static inline int
+ia64_sn_sysctl_event_init(nasid_t nasid)
+{
+        struct ia64_sal_retval rv;
+        SAL_CALL_REENTRANT(rv, SN_SAL_SYSCTL_EVENT, (u64) nasid,
+			   0, 0, 0, 0, 0, 0);
+        return (int) rv.v0;
+}
+
+/*
+ * Ask the system controller on the specified nasid to reset
+ * the CX corelet clock.  Only valid on TIO nodes.
+ */
+static inline int
+ia64_sn_sysctl_tio_clock_reset(nasid_t nasid)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_SYSCTL_OP, SAL_SYSCTL_OP_TIO_JLCK_RST,
+			nasid, 0, 0, 0, 0, 0);
+	if (rv.status != 0)
+		return (int)rv.status;
+	if (rv.v0 != 0)
+		return (int)rv.v0;
+
+	return 0;
+}
+
+/*
+ * Get the associated ioboard type for a given nasid.
+ */
+static inline long
+ia64_sn_sysctl_ioboard_get(nasid_t nasid, u16 *ioboard)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_REENTRANT(isrv, SN_SAL_SYSCTL_OP, SAL_SYSCTL_OP_IOBOARD,
+			   nasid, 0, 0, 0, 0, 0);
+	if (isrv.v0 != 0) {
+		*ioboard = isrv.v0;
+		return isrv.status;
+	}
+	if (isrv.v1 != 0) {
+		*ioboard = isrv.v1;
+		return isrv.status;
+	}
+
+	return isrv.status;
+}
+
+/**
+ * ia64_sn_get_fit_compt - read a FIT entry from the PROM header
+ * @nasid: NASID of node to read
+ * @index: FIT entry index to be retrieved (0..n)
+ * @fitentry: 16 byte buffer where FIT entry will be stored.
+ * @banbuf: optional buffer for retrieving banner
+ * @banlen: length of banner buffer
+ *
+ * Access to the physical PROM chips needs to be serialized since reads and
+ * writes can't occur at the same time, so we need to call into the SAL when
+ * we want to look at the FIT entries on the chips.
+ *
+ * Returns:
+ *	%SALRET_OK if ok
+ *	%SALRET_INVALID_ARG if index too big
+ *	%SALRET_NOT_IMPLEMENTED if running on older PROM
+ *	??? if nasid invalid OR banner buffer not large enough
+ */
+static inline int
+ia64_sn_get_fit_compt(u64 nasid, u64 index, void *fitentry, void *banbuf,
+		      u64 banlen)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_GET_FIT_COMPT, nasid, index, fitentry,
+			banbuf, banlen, 0, 0);
+	return (int) rv.status;
+}
+
+/*
+ * Initialize the SAL components of the system controller
+ * communication driver; specifically pass in a sizable buffer that
+ * can be used for allocation of subchannel queues as new subchannels
+ * are opened.  "buf" points to the buffer, and "len" specifies its
+ * length.
+ */
+static inline int
+ia64_sn_irtr_init(nasid_t nasid, void *buf, int len)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INIT,
+			   (u64) nasid, (u64) buf, (u64) len, 0, 0, 0);
+	return (int) rv.status;
+}
+
+/*
+ * Returns the nasid, subnode & slice corresponding to a SAPIC ID
+ *
+ *  In:
+ *	arg0 - SN_SAL_GET_SAPIC_INFO
+ *	arg1 - sapicid (lid >> 16) 
+ *  Out:
+ *	v0 - nasid
+ *	v1 - subnode
+ *	v2 - slice
+ */
+static inline u64
+ia64_sn_get_sapic_info(int sapicid, int *nasid, int *subnode, int *slice)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_GET_SAPIC_INFO, sapicid, 0, 0, 0, 0, 0, 0);
+
+/***** BEGIN HACK - temp til old proms no longer supported ********/
+	if (ret_stuff.status == SALRET_NOT_IMPLEMENTED) {
+		if (nasid) *nasid = sapicid & 0xfff;
+		if (subnode) *subnode = (sapicid >> 13) & 1;
+		if (slice) *slice = (sapicid >> 12) & 3;
+		return 0;
+	}
+/***** END HACK *******/
+
+	if (ret_stuff.status < 0)
+		return ret_stuff.status;
+
+	if (nasid) *nasid = (int) ret_stuff.v0;
+	if (subnode) *subnode = (int) ret_stuff.v1;
+	if (slice) *slice = (int) ret_stuff.v2;
+	return 0;
+}
+ 
+/*
+ * Returns information about the HUB/SHUB.
+ *  In:
+ *	arg0 - SN_SAL_GET_SN_INFO
+ * 	arg1 - 0 (other values reserved for future use)
+ *  Out:
+ *	v0 
+ *		[7:0]   - shub type (0=shub1, 1=shub2)
+ *		[15:8]  - Log2 max number of nodes in entire system (includes
+ *			  C-bricks, I-bricks, etc)
+ *		[23:16] - Log2 of nodes per sharing domain			 
+ * 		[31:24] - partition ID
+ * 		[39:32] - coherency_id
+ * 		[47:40] - regionsize
+ *	v1 
+ *		[15:0]  - nasid mask (ex., 0x7ff for 11 bit nasid)
+ *	 	[23:15] - bit position of low nasid bit
+ */
+static inline u64
+ia64_sn_get_sn_info(int fc, u8 *shubtype, u16 *nasid_bitmask, u8 *nasid_shift, 
+		u8 *systemsize, u8 *sharing_domain_size, u8 *partid, u8 *coher, u8 *reg)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_GET_SN_INFO, fc, 0, 0, 0, 0, 0, 0);
+
+/***** BEGIN HACK - temp til old proms no longer supported ********/
+	if (ret_stuff.status == SALRET_NOT_IMPLEMENTED) {
+		int nasid = get_sapicid() & 0xfff;
+#define SH_SHUB_ID_NODES_PER_BIT_MASK 0x001f000000000000UL
+#define SH_SHUB_ID_NODES_PER_BIT_SHFT 48
+		if (shubtype) *shubtype = 0;
+		if (nasid_bitmask) *nasid_bitmask = 0x7ff;
+		if (nasid_shift) *nasid_shift = 38;
+		if (systemsize) *systemsize = 10;
+		if (sharing_domain_size) *sharing_domain_size = 8;
+		if (partid) *partid = ia64_sn_sysctl_partition_get(nasid);
+		if (coher) *coher = nasid >> 9;
+		if (reg) *reg = (HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_SHUB_ID)) & SH_SHUB_ID_NODES_PER_BIT_MASK) >>
+			SH_SHUB_ID_NODES_PER_BIT_SHFT;
+		return 0;
+	}
+/***** END HACK *******/
+
+	if (ret_stuff.status < 0)
+		return ret_stuff.status;
+
+	if (shubtype) *shubtype = ret_stuff.v0 & 0xff;
+	if (systemsize) *systemsize = (ret_stuff.v0 >> 8) & 0xff;
+	if (sharing_domain_size) *sharing_domain_size = (ret_stuff.v0 >> 16) & 0xff;
+	if (partid) *partid = (ret_stuff.v0 >> 24) & 0xff;
+	if (coher) *coher = (ret_stuff.v0 >> 32) & 0xff;
+	if (reg) *reg = (ret_stuff.v0 >> 40) & 0xff;
+	if (nasid_bitmask) *nasid_bitmask = (ret_stuff.v1 & 0xffff);
+	if (nasid_shift) *nasid_shift = (ret_stuff.v1 >> 16) & 0xff;
+	return 0;
+}
+ 
+/*
+ * This is the access point to the Altix PROM hardware performance
+ * and status monitoring interface. For info on using this, see
+ * arch/ia64/include/asm/sn/sn2/sn_hwperf.h
+ */
+static inline int
+ia64_sn_hwperf_op(nasid_t nasid, u64 opcode, u64 a0, u64 a1, u64 a2,
+                  u64 a3, u64 a4, int *v0)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_HWPERF_OP, (u64)nasid,
+		opcode, a0, a1, a2, a3, a4);
+	if (v0)
+		*v0 = (int) rv.v0;
+	return (int) rv.status;
+}
+
+static inline int
+ia64_sn_ioif_get_pci_topology(u64 buf, u64 len)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_IOIF_GET_PCI_TOPOLOGY, buf, len, 0, 0, 0, 0, 0);
+	return (int) rv.status;
+}
+
+/*
+ * BTE error recovery is implemented in SAL
+ */
+static inline int
+ia64_sn_bte_recovery(nasid_t nasid)
+{
+	struct ia64_sal_retval rv;
+
+	rv.status = 0;
+	SAL_CALL_NOLOCK(rv, SN_SAL_BTE_RECOVER, (u64)nasid, 0, 0, 0, 0, 0, 0);
+	if (rv.status == SALRET_NOT_IMPLEMENTED)
+		return 0;
+	return (int) rv.status;
+}
+
+static inline int
+ia64_sn_is_fake_prom(void)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_FAKE_PROM, 0, 0, 0, 0, 0, 0, 0);
+	return (rv.status == 0);
+}
+
+static inline int
+ia64_sn_get_prom_feature_set(int set, unsigned long *feature_set)
+{
+	struct ia64_sal_retval rv;
+
+	SAL_CALL_NOLOCK(rv, SN_SAL_GET_PROM_FEATURE_SET, set, 0, 0, 0, 0, 0, 0);
+	if (rv.status != 0)
+		return rv.status;
+	*feature_set = rv.v0;
+	return 0;
+}
+
+static inline int
+ia64_sn_set_os_feature(int feature)
+{
+	struct ia64_sal_retval rv;
+
+	SAL_CALL_NOLOCK(rv, SN_SAL_SET_OS_FEATURE_SET, feature, 0, 0, 0, 0, 0, 0);
+	return rv.status;
+}
+
+static inline int
+sn_inject_error(u64 paddr, u64 *data, u64 *ecc)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_INJECT_ERROR, paddr, (u64)data,
+				(u64)ecc, 0, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+static inline int
+ia64_sn_set_cpu_number(int cpu)
+{
+	struct ia64_sal_retval rv;
+
+	SAL_CALL_NOLOCK(rv, SN_SAL_SET_CPU_NUMBER, cpu, 0, 0, 0, 0, 0, 0);
+	return rv.status;
+}
+static inline int
+ia64_sn_kernel_launch_event(void)
+{
+ 	struct ia64_sal_retval rv;
+	SAL_CALL_NOLOCK(rv, SN_SAL_KERNEL_LAUNCH_EVENT, 0, 0, 0, 0, 0, 0, 0);
+	return rv.status;
+}
+
+union sn_watchlist_u {
+	u64     val;
+	struct {
+		u64	blade	: 16,
+			size	: 32,
+			filler	: 16;
+	};
+};
+
+static inline int
+sn_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size,
+				unsigned long *intr_mmr_offset)
+{
+	struct ia64_sal_retval rv;
+	unsigned long addr;
+	union sn_watchlist_u size_blade;
+	int watchlist;
+
+	addr = (unsigned long)mq;
+	size_blade.size = mq_size;
+	size_blade.blade = blade;
+
+	/*
+	 * bios returns watchlist number or negative error number.
+	 */
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_ALLOC, addr,
+			size_blade.val, (u64)intr_mmr_offset,
+			(u64)&watchlist, 0, 0, 0);
+	if (rv.status < 0)
+		return rv.status;
+
+	return watchlist;
+}
+
+static inline int
+sn_mq_watchlist_free(int blade, int watchlist_num)
+{
+	struct ia64_sal_retval rv;
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_FREE, blade,
+			watchlist_num, 0, 0, 0, 0, 0);
+	return rv.status;
+}
+#endif /* _ASM_IA64_SN_SN_SAL_H */
diff --git a/arch/ia64/include/asm/sn/tioca.h b/arch/ia64/include/asm/sn/tioca.h
new file mode 100644
index 00000000000..666222d7f0f
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tioca.h
@@ -0,0 +1,596 @@
+#ifndef _ASM_IA64_SN_TIO_TIOCA_H
+#define _ASM_IA64_SN_TIO_TIOCA_H
+
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+#define TIOCA_PART_NUM	0xE020
+#define TIOCA_MFGR_NUM	0x24
+#define TIOCA_REV_A	0x1
+
+/*
+ * Register layout for TIO:CA.  See below for bitmasks for each register.
+ */
+
+struct tioca {
+	u64	ca_id;				/* 0x000000 */
+	u64	ca_control1;			/* 0x000008 */
+	u64	ca_control2;			/* 0x000010 */
+	u64	ca_status1;			/* 0x000018 */
+	u64	ca_status2;			/* 0x000020 */
+	u64	ca_gart_aperature;		/* 0x000028 */
+	u64	ca_gfx_detach;			/* 0x000030 */
+	u64	ca_inta_dest_addr;		/* 0x000038 */
+	u64	ca_intb_dest_addr;		/* 0x000040 */
+	u64	ca_err_int_dest_addr;		/* 0x000048 */
+	u64	ca_int_status;			/* 0x000050 */
+	u64	ca_int_status_alias;		/* 0x000058 */
+	u64	ca_mult_error;			/* 0x000060 */
+	u64	ca_mult_error_alias;		/* 0x000068 */
+	u64	ca_first_error;			/* 0x000070 */
+	u64	ca_int_mask;			/* 0x000078 */
+	u64	ca_crm_pkterr_type;		/* 0x000080 */
+	u64	ca_crm_pkterr_type_alias;	/* 0x000088 */
+	u64	ca_crm_ct_error_detail_1;	/* 0x000090 */
+	u64	ca_crm_ct_error_detail_2;	/* 0x000098 */
+	u64	ca_crm_tnumto;			/* 0x0000A0 */
+	u64	ca_gart_err;			/* 0x0000A8 */
+	u64	ca_pcierr_type;			/* 0x0000B0 */
+	u64	ca_pcierr_addr;			/* 0x0000B8 */
+
+	u64	ca_pad_0000C0[3];		/* 0x0000{C0..D0} */
+
+	u64	ca_pci_rd_buf_flush;		/* 0x0000D8 */
+	u64	ca_pci_dma_addr_extn;		/* 0x0000E0 */
+	u64	ca_agp_dma_addr_extn;		/* 0x0000E8 */
+	u64	ca_force_inta;			/* 0x0000F0 */
+	u64	ca_force_intb;			/* 0x0000F8 */
+	u64	ca_debug_vector_sel;		/* 0x000100 */
+	u64	ca_debug_mux_core_sel;		/* 0x000108 */
+	u64	ca_debug_mux_pci_sel;		/* 0x000110 */
+	u64	ca_debug_domain_sel;		/* 0x000118 */
+
+	u64	ca_pad_000120[28];		/* 0x0001{20..F8} */
+
+	u64	ca_gart_ptr_table;		/* 0x200 */
+	u64	ca_gart_tlb_addr[8];		/* 0x2{08..40} */
+};
+
+/*
+ * Mask/shift definitions for TIO:CA registers.  The convention here is
+ * to mainly use the names as they appear in the "TIO AEGIS Programmers'
+ * Reference" with a CA_ prefix added.  Some exceptions were made to fix
+ * duplicate field names or to generalize fields that are common to
+ * different registers (ca_debug_mux_core_sel and ca_debug_mux_pci_sel for
+ * example).
+ *
+ * Fields consisting of a single bit have a single #define have a single
+ * macro declaration to mask the bit.  Fields consisting of multiple bits
+ * have two declarations: one to mask the proper bits in a register, and 
+ * a second with the suffix "_SHFT" to identify how far the mask needs to
+ * be shifted right to get its base value.
+ */
+
+/* ==== ca_control1 */
+#define CA_SYS_BIG_END			(1ull << 0)
+#define CA_DMA_AGP_SWAP			(1ull << 1)
+#define CA_DMA_PCI_SWAP			(1ull << 2)
+#define CA_PIO_IO_SWAP			(1ull << 3)
+#define CA_PIO_MEM_SWAP			(1ull << 4)
+#define CA_GFX_WR_SWAP			(1ull << 5)
+#define CA_AGP_FW_ENABLE		(1ull << 6)
+#define CA_AGP_CAL_CYCLE		(0x7ull << 7)
+#define CA_AGP_CAL_CYCLE_SHFT		7
+#define CA_AGP_CAL_PRSCL_BYP		(1ull << 10)
+#define CA_AGP_INIT_CAL_ENB		(1ull << 11)
+#define CA_INJ_ADDR_PERR		(1ull << 12)
+#define CA_INJ_DATA_PERR		(1ull << 13)
+	/* bits 15:14 unused */
+#define CA_PCIM_IO_NBE_AD		(0x7ull << 16)
+#define CA_PCIM_IO_NBE_AD_SHFT		16
+#define CA_PCIM_FAST_BTB_ENB		(1ull << 19)
+	/* bits 23:20 unused */
+#define CA_PIO_ADDR_OFFSET		(0xffull << 24)
+#define CA_PIO_ADDR_OFFSET_SHFT		24
+	/* bits 35:32 unused */
+#define CA_AGPDMA_OP_COMBDELAY		(0x1full << 36)
+#define CA_AGPDMA_OP_COMBDELAY_SHFT	36
+	/* bit 41 unused */
+#define CA_AGPDMA_OP_ENB_COMBDELAY	(1ull << 42)
+#define	CA_PCI_INT_LPCNT		(0xffull << 44)
+#define CA_PCI_INT_LPCNT_SHFT		44
+	/* bits 63:52 unused */
+
+/* ==== ca_control2 */
+#define CA_AGP_LATENCY_TO		(0xffull << 0)
+#define CA_AGP_LATENCY_TO_SHFT		0
+#define CA_PCI_LATENCY_TO		(0xffull << 8)
+#define CA_PCI_LATENCY_TO_SHFT		8
+#define CA_PCI_MAX_RETRY		(0x3ffull << 16)
+#define CA_PCI_MAX_RETRY_SHFT		16
+	/* bits 27:26 unused */
+#define CA_RT_INT_EN			(0x3ull << 28)
+#define CA_RT_INT_EN_SHFT			28
+#define CA_MSI_INT_ENB			(1ull << 30)
+#define CA_PCI_ARB_ERR_ENB		(1ull << 31)
+#define CA_GART_MEM_PARAM		(0x3ull << 32)
+#define CA_GART_MEM_PARAM_SHFT		32
+#define CA_GART_RD_PREFETCH_ENB		(1ull << 34)
+#define CA_GART_WR_PREFETCH_ENB		(1ull << 35)
+#define CA_GART_FLUSH_TLB		(1ull << 36)
+	/* bits 39:37 unused */
+#define CA_CRM_TNUMTO_PERIOD		(0x1fffull << 40)
+#define CA_CRM_TNUMTO_PERIOD_SHFT	40
+	/* bits 55:53 unused */
+#define CA_CRM_TNUMTO_ENB		(1ull << 56)
+#define CA_CRM_PRESCALER_BYP		(1ull << 57)
+	/* bits 59:58 unused */
+#define CA_CRM_MAX_CREDIT		(0x7ull << 60)
+#define CA_CRM_MAX_CREDIT_SHFT		60
+	/* bit 63 unused */
+
+/* ==== ca_status1 */
+#define CA_CORELET_ID			(0x3ull << 0)
+#define CA_CORELET_ID_SHFT		0
+#define CA_INTA_N			(1ull << 2)
+#define CA_INTB_N			(1ull << 3)
+#define CA_CRM_CREDIT_AVAIL		(0x7ull << 4)
+#define CA_CRM_CREDIT_AVAIL_SHFT	4
+	/* bit 7 unused */
+#define CA_CRM_SPACE_AVAIL		(0x7full << 8)
+#define CA_CRM_SPACE_AVAIL_SHFT		8
+	/* bit 15 unused */
+#define CA_GART_TLB_VAL			(0xffull << 16)
+#define CA_GART_TLB_VAL_SHFT		16
+	/* bits 63:24 unused */
+
+/* ==== ca_status2 */
+#define CA_GFX_CREDIT_AVAIL		(0xffull << 0)
+#define CA_GFX_CREDIT_AVAIL_SHFT	0
+#define CA_GFX_OPQ_AVAIL		(0xffull << 8)
+#define CA_GFX_OPQ_AVAIL_SHFT		8
+#define CA_GFX_WRBUFF_AVAIL		(0xffull << 16)
+#define CA_GFX_WRBUFF_AVAIL_SHFT	16
+#define CA_ADMA_OPQ_AVAIL		(0xffull << 24)
+#define CA_ADMA_OPQ_AVAIL_SHFT		24
+#define CA_ADMA_WRBUFF_AVAIL		(0xffull << 32)
+#define CA_ADMA_WRBUFF_AVAIL_SHFT	32
+#define CA_ADMA_RDBUFF_AVAIL		(0x7full << 40)
+#define CA_ADMA_RDBUFF_AVAIL_SHFT	40
+#define CA_PCI_PIO_OP_STAT		(1ull << 47)
+#define CA_PDMA_OPQ_AVAIL		(0xfull << 48)
+#define CA_PDMA_OPQ_AVAIL_SHFT		48
+#define CA_PDMA_WRBUFF_AVAIL		(0xfull << 52)
+#define CA_PDMA_WRBUFF_AVAIL_SHFT	52
+#define CA_PDMA_RDBUFF_AVAIL		(0x3ull << 56)
+#define CA_PDMA_RDBUFF_AVAIL_SHFT	56
+	/* bits 63:58 unused */
+
+/* ==== ca_gart_aperature */
+#define CA_GART_AP_ENB_AGP		(1ull << 0)
+#define CA_GART_PAGE_SIZE		(1ull << 1)
+#define CA_GART_AP_ENB_PCI		(1ull << 2)
+	/* bits 11:3 unused */
+#define CA_GART_AP_SIZE			(0x3ffull << 12)
+#define CA_GART_AP_SIZE_SHFT		12
+#define CA_GART_AP_BASE			(0x3ffffffffffull << 22)
+#define CA_GART_AP_BASE_SHFT		22
+
+/* ==== ca_inta_dest_addr
+   ==== ca_intb_dest_addr 
+   ==== ca_err_int_dest_addr */
+	/* bits 2:0 unused */
+#define CA_INT_DEST_ADDR		(0x7ffffffffffffull << 3)
+#define CA_INT_DEST_ADDR_SHFT		3
+	/* bits 55:54 unused */
+#define CA_INT_DEST_VECT		(0xffull << 56)
+#define CA_INT_DEST_VECT_SHFT		56
+
+/* ==== ca_int_status */
+/* ==== ca_int_status_alias */
+/* ==== ca_mult_error */
+/* ==== ca_mult_error_alias */
+/* ==== ca_first_error */
+/* ==== ca_int_mask */
+#define CA_PCI_ERR			(1ull << 0)
+	/* bits 3:1 unused */
+#define CA_GART_FETCH_ERR		(1ull << 4)
+#define CA_GFX_WR_OVFLW			(1ull << 5)
+#define CA_PIO_REQ_OVFLW		(1ull << 6)
+#define CA_CRM_PKTERR			(1ull << 7)
+#define CA_CRM_DVERR			(1ull << 8)
+#define CA_TNUMTO			(1ull << 9)
+#define CA_CXM_RSP_CRED_OVFLW		(1ull << 10)
+#define CA_CXM_REQ_CRED_OVFLW		(1ull << 11)
+#define CA_PIO_INVALID_ADDR		(1ull << 12)
+#define CA_PCI_ARB_TO			(1ull << 13)
+#define CA_AGP_REQ_OFLOW		(1ull << 14)
+#define CA_SBA_TYPE1_ERR		(1ull << 15)
+	/* bit 16 unused */
+#define CA_INTA				(1ull << 17)
+#define CA_INTB				(1ull << 18)
+#define CA_MULT_INTA			(1ull << 19)
+#define CA_MULT_INTB			(1ull << 20)
+#define CA_GFX_CREDIT_OVFLW		(1ull << 21)
+	/* bits 63:22 unused */
+
+/* ==== ca_crm_pkterr_type */
+/* ==== ca_crm_pkterr_type_alias */
+#define CA_CRM_PKTERR_SBERR_HDR		(1ull << 0)
+#define CA_CRM_PKTERR_DIDN		(1ull << 1)
+#define CA_CRM_PKTERR_PACTYPE		(1ull << 2)
+#define CA_CRM_PKTERR_INV_TNUM		(1ull << 3)
+#define CA_CRM_PKTERR_ADDR_RNG		(1ull << 4)
+#define CA_CRM_PKTERR_ADDR_ALGN		(1ull << 5)
+#define CA_CRM_PKTERR_HDR_PARAM		(1ull << 6)
+#define CA_CRM_PKTERR_CW_ERR		(1ull << 7)
+#define CA_CRM_PKTERR_SBERR_NH		(1ull << 8)
+#define CA_CRM_PKTERR_EARLY_TERM	(1ull << 9)
+#define CA_CRM_PKTERR_EARLY_TAIL	(1ull << 10)
+#define CA_CRM_PKTERR_MSSNG_TAIL	(1ull << 11)
+#define CA_CRM_PKTERR_MSSNG_HDR		(1ull << 12)
+	/* bits 15:13 unused */
+#define CA_FIRST_CRM_PKTERR_SBERR_HDR	(1ull << 16)
+#define CA_FIRST_CRM_PKTERR_DIDN	(1ull << 17)
+#define CA_FIRST_CRM_PKTERR_PACTYPE	(1ull << 18)
+#define CA_FIRST_CRM_PKTERR_INV_TNUM	(1ull << 19)
+#define CA_FIRST_CRM_PKTERR_ADDR_RNG	(1ull << 20)
+#define CA_FIRST_CRM_PKTERR_ADDR_ALGN	(1ull << 21)
+#define CA_FIRST_CRM_PKTERR_HDR_PARAM	(1ull << 22)
+#define CA_FIRST_CRM_PKTERR_CW_ERR	(1ull << 23)
+#define CA_FIRST_CRM_PKTERR_SBERR_NH	(1ull << 24)
+#define CA_FIRST_CRM_PKTERR_EARLY_TERM	(1ull << 25)
+#define CA_FIRST_CRM_PKTERR_EARLY_TAIL	(1ull << 26)
+#define CA_FIRST_CRM_PKTERR_MSSNG_TAIL	(1ull << 27)
+#define CA_FIRST_CRM_PKTERR_MSSNG_HDR	(1ull << 28)
+	/* bits 63:29 unused */
+
+/* ==== ca_crm_ct_error_detail_1 */
+#define CA_PKT_TYPE			(0xfull << 0)
+#define CA_PKT_TYPE_SHFT		0
+#define CA_SRC_ID			(0x3ull << 4)
+#define CA_SRC_ID_SHFT			4
+#define CA_DATA_SZ			(0x3ull << 6)
+#define CA_DATA_SZ_SHFT			6
+#define CA_TNUM				(0xffull << 8)
+#define CA_TNUM_SHFT			8
+#define CA_DW_DATA_EN			(0xffull << 16)
+#define CA_DW_DATA_EN_SHFT		16
+#define CA_GFX_CRED			(0xffull << 24)
+#define CA_GFX_CRED_SHFT		24
+#define CA_MEM_RD_PARAM			(0x3ull << 32)
+#define CA_MEM_RD_PARAM_SHFT		32
+#define CA_PIO_OP			(1ull << 34)
+#define CA_CW_ERR			(1ull << 35)
+	/* bits 62:36 unused */
+#define CA_VALID			(1ull << 63)
+
+/* ==== ca_crm_ct_error_detail_2 */
+	/* bits 2:0 unused */
+#define CA_PKT_ADDR			(0x1fffffffffffffull << 3)
+#define CA_PKT_ADDR_SHFT		3
+	/* bits 63:56 unused */
+
+/* ==== ca_crm_tnumto */
+#define CA_CRM_TNUMTO_VAL		(0xffull << 0)
+#define CA_CRM_TNUMTO_VAL_SHFT		0
+#define CA_CRM_TNUMTO_WR		(1ull << 8)
+	/* bits 63:9 unused */
+
+/* ==== ca_gart_err */
+#define CA_GART_ERR_SOURCE		(0x3ull << 0)
+#define CA_GART_ERR_SOURCE_SHFT		0
+	/* bits 3:2 unused */
+#define CA_GART_ERR_ADDR		(0xfffffffffull << 4)
+#define CA_GART_ERR_ADDR_SHFT		4
+	/* bits 63:40 unused */
+
+/* ==== ca_pcierr_type */
+#define CA_PCIERR_DATA			(0xffffffffull << 0)
+#define CA_PCIERR_DATA_SHFT		0
+#define CA_PCIERR_ENB			(0xfull << 32)
+#define CA_PCIERR_ENB_SHFT		32
+#define CA_PCIERR_CMD			(0xfull << 36)
+#define CA_PCIERR_CMD_SHFT		36
+#define CA_PCIERR_A64			(1ull << 40)
+#define CA_PCIERR_SLV_SERR		(1ull << 41)
+#define CA_PCIERR_SLV_WR_PERR		(1ull << 42)
+#define CA_PCIERR_SLV_RD_PERR		(1ull << 43)
+#define CA_PCIERR_MST_SERR		(1ull << 44)
+#define CA_PCIERR_MST_WR_PERR		(1ull << 45)
+#define CA_PCIERR_MST_RD_PERR		(1ull << 46)
+#define CA_PCIERR_MST_MABT		(1ull << 47)
+#define CA_PCIERR_MST_TABT		(1ull << 48)
+#define CA_PCIERR_MST_RETRY_TOUT	(1ull << 49)
+
+#define CA_PCIERR_TYPES \
+	(CA_PCIERR_A64|CA_PCIERR_SLV_SERR| \
+	 CA_PCIERR_SLV_WR_PERR|CA_PCIERR_SLV_RD_PERR| \
+	 CA_PCIERR_MST_SERR|CA_PCIERR_MST_WR_PERR|CA_PCIERR_MST_RD_PERR| \
+	 CA_PCIERR_MST_MABT|CA_PCIERR_MST_TABT|CA_PCIERR_MST_RETRY_TOUT)
+
+	/* bits 63:50 unused */
+
+/* ==== ca_pci_dma_addr_extn */
+#define CA_UPPER_NODE_OFFSET		(0x3full << 0)
+#define CA_UPPER_NODE_OFFSET_SHFT	0
+	/* bits 7:6 unused */
+#define CA_CHIPLET_ID			(0x3ull << 8)
+#define CA_CHIPLET_ID_SHFT		8
+	/* bits 11:10 unused */
+#define CA_PCI_DMA_NODE_ID		(0xffffull << 12)
+#define CA_PCI_DMA_NODE_ID_SHFT		12
+	/* bits 27:26 unused */
+#define CA_PCI_DMA_PIO_MEM_TYPE		(1ull << 28)
+	/* bits 63:29 unused */
+
+
+/* ==== ca_agp_dma_addr_extn */
+	/* bits 19:0 unused */
+#define CA_AGP_DMA_NODE_ID		(0xffffull << 20)
+#define CA_AGP_DMA_NODE_ID_SHFT		20
+	/* bits 27:26 unused */
+#define CA_AGP_DMA_PIO_MEM_TYPE		(1ull << 28)
+	/* bits 63:29 unused */
+
+/* ==== ca_debug_vector_sel */
+#define CA_DEBUG_MN_VSEL		(0xfull << 0)
+#define CA_DEBUG_MN_VSEL_SHFT		0
+#define CA_DEBUG_PP_VSEL		(0xfull << 4)
+#define CA_DEBUG_PP_VSEL_SHFT		4
+#define CA_DEBUG_GW_VSEL		(0xfull << 8)
+#define CA_DEBUG_GW_VSEL_SHFT		8
+#define CA_DEBUG_GT_VSEL		(0xfull << 12)
+#define CA_DEBUG_GT_VSEL_SHFT		12
+#define CA_DEBUG_PD_VSEL		(0xfull << 16)
+#define CA_DEBUG_PD_VSEL_SHFT		16
+#define CA_DEBUG_AD_VSEL		(0xfull << 20)
+#define CA_DEBUG_AD_VSEL_SHFT		20
+#define CA_DEBUG_CX_VSEL		(0xfull << 24)
+#define CA_DEBUG_CX_VSEL_SHFT		24
+#define CA_DEBUG_CR_VSEL		(0xfull << 28)
+#define CA_DEBUG_CR_VSEL_SHFT		28
+#define CA_DEBUG_BA_VSEL		(0xfull << 32)
+#define CA_DEBUG_BA_VSEL_SHFT		32
+#define CA_DEBUG_PE_VSEL		(0xfull << 36)
+#define CA_DEBUG_PE_VSEL_SHFT		36
+#define CA_DEBUG_BO_VSEL		(0xfull << 40)
+#define CA_DEBUG_BO_VSEL_SHFT		40
+#define CA_DEBUG_BI_VSEL		(0xfull << 44)
+#define CA_DEBUG_BI_VSEL_SHFT		44
+#define CA_DEBUG_AS_VSEL		(0xfull << 48)
+#define CA_DEBUG_AS_VSEL_SHFT		48
+#define CA_DEBUG_PS_VSEL		(0xfull << 52)
+#define CA_DEBUG_PS_VSEL_SHFT		52
+#define CA_DEBUG_PM_VSEL		(0xfull << 56)
+#define CA_DEBUG_PM_VSEL_SHFT		56
+	/* bits 63:60 unused */
+
+/* ==== ca_debug_mux_core_sel */
+/* ==== ca_debug_mux_pci_sel */
+#define CA_DEBUG_MSEL0			(0x7ull << 0)
+#define CA_DEBUG_MSEL0_SHFT		0
+	/* bit 3 unused */
+#define CA_DEBUG_NSEL0			(0x7ull << 4)
+#define CA_DEBUG_NSEL0_SHFT		4
+	/* bit 7 unused */
+#define CA_DEBUG_MSEL1			(0x7ull << 8)
+#define CA_DEBUG_MSEL1_SHFT		8
+	/* bit 11 unused */
+#define CA_DEBUG_NSEL1			(0x7ull << 12)
+#define CA_DEBUG_NSEL1_SHFT		12
+	/* bit 15 unused */
+#define CA_DEBUG_MSEL2			(0x7ull << 16)
+#define CA_DEBUG_MSEL2_SHFT		16
+	/* bit 19 unused */
+#define CA_DEBUG_NSEL2			(0x7ull << 20)
+#define CA_DEBUG_NSEL2_SHFT		20
+	/* bit 23 unused */
+#define CA_DEBUG_MSEL3			(0x7ull << 24)
+#define CA_DEBUG_MSEL3_SHFT		24
+	/* bit 27 unused */
+#define CA_DEBUG_NSEL3			(0x7ull << 28)
+#define CA_DEBUG_NSEL3_SHFT		28
+	/* bit 31 unused */
+#define CA_DEBUG_MSEL4			(0x7ull << 32)
+#define CA_DEBUG_MSEL4_SHFT		32
+	/* bit 35 unused */
+#define CA_DEBUG_NSEL4			(0x7ull << 36)
+#define CA_DEBUG_NSEL4_SHFT		36
+	/* bit 39 unused */
+#define CA_DEBUG_MSEL5			(0x7ull << 40)
+#define CA_DEBUG_MSEL5_SHFT		40
+	/* bit 43 unused */
+#define CA_DEBUG_NSEL5			(0x7ull << 44)
+#define CA_DEBUG_NSEL5_SHFT		44
+	/* bit 47 unused */
+#define CA_DEBUG_MSEL6			(0x7ull << 48)
+#define CA_DEBUG_MSEL6_SHFT		48
+	/* bit 51 unused */
+#define CA_DEBUG_NSEL6			(0x7ull << 52)
+#define CA_DEBUG_NSEL6_SHFT		52
+	/* bit 55 unused */
+#define CA_DEBUG_MSEL7			(0x7ull << 56)
+#define CA_DEBUG_MSEL7_SHFT		56
+	/* bit 59 unused */
+#define CA_DEBUG_NSEL7			(0x7ull << 60)
+#define CA_DEBUG_NSEL7_SHFT		60
+	/* bit 63 unused */
+
+
+/* ==== ca_debug_domain_sel */
+#define CA_DEBUG_DOMAIN_L		(1ull << 0)
+#define CA_DEBUG_DOMAIN_H		(1ull << 1)
+	/* bits 63:2 unused */
+
+/* ==== ca_gart_ptr_table */
+#define CA_GART_PTR_VAL			(1ull << 0)
+	/* bits 11:1 unused */
+#define CA_GART_PTR_ADDR		(0xfffffffffffull << 12)
+#define CA_GART_PTR_ADDR_SHFT		12
+	/* bits 63:56 unused */
+
+/* ==== ca_gart_tlb_addr[0-7] */
+#define CA_GART_TLB_ADDR		(0xffffffffffffffull << 0)
+#define CA_GART_TLB_ADDR_SHFT		0
+	/* bits 62:56 unused */
+#define CA_GART_TLB_ENTRY_VAL		(1ull << 63)
+
+/*
+ * PIO address space ranges for TIO:CA
+ */
+
+/* CA internal registers */
+#define CA_PIO_ADMIN			0x00000000
+#define CA_PIO_ADMIN_LEN		0x00010000
+
+/* GFX Write Buffer - Diagnostics */
+#define CA_PIO_GFX			0x00010000
+#define CA_PIO_GFX_LEN			0x00010000
+
+/* AGP DMA Write Buffer - Diagnostics */
+#define CA_PIO_AGP_DMAWRITE		0x00020000
+#define CA_PIO_AGP_DMAWRITE_LEN		0x00010000
+
+/* AGP DMA READ Buffer - Diagnostics */
+#define CA_PIO_AGP_DMAREAD		0x00030000
+#define CA_PIO_AGP_DMAREAD_LEN		0x00010000
+
+/* PCI Config Type 0 */
+#define CA_PIO_PCI_TYPE0_CONFIG		0x01000000
+#define CA_PIO_PCI_TYPE0_CONFIG_LEN	0x01000000
+
+/* PCI Config Type 1 */
+#define CA_PIO_PCI_TYPE1_CONFIG		0x02000000
+#define CA_PIO_PCI_TYPE1_CONFIG_LEN	0x01000000
+
+/* PCI I/O Cycles - mapped to PCI Address 0x00000000-0x04ffffff */
+#define CA_PIO_PCI_IO			0x03000000
+#define CA_PIO_PCI_IO_LEN		0x05000000
+
+/* PCI MEM Cycles - mapped to PCI with CA_PIO_ADDR_OFFSET of ca_control1 */
+/*	use Fast Write if enabled and coretalk packet type is a GFX request */
+#define CA_PIO_PCI_MEM_OFFSET		0x08000000
+#define CA_PIO_PCI_MEM_OFFSET_LEN	0x08000000
+
+/* PCI MEM Cycles - mapped to PCI Address 0x00000000-0xbfffffff */
+/*	use Fast Write if enabled and coretalk packet type is a GFX request */
+#define CA_PIO_PCI_MEM			0x40000000
+#define CA_PIO_PCI_MEM_LEN		0xc0000000
+
+/*
+ * DMA space
+ *
+ * The CA aperature (ie. bus address range) mapped by the GART is segmented into
+ * two parts.  The lower portion of the aperature is used for mapping 32 bit
+ * PCI addresses which are managed by the dma interfaces in this file.  The
+ * upper poprtion of the aperature is used for mapping 48 bit AGP addresses.
+ * The AGP portion of the aperature is managed by the agpgart_be.c driver
+ * in drivers/linux/agp.  There are ca-specific hooks in that driver to
+ * manipulate the gart, but management of the AGP portion of the aperature
+ * is the responsibility of that driver.
+ *
+ * CA allows three main types of DMA mapping:
+ *
+ * PCI 64-bit	Managed by this driver
+ * PCI 32-bit 	Managed by this driver
+ * AGP 48-bit	Managed by hooks in the /dev/agpgart driver
+ *
+ * All of the above can optionally be remapped through the GART.  The following
+ * table lists the combinations of addressing types and GART remapping that
+ * is currently supported by the driver (h/w supports all, s/w limits this):
+ *
+ *		PCI64		PCI32		AGP48
+ * GART		no		yes		yes
+ * Direct	yes		yes		no
+ *
+ * GART remapping of PCI64 is not done because there is no need to.  The
+ * 64 bit PCI address holds all of the information necessary to target any
+ * memory in the system.
+ *
+ * AGP48 is always mapped through the GART.  Management of the AGP48 portion
+ * of the aperature is the responsibility of code in the agpgart_be driver.
+ *
+ * The non-64 bit bus address space will currently be partitioned like this:
+ *
+ *	0xffff_ffff_ffff	+--------
+ *				| AGP48 direct
+ *				| Space managed by this driver
+ *	CA_AGP_DIRECT_BASE	+--------
+ *				| AGP GART mapped (gfx aperature)
+ *				| Space managed by /dev/agpgart driver
+ *				| This range is exposed to the agpgart
+ * 				| driver as the "graphics aperature"
+ *	CA_AGP_MAPPED_BASE	+-----
+ *				| PCI GART mapped
+ *				| Space managed by this driver		
+ *	CA_PCI32_MAPPED_BASE	+----
+ *				| PCI32 direct
+ *				| Space managed by this driver
+ *	0xC000_0000		+--------
+ *	(CA_PCI32_DIRECT_BASE)
+ *
+ * The bus address range CA_PCI32_MAPPED_BASE through CA_AGP_DIRECT_BASE
+ * is what we call the CA aperature.  Addresses falling in this range will
+ * be remapped using the GART.
+ *
+ * The bus address range CA_AGP_MAPPED_BASE through CA_AGP_DIRECT_BASE
+ * is what we call the graphics aperature.  This is a subset of the CA
+ * aperature and is under the control of the agpgart_be driver.
+ *
+ * CA_PCI32_MAPPED_BASE, CA_AGP_MAPPED_BASE, and CA_AGP_DIRECT_BASE are
+ * somewhat arbitrary values.  The known constraints on choosing these is:
+ *
+ * 1)  CA_AGP_DIRECT_BASE-CA_PCI32_MAPPED_BASE+1 (the CA aperature size)
+ *     must be one of the values supported by the ca_gart_aperature register.
+ *     Currently valid values are: 4MB through 4096MB in powers of 2 increments
+ *
+ * 2)  CA_AGP_DIRECT_BASE-CA_AGP_MAPPED_BASE+1 (the gfx aperature size)
+ *     must be in MB units since that's what the agpgart driver assumes.
+ */
+
+/*
+ * Define Bus DMA ranges.  These are configurable (see constraints above)
+ * and will probably need tuning based on experience.
+ */
+
+
+/*
+ * 11/24/03
+ * CA has an addressing glitch w.r.t. PCI direct 32 bit DMA that makes it
+ * generally unusable.  The problem is that for PCI direct 32 
+ * DMA's, all 32 bits of the bus address are used to form the lower 32 bits
+ * of the coretalk address, and coretalk bits 38:32 come from a register.
+ * Since only PCI bus addresses 0xC0000000-0xFFFFFFFF (1GB) are available
+ * for DMA (the rest is allocated to PIO), host node addresses need to be
+ * such that their lower 32 bits fall in the 0xC0000000-0xffffffff range
+ * as well.  So there can be no PCI32 direct DMA below 3GB!!  For this
+ * reason we set the CA_PCI32_DIRECT_SIZE to 0 which essentially makes
+ * tioca_dma_direct32() a noop but preserves the code flow should this issue
+ * be fixed in a respin.
+ *
+ * For now, all PCI32 DMA's must be mapped through the GART.
+ */
+
+#define CA_PCI32_DIRECT_BASE	0xC0000000UL	/* BASE not configurable */
+#define CA_PCI32_DIRECT_SIZE	0x00000000UL	/* 0 MB */
+
+#define CA_PCI32_MAPPED_BASE	0xC0000000UL
+#define CA_PCI32_MAPPED_SIZE	0x40000000UL	/* 2GB */
+
+#define CA_AGP_MAPPED_BASE	0x80000000UL
+#define CA_AGP_MAPPED_SIZE	0x40000000UL	/* 2GB */
+
+#define CA_AGP_DIRECT_BASE	0x40000000UL	/* 2GB */
+#define CA_AGP_DIRECT_SIZE	0x40000000UL
+
+#define CA_APERATURE_BASE	(CA_AGP_MAPPED_BASE)
+#define CA_APERATURE_SIZE	(CA_AGP_MAPPED_SIZE+CA_PCI32_MAPPED_SIZE)
+
+#endif  /* _ASM_IA64_SN_TIO_TIOCA_H */
diff --git a/arch/ia64/include/asm/sn/tioca_provider.h b/arch/ia64/include/asm/sn/tioca_provider.h
new file mode 100644
index 00000000000..9a820ac61be
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tioca_provider.h
@@ -0,0 +1,207 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H
+#define _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H
+
+#include <asm/sn/tioca.h>
+
+/*
+ * WAR enables
+ * Defines for individual WARs. Each is a bitmask of applicable
+ * part revision numbers. (1 << 1) == rev A, (1 << 2) == rev B,
+ * (3 << 1) == (rev A or rev B), etc
+ */
+
+#define TIOCA_WAR_ENABLED(pv, tioca_common) \
+	((1 << tioca_common->ca_rev) & pv)
+
+  /* TIO:ICE:FRZ:Freezer loses a PIO data ucred on PIO RD RSP with CW error */
+#define PV907908 (1 << 1)
+  /* ATI config space problems after BIOS execution starts */
+#define PV908234 (1 << 1)
+  /* CA:AGPDMA write request data mismatch with ABC1CL merge */
+#define PV895469 (1 << 1)
+  /* TIO:CA TLB invalidate of written GART entries possibly not occurring in CA*/
+#define PV910244 (1 << 1)
+
+struct tioca_dmamap{
+	struct list_head	cad_list;	/* headed by ca_list */
+
+	dma_addr_t		cad_dma_addr;	/* Linux dma handle */
+	uint			cad_gart_entry; /* start entry in ca_gart_pagemap */
+	uint			cad_gart_size;	/* #entries for this map */
+};
+
+/*
+ * Kernel only fields.  Prom may look at this stuff for debugging only.
+ * Access this structure through the ca_kernel_private ptr.
+ */
+
+struct tioca_common ;
+
+struct tioca_kernel {
+	struct tioca_common	*ca_common;	/* tioca this belongs to */
+	struct list_head	ca_list;	/* list of all ca's */
+	struct list_head	ca_dmamaps;
+	spinlock_t		ca_lock;	/* Kernel lock */
+	cnodeid_t		ca_closest_node;
+	struct list_head	*ca_devices;	/* bus->devices */
+
+	/*
+	 * General GART stuff
+	 */
+	u64	ca_ap_size;		/* size of aperature in bytes */
+	u32	ca_gart_entries;	/* # u64 entries in gart */
+	u32	ca_ap_pagesize; 	/* aperature page size in bytes */
+	u64	ca_ap_bus_base; 	/* bus address of CA aperature */
+	u64	ca_gart_size;		/* gart size in bytes */
+	u64	*ca_gart;		/* gart table vaddr */
+	u64	ca_gart_coretalk_addr;	/* gart coretalk addr */
+	u8		ca_gart_iscoherent;	/* used in tioca_tlbflush */
+
+	/* PCI GART convenience values */
+	u64	ca_pciap_base;		/* pci aperature bus base address */
+	u64	ca_pciap_size;		/* pci aperature size (bytes) */
+	u64	ca_pcigart_base;	/* gfx GART bus base address */
+	u64	*ca_pcigart;		/* gfx GART vm address */
+	u32	ca_pcigart_entries;
+	u32	ca_pcigart_start;	/* PCI start index in ca_gart */
+	void		*ca_pcigart_pagemap;
+
+	/* AGP GART convenience values */
+	u64	ca_gfxap_base;		/* gfx aperature bus base address */
+	u64	ca_gfxap_size;		/* gfx aperature size (bytes) */
+	u64	ca_gfxgart_base;	/* gfx GART bus base address */
+	u64	*ca_gfxgart;		/* gfx GART vm address */
+	u32	ca_gfxgart_entries;
+	u32	ca_gfxgart_start;	/* agpgart start index in ca_gart */
+};
+
+/*
+ * Common tioca info shared between kernel and prom
+ *
+ * DO NOT CHANGE THIS STRUCT WITHOUT MAKING CORRESPONDING CHANGES
+ * TO THE PROM VERSION.
+ */
+
+struct tioca_common {
+	struct pcibus_bussoft	ca_common;	/* common pciio header */
+
+	u32		ca_rev;
+	u32		ca_closest_nasid;
+
+	u64		ca_prom_private;
+	u64		ca_kernel_private;
+};
+
+/**
+ * tioca_paddr_to_gart - Convert an SGI coretalk address to a CA GART entry
+ * @paddr: page address to convert
+ *
+ * Convert a system [coretalk] address to a GART entry.  GART entries are
+ * formed using the following:
+ *
+ *     data = ( (1<<63) |  ( (REMAP_NODE_ID << 40) | (MD_CHIPLET_ID << 38) | 
+ * (REMAP_SYS_ADDR) ) >> 12 )
+ *
+ * DATA written to 1 GART TABLE Entry in system memory is remapped system
+ * addr for 1 page 
+ *
+ * The data is for coretalk address format right shifted 12 bits with a
+ * valid bit.
+ *
+ *	GART_TABLE_ENTRY [ 25:0 ]  -- REMAP_SYS_ADDRESS[37:12].
+ *	GART_TABLE_ENTRY [ 27:26 ] -- SHUB MD chiplet id.
+ *	GART_TABLE_ENTRY [ 41:28 ] -- REMAP_NODE_ID.
+ *	GART_TABLE_ENTRY [ 63 ]    -- Valid Bit 
+ */
+static inline u64
+tioca_paddr_to_gart(unsigned long paddr)
+{
+	/*
+	 * We are assuming right now that paddr already has the correct
+	 * format since the address from xtalk_dmaXXX should already have
+	 * NODE_ID, CHIPLET_ID, and SYS_ADDR in the correct locations.
+	 */
+
+	return ((paddr) >> 12) | (1UL << 63);
+}
+
+/**
+ * tioca_physpage_to_gart - Map a host physical page for SGI CA based DMA
+ * @page_addr: system page address to map
+ */
+
+static inline unsigned long
+tioca_physpage_to_gart(u64 page_addr)
+{
+	u64 coretalk_addr;
+
+	coretalk_addr = PHYS_TO_TIODMA(page_addr);
+	if (!coretalk_addr) {
+		return 0;
+	}
+
+	return tioca_paddr_to_gart(coretalk_addr);
+}
+
+/**
+ * tioca_tlbflush - invalidate cached SGI CA GART TLB entries
+ * @tioca_kernel: CA context 
+ *
+ * Invalidate tlb entries for a given CA GART.  Main complexity is to account
+ * for revA bug.
+ */
+static inline void
+tioca_tlbflush(struct tioca_kernel *tioca_kernel)
+{
+	volatile u64 tmp;
+	volatile struct tioca __iomem *ca_base;
+	struct tioca_common *tioca_common;
+
+	tioca_common = tioca_kernel->ca_common;
+	ca_base = (struct tioca __iomem *)tioca_common->ca_common.bs_base;
+
+	/*
+	 * Explicit flushes not needed if GART is in cached mode
+	 */
+	if (tioca_kernel->ca_gart_iscoherent) {
+		if (TIOCA_WAR_ENABLED(PV910244, tioca_common)) {
+			/*
+			 * PV910244:  RevA CA needs explicit flushes.
+			 * Need to put GART into uncached mode before
+			 * flushing otherwise the explicit flush is ignored.
+			 *
+			 * Alternate WAR would be to leave GART cached and
+			 * touch every CL aligned GART entry.
+			 */
+
+			__sn_clrq_relaxed(&ca_base->ca_control2, CA_GART_MEM_PARAM);
+			__sn_setq_relaxed(&ca_base->ca_control2, CA_GART_FLUSH_TLB);
+			__sn_setq_relaxed(&ca_base->ca_control2,
+			    (0x2ull << CA_GART_MEM_PARAM_SHFT));
+			tmp = __sn_readq_relaxed(&ca_base->ca_control2);
+		}
+
+		return;
+	}
+
+	/*
+	 * Gart in uncached mode ... need an explicit flush.
+	 */
+
+	__sn_setq_relaxed(&ca_base->ca_control2, CA_GART_FLUSH_TLB);
+	tmp = __sn_readq_relaxed(&ca_base->ca_control2);
+}
+
+extern u32	tioca_gart_found;
+extern struct list_head tioca_list;
+extern int tioca_init_provider(void);
+extern void tioca_fastwrite_enable(struct tioca_kernel *tioca_kern);
+#endif /* _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H */
diff --git a/arch/ia64/include/asm/sn/tioce.h b/arch/ia64/include/asm/sn/tioce.h
new file mode 100644
index 00000000000..6eae8ada90f
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tioce.h
@@ -0,0 +1,760 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef __ASM_IA64_SN_TIOCE_H__
+#define __ASM_IA64_SN_TIOCE_H__
+
+/* CE ASIC part & mfgr information  */
+#define TIOCE_PART_NUM			0xCE00
+#define TIOCE_SRC_ID			0x01
+#define TIOCE_REV_A			0x1
+
+/* CE Virtual PPB Vendor/Device IDs */
+#define CE_VIRT_PPB_VENDOR_ID		0x10a9
+#define CE_VIRT_PPB_DEVICE_ID		0x4002
+
+/* CE Host Bridge Vendor/Device IDs */
+#define CE_HOST_BRIDGE_VENDOR_ID	0x10a9
+#define CE_HOST_BRIDGE_DEVICE_ID	0x4001
+
+
+#define TIOCE_NUM_M40_ATES		4096
+#define TIOCE_NUM_M3240_ATES		2048
+#define TIOCE_NUM_PORTS			2
+
+/*
+ * Register layout for TIOCE.  MMR offsets are shown at the far right of the
+ * structure definition.
+ */
+typedef volatile struct tioce {
+	/*
+	 * ADMIN : Administration Registers
+	 */
+	u64	ce_adm_id;				/* 0x000000 */
+	u64	ce_pad_000008;				/* 0x000008 */
+	u64	ce_adm_dyn_credit_status;		/* 0x000010 */
+	u64	ce_adm_last_credit_status;		/* 0x000018 */
+	u64	ce_adm_credit_limit;			/* 0x000020 */
+	u64	ce_adm_force_credit;			/* 0x000028 */
+	u64	ce_adm_control;				/* 0x000030 */
+	u64	ce_adm_mmr_chn_timeout;			/* 0x000038 */
+	u64	ce_adm_ssp_ure_timeout;			/* 0x000040 */
+	u64	ce_adm_ssp_dre_timeout;			/* 0x000048 */
+	u64	ce_adm_ssp_debug_sel;			/* 0x000050 */
+	u64	ce_adm_int_status;			/* 0x000058 */
+	u64	ce_adm_int_status_alias;		/* 0x000060 */
+	u64	ce_adm_int_mask;			/* 0x000068 */
+	u64	ce_adm_int_pending;			/* 0x000070 */
+	u64	ce_adm_force_int;			/* 0x000078 */
+	u64	ce_adm_ure_ups_buf_barrier_flush;	/* 0x000080 */
+	u64	ce_adm_int_dest[15];	    /* 0x000088 -- 0x0000F8 */
+	u64	ce_adm_error_summary;			/* 0x000100 */
+	u64	ce_adm_error_summary_alias;		/* 0x000108 */
+	u64	ce_adm_error_mask;			/* 0x000110 */
+	u64	ce_adm_first_error;			/* 0x000118 */
+	u64	ce_adm_error_overflow;			/* 0x000120 */
+	u64	ce_adm_error_overflow_alias;		/* 0x000128 */
+	u64	ce_pad_000130[2];	    /* 0x000130 -- 0x000138 */
+	u64	ce_adm_tnum_error;			/* 0x000140 */
+	u64	ce_adm_mmr_err_detail;			/* 0x000148 */
+	u64	ce_adm_msg_sram_perr_detail;		/* 0x000150 */
+	u64	ce_adm_bap_sram_perr_detail;		/* 0x000158 */
+	u64	ce_adm_ce_sram_perr_detail;		/* 0x000160 */
+	u64	ce_adm_ce_credit_oflow_detail;		/* 0x000168 */
+	u64	ce_adm_tx_link_idle_max_timer;		/* 0x000170 */
+	u64	ce_adm_pcie_debug_sel;			/* 0x000178 */
+	u64	ce_pad_000180[16];	    /* 0x000180 -- 0x0001F8 */
+
+	u64	ce_adm_pcie_debug_sel_top;		/* 0x000200 */
+	u64	ce_adm_pcie_debug_lat_sel_lo_top;	/* 0x000208 */
+	u64	ce_adm_pcie_debug_lat_sel_hi_top;	/* 0x000210 */
+	u64	ce_adm_pcie_debug_trig_sel_top;		/* 0x000218 */
+	u64	ce_adm_pcie_debug_trig_lat_sel_lo_top;	/* 0x000220 */
+	u64	ce_adm_pcie_debug_trig_lat_sel_hi_top;	/* 0x000228 */
+	u64	ce_adm_pcie_trig_compare_top;		/* 0x000230 */
+	u64	ce_adm_pcie_trig_compare_en_top;	/* 0x000238 */
+	u64	ce_adm_ssp_debug_sel_top;		/* 0x000240 */
+	u64	ce_adm_ssp_debug_lat_sel_lo_top;	/* 0x000248 */
+	u64	ce_adm_ssp_debug_lat_sel_hi_top;	/* 0x000250 */
+	u64	ce_adm_ssp_debug_trig_sel_top;		/* 0x000258 */
+	u64	ce_adm_ssp_debug_trig_lat_sel_lo_top;	/* 0x000260 */
+	u64	ce_adm_ssp_debug_trig_lat_sel_hi_top;	/* 0x000268 */
+	u64	ce_adm_ssp_trig_compare_top;		/* 0x000270 */
+	u64	ce_adm_ssp_trig_compare_en_top;		/* 0x000278 */
+	u64	ce_pad_000280[48];	    /* 0x000280 -- 0x0003F8 */
+
+	u64	ce_adm_bap_ctrl;			/* 0x000400 */
+	u64	ce_pad_000408[127];	    /* 0x000408 -- 0x0007F8 */
+
+	u64	ce_msg_buf_data63_0[35];    /* 0x000800 -- 0x000918 */
+	u64	ce_pad_000920[29];	    /* 0x000920 -- 0x0009F8 */
+
+	u64	ce_msg_buf_data127_64[35];  /* 0x000A00 -- 0x000B18 */
+	u64	ce_pad_000B20[29];	    /* 0x000B20 -- 0x000BF8 */
+
+	u64	ce_msg_buf_parity[35];	    /* 0x000C00 -- 0x000D18 */
+	u64	ce_pad_000D20[29];	    /* 0x000D20 -- 0x000DF8 */
+
+	u64	ce_pad_000E00[576];	    /* 0x000E00 -- 0x001FF8 */
+
+	/*
+	 * LSI : LSI's PCI Express Link Registers (Link#1 and Link#2)
+	 * Link#1 MMRs at start at 0x002000, Link#2 MMRs at 0x003000
+	 * NOTE: the comment offsets at far right: let 'z' = {2 or 3}
+	 */
+	#define ce_lsi(link_num)	ce_lsi[link_num-1]
+	struct ce_lsi_reg {
+		u64	ce_lsi_lpu_id;			/* 0x00z000 */
+		u64	ce_lsi_rst;			/* 0x00z008 */
+		u64	ce_lsi_dbg_stat;		/* 0x00z010 */
+		u64	ce_lsi_dbg_cfg;			/* 0x00z018 */
+		u64	ce_lsi_ltssm_ctrl;		/* 0x00z020 */
+		u64	ce_lsi_lk_stat;			/* 0x00z028 */
+		u64	ce_pad_00z030[2];   /* 0x00z030 -- 0x00z038 */
+		u64	ce_lsi_int_and_stat;		/* 0x00z040 */
+		u64	ce_lsi_int_mask;		/* 0x00z048 */
+		u64	ce_pad_00z050[22];  /* 0x00z050 -- 0x00z0F8 */
+		u64	ce_lsi_lk_perf_cnt_sel;		/* 0x00z100 */
+		u64	ce_pad_00z108;			/* 0x00z108 */
+		u64	ce_lsi_lk_perf_cnt_ctrl;	/* 0x00z110 */
+		u64	ce_pad_00z118;			/* 0x00z118 */
+		u64	ce_lsi_lk_perf_cnt1;		/* 0x00z120 */
+		u64	ce_lsi_lk_perf_cnt1_test;	/* 0x00z128 */
+		u64	ce_lsi_lk_perf_cnt2;		/* 0x00z130 */
+		u64	ce_lsi_lk_perf_cnt2_test;	/* 0x00z138 */
+		u64	ce_pad_00z140[24];  /* 0x00z140 -- 0x00z1F8 */
+		u64	ce_lsi_lk_lyr_cfg;		/* 0x00z200 */
+		u64	ce_lsi_lk_lyr_status;		/* 0x00z208 */
+		u64	ce_lsi_lk_lyr_int_stat;		/* 0x00z210 */
+		u64	ce_lsi_lk_ly_int_stat_test;	/* 0x00z218 */
+		u64	ce_lsi_lk_ly_int_stat_mask;	/* 0x00z220 */
+		u64	ce_pad_00z228[3];   /* 0x00z228 -- 0x00z238 */
+		u64	ce_lsi_fc_upd_ctl;		/* 0x00z240 */
+		u64	ce_pad_00z248[3];   /* 0x00z248 -- 0x00z258 */
+		u64	ce_lsi_flw_ctl_upd_to_timer;	/* 0x00z260 */
+		u64	ce_lsi_flw_ctl_upd_timer0;	/* 0x00z268 */
+		u64	ce_lsi_flw_ctl_upd_timer1;	/* 0x00z270 */
+		u64	ce_pad_00z278[49];  /* 0x00z278 -- 0x00z3F8 */
+		u64	ce_lsi_freq_nak_lat_thrsh;	/* 0x00z400 */
+		u64	ce_lsi_ack_nak_lat_tmr;		/* 0x00z408 */
+		u64	ce_lsi_rply_tmr_thr;		/* 0x00z410 */
+		u64	ce_lsi_rply_tmr;		/* 0x00z418 */
+		u64	ce_lsi_rply_num_stat;		/* 0x00z420 */
+		u64	ce_lsi_rty_buf_max_addr;	/* 0x00z428 */
+		u64	ce_lsi_rty_fifo_ptr;		/* 0x00z430 */
+		u64	ce_lsi_rty_fifo_rd_wr_ptr;	/* 0x00z438 */
+		u64	ce_lsi_rty_fifo_cred;		/* 0x00z440 */
+		u64	ce_lsi_seq_cnt;			/* 0x00z448 */
+		u64	ce_lsi_ack_sent_seq_num;	/* 0x00z450 */
+		u64	ce_lsi_seq_cnt_fifo_max_addr;	/* 0x00z458 */
+		u64	ce_lsi_seq_cnt_fifo_ptr;	/* 0x00z460 */
+		u64	ce_lsi_seq_cnt_rd_wr_ptr;	/* 0x00z468 */
+		u64	ce_lsi_tx_lk_ts_ctl;		/* 0x00z470 */
+		u64	ce_pad_00z478;			/* 0x00z478 */
+		u64	ce_lsi_mem_addr_ctl;		/* 0x00z480 */
+		u64	ce_lsi_mem_d_ld0;		/* 0x00z488 */
+		u64	ce_lsi_mem_d_ld1;		/* 0x00z490 */
+		u64	ce_lsi_mem_d_ld2;		/* 0x00z498 */
+		u64	ce_lsi_mem_d_ld3;		/* 0x00z4A0 */
+		u64	ce_lsi_mem_d_ld4;		/* 0x00z4A8 */
+		u64	ce_pad_00z4B0[2];   /* 0x00z4B0 -- 0x00z4B8 */
+		u64	ce_lsi_rty_d_cnt;		/* 0x00z4C0 */
+		u64	ce_lsi_seq_buf_cnt;		/* 0x00z4C8 */
+		u64	ce_lsi_seq_buf_bt_d;		/* 0x00z4D0 */
+		u64	ce_pad_00z4D8;			/* 0x00z4D8 */
+		u64	ce_lsi_ack_lat_thr;		/* 0x00z4E0 */
+		u64	ce_pad_00z4E8[3];   /* 0x00z4E8 -- 0x00z4F8 */
+		u64	ce_lsi_nxt_rcv_seq_1_cntr;	/* 0x00z500 */
+		u64	ce_lsi_unsp_dllp_rcvd;		/* 0x00z508 */
+		u64	ce_lsi_rcv_lk_ts_ctl;		/* 0x00z510 */
+		u64	ce_pad_00z518[29];  /* 0x00z518 -- 0x00z5F8 */
+		u64	ce_lsi_phy_lyr_cfg;		/* 0x00z600 */
+		u64	ce_pad_00z608;			/* 0x00z608 */
+		u64	ce_lsi_phy_lyr_int_stat;	/* 0x00z610 */
+		u64	ce_lsi_phy_lyr_int_stat_test;	/* 0x00z618 */
+		u64	ce_lsi_phy_lyr_int_mask;	/* 0x00z620 */
+		u64	ce_pad_00z628[11];  /* 0x00z628 -- 0x00z678 */
+		u64	ce_lsi_rcv_phy_cfg;		/* 0x00z680 */
+		u64	ce_lsi_rcv_phy_stat1;		/* 0x00z688 */
+		u64	ce_lsi_rcv_phy_stat2;		/* 0x00z690 */
+		u64	ce_lsi_rcv_phy_stat3;		/* 0x00z698 */
+		u64	ce_lsi_rcv_phy_int_stat;	/* 0x00z6A0 */
+		u64	ce_lsi_rcv_phy_int_stat_test;	/* 0x00z6A8 */
+		u64	ce_lsi_rcv_phy_int_mask;	/* 0x00z6B0 */
+		u64	ce_pad_00z6B8[9];   /* 0x00z6B8 -- 0x00z6F8 */
+		u64	ce_lsi_tx_phy_cfg;		/* 0x00z700 */
+		u64	ce_lsi_tx_phy_stat;		/* 0x00z708 */
+		u64	ce_lsi_tx_phy_int_stat;		/* 0x00z710 */
+		u64	ce_lsi_tx_phy_int_stat_test;	/* 0x00z718 */
+		u64	ce_lsi_tx_phy_int_mask;		/* 0x00z720 */
+		u64	ce_lsi_tx_phy_stat2;		/* 0x00z728 */
+		u64	ce_pad_00z730[10];  /* 0x00z730 -- 0x00z77F */
+		u64	ce_lsi_ltssm_cfg1;		/* 0x00z780 */
+		u64	ce_lsi_ltssm_cfg2;		/* 0x00z788 */
+		u64	ce_lsi_ltssm_cfg3;		/* 0x00z790 */
+		u64	ce_lsi_ltssm_cfg4;		/* 0x00z798 */
+		u64	ce_lsi_ltssm_cfg5;		/* 0x00z7A0 */
+		u64	ce_lsi_ltssm_stat1;		/* 0x00z7A8 */
+		u64	ce_lsi_ltssm_stat2;		/* 0x00z7B0 */
+		u64	ce_lsi_ltssm_int_stat;		/* 0x00z7B8 */
+		u64	ce_lsi_ltssm_int_stat_test;	/* 0x00z7C0 */
+		u64	ce_lsi_ltssm_int_mask;		/* 0x00z7C8 */
+		u64	ce_lsi_ltssm_stat_wr_en;	/* 0x00z7D0 */
+		u64	ce_pad_00z7D8[5];   /* 0x00z7D8 -- 0x00z7F8 */
+		u64	ce_lsi_gb_cfg1;			/* 0x00z800 */
+		u64	ce_lsi_gb_cfg2;			/* 0x00z808 */
+		u64	ce_lsi_gb_cfg3;			/* 0x00z810 */
+		u64	ce_lsi_gb_cfg4;			/* 0x00z818 */
+		u64	ce_lsi_gb_stat;			/* 0x00z820 */
+		u64	ce_lsi_gb_int_stat;		/* 0x00z828 */
+		u64	ce_lsi_gb_int_stat_test;	/* 0x00z830 */
+		u64	ce_lsi_gb_int_mask;		/* 0x00z838 */
+		u64	ce_lsi_gb_pwr_dn1;		/* 0x00z840 */
+		u64	ce_lsi_gb_pwr_dn2;		/* 0x00z848 */
+		u64	ce_pad_00z850[246]; /* 0x00z850 -- 0x00zFF8 */
+	} ce_lsi[2];
+
+	u64	ce_pad_004000[10];	    /* 0x004000 -- 0x004048 */
+
+	/*
+	 * CRM: Coretalk Receive Module Registers
+	 */
+	u64	ce_crm_debug_mux;			/* 0x004050 */
+	u64	ce_pad_004058;				/* 0x004058 */
+	u64	ce_crm_ssp_err_cmd_wrd;			/* 0x004060 */
+	u64	ce_crm_ssp_err_addr;			/* 0x004068 */
+	u64	ce_crm_ssp_err_syn;			/* 0x004070 */
+
+	u64	ce_pad_004078[499];	    /* 0x004078 -- 0x005008 */
+
+	/*
+         * CXM: Coretalk Xmit Module Registers
+         */
+	u64	ce_cxm_dyn_credit_status;		/* 0x005010 */
+	u64	ce_cxm_last_credit_status;		/* 0x005018 */
+	u64	ce_cxm_credit_limit;			/* 0x005020 */
+	u64	ce_cxm_force_credit;			/* 0x005028 */
+	u64	ce_cxm_disable_bypass;			/* 0x005030 */
+	u64	ce_pad_005038[3];	    /* 0x005038 -- 0x005048 */
+	u64	ce_cxm_debug_mux;			/* 0x005050 */
+
+        u64        ce_pad_005058[501];         /* 0x005058 -- 0x005FF8 */
+
+	/*
+	 * DTL: Downstream Transaction Layer Regs (Link#1 and Link#2)
+	 * DTL: Link#1 MMRs at start at 0x006000, Link#2 MMRs at 0x008000
+	 * DTL: the comment offsets at far right: let 'y' = {6 or 8}
+	 *
+	 * UTL: Downstream Transaction Layer Regs (Link#1 and Link#2)
+	 * UTL: Link#1 MMRs at start at 0x007000, Link#2 MMRs at 0x009000
+	 * UTL: the comment offsets at far right: let 'z' = {7 or 9}
+	 */
+	#define ce_dtl(link_num)	ce_dtl_utl[link_num-1]
+	#define ce_utl(link_num)	ce_dtl_utl[link_num-1]
+	struct ce_dtl_utl_reg {
+		/* DTL */
+		u64	ce_dtl_dtdr_credit_limit;	/* 0x00y000 */
+		u64	ce_dtl_dtdr_credit_force;	/* 0x00y008 */
+		u64	ce_dtl_dyn_credit_status;	/* 0x00y010 */
+		u64	ce_dtl_dtl_last_credit_stat;	/* 0x00y018 */
+		u64	ce_dtl_dtl_ctrl;		/* 0x00y020 */
+		u64	ce_pad_00y028[5];   /* 0x00y028 -- 0x00y048 */
+		u64	ce_dtl_debug_sel;		/* 0x00y050 */
+		u64	ce_pad_00y058[501]; /* 0x00y058 -- 0x00yFF8 */
+
+		/* UTL */
+		u64	ce_utl_utl_ctrl;		/* 0x00z000 */
+		u64	ce_utl_debug_sel;		/* 0x00z008 */
+		u64	ce_pad_00z010[510]; /* 0x00z010 -- 0x00zFF8 */
+	} ce_dtl_utl[2];
+
+	u64	ce_pad_00A000[514];	    /* 0x00A000 -- 0x00B008 */
+
+	/*
+	 * URE: Upstream Request Engine
+         */
+	u64	ce_ure_dyn_credit_status;		/* 0x00B010 */
+	u64	ce_ure_last_credit_status;		/* 0x00B018 */
+	u64	ce_ure_credit_limit;			/* 0x00B020 */
+	u64	ce_pad_00B028;				/* 0x00B028 */
+	u64	ce_ure_control;				/* 0x00B030 */
+	u64	ce_ure_status;				/* 0x00B038 */
+	u64	ce_pad_00B040[2];	    /* 0x00B040 -- 0x00B048 */
+	u64	ce_ure_debug_sel;			/* 0x00B050 */
+	u64	ce_ure_pcie_debug_sel;			/* 0x00B058 */
+	u64	ce_ure_ssp_err_cmd_wrd;			/* 0x00B060 */
+	u64	ce_ure_ssp_err_addr;			/* 0x00B068 */
+	u64	ce_ure_page_map;			/* 0x00B070 */
+	u64	ce_ure_dir_map[TIOCE_NUM_PORTS];	/* 0x00B078 */
+	u64	ce_ure_pipe_sel1;			/* 0x00B088 */
+	u64	ce_ure_pipe_mask1;			/* 0x00B090 */
+	u64	ce_ure_pipe_sel2;			/* 0x00B098 */
+	u64	ce_ure_pipe_mask2;			/* 0x00B0A0 */
+	u64	ce_ure_pcie1_credits_sent;		/* 0x00B0A8 */
+	u64	ce_ure_pcie1_credits_used;		/* 0x00B0B0 */
+	u64	ce_ure_pcie1_credit_limit;		/* 0x00B0B8 */
+	u64	ce_ure_pcie2_credits_sent;		/* 0x00B0C0 */
+	u64	ce_ure_pcie2_credits_used;		/* 0x00B0C8 */
+	u64	ce_ure_pcie2_credit_limit;		/* 0x00B0D0 */
+	u64	ce_ure_pcie_force_credit;		/* 0x00B0D8 */
+	u64	ce_ure_rd_tnum_val;			/* 0x00B0E0 */
+	u64	ce_ure_rd_tnum_rsp_rcvd;		/* 0x00B0E8 */
+	u64	ce_ure_rd_tnum_esent_timer;		/* 0x00B0F0 */
+	u64	ce_ure_rd_tnum_error;			/* 0x00B0F8 */
+	u64	ce_ure_rd_tnum_first_cl;		/* 0x00B100 */
+	u64	ce_ure_rd_tnum_link_buf;		/* 0x00B108 */
+	u64	ce_ure_wr_tnum_val;			/* 0x00B110 */
+	u64	ce_ure_sram_err_addr0;			/* 0x00B118 */
+	u64	ce_ure_sram_err_addr1;			/* 0x00B120 */
+	u64	ce_ure_sram_err_addr2;			/* 0x00B128 */
+	u64	ce_ure_sram_rd_addr0;			/* 0x00B130 */
+	u64	ce_ure_sram_rd_addr1;			/* 0x00B138 */
+	u64	ce_ure_sram_rd_addr2;			/* 0x00B140 */
+	u64	ce_ure_sram_wr_addr0;			/* 0x00B148 */
+	u64	ce_ure_sram_wr_addr1;			/* 0x00B150 */
+	u64	ce_ure_sram_wr_addr2;			/* 0x00B158 */
+	u64	ce_ure_buf_flush10;			/* 0x00B160 */
+	u64	ce_ure_buf_flush11;			/* 0x00B168 */
+	u64	ce_ure_buf_flush12;			/* 0x00B170 */
+	u64	ce_ure_buf_flush13;			/* 0x00B178 */
+	u64	ce_ure_buf_flush20;			/* 0x00B180 */
+	u64	ce_ure_buf_flush21;			/* 0x00B188 */
+	u64	ce_ure_buf_flush22;			/* 0x00B190 */
+	u64	ce_ure_buf_flush23;			/* 0x00B198 */
+	u64	ce_ure_pcie_control1;			/* 0x00B1A0 */
+	u64	ce_ure_pcie_control2;			/* 0x00B1A8 */
+
+	u64	ce_pad_00B1B0[458];	    /* 0x00B1B0 -- 0x00BFF8 */
+
+	/* Upstream Data Buffer, Port1 */
+	struct ce_ure_maint_ups_dat1_data {
+		u64	data63_0[512];	    /* 0x00C000 -- 0x00CFF8 */
+		u64	data127_64[512];    /* 0x00D000 -- 0x00DFF8 */
+		u64	parity[512];	    /* 0x00E000 -- 0x00EFF8 */
+	} ce_ure_maint_ups_dat1;
+
+	/* Upstream Header Buffer, Port1 */
+	struct ce_ure_maint_ups_hdr1_data {
+		u64	data63_0[512];	    /* 0x00F000 -- 0x00FFF8 */
+		u64	data127_64[512];    /* 0x010000 -- 0x010FF8 */
+		u64	parity[512];	    /* 0x011000 -- 0x011FF8 */
+	} ce_ure_maint_ups_hdr1;
+
+	/* Upstream Data Buffer, Port2 */
+	struct ce_ure_maint_ups_dat2_data {
+		u64	data63_0[512];	    /* 0x012000 -- 0x012FF8 */
+		u64	data127_64[512];    /* 0x013000 -- 0x013FF8 */
+		u64	parity[512];	    /* 0x014000 -- 0x014FF8 */
+	} ce_ure_maint_ups_dat2;
+
+	/* Upstream Header Buffer, Port2 */
+	struct ce_ure_maint_ups_hdr2_data {
+		u64	data63_0[512];	    /* 0x015000 -- 0x015FF8 */
+		u64	data127_64[512];    /* 0x016000 -- 0x016FF8 */
+		u64	parity[512];	    /* 0x017000 -- 0x017FF8 */
+	} ce_ure_maint_ups_hdr2;
+
+	/* Downstream Data Buffer */
+	struct ce_ure_maint_dns_dat_data {
+		u64	data63_0[512];	    /* 0x018000 -- 0x018FF8 */
+		u64	data127_64[512];    /* 0x019000 -- 0x019FF8 */
+		u64	parity[512];	    /* 0x01A000 -- 0x01AFF8 */
+	} ce_ure_maint_dns_dat;
+
+	/* Downstream Header Buffer */
+	struct	ce_ure_maint_dns_hdr_data {
+		u64	data31_0[64];	    /* 0x01B000 -- 0x01B1F8 */
+		u64	data95_32[64];	    /* 0x01B200 -- 0x01B3F8 */
+		u64	parity[64];	    /* 0x01B400 -- 0x01B5F8 */
+	} ce_ure_maint_dns_hdr;
+
+	/* RCI Buffer Data */
+	struct	ce_ure_maint_rci_data {
+		u64	data41_0[64];	    /* 0x01B600 -- 0x01B7F8 */
+		u64	data69_42[64];	    /* 0x01B800 -- 0x01B9F8 */
+	} ce_ure_maint_rci;
+
+	/* Response Queue */
+	u64	ce_ure_maint_rspq[64];	    /* 0x01BA00 -- 0x01BBF8 */
+
+	u64	ce_pad_01C000[4224];	    /* 0x01BC00 -- 0x023FF8 */
+
+	/* Admin Build-a-Packet Buffer */
+	struct	ce_adm_maint_bap_buf_data {
+		u64	data63_0[258];	    /* 0x024000 -- 0x024808 */
+		u64	data127_64[258];    /* 0x024810 -- 0x025018 */
+		u64	parity[258];	    /* 0x025020 -- 0x025828 */
+	} ce_adm_maint_bap_buf;
+
+	u64	ce_pad_025830[5370];	    /* 0x025830 -- 0x02FFF8 */
+
+	/* URE: 40bit PMU ATE Buffer */		    /* 0x030000 -- 0x037FF8 */
+	u64	ce_ure_ate40[TIOCE_NUM_M40_ATES];
+
+	/* URE: 32/40bit PMU ATE Buffer */	    /* 0x038000 -- 0x03BFF8 */
+	u64	ce_ure_ate3240[TIOCE_NUM_M3240_ATES];
+
+	u64	ce_pad_03C000[2050];	    /* 0x03C000 -- 0x040008 */
+
+	/*
+	 * DRE: Down Stream Request Engine
+         */
+	u64	ce_dre_dyn_credit_status1;		/* 0x040010 */
+	u64	ce_dre_dyn_credit_status2;		/* 0x040018 */
+	u64	ce_dre_last_credit_status1;		/* 0x040020 */
+	u64	ce_dre_last_credit_status2;		/* 0x040028 */
+	u64	ce_dre_credit_limit1;			/* 0x040030 */
+	u64	ce_dre_credit_limit2;			/* 0x040038 */
+	u64	ce_dre_force_credit1;			/* 0x040040 */
+	u64	ce_dre_force_credit2;			/* 0x040048 */
+	u64	ce_dre_debug_mux1;			/* 0x040050 */
+	u64	ce_dre_debug_mux2;			/* 0x040058 */
+	u64	ce_dre_ssp_err_cmd_wrd;			/* 0x040060 */
+	u64	ce_dre_ssp_err_addr;			/* 0x040068 */
+	u64	ce_dre_comp_err_cmd_wrd;		/* 0x040070 */
+	u64	ce_dre_comp_err_addr;			/* 0x040078 */
+	u64	ce_dre_req_status;			/* 0x040080 */
+	u64	ce_dre_config1;				/* 0x040088 */
+	u64	ce_dre_config2;				/* 0x040090 */
+	u64	ce_dre_config_req_status;		/* 0x040098 */
+	u64	ce_pad_0400A0[12];	    /* 0x0400A0 -- 0x0400F8 */
+	u64	ce_dre_dyn_fifo;			/* 0x040100 */
+	u64	ce_pad_040108[3];	    /* 0x040108 -- 0x040118 */
+	u64	ce_dre_last_fifo;			/* 0x040120 */
+
+	u64	ce_pad_040128[27];	    /* 0x040128 -- 0x0401F8 */
+
+	/* DRE Downstream Head Queue */
+	struct	ce_dre_maint_ds_head_queue {
+		u64	data63_0[32];	    /* 0x040200 -- 0x0402F8 */
+		u64	data127_64[32];	    /* 0x040300 -- 0x0403F8 */
+		u64	parity[32];	    /* 0x040400 -- 0x0404F8 */
+	} ce_dre_maint_ds_head_q;
+
+	u64	ce_pad_040500[352];	    /* 0x040500 -- 0x040FF8 */
+
+	/* DRE Downstream Data Queue */
+	struct	ce_dre_maint_ds_data_queue {
+		u64	data63_0[256];	    /* 0x041000 -- 0x0417F8 */
+		u64	ce_pad_041800[256]; /* 0x041800 -- 0x041FF8 */
+		u64	data127_64[256];    /* 0x042000 -- 0x0427F8 */
+		u64	ce_pad_042800[256]; /* 0x042800 -- 0x042FF8 */
+		u64	parity[256];	    /* 0x043000 -- 0x0437F8 */
+		u64	ce_pad_043800[256]; /* 0x043800 -- 0x043FF8 */
+	} ce_dre_maint_ds_data_q;
+
+	/* DRE URE Upstream Response Queue */
+	struct	ce_dre_maint_ure_us_rsp_queue {
+		u64	data63_0[8];	    /* 0x044000 -- 0x044038 */
+		u64	ce_pad_044040[24];  /* 0x044040 -- 0x0440F8 */
+		u64	data127_64[8];      /* 0x044100 -- 0x044138 */
+		u64	ce_pad_044140[24];  /* 0x044140 -- 0x0441F8 */
+		u64	parity[8];	    /* 0x044200 -- 0x044238 */
+		u64	ce_pad_044240[24];  /* 0x044240 -- 0x0442F8 */
+	} ce_dre_maint_ure_us_rsp_q;
+
+	u64 	ce_dre_maint_us_wrt_rsp[32];/* 0x044300 -- 0x0443F8 */
+
+	u64	ce_end_of_struct;			/* 0x044400 */
+} tioce_t;
+
+/* ce_lsiX_gb_cfg1 register bit masks & shifts */
+#define CE_LSI_GB_CFG1_RXL0S_THS_SHFT	0
+#define CE_LSI_GB_CFG1_RXL0S_THS_MASK	(0xffULL << 0)
+#define CE_LSI_GB_CFG1_RXL0S_SMP_SHFT	8
+#define CE_LSI_GB_CFG1_RXL0S_SMP_MASK	(0xfULL << 8)
+#define CE_LSI_GB_CFG1_RXL0S_ADJ_SHFT	12
+#define CE_LSI_GB_CFG1_RXL0S_ADJ_MASK	(0x7ULL << 12)
+#define CE_LSI_GB_CFG1_RXL0S_FLT_SHFT	15
+#define CE_LSI_GB_CFG1_RXL0S_FLT_MASK	(0x1ULL << 15)
+#define CE_LSI_GB_CFG1_LPBK_SEL_SHFT	16
+#define CE_LSI_GB_CFG1_LPBK_SEL_MASK	(0x3ULL << 16)
+#define CE_LSI_GB_CFG1_LPBK_EN_SHFT	18
+#define CE_LSI_GB_CFG1_LPBK_EN_MASK	(0x1ULL << 18)
+#define CE_LSI_GB_CFG1_RVRS_LB_SHFT	19
+#define CE_LSI_GB_CFG1_RVRS_LB_MASK	(0x1ULL << 19)
+#define CE_LSI_GB_CFG1_RVRS_CLK_SHFT	20
+#define CE_LSI_GB_CFG1_RVRS_CLK_MASK	(0x3ULL << 20)
+#define CE_LSI_GB_CFG1_SLF_TS_SHFT	24
+#define CE_LSI_GB_CFG1_SLF_TS_MASK	(0xfULL << 24)
+
+/* ce_adm_int_mask/ce_adm_int_status register bit defines */
+#define CE_ADM_INT_CE_ERROR_SHFT		0
+#define CE_ADM_INT_LSI1_IP_ERROR_SHFT		1
+#define CE_ADM_INT_LSI2_IP_ERROR_SHFT		2
+#define CE_ADM_INT_PCIE_ERROR_SHFT		3
+#define CE_ADM_INT_PORT1_HOTPLUG_EVENT_SHFT	4
+#define CE_ADM_INT_PORT2_HOTPLUG_EVENT_SHFT	5
+#define CE_ADM_INT_PCIE_PORT1_DEV_A_SHFT	6
+#define CE_ADM_INT_PCIE_PORT1_DEV_B_SHFT	7
+#define CE_ADM_INT_PCIE_PORT1_DEV_C_SHFT	8
+#define CE_ADM_INT_PCIE_PORT1_DEV_D_SHFT	9
+#define CE_ADM_INT_PCIE_PORT2_DEV_A_SHFT	10
+#define CE_ADM_INT_PCIE_PORT2_DEV_B_SHFT	11
+#define CE_ADM_INT_PCIE_PORT2_DEV_C_SHFT	12
+#define CE_ADM_INT_PCIE_PORT2_DEV_D_SHFT	13
+#define CE_ADM_INT_PCIE_MSG_SHFT		14 /*see int_dest_14*/
+#define CE_ADM_INT_PCIE_MSG_SLOT_0_SHFT		14
+#define CE_ADM_INT_PCIE_MSG_SLOT_1_SHFT		15
+#define CE_ADM_INT_PCIE_MSG_SLOT_2_SHFT		16
+#define CE_ADM_INT_PCIE_MSG_SLOT_3_SHFT		17
+#define CE_ADM_INT_PORT1_PM_PME_MSG_SHFT	22
+#define CE_ADM_INT_PORT2_PM_PME_MSG_SHFT	23
+
+/* ce_adm_force_int register bit defines */
+#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_A_SHFT	0
+#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_B_SHFT	1
+#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_C_SHFT	2
+#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_D_SHFT	3
+#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_A_SHFT	4
+#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_B_SHFT	5
+#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_C_SHFT	6
+#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_D_SHFT	7
+#define CE_ADM_FORCE_INT_ALWAYS_SHFT		8
+
+/* ce_adm_int_dest register bit masks & shifts */
+#define INTR_VECTOR_SHFT			56
+
+/* ce_adm_error_mask and ce_adm_error_summary register bit masks */
+#define CE_ADM_ERR_CRM_SSP_REQ_INVALID			(0x1ULL <<  0)
+#define CE_ADM_ERR_SSP_REQ_HEADER			(0x1ULL <<  1)
+#define CE_ADM_ERR_SSP_RSP_HEADER			(0x1ULL <<  2)
+#define CE_ADM_ERR_SSP_PROTOCOL_ERROR			(0x1ULL <<  3)
+#define CE_ADM_ERR_SSP_SBE				(0x1ULL <<  4)
+#define CE_ADM_ERR_SSP_MBE				(0x1ULL <<  5)
+#define CE_ADM_ERR_CXM_CREDIT_OFLOW			(0x1ULL <<  6)
+#define CE_ADM_ERR_DRE_SSP_REQ_INVAL			(0x1ULL <<  7)
+#define CE_ADM_ERR_SSP_REQ_LONG				(0x1ULL <<  8)
+#define CE_ADM_ERR_SSP_REQ_OFLOW			(0x1ULL <<  9)
+#define CE_ADM_ERR_SSP_REQ_SHORT			(0x1ULL << 10)
+#define CE_ADM_ERR_SSP_REQ_SIDEBAND			(0x1ULL << 11)
+#define CE_ADM_ERR_SSP_REQ_ADDR_ERR			(0x1ULL << 12)
+#define CE_ADM_ERR_SSP_REQ_BAD_BE			(0x1ULL << 13)
+#define CE_ADM_ERR_PCIE_COMPL_TIMEOUT			(0x1ULL << 14)
+#define CE_ADM_ERR_PCIE_UNEXP_COMPL			(0x1ULL << 15)
+#define CE_ADM_ERR_PCIE_ERR_COMPL			(0x1ULL << 16)
+#define CE_ADM_ERR_DRE_CREDIT_OFLOW			(0x1ULL << 17)
+#define CE_ADM_ERR_DRE_SRAM_PE				(0x1ULL << 18)
+#define CE_ADM_ERR_SSP_RSP_INVALID			(0x1ULL << 19)
+#define CE_ADM_ERR_SSP_RSP_LONG				(0x1ULL << 20)
+#define CE_ADM_ERR_SSP_RSP_SHORT			(0x1ULL << 21)
+#define CE_ADM_ERR_SSP_RSP_SIDEBAND			(0x1ULL << 22)
+#define CE_ADM_ERR_URE_SSP_RSP_UNEXP			(0x1ULL << 23)
+#define CE_ADM_ERR_URE_SSP_WR_REQ_TIMEOUT		(0x1ULL << 24)
+#define CE_ADM_ERR_URE_SSP_RD_REQ_TIMEOUT		(0x1ULL << 25)
+#define CE_ADM_ERR_URE_ATE3240_PAGE_FAULT		(0x1ULL << 26)
+#define CE_ADM_ERR_URE_ATE40_PAGE_FAULT			(0x1ULL << 27)
+#define CE_ADM_ERR_URE_CREDIT_OFLOW			(0x1ULL << 28)
+#define CE_ADM_ERR_URE_SRAM_PE				(0x1ULL << 29)
+#define CE_ADM_ERR_ADM_SSP_RSP_UNEXP			(0x1ULL << 30)
+#define CE_ADM_ERR_ADM_SSP_REQ_TIMEOUT			(0x1ULL << 31)
+#define CE_ADM_ERR_MMR_ACCESS_ERROR			(0x1ULL << 32)
+#define CE_ADM_ERR_MMR_ADDR_ERROR			(0x1ULL << 33)
+#define CE_ADM_ERR_ADM_CREDIT_OFLOW			(0x1ULL << 34)
+#define CE_ADM_ERR_ADM_SRAM_PE				(0x1ULL << 35)
+#define CE_ADM_ERR_DTL1_MIN_PDATA_CREDIT_ERR		(0x1ULL << 36)
+#define CE_ADM_ERR_DTL1_INF_COMPL_CRED_UPDT_ERR		(0x1ULL << 37)
+#define CE_ADM_ERR_DTL1_INF_POSTED_CRED_UPDT_ERR	(0x1ULL << 38)
+#define CE_ADM_ERR_DTL1_INF_NPOSTED_CRED_UPDT_ERR	(0x1ULL << 39)
+#define CE_ADM_ERR_DTL1_COMP_HD_CRED_MAX_ERR		(0x1ULL << 40)
+#define CE_ADM_ERR_DTL1_COMP_D_CRED_MAX_ERR		(0x1ULL << 41)
+#define CE_ADM_ERR_DTL1_NPOSTED_HD_CRED_MAX_ERR		(0x1ULL << 42)
+#define CE_ADM_ERR_DTL1_NPOSTED_D_CRED_MAX_ERR		(0x1ULL << 43)
+#define CE_ADM_ERR_DTL1_POSTED_HD_CRED_MAX_ERR		(0x1ULL << 44)
+#define CE_ADM_ERR_DTL1_POSTED_D_CRED_MAX_ERR		(0x1ULL << 45)
+#define CE_ADM_ERR_DTL2_MIN_PDATA_CREDIT_ERR		(0x1ULL << 46)
+#define CE_ADM_ERR_DTL2_INF_COMPL_CRED_UPDT_ERR		(0x1ULL << 47)
+#define CE_ADM_ERR_DTL2_INF_POSTED_CRED_UPDT_ERR	(0x1ULL << 48)
+#define CE_ADM_ERR_DTL2_INF_NPOSTED_CRED_UPDT_ERR	(0x1ULL << 49)
+#define CE_ADM_ERR_DTL2_COMP_HD_CRED_MAX_ERR		(0x1ULL << 50)
+#define CE_ADM_ERR_DTL2_COMP_D_CRED_MAX_ERR		(0x1ULL << 51)
+#define CE_ADM_ERR_DTL2_NPOSTED_HD_CRED_MAX_ERR		(0x1ULL << 52)
+#define CE_ADM_ERR_DTL2_NPOSTED_D_CRED_MAX_ERR		(0x1ULL << 53)
+#define CE_ADM_ERR_DTL2_POSTED_HD_CRED_MAX_ERR		(0x1ULL << 54)
+#define CE_ADM_ERR_DTL2_POSTED_D_CRED_MAX_ERR		(0x1ULL << 55)
+#define CE_ADM_ERR_PORT1_PCIE_COR_ERR			(0x1ULL << 56)
+#define CE_ADM_ERR_PORT1_PCIE_NFAT_ERR			(0x1ULL << 57)
+#define CE_ADM_ERR_PORT1_PCIE_FAT_ERR			(0x1ULL << 58)
+#define CE_ADM_ERR_PORT2_PCIE_COR_ERR			(0x1ULL << 59)
+#define CE_ADM_ERR_PORT2_PCIE_NFAT_ERR			(0x1ULL << 60)
+#define CE_ADM_ERR_PORT2_PCIE_FAT_ERR			(0x1ULL << 61)
+
+/* ce_adm_ure_ups_buf_barrier_flush register bit masks and shifts */
+#define FLUSH_SEL_PORT1_PIPE0_SHFT	0
+#define FLUSH_SEL_PORT1_PIPE1_SHFT	4
+#define FLUSH_SEL_PORT1_PIPE2_SHFT	8
+#define FLUSH_SEL_PORT1_PIPE3_SHFT	12
+#define FLUSH_SEL_PORT2_PIPE0_SHFT	16
+#define FLUSH_SEL_PORT2_PIPE1_SHFT	20
+#define FLUSH_SEL_PORT2_PIPE2_SHFT	24
+#define FLUSH_SEL_PORT2_PIPE3_SHFT	28
+
+/* ce_dre_config1 register bit masks and shifts */
+#define CE_DRE_RO_ENABLE		(0x1ULL << 0)
+#define CE_DRE_DYN_RO_ENABLE		(0x1ULL << 1)
+#define CE_DRE_SUP_CONFIG_COMP_ERROR	(0x1ULL << 2)
+#define CE_DRE_SUP_IO_COMP_ERROR	(0x1ULL << 3)
+#define CE_DRE_ADDR_MODE_SHFT		4
+
+/* ce_dre_config_req_status register bit masks */
+#define CE_DRE_LAST_CONFIG_COMPLETION	(0x7ULL << 0)
+#define CE_DRE_DOWNSTREAM_CONFIG_ERROR	(0x1ULL << 3)
+#define CE_DRE_CONFIG_COMPLETION_VALID	(0x1ULL << 4)
+#define CE_DRE_CONFIG_REQUEST_ACTIVE	(0x1ULL << 5)
+
+/* ce_ure_control register bit masks & shifts */
+#define CE_URE_RD_MRG_ENABLE		(0x1ULL << 0)
+#define CE_URE_WRT_MRG_ENABLE1		(0x1ULL << 4)
+#define CE_URE_WRT_MRG_ENABLE2		(0x1ULL << 5)
+#define CE_URE_WRT_MRG_TIMER_SHFT	12
+#define CE_URE_WRT_MRG_TIMER_MASK	(0x7FFULL << CE_URE_WRT_MRG_TIMER_SHFT)
+#define CE_URE_WRT_MRG_TIMER(x)		(((u64)(x) << \
+					  CE_URE_WRT_MRG_TIMER_SHFT) & \
+					 CE_URE_WRT_MRG_TIMER_MASK)
+#define CE_URE_RSPQ_BYPASS_DISABLE	(0x1ULL << 24)
+#define CE_URE_UPS_DAT1_PAR_DISABLE	(0x1ULL << 32)
+#define CE_URE_UPS_HDR1_PAR_DISABLE	(0x1ULL << 33)
+#define CE_URE_UPS_DAT2_PAR_DISABLE	(0x1ULL << 34)
+#define CE_URE_UPS_HDR2_PAR_DISABLE	(0x1ULL << 35)
+#define CE_URE_ATE_PAR_DISABLE		(0x1ULL << 36)
+#define CE_URE_RCI_PAR_DISABLE		(0x1ULL << 37)
+#define CE_URE_RSPQ_PAR_DISABLE		(0x1ULL << 38)
+#define CE_URE_DNS_DAT_PAR_DISABLE	(0x1ULL << 39)
+#define CE_URE_DNS_HDR_PAR_DISABLE	(0x1ULL << 40)
+#define CE_URE_MALFORM_DISABLE		(0x1ULL << 44)
+#define CE_URE_UNSUP_DISABLE		(0x1ULL << 45)
+
+/* ce_ure_page_map register bit masks & shifts */
+#define CE_URE_ATE3240_ENABLE		(0x1ULL << 0)
+#define CE_URE_ATE40_ENABLE 		(0x1ULL << 1)
+#define CE_URE_PAGESIZE_SHFT		4
+#define CE_URE_PAGESIZE_MASK		(0x7ULL << CE_URE_PAGESIZE_SHFT)
+#define CE_URE_4K_PAGESIZE		(0x0ULL << CE_URE_PAGESIZE_SHFT)
+#define CE_URE_16K_PAGESIZE		(0x1ULL << CE_URE_PAGESIZE_SHFT)
+#define CE_URE_64K_PAGESIZE		(0x2ULL << CE_URE_PAGESIZE_SHFT)
+#define CE_URE_128K_PAGESIZE		(0x3ULL << CE_URE_PAGESIZE_SHFT)
+#define CE_URE_256K_PAGESIZE		(0x4ULL << CE_URE_PAGESIZE_SHFT)
+
+/* ce_ure_pipe_sel register bit masks & shifts */
+#define PKT_TRAFIC_SHRT			16
+#define BUS_SRC_ID_SHFT			8
+#define DEV_SRC_ID_SHFT			3
+#define FNC_SRC_ID_SHFT			0
+#define CE_URE_TC_MASK			(0x07ULL << PKT_TRAFIC_SHRT)
+#define CE_URE_BUS_MASK			(0xFFULL << BUS_SRC_ID_SHFT)
+#define CE_URE_DEV_MASK			(0x1FULL << DEV_SRC_ID_SHFT)
+#define CE_URE_FNC_MASK			(0x07ULL << FNC_SRC_ID_SHFT)
+#define CE_URE_PIPE_BUS(b)		(((u64)(b) << BUS_SRC_ID_SHFT) & \
+					 CE_URE_BUS_MASK)
+#define CE_URE_PIPE_DEV(d)		(((u64)(d) << DEV_SRC_ID_SHFT) & \
+					 CE_URE_DEV_MASK)
+#define CE_URE_PIPE_FNC(f)		(((u64)(f) << FNC_SRC_ID_SHFT) & \
+					 CE_URE_FNC_MASK)
+
+#define CE_URE_SEL1_SHFT		0
+#define CE_URE_SEL2_SHFT		20
+#define CE_URE_SEL3_SHFT		40
+#define CE_URE_SEL1_MASK		(0x7FFFFULL << CE_URE_SEL1_SHFT)
+#define CE_URE_SEL2_MASK		(0x7FFFFULL << CE_URE_SEL2_SHFT)
+#define CE_URE_SEL3_MASK		(0x7FFFFULL << CE_URE_SEL3_SHFT)
+
+
+/* ce_ure_pipe_mask register bit masks & shifts */
+#define CE_URE_MASK1_SHFT		0
+#define CE_URE_MASK2_SHFT		20
+#define CE_URE_MASK3_SHFT		40
+#define CE_URE_MASK1_MASK		(0x7FFFFULL << CE_URE_MASK1_SHFT)
+#define CE_URE_MASK2_MASK		(0x7FFFFULL << CE_URE_MASK2_SHFT)
+#define CE_URE_MASK3_MASK		(0x7FFFFULL << CE_URE_MASK3_SHFT)
+
+
+/* ce_ure_pcie_control1 register bit masks & shifts */
+#define CE_URE_SI			(0x1ULL << 0)
+#define CE_URE_ELAL_SHFT		4
+#define CE_URE_ELAL_MASK		(0x7ULL << CE_URE_ELAL_SHFT)
+#define CE_URE_ELAL_SET(n)		(((u64)(n) << CE_URE_ELAL_SHFT) & \
+					 CE_URE_ELAL_MASK)
+#define CE_URE_ELAL1_SHFT		8
+#define CE_URE_ELAL1_MASK		(0x7ULL << CE_URE_ELAL1_SHFT)
+#define CE_URE_ELAL1_SET(n)		(((u64)(n) << CE_URE_ELAL1_SHFT) & \
+					 CE_URE_ELAL1_MASK)
+#define CE_URE_SCC			(0x1ULL << 12)
+#define CE_URE_PN1_SHFT			16
+#define CE_URE_PN1_MASK			(0xFFULL << CE_URE_PN1_SHFT)
+#define CE_URE_PN2_SHFT			24
+#define CE_URE_PN2_MASK			(0xFFULL << CE_URE_PN2_SHFT)
+#define CE_URE_PN1_SET(n)		(((u64)(n) << CE_URE_PN1_SHFT) & \
+					 CE_URE_PN1_MASK)
+#define CE_URE_PN2_SET(n)		(((u64)(n) << CE_URE_PN2_SHFT) & \
+					 CE_URE_PN2_MASK)
+
+/* ce_ure_pcie_control2 register bit masks & shifts */
+#define CE_URE_ABP			(0x1ULL << 0)
+#define CE_URE_PCP			(0x1ULL << 1)
+#define CE_URE_MSP			(0x1ULL << 2)
+#define CE_URE_AIP			(0x1ULL << 3)
+#define CE_URE_PIP			(0x1ULL << 4)
+#define CE_URE_HPS			(0x1ULL << 5)
+#define CE_URE_HPC			(0x1ULL << 6)
+#define CE_URE_SPLV_SHFT		7
+#define CE_URE_SPLV_MASK		(0xFFULL << CE_URE_SPLV_SHFT)
+#define CE_URE_SPLV_SET(n)		(((u64)(n) << CE_URE_SPLV_SHFT) & \
+					 CE_URE_SPLV_MASK)
+#define CE_URE_SPLS_SHFT		15
+#define CE_URE_SPLS_MASK		(0x3ULL << CE_URE_SPLS_SHFT)
+#define CE_URE_SPLS_SET(n)		(((u64)(n) << CE_URE_SPLS_SHFT) & \
+					 CE_URE_SPLS_MASK)
+#define CE_URE_PSN1_SHFT		19
+#define CE_URE_PSN1_MASK		(0x1FFFULL << CE_URE_PSN1_SHFT)
+#define CE_URE_PSN2_SHFT		32
+#define CE_URE_PSN2_MASK		(0x1FFFULL << CE_URE_PSN2_SHFT)
+#define CE_URE_PSN1_SET(n)		(((u64)(n) << CE_URE_PSN1_SHFT) & \
+					 CE_URE_PSN1_MASK)
+#define CE_URE_PSN2_SET(n)		(((u64)(n) << CE_URE_PSN2_SHFT) & \
+					 CE_URE_PSN2_MASK)
+
+/*
+ * PIO address space ranges for CE
+ */
+
+/* Local CE Registers Space */
+#define CE_PIO_MMR			0x00000000
+#define CE_PIO_MMR_LEN			0x04000000
+
+/* PCI Compatible Config Space */
+#define CE_PIO_CONFIG_SPACE		0x04000000
+#define CE_PIO_CONFIG_SPACE_LEN		0x04000000
+
+/* PCI I/O Space Alias */
+#define CE_PIO_IO_SPACE_ALIAS		0x08000000
+#define CE_PIO_IO_SPACE_ALIAS_LEN	0x08000000
+
+/* PCI Enhanced Config Space */
+#define CE_PIO_E_CONFIG_SPACE		0x10000000
+#define CE_PIO_E_CONFIG_SPACE_LEN	0x10000000
+
+/* PCI I/O Space */
+#define CE_PIO_IO_SPACE			0x100000000
+#define CE_PIO_IO_SPACE_LEN		0x100000000
+
+/* PCI MEM Space */
+#define CE_PIO_MEM_SPACE		0x200000000
+#define CE_PIO_MEM_SPACE_LEN		TIO_HWIN_SIZE
+
+
+/*
+ * CE PCI Enhanced Config Space shifts & masks
+ */
+#define CE_E_CONFIG_BUS_SHFT		20
+#define CE_E_CONFIG_BUS_MASK		(0xFF << CE_E_CONFIG_BUS_SHFT)
+#define CE_E_CONFIG_DEVICE_SHFT		15
+#define CE_E_CONFIG_DEVICE_MASK		(0x1F << CE_E_CONFIG_DEVICE_SHFT)
+#define CE_E_CONFIG_FUNC_SHFT		12
+#define CE_E_CONFIG_FUNC_MASK		(0x7  << CE_E_CONFIG_FUNC_SHFT)
+
+#endif /* __ASM_IA64_SN_TIOCE_H__ */
diff --git a/arch/ia64/include/asm/sn/tioce_provider.h b/arch/ia64/include/asm/sn/tioce_provider.h
new file mode 100644
index 00000000000..32c32f30b09
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tioce_provider.h
@@ -0,0 +1,63 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_CE_PROVIDER_H
+#define _ASM_IA64_SN_CE_PROVIDER_H
+
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/tioce.h>
+
+/*
+ * Common TIOCE structure shared between the prom and kernel
+ *
+ * DO NOT CHANGE THIS STRUCT WITHOUT MAKING CORRESPONDING CHANGES TO THE
+ * PROM VERSION.
+ */
+struct tioce_common {
+	struct pcibus_bussoft	ce_pcibus;	/* common pciio header */
+
+	u32		ce_rev;
+	u64		ce_kernel_private;
+	u64		ce_prom_private;
+};
+
+struct tioce_kernel {
+	struct tioce_common	*ce_common;
+	spinlock_t		ce_lock;
+	struct list_head	ce_dmamap_list;
+
+	u64		ce_ate40_shadow[TIOCE_NUM_M40_ATES];
+	u64		ce_ate3240_shadow[TIOCE_NUM_M3240_ATES];
+	u32		ce_ate3240_pagesize;
+
+	u8			ce_port1_secondary;
+
+	/* per-port resources */
+	struct {
+		int 		dirmap_refcnt;
+		u64	dirmap_shadow;
+	} ce_port[TIOCE_NUM_PORTS];
+};
+
+struct tioce_dmamap {
+	struct list_head	ce_dmamap_list;	/* headed by tioce_kernel */
+	u32		refcnt;
+
+	u64		nbytes;		/* # bytes mapped */
+
+	u64		ct_start;	/* coretalk start address */
+	u64		pci_start;	/* bus start address */
+
+	u64		__iomem *ate_hw;/* hw ptr of first ate in map */
+	u64		*ate_shadow;	/* shadow ptr of firat ate */
+	u16		ate_count;	/* # ate's in the map */
+};
+
+extern int tioce_init_provider(void);
+
+#endif  /* __ASM_IA64_SN_CE_PROVIDER_H */
diff --git a/arch/ia64/include/asm/sn/tiocp.h b/arch/ia64/include/asm/sn/tiocp.h
new file mode 100644
index 00000000000..e8ad0bb5b6c
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tiocp.h
@@ -0,0 +1,257 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+#ifndef _ASM_IA64_SN_PCI_TIOCP_H
+#define _ASM_IA64_SN_PCI_TIOCP_H
+
+#define TIOCP_HOST_INTR_ADDR            0x003FFFFFFFFFFFFFUL
+#define TIOCP_PCI64_CMDTYPE_MEM         (0x1ull << 60)
+#define TIOCP_PCI64_CMDTYPE_MSI         (0x3ull << 60)
+
+
+/*****************************************************************************
+ *********************** TIOCP MMR structure mapping ***************************
+ *****************************************************************************/
+
+struct tiocp{
+
+    /* 0x000000-0x00FFFF -- Local Registers */
+
+    /* 0x000000-0x000057 -- (Legacy Widget Space) Configuration */
+    u64		cp_id;				/* 0x000000 */
+    u64		cp_stat;			/* 0x000008 */
+    u64		cp_err_upper;			/* 0x000010 */
+    u64		cp_err_lower;			/* 0x000018 */
+    #define cp_err cp_err_lower
+    u64		cp_control;			/* 0x000020 */
+    u64		cp_req_timeout;			/* 0x000028 */
+    u64		cp_intr_upper;			/* 0x000030 */
+    u64		cp_intr_lower;			/* 0x000038 */
+    #define cp_intr cp_intr_lower
+    u64		cp_err_cmdword;			/* 0x000040 */
+    u64		_pad_000048;			/* 0x000048 */
+    u64		cp_tflush;			/* 0x000050 */
+
+    /* 0x000058-0x00007F -- Bridge-specific Configuration */
+    u64		cp_aux_err;			/* 0x000058 */
+    u64		cp_resp_upper;			/* 0x000060 */
+    u64		cp_resp_lower;			/* 0x000068 */
+    #define cp_resp cp_resp_lower
+    u64		cp_tst_pin_ctrl;		/* 0x000070 */
+    u64		cp_addr_lkerr;			/* 0x000078 */
+
+    /* 0x000080-0x00008F -- PMU & MAP */
+    u64		cp_dir_map;			/* 0x000080 */
+    u64		_pad_000088;			/* 0x000088 */
+
+    /* 0x000090-0x00009F -- SSRAM */
+    u64		cp_map_fault;			/* 0x000090 */
+    u64		_pad_000098;			/* 0x000098 */
+
+    /* 0x0000A0-0x0000AF -- Arbitration */
+    u64		cp_arb;				/* 0x0000A0 */
+    u64		_pad_0000A8;			/* 0x0000A8 */
+
+    /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */
+    u64		cp_ate_parity_err;		/* 0x0000B0 */
+    u64		_pad_0000B8;			/* 0x0000B8 */
+
+    /* 0x0000C0-0x0000FF -- PCI/GIO */
+    u64		cp_bus_timeout;			/* 0x0000C0 */
+    u64		cp_pci_cfg;			/* 0x0000C8 */
+    u64		cp_pci_err_upper;		/* 0x0000D0 */
+    u64		cp_pci_err_lower;		/* 0x0000D8 */
+    #define cp_pci_err cp_pci_err_lower
+    u64		_pad_0000E0[4];			/* 0x0000{E0..F8} */
+
+    /* 0x000100-0x0001FF -- Interrupt */
+    u64		cp_int_status;			/* 0x000100 */
+    u64		cp_int_enable;			/* 0x000108 */
+    u64		cp_int_rst_stat;		/* 0x000110 */
+    u64		cp_int_mode;			/* 0x000118 */
+    u64		cp_int_device;			/* 0x000120 */
+    u64		cp_int_host_err;		/* 0x000128 */
+    u64		cp_int_addr[8];			/* 0x0001{30,,,68} */
+    u64		cp_err_int_view;		/* 0x000170 */
+    u64		cp_mult_int;			/* 0x000178 */
+    u64		cp_force_always[8];		/* 0x0001{80,,,B8} */
+    u64		cp_force_pin[8];		/* 0x0001{C0,,,F8} */
+
+    /* 0x000200-0x000298 -- Device */
+    u64		cp_device[4];			/* 0x0002{00,,,18} */
+    u64		_pad_000220[4];			/* 0x0002{20,,,38} */
+    u64		cp_wr_req_buf[4];		/* 0x0002{40,,,58} */
+    u64		_pad_000260[4];			/* 0x0002{60,,,78} */
+    u64		cp_rrb_map[2];			/* 0x0002{80,,,88} */
+    #define cp_even_resp cp_rrb_map[0]			/* 0x000280 */
+    #define cp_odd_resp  cp_rrb_map[1]			/* 0x000288 */
+    u64		cp_resp_status;			/* 0x000290 */
+    u64		cp_resp_clear;			/* 0x000298 */
+
+    u64		_pad_0002A0[12];		/* 0x0002{A0..F8} */
+
+    /* 0x000300-0x0003F8 -- Buffer Address Match Registers */
+    struct {
+	u64	upper;				/* 0x0003{00,,,F0} */
+	u64	lower;				/* 0x0003{08,,,F8} */
+    } cp_buf_addr_match[16];
+
+    /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */
+    struct {
+	u64	flush_w_touch;			/* 0x000{400,,,5C0} */
+	u64	flush_wo_touch;			/* 0x000{408,,,5C8} */
+	u64	inflight;			/* 0x000{410,,,5D0} */
+	u64	prefetch;			/* 0x000{418,,,5D8} */
+	u64	total_pci_retry;		/* 0x000{420,,,5E0} */
+	u64	max_pci_retry;			/* 0x000{428,,,5E8} */
+	u64	max_latency;			/* 0x000{430,,,5F0} */
+	u64	clear_all;			/* 0x000{438,,,5F8} */
+    } cp_buf_count[8];
+
+
+    /* 0x000600-0x0009FF -- PCI/X registers */
+    u64		cp_pcix_bus_err_addr;		/* 0x000600 */
+    u64		cp_pcix_bus_err_attr;		/* 0x000608 */
+    u64		cp_pcix_bus_err_data;		/* 0x000610 */
+    u64		cp_pcix_pio_split_addr;		/* 0x000618 */
+    u64		cp_pcix_pio_split_attr;		/* 0x000620 */
+    u64		cp_pcix_dma_req_err_attr;	/* 0x000628 */
+    u64		cp_pcix_dma_req_err_addr;	/* 0x000630 */
+    u64		cp_pcix_timeout;		/* 0x000638 */
+
+    u64		_pad_000640[24];		/* 0x000{640,,,6F8} */
+
+    /* 0x000700-0x000737 -- Debug Registers */
+    u64		cp_ct_debug_ctl;		/* 0x000700 */
+    u64		cp_br_debug_ctl;		/* 0x000708 */
+    u64		cp_mux3_debug_ctl;		/* 0x000710 */
+    u64		cp_mux4_debug_ctl;		/* 0x000718 */
+    u64		cp_mux5_debug_ctl;		/* 0x000720 */
+    u64		cp_mux6_debug_ctl;		/* 0x000728 */
+    u64		cp_mux7_debug_ctl;		/* 0x000730 */
+
+    u64		_pad_000738[89];		/* 0x000{738,,,9F8} */
+
+    /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */
+    struct {
+	u64	cp_buf_addr;			/* 0x000{A00,,,AF0} */
+	u64	cp_buf_attr;			/* 0X000{A08,,,AF8} */
+    } cp_pcix_read_buf_64[16];
+
+    struct {
+	u64	cp_buf_addr;			/* 0x000{B00,,,BE0} */
+	u64	cp_buf_attr;			/* 0x000{B08,,,BE8} */
+	u64	cp_buf_valid;			/* 0x000{B10,,,BF0} */
+	u64	__pad1;				/* 0x000{B18,,,BF8} */
+    } cp_pcix_write_buf_64[8];
+
+    /* End of Local Registers -- Start of Address Map space */
+
+    char	_pad_000c00[0x010000 - 0x000c00];
+
+    /* 0x010000-0x011FF8 -- Internal ATE RAM (Auto Parity Generation) */
+    u64		cp_int_ate_ram[1024];		/* 0x010000-0x011FF8 */
+
+    char	_pad_012000[0x14000 - 0x012000];
+
+    /* 0x014000-0x015FF8 -- Internal ATE RAM (Manual Parity Generation) */
+    u64		cp_int_ate_ram_mp[1024];	/* 0x014000-0x015FF8 */
+
+    char	_pad_016000[0x18000 - 0x016000];
+
+    /* 0x18000-0x197F8 -- TIOCP Write Request Ram */
+    u64		cp_wr_req_lower[256];		/* 0x18000 - 0x187F8 */
+    u64		cp_wr_req_upper[256];		/* 0x18800 - 0x18FF8 */
+    u64		cp_wr_req_parity[256];		/* 0x19000 - 0x197F8 */
+
+    char	_pad_019800[0x1C000 - 0x019800];
+
+    /* 0x1C000-0x1EFF8 -- TIOCP Read Response Ram */
+    u64		cp_rd_resp_lower[512];		/* 0x1C000 - 0x1CFF8 */
+    u64		cp_rd_resp_upper[512];		/* 0x1D000 - 0x1DFF8 */
+    u64		cp_rd_resp_parity[512];		/* 0x1E000 - 0x1EFF8 */
+
+    char	_pad_01F000[0x20000 - 0x01F000];
+
+    /* 0x020000-0x021FFF -- Host Device (CP) Configuration Space (not used)  */
+    char	_pad_020000[0x021000 - 0x20000];
+
+    /* 0x021000-0x027FFF -- PCI Device Configuration Spaces */
+    union {
+	u8	c[0x1000 / 1];			/* 0x02{0000,,,7FFF} */
+	u16	s[0x1000 / 2];			/* 0x02{0000,,,7FFF} */
+	u32	l[0x1000 / 4];			/* 0x02{0000,,,7FFF} */
+	u64	d[0x1000 / 8];			/* 0x02{0000,,,7FFF} */
+	union {
+	    u8	c[0x100 / 1];
+	    u16	s[0x100 / 2];
+	    u32	l[0x100 / 4];
+	    u64	d[0x100 / 8];
+	} f[8];
+    } cp_type0_cfg_dev[7];				/* 0x02{1000,,,7FFF} */
+
+    /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */
+    union {
+	u8	c[0x1000 / 1];			/* 0x028000-0x029000 */
+	u16	s[0x1000 / 2];			/* 0x028000-0x029000 */
+	u32	l[0x1000 / 4];			/* 0x028000-0x029000 */
+	u64	d[0x1000 / 8];			/* 0x028000-0x029000 */
+	union {
+	    u8	c[0x100 / 1];
+	    u16	s[0x100 / 2];
+	    u32	l[0x100 / 4];
+	    u64	d[0x100 / 8];
+	} f[8];
+    } cp_type1_cfg;					/* 0x028000-0x029000 */
+
+    char		_pad_029000[0x030000-0x029000];
+
+    /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */
+    union {
+	u8	c[8 / 1];
+	u16	s[8 / 2];
+	u32	l[8 / 4];
+	u64	d[8 / 8];
+    } cp_pci_iack;					/* 0x030000-0x030007 */
+
+    char		_pad_030007[0x040000-0x030008];
+
+    /* 0x040000-0x040007 -- PCIX Special Cycle */
+    union {
+	u8	c[8 / 1];
+	u16	s[8 / 2];
+	u32	l[8 / 4];
+	u64	d[8 / 8];
+    } cp_pcix_cycle;					/* 0x040000-0x040007 */
+
+    char		_pad_040007[0x200000-0x040008];
+
+    /* 0x200000-0x7FFFFF -- PCI/GIO Device Spaces */
+    union {
+	u8	c[0x100000 / 1];
+	u16	s[0x100000 / 2];
+	u32	l[0x100000 / 4];
+	u64	d[0x100000 / 8];
+    } cp_devio_raw[6];					/* 0x200000-0x7FFFFF */
+
+    #define cp_devio(n)  cp_devio_raw[((n)<2)?(n*2):(n+2)]
+
+    char		_pad_800000[0xA00000-0x800000];
+
+    /* 0xA00000-0xBFFFFF -- PCI/GIO Device Spaces w/flush  */
+    union {
+	u8	c[0x100000 / 1];
+	u16	s[0x100000 / 2];
+	u32	l[0x100000 / 4];
+	u64	d[0x100000 / 8];
+    } cp_devio_raw_flush[6];				/* 0xA00000-0xBFFFFF */
+
+    #define cp_devio_flush(n)  cp_devio_raw_flush[((n)<2)?(n*2):(n+2)]
+
+};
+
+#endif 	/* _ASM_IA64_SN_PCI_TIOCP_H */
diff --git a/arch/ia64/include/asm/sn/tiocx.h b/arch/ia64/include/asm/sn/tiocx.h
new file mode 100644
index 00000000000..d29728492f3
--- /dev/null
+++ b/arch/ia64/include/asm/sn/tiocx.h
@@ -0,0 +1,72 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_TIO_TIOCX_H
+#define _ASM_IA64_SN_TIO_TIOCX_H
+
+#ifdef __KERNEL__
+
+struct cx_id_s {
+	unsigned int part_num;
+	unsigned int mfg_num;
+	int nasid;
+};
+
+struct cx_dev {
+	struct cx_id_s cx_id;
+	int bt;				/* board/blade type */
+	void *soft;			/* driver specific */
+	struct hubdev_info *hubdev;
+	struct device dev;
+	struct cx_drv *driver;
+};
+
+struct cx_device_id {
+	unsigned int part_num;
+	unsigned int mfg_num;
+};
+
+struct cx_drv {
+	char *name;
+	const struct cx_device_id *id_table;
+	struct device_driver driver;
+	int (*probe) (struct cx_dev * dev, const struct cx_device_id * id);
+	int (*remove) (struct cx_dev * dev);
+};
+
+/* create DMA address by stripping AS bits */
+#define TIOCX_DMA_ADDR(a) (u64)((u64)(a) & 0xffffcfffffffffUL)
+
+#define TIOCX_TO_TIOCX_DMA_ADDR(a) (u64)(((u64)(a) & 0xfffffffff) |  \
+                                  ((((u64)(a)) & 0xffffc000000000UL) <<2))
+
+#define TIO_CE_ASIC_PARTNUM 0xce00
+#define TIOCX_CORELET 3
+
+/* These are taken from tio_mmr_as.h */
+#define TIO_ICE_FRZ_CFG               TIO_MMR_ADDR_MOD(0x00000000b0008100UL)
+#define TIO_ICE_PMI_TX_CFG            TIO_MMR_ADDR_MOD(0x00000000b000b100UL)
+#define TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3 TIO_MMR_ADDR_MOD(0x00000000b000be18UL)
+#define TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK 0x000000000000000fUL
+
+#define to_cx_dev(n) container_of(n, struct cx_dev, dev)
+#define to_cx_driver(drv) container_of(drv, struct cx_drv, driver)
+
+extern struct sn_irq_info *tiocx_irq_alloc(nasid_t, int, int, nasid_t, int);
+extern void tiocx_irq_free(struct sn_irq_info *);
+extern int cx_device_unregister(struct cx_dev *);
+extern int cx_device_register(nasid_t, int, int, struct hubdev_info *, int);
+extern int cx_driver_unregister(struct cx_drv *);
+extern int cx_driver_register(struct cx_drv *);
+extern u64 tiocx_dma_addr(u64 addr);
+extern u64 tiocx_swin_base(int nasid);
+extern void tiocx_mmr_store(int nasid, u64 offset, u64 value);
+extern u64 tiocx_mmr_load(int nasid, u64 offset);
+
+#endif				//  __KERNEL__
+#endif				// _ASM_IA64_SN_TIO_TIOCX__
diff --git a/arch/ia64/include/asm/sn/types.h b/arch/ia64/include/asm/sn/types.h
new file mode 100644
index 00000000000..8e04ee211e5
--- /dev/null
+++ b/arch/ia64/include/asm/sn/types.h
@@ -0,0 +1,26 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999,2001-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (C) 1999 by Ralf Baechle
+ */
+#ifndef _ASM_IA64_SN_TYPES_H
+#define _ASM_IA64_SN_TYPES_H
+
+#include <linux/types.h>
+
+typedef unsigned long 	cpuid_t;
+typedef signed short	nasid_t;	/* node id in numa-as-id space */
+typedef signed char	partid_t;	/* partition ID type */
+typedef unsigned int    moduleid_t;     /* user-visible module number type */
+typedef unsigned int    cmoduleid_t;    /* kernel compact module id type */
+typedef unsigned char	slotid_t;	/* slot (blade) within module */
+typedef unsigned char	slabid_t;	/* slab (asic) within slot */
+typedef u64 nic_t;
+typedef unsigned long iopaddr_t;
+typedef unsigned long paddr_t;
+typedef short cnodeid_t;
+
+#endif /* _ASM_IA64_SN_TYPES_H */
diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h
new file mode 100644
index 00000000000..67a7c40ec27
--- /dev/null
+++ b/arch/ia64/include/asm/sparsemem.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_IA64_SPARSEMEM_H
+#define _ASM_IA64_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+/*
+ * SECTION_SIZE_BITS            2^N: how big each section will be
+ * MAX_PHYSMEM_BITS             2^N: how much memory we can have in that space
+ */
+
+#define SECTION_SIZE_BITS	(30)
+#define MAX_PHYSMEM_BITS	(50)
+#ifdef CONFIG_FORCE_MAX_ZONEORDER
+#if ((CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS)
+#undef SECTION_SIZE_BITS
+#define SECTION_SIZE_BITS (CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT)
+#endif
+#endif
+
+#endif /* CONFIG_SPARSEMEM */
+#endif /* _ASM_IA64_SPARSEMEM_H */
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
new file mode 100644
index 00000000000..45698cd15b7
--- /dev/null
+++ b/arch/ia64/include/asm/spinlock.h
@@ -0,0 +1,296 @@
+#ifndef _ASM_IA64_SPINLOCK_H
+#define _ASM_IA64_SPINLOCK_H
+
+/*
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ *
+ * This file is used for SMP configurations only.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include <linux/atomic.h>
+#include <asm/intrinsics.h>
+
+#define arch_spin_lock_init(x)			((x)->lock = 0)
+
+/*
+ * Ticket locks are conceptually two parts, one indicating the current head of
+ * the queue, and the other indicating the current tail. The lock is acquired
+ * by atomically noting the tail and incrementing it by one (thus adding
+ * ourself to the queue and noting our position), then waiting until the head
+ * becomes equal to the the initial value of the tail.
+ * The pad bits in the middle are used to prevent the next_ticket number
+ * overflowing into the now_serving number.
+ *
+ *   31             17  16    15  14                    0
+ *  +----------------------------------------------------+
+ *  |  now_serving     | padding |   next_ticket         |
+ *  +----------------------------------------------------+
+ */
+
+#define TICKET_SHIFT	17
+#define TICKET_BITS	15
+#define	TICKET_MASK	((1 << TICKET_BITS) - 1)
+
+static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
+{
+	int	*p = (int *)&lock->lock, ticket, serve;
+
+	ticket = ia64_fetchadd(1, p, acq);
+
+	if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
+		return;
+
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(p) : "memory");
+
+		if (!(((serve >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
+			return;
+		cpu_relax();
+	}
+}
+
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
+{
+	int tmp = ACCESS_ONCE(lock->lock);
+
+	if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK))
+		return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp;
+	return 0;
+}
+
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
+{
+	unsigned short	*p = (unsigned short *)&lock->lock + 1, tmp;
+
+	asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p));
+	ACCESS_ONCE(*p) = (tmp + 2) & ~1;
+}
+
+static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	int	*p = (int *)&lock->lock, ticket;
+
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory");
+		if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
+			return;
+		cpu_relax();
+	}
+}
+
+static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
+{
+	long tmp = ACCESS_ONCE(lock->lock);
+
+	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK);
+}
+
+static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
+{
+	long tmp = ACCESS_ONCE(lock->lock);
+
+	return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1;
+}
+
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return !(((lock.lock >> TICKET_SHIFT) ^ lock.lock) & TICKET_MASK);
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	return __ticket_spin_is_locked(lock);
+}
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+	return __ticket_spin_is_contended(lock);
+}
+#define arch_spin_is_contended	arch_spin_is_contended
+
+static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	__ticket_spin_lock(lock);
+}
+
+static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	return __ticket_spin_trylock(lock);
+}
+
+static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	__ticket_spin_unlock(lock);
+}
+
+static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
+						  unsigned long flags)
+{
+	arch_spin_lock(lock);
+}
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	__ticket_spin_unlock_wait(lock);
+}
+
+#define arch_read_can_lock(rw)		(*(volatile int *)(rw) >= 0)
+#define arch_write_can_lock(rw)	(*(volatile int *)(rw) == 0)
+
+#ifdef ASM_SUPPORTED
+
+static __always_inline void
+arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
+{
+	__asm__ __volatile__ (
+		"tbit.nz p6, p0 = %1,%2\n"
+		"br.few 3f\n"
+		"1:\n"
+		"fetchadd4.rel r2 = [%0], -1;;\n"
+		"(p6) ssm psr.i\n"
+		"2:\n"
+		"hint @pause\n"
+		"ld4 r2 = [%0];;\n"
+		"cmp4.lt p7,p0 = r2, r0\n"
+		"(p7) br.cond.spnt.few 2b\n"
+		"(p6) rsm psr.i\n"
+		";;\n"
+		"3:\n"
+		"fetchadd4.acq r2 = [%0], 1;;\n"
+		"cmp4.lt p7,p0 = r2, r0\n"
+		"(p7) br.cond.spnt.few 1b\n"
+		: : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT)
+		: "p6", "p7", "r2", "memory");
+}
+
+#define arch_read_lock(lock) arch_read_lock_flags(lock, 0)
+
+#else /* !ASM_SUPPORTED */
+
+#define arch_read_lock_flags(rw, flags) arch_read_lock(rw)
+
+#define arch_read_lock(rw)								\
+do {											\
+	arch_rwlock_t *__read_lock_ptr = (rw);						\
+											\
+	while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) {		\
+		ia64_fetchadd(-1, (int *) __read_lock_ptr, rel);			\
+		while (*(volatile int *)__read_lock_ptr < 0)				\
+			cpu_relax();							\
+	}										\
+} while (0)
+
+#endif /* !ASM_SUPPORTED */
+
+#define arch_read_unlock(rw)					\
+do {								\
+	arch_rwlock_t *__read_lock_ptr = (rw);			\
+	ia64_fetchadd(-1, (int *) __read_lock_ptr, rel);	\
+} while (0)
+
+#ifdef ASM_SUPPORTED
+
+static __always_inline void
+arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
+{
+	__asm__ __volatile__ (
+		"tbit.nz p6, p0 = %1, %2\n"
+		"mov ar.ccv = r0\n"
+		"dep r29 = -1, r0, 31, 1\n"
+		"br.few 3f;;\n"
+		"1:\n"
+		"(p6) ssm psr.i\n"
+		"2:\n"
+		"hint @pause\n"
+		"ld4 r2 = [%0];;\n"
+		"cmp4.eq p0,p7 = r0, r2\n"
+		"(p7) br.cond.spnt.few 2b\n"
+		"(p6) rsm psr.i\n"
+		";;\n"
+		"3:\n"
+		"cmpxchg4.acq r2 = [%0], r29, ar.ccv;;\n"
+		"cmp4.eq p0,p7 = r0, r2\n"
+		"(p7) br.cond.spnt.few 1b;;\n"
+		: : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT)
+		: "ar.ccv", "p6", "p7", "r2", "r29", "memory");
+}
+
+#define arch_write_lock(rw) arch_write_lock_flags(rw, 0)
+
+#define arch_write_trylock(rw)							\
+({										\
+	register long result;							\
+										\
+	__asm__ __volatile__ (							\
+		"mov ar.ccv = r0\n"						\
+		"dep r29 = -1, r0, 31, 1;;\n"					\
+		"cmpxchg4.acq %0 = [%1], r29, ar.ccv\n"				\
+		: "=r"(result) : "r"(rw) : "ar.ccv", "r29", "memory");		\
+	(result == 0);								\
+})
+
+static inline void arch_write_unlock(arch_rwlock_t *x)
+{
+	u8 *y = (u8 *)x;
+	barrier();
+	asm volatile ("st1.rel.nta [%0] = r0\n\t" :: "r"(y+3) : "memory" );
+}
+
+#else /* !ASM_SUPPORTED */
+
+#define arch_write_lock_flags(l, flags) arch_write_lock(l)
+
+#define arch_write_lock(l)								\
+({											\
+	__u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1);			\
+	__u32 *ia64_write_lock_ptr = (__u32 *) (l);					\
+	do {										\
+		while (*ia64_write_lock_ptr)						\
+			ia64_barrier();							\
+		ia64_val = ia64_cmpxchg4_acq(ia64_write_lock_ptr, ia64_set_val, 0);	\
+	} while (ia64_val);								\
+})
+
+#define arch_write_trylock(rw)						\
+({									\
+	__u64 ia64_val;							\
+	__u64 ia64_set_val = ia64_dep_mi(-1, 0, 31,1);			\
+	ia64_val = ia64_cmpxchg4_acq((__u32 *)(rw), ia64_set_val, 0);	\
+	(ia64_val == 0);						\
+})
+
+static inline void arch_write_unlock(arch_rwlock_t *x)
+{
+	barrier();
+	x->write_lock = 0;
+}
+
+#endif /* !ASM_SUPPORTED */
+
+static inline int arch_read_trylock(arch_rwlock_t *x)
+{
+	union {
+		arch_rwlock_t lock;
+		__u32 word;
+	} old, new;
+	old.lock = new.lock = *x;
+	old.lock.write_lock = new.lock.write_lock = 0;
+	++new.lock.read_counter;
+	return (u32)ia64_cmpxchg4_acq((__u32 *)(x), new.word, old.word) == old.word;
+}
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /*  _ASM_IA64_SPINLOCK_H */
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
new file mode 100644
index 00000000000..e2b42a52a6d
--- /dev/null
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -0,0 +1,21 @@
+#ifndef _ASM_IA64_SPINLOCK_TYPES_H
+#define _ASM_IA64_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+	volatile unsigned int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+
+typedef struct {
+	volatile unsigned int read_counter	: 31;
+	volatile unsigned int write_lock	:  1;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0, 0 }
+
+#endif
diff --git a/arch/ia64/include/asm/string.h b/arch/ia64/include/asm/string.h
new file mode 100644
index 00000000000..85fd65c52a8
--- /dev/null
+++ b/arch/ia64/include/asm/string.h
@@ -0,0 +1,21 @@
+#ifndef _ASM_IA64_STRING_H
+#define _ASM_IA64_STRING_H
+
+/*
+ * Here is where we want to put optimized versions of the string
+ * routines.
+ *
+ * Copyright (C) 1998-2000, 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#define __HAVE_ARCH_STRLEN	1 /* see arch/ia64/lib/strlen.S */
+#define __HAVE_ARCH_MEMSET	1 /* see arch/ia64/lib/memset.S */
+#define __HAVE_ARCH_MEMCPY	1 /* see arch/ia64/lib/memcpy.S */
+
+extern __kernel_size_t strlen (const char *);
+extern void *memcpy (void *, const void *, __kernel_size_t);
+extern void *memset (void *, int, __kernel_size_t);
+
+#endif /* _ASM_IA64_STRING_H */
diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h
new file mode 100644
index 00000000000..f0acde68aae
--- /dev/null
+++ b/arch/ia64/include/asm/swiotlb.h
@@ -0,0 +1,17 @@
+#ifndef ASM_IA64__SWIOTLB_H
+#define ASM_IA64__SWIOTLB_H
+
+#include <linux/dma-mapping.h>
+#include <linux/swiotlb.h>
+
+#ifdef CONFIG_SWIOTLB
+extern int swiotlb;
+extern void pci_swiotlb_init(void);
+#else
+#define swiotlb 0
+static inline void pci_swiotlb_init(void)
+{
+}
+#endif
+
+#endif /* ASM_IA64__SWIOTLB_H */
diff --git a/arch/ia64/include/asm/switch_to.h b/arch/ia64/include/asm/switch_to.h
new file mode 100644
index 00000000000..d38c7ea5eea
--- /dev/null
+++ b/arch/ia64/include/asm/switch_to.h
@@ -0,0 +1,79 @@
+/*
+ * Low-level task switching. This is based on information published in
+ * the Processor Abstraction Layer and the System Abstraction Layer
+ * manual.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+#ifndef _ASM_IA64_SWITCH_TO_H
+#define _ASM_IA64_SWITCH_TO_H
+
+#include <linux/percpu.h>
+
+struct task_struct;
+
+/*
+ * Context switch from one thread to another.  If the two threads have
+ * different address spaces, schedule() has already taken care of
+ * switching to the new address space by calling switch_mm().
+ *
+ * Disabling access to the fph partition and the debug-register
+ * context switch MUST be done before calling ia64_switch_to() since a
+ * newly created thread returns directly to
+ * ia64_ret_from_syscall_clear_r8.
+ */
+extern struct task_struct *ia64_switch_to (void *next_task);
+
+extern void ia64_save_extra (struct task_struct *task);
+extern void ia64_load_extra (struct task_struct *task);
+
+#ifdef CONFIG_PERFMON
+  DECLARE_PER_CPU(unsigned long, pfm_syst_info);
+# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
+#else
+# define PERFMON_IS_SYSWIDE() (0)
+#endif
+
+#define IA64_HAS_EXTRA_STATE(t)							\
+	((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)	\
+	 || PERFMON_IS_SYSWIDE())
+
+#define __switch_to(prev,next,last) do {							 \
+	if (IA64_HAS_EXTRA_STATE(prev))								 \
+		ia64_save_extra(prev);								 \
+	if (IA64_HAS_EXTRA_STATE(next))								 \
+		ia64_load_extra(next);								 \
+	ia64_psr(task_pt_regs(next))->dfh = !ia64_is_local_fpu_owner(next);			 \
+	(last) = ia64_switch_to((next));							 \
+} while (0)
+
+#ifdef CONFIG_SMP
+/*
+ * In the SMP case, we save the fph state when context-switching away from a thread that
+ * modified fph.  This way, when the thread gets scheduled on another CPU, the CPU can
+ * pick up the state from task->thread.fph, avoiding the complication of having to fetch
+ * the latest fph state from another CPU.  In other words: eager save, lazy restore.
+ */
+# define switch_to(prev,next,last) do {						\
+	if (ia64_psr(task_pt_regs(prev))->mfh && ia64_is_local_fpu_owner(prev)) {				\
+		ia64_psr(task_pt_regs(prev))->mfh = 0;			\
+		(prev)->thread.flags |= IA64_THREAD_FPH_VALID;			\
+		__ia64_save_fpu((prev)->thread.fph);				\
+	}									\
+	__switch_to(prev, next, last);						\
+	/* "next" in old context is "current" in new context */			\
+	if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) &&	       \
+		     (task_cpu(current) !=				       \
+		      		      task_thread_info(current)->last_cpu))) { \
+		platform_migrate(current);				       \
+		task_thread_info(current)->last_cpu = task_cpu(current);       \
+	}								       \
+} while (0)
+#else
+# define switch_to(prev,next,last)	__switch_to(prev, next, last)
+#endif
+
+#endif /* _ASM_IA64_SWITCH_TO_H */
diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
new file mode 100644
index 00000000000..a7ff1c6ab06
--- /dev/null
+++ b/arch/ia64/include/asm/syscall.h
@@ -0,0 +1,82 @@
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright (C) 2008 Intel Corp.  Shaohua Li <shaohua.li@intel.com>
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H	1
+
+#include <linux/sched.h>
+#include <linux/err.h>
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	if ((long)regs->cr_ifs < 0) /* Not a syscall */
+		return -1;
+
+	return regs->r15;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	/* do nothing */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	return regs->r10 == -1 ? regs->r8:0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->r8;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	if (error) {
+		/* error < 0, but ia64 uses > 0 return value */
+		regs->r8 = -error;
+		regs->r10 = -1;
+	} else {
+		regs->r8 = val;
+		regs->r10 = 0;
+	}
+}
+
+extern void ia64_syscall_get_set_arguments(struct task_struct *task,
+	struct pt_regs *regs, unsigned int i, unsigned int n,
+	unsigned long *args, int rw);
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+
+	ia64_syscall_get_set_arguments(task, regs, i, n, args, 0);
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+
+	ia64_syscall_get_set_arguments(task, regs, i, n, args, 1);
+}
+#endif	/* _ASM_SYSCALL_H */
diff --git a/arch/ia64/include/asm/termios.h b/arch/ia64/include/asm/termios.h
new file mode 100644
index 00000000000..a42f870ca4f
--- /dev/null
+++ b/arch/ia64/include/asm/termios.h
@@ -0,0 +1,57 @@
+/*
+ * Modified 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ *
+ * 99/01/28	Added N_IRDA and N_SMSBLOCK
+ */
+#ifndef _ASM_IA64_TERMIOS_H
+#define _ASM_IA64_TERMIOS_H
+
+#include <uapi/asm/termios.h>
+
+
+/*	intr=^C		quit=^\		erase=del	kill=^U
+	eof=^D		vtime=\0	vmin=\1		sxtc=\0
+	start=^Q	stop=^S		susp=^Z		eol=\0
+	reprint=^R	discard=^U	werase=^W	lnext=^V
+	eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+
+/*
+ * Translate a "termio" structure into a "termios". Ugh.
+ */
+#define SET_LOW_TERMIOS_BITS(termios, termio, x) {	\
+	unsigned short __tmp;				\
+	get_user(__tmp,&(termio)->x);			\
+	*(unsigned short *) &(termios)->x = __tmp;	\
+}
+
+#define user_termio_to_kernel_termios(termios, termio)		\
+({								\
+	SET_LOW_TERMIOS_BITS(termios, termio, c_iflag);		\
+	SET_LOW_TERMIOS_BITS(termios, termio, c_oflag);		\
+	SET_LOW_TERMIOS_BITS(termios, termio, c_cflag);		\
+	SET_LOW_TERMIOS_BITS(termios, termio, c_lflag);		\
+	copy_from_user((termios)->c_cc, (termio)->c_cc, NCC);	\
+})
+
+/*
+ * Translate a "termios" structure into a "termio". Ugh.
+ */
+#define kernel_termios_to_user_termio(termio, termios)		\
+({								\
+	put_user((termios)->c_iflag, &(termio)->c_iflag);	\
+	put_user((termios)->c_oflag, &(termio)->c_oflag);	\
+	put_user((termios)->c_cflag, &(termio)->c_cflag);	\
+	put_user((termios)->c_lflag, &(termio)->c_lflag);	\
+	put_user((termios)->c_line,  &(termio)->c_line);	\
+	copy_to_user((termio)->c_cc, (termios)->c_cc, NCC);	\
+})
+
+#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
+#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
+#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
+#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
+
+#endif /* _ASM_IA64_TERMIOS_H */
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
new file mode 100644
index 00000000000..5b17418b422
--- /dev/null
+++ b/arch/ia64/include/asm/thread_info.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#ifndef _ASM_IA64_THREAD_INFO_H
+#define _ASM_IA64_THREAD_INFO_H
+
+#ifndef ASM_OFFSETS_C
+#include <asm/asm-offsets.h>
+#endif
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+
+#ifndef __ASSEMBLY__
+
+/*
+ * On IA-64, we want to keep the task structure and kernel stack together, so they can be
+ * mapped by a single TLB entry and so they can be addressed by the "current" pointer
+ * without having to do pointer masking.
+ */
+struct thread_info {
+	struct task_struct *task;	/* XXX not really needed, except for dup_task_struct() */
+	struct exec_domain *exec_domain;/* execution domain */
+	__u32 flags;			/* thread_info flags (see TIF_*) */
+	__u32 cpu;			/* current CPU */
+	__u32 last_cpu;			/* Last CPU thread ran on */
+	__u32 status;			/* Thread synchronous flags */
+	mm_segment_t addr_limit;	/* user-level address space limit */
+	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
+	struct restart_block restart_block;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	__u64 ac_stamp;
+	__u64 ac_leave;
+	__u64 ac_stime;
+	__u64 ac_utime;
+#endif
+};
+
+#define THREAD_SIZE			KERNEL_STACK_SIZE
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.addr_limit	= KERNEL_DS,		\
+	.preempt_count	= INIT_PREEMPT_COUNT,	\
+	.restart_block = {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+}
+
+#ifndef ASM_OFFSETS_C
+/* how to get the thread information struct from C */
+#define current_thread_info()	((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
+#define alloc_thread_info_node(tsk, node)	\
+		((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
+#define task_thread_info(tsk)	((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
+#else
+#define current_thread_info()	((struct thread_info *) 0)
+#define alloc_thread_info_node(tsk, node)	((struct thread_info *) 0)
+#define task_thread_info(tsk)	((struct thread_info *) 0)
+#endif
+#define free_thread_info(ti)	/* nothing */
+#define task_stack_page(tsk)	((void *)(tsk))
+
+#define __HAVE_THREAD_FUNCTIONS
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#define setup_thread_stack(p, org)			\
+	*task_thread_info(p) = *task_thread_info(org);	\
+	task_thread_info(p)->ac_stime = 0;		\
+	task_thread_info(p)->ac_utime = 0;		\
+	task_thread_info(p)->task = (p);
+#else
+#define setup_thread_stack(p, org) \
+	*task_thread_info(p) = *task_thread_info(org); \
+	task_thread_info(p)->task = (p);
+#endif
+#define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET)
+
+#define alloc_task_struct_node(node)						\
+({										\
+	struct page *page = alloc_pages_node(node, GFP_KERNEL | __GFP_COMP,	\
+					     KERNEL_STACK_SIZE_ORDER);		\
+	struct task_struct *ret = page ? page_address(page) : NULL;		\
+										\
+	ret;									\
+})
+#define free_task_struct(tsk)	free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER)
+
+#endif /* !__ASSEMBLY */
+
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files may need to access
+ * - pending work-to-be-done flags are in least-significant 16 bits, other flags
+ *   in top 16 bits
+ */
+#define TIF_SIGPENDING		0	/* signal pending */
+#define TIF_NEED_RESCHED	1	/* rescheduling necessary */
+#define TIF_SYSCALL_TRACE	2	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	3	/* syscall auditing active */
+#define TIF_SINGLESTEP		4	/* restore singlestep on return to user mode */
+#define TIF_NOTIFY_RESUME	6	/* resumption notification requested */
+#define TIF_MEMDIE		17	/* is terminating due to OOM killer */
+#define TIF_MCA_INIT		18	/* this task is processing MCA or INIT */
+#define TIF_DB_DISABLED		19	/* debug trap disabled for fsyscall */
+#define TIF_RESTORE_RSE		21	/* user RBS is newer than kernel RBS */
+#define TIF_POLLING_NRFLAG	22	/* idle is polling for TIF_NEED_RESCHED */
+
+#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
+#define _TIF_SYSCALL_TRACEAUDIT	(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_MCA_INIT		(1 << TIF_MCA_INIT)
+#define _TIF_DB_DISABLED	(1 << TIF_DB_DISABLED)
+#define _TIF_RESTORE_RSE	(1 << TIF_RESTORE_RSE)
+#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
+
+/* "work to do on user-return" bits */
+#define TIF_ALLWORK_MASK	(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\
+				 _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE)
+/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
+#define TIF_WORK_MASK		(TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
+
+#define TS_RESTORE_SIGMASK	2	/* restore signal mask in do_signal() */
+
+#ifndef __ASSEMBLY__
+#define HAVE_SET_RESTORE_SIGMASK	1
+static inline void set_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	ti->status |= TS_RESTORE_SIGMASK;
+	WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
+}
+static inline void clear_restore_sigmask(void)
+{
+	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+	return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	if (!(ti->status & TS_RESTORE_SIGMASK))
+		return false;
+	ti->status &= ~TS_RESTORE_SIGMASK;
+	return true;
+}
+#endif	/* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_THREAD_INFO_H */
diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h
new file mode 100644
index 00000000000..86c7db86118
--- /dev/null
+++ b/arch/ia64/include/asm/timex.h
@@ -0,0 +1,45 @@
+#ifndef _ASM_IA64_TIMEX_H
+#define _ASM_IA64_TIMEX_H
+
+/*
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+/*
+ * 2001/01/18 davidm	Removed CLOCK_TICK_RATE.  It makes no sense on IA-64.
+ *			Also removed cacheflush_time as it's entirely unused.
+ */
+
+#include <asm/intrinsics.h>
+#include <asm/processor.h>
+
+typedef unsigned long cycles_t;
+
+extern void (*ia64_udelay)(unsigned long usecs);
+
+/*
+ * For performance reasons, we don't want to define CLOCK_TICK_TRATE as
+ * local_cpu_data->itc_rate.  Fortunately, we don't have to, either: according to George
+ * Anzinger, 1/CLOCK_TICK_RATE is taken as the resolution of the timer clock.  The time
+ * calculation assumes that you will use enough of these so that your tick size <= 1/HZ.
+ * If the calculation shows that your CLOCK_TICK_RATE can not supply exactly 1/HZ ticks,
+ * the actual value is calculated and used to update the wall clock each jiffie.  Setting
+ * the CLOCK_TICK_RATE to x*HZ insures that the calculation will find no errors.  Hence we
+ * pick a multiple of HZ which gives us a (totally virtual) CLOCK_TICK_RATE of about
+ * 100MHz.
+ */
+#define CLOCK_TICK_RATE		(HZ * 100000UL)
+
+static inline cycles_t
+get_cycles (void)
+{
+	cycles_t ret;
+
+	ret = ia64_getreg(_IA64_REG_AR_ITC);
+	return ret;
+}
+
+extern void ia64_cpu_local_tick (void);
+extern unsigned long long ia64_native_sched_clock (void);
+
+#endif /* _ASM_IA64_TIMEX_H */
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
new file mode 100644
index 00000000000..39d64e0df1d
--- /dev/null
+++ b/arch/ia64/include/asm/tlb.h
@@ -0,0 +1,283 @@
+#ifndef _ASM_IA64_TLB_H
+#define _ASM_IA64_TLB_H
+/*
+ * Based on <asm-generic/tlb.h>.
+ *
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+/*
+ * Removing a translation from a page table (including TLB-shootdown) is a four-step
+ * procedure:
+ *
+ *	(1) Flush (virtual) caches --- ensures virtual memory is coherent with kernel memory
+ *	    (this is a no-op on ia64).
+ *	(2) Clear the relevant portions of the page-table
+ *	(3) Flush the TLBs --- ensures that stale content is gone from CPU TLBs
+ *	(4) Release the pages that were freed up in step (2).
+ *
+ * Note that the ordering of these steps is crucial to avoid races on MP machines.
+ *
+ * The Linux kernel defines several platform-specific hooks for TLB-shootdown.  When
+ * unmapping a portion of the virtual address space, these hooks are called according to
+ * the following template:
+ *
+ *	tlb <- tlb_gather_mmu(mm, start, end);		// start unmap for address space MM
+ *	{
+ *	  for each vma that needs a shootdown do {
+ *	    tlb_start_vma(tlb, vma);
+ *	      for each page-table-entry PTE that needs to be removed do {
+ *		tlb_remove_tlb_entry(tlb, pte, address);
+ *		if (pte refers to a normal page) {
+ *		  tlb_remove_page(tlb, page);
+ *		}
+ *	      }
+ *	    tlb_end_vma(tlb, vma);
+ *	  }
+ *	}
+ *	tlb_finish_mmu(tlb, start, end);	// finish unmap for address space MM
+ */
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/machvec.h>
+
+/*
+ * If we can't allocate a page to make a big batch of page pointers
+ * to work on, then just handle a few from the on-stack structure.
+ */
+#define	IA64_GATHER_BUNDLE	8
+
+struct mmu_gather {
+	struct mm_struct	*mm;
+	unsigned int		nr;
+	unsigned int		max;
+	unsigned char		fullmm;		/* non-zero means full mm flush */
+	unsigned char		need_flush;	/* really unmapped some PTEs? */
+	unsigned long		start, end;
+	unsigned long		start_addr;
+	unsigned long		end_addr;
+	struct page		**pages;
+	struct page		*local[IA64_GATHER_BUNDLE];
+};
+
+struct ia64_tr_entry {
+	u64 ifa;
+	u64 itir;
+	u64 pte;
+	u64 rr;
+}; /*Record for tr entry!*/
+
+extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size);
+extern void ia64_ptr_entry(u64 target_mask, int slot);
+
+extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
+
+/*
+ region register macros
+*/
+#define RR_TO_VE(val)   (((val) >> 0) & 0x0000000000000001)
+#define RR_VE(val)	(((val) & 0x0000000000000001) << 0)
+#define RR_VE_MASK	0x0000000000000001L
+#define RR_VE_SHIFT	0
+#define RR_TO_PS(val)	(((val) >> 2) & 0x000000000000003f)
+#define RR_PS(val)	(((val) & 0x000000000000003f) << 2)
+#define RR_PS_MASK	0x00000000000000fcL
+#define RR_PS_SHIFT	2
+#define RR_RID_MASK	0x00000000ffffff00L
+#define RR_TO_RID(val) 	((val >> 8) & 0xffffff)
+
+static inline void
+ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+	tlb->need_flush = 0;
+
+	if (tlb->fullmm) {
+		/*
+		 * Tearing down the entire address space.  This happens both as a result
+		 * of exit() and execve().  The latter case necessitates the call to
+		 * flush_tlb_mm() here.
+		 */
+		flush_tlb_mm(tlb->mm);
+	} else if (unlikely (end - start >= 1024*1024*1024*1024UL
+			     || REGION_NUMBER(start) != REGION_NUMBER(end - 1)))
+	{
+		/*
+		 * If we flush more than a tera-byte or across regions, we're probably
+		 * better off just flushing the entire TLB(s).  This should be very rare
+		 * and is not worth optimizing for.
+		 */
+		flush_tlb_all();
+	} else {
+		/*
+		 * XXX fix me: flush_tlb_range() should take an mm pointer instead of a
+		 * vma pointer.
+		 */
+		struct vm_area_struct vma;
+
+		vma.vm_mm = tlb->mm;
+		/* flush the address range from the tlb: */
+		flush_tlb_range(&vma, start, end);
+		/* now flush the virt. page-table area mapping the address range: */
+		flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end));
+	}
+
+}
+
+static inline void
+ia64_tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	unsigned long i;
+	unsigned int nr;
+
+	/* lastly, release the freed pages */
+	nr = tlb->nr;
+
+	tlb->nr = 0;
+	tlb->start_addr = ~0UL;
+	for (i = 0; i < nr; ++i)
+		free_page_and_swap_cache(tlb->pages[i]);
+}
+
+/*
+ * Flush the TLB for address range START to END and, if not in fast mode, release the
+ * freed pages that where gathered up to this point.
+ */
+static inline void
+ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+	if (!tlb->need_flush)
+		return;
+	ia64_tlb_flush_mmu_tlbonly(tlb, start, end);
+	ia64_tlb_flush_mmu_free(tlb);
+}
+
+static inline void __tlb_alloc_page(struct mmu_gather *tlb)
+{
+	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+
+	if (addr) {
+		tlb->pages = (void *)addr;
+		tlb->max = PAGE_SIZE / sizeof(void *);
+	}
+}
+
+
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	tlb->mm = mm;
+	tlb->max = ARRAY_SIZE(tlb->local);
+	tlb->pages = tlb->local;
+	tlb->nr = 0;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
+	tlb->start_addr = ~0UL;
+}
+
+/*
+ * Called at the end of the shootdown operation to free up any resources that were
+ * collected.
+ */
+static inline void
+tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+	/*
+	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
+	 * tlb->end_addr.
+	 */
+	ia64_tlb_flush_mmu(tlb, start, end);
+
+	/* keep the page table cache within bounds */
+	check_pgt_cache();
+
+	if (tlb->pages != tlb->local)
+		free_pages((unsigned long)tlb->pages, 0);
+}
+
+/*
+ * Logically, this routine frees PAGE.  On MP machines, the actual freeing of the page
+ * must be delayed until after the TLB has been flushed (see comments at the beginning of
+ * this file).
+ */
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	tlb->need_flush = 1;
+
+	if (!tlb->nr && tlb->pages == tlb->local)
+		__tlb_alloc_page(tlb);
+
+	tlb->pages[tlb->nr++] = page;
+	VM_BUG_ON(tlb->nr > tlb->max);
+
+	return tlb->max - tlb->nr;
+}
+
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu_tlbonly(tlb, tlb->start_addr, tlb->end_addr);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu_free(tlb);
+}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	if (!__tlb_remove_page(tlb, page))
+		tlb_flush_mmu(tlb);
+}
+
+/*
+ * Remove TLB entry for PTE mapped at virtual address ADDRESS.  This is called for any
+ * PTE, not just those pointing to (normal) physical memory.
+ */
+static inline void
+__tlb_remove_tlb_entry (struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
+{
+	if (tlb->start_addr == ~0UL)
+		tlb->start_addr = address;
+	tlb->end_addr = address + PAGE_SIZE;
+}
+
+#define tlb_migrate_finish(mm)	platform_tlb_migrate_finish(mm)
+
+#define tlb_start_vma(tlb, vma)			do { } while (0)
+#define tlb_end_vma(tlb, vma)			do { } while (0)
+
+#define tlb_remove_tlb_entry(tlb, ptep, addr)		\
+do {							\
+	tlb->need_flush = 1;				\
+	__tlb_remove_tlb_entry(tlb, ptep, addr);	\
+} while (0)
+
+#define pte_free_tlb(tlb, ptep, address)		\
+do {							\
+	tlb->need_flush = 1;				\
+	__pte_free_tlb(tlb, ptep, address);		\
+} while (0)
+
+#define pmd_free_tlb(tlb, ptep, address)		\
+do {							\
+	tlb->need_flush = 1;				\
+	__pmd_free_tlb(tlb, ptep, address);		\
+} while (0)
+
+#define pud_free_tlb(tlb, pudp, address)		\
+do {							\
+	tlb->need_flush = 1;				\
+	__pud_free_tlb(tlb, pudp, address);		\
+} while (0)
+
+#endif /* _ASM_IA64_TLB_H */
diff --git a/arch/ia64/include/asm/tlbflush.h b/arch/ia64/include/asm/tlbflush.h
new file mode 100644
index 00000000000..3be25dfed16
--- /dev/null
+++ b/arch/ia64/include/asm/tlbflush.h
@@ -0,0 +1,102 @@
+#ifndef _ASM_IA64_TLBFLUSH_H
+#define _ASM_IA64_TLBFLUSH_H
+
+/*
+ * Copyright (C) 2002 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+
+#include <linux/mm.h>
+
+#include <asm/intrinsics.h>
+#include <asm/mmu_context.h>
+#include <asm/page.h>
+
+/*
+ * Now for some TLB flushing routines.  This is the kind of stuff that
+ * can be very expensive, so try to avoid them whenever possible.
+ */
+extern void setup_ptcg_sem(int max_purges, int from_palo);
+
+/*
+ * Flush everything (kernel mapping may also have changed due to
+ * vmalloc/vfree).
+ */
+extern void local_flush_tlb_all (void);
+
+#ifdef CONFIG_SMP
+  extern void smp_flush_tlb_all (void);
+  extern void smp_flush_tlb_mm (struct mm_struct *mm);
+  extern void smp_flush_tlb_cpumask (cpumask_t xcpumask);
+# define flush_tlb_all()	smp_flush_tlb_all()
+#else
+# define flush_tlb_all()	local_flush_tlb_all()
+# define smp_flush_tlb_cpumask(m) local_flush_tlb_all()
+#endif
+
+static inline void
+local_finish_flush_tlb_mm (struct mm_struct *mm)
+{
+	if (mm == current->active_mm)
+		activate_context(mm);
+}
+
+/*
+ * Flush a specified user mapping.  This is called, e.g., as a result of fork() and
+ * exit().  fork() ends up here because the copy-on-write mechanism needs to write-protect
+ * the PTEs of the parent task.
+ */
+static inline void
+flush_tlb_mm (struct mm_struct *mm)
+{
+	if (!mm)
+		return;
+
+	set_bit(mm->context, ia64_ctx.flushmap);
+	mm->context = 0;
+
+	if (atomic_read(&mm->mm_users) == 0)
+		return;		/* happens as a result of exit_mmap() */
+
+#ifdef CONFIG_SMP
+	smp_flush_tlb_mm(mm);
+#else
+	local_finish_flush_tlb_mm(mm);
+#endif
+}
+
+extern void flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end);
+
+/*
+ * Page-granular tlb flush.
+ */
+static inline void
+flush_tlb_page (struct vm_area_struct *vma, unsigned long addr)
+{
+#ifdef CONFIG_SMP
+	flush_tlb_range(vma, (addr & PAGE_MASK), (addr & PAGE_MASK) + PAGE_SIZE);
+#else
+	if (vma->vm_mm == current->active_mm)
+		ia64_ptcl(addr, (PAGE_SHIFT << 2));
+	else
+		vma->vm_mm->context = 0;
+#endif
+}
+
+/*
+ * Flush the local TLB. Invoked from another cpu using an IPI.
+ */
+#ifdef CONFIG_SMP
+void smp_local_flush_tlb(void);
+#else
+#define smp_local_flush_tlb()
+#endif
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	flush_tlb_all();	/* XXX fix me */
+}
+
+#endif /* _ASM_IA64_TLBFLUSH_H */
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
new file mode 100644
index 00000000000..6437ca21f61
--- /dev/null
+++ b/arch/ia64/include/asm/topology.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2002, Erich Focht, NEC
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef _ASM_IA64_TOPOLOGY_H
+#define _ASM_IA64_TOPOLOGY_H
+
+#include <asm/acpi.h>
+#include <asm/numa.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_NUMA
+
+/* Nodes w/o CPUs are preferred for memory allocations, see build_zonelists */
+#define PENALTY_FOR_NODE_WITH_CPUS 255
+
+/*
+ * Nodes within this distance are eligible for reclaim by zone_reclaim() when
+ * zone_reclaim_mode is enabled.
+ */
+#define RECLAIM_DISTANCE 15
+
+/*
+ * Returns a bitmask of CPUs on Node 'node'.
+ */
+#define cpumask_of_node(node) ((node) == -1 ?				\
+			       cpu_all_mask :				\
+			       &node_to_cpu_mask[node])
+
+/*
+ * Returns the number of the node containing Node 'nid'.
+ * Not implemented here. Multi-level hierarchies detected with
+ * the help of node_distance().
+ */
+#define parent_node(nid) (nid)
+
+/*
+ * Determines the node for a given pci bus
+ */
+#define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node
+
+void build_cpu_to_node_map(void);
+
+#endif /* CONFIG_NUMA */
+
+#ifdef CONFIG_SMP
+#define topology_physical_package_id(cpu)	(cpu_data(cpu)->socket_id)
+#define topology_core_id(cpu)			(cpu_data(cpu)->core_id)
+#define topology_core_cpumask(cpu)		(&cpu_core_map[cpu])
+#define topology_thread_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
+#endif
+
+extern void arch_fix_phys_package_id(int num, u32 slot);
+
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
+				 cpu_all_mask :				\
+				 cpumask_of_node(pcibus_to_node(bus)))
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_IA64_TOPOLOGY_H */
diff --git a/arch/ia64/include/asm/types.h b/arch/ia64/include/asm/types.h
new file mode 100644
index 00000000000..4c351b169da
--- /dev/null
+++ b/arch/ia64/include/asm/types.h
@@ -0,0 +1,31 @@
+/*
+ * This file is never included by application software unless explicitly
+ * requested (e.g., via linux/types.h) in which case the application is
+ * Linux specific so (user-) name space pollution is not a major issue.
+ * However, for interoperability, libraries still need to be careful to
+ * avoid naming clashes.
+ *
+ * Based on <asm-alpha/types.h>.
+ *
+ * Modified 1998-2000, 2002
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+#ifndef _ASM_IA64_TYPES_H
+#define _ASM_IA64_TYPES_H
+
+#include <asm-generic/int-ll64.h>
+#include <uapi/asm/types.h>
+
+#ifdef __ASSEMBLY__
+#else
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+
+struct fnptr {
+	unsigned long ip;
+	unsigned long gp;
+};
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_IA64_TYPES_H */
diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h
new file mode 100644
index 00000000000..449c8c0fa2b
--- /dev/null
+++ b/arch/ia64/include/asm/uaccess.h
@@ -0,0 +1,401 @@
+#ifndef _ASM_IA64_UACCESS_H
+#define _ASM_IA64_UACCESS_H
+
+/*
+ * This file defines various macros to transfer memory areas across
+ * the user/kernel boundary.  This needs to be done carefully because
+ * this code is executed in kernel mode and uses user-specified
+ * addresses.  Thus, we need to be careful not to let the user to
+ * trick us into accessing kernel memory that would normally be
+ * inaccessible.  This code is also fairly performance sensitive,
+ * so we want to spend as little time doing safety checks as
+ * possible.
+ *
+ * To make matters a bit more interesting, these macros sometimes also
+ * called from within the kernel itself, in which case the address
+ * validity check must be skipped.  The get_fs() macro tells us what
+ * to do: if get_fs()==USER_DS, checking is performed, if
+ * get_fs()==KERNEL_DS, checking is bypassed.
+ *
+ * Note that even if the memory area specified by the user is in a
+ * valid address range, it is still possible that we'll get a page
+ * fault while accessing it.  This is handled by filling out an
+ * exception handler fixup entry for each instruction that has the
+ * potential to fault.  When such a fault occurs, the page fault
+ * handler checks to see whether the faulting instruction has a fixup
+ * associated and, if so, sets r8 to -EFAULT and clears r9 to 0 and
+ * then resumes execution at the continuation point.
+ *
+ * Based on <asm-alpha/uaccess.h>.
+ *
+ * Copyright (C) 1998, 1999, 2001-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/page-flags.h>
+#include <linux/mm.h>
+
+#include <asm/intrinsics.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+
+/*
+ * For historical reasons, the following macros are grossly misnamed:
+ */
+#define KERNEL_DS	((mm_segment_t) { ~0UL })		/* cf. access_ok() */
+#define USER_DS		((mm_segment_t) { TASK_SIZE-1 })	/* cf. access_ok() */
+
+#define VERIFY_READ	0
+#define VERIFY_WRITE	1
+
+#define get_ds()  (KERNEL_DS)
+#define get_fs()  (current_thread_info()->addr_limit)
+#define set_fs(x) (current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a, b)	((a).seg == (b).seg)
+
+/*
+ * When accessing user memory, we need to make sure the entire area really is in
+ * user-level space.  In order to do this efficiently, we make sure that the page at
+ * address TASK_SIZE is never valid.  We also need to make sure that the address doesn't
+ * point inside the virtually mapped linear page table.
+ */
+#define __access_ok(addr, size, segment)						\
+({											\
+	__chk_user_ptr(addr);								\
+	(likely((unsigned long) (addr) <= (segment).seg)				\
+	 && ((segment).seg == KERNEL_DS.seg						\
+	     || likely(REGION_OFFSET((unsigned long) (addr)) < RGN_MAP_LIMIT)));	\
+})
+#define access_ok(type, addr, size)	__access_ok((addr), (size), get_fs())
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * Careful to not
+ * (a) re-use the arguments for side effects (sizeof/typeof is ok)
+ * (b) require any knowledge of processes at this stage
+ */
+#define put_user(x, ptr)	__put_user_check((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr)), get_fs())
+#define get_user(x, ptr)	__get_user_check((x), (ptr), sizeof(*(ptr)), get_fs())
+
+/*
+ * The "__xxx" versions do not do address space checking, useful when
+ * doing multiple accesses to the same area (the programmer has to do the
+ * checks by hand with "access_ok()")
+ */
+#define __put_user(x, ptr)	__put_user_nocheck((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr)))
+#define __get_user(x, ptr)	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+
+extern long __put_user_unaligned_unknown (void);
+
+#define __put_user_unaligned(x, ptr)								\
+({												\
+	long __ret;										\
+	switch (sizeof(*(ptr))) {								\
+		case 1: __ret = __put_user((x), (ptr)); break;					\
+		case 2: __ret = (__put_user((x), (u8 __user *)(ptr)))				\
+			| (__put_user((x) >> 8, ((u8 __user *)(ptr) + 1))); break;		\
+		case 4: __ret = (__put_user((x), (u16 __user *)(ptr)))				\
+			| (__put_user((x) >> 16, ((u16 __user *)(ptr) + 1))); break;		\
+		case 8: __ret = (__put_user((x), (u32 __user *)(ptr)))				\
+			| (__put_user((x) >> 32, ((u32 __user *)(ptr) + 1))); break;		\
+		default: __ret = __put_user_unaligned_unknown();				\
+	}											\
+	__ret;											\
+})
+
+extern long __get_user_unaligned_unknown (void);
+
+#define __get_user_unaligned(x, ptr)								\
+({												\
+	long __ret;										\
+	switch (sizeof(*(ptr))) {								\
+		case 1: __ret = __get_user((x), (ptr)); break;					\
+		case 2: __ret = (__get_user((x), (u8 __user *)(ptr)))				\
+			| (__get_user((x) >> 8, ((u8 __user *)(ptr) + 1))); break;		\
+		case 4: __ret = (__get_user((x), (u16 __user *)(ptr)))				\
+			| (__get_user((x) >> 16, ((u16 __user *)(ptr) + 1))); break;		\
+		case 8: __ret = (__get_user((x), (u32 __user *)(ptr)))				\
+			| (__get_user((x) >> 32, ((u32 __user *)(ptr) + 1))); break;		\
+		default: __ret = __get_user_unaligned_unknown();				\
+	}											\
+	__ret;											\
+})
+
+#ifdef ASM_SUPPORTED
+  struct __large_struct { unsigned long buf[100]; };
+# define __m(x) (*(struct __large_struct __user *)(x))
+
+/* We need to declare the __ex_table section before we can use it in .xdata.  */
+asm (".section \"__ex_table\", \"a\"\n\t.previous");
+
+# define __get_user_size(val, addr, n, err)							\
+do {												\
+	register long __gu_r8 asm ("r8") = 0;							\
+	register long __gu_r9 asm ("r9");							\
+	asm ("\n[1:]\tld"#n" %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n"	\
+	     "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n"						\
+	     "[1:]"										\
+	     : "=r"(__gu_r9), "=r"(__gu_r8) : "m"(__m(addr)), "1"(__gu_r8));			\
+	(err) = __gu_r8;									\
+	(val) = __gu_r9;									\
+} while (0)
+
+/*
+ * The "__put_user_size()" macro tells gcc it reads from memory instead of writing it.  This
+ * is because they do not write to any memory gcc knows about, so there are no aliasing
+ * issues.
+ */
+# define __put_user_size(val, addr, n, err)							\
+do {												\
+	register long __pu_r8 asm ("r8") = 0;							\
+	asm volatile ("\n[1:]\tst"#n" %1=%r2%P1\t// %0 gets overwritten by exception handler\n"	\
+		      "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n"					\
+		      "[1:]"									\
+		      : "=r"(__pu_r8) : "m"(__m(addr)), "rO"(val), "0"(__pu_r8));		\
+	(err) = __pu_r8;									\
+} while (0)
+
+#else /* !ASM_SUPPORTED */
+# define RELOC_TYPE	2	/* ip-rel */
+# define __get_user_size(val, addr, n, err)				\
+do {									\
+	__ld_user("__ex_table", (unsigned long) addr, n, RELOC_TYPE);	\
+	(err) = ia64_getreg(_IA64_REG_R8);				\
+	(val) = ia64_getreg(_IA64_REG_R9);				\
+} while (0)
+# define __put_user_size(val, addr, n, err)							\
+do {												\
+	__st_user("__ex_table", (unsigned long) addr, n, RELOC_TYPE, (unsigned long) (val));	\
+	(err) = ia64_getreg(_IA64_REG_R8);							\
+} while (0)
+#endif /* !ASM_SUPPORTED */
+
+extern void __get_user_unknown (void);
+
+/*
+ * Evaluating arguments X, PTR, SIZE, and SEGMENT may involve subroutine-calls, which
+ * could clobber r8 and r9 (among others).  Thus, be careful not to evaluate it while
+ * using r8/r9.
+ */
+#define __do_get_user(check, x, ptr, size, segment)					\
+({											\
+	const __typeof__(*(ptr)) __user *__gu_ptr = (ptr);				\
+	__typeof__ (size) __gu_size = (size);						\
+	long __gu_err = -EFAULT;							\
+	unsigned long __gu_val = 0;							\
+	if (!check || __access_ok(__gu_ptr, size, segment))				\
+		switch (__gu_size) {							\
+		      case 1: __get_user_size(__gu_val, __gu_ptr, 1, __gu_err); break;	\
+		      case 2: __get_user_size(__gu_val, __gu_ptr, 2, __gu_err); break;	\
+		      case 4: __get_user_size(__gu_val, __gu_ptr, 4, __gu_err); break;	\
+		      case 8: __get_user_size(__gu_val, __gu_ptr, 8, __gu_err); break;	\
+		      default: __get_user_unknown(); break;				\
+		}									\
+	(x) = (__typeof__(*(__gu_ptr))) __gu_val;					\
+	__gu_err;									\
+})
+
+#define __get_user_nocheck(x, ptr, size)	__do_get_user(0, x, ptr, size, KERNEL_DS)
+#define __get_user_check(x, ptr, size, segment)	__do_get_user(1, x, ptr, size, segment)
+
+extern void __put_user_unknown (void);
+
+/*
+ * Evaluating arguments X, PTR, SIZE, and SEGMENT may involve subroutine-calls, which
+ * could clobber r8 (among others).  Thus, be careful not to evaluate them while using r8.
+ */
+#define __do_put_user(check, x, ptr, size, segment)					\
+({											\
+	__typeof__ (x) __pu_x = (x);							\
+	__typeof__ (*(ptr)) __user *__pu_ptr = (ptr);					\
+	__typeof__ (size) __pu_size = (size);						\
+	long __pu_err = -EFAULT;							\
+											\
+	if (!check || __access_ok(__pu_ptr, __pu_size, segment))			\
+		switch (__pu_size) {							\
+		      case 1: __put_user_size(__pu_x, __pu_ptr, 1, __pu_err); break;	\
+		      case 2: __put_user_size(__pu_x, __pu_ptr, 2, __pu_err); break;	\
+		      case 4: __put_user_size(__pu_x, __pu_ptr, 4, __pu_err); break;	\
+		      case 8: __put_user_size(__pu_x, __pu_ptr, 8, __pu_err); break;	\
+		      default: __put_user_unknown(); break;				\
+		}									\
+	__pu_err;									\
+})
+
+#define __put_user_nocheck(x, ptr, size)	__do_put_user(0, x, ptr, size, KERNEL_DS)
+#define __put_user_check(x, ptr, size, segment)	__do_put_user(1, x, ptr, size, segment)
+
+/*
+ * Complex access routines
+ */
+extern unsigned long __must_check __copy_user (void __user *to, const void __user *from,
+					       unsigned long count);
+
+static inline unsigned long
+__copy_to_user (void __user *to, const void *from, unsigned long count)
+{
+	return __copy_user(to, (__force void __user *) from, count);
+}
+
+static inline unsigned long
+__copy_from_user (void *to, const void __user *from, unsigned long count)
+{
+	return __copy_user((__force void __user *) to, from, count);
+}
+
+#define __copy_to_user_inatomic		__copy_to_user
+#define __copy_from_user_inatomic	__copy_from_user
+#define copy_to_user(to, from, n)							\
+({											\
+	void __user *__cu_to = (to);							\
+	const void *__cu_from = (from);							\
+	long __cu_len = (n);								\
+											\
+	if (__access_ok(__cu_to, __cu_len, get_fs()))					\
+		__cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len);	\
+	__cu_len;									\
+})
+
+#define copy_from_user(to, from, n)							\
+({											\
+	void *__cu_to = (to);								\
+	const void __user *__cu_from = (from);						\
+	long __cu_len = (n);								\
+											\
+	__chk_user_ptr(__cu_from);							\
+	if (__access_ok(__cu_from, __cu_len, get_fs()))					\
+		__cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len);	\
+	__cu_len;									\
+})
+
+#define __copy_in_user(to, from, size)	__copy_user((to), (from), (size))
+
+static inline unsigned long
+copy_in_user (void __user *to, const void __user *from, unsigned long n)
+{
+	if (likely(access_ok(VERIFY_READ, from, n) && access_ok(VERIFY_WRITE, to, n)))
+		n = __copy_user(to, from, n);
+	return n;
+}
+
+extern unsigned long __do_clear_user (void __user *, unsigned long);
+
+#define __clear_user(to, n)		__do_clear_user(to, n)
+
+#define clear_user(to, n)					\
+({								\
+	unsigned long __cu_len = (n);				\
+	if (__access_ok(to, __cu_len, get_fs()))		\
+		__cu_len = __do_clear_user(to, __cu_len);	\
+	__cu_len;						\
+})
+
+
+/*
+ * Returns: -EFAULT if exception before terminator, N if the entire buffer filled, else
+ * strlen.
+ */
+extern long __must_check __strncpy_from_user (char *to, const char __user *from, long to_len);
+
+#define strncpy_from_user(to, from, n)					\
+({									\
+	const char __user * __sfu_from = (from);			\
+	long __sfu_ret = -EFAULT;					\
+	if (__access_ok(__sfu_from, 0, get_fs()))			\
+		__sfu_ret = __strncpy_from_user((to), __sfu_from, (n));	\
+	__sfu_ret;							\
+})
+
+/* Returns: 0 if bad, string length+1 (memory size) of string if ok */
+extern unsigned long __strlen_user (const char __user *);
+
+#define strlen_user(str)				\
+({							\
+	const char __user *__su_str = (str);		\
+	unsigned long __su_ret = 0;			\
+	if (__access_ok(__su_str, 0, get_fs()))		\
+		__su_ret = __strlen_user(__su_str);	\
+	__su_ret;					\
+})
+
+/*
+ * Returns: 0 if exception before NUL or reaching the supplied limit
+ * (N), a value greater than N if the limit would be exceeded, else
+ * strlen.
+ */
+extern unsigned long __strnlen_user (const char __user *, long);
+
+#define strnlen_user(str, len)					\
+({								\
+	const char __user *__su_str = (str);			\
+	unsigned long __su_ret = 0;				\
+	if (__access_ok(__su_str, 0, get_fs()))			\
+		__su_ret = __strnlen_user(__su_str, len);	\
+	__su_ret;						\
+})
+
+/* Generic code can't deal with the location-relative format that we use for compactness.  */
+#define ARCH_HAS_SORT_EXTABLE
+#define ARCH_HAS_SEARCH_EXTABLE
+
+struct exception_table_entry {
+	int addr;	/* location-relative address of insn this fixup is for */
+	int cont;	/* location-relative continuation addr.; if bit 2 is set, r9 is set to 0 */
+};
+
+extern void ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e);
+extern const struct exception_table_entry *search_exception_tables (unsigned long addr);
+
+static inline int
+ia64_done_with_exception (struct pt_regs *regs)
+{
+	const struct exception_table_entry *e;
+	e = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
+	if (e) {
+		ia64_handle_exception(regs, e);
+		return 1;
+	}
+	return 0;
+}
+
+#define ARCH_HAS_TRANSLATE_MEM_PTR	1
+static __inline__ char *
+xlate_dev_mem_ptr (unsigned long p)
+{
+	struct page *page;
+	char * ptr;
+
+	page = pfn_to_page(p >> PAGE_SHIFT);
+	if (PageUncached(page))
+		ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+	else
+		ptr = __va(p);
+
+	return ptr;
+}
+
+/*
+ * Convert a virtual cached kernel memory pointer to an uncached pointer
+ */
+static __inline__ char *
+xlate_dev_kmem_ptr (char * p)
+{
+	struct page *page;
+	char * ptr;
+
+	page = virt_to_page((unsigned long)p);
+	if (PageUncached(page))
+		ptr = (char *)__pa(p) + __IA64_UNCACHED_OFFSET;
+	else
+		ptr = p;
+
+	return ptr;
+}
+
+#endif /* _ASM_IA64_UACCESS_H */
diff --git a/arch/ia64/include/asm/unaligned.h b/arch/ia64/include/asm/unaligned.h
new file mode 100644
index 00000000000..7bddc7f5858
--- /dev/null
+++ b/arch/ia64/include/asm/unaligned.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_IA64_UNALIGNED_H
+#define _ASM_IA64_UNALIGNED_H
+
+#include <linux/unaligned/le_struct.h>
+#include <linux/unaligned/be_byteshift.h>
+#include <linux/unaligned/generic.h>
+
+#define get_unaligned	__get_unaligned_le
+#define put_unaligned	__put_unaligned_le
+
+#endif /* _ASM_IA64_UNALIGNED_H */
diff --git a/arch/ia64/include/asm/uncached.h b/arch/ia64/include/asm/uncached.h
new file mode 100644
index 00000000000..13d7e65ca3c
--- /dev/null
+++ b/arch/ia64/include/asm/uncached.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2001-2008 Silicon Graphics, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * Prototypes for the uncached page allocator
+ */
+
+extern unsigned long uncached_alloc_page(int starting_nid, int n_pages);
+extern void uncached_free_page(unsigned long uc_addr, int n_pages);
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
new file mode 100644
index 00000000000..fb13dc5e8f8
--- /dev/null
+++ b/arch/ia64/include/asm/unistd.h
@@ -0,0 +1,50 @@
+/*
+ * IA-64 Linux syscall numbers and inline-functions.
+ *
+ * Copyright (C) 1998-2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#ifndef _ASM_IA64_UNISTD_H
+#define _ASM_IA64_UNISTD_H
+
+#include <uapi/asm/unistd.h>
+
+
+
+#define NR_syscalls			315 /* length of syscall table */
+
+/*
+ * The following defines stop scripts/checksyscalls.sh from complaining about
+ * unimplemented system calls.  Glibc provides for each of these by using
+ * more modern equivalent system calls.
+ */
+#define __IGNORE_fork		/* clone() */
+#define __IGNORE_time		/* gettimeofday() */
+#define __IGNORE_alarm		/* setitimer(ITIMER_REAL, ... */
+#define __IGNORE_pause		/* rt_sigprocmask(), rt_sigsuspend() */
+#define __IGNORE_utime		/* utimes() */
+#define __IGNORE_getpgrp	/* getpgid() */
+#define __IGNORE_vfork		/* clone() */
+#define __IGNORE_umount2	/* umount() */
+
+#if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
+
+#include <linux/types.h>
+#include <linux/linkage.h>
+#include <linux/compiler.h>
+
+extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr);
+
+asmlinkage unsigned long sys_mmap(
+				unsigned long addr, unsigned long len,
+				int prot, int flags,
+				int fd, long off);
+asmlinkage unsigned long sys_mmap2(
+				unsigned long addr, unsigned long len,
+				int prot, int flags,
+				int fd, long pgoff);
+struct pt_regs;
+asmlinkage long sys_ia64_pipe(void);
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_IA64_UNISTD_H */
diff --git a/arch/ia64/include/asm/unwind.h b/arch/ia64/include/asm/unwind.h
new file mode 100644
index 00000000000..1af3875f1a5
--- /dev/null
+++ b/arch/ia64/include/asm/unwind.h
@@ -0,0 +1,233 @@
+#ifndef _ASM_IA64_UNWIND_H
+#define _ASM_IA64_UNWIND_H
+
+/*
+ * Copyright (C) 1999-2000, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * A simple API for unwinding kernel stacks.  This is used for
+ * debugging and error reporting purposes.  The kernel doesn't need
+ * full-blown stack unwinding with all the bells and whitles, so there
+ * is not much point in implementing the full IA-64 unwind API (though
+ * it would of course be possible to implement the kernel API on top
+ * of it).
+ */
+
+struct task_struct;	/* forward declaration */
+struct switch_stack;	/* forward declaration */
+
+enum unw_application_register {
+	UNW_AR_BSP,
+	UNW_AR_BSPSTORE,
+	UNW_AR_PFS,
+	UNW_AR_RNAT,
+	UNW_AR_UNAT,
+	UNW_AR_LC,
+	UNW_AR_EC,
+	UNW_AR_FPSR,
+	UNW_AR_RSC,
+	UNW_AR_CCV,
+	UNW_AR_CSD,
+	UNW_AR_SSD
+};
+
+/*
+ * The following declarations are private to the unwind
+ * implementation:
+ */
+
+struct unw_stack {
+	unsigned long limit;
+	unsigned long top;
+};
+
+#define UNW_FLAG_INTERRUPT_FRAME	(1UL << 0)
+
+/*
+ * No user of this module should every access this structure directly
+ * as it is subject to change.  It is declared here solely so we can
+ * use automatic variables.
+ */
+struct unw_frame_info {
+	struct unw_stack regstk;
+	struct unw_stack memstk;
+	unsigned int flags;
+	short hint;
+	short prev_script;
+
+	/* current frame info: */
+	unsigned long bsp;		/* backing store pointer value */
+	unsigned long sp;		/* stack pointer value */
+	unsigned long psp;		/* previous sp value */
+	unsigned long ip;		/* instruction pointer value */
+	unsigned long pr;		/* current predicate values */
+	unsigned long *cfm_loc;		/* cfm save location (or NULL) */
+	unsigned long pt;		/* struct pt_regs location */
+
+	struct task_struct *task;
+	struct switch_stack *sw;
+
+	/* preserved state: */
+	unsigned long *bsp_loc;		/* previous bsp save location */
+	unsigned long *bspstore_loc;
+	unsigned long *pfs_loc;
+	unsigned long *rnat_loc;
+	unsigned long *rp_loc;
+	unsigned long *pri_unat_loc;
+	unsigned long *unat_loc;
+	unsigned long *pr_loc;
+	unsigned long *lc_loc;
+	unsigned long *fpsr_loc;
+	struct unw_ireg {
+		unsigned long *loc;
+		struct unw_ireg_nat {
+			unsigned long type : 3;		/* enum unw_nat_type */
+			signed long off : 61;		/* NaT word is at loc+nat.off */
+		} nat;
+	} r4, r5, r6, r7;
+	unsigned long *b1_loc, *b2_loc, *b3_loc, *b4_loc, *b5_loc;
+	struct ia64_fpreg *f2_loc, *f3_loc, *f4_loc, *f5_loc, *fr_loc[16];
+};
+
+/*
+ * The official API follows below:
+ */
+
+struct unw_table_entry {
+	u64 start_offset;
+	u64 end_offset;
+	u64 info_offset;
+};
+
+/*
+ * Initialize unwind support.
+ */
+extern void unw_init (void);
+
+extern void *unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp,
+				   const void *table_start, const void *table_end);
+
+extern void unw_remove_unwind_table (void *handle);
+
+/*
+ * Prepare to unwind blocked task t.
+ */
+extern void unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t);
+
+extern void unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t,
+				 struct switch_stack *sw);
+
+/*
+ * Prepare to unwind the currently running thread.
+ */
+extern void unw_init_running (void (*callback)(struct unw_frame_info *info, void *arg), void *arg);
+
+/*
+ * Unwind to previous to frame.  Returns 0 if successful, negative
+ * number in case of an error.
+ */
+extern int unw_unwind (struct unw_frame_info *info);
+
+/*
+ * Unwind until the return pointer is in user-land (or until an error
+ * occurs).  Returns 0 if successful, negative number in case of
+ * error.
+ */
+extern int unw_unwind_to_user (struct unw_frame_info *info);
+
+#define unw_is_intr_frame(info)	(((info)->flags & UNW_FLAG_INTERRUPT_FRAME) != 0)
+
+static inline int
+unw_get_ip (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = (info)->ip;
+	return 0;
+}
+
+static inline int
+unw_get_sp (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = (info)->sp;
+	return 0;
+}
+
+static inline int
+unw_get_psp (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = (info)->psp;
+	return 0;
+}
+
+static inline int
+unw_get_bsp (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = (info)->bsp;
+	return 0;
+}
+
+static inline int
+unw_get_cfm (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = *(info)->cfm_loc;
+	return 0;
+}
+
+static inline int
+unw_set_cfm (struct unw_frame_info *info, unsigned long val)
+{
+	*(info)->cfm_loc = val;
+	return 0;
+}
+
+static inline int
+unw_get_rp (struct unw_frame_info *info, unsigned long *val)
+{
+	if (!info->rp_loc)
+		return -1;
+	*val = *info->rp_loc;
+	return 0;
+}
+
+extern int unw_access_gr (struct unw_frame_info *, int, unsigned long *, char *, int);
+extern int unw_access_br (struct unw_frame_info *, int, unsigned long *, int);
+extern int unw_access_fr (struct unw_frame_info *, int, struct ia64_fpreg *, int);
+extern int unw_access_ar (struct unw_frame_info *, int, unsigned long *, int);
+extern int unw_access_pr (struct unw_frame_info *, unsigned long *, int);
+
+static inline int
+unw_set_gr (struct unw_frame_info *i, int n, unsigned long v, char nat)
+{
+	return unw_access_gr(i, n, &v, &nat, 1);
+}
+
+static inline int
+unw_set_br (struct unw_frame_info *i, int n, unsigned long v)
+{
+	return unw_access_br(i, n, &v, 1);
+}
+
+static inline int
+unw_set_fr (struct unw_frame_info *i, int n, struct ia64_fpreg v)
+{
+	return unw_access_fr(i, n, &v, 1);
+}
+
+static inline int
+unw_set_ar (struct unw_frame_info *i, int n, unsigned long v)
+{
+	return unw_access_ar(i, n, &v, 1);
+}
+
+static inline int
+unw_set_pr (struct unw_frame_info *i, unsigned long v)
+{
+	return unw_access_pr(i, &v, 1);
+}
+
+#define unw_get_gr(i,n,v,nat)	unw_access_gr(i,n,v,nat,0)
+#define unw_get_br(i,n,v)	unw_access_br(i,n,v,0)
+#define unw_get_fr(i,n,v)	unw_access_fr(i,n,v,0)
+#define unw_get_ar(i,n,v)	unw_access_ar(i,n,v,0)
+#define unw_get_pr(i,v)		unw_access_pr(i,v,0)
+
+#endif /* _ASM_UNWIND_H */
diff --git a/arch/ia64/include/asm/user.h b/arch/ia64/include/asm/user.h
new file mode 100644
index 00000000000..8b982111034
--- /dev/null
+++ b/arch/ia64/include/asm/user.h
@@ -0,0 +1,58 @@
+#ifndef _ASM_IA64_USER_H
+#define _ASM_IA64_USER_H
+
+/*
+ * Core file format: The core file is written in such a way that gdb
+ * can understand it and provide useful information to the user (under
+ * linux we use the `trad-core' bfd).  The file contents are as
+ * follows:
+ *
+ *  upage: 1 page consisting of a user struct that tells gdb
+ *	what is present in the file.  Directly after this is a
+ *	copy of the task_struct, which is currently not used by gdb,
+ *	but it may come in handy at some point.  All of the registers
+ *	are stored as part of the upage.  The upage should always be
+ *	only one page long.
+ *  data: The data segment follows next.  We use current->end_text to
+ *	current->brk to pick up all of the user variables, plus any memory
+ *	that may have been sbrk'ed.  No attempt is made to determine if a
+ *	page is demand-zero or if a page is totally unused, we just cover
+ *	the entire range.  All of the addresses are rounded in such a way
+ *	that an integral number of pages is written.
+ *  stack: We need the stack information in order to get a meaningful
+ *	backtrace.  We need to write the data from usp to
+ *	current->start_stack, so we round each of these in order to be able
+ *	to write an integer number of pages.
+ *
+ * Modified 1998, 1999, 2001
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+#include <linux/ptrace.h>
+#include <linux/types.h>
+
+#include <asm/page.h>
+
+#define EF_SIZE		3072	/* XXX fix me */
+
+struct user {
+	unsigned long	regs[EF_SIZE/8+32];	/* integer and fp regs */
+	size_t		u_tsize;		/* text size (pages) */
+	size_t		u_dsize;		/* data size (pages) */
+	size_t		u_ssize;		/* stack size (pages) */
+	unsigned long	start_code;		/* text starting address */
+	unsigned long	start_data;		/* data starting address */
+	unsigned long	start_stack;		/* stack starting address */
+	long int	signal;			/* signal causing core dump */
+	unsigned long	u_ar0;			/* help gdb find registers */
+	unsigned long	magic;			/* identifies a core file */
+	char		u_comm[32];		/* user command name */
+};
+
+#define NBPG			PAGE_SIZE
+#define UPAGES			1
+#define HOST_TEXT_START_ADDR	(u.start_code)
+#define HOST_DATA_START_ADDR	(u.start_data)
+#define HOST_STACK_END_ADDR	(u.start_stack + u.u_ssize * NBPG)
+
+#endif /* _ASM_IA64_USER_H */
diff --git a/arch/ia64/include/asm/ustack.h b/arch/ia64/include/asm/ustack.h
new file mode 100644
index 00000000000..b275401b96d
--- /dev/null
+++ b/arch/ia64/include/asm/ustack.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_IA64_USTACK_H
+#define _ASM_IA64_USTACK_H
+
+#include <asm/page.h>
+#include <uapi/asm/ustack.h>
+
+/* The absolute hard limit for stack size is 1/2 of the mappable space in the region */
+#define MAX_USER_STACK_SIZE	(RGN_MAP_LIMIT/2)
+#define STACK_TOP		(0x6000000000000000UL + RGN_MAP_LIMIT)
+#define STACK_TOP_MAX		STACK_TOP
+#endif /* _ASM_IA64_USTACK_H */
diff --git a/arch/ia64/include/asm/uv/uv.h b/arch/ia64/include/asm/uv/uv.h
new file mode 100644
index 00000000000..8f6cbaa742e
--- /dev/null
+++ b/arch/ia64/include/asm/uv/uv.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_IA64_UV_UV_H
+#define _ASM_IA64_UV_UV_H
+
+#include <asm/sn/simulator.h>
+
+static inline int is_uv_system(void)
+{
+	/* temporary support for running on hardware simulator */
+	return IS_MEDUSA() || ia64_platform_is("uv");
+}
+
+#endif	/* _ASM_IA64_UV_UV_H */
diff --git a/arch/ia64/include/asm/uv/uv_hub.h b/arch/ia64/include/asm/uv/uv_hub.h
new file mode 100644
index 00000000000..53e9dfacd07
--- /dev/null
+++ b/arch/ia64/include/asm/uv/uv_hub.h
@@ -0,0 +1,315 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV architectural definitions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef __ASM_IA64_UV_HUB_H__
+#define __ASM_IA64_UV_HUB_H__
+
+#include <linux/numa.h>
+#include <linux/percpu.h>
+#include <asm/types.h>
+#include <asm/percpu.h>
+
+
+/*
+ * Addressing Terminology
+ *
+ *	M       - The low M bits of a physical address represent the offset
+ *		  into the blade local memory. RAM memory on a blade is physically
+ *		  contiguous (although various IO spaces may punch holes in
+ *		  it)..
+ *
+ * 	N	- Number of bits in the node portion of a socket physical
+ * 		  address.
+ *
+ * 	NASID   - network ID of a router, Mbrick or Cbrick. Nasid values of
+ * 	 	  routers always have low bit of 1, C/MBricks have low bit
+ * 		  equal to 0. Most addressing macros that target UV hub chips
+ * 		  right shift the NASID by 1 to exclude the always-zero bit.
+ * 		  NASIDs contain up to 15 bits.
+ *
+ *	GNODE   - NASID right shifted by 1 bit. Most mmrs contain gnodes instead
+ *		  of nasids.
+ *
+ * 	PNODE   - the low N bits of the GNODE. The PNODE is the most useful variant
+ * 		  of the nasid for socket usage.
+ *
+ *
+ *  NumaLink Global Physical Address Format:
+ *  +--------------------------------+---------------------+
+ *  |00..000|      GNODE             |      NodeOffset     |
+ *  +--------------------------------+---------------------+
+ *          |<-------53 - M bits --->|<--------M bits ----->
+ *
+ *	M - number of node offset bits (35 .. 40)
+ *
+ *
+ *  Memory/UV-HUB Processor Socket Address Format:
+ *  +----------------+---------------+---------------------+
+ *  |00..000000000000|   PNODE       |      NodeOffset     |
+ *  +----------------+---------------+---------------------+
+ *                   <--- N bits --->|<--------M bits ----->
+ *
+ *	M - number of node offset bits (35 .. 40)
+ *	N - number of PNODE bits (0 .. 10)
+ *
+ *		Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64).
+ *		The actual values are configuration dependent and are set at
+ *		boot time. M & N values are set by the hardware/BIOS at boot.
+ */
+
+
+/*
+ * Maximum number of bricks in all partitions and in all coherency domains.
+ * This is the total number of bricks accessible in the numalink fabric. It
+ * includes all C & M bricks. Routers are NOT included.
+ *
+ * This value is also the value of the maximum number of non-router NASIDs
+ * in the numalink fabric.
+ *
+ * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused.
+ */
+#define UV_MAX_NUMALINK_BLADES	16384
+
+/*
+ * Maximum number of C/Mbricks within a software SSI (hardware may support
+ * more).
+ */
+#define UV_MAX_SSI_BLADES	1
+
+/*
+ * The largest possible NASID of a C or M brick (+ 2)
+ */
+#define UV_MAX_NASID_VALUE	(UV_MAX_NUMALINK_NODES * 2)
+
+/*
+ * The following defines attributes of the HUB chip. These attributes are
+ * frequently referenced and are kept in the per-cpu data areas of each cpu.
+ * They are kept together in a struct to minimize cache misses.
+ */
+struct uv_hub_info_s {
+	unsigned long	global_mmr_base;
+	unsigned long	gpa_mask;
+	unsigned long	gnode_upper;
+	unsigned long	lowmem_remap_top;
+	unsigned long	lowmem_remap_base;
+	unsigned short	pnode;
+	unsigned short	pnode_mask;
+	unsigned short	coherency_domain_number;
+	unsigned short	numa_blade_id;
+	unsigned char	blade_processor_id;
+	unsigned char	m_val;
+	unsigned char	n_val;
+};
+DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+#define uv_hub_info 		(&__get_cpu_var(__uv_hub_info))
+#define uv_cpu_hub_info(cpu)	(&per_cpu(__uv_hub_info, cpu))
+
+/*
+ * Local & Global MMR space macros.
+ * 	Note: macros are intended to be used ONLY by inline functions
+ * 	in this file - not by other kernel code.
+ * 		n -  NASID (full 15-bit global nasid)
+ * 		g -  GNODE (full 15-bit global nasid, right shifted 1)
+ * 		p -  PNODE (local part of nsids, right shifted 1)
+ */
+#define UV_NASID_TO_PNODE(n)		(((n) >> 1) & uv_hub_info->pnode_mask)
+#define UV_PNODE_TO_NASID(p)		(((p) << 1) | uv_hub_info->gnode_upper)
+
+#define UV_LOCAL_MMR_BASE		0xf4000000UL
+#define UV_GLOBAL_MMR32_BASE		0xf8000000UL
+#define UV_GLOBAL_MMR64_BASE		(uv_hub_info->global_mmr_base)
+
+#define UV_GLOBAL_MMR32_PNODE_SHIFT	15
+#define UV_GLOBAL_MMR64_PNODE_SHIFT	26
+
+#define UV_GLOBAL_MMR32_PNODE_BITS(p)	((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
+
+#define UV_GLOBAL_MMR64_PNODE_BITS(p)					\
+	((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT)
+
+/*
+ * Macros for converting between kernel virtual addresses, socket local physical
+ * addresses, and UV global physical addresses.
+ * 	Note: use the standard __pa() & __va() macros for converting
+ * 	      between socket virtual and socket physical addresses.
+ */
+
+/* socket phys RAM --> UV global physical address */
+static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
+{
+	if (paddr < uv_hub_info->lowmem_remap_top)
+		paddr += uv_hub_info->lowmem_remap_base;
+	return paddr | uv_hub_info->gnode_upper;
+}
+
+
+/* socket virtual --> UV global physical address */
+static inline unsigned long uv_gpa(void *v)
+{
+	return __pa(v) | uv_hub_info->gnode_upper;
+}
+
+/* socket virtual --> UV global physical address */
+static inline void *uv_vgpa(void *v)
+{
+	return (void *)uv_gpa(v);
+}
+
+/* UV global physical address --> socket virtual */
+static inline void *uv_va(unsigned long gpa)
+{
+	return __va(gpa & uv_hub_info->gpa_mask);
+}
+
+/* pnode, offset --> socket virtual */
+static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
+{
+	return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset);
+}
+
+
+/*
+ * Access global MMRs using the low memory MMR32 space. This region supports
+ * faster MMR access but not all MMRs are accessible in this space.
+ */
+static inline unsigned long *uv_global_mmr32_address(int pnode,
+				unsigned long offset)
+{
+	return __va(UV_GLOBAL_MMR32_BASE |
+		       UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset);
+}
+
+static inline void uv_write_global_mmr32(int pnode, unsigned long offset,
+				 unsigned long val)
+{
+	*uv_global_mmr32_address(pnode, offset) = val;
+}
+
+static inline unsigned long uv_read_global_mmr32(int pnode,
+						 unsigned long offset)
+{
+	return *uv_global_mmr32_address(pnode, offset);
+}
+
+/*
+ * Access Global MMR space using the MMR space located at the top of physical
+ * memory.
+ */
+static inline unsigned long *uv_global_mmr64_address(int pnode,
+				unsigned long offset)
+{
+	return __va(UV_GLOBAL_MMR64_BASE |
+		    UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset);
+}
+
+static inline void uv_write_global_mmr64(int pnode, unsigned long offset,
+				unsigned long val)
+{
+	*uv_global_mmr64_address(pnode, offset) = val;
+}
+
+static inline unsigned long uv_read_global_mmr64(int pnode,
+						 unsigned long offset)
+{
+	return *uv_global_mmr64_address(pnode, offset);
+}
+
+/*
+ * Access hub local MMRs. Faster than using global space but only local MMRs
+ * are accessible.
+ */
+static inline unsigned long *uv_local_mmr_address(unsigned long offset)
+{
+	return __va(UV_LOCAL_MMR_BASE | offset);
+}
+
+static inline unsigned long uv_read_local_mmr(unsigned long offset)
+{
+	return *uv_local_mmr_address(offset);
+}
+
+static inline void uv_write_local_mmr(unsigned long offset, unsigned long val)
+{
+	*uv_local_mmr_address(offset) = val;
+}
+
+/*
+ * Structures and definitions for converting between cpu, node, pnode, and blade
+ * numbers.
+ */
+
+/* Blade-local cpu number of current cpu. Numbered 0 .. <# cpus on the blade> */
+static inline int uv_blade_processor_id(void)
+{
+	return smp_processor_id();
+}
+
+/* Blade number of current cpu. Numnbered 0 .. <#blades -1> */
+static inline int uv_numa_blade_id(void)
+{
+	return 0;
+}
+
+/* Convert a cpu number to the the UV blade number */
+static inline int uv_cpu_to_blade_id(int cpu)
+{
+	return 0;
+}
+
+/* Convert linux node number to the UV blade number */
+static inline int uv_node_to_blade_id(int nid)
+{
+	return 0;
+}
+
+/* Convert a blade id to the PNODE of the blade */
+static inline int uv_blade_to_pnode(int bid)
+{
+	return 0;
+}
+
+/* Determine the number of possible cpus on a blade */
+static inline int uv_blade_nr_possible_cpus(int bid)
+{
+	return num_possible_cpus();
+}
+
+/* Determine the number of online cpus on a blade */
+static inline int uv_blade_nr_online_cpus(int bid)
+{
+	return num_online_cpus();
+}
+
+/* Convert a cpu id to the PNODE of the blade containing the cpu */
+static inline int uv_cpu_to_pnode(int cpu)
+{
+	return 0;
+}
+
+/* Convert a linux node number to the PNODE of the blade */
+static inline int uv_node_to_pnode(int nid)
+{
+	return 0;
+}
+
+/* Maximum possible number of blades */
+static inline int uv_num_possible_blades(void)
+{
+	return 1;
+}
+
+static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
+{
+	/* not currently needed on ia64 */
+}
+
+
+#endif /* __ASM_IA64_UV_HUB__ */
+
diff --git a/arch/ia64/include/asm/uv/uv_mmrs.h b/arch/ia64/include/asm/uv/uv_mmrs.h
new file mode 100644
index 00000000000..fe0b8f05e1a
--- /dev/null
+++ b/arch/ia64/include/asm/uv/uv_mmrs.h
@@ -0,0 +1,825 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV MMR definitions
+ *
+ * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_UV_UV_MMRS_H
+#define _ASM_IA64_UV_UV_MMRS_H
+
+#define UV_MMR_ENABLE		(1UL << 63)
+
+/* ========================================================================= */
+/*                           UVH_BAU_DATA_CONFIG                             */
+/* ========================================================================= */
+#define UVH_BAU_DATA_CONFIG 0x61680UL
+#define UVH_BAU_DATA_CONFIG_32 0x0438
+
+#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
+#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_BAU_DATA_CONFIG_DM_SHFT 8
+#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11
+#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12
+#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_BAU_DATA_CONFIG_P_SHFT 13
+#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_BAU_DATA_CONFIG_T_SHFT 15
+#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_BAU_DATA_CONFIG_M_SHFT 16
+#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32
+#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_bau_data_config_u {
+    unsigned long	v;
+    struct uvh_bau_data_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                           UVH_EVENT_OCCURRED0                             */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED0 0x70000UL
+#define UVH_EVENT_OCCURRED0_32 0x005e8
+
+#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0
+#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
+#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1
+#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL
+#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2
+#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL
+#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3
+#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL
+#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4
+#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL
+#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5
+#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL
+#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6
+#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL
+#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7
+#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL
+#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8
+#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL
+#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9
+#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL
+#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10
+#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL
+#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
+#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
+#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12
+#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL
+#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13
+#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL
+#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14
+#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL
+#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15
+#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL
+#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16
+#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL
+#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17
+#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL
+#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18
+#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL
+#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19
+#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL
+#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20
+#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL
+#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21
+#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL
+#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22
+#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38
+#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL
+#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39
+#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL
+#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40
+#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL
+#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41
+#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL
+#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42
+#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL
+#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43
+#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL
+#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44
+#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL
+#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45
+#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47
+#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48
+#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL
+#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49
+#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL
+#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50
+#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51
+#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52
+#define UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53
+#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL
+#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54
+#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL
+#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55
+#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL
+#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56
+#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL
+union uvh_event_occurred0_u {
+    unsigned long	v;
+    struct uvh_event_occurred0_s {
+	unsigned long	lb_hcerr             :  1;  /* RW, W1C */
+	unsigned long	gr0_hcerr            :  1;  /* RW, W1C */
+	unsigned long	gr1_hcerr            :  1;  /* RW, W1C */
+	unsigned long	lh_hcerr             :  1;  /* RW, W1C */
+	unsigned long	rh_hcerr             :  1;  /* RW, W1C */
+	unsigned long	xn_hcerr             :  1;  /* RW, W1C */
+	unsigned long	si_hcerr             :  1;  /* RW, W1C */
+	unsigned long	lb_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	gr0_aoerr0           :  1;  /* RW, W1C */
+	unsigned long	gr1_aoerr0           :  1;  /* RW, W1C */
+	unsigned long	lh_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	rh_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	xn_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	si_aoerr0            :  1;  /* RW, W1C */
+	unsigned long	lb_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	gr0_aoerr1           :  1;  /* RW, W1C */
+	unsigned long	gr1_aoerr1           :  1;  /* RW, W1C */
+	unsigned long	lh_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	rh_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	xn_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	si_aoerr1            :  1;  /* RW, W1C */
+	unsigned long	rh_vpi_int           :  1;  /* RW, W1C */
+	unsigned long	system_shutdown_int  :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_0         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_1         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_2         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_3         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_4         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_5         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_6         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_7         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_8         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_9         :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_10        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_11        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_12        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_13        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_14        :  1;  /* RW, W1C */
+	unsigned long	lb_irq_int_15        :  1;  /* RW, W1C */
+	unsigned long	l1_nmi_int           :  1;  /* RW, W1C */
+	unsigned long	stop_clock           :  1;  /* RW, W1C */
+	unsigned long	asic_to_l1           :  1;  /* RW, W1C */
+	unsigned long	l1_to_asic           :  1;  /* RW, W1C */
+	unsigned long	ltc_int              :  1;  /* RW, W1C */
+	unsigned long	la_seq_trigger       :  1;  /* RW, W1C */
+	unsigned long	ipi_int              :  1;  /* RW, W1C */
+	unsigned long	extio_int0           :  1;  /* RW, W1C */
+	unsigned long	extio_int1           :  1;  /* RW, W1C */
+	unsigned long	extio_int2           :  1;  /* RW, W1C */
+	unsigned long	extio_int3           :  1;  /* RW, W1C */
+	unsigned long	profile_int          :  1;  /* RW, W1C */
+	unsigned long	rtc0                 :  1;  /* RW, W1C */
+	unsigned long	rtc1                 :  1;  /* RW, W1C */
+	unsigned long	rtc2                 :  1;  /* RW, W1C */
+	unsigned long	rtc3                 :  1;  /* RW, W1C */
+	unsigned long	bau_data             :  1;  /* RW, W1C */
+	unsigned long	power_management_req :  1;  /* RW, W1C */
+	unsigned long	rsvd_57_63           :  7;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                        UVH_EVENT_OCCURRED0_ALIAS                          */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL
+#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0
+
+/* ========================================================================= */
+/*                         UVH_GR0_TLB_INT0_CONFIG                           */
+/* ========================================================================= */
+#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
+
+#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13
+#define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15
+#define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16
+#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr0_tlb_int0_config_u {
+    unsigned long	v;
+    struct uvh_gr0_tlb_int0_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                         UVH_GR0_TLB_INT1_CONFIG                           */
+/* ========================================================================= */
+#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
+
+#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
+#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
+#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12
+#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13
+#define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15
+#define UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16
+#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr0_tlb_int1_config_u {
+    unsigned long	v;
+    struct uvh_gr0_tlb_int1_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                         UVH_GR1_TLB_INT0_CONFIG                           */
+/* ========================================================================= */
+#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL
+
+#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13
+#define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15
+#define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16
+#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr1_tlb_int0_config_u {
+    unsigned long	v;
+    struct uvh_gr1_tlb_int0_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                         UVH_GR1_TLB_INT1_CONFIG                           */
+/* ========================================================================= */
+#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL
+
+#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
+#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
+#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
+#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13
+#define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15
+#define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16
+#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr1_tlb_int1_config_u {
+    unsigned long	v;
+    struct uvh_gr1_tlb_int1_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                               UVH_INT_CMPB                                */
+/* ========================================================================= */
+#define UVH_INT_CMPB 0x22080UL
+
+#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0
+#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL
+
+union uvh_int_cmpb_u {
+    unsigned long	v;
+    struct uvh_int_cmpb_s {
+	unsigned long	real_time_cmpb : 56;  /* RW */
+	unsigned long	rsvd_56_63     :  8;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                               UVH_INT_CMPC                                */
+/* ========================================================================= */
+#define UVH_INT_CMPC 0x22100UL
+
+#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0
+#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL
+
+union uvh_int_cmpc_u {
+    unsigned long	v;
+    struct uvh_int_cmpc_s {
+	unsigned long	real_time_cmpc : 56;  /* RW */
+	unsigned long	rsvd_56_63     :  8;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                               UVH_INT_CMPD                                */
+/* ========================================================================= */
+#define UVH_INT_CMPD 0x22180UL
+
+#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0
+#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL
+
+union uvh_int_cmpd_u {
+    unsigned long	v;
+    struct uvh_int_cmpd_s {
+	unsigned long	real_time_cmpd : 56;  /* RW */
+	unsigned long	rsvd_56_63     :  8;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                               UVH_NODE_ID                                 */
+/* ========================================================================= */
+#define UVH_NODE_ID 0x0UL
+
+#define UVH_NODE_ID_FORCE1_SHFT 0
+#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL
+#define UVH_NODE_ID_MANUFACTURER_SHFT 1
+#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
+#define UVH_NODE_ID_PART_NUMBER_SHFT 12
+#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
+#define UVH_NODE_ID_REVISION_SHFT 28
+#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL
+#define UVH_NODE_ID_NODE_ID_SHFT 32
+#define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UVH_NODE_ID_NODES_PER_BIT_SHFT 48
+#define UVH_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL
+#define UVH_NODE_ID_NI_PORT_SHFT 56
+#define UVH_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL
+
+union uvh_node_id_u {
+    unsigned long	v;
+    struct uvh_node_id_s {
+	unsigned long	force1        :  1;  /* RO */
+	unsigned long	manufacturer  : 11;  /* RO */
+	unsigned long	part_number   : 16;  /* RO */
+	unsigned long	revision      :  4;  /* RO */
+	unsigned long	node_id       : 15;  /* RW */
+	unsigned long	rsvd_47       :  1;  /*    */
+	unsigned long	nodes_per_bit :  7;  /* RW */
+	unsigned long	rsvd_55       :  1;  /*    */
+	unsigned long	ni_port       :  4;  /* RO */
+	unsigned long	rsvd_60_63    :  4;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR                  */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_alias210_redirect_config_0_mmr_s {
+	unsigned long	rsvd_0_23 : 24;  /*    */
+	unsigned long	dest_base : 22;  /* RW */
+	unsigned long	rsvd_46_63: 18;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR                  */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_alias210_redirect_config_1_mmr_s {
+	unsigned long	rsvd_0_23 : 24;  /*    */
+	unsigned long	dest_base : 22;  /* RW */
+	unsigned long	rsvd_46_63: 18;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR                  */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
+union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_alias210_redirect_config_2_mmr_s {
+	unsigned long	rsvd_0_23 : 24;  /*    */
+	unsigned long	dest_base : 22;  /* RW */
+	unsigned long	rsvd_46_63: 18;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                    UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR                      */
+/* ========================================================================= */
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
+
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_rh_gam_gru_overlay_config_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_gru_overlay_config_mmr_s {
+	unsigned long	rsvd_0_27: 28;  /*    */
+	unsigned long	base   : 18;  /* RW */
+	unsigned long	rsvd_46_47:  2;  /*    */
+	unsigned long	gr4    :  1;  /* RW */
+	unsigned long	rsvd_49_51:  3;  /*    */
+	unsigned long	n_gru  :  4;  /* RW */
+	unsigned long	rsvd_56_62:  7;  /*    */
+	unsigned long	enable :  1;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                    UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR                      */
+/* ========================================================================= */
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
+
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_rh_gam_mmr_overlay_config_mmr_u {
+    unsigned long	v;
+    struct uvh_rh_gam_mmr_overlay_config_mmr_s {
+	unsigned long	rsvd_0_25: 26;  /*    */
+	unsigned long	base     : 20;  /* RW */
+	unsigned long	dual_hub :  1;  /* RW */
+	unsigned long	rsvd_47_62: 16;  /*    */
+	unsigned long	enable   :  1;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                                 UVH_RTC                                   */
+/* ========================================================================= */
+#define UVH_RTC 0x340000UL
+
+#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0
+#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL
+
+union uvh_rtc_u {
+    unsigned long	v;
+    struct uvh_rtc_s {
+	unsigned long	real_time_clock : 56;  /* RW */
+	unsigned long	rsvd_56_63      :  8;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                           UVH_RTC1_INT_CONFIG                             */
+/* ========================================================================= */
+#define UVH_RTC1_INT_CONFIG 0x615c0UL
+
+#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0
+#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_RTC1_INT_CONFIG_DM_SHFT 8
+#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11
+#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12
+#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_RTC1_INT_CONFIG_P_SHFT 13
+#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_RTC1_INT_CONFIG_T_SHFT 15
+#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_RTC1_INT_CONFIG_M_SHFT 16
+#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32
+#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_rtc1_int_config_u {
+    unsigned long	v;
+    struct uvh_rtc1_int_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                           UVH_RTC2_INT_CONFIG                             */
+/* ========================================================================= */
+#define UVH_RTC2_INT_CONFIG 0x61600UL
+
+#define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0
+#define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_RTC2_INT_CONFIG_DM_SHFT 8
+#define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11
+#define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12
+#define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_RTC2_INT_CONFIG_P_SHFT 13
+#define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_RTC2_INT_CONFIG_T_SHFT 15
+#define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_RTC2_INT_CONFIG_M_SHFT 16
+#define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32
+#define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_rtc2_int_config_u {
+    unsigned long	v;
+    struct uvh_rtc2_int_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                           UVH_RTC3_INT_CONFIG                             */
+/* ========================================================================= */
+#define UVH_RTC3_INT_CONFIG 0x61640UL
+
+#define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0
+#define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_RTC3_INT_CONFIG_DM_SHFT 8
+#define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11
+#define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12
+#define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_RTC3_INT_CONFIG_P_SHFT 13
+#define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_RTC3_INT_CONFIG_T_SHFT 15
+#define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_RTC3_INT_CONFIG_M_SHFT 16
+#define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32
+#define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_rtc3_int_config_u {
+    unsigned long	v;
+    struct uvh_rtc3_int_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                            UVH_RTC_INC_RATIO                              */
+/* ========================================================================= */
+#define UVH_RTC_INC_RATIO 0x350000UL
+
+#define UVH_RTC_INC_RATIO_FRACTION_SHFT 0
+#define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL
+#define UVH_RTC_INC_RATIO_RATIO_SHFT 20
+#define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL
+
+union uvh_rtc_inc_ratio_u {
+    unsigned long	v;
+    struct uvh_rtc_inc_ratio_s {
+	unsigned long	fraction : 20;  /* RW */
+	unsigned long	ratio    :  3;  /* RW */
+	unsigned long	rsvd_23_63: 41;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                          UVH_SI_ADDR_MAP_CONFIG                           */
+/* ========================================================================= */
+#define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL
+
+#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_SHFT 0
+#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL
+#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_SHFT 8
+#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_MASK 0x0000000000000f00UL
+
+union uvh_si_addr_map_config_u {
+    unsigned long	v;
+    struct uvh_si_addr_map_config_s {
+	unsigned long	m_skt :  6;  /* RW */
+	unsigned long	rsvd_6_7:  2;  /*    */
+	unsigned long	n_skt :  4;  /* RW */
+	unsigned long	rsvd_12_63: 52;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
+/*                       UVH_SI_ALIAS0_OVERLAY_CONFIG                        */
+/* ========================================================================= */
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL
+
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_si_alias0_overlay_config_u {
+    unsigned long	v;
+    struct uvh_si_alias0_overlay_config_s {
+	unsigned long	rsvd_0_23: 24;  /*    */
+	unsigned long	base    :  8;  /* RW */
+	unsigned long	rsvd_32_47: 16;  /*    */
+	unsigned long	m_alias :  5;  /* RW */
+	unsigned long	rsvd_53_62: 10;  /*    */
+	unsigned long	enable  :  1;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                       UVH_SI_ALIAS1_OVERLAY_CONFIG                        */
+/* ========================================================================= */
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL
+
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_si_alias1_overlay_config_u {
+    unsigned long	v;
+    struct uvh_si_alias1_overlay_config_s {
+	unsigned long	rsvd_0_23: 24;  /*    */
+	unsigned long	base    :  8;  /* RW */
+	unsigned long	rsvd_32_47: 16;  /*    */
+	unsigned long	m_alias :  5;  /* RW */
+	unsigned long	rsvd_53_62: 10;  /*    */
+	unsigned long	enable  :  1;  /* RW */
+    } s;
+};
+
+/* ========================================================================= */
+/*                       UVH_SI_ALIAS2_OVERLAY_CONFIG                        */
+/* ========================================================================= */
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL
+
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_si_alias2_overlay_config_u {
+    unsigned long	v;
+    struct uvh_si_alias2_overlay_config_s {
+	unsigned long	rsvd_0_23: 24;  /*    */
+	unsigned long	base    :  8;  /* RW */
+	unsigned long	rsvd_32_47: 16;  /*    */
+	unsigned long	m_alias :  5;  /* RW */
+	unsigned long	rsvd_53_62: 10;  /*    */
+	unsigned long	enable  :  1;  /* RW */
+    } s;
+};
+
+
+#endif /* _ASM_IA64_UV_UV_MMRS_H */
diff --git a/arch/ia64/include/asm/vga.h b/arch/ia64/include/asm/vga.h
new file mode 100644
index 00000000000..02184ecd820
--- /dev/null
+++ b/arch/ia64/include/asm/vga.h
@@ -0,0 +1,25 @@
+/*
+ *	Access to VGA videoram
+ *
+ *	(c) 1998 Martin Mares <mj@ucw.cz>
+ *	(c) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ *	(c) 1999 Don Dugger <don.dugger@intel.com>
+ */
+
+#ifndef __ASM_IA64_VGA_H_
+#define __ASM_IA64_VGA_H_
+
+/*
+ * On the PC, we can just recalculate addresses and then access the
+ * videoram directly without any black magic.
+ */
+
+extern unsigned long vga_console_iobase;
+extern unsigned long vga_console_membase;
+
+#define VGA_MAP_MEM(x,s)	((unsigned long) ioremap_nocache(vga_console_membase + (x), s))
+
+#define vga_readb(x)	(*(x))
+#define vga_writeb(x,y)	(*(y) = (x))
+
+#endif /* __ASM_IA64_VGA_H_ */
diff --git a/arch/ia64/include/asm/xor.h b/arch/ia64/include/asm/xor.h
new file mode 100644
index 00000000000..a349e23dea1
--- /dev/null
+++ b/arch/ia64/include/asm/xor.h
@@ -0,0 +1,31 @@
+/*
+ * Optimized RAID-5 checksumming functions for IA-64.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *);
+extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *,
+		       unsigned long *);
+extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *,
+		       unsigned long *, unsigned long *);
+extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
+		       unsigned long *, unsigned long *, unsigned long *);
+
+static struct xor_block_template xor_block_ia64 = {
+	.name =	"ia64",
+	.do_2 =	xor_ia64_2,
+	.do_3 =	xor_ia64_3,
+	.do_4 =	xor_ia64_4,
+	.do_5 =	xor_ia64_5,
+};
+
+#define XOR_TRY_TEMPLATES	xor_speed(&xor_block_ia64)
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..1b3f5eb5fcd
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -0,0 +1,50 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += kvm_para.h
+
+header-y += auxvec.h
+header-y += bitsperlong.h
+header-y += break.h
+header-y += byteorder.h
+header-y += cmpxchg.h
+header-y += errno.h
+header-y += fcntl.h
+header-y += fpu.h
+header-y += gcc_intrin.h
+header-y += ia64regs.h
+header-y += intel_intrin.h
+header-y += intrinsics.h
+header-y += ioctl.h
+header-y += ioctls.h
+header-y += ipcbuf.h
+header-y += kvm.h
+header-y += kvm_para.h
+header-y += mman.h
+header-y += msgbuf.h
+header-y += param.h
+header-y += perfmon.h
+header-y += perfmon_default_smpl.h
+header-y += poll.h
+header-y += posix_types.h
+header-y += ptrace.h
+header-y += ptrace_offsets.h
+header-y += resource.h
+header-y += rse.h
+header-y += sembuf.h
+header-y += setup.h
+header-y += shmbuf.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += socket.h
+header-y += sockios.h
+header-y += stat.h
+header-y += statfs.h
+header-y += swab.h
+header-y += termbits.h
+header-y += termios.h
+header-y += types.h
+header-y += ucontext.h
+header-y += unistd.h
+header-y += ustack.h
diff --git a/arch/ia64/include/uapi/asm/auxvec.h b/arch/ia64/include/uapi/asm/auxvec.h
new file mode 100644
index 00000000000..58277fc650e
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/auxvec.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_IA64_AUXVEC_H
+#define _ASM_IA64_AUXVEC_H
+
+/*
+ * Architecture-neutral AT_ values are in the range 0-17.  Leave some room for more of
+ * them, start the architecture-specific ones at 32.
+ */
+#define AT_SYSINFO	32
+#define AT_SYSINFO_EHDR	33
+
+#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */
+
+#endif /* _ASM_IA64_AUXVEC_H */
diff --git a/arch/ia64/include/uapi/asm/bitsperlong.h b/arch/ia64/include/uapi/asm/bitsperlong.h
new file mode 100644
index 00000000000..ec4db3c970b
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_IA64_BITSPERLONG_H
+#define __ASM_IA64_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_IA64_BITSPERLONG_H */
diff --git a/arch/ia64/include/uapi/asm/break.h b/arch/ia64/include/uapi/asm/break.h
new file mode 100644
index 00000000000..f0340203989
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/break.h
@@ -0,0 +1,23 @@
+#ifndef _ASM_IA64_BREAK_H
+#define _ASM_IA64_BREAK_H
+
+/*
+ * IA-64 Linux break numbers.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * OS-specific debug break numbers:
+ */
+#define __IA64_BREAK_KDB		0x80100
+#define __IA64_BREAK_KPROBE		0x81000 /* .. 0x81fff */
+#define __IA64_BREAK_JPROBE		0x82000
+
+/*
+ * OS-specific break numbers:
+ */
+#define __IA64_BREAK_SYSCALL		0x100000
+
+#endif /* _ASM_IA64_BREAK_H */
diff --git a/arch/ia64/include/uapi/asm/byteorder.h b/arch/ia64/include/uapi/asm/byteorder.h
new file mode 100644
index 00000000000..a8dd7355815
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/byteorder.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_IA64_BYTEORDER_H
+#define _ASM_IA64_BYTEORDER_H
+
+#include <linux/byteorder/little_endian.h>
+
+#endif /* _ASM_IA64_BYTEORDER_H */
diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h
new file mode 100644
index 00000000000..f35109b1d90
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/cmpxchg.h
@@ -0,0 +1,156 @@
+#ifndef _ASM_IA64_CMPXCHG_H
+#define _ASM_IA64_CMPXCHG_H
+
+/*
+ * Compare/Exchange, forked from asm/intrinsics.h
+ * which was:
+ *
+ *	Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+/* include compiler specific intrinsics */
+#include <asm/ia64regs.h>
+#ifdef __INTEL_COMPILER
+# include <asm/intel_intrin.h>
+#else
+# include <asm/gcc_intrin.h>
+#endif
+
+/*
+ * This function doesn't exist, so you'll get a linker error if
+ * something tries to do an invalid xchg().
+ */
+extern void ia64_xchg_called_with_bad_pointer(void);
+
+#define __xchg(x, ptr, size)						\
+({									\
+	unsigned long __xchg_result;					\
+									\
+	switch (size) {							\
+	case 1:								\
+		__xchg_result = ia64_xchg1((__u8 *)ptr, x);		\
+		break;							\
+									\
+	case 2:								\
+		__xchg_result = ia64_xchg2((__u16 *)ptr, x);		\
+		break;							\
+									\
+	case 4:								\
+		__xchg_result = ia64_xchg4((__u32 *)ptr, x);		\
+		break;							\
+									\
+	case 8:								\
+		__xchg_result = ia64_xchg8((__u64 *)ptr, x);		\
+		break;							\
+	default:							\
+		ia64_xchg_called_with_bad_pointer();			\
+	}								\
+	__xchg_result;							\
+})
+
+#define xchg(ptr, x)							\
+((__typeof__(*(ptr))) __xchg((unsigned long) (x), (ptr), sizeof(*(ptr))))
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+/*
+ * This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid cmpxchg().
+ */
+extern long ia64_cmpxchg_called_with_bad_pointer(void);
+
+#define ia64_cmpxchg(sem, ptr, old, new, size)				\
+({									\
+	__u64 _o_, _r_;							\
+									\
+	switch (size) {							\
+	case 1:								\
+		_o_ = (__u8) (long) (old);				\
+		break;							\
+	case 2:								\
+		_o_ = (__u16) (long) (old);				\
+		break;							\
+	case 4:								\
+		_o_ = (__u32) (long) (old);				\
+		break;							\
+	case 8:								\
+		_o_ = (__u64) (long) (old);				\
+		break;							\
+	default:							\
+		break;							\
+	}								\
+	switch (size) {							\
+	case 1:								\
+		_r_ = ia64_cmpxchg1_##sem((__u8 *) ptr, new, _o_);	\
+		break;							\
+									\
+	case 2:								\
+		_r_ = ia64_cmpxchg2_##sem((__u16 *) ptr, new, _o_);	\
+		break;							\
+									\
+	case 4:								\
+		_r_ = ia64_cmpxchg4_##sem((__u32 *) ptr, new, _o_);	\
+		break;							\
+									\
+	case 8:								\
+		_r_ = ia64_cmpxchg8_##sem((__u64 *) ptr, new, _o_);	\
+		break;							\
+									\
+	default:							\
+		_r_ = ia64_cmpxchg_called_with_bad_pointer();		\
+		break;							\
+	}								\
+	(__typeof__(old)) _r_;						\
+})
+
+#define cmpxchg_acq(ptr, o, n)	\
+	ia64_cmpxchg(acq, (ptr), (o), (n), sizeof(*(ptr)))
+#define cmpxchg_rel(ptr, o, n)	\
+	ia64_cmpxchg(rel, (ptr), (o), (n), sizeof(*(ptr)))
+
+/*
+ * Worse still - early processor implementations actually just ignored
+ * the acquire/release and did a full fence all the time.  Unfortunately
+ * this meant a lot of badly written code that used .acq when they really
+ * wanted .rel became legacy out in the wild - so when we made a cpu
+ * that strictly did the .acq or .rel ... all that code started breaking - so
+ * we had to back-pedal and keep the "legacy" behavior of a full fence :-(
+ */
+
+/* for compatibility with other platforms: */
+#define cmpxchg(ptr, o, n)	cmpxchg_acq((ptr), (o), (n))
+#define cmpxchg64(ptr, o, n)	cmpxchg_acq((ptr), (o), (n))
+
+#define cmpxchg_local		cmpxchg
+#define cmpxchg64_local		cmpxchg64
+
+#ifdef CONFIG_IA64_DEBUG_CMPXCHG
+# define CMPXCHG_BUGCHECK_DECL	int _cmpxchg_bugcheck_count = 128;
+# define CMPXCHG_BUGCHECK(v)						\
+do {									\
+	if (_cmpxchg_bugcheck_count-- <= 0) {				\
+		void *ip;						\
+		extern int printk(const char *fmt, ...);		\
+		ip = (void *) ia64_getreg(_IA64_REG_IP);		\
+		printk("CMPXCHG_BUGCHECK: stuck at %p on word %p\n", ip, (v));\
+		break;							\
+	}								\
+} while (0)
+#else /* !CONFIG_IA64_DEBUG_CMPXCHG */
+# define CMPXCHG_BUGCHECK_DECL
+# define CMPXCHG_BUGCHECK(v)
+#endif /* !CONFIG_IA64_DEBUG_CMPXCHG */
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_CMPXCHG_H */
diff --git a/arch/ia64/include/uapi/asm/errno.h b/arch/ia64/include/uapi/asm/errno.h
new file mode 100644
index 00000000000..4c82b503d92
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/errno.h
@@ -0,0 +1 @@
+#include <asm-generic/errno.h>
diff --git a/arch/ia64/include/uapi/asm/fcntl.h b/arch/ia64/include/uapi/asm/fcntl.h
new file mode 100644
index 00000000000..7b485876cad
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/fcntl.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_IA64_FCNTL_H
+#define _ASM_IA64_FCNTL_H
+/*
+ * Modified 1998-2000
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co.
+ */
+
+#define force_o_largefile()	\
+		(personality(current->personality) != PER_LINUX32)
+
+#include <linux/personality.h>
+#include <asm-generic/fcntl.h>
+
+#endif /* _ASM_IA64_FCNTL_H */
diff --git a/arch/ia64/include/uapi/asm/fpu.h b/arch/ia64/include/uapi/asm/fpu.h
new file mode 100644
index 00000000000..b6395ad1500
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/fpu.h
@@ -0,0 +1,66 @@
+#ifndef _ASM_IA64_FPU_H
+#define _ASM_IA64_FPU_H
+
+/*
+ * Copyright (C) 1998, 1999, 2002, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/types.h>
+
+/* floating point status register: */
+#define FPSR_TRAP_VD	(1 << 0)	/* invalid op trap disabled */
+#define FPSR_TRAP_DD	(1 << 1)	/* denormal trap disabled */
+#define FPSR_TRAP_ZD	(1 << 2)	/* zero-divide trap disabled */
+#define FPSR_TRAP_OD	(1 << 3)	/* overflow trap disabled */
+#define FPSR_TRAP_UD	(1 << 4)	/* underflow trap disabled */
+#define FPSR_TRAP_ID	(1 << 5)	/* inexact trap disabled */
+#define FPSR_S0(x)	((x) <<  6)
+#define FPSR_S1(x)	((x) << 19)
+#define FPSR_S2(x)	(__IA64_UL(x) << 32)
+#define FPSR_S3(x)	(__IA64_UL(x) << 45)
+
+/* floating-point status field controls: */
+#define FPSF_FTZ	(1 << 0)		/* flush-to-zero */
+#define FPSF_WRE	(1 << 1)		/* widest-range exponent */
+#define FPSF_PC(x)	(((x) & 0x3) << 2)	/* precision control */
+#define FPSF_RC(x)	(((x) & 0x3) << 4)	/* rounding control */
+#define FPSF_TD		(1 << 6)		/* trap disabled */
+
+/* floating-point status field flags: */
+#define FPSF_V		(1 <<  7)		/* invalid operation flag */
+#define FPSF_D		(1 <<  8)		/* denormal/unnormal operand flag */
+#define FPSF_Z		(1 <<  9)		/* zero divide (IEEE) flag */
+#define FPSF_O		(1 << 10)		/* overflow (IEEE) flag */
+#define FPSF_U		(1 << 11)		/* underflow (IEEE) flag */
+#define FPSF_I		(1 << 12)		/* inexact (IEEE) flag) */
+
+/* floating-point rounding control: */
+#define FPRC_NEAREST	0x0
+#define FPRC_NEGINF	0x1
+#define FPRC_POSINF	0x2
+#define FPRC_TRUNC	0x3
+
+#define FPSF_DEFAULT	(FPSF_PC (0x3) | FPSF_RC (FPRC_NEAREST))
+
+/* This default value is the same as HP-UX uses.  Don't change it
+   without a very good reason.  */
+#define FPSR_DEFAULT	(FPSR_TRAP_VD | FPSR_TRAP_DD | FPSR_TRAP_ZD	\
+			 | FPSR_TRAP_OD | FPSR_TRAP_UD | FPSR_TRAP_ID	\
+			 | FPSR_S0 (FPSF_DEFAULT)			\
+			 | FPSR_S1 (FPSF_DEFAULT | FPSF_TD | FPSF_WRE)	\
+			 | FPSR_S2 (FPSF_DEFAULT | FPSF_TD)		\
+			 | FPSR_S3 (FPSF_DEFAULT | FPSF_TD))
+
+# ifndef __ASSEMBLY__
+
+struct ia64_fpreg {
+	union {
+		unsigned long bits[2];
+		long double __dummy;	/* force 16-byte alignment */
+	} u;
+};
+
+# endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IA64_FPU_H */
diff --git a/arch/ia64/include/uapi/asm/gcc_intrin.h b/arch/ia64/include/uapi/asm/gcc_intrin.h
new file mode 100644
index 00000000000..61d0d011197
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/gcc_intrin.h
@@ -0,0 +1,618 @@
+/*
+ *
+ * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+#ifndef _UAPI_ASM_IA64_GCC_INTRIN_H
+#define _UAPI_ASM_IA64_GCC_INTRIN_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+/* define this macro to get some asm stmts included in 'c' files */
+#define ASM_SUPPORTED
+
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define ia64_barrier()	asm volatile ("":::"memory")
+
+#define ia64_stop()	asm volatile (";;"::)
+
+#define ia64_invala_gr(regnum)	asm volatile ("invala.e r%0" :: "i"(regnum))
+
+#define ia64_invala_fr(regnum)	asm volatile ("invala.e f%0" :: "i"(regnum))
+
+#define ia64_flushrs() asm volatile ("flushrs;;":::"memory")
+
+#define ia64_loadrs() asm volatile ("loadrs;;":::"memory")
+
+extern void ia64_bad_param_for_setreg (void);
+extern void ia64_bad_param_for_getreg (void);
+
+
+#define ia64_native_setreg(regnum, val)						\
+({										\
+	switch (regnum) {							\
+	    case _IA64_REG_PSR_L:						\
+		    asm volatile ("mov psr.l=%0" :: "r"(val) : "memory");	\
+		    break;							\
+	    case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC:				\
+		    asm volatile ("mov ar%0=%1" ::				\
+		    			  "i" (regnum - _IA64_REG_AR_KR0),	\
+					  "r"(val): "memory");			\
+		    break;							\
+	    case _IA64_REG_CR_DCR ... _IA64_REG_CR_LRR1:			\
+		    asm volatile ("mov cr%0=%1" ::				\
+				          "i" (regnum - _IA64_REG_CR_DCR),	\
+					  "r"(val): "memory" );			\
+		    break;							\
+	    case _IA64_REG_SP:							\
+		    asm volatile ("mov r12=%0" ::				\
+			    		  "r"(val): "memory");			\
+		    break;							\
+	    case _IA64_REG_GP:							\
+		    asm volatile ("mov gp=%0" :: "r"(val) : "memory");		\
+		break;								\
+	    default:								\
+		    ia64_bad_param_for_setreg();				\
+		    break;							\
+	}									\
+})
+
+#define ia64_native_getreg(regnum)						\
+({										\
+	__u64 ia64_intri_res;							\
+										\
+	switch (regnum) {							\
+	case _IA64_REG_GP:							\
+		asm volatile ("mov %0=gp" : "=r"(ia64_intri_res));		\
+		break;								\
+	case _IA64_REG_IP:							\
+		asm volatile ("mov %0=ip" : "=r"(ia64_intri_res));		\
+		break;								\
+	case _IA64_REG_PSR:							\
+		asm volatile ("mov %0=psr" : "=r"(ia64_intri_res));		\
+		break;								\
+	case _IA64_REG_TP:	/* for current() */				\
+		ia64_intri_res = ia64_r13;					\
+		break;								\
+	case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC:				\
+		asm volatile ("mov %0=ar%1" : "=r" (ia64_intri_res)		\
+				      : "i"(regnum - _IA64_REG_AR_KR0));	\
+		break;								\
+	case _IA64_REG_CR_DCR ... _IA64_REG_CR_LRR1:				\
+		asm volatile ("mov %0=cr%1" : "=r" (ia64_intri_res)		\
+				      : "i" (regnum - _IA64_REG_CR_DCR));	\
+		break;								\
+	case _IA64_REG_SP:							\
+		asm volatile ("mov %0=sp" : "=r" (ia64_intri_res));		\
+		break;								\
+	default:								\
+		ia64_bad_param_for_getreg();					\
+		break;								\
+	}									\
+	ia64_intri_res;								\
+})
+
+#define ia64_hint_pause 0
+
+#define ia64_hint(mode)						\
+({								\
+	switch (mode) {						\
+	case ia64_hint_pause:					\
+		asm volatile ("hint @pause" ::: "memory");	\
+		break;						\
+	}							\
+})
+
+
+/* Integer values for mux1 instruction */
+#define ia64_mux1_brcst 0
+#define ia64_mux1_mix   8
+#define ia64_mux1_shuf  9
+#define ia64_mux1_alt  10
+#define ia64_mux1_rev  11
+
+#define ia64_mux1(x, mode)							\
+({										\
+	__u64 ia64_intri_res;							\
+										\
+	switch (mode) {								\
+	case ia64_mux1_brcst:							\
+		asm ("mux1 %0=%1,@brcst" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_mix:							\
+		asm ("mux1 %0=%1,@mix" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_shuf:							\
+		asm ("mux1 %0=%1,@shuf" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_alt:							\
+		asm ("mux1 %0=%1,@alt" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_rev:							\
+		asm ("mux1 %0=%1,@rev" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	}									\
+	ia64_intri_res;								\
+})
+
+#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# define ia64_popcnt(x)		__builtin_popcountl(x)
+#else
+# define ia64_popcnt(x)						\
+  ({								\
+	__u64 ia64_intri_res;					\
+	asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x));	\
+								\
+	ia64_intri_res;						\
+  })
+#endif
+
+#define ia64_getf_exp(x)					\
+({								\
+	long ia64_intri_res;					\
+								\
+	asm ("getf.exp %0=%1" : "=r"(ia64_intri_res) : "f"(x));	\
+								\
+	ia64_intri_res;						\
+})
+
+#define ia64_shrp(a, b, count)								\
+({											\
+	__u64 ia64_intri_res;								\
+	asm ("shrp %0=%1,%2,%3" : "=r"(ia64_intri_res) : "r"(a), "r"(b), "i"(count));	\
+	ia64_intri_res;									\
+})
+
+#define ia64_ldfs(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldfs %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldfd(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldfd %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldfe(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldfe %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldf8(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldf8 %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldf_fill(regnum, x)				\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldf.fill %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_st4_rel_nta(m, val)					\
+({									\
+	asm volatile ("st4.rel.nta [%0] = %1\n\t" :: "r"(m), "r"(val));	\
+})
+
+#define ia64_stfs(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stfs [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stfd(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stfd [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stfe(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stfe [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stf8(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stf8 [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stf_spill(x, regnum)						\
+({										\
+	register double __f__ asm ("f"#regnum);					\
+	asm volatile ("stf.spill [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_fetchadd4_acq(p, inc)						\
+({										\
+										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd4.acq %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_fetchadd4_rel(p, inc)						\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd4.rel %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_fetchadd8_acq(p, inc)						\
+({										\
+										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd8.acq %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_fetchadd8_rel(p, inc)						\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd8.rel %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_xchg1(ptr,x)							\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("xchg1 %0=[%1],%2"					\
+		      : "=r" (ia64_intri_res) : "r" (ptr), "r" (x) : "memory");	\
+	ia64_intri_res;								\
+})
+
+#define ia64_xchg2(ptr,x)						\
+({									\
+	__u64 ia64_intri_res;						\
+	asm volatile ("xchg2 %0=[%1],%2" : "=r" (ia64_intri_res)	\
+		      : "r" (ptr), "r" (x) : "memory");			\
+	ia64_intri_res;							\
+})
+
+#define ia64_xchg4(ptr,x)						\
+({									\
+	__u64 ia64_intri_res;						\
+	asm volatile ("xchg4 %0=[%1],%2" : "=r" (ia64_intri_res)	\
+		      : "r" (ptr), "r" (x) : "memory");			\
+	ia64_intri_res;							\
+})
+
+#define ia64_xchg8(ptr,x)						\
+({									\
+	__u64 ia64_intri_res;						\
+	asm volatile ("xchg8 %0=[%1],%2" : "=r" (ia64_intri_res)	\
+		      : "r" (ptr), "r" (x) : "memory");			\
+	ia64_intri_res;							\
+})
+
+#define ia64_cmpxchg1_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg1.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg1_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg1.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg2_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg2.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg2_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+											\
+	asm volatile ("cmpxchg2.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg4_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg4_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg4.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg8_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg8_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+											\
+	asm volatile ("cmpxchg8.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_mf()	asm volatile ("mf" ::: "memory")
+#define ia64_mfa()	asm volatile ("mf.a" ::: "memory")
+
+#define ia64_invala() asm volatile ("invala" ::: "memory")
+
+#define ia64_native_thash(addr)							\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("thash %0=%1" : "=r"(ia64_intri_res) : "r" (addr));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_srlz_i()	asm volatile (";; srlz.i ;;" ::: "memory")
+#define ia64_srlz_d()	asm volatile (";; srlz.d" ::: "memory");
+
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+# define ia64_dv_serialize_data()		asm volatile (".serialize.data");
+# define ia64_dv_serialize_instruction()	asm volatile (".serialize.instruction");
+#else
+# define ia64_dv_serialize_data()
+# define ia64_dv_serialize_instruction()
+#endif
+
+#define ia64_nop(x)	asm volatile ("nop %0"::"i"(x));
+
+#define ia64_itci(addr)	asm volatile ("itc.i %0;;" :: "r"(addr) : "memory")
+
+#define ia64_itcd(addr)	asm volatile ("itc.d %0;;" :: "r"(addr) : "memory")
+
+
+#define ia64_itri(trnum, addr) asm volatile ("itr.i itr[%0]=%1"				\
+					     :: "r"(trnum), "r"(addr) : "memory")
+
+#define ia64_itrd(trnum, addr) asm volatile ("itr.d dtr[%0]=%1"				\
+					     :: "r"(trnum), "r"(addr) : "memory")
+
+#define ia64_tpa(addr)								\
+({										\
+	unsigned long ia64_pa;							\
+	asm volatile ("tpa %0 = %1" : "=r"(ia64_pa) : "r"(addr) : "memory");	\
+	ia64_pa;								\
+})
+
+#define __ia64_set_dbr(index, val)						\
+	asm volatile ("mov dbr[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define ia64_set_ibr(index, val)						\
+	asm volatile ("mov ibr[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define ia64_set_pkr(index, val)						\
+	asm volatile ("mov pkr[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define ia64_set_pmc(index, val)						\
+	asm volatile ("mov pmc[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define ia64_set_pmd(index, val)						\
+	asm volatile ("mov pmd[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define ia64_native_set_rr(index, val)							\
+	asm volatile ("mov rr[%0]=%1" :: "r"(index), "r"(val) : "memory");
+
+#define ia64_native_get_cpuid(index)							\
+({											\
+	unsigned long ia64_intri_res;							\
+	asm volatile ("mov %0=cpuid[%r1]" : "=r"(ia64_intri_res) : "rO"(index));	\
+	ia64_intri_res;									\
+})
+
+#define __ia64_get_dbr(index)							\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=dbr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_get_ibr(index)							\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=ibr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_get_pkr(index)							\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=pkr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_get_pmc(index)							\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=pmc[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+
+#define ia64_native_get_pmd(index)						\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=pmd[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_native_get_rr(index)						\
+({										\
+	unsigned long ia64_intri_res;						\
+	asm volatile ("mov %0=rr[%1]" : "=r"(ia64_intri_res) : "r" (index));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_native_fc(addr)	asm volatile ("fc %0" :: "r"(addr) : "memory")
+
+
+#define ia64_sync_i()	asm volatile (";; sync.i" ::: "memory")
+
+#define ia64_native_ssm(mask)	asm volatile ("ssm %0":: "i"((mask)) : "memory")
+#define ia64_native_rsm(mask)	asm volatile ("rsm %0":: "i"((mask)) : "memory")
+#define ia64_sum(mask)	asm volatile ("sum %0":: "i"((mask)) : "memory")
+#define ia64_rum(mask)	asm volatile ("rum %0":: "i"((mask)) : "memory")
+
+#define ia64_ptce(addr)	asm volatile ("ptc.e %0" :: "r"(addr))
+
+#define ia64_native_ptcga(addr, size)						\
+do {										\
+	asm volatile ("ptc.ga %0,%1" :: "r"(addr), "r"(size) : "memory");	\
+	ia64_dv_serialize_data();						\
+} while (0)
+
+#define ia64_ptcl(addr, size)							\
+do {										\
+	asm volatile ("ptc.l %0,%1" :: "r"(addr), "r"(size) : "memory");	\
+	ia64_dv_serialize_data();						\
+} while (0)
+
+#define ia64_ptri(addr, size)						\
+	asm volatile ("ptr.i %0,%1" :: "r"(addr), "r"(size) : "memory")
+
+#define ia64_ptrd(addr, size)						\
+	asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory")
+
+#define ia64_ttag(addr)							\
+({									  \
+	__u64 ia64_intri_res;						   \
+	asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr));   \
+	ia64_intri_res;							 \
+})
+
+
+/* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */
+
+#define ia64_lfhint_none   0
+#define ia64_lfhint_nt1    1
+#define ia64_lfhint_nt2    2
+#define ia64_lfhint_nta    3
+
+#define ia64_lfetch(lfhint, y)					\
+({								\
+        switch (lfhint) {					\
+        case ia64_lfhint_none:					\
+                asm volatile ("lfetch [%0]" : : "r"(y));	\
+                break;						\
+        case ia64_lfhint_nt1:					\
+                asm volatile ("lfetch.nt1 [%0]" : : "r"(y));	\
+                break;						\
+        case ia64_lfhint_nt2:					\
+                asm volatile ("lfetch.nt2 [%0]" : : "r"(y));	\
+                break;						\
+        case ia64_lfhint_nta:					\
+                asm volatile ("lfetch.nta [%0]" : : "r"(y));	\
+                break;						\
+        }							\
+})
+
+#define ia64_lfetch_excl(lfhint, y)					\
+({									\
+        switch (lfhint) {						\
+        case ia64_lfhint_none:						\
+                asm volatile ("lfetch.excl [%0]" :: "r"(y));		\
+                break;							\
+        case ia64_lfhint_nt1:						\
+                asm volatile ("lfetch.excl.nt1 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt2:						\
+                asm volatile ("lfetch.excl.nt2 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nta:						\
+                asm volatile ("lfetch.excl.nta [%0]" :: "r"(y));	\
+                break;							\
+        }								\
+})
+
+#define ia64_lfetch_fault(lfhint, y)					\
+({									\
+        switch (lfhint) {						\
+        case ia64_lfhint_none:						\
+                asm volatile ("lfetch.fault [%0]" : : "r"(y));		\
+                break;							\
+        case ia64_lfhint_nt1:						\
+                asm volatile ("lfetch.fault.nt1 [%0]" : : "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt2:						\
+                asm volatile ("lfetch.fault.nt2 [%0]" : : "r"(y));	\
+                break;							\
+        case ia64_lfhint_nta:						\
+                asm volatile ("lfetch.fault.nta [%0]" : : "r"(y));	\
+                break;							\
+        }								\
+})
+
+#define ia64_lfetch_fault_excl(lfhint, y)				\
+({									\
+        switch (lfhint) {						\
+        case ia64_lfhint_none:						\
+                asm volatile ("lfetch.fault.excl [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt1:						\
+                asm volatile ("lfetch.fault.excl.nt1 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt2:						\
+                asm volatile ("lfetch.fault.excl.nt2 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nta:						\
+                asm volatile ("lfetch.fault.excl.nta [%0]" :: "r"(y));	\
+                break;							\
+        }								\
+})
+
+#define ia64_native_intrin_local_irq_restore(x)			\
+do {								\
+	asm volatile (";;   cmp.ne p6,p7=%0,r0;;"		\
+		      "(p6) ssm psr.i;"				\
+		      "(p7) rsm psr.i;;"			\
+		      "(p6) srlz.d"				\
+		      :: "r"((x)) : "p6", "p7", "memory");	\
+} while (0)
+
+#endif /* _UAPI_ASM_IA64_GCC_INTRIN_H */
diff --git a/arch/ia64/include/uapi/asm/ia64regs.h b/arch/ia64/include/uapi/asm/ia64regs.h
new file mode 100644
index 00000000000..1757f1c11ad
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/ia64regs.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2002,2003 Intel Corp.
+ *      Jun Nakajima <jun.nakajima@intel.com>
+ *      Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+
+#ifndef _ASM_IA64_IA64REGS_H
+#define _ASM_IA64_IA64REGS_H
+
+/*
+ * Register Names for getreg() and setreg().
+ *
+ * The "magic" numbers happen to match the values used by the Intel compiler's
+ * getreg()/setreg() intrinsics.
+ */
+
+/* Special Registers */
+
+#define _IA64_REG_IP		1016	/* getreg only */
+#define _IA64_REG_PSR		1019
+#define _IA64_REG_PSR_L		1019
+
+/* General Integer Registers */
+
+#define _IA64_REG_GP		1025	/* R1 */
+#define _IA64_REG_R8		1032	/* R8 */
+#define _IA64_REG_R9		1033	/* R9 */
+#define _IA64_REG_SP		1036	/* R12 */
+#define _IA64_REG_TP		1037	/* R13 */
+
+/* Application Registers */
+
+#define _IA64_REG_AR_KR0	3072
+#define _IA64_REG_AR_KR1	3073
+#define _IA64_REG_AR_KR2	3074
+#define _IA64_REG_AR_KR3	3075
+#define _IA64_REG_AR_KR4	3076
+#define _IA64_REG_AR_KR5	3077
+#define _IA64_REG_AR_KR6	3078
+#define _IA64_REG_AR_KR7	3079
+#define _IA64_REG_AR_RSC	3088
+#define _IA64_REG_AR_BSP	3089
+#define _IA64_REG_AR_BSPSTORE	3090
+#define _IA64_REG_AR_RNAT	3091
+#define _IA64_REG_AR_FCR	3093
+#define _IA64_REG_AR_EFLAG	3096
+#define _IA64_REG_AR_CSD	3097
+#define _IA64_REG_AR_SSD	3098
+#define _IA64_REG_AR_CFLAG	3099
+#define _IA64_REG_AR_FSR	3100
+#define _IA64_REG_AR_FIR	3101
+#define _IA64_REG_AR_FDR	3102
+#define _IA64_REG_AR_CCV	3104
+#define _IA64_REG_AR_UNAT	3108
+#define _IA64_REG_AR_FPSR	3112
+#define _IA64_REG_AR_ITC	3116
+#define _IA64_REG_AR_PFS	3136
+#define _IA64_REG_AR_LC		3137
+#define _IA64_REG_AR_EC		3138
+
+/* Control Registers */
+
+#define _IA64_REG_CR_DCR	4096
+#define _IA64_REG_CR_ITM	4097
+#define _IA64_REG_CR_IVA	4098
+#define _IA64_REG_CR_PTA	4104
+#define _IA64_REG_CR_IPSR	4112
+#define _IA64_REG_CR_ISR	4113
+#define _IA64_REG_CR_IIP	4115
+#define _IA64_REG_CR_IFA	4116
+#define _IA64_REG_CR_ITIR	4117
+#define _IA64_REG_CR_IIPA	4118
+#define _IA64_REG_CR_IFS	4119
+#define _IA64_REG_CR_IIM	4120
+#define _IA64_REG_CR_IHA	4121
+#define _IA64_REG_CR_LID	4160
+#define _IA64_REG_CR_IVR	4161	/* getreg only */
+#define _IA64_REG_CR_TPR	4162
+#define _IA64_REG_CR_EOI	4163
+#define _IA64_REG_CR_IRR0	4164	/* getreg only */
+#define _IA64_REG_CR_IRR1	4165	/* getreg only */
+#define _IA64_REG_CR_IRR2	4166	/* getreg only */
+#define _IA64_REG_CR_IRR3	4167	/* getreg only */
+#define _IA64_REG_CR_ITV	4168
+#define _IA64_REG_CR_PMV	4169
+#define _IA64_REG_CR_CMCV	4170
+#define _IA64_REG_CR_LRR0	4176
+#define _IA64_REG_CR_LRR1	4177
+
+/* Indirect Registers for getindreg() and setindreg() */
+
+#define _IA64_REG_INDR_CPUID	9000	/* getindreg only */
+#define _IA64_REG_INDR_DBR	9001
+#define _IA64_REG_INDR_IBR	9002
+#define _IA64_REG_INDR_PKR	9003
+#define _IA64_REG_INDR_PMC	9004
+#define _IA64_REG_INDR_PMD	9005
+#define _IA64_REG_INDR_RR	9006
+
+#endif /* _ASM_IA64_IA64REGS_H */
diff --git a/arch/ia64/include/uapi/asm/intel_intrin.h b/arch/ia64/include/uapi/asm/intel_intrin.h
new file mode 100644
index 00000000000..53cec577558
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/intel_intrin.h
@@ -0,0 +1,161 @@
+#ifndef _ASM_IA64_INTEL_INTRIN_H
+#define _ASM_IA64_INTEL_INTRIN_H
+/*
+ * Intel Compiler Intrinsics
+ *
+ * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
+ * Copyright (C) 2005,2006 Hongjiu Lu <hongjiu.lu@intel.com>
+ *
+ */
+#include <ia64intrin.h>
+
+#define ia64_barrier()		__memory_barrier()
+
+#define ia64_stop()	/* Nothing: As of now stop bit is generated for each
+		 	 * intrinsic
+		 	 */
+
+#define ia64_native_getreg	__getReg
+#define ia64_native_setreg	__setReg
+
+#define ia64_hint		__hint
+#define ia64_hint_pause		__hint_pause
+
+#define ia64_mux1_brcst		_m64_mux1_brcst
+#define ia64_mux1_mix		_m64_mux1_mix
+#define ia64_mux1_shuf		_m64_mux1_shuf
+#define ia64_mux1_alt		_m64_mux1_alt
+#define ia64_mux1_rev		_m64_mux1_rev
+
+#define ia64_mux1(x,v)		_m_to_int64(_m64_mux1(_m_from_int64(x), (v)))
+#define ia64_popcnt		_m64_popcnt
+#define ia64_getf_exp		__getf_exp
+#define ia64_shrp		_m64_shrp
+
+#define ia64_tpa		__tpa
+#define ia64_invala		__invala
+#define ia64_invala_gr		__invala_gr
+#define ia64_invala_fr		__invala_fr
+#define ia64_nop		__nop
+#define ia64_sum		__sum
+#define ia64_native_ssm		__ssm
+#define ia64_rum		__rum
+#define ia64_native_rsm		__rsm
+#define ia64_native_fc 		__fc
+
+#define ia64_ldfs		__ldfs
+#define ia64_ldfd		__ldfd
+#define ia64_ldfe		__ldfe
+#define ia64_ldf8		__ldf8
+#define ia64_ldf_fill		__ldf_fill
+
+#define ia64_stfs		__stfs
+#define ia64_stfd		__stfd
+#define ia64_stfe		__stfe
+#define ia64_stf8		__stf8
+#define ia64_stf_spill		__stf_spill
+
+#define ia64_mf			__mf
+#define ia64_mfa		__mfa
+
+#define ia64_fetchadd4_acq	__fetchadd4_acq
+#define ia64_fetchadd4_rel	__fetchadd4_rel
+#define ia64_fetchadd8_acq	__fetchadd8_acq
+#define ia64_fetchadd8_rel	__fetchadd8_rel
+
+#define ia64_xchg1		_InterlockedExchange8
+#define ia64_xchg2		_InterlockedExchange16
+#define ia64_xchg4		_InterlockedExchange
+#define ia64_xchg8		_InterlockedExchange64
+
+#define ia64_cmpxchg1_rel	_InterlockedCompareExchange8_rel
+#define ia64_cmpxchg1_acq	_InterlockedCompareExchange8_acq
+#define ia64_cmpxchg2_rel	_InterlockedCompareExchange16_rel
+#define ia64_cmpxchg2_acq	_InterlockedCompareExchange16_acq
+#define ia64_cmpxchg4_rel	_InterlockedCompareExchange_rel
+#define ia64_cmpxchg4_acq	_InterlockedCompareExchange_acq
+#define ia64_cmpxchg8_rel	_InterlockedCompareExchange64_rel
+#define ia64_cmpxchg8_acq	_InterlockedCompareExchange64_acq
+
+#define __ia64_set_dbr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_DBR, index, val)
+#define ia64_set_ibr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_IBR, index, val)
+#define ia64_set_pkr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_PKR, index, val)
+#define ia64_set_pmc(index, val)	\
+		__setIndReg(_IA64_REG_INDR_PMC, index, val)
+#define ia64_set_pmd(index, val)	\
+		__setIndReg(_IA64_REG_INDR_PMD, index, val)
+#define ia64_native_set_rr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_RR, index, val)
+
+#define ia64_native_get_cpuid(index)	\
+		__getIndReg(_IA64_REG_INDR_CPUID, index)
+#define __ia64_get_dbr(index)		__getIndReg(_IA64_REG_INDR_DBR, index)
+#define ia64_get_ibr(index)		__getIndReg(_IA64_REG_INDR_IBR, index)
+#define ia64_get_pkr(index)		__getIndReg(_IA64_REG_INDR_PKR, index)
+#define ia64_get_pmc(index)		__getIndReg(_IA64_REG_INDR_PMC, index)
+#define ia64_native_get_pmd(index)	__getIndReg(_IA64_REG_INDR_PMD, index)
+#define ia64_native_get_rr(index)	__getIndReg(_IA64_REG_INDR_RR, index)
+
+#define ia64_srlz_d		__dsrlz
+#define ia64_srlz_i		__isrlz
+
+#define ia64_dv_serialize_data()
+#define ia64_dv_serialize_instruction()
+
+#define ia64_st1_rel		__st1_rel
+#define ia64_st2_rel		__st2_rel
+#define ia64_st4_rel		__st4_rel
+#define ia64_st8_rel		__st8_rel
+
+/* FIXME: need st4.rel.nta intrinsic */
+#define ia64_st4_rel_nta	__st4_rel
+
+#define ia64_ld1_acq		__ld1_acq
+#define ia64_ld2_acq		__ld2_acq
+#define ia64_ld4_acq		__ld4_acq
+#define ia64_ld8_acq		__ld8_acq
+
+#define ia64_sync_i		__synci
+#define ia64_native_thash	__thash
+#define ia64_native_ttag	__ttag
+#define ia64_itcd		__itcd
+#define ia64_itci		__itci
+#define ia64_itrd		__itrd
+#define ia64_itri		__itri
+#define ia64_ptce		__ptce
+#define ia64_ptcl		__ptcl
+#define ia64_native_ptcg	__ptcg
+#define ia64_native_ptcga	__ptcga
+#define ia64_ptri		__ptri
+#define ia64_ptrd		__ptrd
+#define ia64_dep_mi		_m64_dep_mi
+
+/* Values for lfhint in __lfetch and __lfetch_fault */
+
+#define ia64_lfhint_none	__lfhint_none
+#define ia64_lfhint_nt1		__lfhint_nt1
+#define ia64_lfhint_nt2		__lfhint_nt2
+#define ia64_lfhint_nta		__lfhint_nta
+
+#define ia64_lfetch		__lfetch
+#define ia64_lfetch_excl	__lfetch_excl
+#define ia64_lfetch_fault	__lfetch_fault
+#define ia64_lfetch_fault_excl	__lfetch_fault_excl
+
+#define ia64_native_intrin_local_irq_restore(x)		\
+do {							\
+	if ((x) != 0) {					\
+		ia64_native_ssm(IA64_PSR_I);		\
+		ia64_srlz_d();				\
+	} else {					\
+		ia64_native_rsm(IA64_PSR_I);		\
+	}						\
+} while (0)
+
+#define __builtin_trap()	__break(0);
+
+#endif /* _ASM_IA64_INTEL_INTRIN_H */
diff --git a/arch/ia64/include/uapi/asm/intrinsics.h b/arch/ia64/include/uapi/asm/intrinsics.h
new file mode 100644
index 00000000000..5829978ff46
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/intrinsics.h
@@ -0,0 +1,124 @@
+/*
+ * Compiler-dependent intrinsics.
+ *
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#ifndef _UAPI_ASM_IA64_INTRINSICS_H
+#define _UAPI_ASM_IA64_INTRINSICS_H
+
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+/* include compiler specific intrinsics */
+#include <asm/ia64regs.h>
+#ifdef __INTEL_COMPILER
+# include <asm/intel_intrin.h>
+#else
+# include <asm/gcc_intrin.h>
+#endif
+#include <asm/cmpxchg.h>
+
+#define ia64_native_get_psr_i()	(ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I)
+
+#define ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4)	\
+do {									\
+	ia64_native_set_rr(0x0000000000000000UL, (val0));		\
+	ia64_native_set_rr(0x2000000000000000UL, (val1));		\
+	ia64_native_set_rr(0x4000000000000000UL, (val2));		\
+	ia64_native_set_rr(0x6000000000000000UL, (val3));		\
+	ia64_native_set_rr(0x8000000000000000UL, (val4));		\
+} while (0)
+
+/*
+ * Force an unresolved reference if someone tries to use
+ * ia64_fetch_and_add() with a bad value.
+ */
+extern unsigned long __bad_size_for_ia64_fetch_and_add (void);
+extern unsigned long __bad_increment_for_ia64_fetch_and_add (void);
+
+#define IA64_FETCHADD(tmp,v,n,sz,sem)						\
+({										\
+	switch (sz) {								\
+	      case 4:								\
+	        tmp = ia64_fetchadd4_##sem((unsigned int *) v, n);		\
+		break;								\
+										\
+	      case 8:								\
+	        tmp = ia64_fetchadd8_##sem((unsigned long *) v, n);		\
+		break;								\
+										\
+	      default:								\
+		__bad_size_for_ia64_fetch_and_add();				\
+	}									\
+})
+
+#define ia64_fetchadd(i,v,sem)								\
+({											\
+	__u64 _tmp;									\
+	volatile __typeof__(*(v)) *_v = (v);						\
+	/* Can't use a switch () here: gcc isn't always smart enough for that... */	\
+	if ((i) == -16)									\
+		IA64_FETCHADD(_tmp, _v, -16, sizeof(*(v)), sem);			\
+	else if ((i) == -8)								\
+		IA64_FETCHADD(_tmp, _v, -8, sizeof(*(v)), sem);				\
+	else if ((i) == -4)								\
+		IA64_FETCHADD(_tmp, _v, -4, sizeof(*(v)), sem);				\
+	else if ((i) == -1)								\
+		IA64_FETCHADD(_tmp, _v, -1, sizeof(*(v)), sem);				\
+	else if ((i) == 1)								\
+		IA64_FETCHADD(_tmp, _v, 1, sizeof(*(v)), sem);				\
+	else if ((i) == 4)								\
+		IA64_FETCHADD(_tmp, _v, 4, sizeof(*(v)), sem);				\
+	else if ((i) == 8)								\
+		IA64_FETCHADD(_tmp, _v, 8, sizeof(*(v)), sem);				\
+	else if ((i) == 16)								\
+		IA64_FETCHADD(_tmp, _v, 16, sizeof(*(v)), sem);				\
+	else										\
+		_tmp = __bad_increment_for_ia64_fetch_and_add();			\
+	(__typeof__(*(v))) (_tmp);	/* return old value */				\
+})
+
+#define ia64_fetch_and_add(i,v)	(ia64_fetchadd(i, v, rel) + (i)) /* return new value */
+
+#endif
+
+
+#ifndef __ASSEMBLY__
+
+#define IA64_INTRINSIC_API(name)	ia64_native_ ## name
+#define IA64_INTRINSIC_MACRO(name)	ia64_native_ ## name
+
+
+/************************************************/
+/* Instructions paravirtualized for correctness */
+/************************************************/
+/* fc, thash, get_cpuid, get_pmd, get_eflags, set_eflags */
+/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
+ * is not currently used (though it may be in a long-format VHPT system!)
+ */
+#define ia64_fc				IA64_INTRINSIC_API(fc)
+#define ia64_thash			IA64_INTRINSIC_API(thash)
+#define ia64_get_cpuid			IA64_INTRINSIC_API(get_cpuid)
+#define ia64_get_pmd			IA64_INTRINSIC_API(get_pmd)
+
+
+/************************************************/
+/* Instructions paravirtualized for performance */
+/************************************************/
+#define ia64_ssm			IA64_INTRINSIC_MACRO(ssm)
+#define ia64_rsm			IA64_INTRINSIC_MACRO(rsm)
+#define ia64_getreg			IA64_INTRINSIC_MACRO(getreg)
+#define ia64_setreg			IA64_INTRINSIC_API(setreg)
+#define ia64_set_rr			IA64_INTRINSIC_API(set_rr)
+#define ia64_get_rr			IA64_INTRINSIC_API(get_rr)
+#define ia64_ptcga			IA64_INTRINSIC_API(ptcga)
+#define ia64_get_psr_i			IA64_INTRINSIC_API(get_psr_i)
+#define ia64_intrin_local_irq_restore	\
+	IA64_INTRINSIC_API(intrin_local_irq_restore)
+#define ia64_set_rr0_to_rr4		IA64_INTRINSIC_API(set_rr0_to_rr4)
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _UAPI_ASM_IA64_INTRINSICS_H */
diff --git a/arch/ia64/include/uapi/asm/ioctl.h b/arch/ia64/include/uapi/asm/ioctl.h
new file mode 100644
index 00000000000..b279fe06dfe
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/ioctl.h
@@ -0,0 +1 @@
+#include <asm-generic/ioctl.h>
diff --git a/arch/ia64/include/uapi/asm/ioctls.h b/arch/ia64/include/uapi/asm/ioctls.h
new file mode 100644
index 00000000000..f3aab5512e9
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/ioctls.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_IA64_IOCTLS_H
+#define _ASM_IA64_IOCTLS_H
+
+#include <asm-generic/ioctls.h>
+
+#endif /* _ASM_IA64_IOCTLS_H */
diff --git a/arch/ia64/include/uapi/asm/ipcbuf.h b/arch/ia64/include/uapi/asm/ipcbuf.h
new file mode 100644
index 00000000000..84c7e51cb6d
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/ipcbuf.h
@@ -0,0 +1 @@
+#include <asm-generic/ipcbuf.h>
diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h
new file mode 100644
index 00000000000..99503c28440
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/kvm.h
@@ -0,0 +1,268 @@
+#ifndef __ASM_IA64_KVM_H
+#define __ASM_IA64_KVM_H
+
+/*
+ * kvm structure definitions  for ia64
+ *
+ * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/* Select x86 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_IRQ_LINE
+
+/* Architectural interrupt line count. */
+#define KVM_NR_INTERRUPTS 256
+
+#define KVM_IOAPIC_NUM_PINS  48
+
+struct kvm_ioapic_state {
+	__u64 base_address;
+	__u32 ioregsel;
+	__u32 id;
+	__u32 irr;
+	__u32 pad;
+	union {
+		__u64 bits;
+		struct {
+			__u8 vector;
+			__u8 delivery_mode:3;
+			__u8 dest_mode:1;
+			__u8 delivery_status:1;
+			__u8 polarity:1;
+			__u8 remote_irr:1;
+			__u8 trig_mode:1;
+			__u8 mask:1;
+			__u8 reserve:7;
+			__u8 reserved[4];
+			__u8 dest_id;
+		} fields;
+	} redirtbl[KVM_IOAPIC_NUM_PINS];
+};
+
+#define KVM_IRQCHIP_PIC_MASTER   0
+#define KVM_IRQCHIP_PIC_SLAVE    1
+#define KVM_IRQCHIP_IOAPIC       2
+#define KVM_NR_IRQCHIPS          3
+
+#define KVM_CONTEXT_SIZE	8*1024
+
+struct kvm_fpreg {
+	union {
+		unsigned long bits[2];
+		long double __dummy;	/* force 16-byte alignment */
+	} u;
+};
+
+union context {
+	/* 8K size */
+	char	dummy[KVM_CONTEXT_SIZE];
+	struct {
+		unsigned long       psr;
+		unsigned long       pr;
+		unsigned long       caller_unat;
+		unsigned long       pad;
+		unsigned long       gr[32];
+		unsigned long       ar[128];
+		unsigned long       br[8];
+		unsigned long       cr[128];
+		unsigned long       rr[8];
+		unsigned long       ibr[8];
+		unsigned long       dbr[8];
+		unsigned long       pkr[8];
+		struct kvm_fpreg   fr[128];
+	};
+};
+
+struct thash_data {
+	union {
+		struct {
+			unsigned long p    :  1; /* 0 */
+			unsigned long rv1  :  1; /* 1 */
+			unsigned long ma   :  3; /* 2-4 */
+			unsigned long a    :  1; /* 5 */
+			unsigned long d    :  1; /* 6 */
+			unsigned long pl   :  2; /* 7-8 */
+			unsigned long ar   :  3; /* 9-11 */
+			unsigned long ppn  : 38; /* 12-49 */
+			unsigned long rv2  :  2; /* 50-51 */
+			unsigned long ed   :  1; /* 52 */
+			unsigned long ig1  : 11; /* 53-63 */
+		};
+		struct {
+			unsigned long __rv1 : 53;     /* 0-52 */
+			unsigned long contiguous : 1; /*53 */
+			unsigned long tc : 1;         /* 54 TR or TC */
+			unsigned long cl : 1;
+			/* 55 I side or D side cache line */
+			unsigned long len  :  4;      /* 56-59 */
+			unsigned long io  : 1;	/* 60 entry is for io or not */
+			unsigned long nomap : 1;
+			/* 61 entry cann't be inserted into machine TLB.*/
+			unsigned long checked : 1;
+			/* 62 for VTLB/VHPT sanity check */
+			unsigned long invalid : 1;
+			/* 63 invalid entry */
+		};
+		unsigned long page_flags;
+	};                  /* same for VHPT and TLB */
+
+	union {
+		struct {
+			unsigned long rv3  :  2;
+			unsigned long ps   :  6;
+			unsigned long key  : 24;
+			unsigned long rv4  : 32;
+		};
+		unsigned long itir;
+	};
+	union {
+		struct {
+			unsigned long ig2  :  12;
+			unsigned long vpn  :  49;
+			unsigned long vrn  :   3;
+		};
+		unsigned long ifa;
+		unsigned long vadr;
+		struct {
+			unsigned long tag  :  63;
+			unsigned long ti   :  1;
+		};
+		unsigned long etag;
+	};
+	union {
+		struct thash_data *next;
+		unsigned long rid;
+		unsigned long gpaddr;
+	};
+};
+
+#define	NITRS	8
+#define NDTRS	8
+
+struct saved_vpd {
+	unsigned long  vhpi;
+	unsigned long  vgr[16];
+	unsigned long  vbgr[16];
+	unsigned long  vnat;
+	unsigned long  vbnat;
+	unsigned long  vcpuid[5];
+	unsigned long  vpsr;
+	unsigned long  vpr;
+	union {
+		unsigned long  vcr[128];
+		struct {
+			unsigned long dcr;
+			unsigned long itm;
+			unsigned long iva;
+			unsigned long rsv1[5];
+			unsigned long pta;
+			unsigned long rsv2[7];
+			unsigned long ipsr;
+			unsigned long isr;
+			unsigned long rsv3;
+			unsigned long iip;
+			unsigned long ifa;
+			unsigned long itir;
+			unsigned long iipa;
+			unsigned long ifs;
+			unsigned long iim;
+			unsigned long iha;
+			unsigned long rsv4[38];
+			unsigned long lid;
+			unsigned long ivr;
+			unsigned long tpr;
+			unsigned long eoi;
+			unsigned long irr[4];
+			unsigned long itv;
+			unsigned long pmv;
+			unsigned long cmcv;
+			unsigned long rsv5[5];
+			unsigned long lrr0;
+			unsigned long lrr1;
+			unsigned long rsv6[46];
+		};
+	};
+};
+
+struct kvm_regs {
+	struct saved_vpd vpd;
+	/*Arch-regs*/
+	int mp_state;
+	unsigned long vmm_rr;
+	/* TR and TC.  */
+	struct thash_data itrs[NITRS];
+	struct thash_data dtrs[NDTRS];
+	/* Bit is set if there is a tr/tc for the region.  */
+	unsigned char itr_regions;
+	unsigned char dtr_regions;
+	unsigned char tc_regions;
+
+	char irq_check;
+	unsigned long saved_itc;
+	unsigned long itc_check;
+	unsigned long timer_check;
+	unsigned long timer_pending;
+	unsigned long last_itc;
+
+	unsigned long vrr[8];
+	unsigned long ibr[8];
+	unsigned long dbr[8];
+	unsigned long insvc[4];		/* Interrupt in service.  */
+	unsigned long xtp;
+
+	unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
+	unsigned long metaphysical_rr4;	/* from kvm_arch (so is pinned) */
+	unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
+	unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
+	unsigned long fp_psr;       /*used for lazy float register */
+	unsigned long saved_gp;
+	/*for phycial  emulation */
+
+	union context saved_guest;
+
+	unsigned long reserved[64];	/* for future use */
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+#define KVM_IA64_VCPU_STACK_SHIFT	16
+#define KVM_IA64_VCPU_STACK_SIZE	(1UL << KVM_IA64_VCPU_STACK_SHIFT)
+
+struct kvm_ia64_vcpu_stack {
+	unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
+};
+
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
+#endif
diff --git a/arch/ia64/include/uapi/asm/mman.h b/arch/ia64/include/uapi/asm/mman.h
new file mode 100644
index 00000000000..8740819adc5
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/mman.h
@@ -0,0 +1,16 @@
+/*
+ * Based on <asm-i386/mman.h>.
+ *
+ * Modified 1998-2000, 2002
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+#ifndef _UAPI_ASM_IA64_MMAN_H
+#define _UAPI_ASM_IA64_MMAN_H
+
+
+#include <asm-generic/mman.h>
+
+#define MAP_GROWSUP	0x0200		/* register stack-like segment */
+
+
+#endif /* _UAPI_ASM_IA64_MMAN_H */
diff --git a/arch/ia64/include/uapi/asm/msgbuf.h b/arch/ia64/include/uapi/asm/msgbuf.h
new file mode 100644
index 00000000000..6c64c0d2aae
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/msgbuf.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_IA64_MSGBUF_H
+#define _ASM_IA64_MSGBUF_H
+
+/*
+ * The msqid64_ds structure for IA-64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+	__kernel_time_t msg_stime;	/* last msgsnd time */
+	__kernel_time_t msg_rtime;	/* last msgrcv time */
+	__kernel_time_t msg_ctime;	/* last change time */
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused1;
+	unsigned long  __unused2;
+};
+
+#endif /* _ASM_IA64_MSGBUF_H */
diff --git a/arch/ia64/include/uapi/asm/param.h b/arch/ia64/include/uapi/asm/param.h
new file mode 100644
index 00000000000..d7da41d9497
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/param.h
@@ -0,0 +1,29 @@
+/*
+ * Fundamental kernel parameters.
+ *
+ * Based on <asm-i386/param.h>.
+ *
+ * Modified 1998, 1999, 2002-2003
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+#ifndef _UAPI_ASM_IA64_PARAM_H
+#define _UAPI_ASM_IA64_PARAM_H
+
+
+#define EXEC_PAGESIZE	65536
+
+#ifndef NOGROUP
+# define NOGROUP	(-1)
+#endif
+
+#define MAXHOSTNAMELEN	64	/* max length of hostname */
+
+#ifndef __KERNEL__
+   /*
+    * Technically, this is wrong, but some old apps still refer to it.  The proper way to
+    * get the HZ value is via sysconf(_SC_CLK_TCK).
+    */
+# define HZ 1024
+#endif
+
+#endif /* _UAPI_ASM_IA64_PARAM_H */
diff --git a/arch/ia64/include/uapi/asm/perfmon.h b/arch/ia64/include/uapi/asm/perfmon.h
new file mode 100644
index 00000000000..1a10a2dd58a
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/perfmon.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2001-2003 Hewlett-Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ */
+
+#ifndef _UAPI_ASM_IA64_PERFMON_H
+#define _UAPI_ASM_IA64_PERFMON_H
+
+/*
+ * perfmon commands supported on all CPU models
+ */
+#define PFM_WRITE_PMCS		0x01
+#define PFM_WRITE_PMDS		0x02
+#define PFM_READ_PMDS		0x03
+#define PFM_STOP		0x04
+#define PFM_START		0x05
+#define PFM_ENABLE		0x06 /* obsolete */
+#define PFM_DISABLE		0x07 /* obsolete */
+#define PFM_CREATE_CONTEXT	0x08
+#define PFM_DESTROY_CONTEXT	0x09 /* obsolete use close() */
+#define PFM_RESTART		0x0a
+#define PFM_PROTECT_CONTEXT	0x0b /* obsolete */
+#define PFM_GET_FEATURES	0x0c
+#define PFM_DEBUG		0x0d
+#define PFM_UNPROTECT_CONTEXT	0x0e /* obsolete */
+#define PFM_GET_PMC_RESET_VAL	0x0f
+#define PFM_LOAD_CONTEXT	0x10
+#define PFM_UNLOAD_CONTEXT	0x11
+
+/*
+ * PMU model specific commands (may not be supported on all PMU models)
+ */
+#define PFM_WRITE_IBRS		0x20
+#define PFM_WRITE_DBRS		0x21
+
+/*
+ * context flags
+ */
+#define PFM_FL_NOTIFY_BLOCK    	 0x01	/* block task on user level notifications */
+#define PFM_FL_SYSTEM_WIDE	 0x02	/* create a system wide context */
+#define PFM_FL_OVFL_NO_MSG	 0x80   /* do not post overflow/end messages for notification */
+
+/*
+ * event set flags
+ */
+#define PFM_SETFL_EXCL_IDLE      0x01   /* exclude idle task (syswide only) XXX: DO NOT USE YET */
+
+/*
+ * PMC flags
+ */
+#define PFM_REGFL_OVFL_NOTIFY	0x1	/* send notification on overflow */
+#define PFM_REGFL_RANDOM	0x2	/* randomize sampling interval   */
+
+/*
+ * PMD/PMC/IBR/DBR return flags (ignored on input)
+ *
+ * Those flags are used on output and must be checked in case EAGAIN is returned
+ * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure.
+ */
+#define PFM_REG_RETFL_NOTAVAIL	(1UL<<31) /* set if register is implemented but not available */
+#define PFM_REG_RETFL_EINVAL	(1UL<<30) /* set if register entry is invalid */
+#define PFM_REG_RETFL_MASK	(PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL)
+
+#define PFM_REG_HAS_ERROR(flag)	(((flag) & PFM_REG_RETFL_MASK) != 0)
+
+typedef unsigned char pfm_uuid_t[16];	/* custom sampling buffer identifier type */
+
+/*
+ * Request structure used to define a context
+ */
+typedef struct {
+	pfm_uuid_t     ctx_smpl_buf_id;	 /* which buffer format to use (if needed) */
+	unsigned long  ctx_flags;	 /* noblock/block */
+	unsigned short ctx_nextra_sets;	 /* number of extra event sets (you always get 1) */
+	unsigned short ctx_reserved1;	 /* for future use */
+	int	       ctx_fd;		 /* return arg: unique identification for context */
+	void	       *ctx_smpl_vaddr;	 /* return arg: virtual address of sampling buffer, is used */
+	unsigned long  ctx_reserved2[11];/* for future use */
+} pfarg_context_t;
+
+/*
+ * Request structure used to write/read a PMC or PMD
+ */
+typedef struct {
+	unsigned int	reg_num;	   /* which register */
+	unsigned short	reg_set;	   /* event set for this register */
+	unsigned short	reg_reserved1;	   /* for future use */
+
+	unsigned long	reg_value;	   /* initial pmc/pmd value */
+	unsigned long	reg_flags;	   /* input: pmc/pmd flags, return: reg error */
+
+	unsigned long	reg_long_reset;	   /* reset after buffer overflow notification */
+	unsigned long	reg_short_reset;   /* reset after counter overflow */
+
+	unsigned long	reg_reset_pmds[4]; /* which other counters to reset on overflow */
+	unsigned long	reg_random_seed;   /* seed value when randomization is used */
+	unsigned long	reg_random_mask;   /* bitmask used to limit random value */
+	unsigned long   reg_last_reset_val;/* return: PMD last reset value */
+
+	unsigned long	reg_smpl_pmds[4];  /* which pmds are accessed when PMC overflows */
+	unsigned long	reg_smpl_eventid;  /* opaque sampling event identifier */
+
+	unsigned long   reg_reserved2[3];   /* for future use */
+} pfarg_reg_t;
+
+typedef struct {
+	unsigned int	dbreg_num;		/* which debug register */
+	unsigned short	dbreg_set;		/* event set for this register */
+	unsigned short	dbreg_reserved1;	/* for future use */
+	unsigned long	dbreg_value;		/* value for debug register */
+	unsigned long	dbreg_flags;		/* return: dbreg error */
+	unsigned long	dbreg_reserved2[1];	/* for future use */
+} pfarg_dbreg_t;
+
+typedef struct {
+	unsigned int	ft_version;	/* perfmon: major [16-31], minor [0-15] */
+	unsigned int	ft_reserved;	/* reserved for future use */
+	unsigned long	reserved[4];	/* for future use */
+} pfarg_features_t;
+
+typedef struct {
+	pid_t		load_pid;	   /* process to load the context into */
+	unsigned short	load_set;	   /* first event set to load */
+	unsigned short	load_reserved1;	   /* for future use */
+	unsigned long	load_reserved2[3]; /* for future use */
+} pfarg_load_t;
+
+typedef struct {
+	int		msg_type;		/* generic message header */
+	int		msg_ctx_fd;		/* generic message header */
+	unsigned long	msg_ovfl_pmds[4];	/* which PMDs overflowed */
+	unsigned short  msg_active_set;		/* active set at the time of overflow */
+	unsigned short  msg_reserved1;		/* for future use */
+	unsigned int    msg_reserved2;		/* for future use */
+	unsigned long	msg_tstamp;		/* for perf tuning/debug */
+} pfm_ovfl_msg_t;
+
+typedef struct {
+	int		msg_type;		/* generic message header */
+	int		msg_ctx_fd;		/* generic message header */
+	unsigned long	msg_tstamp;		/* for perf tuning */
+} pfm_end_msg_t;
+
+typedef struct {
+	int		msg_type;		/* type of the message */
+	int		msg_ctx_fd;		/* unique identifier for the context */
+	unsigned long	msg_tstamp;		/* for perf tuning */
+} pfm_gen_msg_t;
+
+#define PFM_MSG_OVFL	1	/* an overflow happened */
+#define PFM_MSG_END	2	/* task to which context was attached ended */
+
+typedef union {
+	pfm_ovfl_msg_t	pfm_ovfl_msg;
+	pfm_end_msg_t	pfm_end_msg;
+	pfm_gen_msg_t	pfm_gen_msg;
+} pfm_msg_t;
+
+/*
+ * Define the version numbers for both perfmon as a whole and the sampling buffer format.
+ */
+#define PFM_VERSION_MAJ		 2U
+#define PFM_VERSION_MIN		 0U
+#define PFM_VERSION		 (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
+#define PFM_VERSION_MAJOR(x)	 (((x)>>16) & 0xffff)
+#define PFM_VERSION_MINOR(x)	 ((x) & 0xffff)
+
+
+/*
+ * miscellaneous architected definitions
+ */
+#define PMU_FIRST_COUNTER	4	/* first counting monitor (PMC/PMD) */
+#define PMU_MAX_PMCS		256	/* maximum architected number of PMC registers */
+#define PMU_MAX_PMDS		256	/* maximum architected number of PMD registers */
+
+
+#endif /* _UAPI_ASM_IA64_PERFMON_H */
diff --git a/arch/ia64/include/uapi/asm/perfmon_default_smpl.h b/arch/ia64/include/uapi/asm/perfmon_default_smpl.h
new file mode 100644
index 00000000000..a2d560c6723
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/perfmon_default_smpl.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * This file implements the default sampling buffer format
+ * for Linux/ia64 perfmon subsystem.
+ */
+#ifndef __PERFMON_DEFAULT_SMPL_H__
+#define __PERFMON_DEFAULT_SMPL_H__ 1
+
+#define PFM_DEFAULT_SMPL_UUID { \
+		0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82, 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
+
+/*
+ * format specific parameters (passed at context creation)
+ */
+typedef struct {
+	unsigned long buf_size;		/* size of the buffer in bytes */
+	unsigned int  flags;		/* buffer specific flags */
+	unsigned int  res1;		/* for future use */
+	unsigned long reserved[2];	/* for future use */
+} pfm_default_smpl_arg_t;
+
+/*
+ * combined context+format specific structure. Can be passed
+ * to PFM_CONTEXT_CREATE
+ */
+typedef struct {
+	pfarg_context_t		ctx_arg;
+	pfm_default_smpl_arg_t	buf_arg;
+} pfm_default_smpl_ctx_arg_t;
+
+/*
+ * This header is at the beginning of the sampling buffer returned to the user.
+ * It is directly followed by the first record.
+ */
+typedef struct {
+	unsigned long	hdr_count;		/* how many valid entries */
+	unsigned long	hdr_cur_offs;		/* current offset from top of buffer */
+	unsigned long	hdr_reserved2;		/* reserved for future use */
+
+	unsigned long	hdr_overflows;		/* how many times the buffer overflowed */
+	unsigned long   hdr_buf_size;		/* how many bytes in the buffer */
+
+	unsigned int	hdr_version;		/* contains perfmon version (smpl format diffs) */
+	unsigned int	hdr_reserved1;		/* for future use */
+	unsigned long	hdr_reserved[10];	/* for future use */
+} pfm_default_smpl_hdr_t;
+
+/*
+ * Entry header in the sampling buffer.  The header is directly followed
+ * with the values of the PMD registers of interest saved in increasing 
+ * index order: PMD4, PMD5, and so on. How many PMDs are present depends 
+ * on how the session was programmed.
+ *
+ * In the case where multiple counters overflow at the same time, multiple
+ * entries are written consecutively.
+ *
+ * last_reset_value member indicates the initial value of the overflowed PMD. 
+ */
+typedef struct {
+        int             pid;                    /* thread id (for NPTL, this is gettid()) */
+        unsigned char   reserved1[3];           /* reserved for future use */
+        unsigned char   ovfl_pmd;               /* index of overflowed PMD */
+
+        unsigned long   last_reset_val;         /* initial value of overflowed PMD */
+        unsigned long   ip;                     /* where did the overflow interrupt happened  */
+        unsigned long   tstamp;                 /* ar.itc when entering perfmon intr. handler */
+
+        unsigned short  cpu;                    /* cpu on which the overflow occurred */
+        unsigned short  set;                    /* event set active when overflow occurred   */
+        int    		tgid;              	/* thread group id (for NPTL, this is getpid()) */
+} pfm_default_smpl_entry_t;
+
+#define PFM_DEFAULT_MAX_PMDS		64 /* how many pmds supported by data structures (sizeof(unsigned long) */
+#define PFM_DEFAULT_MAX_ENTRY_SIZE	(sizeof(pfm_default_smpl_entry_t)+(sizeof(unsigned long)*PFM_DEFAULT_MAX_PMDS))
+#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE	(sizeof(pfm_default_smpl_hdr_t)+PFM_DEFAULT_MAX_ENTRY_SIZE)
+
+#define PFM_DEFAULT_SMPL_VERSION_MAJ	2U
+#define PFM_DEFAULT_SMPL_VERSION_MIN	0U
+#define PFM_DEFAULT_SMPL_VERSION	(((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|(PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff))
+
+#endif /* __PERFMON_DEFAULT_SMPL_H__ */
diff --git a/arch/ia64/include/uapi/asm/poll.h b/arch/ia64/include/uapi/asm/poll.h
new file mode 100644
index 00000000000..c98509d3149
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/poll.h
@@ -0,0 +1 @@
+#include <asm-generic/poll.h>
diff --git a/arch/ia64/include/uapi/asm/posix_types.h b/arch/ia64/include/uapi/asm/posix_types.h
new file mode 100644
index 00000000000..99ee1d6510c
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/posix_types.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_IA64_POSIX_TYPES_H
+#define _ASM_IA64_POSIX_TYPES_H
+
+typedef unsigned long	__kernel_sigset_t;	/* at least 32 bits */
+
+#include <asm-generic/posix_types.h>
+
+#endif /* _ASM_IA64_POSIX_TYPES_H */
diff --git a/arch/ia64/include/uapi/asm/ptrace.h b/arch/ia64/include/uapi/asm/ptrace.h
new file mode 100644
index 00000000000..0a02f634e12
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/ptrace.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 1998-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2003 Intel Co
+ *	Suresh Siddha <suresh.b.siddha@intel.com>
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *	Arun Sharma <arun.sharma@intel.com>
+ *
+ * 12/07/98	S. Eranian	added pt_regs & switch_stack
+ * 12/21/98	D. Mosberger	updated to match latest code
+ *  6/17/99	D. Mosberger	added second unat member to "struct switch_stack"
+ *
+ */
+#ifndef _UAPI_ASM_IA64_PTRACE_H
+#define _UAPI_ASM_IA64_PTRACE_H
+
+/*
+ * When a user process is blocked, its state looks as follows:
+ *
+ *            +----------------------+	-------	IA64_STK_OFFSET
+ *     	      |			     |	 ^
+ *            | struct pt_regs       |	 |
+ *	      |			     |	 |
+ *            +----------------------+	 |
+ *	      |			     |	 |
+ *     	      |	   memory stack	     |	 |
+ *	      |	(growing downwards)  |	 |
+ *	      //.....................//	 |
+ *					 |
+ *	      //.....................//	 |
+ *	      |			     |	 |
+ *            +----------------------+	 |
+ *            | struct switch_stack  |	 |
+ *	      |			     |	 |
+ *	      +----------------------+	 |
+ *	      |			     |	 |
+ *	      //.....................//	 |
+ *					 |
+ *	      //.....................//	 |
+ *	      |			     |	 |
+ *	      |	 register stack	     |	 |
+ *	      |	(growing upwards)    |	 |
+ *            |			     |	 |
+ *	      +----------------------+	 |  ---	IA64_RBS_OFFSET
+ *            |  struct thread_info  |	 |  ^
+ *	      +----------------------+	 |  |
+ *	      |			     |	 |  |
+ *            |  struct task_struct  |	 |  |
+ * current -> |			     |   |  |
+ *	      +----------------------+ -------
+ *
+ * Note that ar.ec is not saved explicitly in pt_reg or switch_stack.
+ * This is because ar.ec is saved as part of ar.pfs.
+ */
+
+
+#include <asm/fpu.h>
+
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This struct defines the way the registers are saved on system
+ * calls.
+ *
+ * We don't save all floating point register because the kernel
+ * is compiled to use only a very small subset, so the other are
+ * untouched.
+ *
+ * THIS STRUCTURE MUST BE A MULTIPLE 16-BYTE IN SIZE
+ * (because the memory stack pointer MUST ALWAYS be aligned this way)
+ *
+ */
+struct pt_regs {
+	/* The following registers are saved by SAVE_MIN: */
+	unsigned long b6;		/* scratch */
+	unsigned long b7;		/* scratch */
+
+	unsigned long ar_csd;           /* used by cmp8xchg16 (scratch) */
+	unsigned long ar_ssd;           /* reserved for future use (scratch) */
+
+	unsigned long r8;		/* scratch (return value register 0) */
+	unsigned long r9;		/* scratch (return value register 1) */
+	unsigned long r10;		/* scratch (return value register 2) */
+	unsigned long r11;		/* scratch (return value register 3) */
+
+	unsigned long cr_ipsr;		/* interrupted task's psr */
+	unsigned long cr_iip;		/* interrupted task's instruction pointer */
+	/*
+	 * interrupted task's function state; if bit 63 is cleared, it
+	 * contains syscall's ar.pfs.pfm:
+	 */
+	unsigned long cr_ifs;
+
+	unsigned long ar_unat;		/* interrupted task's NaT register (preserved) */
+	unsigned long ar_pfs;		/* prev function state  */
+	unsigned long ar_rsc;		/* RSE configuration */
+	/* The following two are valid only if cr_ipsr.cpl > 0 || ti->flags & _TIF_MCA_INIT */
+	unsigned long ar_rnat;		/* RSE NaT */
+	unsigned long ar_bspstore;	/* RSE bspstore */
+
+	unsigned long pr;		/* 64 predicate registers (1 bit each) */
+	unsigned long b0;		/* return pointer (bp) */
+	unsigned long loadrs;		/* size of dirty partition << 16 */
+
+	unsigned long r1;		/* the gp pointer */
+	unsigned long r12;		/* interrupted task's memory stack pointer */
+	unsigned long r13;		/* thread pointer */
+
+	unsigned long ar_fpsr;		/* floating point status (preserved) */
+	unsigned long r15;		/* scratch */
+
+	/* The remaining registers are NOT saved for system calls.  */
+
+	unsigned long r14;		/* scratch */
+	unsigned long r2;		/* scratch */
+	unsigned long r3;		/* scratch */
+
+	/* The following registers are saved by SAVE_REST: */
+	unsigned long r16;		/* scratch */
+	unsigned long r17;		/* scratch */
+	unsigned long r18;		/* scratch */
+	unsigned long r19;		/* scratch */
+	unsigned long r20;		/* scratch */
+	unsigned long r21;		/* scratch */
+	unsigned long r22;		/* scratch */
+	unsigned long r23;		/* scratch */
+	unsigned long r24;		/* scratch */
+	unsigned long r25;		/* scratch */
+	unsigned long r26;		/* scratch */
+	unsigned long r27;		/* scratch */
+	unsigned long r28;		/* scratch */
+	unsigned long r29;		/* scratch */
+	unsigned long r30;		/* scratch */
+	unsigned long r31;		/* scratch */
+
+	unsigned long ar_ccv;		/* compare/exchange value (scratch) */
+
+	/*
+	 * Floating point registers that the kernel considers scratch:
+	 */
+	struct ia64_fpreg f6;		/* scratch */
+	struct ia64_fpreg f7;		/* scratch */
+	struct ia64_fpreg f8;		/* scratch */
+	struct ia64_fpreg f9;		/* scratch */
+	struct ia64_fpreg f10;		/* scratch */
+	struct ia64_fpreg f11;		/* scratch */
+};
+
+/*
+ * This structure contains the addition registers that need to
+ * preserved across a context switch.  This generally consists of
+ * "preserved" registers.
+ */
+struct switch_stack {
+	unsigned long caller_unat;	/* user NaT collection register (preserved) */
+	unsigned long ar_fpsr;		/* floating-point status register */
+
+	struct ia64_fpreg f2;		/* preserved */
+	struct ia64_fpreg f3;		/* preserved */
+	struct ia64_fpreg f4;		/* preserved */
+	struct ia64_fpreg f5;		/* preserved */
+
+	struct ia64_fpreg f12;		/* scratch, but untouched by kernel */
+	struct ia64_fpreg f13;		/* scratch, but untouched by kernel */
+	struct ia64_fpreg f14;		/* scratch, but untouched by kernel */
+	struct ia64_fpreg f15;		/* scratch, but untouched by kernel */
+	struct ia64_fpreg f16;		/* preserved */
+	struct ia64_fpreg f17;		/* preserved */
+	struct ia64_fpreg f18;		/* preserved */
+	struct ia64_fpreg f19;		/* preserved */
+	struct ia64_fpreg f20;		/* preserved */
+	struct ia64_fpreg f21;		/* preserved */
+	struct ia64_fpreg f22;		/* preserved */
+	struct ia64_fpreg f23;		/* preserved */
+	struct ia64_fpreg f24;		/* preserved */
+	struct ia64_fpreg f25;		/* preserved */
+	struct ia64_fpreg f26;		/* preserved */
+	struct ia64_fpreg f27;		/* preserved */
+	struct ia64_fpreg f28;		/* preserved */
+	struct ia64_fpreg f29;		/* preserved */
+	struct ia64_fpreg f30;		/* preserved */
+	struct ia64_fpreg f31;		/* preserved */
+
+	unsigned long r4;		/* preserved */
+	unsigned long r5;		/* preserved */
+	unsigned long r6;		/* preserved */
+	unsigned long r7;		/* preserved */
+
+	unsigned long b0;		/* so we can force a direct return in copy_thread */
+	unsigned long b1;
+	unsigned long b2;
+	unsigned long b3;
+	unsigned long b4;
+	unsigned long b5;
+
+	unsigned long ar_pfs;		/* previous function state */
+	unsigned long ar_lc;		/* loop counter (preserved) */
+	unsigned long ar_unat;		/* NaT bits for r4-r7 */
+	unsigned long ar_rnat;		/* RSE NaT collection register */
+	unsigned long ar_bspstore;	/* RSE dirty base (preserved) */
+	unsigned long pr;		/* 64 predicate registers (1 bit each) */
+};
+
+
+/* pt_all_user_regs is used for PTRACE_GETREGS PTRACE_SETREGS */
+struct pt_all_user_regs {
+	unsigned long nat;
+	unsigned long cr_iip;
+	unsigned long cfm;
+	unsigned long cr_ipsr;
+	unsigned long pr;
+
+	unsigned long gr[32];
+	unsigned long br[8];
+	unsigned long ar[128];
+	struct ia64_fpreg fr[128];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+/* indices to application-registers array in pt_all_user_regs */
+#define PT_AUR_RSC	16
+#define PT_AUR_BSP	17
+#define PT_AUR_BSPSTORE	18
+#define PT_AUR_RNAT	19
+#define PT_AUR_CCV	32
+#define PT_AUR_UNAT	36
+#define PT_AUR_FPSR	40
+#define PT_AUR_PFS	64
+#define PT_AUR_LC	65
+#define PT_AUR_EC	66
+
+/*
+ * The numbers chosen here are somewhat arbitrary but absolutely MUST
+ * not overlap with any of the number assigned in <linux/ptrace.h>.
+ */
+#define PTRACE_SINGLEBLOCK	12	/* resume execution until next branch */
+#define PTRACE_OLD_GETSIGINFO	13	/* (replaced by PTRACE_GETSIGINFO in <linux/ptrace.h>)  */
+#define PTRACE_OLD_SETSIGINFO	14	/* (replaced by PTRACE_SETSIGINFO in <linux/ptrace.h>)  */
+#define PTRACE_GETREGS		18	/* get all registers (pt_all_user_regs) in one shot */
+#define PTRACE_SETREGS		19	/* set all registers (pt_all_user_regs) in one shot */
+
+#define PTRACE_OLDSETOPTIONS	21
+
+#endif /* _UAPI_ASM_IA64_PTRACE_H */
diff --git a/arch/ia64/include/uapi/asm/ptrace_offsets.h b/arch/ia64/include/uapi/asm/ptrace_offsets.h
new file mode 100644
index 00000000000..b712773c759
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/ptrace_offsets.h
@@ -0,0 +1,268 @@
+#ifndef _ASM_IA64_PTRACE_OFFSETS_H
+#define _ASM_IA64_PTRACE_OFFSETS_H
+
+/*
+ * Copyright (C) 1999, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+/*
+ * The "uarea" that can be accessed via PEEKUSER and POKEUSER is a
+ * virtual structure that would have the following definition:
+ *
+ *	struct uarea {
+ *		struct ia64_fpreg fph[96];		// f32-f127
+ *		unsigned long nat_bits;
+ *		unsigned long empty1;
+ *		struct ia64_fpreg f2;			// f2-f5
+ *			:
+ *		struct ia64_fpreg f5;
+ *		struct ia64_fpreg f10;			// f10-f31
+ *			:
+ *		struct ia64_fpreg f31;
+ *		unsigned long r4;			// r4-r7
+ *			:
+ *		unsigned long r7;
+ *		unsigned long b1;			// b1-b5
+ *			:
+ *		unsigned long b5;
+ *		unsigned long ar_ec;
+ *		unsigned long ar_lc;
+ *		unsigned long empty2[5];
+ *		unsigned long cr_ipsr;
+ *		unsigned long cr_iip;
+ *		unsigned long cfm;
+ *		unsigned long ar_unat;
+ *		unsigned long ar_pfs;
+ *		unsigned long ar_rsc;
+ *		unsigned long ar_rnat;
+ *		unsigned long ar_bspstore;
+ *		unsigned long pr;
+ *		unsigned long b6;
+ *		unsigned long ar_bsp;
+ *		unsigned long r1;
+ *		unsigned long r2;
+ *		unsigned long r3;
+ *		unsigned long r12;
+ *		unsigned long r13;
+ *		unsigned long r14;
+ *		unsigned long r15;
+ *		unsigned long r8;
+ *		unsigned long r9;
+ *		unsigned long r10;
+ *		unsigned long r11;
+ *		unsigned long r16;
+ *			:
+ *		unsigned long r31;
+ *		unsigned long ar_ccv;
+ *		unsigned long ar_fpsr;
+ *		unsigned long b0;
+ *		unsigned long b7;
+ *		unsigned long f6;
+ *		unsigned long f7;
+ *		unsigned long f8;
+ *		unsigned long f9;
+ *		unsigned long ar_csd;
+ *		unsigned long ar_ssd;
+ *		unsigned long rsvd1[710];
+ *		unsigned long dbr[8];
+ *		unsigned long rsvd2[504];
+ *		unsigned long ibr[8];
+ *		unsigned long rsvd3[504];
+ *		unsigned long pmd[4];
+ *	}
+ */
+
+/* fph: */
+#define PT_F32			0x0000
+#define PT_F33			0x0010
+#define PT_F34			0x0020
+#define PT_F35			0x0030
+#define PT_F36			0x0040
+#define PT_F37			0x0050
+#define PT_F38			0x0060
+#define PT_F39			0x0070
+#define PT_F40			0x0080
+#define PT_F41			0x0090
+#define PT_F42			0x00a0
+#define PT_F43			0x00b0
+#define PT_F44			0x00c0
+#define PT_F45			0x00d0
+#define PT_F46			0x00e0
+#define PT_F47			0x00f0
+#define PT_F48			0x0100
+#define PT_F49			0x0110
+#define PT_F50			0x0120
+#define PT_F51			0x0130
+#define PT_F52			0x0140
+#define PT_F53			0x0150
+#define PT_F54			0x0160
+#define PT_F55			0x0170
+#define PT_F56			0x0180
+#define PT_F57			0x0190
+#define PT_F58			0x01a0
+#define PT_F59			0x01b0
+#define PT_F60			0x01c0
+#define PT_F61			0x01d0
+#define PT_F62			0x01e0
+#define PT_F63			0x01f0
+#define PT_F64			0x0200
+#define PT_F65			0x0210
+#define PT_F66			0x0220
+#define PT_F67			0x0230
+#define PT_F68			0x0240
+#define PT_F69			0x0250
+#define PT_F70			0x0260
+#define PT_F71			0x0270
+#define PT_F72			0x0280
+#define PT_F73			0x0290
+#define PT_F74			0x02a0
+#define PT_F75			0x02b0
+#define PT_F76			0x02c0
+#define PT_F77			0x02d0
+#define PT_F78			0x02e0
+#define PT_F79			0x02f0
+#define PT_F80			0x0300
+#define PT_F81			0x0310
+#define PT_F82			0x0320
+#define PT_F83			0x0330
+#define PT_F84			0x0340
+#define PT_F85			0x0350
+#define PT_F86			0x0360
+#define PT_F87			0x0370
+#define PT_F88			0x0380
+#define PT_F89			0x0390
+#define PT_F90			0x03a0
+#define PT_F91			0x03b0
+#define PT_F92			0x03c0
+#define PT_F93			0x03d0
+#define PT_F94			0x03e0
+#define PT_F95			0x03f0
+#define PT_F96			0x0400
+#define PT_F97			0x0410
+#define PT_F98			0x0420
+#define PT_F99			0x0430
+#define PT_F100			0x0440
+#define PT_F101			0x0450
+#define PT_F102			0x0460
+#define PT_F103			0x0470
+#define PT_F104			0x0480
+#define PT_F105			0x0490
+#define PT_F106			0x04a0
+#define PT_F107			0x04b0
+#define PT_F108			0x04c0
+#define PT_F109			0x04d0
+#define PT_F110			0x04e0
+#define PT_F111			0x04f0
+#define PT_F112			0x0500
+#define PT_F113			0x0510
+#define PT_F114			0x0520
+#define PT_F115			0x0530
+#define PT_F116			0x0540
+#define PT_F117			0x0550
+#define PT_F118			0x0560
+#define PT_F119			0x0570
+#define PT_F120			0x0580
+#define PT_F121			0x0590
+#define PT_F122			0x05a0
+#define PT_F123			0x05b0
+#define PT_F124			0x05c0
+#define PT_F125			0x05d0
+#define PT_F126			0x05e0
+#define PT_F127			0x05f0
+
+#define PT_NAT_BITS		0x0600
+
+#define PT_F2			0x0610
+#define PT_F3			0x0620
+#define PT_F4			0x0630
+#define PT_F5			0x0640
+#define PT_F10			0x0650
+#define PT_F11			0x0660
+#define PT_F12			0x0670
+#define PT_F13			0x0680
+#define PT_F14			0x0690
+#define PT_F15			0x06a0
+#define PT_F16			0x06b0
+#define PT_F17			0x06c0
+#define PT_F18			0x06d0
+#define PT_F19			0x06e0
+#define PT_F20			0x06f0
+#define PT_F21			0x0700
+#define PT_F22			0x0710
+#define PT_F23			0x0720
+#define PT_F24			0x0730
+#define PT_F25			0x0740
+#define PT_F26			0x0750
+#define PT_F27			0x0760
+#define PT_F28			0x0770
+#define PT_F29			0x0780
+#define PT_F30			0x0790
+#define PT_F31			0x07a0
+#define PT_R4			0x07b0
+#define PT_R5			0x07b8
+#define PT_R6			0x07c0
+#define PT_R7			0x07c8
+
+#define PT_B1			0x07d8
+#define PT_B2			0x07e0
+#define PT_B3			0x07e8
+#define PT_B4			0x07f0
+#define PT_B5			0x07f8
+
+#define PT_AR_EC		0x0800
+#define PT_AR_LC		0x0808
+
+#define PT_CR_IPSR		0x0830
+#define PT_CR_IIP		0x0838
+#define PT_CFM			0x0840
+#define PT_AR_UNAT		0x0848
+#define PT_AR_PFS		0x0850
+#define PT_AR_RSC		0x0858
+#define PT_AR_RNAT		0x0860
+#define PT_AR_BSPSTORE		0x0868
+#define PT_PR			0x0870
+#define PT_B6			0x0878
+#define PT_AR_BSP		0x0880	/* note: this points to the *end* of the backing store! */
+#define PT_R1			0x0888
+#define PT_R2			0x0890
+#define PT_R3			0x0898
+#define PT_R12			0x08a0
+#define PT_R13			0x08a8
+#define PT_R14			0x08b0
+#define PT_R15			0x08b8
+#define PT_R8 			0x08c0
+#define PT_R9			0x08c8
+#define PT_R10			0x08d0
+#define PT_R11			0x08d8
+#define PT_R16			0x08e0
+#define PT_R17			0x08e8
+#define PT_R18			0x08f0
+#define PT_R19			0x08f8
+#define PT_R20			0x0900
+#define PT_R21			0x0908
+#define PT_R22			0x0910
+#define PT_R23			0x0918
+#define PT_R24			0x0920
+#define PT_R25			0x0928
+#define PT_R26			0x0930
+#define PT_R27			0x0938
+#define PT_R28			0x0940
+#define PT_R29			0x0948
+#define PT_R30			0x0950
+#define PT_R31			0x0958
+#define PT_AR_CCV		0x0960
+#define PT_AR_FPSR		0x0968
+#define PT_B0			0x0970
+#define PT_B7			0x0978
+#define PT_F6			0x0980
+#define PT_F7			0x0990
+#define PT_F8			0x09a0
+#define PT_F9			0x09b0
+#define PT_AR_CSD		0x09c0
+#define PT_AR_SSD		0x09c8
+
+#define PT_DBR			0x2000	/* data breakpoint registers */
+#define PT_IBR			0x3000	/* instruction breakpoint registers */
+#define PT_PMD			0x4000	/* performance monitoring counters */
+
+#endif /* _ASM_IA64_PTRACE_OFFSETS_H */
diff --git a/arch/ia64/include/uapi/asm/resource.h b/arch/ia64/include/uapi/asm/resource.h
new file mode 100644
index 00000000000..ba2272a87fc
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/resource.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_IA64_RESOURCE_H
+#define _ASM_IA64_RESOURCE_H
+
+#include <asm/ustack.h>
+#include <asm-generic/resource.h>
+
+#endif /* _ASM_IA64_RESOURCE_H */
diff --git a/arch/ia64/include/uapi/asm/rse.h b/arch/ia64/include/uapi/asm/rse.h
new file mode 100644
index 00000000000..02830a3b019
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/rse.h
@@ -0,0 +1,66 @@
+#ifndef _ASM_IA64_RSE_H
+#define _ASM_IA64_RSE_H
+
+/*
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Register stack engine related helper functions.  This file may be
+ * used in applications, so be careful about the name-space and give
+ * some consideration to non-GNU C compilers (though __inline__ is
+ * fine).
+ */
+
+static __inline__ unsigned long
+ia64_rse_slot_num (unsigned long *addr)
+{
+	return (((unsigned long) addr) >> 3) & 0x3f;
+}
+
+/*
+ * Return TRUE if ADDR is the address of an RNAT slot.
+ */
+static __inline__ unsigned long
+ia64_rse_is_rnat_slot (unsigned long *addr)
+{
+	return ia64_rse_slot_num(addr) == 0x3f;
+}
+
+/*
+ * Returns the address of the RNAT slot that covers the slot at
+ * address SLOT_ADDR.
+ */
+static __inline__ unsigned long *
+ia64_rse_rnat_addr (unsigned long *slot_addr)
+{
+	return (unsigned long *) ((unsigned long) slot_addr | (0x3f << 3));
+}
+
+/*
+ * Calculate the number of registers in the dirty partition starting at BSPSTORE and
+ * ending at BSP.  This isn't simply (BSP-BSPSTORE)/8 because every 64th slot stores
+ * ar.rnat.
+ */
+static __inline__ unsigned long
+ia64_rse_num_regs (unsigned long *bspstore, unsigned long *bsp)
+{
+	unsigned long slots = (bsp - bspstore);
+
+	return slots - (ia64_rse_slot_num(bspstore) + slots)/0x40;
+}
+
+/*
+ * The inverse of the above: given bspstore and the number of
+ * registers, calculate ar.bsp.
+ */
+static __inline__ unsigned long *
+ia64_rse_skip_regs (unsigned long *addr, long num_regs)
+{
+	long delta = ia64_rse_slot_num(addr) + num_regs;
+
+	if (num_regs < 0)
+		delta -= 0x3e;
+	return addr + num_regs + delta/0x3f;
+}
+
+#endif /* _ASM_IA64_RSE_H */
diff --git a/arch/ia64/include/uapi/asm/sembuf.h b/arch/ia64/include/uapi/asm/sembuf.h
new file mode 100644
index 00000000000..1340fbc04d3
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/sembuf.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_IA64_SEMBUF_H
+#define _ASM_IA64_SEMBUF_H
+
+/*
+ * The semid64_ds structure for IA-64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct semid64_ds {
+	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
+	__kernel_time_t	sem_otime;		/* last semop time */
+	__kernel_time_t	sem_ctime;		/* last change time */
+	unsigned long	sem_nsems;		/* no. of semaphores in array */
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+};
+
+#endif /* _ASM_IA64_SEMBUF_H */
diff --git a/arch/ia64/include/uapi/asm/setup.h b/arch/ia64/include/uapi/asm/setup.h
new file mode 100644
index 00000000000..8d56458310b
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/setup.h
@@ -0,0 +1,24 @@
+#ifndef __IA64_SETUP_H
+#define __IA64_SETUP_H
+
+#define COMMAND_LINE_SIZE	2048
+
+extern struct ia64_boot_param {
+	__u64 command_line;		/* physical address of command line arguments */
+	__u64 efi_systab;		/* physical address of EFI system table */
+	__u64 efi_memmap;		/* physical address of EFI memory map */
+	__u64 efi_memmap_size;		/* size of EFI memory map */
+	__u64 efi_memdesc_size;		/* size of an EFI memory map descriptor */
+	__u32 efi_memdesc_version;	/* memory descriptor version */
+	struct {
+		__u16 num_cols;	/* number of columns on console output device */
+		__u16 num_rows;	/* number of rows on console output device */
+		__u16 orig_x;	/* cursor's x position */
+		__u16 orig_y;	/* cursor's y position */
+	} console_info;
+	__u64 fpswa;		/* physical address of the fpswa interface */
+	__u64 initrd_start;
+	__u64 initrd_size;
+} *ia64_boot_param;
+
+#endif
diff --git a/arch/ia64/include/uapi/asm/shmbuf.h b/arch/ia64/include/uapi/asm/shmbuf.h
new file mode 100644
index 00000000000..585002a77ac
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/shmbuf.h
@@ -0,0 +1,38 @@
+#ifndef _ASM_IA64_SHMBUF_H
+#define _ASM_IA64_SHMBUF_H
+
+/*
+ * The shmid64_ds structure for IA-64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct shmid64_ds {
+	struct ipc64_perm	shm_perm;	/* operation perms */
+	size_t			shm_segsz;	/* size of segment (bytes) */
+	__kernel_time_t		shm_atime;	/* last attach time */
+	__kernel_time_t		shm_dtime;	/* last detach time */
+	__kernel_time_t		shm_ctime;	/* last change time */
+	__kernel_pid_t		shm_cpid;	/* pid of creator */
+	__kernel_pid_t		shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused1;
+	unsigned long		__unused2;
+};
+
+struct shminfo64 {
+	unsigned long	shmmax;
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* _ASM_IA64_SHMBUF_H */
diff --git a/arch/ia64/include/uapi/asm/sigcontext.h b/arch/ia64/include/uapi/asm/sigcontext.h
new file mode 100644
index 00000000000..57ff777bcc4
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/sigcontext.h
@@ -0,0 +1,70 @@
+#ifndef _ASM_IA64_SIGCONTEXT_H
+#define _ASM_IA64_SIGCONTEXT_H
+
+/*
+ * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/fpu.h>
+
+#define IA64_SC_FLAG_ONSTACK_BIT		0	/* is handler running on signal stack? */
+#define IA64_SC_FLAG_IN_SYSCALL_BIT		1	/* did signal interrupt a syscall? */
+#define IA64_SC_FLAG_FPH_VALID_BIT		2	/* is state in f[32]-f[127] valid? */
+
+#define IA64_SC_FLAG_ONSTACK		(1 << IA64_SC_FLAG_ONSTACK_BIT)
+#define IA64_SC_FLAG_IN_SYSCALL		(1 << IA64_SC_FLAG_IN_SYSCALL_BIT)
+#define IA64_SC_FLAG_FPH_VALID		(1 << IA64_SC_FLAG_FPH_VALID_BIT)
+
+# ifndef __ASSEMBLY__
+
+/*
+ * Note on handling of register backing store: sc_ar_bsp contains the address that would
+ * be found in ar.bsp after executing a "cover" instruction the context in which the
+ * signal was raised.  If signal delivery required switching to an alternate signal stack
+ * (sc_rbs_base is not NULL), the "dirty" partition (as it would exist after executing the
+ * imaginary "cover" instruction) is backed by the *alternate* signal stack, not the
+ * original one.  In this case, sc_rbs_base contains the base address of the new register
+ * backing store.  The number of registers in the dirty partition can be calculated as:
+ *
+ *   ndirty = ia64_rse_num_regs(sc_rbs_base, sc_rbs_base + (sc_loadrs >> 16))
+ *
+ */
+
+struct sigcontext {
+	unsigned long		sc_flags;	/* see manifest constants above */
+	unsigned long		sc_nat;		/* bit i == 1 iff scratch reg gr[i] is a NaT */
+	stack_t			sc_stack;	/* previously active stack */
+
+	unsigned long		sc_ip;		/* instruction pointer */
+	unsigned long		sc_cfm;		/* current frame marker */
+	unsigned long		sc_um;		/* user mask bits */
+	unsigned long		sc_ar_rsc;	/* register stack configuration register */
+	unsigned long		sc_ar_bsp;	/* backing store pointer */
+	unsigned long		sc_ar_rnat;	/* RSE NaT collection register */
+	unsigned long		sc_ar_ccv;	/* compare and exchange compare value register */
+	unsigned long		sc_ar_unat;	/* ar.unat of interrupted context */
+	unsigned long		sc_ar_fpsr;	/* floating-point status register */
+	unsigned long		sc_ar_pfs;	/* previous function state */
+	unsigned long		sc_ar_lc;	/* loop count register */
+	unsigned long		sc_pr;		/* predicate registers */
+	unsigned long		sc_br[8];	/* branch registers */
+	/* Note: sc_gr[0] is used as the "uc_link" member of ucontext_t */
+	unsigned long		sc_gr[32];	/* general registers (static partition) */
+	struct ia64_fpreg	sc_fr[128];	/* floating-point registers */
+
+	unsigned long		sc_rbs_base;	/* NULL or new base of sighandler's rbs */
+	unsigned long		sc_loadrs;	/* see description above */
+
+	unsigned long		sc_ar25;	/* cmp8xchg16 uses this */
+	unsigned long		sc_ar26;	/* rsvd for scratch use */
+	unsigned long		sc_rsvd[12];	/* reserved for future use */
+	/*
+	 * The mask must come last so we can increase _NSIG_WORDS
+	 * without breaking binary compatibility.
+	 */
+	sigset_t		sc_mask;	/* signal mask to restore after handler returns */
+};
+
+# endif /* __ASSEMBLY__ */
+#endif /* _ASM_IA64_SIGCONTEXT_H */
diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h
new file mode 100644
index 00000000000..4ea6225196b
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/siginfo.h
@@ -0,0 +1,121 @@
+/*
+ * Based on <asm-i386/siginfo.h>.
+ *
+ * Modified 1998-2002
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+#ifndef _UAPI_ASM_IA64_SIGINFO_H
+#define _UAPI_ASM_IA64_SIGINFO_H
+
+
+#define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
+
+#define HAVE_ARCH_SIGINFO_T
+#define HAVE_ARCH_COPY_SIGINFO
+#define HAVE_ARCH_COPY_SIGINFO_TO_USER
+
+#include <asm-generic/siginfo.h>
+
+typedef struct siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+	int __pad0;
+
+	union {
+		int _pad[SI_PAD_SIZE];
+
+		/* kill() */
+		struct {
+			pid_t _pid;		/* sender's pid */
+			uid_t _uid;		/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			timer_t _tid;		/* timer id */
+			int _overrun;		/* overrun count */
+			char _pad[sizeof(__ARCH_SI_UID_T) - sizeof(int)];
+			sigval_t _sigval;	/* must overlay ._rt._sigval! */
+			int _sys_private;	/* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			pid_t _pid;		/* sender's pid */
+			uid_t _uid;		/* sender's uid */
+			sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			pid_t _pid;		/* which child */
+			uid_t _uid;		/* sender's uid */
+			int _status;		/* exit code */
+			clock_t _utime;
+			clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			void __user *_addr;	/* faulting insn/memory ref. */
+			int _imm;		/* immediate value for "break" */
+			unsigned int _flags;	/* see below */
+			unsigned long _isr;	/* isr */
+			short _addr_lsb;	/* lsb of faulting address */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			long _band;	/* POLL_IN, POLL_OUT, POLL_MSG (XPG requires a "long") */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} siginfo_t;
+
+#define si_imm		_sifields._sigfault._imm	/* as per UNIX SysV ABI spec */
+#define si_flags	_sifields._sigfault._flags
+/*
+ * si_isr is valid for SIGILL, SIGFPE, SIGSEGV, SIGBUS, and SIGTRAP provided that
+ * si_code is non-zero and __ISR_VALID is set in si_flags.
+ */
+#define si_isr		_sifields._sigfault._isr
+
+/*
+ * Flag values for si_flags:
+ */
+#define __ISR_VALID_BIT	0
+#define __ISR_VALID	(1 << __ISR_VALID_BIT)
+
+/*
+ * SIGILL si_codes
+ */
+#define ILL_BADIADDR	(__SI_FAULT|9)	/* unimplemented instruction address */
+#define __ILL_BREAK	(__SI_FAULT|10)	/* illegal break */
+#define __ILL_BNDMOD	(__SI_FAULT|11)	/* bundle-update (modification) in progress */
+#undef NSIGILL
+#define NSIGILL		11
+
+/*
+ * SIGFPE si_codes
+ */
+#define __FPE_DECOVF	(__SI_FAULT|9)	/* decimal overflow */
+#define __FPE_DECDIV	(__SI_FAULT|10)	/* decimal division by zero */
+#define __FPE_DECERR	(__SI_FAULT|11)	/* packed decimal error */
+#define __FPE_INVASC	(__SI_FAULT|12)	/* invalid ASCII digit */
+#define __FPE_INVDEC	(__SI_FAULT|13)	/* invalid decimal digit */
+#undef NSIGFPE
+#define NSIGFPE		13
+
+/*
+ * SIGSEGV si_codes
+ */
+#define __SEGV_PSTKOVF	(__SI_FAULT|3)	/* paragraph stack overflow */
+#undef NSIGSEGV
+#define NSIGSEGV	3
+
+#undef NSIGTRAP
+#define NSIGTRAP	4
+
+
+#endif /* _UAPI_ASM_IA64_SIGINFO_H */
diff --git a/arch/ia64/include/uapi/asm/signal.h b/arch/ia64/include/uapi/asm/signal.h
new file mode 100644
index 00000000000..c0ea2855e96
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/signal.h
@@ -0,0 +1,121 @@
+/*
+ * Modified 1998-2001, 2003
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ *
+ * Unfortunately, this file is being included by bits/signal.h in
+ * glibc-2.x.  Hence the #ifdef __KERNEL__ ugliness.
+ */
+#ifndef _UAPI_ASM_IA64_SIGNAL_H
+#define _UAPI_ASM_IA64_SIGNAL_H
+
+
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+#define SIGBUS		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGUSR1		10
+#define SIGSEGV		11
+#define SIGUSR2		12
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGSTKFLT	16
+#define SIGCHLD		17
+#define SIGCONT		18
+#define SIGSTOP		19
+#define SIGTSTP		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGURG		23
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGIO		29
+#define SIGPOLL		SIGIO
+/*
+#define SIGLOST		29
+*/
+#define SIGPWR		30
+#define SIGSYS		31
+/* signal 31 is no longer "unused", but the SIGUNUSED macro remains for backwards compatibility */
+#define	SIGUNUSED	31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN	32
+#define SIGRTMAX	_NSIG
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP	0x00000001
+#define SA_NOCLDWAIT	0x00000002
+#define SA_SIGINFO	0x00000004
+#define SA_ONSTACK	0x08000000
+#define SA_RESTART	0x10000000
+#define SA_NODEFER	0x40000000
+#define SA_RESETHAND	0x80000000
+
+#define SA_NOMASK	SA_NODEFER
+#define SA_ONESHOT	SA_RESETHAND
+
+#define SA_RESTORER	0x04000000
+
+/*
+ * The minimum stack size needs to be fairly large because we want to
+ * be sure that an app compiled for today's CPUs will continue to run
+ * on all future CPU models.  The CPU model matters because the signal
+ * frame needs to have space for the complete machine state, including
+ * all physical stacked registers.  The number of physical stacked
+ * registers is CPU model dependent, but given that the width of
+ * ar.rsc.loadrs is 14 bits, we can assume that they'll never take up
+ * more than 16KB of space.
+ */
+#if 1
+  /*
+   * This is a stupid typo: the value was _meant_ to be 131072 (0x20000), but I typed it
+   * in wrong. ;-(  To preserve backwards compatibility, we leave the kernel at the
+   * incorrect value and fix libc only.
+   */
+# define MINSIGSTKSZ	131027	/* min. stack size for sigaltstack() */
+#else
+# define MINSIGSTKSZ	131072	/* min. stack size for sigaltstack() */
+#endif
+#define SIGSTKSZ	262144	/* default stack size for sigaltstack() */
+
+
+#include <asm-generic/signal-defs.h>
+
+# ifndef __ASSEMBLY__
+
+#  include <linux/types.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+
+typedef struct sigaltstack {
+	void __user *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+} stack_t;
+
+
+# endif /* !__ASSEMBLY__ */
+#endif /* _UAPI_ASM_IA64_SIGNAL_H */
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
new file mode 100644
index 00000000000..a1b49bac795
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -0,0 +1,92 @@
+#ifndef _ASM_IA64_SOCKET_H
+#define _ASM_IA64_SOCKET_H
+
+/*
+ * Socket related defines.
+ *
+ * Based on <asm-i386/socket.h>.
+ *
+ * Modified 1998-2000
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET	1
+
+#define SO_DEBUG	1
+#define SO_REUSEADDR	2
+#define SO_TYPE		3
+#define SO_ERROR	4
+#define SO_DONTROUTE	5
+#define SO_BROADCAST	6
+#define SO_SNDBUF	7
+#define SO_RCVBUF	8
+#define SO_SNDBUFFORCE	32
+#define SO_RCVBUFFORCE	33
+#define SO_KEEPALIVE	9
+#define SO_OOBINLINE	10
+#define SO_NO_CHECK	11
+#define SO_PRIORITY	12
+#define SO_LINGER	13
+#define SO_BSDCOMPAT	14
+#define SO_REUSEPORT	15
+#define SO_PASSCRED	16
+#define SO_PEERCRED	17
+#define SO_RCVLOWAT	18
+#define SO_SNDLOWAT	19
+#define SO_RCVTIMEO	20
+#define SO_SNDTIMEO	21
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
+#define SO_SECURITY_ENCRYPTION_NETWORK		24
+
+#define SO_BINDTODEVICE		25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER	26
+#define SO_DETACH_FILTER	27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
+
+#define SO_PEERNAME		28
+#define SO_TIMESTAMP		29
+#define SCM_TIMESTAMP		SO_TIMESTAMP
+
+#define SO_ACCEPTCONN		30
+
+#define SO_PEERSEC             31
+#define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
+
+#define SO_MARK			36
+
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
+#define SO_PROTOCOL		38
+#define SO_DOMAIN		39
+
+#define SO_RXQ_OVFL             40
+
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+#define SO_PEEK_OFF		42
+
+/* Instruct lower device to use last 4-bytes of skb data as FCS */
+#define SO_NOFCS		43
+
+#define SO_LOCK_FILTER		44
+
+#define SO_SELECT_ERR_QUEUE	45
+
+#define SO_BUSY_POLL		46
+
+#define SO_MAX_PACING_RATE	47
+
+#define SO_BPF_EXTENSIONS	48
+
+#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/ia64/include/uapi/asm/sockios.h b/arch/ia64/include/uapi/asm/sockios.h
new file mode 100644
index 00000000000..15c92468ad3
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/sockios.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_IA64_SOCKIOS_H
+#define _ASM_IA64_SOCKIOS_H
+
+/*
+ * Socket-level I/O control calls.
+ *
+ * Based on <asm-i386/sockios.h>.
+ *
+ * Modified 1998, 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+#define FIOSETOWN 	0x8901
+#define SIOCSPGRP	0x8902
+#define FIOGETOWN	0x8903
+#define SIOCGPGRP	0x8904
+#define SIOCATMARK	0x8905
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
+
+#endif /* _ASM_IA64_SOCKIOS_H */
diff --git a/arch/ia64/include/uapi/asm/stat.h b/arch/ia64/include/uapi/asm/stat.h
new file mode 100644
index 00000000000..367bb90cdff
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/stat.h
@@ -0,0 +1,51 @@
+#ifndef _ASM_IA64_STAT_H
+#define _ASM_IA64_STAT_H
+
+/*
+ * Modified 1998, 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+struct stat {
+	unsigned long	st_dev;
+	unsigned long	st_ino;
+	unsigned long	st_nlink;
+	unsigned int	st_mode;
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+	unsigned int	__pad0;
+	unsigned long	st_rdev;
+	unsigned long	st_size;
+	unsigned long	st_atime;
+	unsigned long	st_atime_nsec;
+	unsigned long	st_mtime;
+	unsigned long	st_mtime_nsec;
+	unsigned long	st_ctime;
+	unsigned long	st_ctime_nsec;
+	unsigned long	st_blksize;
+	long		st_blocks;
+	unsigned long	__unused[3];
+};
+
+#define STAT_HAVE_NSEC 1
+
+struct ia64_oldstat {
+	unsigned int	st_dev;
+	unsigned int	st_ino;
+	unsigned int	st_mode;
+	unsigned int	st_nlink;
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+	unsigned int	st_rdev;
+	unsigned int	__pad1;
+	unsigned long	st_size;
+	unsigned long	st_atime;
+	unsigned long	st_mtime;
+	unsigned long	st_ctime;
+	unsigned int	st_blksize;
+	int		st_blocks;
+	unsigned int	__unused1;
+	unsigned int	__unused2;
+};
+
+#endif /* _ASM_IA64_STAT_H */
diff --git a/arch/ia64/include/uapi/asm/statfs.h b/arch/ia64/include/uapi/asm/statfs.h
new file mode 100644
index 00000000000..1e589669de5
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/statfs.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_IA64_STATFS_H
+#define _ASM_IA64_STATFS_H
+
+/*
+ * Based on <asm-i386/statfs.h>.
+ *
+ * Modified 1998, 1999, 2003
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+
+/*
+ * We need compat_statfs64 to be packed, because the i386 ABI won't
+ * add padding at the end to bring it to a multiple of 8 bytes, but
+ * the IA64 ABI will.
+ */
+#define ARCH_PACK_COMPAT_STATFS64 __attribute__((packed,aligned(4)))
+
+#include <asm-generic/statfs.h>
+
+#endif /* _ASM_IA64_STATFS_H */
diff --git a/arch/ia64/include/uapi/asm/swab.h b/arch/ia64/include/uapi/asm/swab.h
new file mode 100644
index 00000000000..c89a8cb5d8a
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/swab.h
@@ -0,0 +1,34 @@
+#ifndef _ASM_IA64_SWAB_H
+#define _ASM_IA64_SWAB_H
+
+/*
+ * Modified 1998, 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co.
+ */
+
+#include <linux/types.h>
+#include <asm/intrinsics.h>
+#include <linux/compiler.h>
+
+static __inline__ __attribute_const__ __u64 __arch_swab64(__u64 x)
+{
+	__u64 result;
+
+	result = ia64_mux1(x, ia64_mux1_rev);
+	return result;
+}
+#define __arch_swab64 __arch_swab64
+
+static __inline__ __attribute_const__ __u32 __arch_swab32(__u32 x)
+{
+	return __arch_swab64(x) >> 32;
+}
+#define __arch_swab32 __arch_swab32
+
+static __inline__ __attribute_const__ __u16 __arch_swab16(__u16 x)
+{
+	return __arch_swab64(x) >> 48;
+}
+#define __arch_swab16 __arch_swab16
+
+#endif /* _ASM_IA64_SWAB_H */
diff --git a/arch/ia64/include/uapi/asm/termbits.h b/arch/ia64/include/uapi/asm/termbits.h
new file mode 100644
index 00000000000..c009b94e58d
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/termbits.h
@@ -0,0 +1,208 @@
+#ifndef _ASM_IA64_TERMBITS_H
+#define _ASM_IA64_TERMBITS_H
+
+/*
+ * Based on <asm-i386/termbits.h>.
+ *
+ * Modified 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ *
+ * 99/01/28	Added new baudrates
+ */
+
+#include <linux/posix_types.h>
+
+typedef unsigned char	cc_t;
+typedef unsigned int	speed_t;
+typedef unsigned int	tcflag_t;
+
+#define NCCS 19
+struct termios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+};
+
+struct termios2 {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+struct ktermios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+/* c_cc characters */
+#define VINTR 0
+#define VQUIT 1
+#define VERASE 2
+#define VKILL 3
+#define VEOF 4
+#define VTIME 5
+#define VMIN 6
+#define VSWTC 7
+#define VSTART 8
+#define VSTOP 9
+#define VSUSP 10
+#define VEOL 11
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE 14
+#define VLNEXT 15
+#define VEOL2 16
+
+/* c_iflag bits */
+#define IGNBRK	0000001
+#define BRKINT	0000002
+#define IGNPAR	0000004
+#define PARMRK	0000010
+#define INPCK	0000020
+#define ISTRIP	0000040
+#define INLCR	0000100
+#define IGNCR	0000200
+#define ICRNL	0000400
+#define IUCLC	0001000
+#define IXON	0002000
+#define IXANY	0004000
+#define IXOFF	0010000
+#define IMAXBEL	0020000
+#define IUTF8	0040000
+
+/* c_oflag bits */
+#define OPOST	0000001
+#define OLCUC	0000002
+#define ONLCR	0000004
+#define OCRNL	0000010
+#define ONOCR	0000020
+#define ONLRET	0000040
+#define OFILL	0000100
+#define OFDEL	0000200
+#define NLDLY	0000400
+#define   NL0	0000000
+#define   NL1	0000400
+#define CRDLY	0003000
+#define   CR0	0000000
+#define   CR1	0001000
+#define   CR2	0002000
+#define   CR3	0003000
+#define TABDLY	0014000
+#define   TAB0	0000000
+#define   TAB1	0004000
+#define   TAB2	0010000
+#define   TAB3	0014000
+#define   XTABS	0014000
+#define BSDLY	0020000
+#define   BS0	0000000
+#define   BS1	0020000
+#define VTDLY	0040000
+#define   VT0	0000000
+#define   VT1	0040000
+#define FFDLY	0100000
+#define   FF0	0000000
+#define   FF1	0100000
+
+/* c_cflag bit meaning */
+#define CBAUD	0010017
+#define  B0	0000000		/* hang up */
+#define  B50	0000001
+#define  B75	0000002
+#define  B110	0000003
+#define  B134	0000004
+#define  B150	0000005
+#define  B200	0000006
+#define  B300	0000007
+#define  B600	0000010
+#define  B1200	0000011
+#define  B1800	0000012
+#define  B2400	0000013
+#define  B4800	0000014
+#define  B9600	0000015
+#define  B19200	0000016
+#define  B38400	0000017
+#define EXTA B19200
+#define EXTB B38400
+#define CSIZE	0000060
+#define   CS5	0000000
+#define   CS6	0000020
+#define   CS7	0000040
+#define   CS8	0000060
+#define CSTOPB	0000100
+#define CREAD	0000200
+#define PARENB	0000400
+#define PARODD	0001000
+#define HUPCL	0002000
+#define CLOCAL	0004000
+#define CBAUDEX 0010000
+#define    BOTHER 0010000
+#define    B57600 0010001
+#define   B115200 0010002
+#define   B230400 0010003
+#define   B460800 0010004
+#define   B500000 0010005
+#define   B576000 0010006
+#define   B921600 0010007
+#define  B1000000 0010010
+#define  B1152000 0010011
+#define  B1500000 0010012
+#define  B2000000 0010013
+#define  B2500000 0010014
+#define  B3000000 0010015
+#define  B3500000 0010016
+#define  B4000000 0010017
+#define CIBAUD	  002003600000		/* input baud rate */
+#define CMSPAR	  010000000000		/* mark or space (stick) parity */
+#define CRTSCTS	  020000000000		/* flow control */
+
+#define IBSHIFT	16		/* Shift from CBAUD to CIBAUD */
+
+/* c_lflag bits */
+#define ISIG	0000001
+#define ICANON	0000002
+#define XCASE	0000004
+#define ECHO	0000010
+#define ECHOE	0000020
+#define ECHOK	0000040
+#define ECHONL	0000100
+#define NOFLSH	0000200
+#define TOSTOP	0000400
+#define ECHOCTL	0001000
+#define ECHOPRT	0002000
+#define ECHOKE	0004000
+#define FLUSHO	0010000
+#define PENDIN	0040000
+#define IEXTEN	0100000
+#define EXTPROC	0200000
+
+/* tcflow() and TCXONC use these */
+#define	TCOOFF		0
+#define	TCOON		1
+#define	TCIOFF		2
+#define	TCION		3
+
+/* tcflush() and TCFLSH use these */
+#define	TCIFLUSH	0
+#define	TCOFLUSH	1
+#define	TCIOFLUSH	2
+
+/* tcsetattr uses these */
+#define	TCSANOW		0
+#define	TCSADRAIN	1
+#define	TCSAFLUSH	2
+
+#endif /* _ASM_IA64_TERMBITS_H */
diff --git a/arch/ia64/include/uapi/asm/termios.h b/arch/ia64/include/uapi/asm/termios.h
new file mode 100644
index 00000000000..d59b48c307f
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/termios.h
@@ -0,0 +1,50 @@
+/*
+ * Modified 1999
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ *
+ * 99/01/28	Added N_IRDA and N_SMSBLOCK
+ */
+#ifndef _UAPI_ASM_IA64_TERMIOS_H
+#define _UAPI_ASM_IA64_TERMIOS_H
+
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR
+#define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+
+#endif /* _UAPI_ASM_IA64_TERMIOS_H */
diff --git a/arch/ia64/include/uapi/asm/types.h b/arch/ia64/include/uapi/asm/types.h
new file mode 100644
index 00000000000..321193b05ee
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/types.h
@@ -0,0 +1,31 @@
+/*
+ * This file is never included by application software unless explicitly
+ * requested (e.g., via linux/types.h) in which case the application is
+ * Linux specific so (user-) name space pollution is not a major issue.
+ * However, for interoperability, libraries still need to be careful to
+ * avoid naming clashes.
+ *
+ * Based on <asm-alpha/types.h>.
+ *
+ * Modified 1998-2000, 2002
+ *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
+ */
+#ifndef _UAPI_ASM_IA64_TYPES_H
+#define _UAPI_ASM_IA64_TYPES_H
+
+
+#ifndef __KERNEL__
+#include <asm-generic/int-l64.h>
+#endif
+
+#ifdef __ASSEMBLY__
+# define __IA64_UL(x)		(x)
+# define __IA64_UL_CONST(x)	x
+
+#else
+# define __IA64_UL(x)		((unsigned long)(x))
+# define __IA64_UL_CONST(x)	x##UL
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _UAPI_ASM_IA64_TYPES_H */
diff --git a/arch/ia64/include/uapi/asm/ucontext.h b/arch/ia64/include/uapi/asm/ucontext.h
new file mode 100644
index 00000000000..bf573dc8ca6
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/ucontext.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_IA64_UCONTEXT_H
+#define _ASM_IA64_UCONTEXT_H
+
+struct ucontext {
+	struct sigcontext uc_mcontext;
+};
+
+#define uc_link		uc_mcontext.sc_gr[0]	/* wrong type; nobody cares */
+#define uc_sigmask	uc_mcontext.sc_sigmask
+#define uc_stack	uc_mcontext.sc_stack
+
+#endif /* _ASM_IA64_UCONTEXT_H */
diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h
new file mode 100644
index 00000000000..7de0a2d65da
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/unistd.h
@@ -0,0 +1,332 @@
+/*
+ * IA-64 Linux syscall numbers and inline-functions.
+ *
+ * Copyright (C) 1998-2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#ifndef _UAPI_ASM_IA64_UNISTD_H
+#define _UAPI_ASM_IA64_UNISTD_H
+
+
+#include <asm/break.h>
+
+#define __BREAK_SYSCALL			__IA64_BREAK_SYSCALL
+
+#define __NR_ni_syscall			1024
+#define __NR_exit			1025
+#define __NR_read			1026
+#define __NR_write			1027
+#define __NR_open			1028
+#define __NR_close			1029
+#define __NR_creat			1030
+#define __NR_link			1031
+#define __NR_unlink			1032
+#define __NR_execve			1033
+#define __NR_chdir			1034
+#define __NR_fchdir			1035
+#define __NR_utimes			1036
+#define __NR_mknod			1037
+#define __NR_chmod			1038
+#define __NR_chown			1039
+#define __NR_lseek			1040
+#define __NR_getpid			1041
+#define __NR_getppid			1042
+#define __NR_mount			1043
+#define __NR_umount			1044
+#define __NR_setuid			1045
+#define __NR_getuid			1046
+#define __NR_geteuid			1047
+#define __NR_ptrace			1048
+#define __NR_access			1049
+#define __NR_sync			1050
+#define __NR_fsync			1051
+#define __NR_fdatasync			1052
+#define __NR_kill			1053
+#define __NR_rename			1054
+#define __NR_mkdir			1055
+#define __NR_rmdir			1056
+#define __NR_dup			1057
+#define __NR_pipe			1058
+#define __NR_times			1059
+#define __NR_brk			1060
+#define __NR_setgid			1061
+#define __NR_getgid			1062
+#define __NR_getegid			1063
+#define __NR_acct			1064
+#define __NR_ioctl			1065
+#define __NR_fcntl			1066
+#define __NR_umask			1067
+#define __NR_chroot			1068
+#define __NR_ustat			1069
+#define __NR_dup2			1070
+#define __NR_setreuid			1071
+#define __NR_setregid			1072
+#define __NR_getresuid			1073
+#define __NR_setresuid			1074
+#define __NR_getresgid			1075
+#define __NR_setresgid			1076
+#define __NR_getgroups			1077
+#define __NR_setgroups			1078
+#define __NR_getpgid			1079
+#define __NR_setpgid			1080
+#define __NR_setsid			1081
+#define __NR_getsid			1082
+#define __NR_sethostname		1083
+#define __NR_setrlimit			1084
+#define __NR_getrlimit			1085
+#define __NR_getrusage			1086
+#define __NR_gettimeofday		1087
+#define __NR_settimeofday		1088
+#define __NR_select			1089
+#define __NR_poll			1090
+#define __NR_symlink			1091
+#define __NR_readlink			1092
+#define __NR_uselib			1093
+#define __NR_swapon			1094
+#define __NR_swapoff			1095
+#define __NR_reboot			1096
+#define __NR_truncate			1097
+#define __NR_ftruncate			1098
+#define __NR_fchmod			1099
+#define __NR_fchown			1100
+#define __NR_getpriority		1101
+#define __NR_setpriority		1102
+#define __NR_statfs			1103
+#define __NR_fstatfs			1104
+#define __NR_gettid			1105
+#define __NR_semget			1106
+#define __NR_semop			1107
+#define __NR_semctl			1108
+#define __NR_msgget			1109
+#define __NR_msgsnd			1110
+#define __NR_msgrcv			1111
+#define __NR_msgctl			1112
+#define __NR_shmget			1113
+#define __NR_shmat			1114
+#define __NR_shmdt			1115
+#define __NR_shmctl			1116
+/* also known as klogctl() in GNU libc: */
+#define __NR_syslog			1117
+#define __NR_setitimer			1118
+#define __NR_getitimer			1119
+/* 1120 was __NR_old_stat */
+/* 1121 was __NR_old_lstat */
+/* 1122 was __NR_old_fstat */
+#define __NR_vhangup			1123
+#define __NR_lchown			1124
+#define __NR_remap_file_pages		1125
+#define __NR_wait4			1126
+#define __NR_sysinfo			1127
+#define __NR_clone			1128
+#define __NR_setdomainname		1129
+#define __NR_uname			1130
+#define __NR_adjtimex			1131
+/* 1132 was __NR_create_module */
+#define __NR_init_module		1133
+#define __NR_delete_module		1134
+/* 1135 was __NR_get_kernel_syms */
+/* 1136 was __NR_query_module */
+#define __NR_quotactl			1137
+#define __NR_bdflush			1138
+#define __NR_sysfs			1139
+#define __NR_personality		1140
+#define __NR_afs_syscall		1141
+#define __NR_setfsuid			1142
+#define __NR_setfsgid			1143
+#define __NR_getdents			1144
+#define __NR_flock			1145
+#define __NR_readv			1146
+#define __NR_writev			1147
+#define __NR_pread64			1148
+#define __NR_pwrite64			1149
+#define __NR__sysctl			1150
+#define __NR_mmap			1151
+#define __NR_munmap			1152
+#define __NR_mlock			1153
+#define __NR_mlockall			1154
+#define __NR_mprotect			1155
+#define __NR_mremap			1156
+#define __NR_msync			1157
+#define __NR_munlock			1158
+#define __NR_munlockall			1159
+#define __NR_sched_getparam		1160
+#define __NR_sched_setparam		1161
+#define __NR_sched_getscheduler		1162
+#define __NR_sched_setscheduler		1163
+#define __NR_sched_yield		1164
+#define __NR_sched_get_priority_max	1165
+#define __NR_sched_get_priority_min	1166
+#define __NR_sched_rr_get_interval	1167
+#define __NR_nanosleep			1168
+#define __NR_nfsservctl			1169
+#define __NR_prctl			1170
+/* 1171 is reserved for backwards compatibility with old __NR_getpagesize */
+#define __NR_mmap2			1172
+#define __NR_pciconfig_read		1173
+#define __NR_pciconfig_write		1174
+#define __NR_perfmonctl			1175
+#define __NR_sigaltstack		1176
+#define __NR_rt_sigaction		1177
+#define __NR_rt_sigpending		1178
+#define __NR_rt_sigprocmask		1179
+#define __NR_rt_sigqueueinfo		1180
+#define __NR_rt_sigreturn		1181
+#define __NR_rt_sigsuspend		1182
+#define __NR_rt_sigtimedwait		1183
+#define __NR_getcwd			1184
+#define __NR_capget			1185
+#define __NR_capset			1186
+#define __NR_sendfile			1187
+#define __NR_getpmsg			1188
+#define __NR_putpmsg			1189
+#define __NR_socket			1190
+#define __NR_bind			1191
+#define __NR_connect			1192
+#define __NR_listen			1193
+#define __NR_accept			1194
+#define __NR_getsockname		1195
+#define __NR_getpeername		1196
+#define __NR_socketpair			1197
+#define __NR_send			1198
+#define __NR_sendto			1199
+#define __NR_recv			1200
+#define __NR_recvfrom			1201
+#define __NR_shutdown			1202
+#define __NR_setsockopt			1203
+#define __NR_getsockopt			1204
+#define __NR_sendmsg			1205
+#define __NR_recvmsg			1206
+#define __NR_pivot_root			1207
+#define __NR_mincore			1208
+#define __NR_madvise			1209
+#define __NR_stat			1210
+#define __NR_lstat			1211
+#define __NR_fstat			1212
+#define __NR_clone2			1213
+#define __NR_getdents64			1214
+#define __NR_getunwind			1215
+#define __NR_readahead			1216
+#define __NR_setxattr			1217
+#define __NR_lsetxattr			1218
+#define __NR_fsetxattr			1219
+#define __NR_getxattr			1220
+#define __NR_lgetxattr			1221
+#define __NR_fgetxattr			1222
+#define __NR_listxattr			1223
+#define __NR_llistxattr			1224
+#define __NR_flistxattr			1225
+#define __NR_removexattr		1226
+#define __NR_lremovexattr		1227
+#define __NR_fremovexattr		1228
+#define __NR_tkill			1229
+#define __NR_futex			1230
+#define __NR_sched_setaffinity		1231
+#define __NR_sched_getaffinity		1232
+#define __NR_set_tid_address		1233
+#define __NR_fadvise64			1234
+#define __NR_tgkill			1235
+#define __NR_exit_group			1236
+#define __NR_lookup_dcookie		1237
+#define __NR_io_setup			1238
+#define __NR_io_destroy			1239
+#define __NR_io_getevents		1240
+#define __NR_io_submit			1241
+#define __NR_io_cancel			1242
+#define __NR_epoll_create		1243
+#define __NR_epoll_ctl			1244
+#define __NR_epoll_wait			1245
+#define __NR_restart_syscall		1246
+#define __NR_semtimedop			1247
+#define __NR_timer_create		1248
+#define __NR_timer_settime		1249
+#define __NR_timer_gettime		1250
+#define __NR_timer_getoverrun		1251
+#define __NR_timer_delete		1252
+#define __NR_clock_settime		1253
+#define __NR_clock_gettime		1254
+#define __NR_clock_getres		1255
+#define __NR_clock_nanosleep		1256
+#define __NR_fstatfs64			1257
+#define __NR_statfs64			1258
+#define __NR_mbind			1259
+#define __NR_get_mempolicy		1260
+#define __NR_set_mempolicy		1261
+#define __NR_mq_open			1262
+#define __NR_mq_unlink			1263
+#define __NR_mq_timedsend		1264
+#define __NR_mq_timedreceive		1265
+#define __NR_mq_notify			1266
+#define __NR_mq_getsetattr		1267
+#define __NR_kexec_load			1268
+#define __NR_vserver			1269
+#define __NR_waitid			1270
+#define __NR_add_key			1271
+#define __NR_request_key		1272
+#define __NR_keyctl			1273
+#define __NR_ioprio_set			1274
+#define __NR_ioprio_get			1275
+#define __NR_move_pages			1276
+#define __NR_inotify_init		1277
+#define __NR_inotify_add_watch		1278
+#define __NR_inotify_rm_watch		1279
+#define __NR_migrate_pages		1280
+#define __NR_openat			1281
+#define __NR_mkdirat			1282
+#define __NR_mknodat			1283
+#define __NR_fchownat			1284
+#define __NR_futimesat			1285
+#define __NR_newfstatat			1286
+#define __NR_unlinkat			1287
+#define __NR_renameat			1288
+#define __NR_linkat			1289
+#define __NR_symlinkat			1290
+#define __NR_readlinkat			1291
+#define __NR_fchmodat			1292
+#define __NR_faccessat			1293
+#define __NR_pselect6			1294
+#define __NR_ppoll			1295
+#define __NR_unshare			1296
+#define __NR_splice			1297
+#define __NR_set_robust_list		1298
+#define __NR_get_robust_list		1299
+#define __NR_sync_file_range		1300
+#define __NR_tee			1301
+#define __NR_vmsplice			1302
+#define __NR_fallocate			1303
+#define __NR_getcpu			1304
+#define __NR_epoll_pwait		1305
+#define __NR_utimensat			1306
+#define __NR_signalfd			1307
+#define __NR_timerfd			1308
+#define __NR_eventfd			1309
+#define __NR_timerfd_create		1310
+#define __NR_timerfd_settime		1311
+#define __NR_timerfd_gettime		1312
+#define __NR_signalfd4			1313
+#define __NR_eventfd2			1314
+#define __NR_epoll_create1		1315
+#define __NR_dup3			1316
+#define __NR_pipe2			1317
+#define __NR_inotify_init1		1318
+#define __NR_preadv			1319
+#define __NR_pwritev			1320
+#define __NR_rt_tgsigqueueinfo		1321
+#define __NR_recvmmsg			1322
+#define __NR_fanotify_init		1323
+#define __NR_fanotify_mark		1324
+#define __NR_prlimit64			1325
+#define __NR_name_to_handle_at		1326
+#define __NR_open_by_handle_at  	1327
+#define __NR_clock_adjtime		1328
+#define __NR_syncfs			1329
+#define __NR_setns			1330
+#define __NR_sendmmsg			1331
+#define __NR_process_vm_readv		1332
+#define __NR_process_vm_writev		1333
+#define __NR_accept4			1334
+#define __NR_finit_module		1335
+#define __NR_sched_setattr		1336
+#define __NR_sched_getattr		1337
+#define __NR_renameat2			1338
+
+#endif /* _UAPI_ASM_IA64_UNISTD_H */
diff --git a/arch/ia64/include/uapi/asm/ustack.h b/arch/ia64/include/uapi/asm/ustack.h
new file mode 100644
index 00000000000..1dfebc62269
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/ustack.h
@@ -0,0 +1,12 @@
+#ifndef _UAPI_ASM_IA64_USTACK_H
+#define _UAPI_ASM_IA64_USTACK_H
+
+/*
+ * Constants for the user stack size
+ */
+
+
+/* Make a default stack size of 2GiB */
+#define DEFAULT_USER_STACK_SIZE	(1UL << 31)
+
+#endif /* _UAPI_ASM_IA64_USTACK_H */
diff --git a/arch/ia64/install.sh b/arch/ia64/install.sh
index 929e780026d..0e932f5dcd1 100644
--- a/arch/ia64/install.sh
+++ b/arch/ia64/install.sh
@@ -21,8 +21,8 @@
 
 # User may have a custom install script
 
-if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
-if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
 # Default install - same as make zlilo
 
diff --git a/arch/ia64/kernel/.gitignore b/arch/ia64/kernel/.gitignore
new file mode 100644
index 00000000000..21cb0da5ded
--- /dev/null
+++ b/arch/ia64/kernel/.gitignore
@@ -0,0 +1,2 @@
+gate.lds
+vmlinux.lds
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 09a0dbc17fb..20678a9ed11 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -2,21 +2,19 @@
 # Makefile for the linux kernel.
 #
 
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
 extra-y	:= head.o init_task.o vmlinux.lds
 
-obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
-	 irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o		\
-	 salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
-	 unwind.o mca.o mca_asm.o topology.o
+obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
+	 irq_lsapic.o ivt.o machvec.o pal.o paravirt_patchlist.o patch.o process.o perfmon.o ptrace.o sal.o		\
+	 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+	 unwind.o mca.o mca_asm.o topology.o dma-mapping.o
 
+obj-$(CONFIG_ACPI)		+= acpi.o acpi-ext.o
 obj-$(CONFIG_IA64_BRL_EMU)	+= brl_emu.o
-obj-$(CONFIG_IA64_GENERIC)	+= acpi-ext.o
-obj-$(CONFIG_IA64_HP_ZX1)	+= acpi-ext.o
-obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o
-
-ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y				+= acpi-processor.o
-endif
 
 obj-$(CONFIG_IA64_PALINFO)	+= palinfo.o
 obj-$(CONFIG_IOSAPIC)		+= iosapic.o
@@ -25,37 +23,91 @@ obj-$(CONFIG_SMP)		+= smp.o smpboot.o
 obj-$(CONFIG_NUMA)		+= numa.o
 obj-$(CONFIG_PERFMON)		+= perfmon_default_smpl.o
 obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
-obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
+obj-$(CONFIG_AUDIT)		+= audit.o
+obj-$(CONFIG_PCI_MSI)		+= msi_ia64.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
+obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 
-# The gate DSO image is built using a special linker script.
-targets += gate.so gate-syms.o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirtentry.o \
+				   paravirt_patch.o
+
+obj-$(CONFIG_IA64_ESI)		+= esi.o
+ifneq ($(CONFIG_IA64_ESI),)
+obj-y				+= esi_stub.o	# must be in kernel proper
+endif
+obj-$(CONFIG_INTEL_IOMMU)	+= pci-dma.o
+obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
 
-extra-y += gate.so gate-syms.o gate.lds gate.o
+obj-$(CONFIG_BINFMT_ELF)	+= elfcore.o
 
 # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
 CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
 
-CPPFLAGS_gate.lds := -P -C -U$(ARCH)
+# The gate DSO image is built using a special linker script.
+include $(srctree)/arch/ia64/kernel/Makefile.gate
+# tell compiled for native
+CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_NATIVE
+
+# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
+define sed-y
+	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
+endef
+quiet_cmd_nr_irqs = GEN     $@
+define cmd_nr_irqs
+	(set -e; \
+	 echo "#ifndef __ASM_NR_IRQS_H__"; \
+	 echo "#define __ASM_NR_IRQS_H__"; \
+	 echo "/*"; \
+	 echo " * DO NOT MODIFY."; \
+	 echo " *"; \
+	 echo " * This file was generated by Kbuild"; \
+	 echo " *"; \
+	 echo " */"; \
+	 echo ""; \
+	 sed -ne $(sed-y) $<; \
+	 echo ""; \
+	 echo "#endif" ) > $@
+endef
 
-quiet_cmd_gate = GATE $@
-      cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
+# We use internal kbuild rules to avoid the "is up to date" message from make
+arch/$(SRCARCH)/kernel/nr-irqs.s: arch/$(SRCARCH)/kernel/nr-irqs.c
+	$(Q)mkdir -p $(dir $@)
+	$(call if_changed_dep,cc_s_c)
 
-GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1
-$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
-	$(call if_changed,gate)
+include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
+	$(Q)mkdir -p $(dir $@)
+	$(call cmd,nr_irqs)
 
-$(obj)/built-in.o: $(obj)/gate-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
+#
+# native ivt.S, entry.S and fsys.S
+#
+ASM_PARAVIRT_OBJS = ivt.o entry.o fsys.o
+define paravirtualized_native
+AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE
+AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK
+extra-y += pvchk-$(1)
+endef
+$(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj))))
 
-GATECFLAGS_gate-syms.o = -r
-$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
-	$(call if_changed,gate)
+#
+# Checker for paravirtualizations of privileged operations.
+#
+quiet_cmd_pv_check_sed = PVCHK   $@
+define cmd_pv_check_sed
+	sed -f $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed $< > $@
+endef
 
-# gate-data.o contains the gate DSO image as data in section .data.gate.
-# We must build gate.so before we can assemble it.
-# Note: kbuild does not track this dependency due to usage of .incbin
-$(obj)/gate-data.o: $(obj)/gate.so
+$(obj)/pvchk-sed-%.s: $(src)/%.S $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed FORCE
+	$(call if_changed_dep,as_s_S)
+$(obj)/pvchk-%.s: $(obj)/pvchk-sed-%.s FORCE
+	$(call if_changed,pv_check_sed)
+$(obj)/pvchk-%.o: $(obj)/pvchk-%.s FORCE
+	$(call if_changed,as_o_S)
+.PRECIOUS: $(obj)/pvchk-sed-%.s $(obj)/pvchk-%.s $(obj)/pvchk-%.o
diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate
new file mode 100644
index 00000000000..ceeffc50976
--- /dev/null
+++ b/arch/ia64/kernel/Makefile.gate
@@ -0,0 +1,27 @@
+# The gate DSO image is built using a special linker script.
+
+targets += gate.so gate-syms.o
+
+extra-y += gate.so gate-syms.o gate.lds gate.o
+
+CPPFLAGS_gate.lds := -P -C -U$(ARCH)
+
+quiet_cmd_gate = GATE $@
+      cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
+		     $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
+	$(call if_changed,gate)
+
+$(obj)/built-in.o: $(obj)/gate-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
+
+GATECFLAGS_gate-syms.o = -r
+$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
+	$(call if_changed,gate)
+
+# gate-data.o contains the gate DSO image as data in section .data..gate.
+# We must build gate.so before we can assemble it.
+# Note: kbuild does not track this dependency due to usage of .incbin
+$(obj)/gate-data.o: $(obj)/gate.so
diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c
index fff82929d22..8b9318d311a 100644
--- a/arch/ia64/kernel/acpi-ext.c
+++ b/arch/ia64/kernel/acpi-ext.c
@@ -8,9 +8,9 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/acpi.h>
 
 #include <asm/acpi-ext.h>
@@ -51,7 +51,7 @@ static acpi_status hp_ccsr_locate(acpi_handle obj, u64 *base, u64 *length)
 	memcpy(length, vendor->byte_data + 8, sizeof(*length));
 
   exit:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 	return status;
 }
 
diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c
deleted file mode 100644
index e683630c8ce..00000000000
--- a/arch/ia64/kernel/acpi-processor.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * arch/ia64/kernel/cpufreq/processor.c
- *
- * Copyright (C) 2005 Intel Corporation
- * 	Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- * 	- Added _PDC for platforms with Intel CPUs
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/acpi.h>
-
-#include <acpi/processor.h>
-#include <asm/acpi.h>
-
-static void init_intel_pdc(struct acpi_processor *pr)
-{
-	struct acpi_object_list *obj_list;
-	union acpi_object *obj;
-	u32 *buf;
-
-	/* allocate and initialize pdc. It will be used later. */
-	obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL);
-	if (!obj_list) {
-		printk(KERN_ERR "Memory allocation error\n");
-		return;
-	}
-
-	obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
-	if (!obj) {
-		printk(KERN_ERR "Memory allocation error\n");
-		kfree(obj_list);
-		return;
-	}
-
-	buf = kmalloc(12, GFP_KERNEL);
-	if (!buf) {
-		printk(KERN_ERR "Memory allocation error\n");
-		kfree(obj);
-		kfree(obj_list);
-		return;
-	}
-
-	buf[0] = ACPI_PDC_REVISION_ID;
-	buf[1] = 1;
-	buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
-
-	obj->type = ACPI_TYPE_BUFFER;
-	obj->buffer.length = 12;
-	obj->buffer.pointer = (u8 *) buf;
-	obj_list->count = 1;
-	obj_list->pointer = obj;
-	pr->pdc = obj_list;
-
-	return;
-}
-
-/* Initialize _PDC data based on the CPU vendor */
-void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
-{
-	pr->pdc = NULL;
-	init_intel_pdc(pr);
-	return;
-}
-
-EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 58c93a30348..615ef81def4 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -32,7 +32,6 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -45,46 +44,47 @@
 #include <linux/efi.h>
 #include <linux/mmzone.h>
 #include <linux/nodemask.h>
+#include <linux/slab.h>
+#include <acpi/processor.h>
 #include <asm/io.h>
 #include <asm/iosapic.h>
 #include <asm/machvec.h>
 #include <asm/page.h>
-#include <asm/system.h>
 #include <asm/numa.h>
 #include <asm/sal.h>
 #include <asm/cyclone.h>
 
-#define BAD_MADT_ENTRY(entry, end) (                                        \
-		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
-		((acpi_table_entry_header *)entry)->length != sizeof(*entry))
-
 #define PREFIX			"ACPI: "
 
-void (*pm_idle) (void);
-EXPORT_SYMBOL(pm_idle);
-void (*pm_power_off) (void);
-EXPORT_SYMBOL(pm_power_off);
-
-unsigned char acpi_kbd_controller_present = 1;
-unsigned char acpi_legacy_devices;
-
-static unsigned int __initdata acpi_madt_rev;
-
+int acpi_lapic;
 unsigned int acpi_cpei_override;
 unsigned int acpi_cpei_phys_cpuid;
 
-#define MAX_SAPICS 256
-u16 ia64_acpiid_to_sapicid[MAX_SAPICS] = {[0 ... MAX_SAPICS - 1] = -1 };
+unsigned long acpi_wakeup_address = 0;
 
-EXPORT_SYMBOL(ia64_acpiid_to_sapicid);
+#ifdef CONFIG_IA64_GENERIC
+static unsigned long __init acpi_find_rsdp(void)
+{
+	unsigned long rsdp_phys = 0;
 
-const char *acpi_get_sysname(void)
+	if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+		rsdp_phys = efi.acpi20;
+	else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
+		printk(KERN_WARNING PREFIX
+		       "v1.0/r0.71 tables no longer supported\n");
+	return rsdp_phys;
+}
+
+const char __init *
+acpi_get_sysname(void)
 {
-#ifdef CONFIG_IA64_GENERIC
 	unsigned long rsdp_phys;
-	struct acpi20_table_rsdp *rsdp;
+	struct acpi_table_rsdp *rsdp;
 	struct acpi_table_xsdt *xsdt;
 	struct acpi_table_header *hdr;
+#ifdef CONFIG_INTEL_IOMMU
+	u64 i, nentries;
+#endif
 
 	rsdp_phys = acpi_find_rsdp();
 	if (!rsdp_phys) {
@@ -93,16 +93,16 @@ const char *acpi_get_sysname(void)
 		return "dig";
 	}
 
-	rsdp = (struct acpi20_table_rsdp *)__va(rsdp_phys);
-	if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
+	rsdp = (struct acpi_table_rsdp *)__va(rsdp_phys);
+	if (strncmp(rsdp->signature, ACPI_SIG_RSDP, sizeof(ACPI_SIG_RSDP) - 1)) {
 		printk(KERN_ERR
 		       "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
 		return "dig";
 	}
 
-	xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_address);
+	xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_physical_address);
 	hdr = &xsdt->header;
-	if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
+	if (strncmp(hdr->signature, ACPI_SIG_XSDT, sizeof(ACPI_SIG_XSDT) - 1)) {
 		printk(KERN_ERR
 		       "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
 		return "dig";
@@ -111,28 +111,27 @@ const char *acpi_get_sysname(void)
 	if (!strcmp(hdr->oem_id, "HP")) {
 		return "hpzx1";
 	} else if (!strcmp(hdr->oem_id, "SGI")) {
-		return "sn2";
+		if (!strcmp(hdr->oem_table_id + 4, "UV"))
+			return "uv";
+		else
+			return "sn2";
 	}
 
-	return "dig";
-#else
-# if defined (CONFIG_IA64_HP_SIM)
-	return "hpsim";
-# elif defined (CONFIG_IA64_HP_ZX1)
-	return "hpzx1";
-# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
-	return "hpzx1_swiotlb";
-# elif defined (CONFIG_IA64_SGI_SN2)
-	return "sn2";
-# elif defined (CONFIG_IA64_DIG)
-	return "dig";
-# else
-#	error Unknown platform.  Fix acpi.c.
-# endif
+#ifdef CONFIG_INTEL_IOMMU
+	/* Look for Intel IOMMU */
+	nentries = (hdr->length - sizeof(*hdr)) /
+			 sizeof(xsdt->table_offset_entry[0]);
+	for (i = 0; i < nentries; i++) {
+		hdr = __va(xsdt->table_offset_entry[i]);
+		if (strncmp(hdr->signature, ACPI_SIG_DMAR,
+			sizeof(ACPI_SIG_DMAR) - 1) == 0)
+			return "dig_vtd";
+	}
 #endif
-}
 
-#ifdef CONFIG_ACPI
+	return "dig";
+}
+#endif /* CONFIG_IA64_GENERIC */
 
 #define ACPI_MAX_PLATFORM_INTERRUPTS	256
 
@@ -160,27 +159,30 @@ int acpi_request_vector(u32 int_type)
 	return vector;
 }
 
-char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
+char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
 {
 	return __va(phys_addr);
 }
 
+void __init __acpi_unmap_table(char *map, unsigned long size)
+{
+}
+
 /* --------------------------------------------------------------------------
                             Boot-time Table Parsing
    -------------------------------------------------------------------------- */
 
-static int total_cpus __initdata;
 static int available_cpus __initdata;
 struct acpi_table_madt *acpi_madt __initdata;
 static u8 has_8259;
 
 static int __init
-acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
+acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
 			  const unsigned long end)
 {
-	struct acpi_table_lapic_addr_ovr *lapic;
+	struct acpi_madt_local_apic_override *lapic;
 
-	lapic = (struct acpi_table_lapic_addr_ovr *)header;
+	lapic = (struct acpi_madt_local_apic_override *)header;
 
 	if (BAD_MADT_ENTRY(lapic, end))
 		return -EINVAL;
@@ -193,22 +195,19 @@ acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
 }
 
 static int __init
-acpi_parse_lsapic(acpi_table_entry_header * header, const unsigned long end)
+acpi_parse_lsapic(struct acpi_subtable_header * header, const unsigned long end)
 {
-	struct acpi_table_lsapic *lsapic;
+	struct acpi_madt_local_sapic *lsapic;
 
-	lsapic = (struct acpi_table_lsapic *)header;
+	lsapic = (struct acpi_madt_local_sapic *)header;
 
-	if (BAD_MADT_ENTRY(lsapic, end))
-		return -EINVAL;
+	/*Skip BAD_MADT_ENTRY check, as lsapic size could vary */
 
-	if (lsapic->flags.enabled) {
+	if (lsapic->lapic_flags & ACPI_MADT_ENABLED) {
 #ifdef CONFIG_SMP
 		smp_boot_data.cpu_phys_id[available_cpus] =
 		    (lsapic->id << 8) | lsapic->eid;
 #endif
-		ia64_acpiid_to_sapicid[lsapic->acpi_id] =
-		    (lsapic->id << 8) | lsapic->eid;
 		++available_cpus;
 	}
 
@@ -217,11 +216,11 @@ acpi_parse_lsapic(acpi_table_entry_header * header, const unsigned long end)
 }
 
 static int __init
-acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
+acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end)
 {
-	struct acpi_table_lapic_nmi *lacpi_nmi;
+	struct acpi_madt_local_apic_nmi *lacpi_nmi;
 
-	lacpi_nmi = (struct acpi_table_lapic_nmi *)header;
+	lacpi_nmi = (struct acpi_madt_local_apic_nmi *)header;
 
 	if (BAD_MADT_ENTRY(lacpi_nmi, end))
 		return -EINVAL;
@@ -231,11 +230,11 @@ acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
 }
 
 static int __init
-acpi_parse_iosapic(acpi_table_entry_header * header, const unsigned long end)
+acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long end)
 {
-	struct acpi_table_iosapic *iosapic;
+	struct acpi_madt_io_sapic *iosapic;
 
-	iosapic = (struct acpi_table_iosapic *)header;
+	iosapic = (struct acpi_madt_io_sapic *)header;
 
 	if (BAD_MADT_ENTRY(iosapic, end))
 		return -EINVAL;
@@ -243,14 +242,16 @@ acpi_parse_iosapic(acpi_table_entry_header * header, const unsigned long end)
 	return iosapic_init(iosapic->address, iosapic->global_irq_base);
 }
 
+static unsigned int __initdata acpi_madt_rev;
+
 static int __init
-acpi_parse_plat_int_src(acpi_table_entry_header * header,
+acpi_parse_plat_int_src(struct acpi_subtable_header * header,
 			const unsigned long end)
 {
-	struct acpi_table_plat_int_src *plintsrc;
+	struct acpi_madt_interrupt_source *plintsrc;
 	int vector;
 
-	plintsrc = (struct acpi_table_plat_int_src *)header;
+	plintsrc = (struct acpi_madt_interrupt_source *)header;
 
 	if (BAD_MADT_ENTRY(plintsrc, end))
 		return -EINVAL;
@@ -261,19 +262,19 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header,
 	 */
 	vector = iosapic_register_platform_intr(plintsrc->type,
 						plintsrc->global_irq,
-						plintsrc->iosapic_vector,
+						plintsrc->io_sapic_vector,
 						plintsrc->eid,
 						plintsrc->id,
-						(plintsrc->flags.polarity ==
-						 1) ? IOSAPIC_POL_HIGH :
-						IOSAPIC_POL_LOW,
-						(plintsrc->flags.trigger ==
-						 1) ? IOSAPIC_EDGE :
-						IOSAPIC_LEVEL);
+						((plintsrc->inti_flags & ACPI_MADT_POLARITY_MASK) ==
+						 ACPI_MADT_POLARITY_ACTIVE_HIGH) ?
+						IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+						((plintsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) ==
+						 ACPI_MADT_TRIGGER_EDGE) ?
+						IOSAPIC_EDGE : IOSAPIC_LEVEL);
 
 	platform_intr_list[plintsrc->type] = vector;
 	if (acpi_madt_rev > 1) {
-		acpi_cpei_override = plintsrc->plint_flags.cpei_override_flag;
+		acpi_cpei_override = plintsrc->flags & ACPI_MADT_CPEI_OVERRIDE;
 	}
 
 	/*
@@ -328,30 +329,32 @@ unsigned int get_cpei_target_cpu(void)
 }
 
 static int __init
-acpi_parse_int_src_ovr(acpi_table_entry_header * header,
+acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
 		       const unsigned long end)
 {
-	struct acpi_table_int_src_ovr *p;
+	struct acpi_madt_interrupt_override *p;
 
-	p = (struct acpi_table_int_src_ovr *)header;
+	p = (struct acpi_madt_interrupt_override *)header;
 
 	if (BAD_MADT_ENTRY(p, end))
 		return -EINVAL;
 
-	iosapic_override_isa_irq(p->bus_irq, p->global_irq,
-				 (p->flags.polarity ==
-				  1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
-				 (p->flags.trigger ==
-				  1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+	iosapic_override_isa_irq(p->source_irq, p->global_irq,
+				 ((p->inti_flags & ACPI_MADT_POLARITY_MASK) ==
+				  ACPI_MADT_POLARITY_ACTIVE_LOW) ?
+				 IOSAPIC_POL_LOW : IOSAPIC_POL_HIGH,
+				 ((p->inti_flags & ACPI_MADT_TRIGGER_MASK) ==
+				 ACPI_MADT_TRIGGER_LEVEL) ?
+				 IOSAPIC_LEVEL : IOSAPIC_EDGE);
 	return 0;
 }
 
 static int __init
-acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
+acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end)
 {
-	struct acpi_table_nmi_src *nmi_src;
+	struct acpi_madt_nmi_source *nmi_src;
 
-	nmi_src = (struct acpi_table_nmi_src *)header;
+	nmi_src = (struct acpi_madt_nmi_source *)header;
 
 	if (BAD_MADT_ENTRY(nmi_src, end))
 		return -EINVAL;
@@ -375,12 +378,12 @@ static void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	}
 }
 
-static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
+static int __init acpi_parse_madt(struct acpi_table_header *table)
 {
-	if (!phys_addr || !size)
+	if (!table)
 		return -EINVAL;
 
-	acpi_madt = (struct acpi_table_madt *)__va(phys_addr);
+	acpi_madt = (struct acpi_table_madt *)table;
 
 	acpi_madt_rev = acpi_madt->header.revision;
 
@@ -388,14 +391,14 @@ static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
 #ifdef CONFIG_ITANIUM
 	has_8259 = 1;		/* Firmware on old Itanium systems is broken */
 #else
-	has_8259 = acpi_madt->flags.pcat_compat;
+	has_8259 = acpi_madt->flags & ACPI_MADT_PCAT_COMPAT;
 #endif
 	iosapic_system_init(has_8259);
 
 	/* Get base address of IPI Message Block */
 
-	if (acpi_madt->lapic_address)
-		ipi_base_addr = ioremap(acpi_madt->lapic_address, 0);
+	if (acpi_madt->address)
+		ipi_base_addr = ioremap(acpi_madt->address, 0);
 
 	printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr);
 
@@ -412,31 +415,32 @@ static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
 #define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
 
 static int __initdata srat_num_cpus;	/* number of cpus */
-static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
+static u32 pxm_flag[PXM_FLAG_LEN];
 #define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
 #define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
-/* maps to convert between proximity domain and logical node ID */
-int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
-int __initdata nid_to_pxm_map[MAX_NUMNODES];
 static struct acpi_table_slit __initdata *slit_table;
+cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
 
-static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa)
+static int __init
+get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
 {
 	int pxm;
 
-	pxm = pa->proximity_domain;
-	if (ia64_platform_is("sn2"))
-		pxm += pa->reserved[0] << 8;
+	pxm = pa->proximity_domain_lo;
+	if (ia64_platform_is("sn2") || acpi_srat_revision >= 2)
+		pxm += pa->proximity_domain_hi[0] << 8;
 	return pxm;
 }
 
-static int get_memory_proximity_domain(struct acpi_table_memory_affinity *ma)
+static int __init
+get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
 {
 	int pxm;
 
 	pxm = ma->proximity_domain;
-	if (ia64_platform_is("sn2"))
-		pxm += ma->reserved1[0] << 8;
+	if (!ia64_platform_is("sn2") && acpi_srat_revision <= 1)
+		pxm &= 0xff;
+
 	return pxm;
 }
 
@@ -449,39 +453,45 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 	u32 len;
 
 	len = sizeof(struct acpi_table_header) + 8
-	    + slit->localities * slit->localities;
+	    + slit->locality_count * slit->locality_count;
 	if (slit->header.length != len) {
 		printk(KERN_ERR
 		       "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
 		       len, slit->header.length);
-		memset(numa_slit, 10, sizeof(numa_slit));
 		return;
 	}
 	slit_table = slit;
 }
 
 void __init
-acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
+acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 {
 	int pxm;
 
-	if (!pa->flags.enabled)
+	if (!(pa->flags & ACPI_SRAT_CPU_ENABLED))
 		return;
 
+	if (srat_num_cpus >= ARRAY_SIZE(node_cpuid)) {
+		printk_once(KERN_WARNING
+			    "node_cpuid[%ld] is too small, may not be able to use all cpus\n",
+			    ARRAY_SIZE(node_cpuid));
+		return;
+	}
 	pxm = get_processor_proximity_domain(pa);
 
 	/* record this node in proximity bitmap */
 	pxm_bit_set(pxm);
 
 	node_cpuid[srat_num_cpus].phys_id =
-	    (pa->apic_id << 8) | (pa->lsapic_eid);
+	    (pa->apic_id << 8) | (pa->local_sapic_eid);
 	/* nid should be overridden as logical node id later */
 	node_cpuid[srat_num_cpus].nid = pxm;
+	cpu_set(srat_num_cpus, early_cpu_possible_map);
 	srat_num_cpus++;
 }
 
-void __init
-acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
+int __init
+acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
 	unsigned long paddr, size;
 	int pxm;
@@ -490,14 +500,12 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
 	pxm = get_memory_proximity_domain(ma);
 
 	/* fill node memory chunk structure */
-	paddr = ma->base_addr_hi;
-	paddr = (paddr << 32) | ma->base_addr_lo;
-	size = ma->length_hi;
-	size = (size << 32) | ma->length_lo;
+	paddr = ma->base_address;
+	size = ma->length;
 
 	/* Ignore disabled entries */
-	if (!ma->flags.enabled)
-		return;
+	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
+		return -1;
 
 	/* record this node in proximity bitmap */
 	pxm_bit_set(pxm);
@@ -516,6 +524,7 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
 	p->size = size;
 	p->nid = pxm;
 	num_node_memblks++;
+	return 0;
 }
 
 void __init acpi_numa_arch_fixup(void)
@@ -533,22 +542,17 @@ void __init acpi_numa_arch_fixup(void)
 	 * MCD - This can probably be dropped now.  No need for pxm ID to node ID
 	 * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES.
 	 */
-	/* calculate total number of nodes in system from PXM bitmap */
-	memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
-	memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
 	nodes_clear(node_online_map);
 	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
 		if (pxm_bit_test(i)) {
-			int nid = num_online_nodes();
-			pxm_to_nid_map[i] = nid;
-			nid_to_pxm_map[nid] = i;
+			int nid = acpi_map_pxm_to_node(i);
 			node_set_online(nid);
 		}
 	}
 
 	/* set logical node id in memory chunk structure */
 	for (i = 0; i < num_node_memblks; i++)
-		node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid];
+		node_memblk[i].nid = pxm_to_node(node_memblk[i].nid);
 
 	/* assign memory bank numbers for each chunk on each node */
 	for_each_online_node(i) {
@@ -561,27 +565,33 @@ void __init acpi_numa_arch_fixup(void)
 	}
 
 	/* set logical node id in cpu structure */
-	for (i = 0; i < srat_num_cpus; i++)
-		node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid];
+	for_each_possible_early_cpu(i)
+		node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
 
 	printk(KERN_INFO "Number of logical nodes in system = %d\n",
 	       num_online_nodes());
 	printk(KERN_INFO "Number of memory chunks in system = %d\n",
 	       num_node_memblks);
 
-	if (!slit_table)
+	if (!slit_table) {
+		for (i = 0; i < MAX_NUMNODES; i++)
+			for (j = 0; j < MAX_NUMNODES; j++)
+				node_distance(i, j) = i == j ? LOCAL_DISTANCE :
+							REMOTE_DISTANCE;
 		return;
+	}
+
 	memset(numa_slit, -1, sizeof(numa_slit));
-	for (i = 0; i < slit_table->localities; i++) {
+	for (i = 0; i < slit_table->locality_count; i++) {
 		if (!pxm_bit_test(i))
 			continue;
-		node_from = pxm_to_nid_map[i];
-		for (j = 0; j < slit_table->localities; j++) {
+		node_from = pxm_to_node(i);
+		for (j = 0; j < slit_table->locality_count; j++) {
 			if (!pxm_bit_test(j))
 				continue;
-			node_to = pxm_to_nid_map[j];
+			node_to = pxm_to_node(j);
 			node_distance(node_from, node_to) =
-			    slit_table->entry[i * slit_table->localities + j];
+			    slit_table->entry[i * slit_table->locality_count + j];
 		}
 	}
 
@@ -600,8 +610,11 @@ void __init acpi_numa_arch_fixup(void)
  * success: return IRQ number (>=0)
  * failure: return < 0
  */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
+	if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
+		return gsi;
+
 	if (has_8259 && gsi < 16)
 		return isa_irq_to_vector(gsi);
 
@@ -613,50 +626,75 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
 				      ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE :
 				     IOSAPIC_LEVEL);
 }
-
-EXPORT_SYMBOL(acpi_register_gsi);
+EXPORT_SYMBOL_GPL(acpi_register_gsi);
 
 void acpi_unregister_gsi(u32 gsi)
 {
+	if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
+		return;
+
+	if (has_8259 && gsi < 16)
+		return;
+
 	iosapic_unregister_intr(gsi);
 }
+EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
 
-EXPORT_SYMBOL(acpi_unregister_gsi);
-
-static int __init acpi_parse_fadt(unsigned long phys_addr, unsigned long size)
+static int __init acpi_parse_fadt(struct acpi_table_header *table)
 {
 	struct acpi_table_header *fadt_header;
-	struct fadt_descriptor_rev2 *fadt;
+	struct acpi_table_fadt *fadt;
 
-	if (!phys_addr || !size)
+	if (!table)
 		return -EINVAL;
 
-	fadt_header = (struct acpi_table_header *)__va(phys_addr);
+	fadt_header = (struct acpi_table_header *)table;
 	if (fadt_header->revision != 3)
 		return -ENODEV;	/* Only deal with ACPI 2.0 FADT */
 
-	fadt = (struct fadt_descriptor_rev2 *)fadt_header;
-
-	if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
-		acpi_kbd_controller_present = 0;
-
-	if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES)
-		acpi_legacy_devices = 1;
+	fadt = (struct acpi_table_fadt *)fadt_header;
 
-	acpi_register_gsi(fadt->sci_int, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
+	acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE,
+				 ACPI_ACTIVE_LOW);
 	return 0;
 }
 
-unsigned long __init acpi_find_rsdp(void)
+int __init early_acpi_boot_init(void)
 {
-	unsigned long rsdp_phys = 0;
+	int ret;
 
-	if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
-		rsdp_phys = efi.acpi20;
-	else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
-		printk(KERN_WARNING PREFIX
-		       "v1.0/r0.71 tables no longer supported\n");
-	return rsdp_phys;
+	/*
+	 * do a partial walk of MADT to determine how many CPUs
+	 * we have including offline CPUs
+	 */
+	if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
+		printk(KERN_ERR PREFIX "Can't find MADT\n");
+		return 0;
+	}
+
+	ret = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
+		acpi_parse_lsapic, NR_CPUS);
+	if (ret < 1)
+		printk(KERN_ERR PREFIX
+		       "Error parsing MADT - no LAPIC entries\n");
+	else
+		acpi_lapic = 1;
+
+#ifdef CONFIG_SMP
+	if (available_cpus == 0) {
+		printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
+		printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
+		smp_boot_data.cpu_phys_id[available_cpus] =
+		    hard_smp_processor_id();
+		available_cpus = 1;	/* We've got at least one of these, no? */
+	}
+	smp_boot_data.cpu_count = available_cpus;
+#endif
+	/* Make boot-up look pretty */
+	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
+	       total_cpus);
+
+	return 0;
 }
 
 int __init acpi_boot_init(void)
@@ -670,7 +708,7 @@ int __init acpi_boot_init(void)
 	 * information -- the successor to MPS tables.
 	 */
 
-	if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) {
+	if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
 		printk(KERN_ERR PREFIX "Can't find MADT\n");
 		goto skip_madt;
 	}
@@ -678,40 +716,37 @@ int __init acpi_boot_init(void)
 	/* Local APIC */
 
 	if (acpi_table_parse_madt
-	    (ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
+	    (ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, acpi_parse_lapic_addr_ovr, 0) < 0)
 		printk(KERN_ERR PREFIX
 		       "Error parsing LAPIC address override entry\n");
 
-	if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS)
-	    < 1)
-		printk(KERN_ERR PREFIX
-		       "Error parsing MADT - no LAPIC entries\n");
-
-	if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0)
+	if (acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0)
 	    < 0)
 		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
 
 	/* I/O APIC */
 
 	if (acpi_table_parse_madt
-	    (ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
-		printk(KERN_ERR PREFIX
-		       "Error parsing MADT - no IOSAPIC entries\n");
+	    (ACPI_MADT_TYPE_IO_SAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) {
+		if (!ia64_platform_is("sn2"))
+			printk(KERN_ERR PREFIX
+			       "Error parsing MADT - no IOSAPIC entries\n");
+	}
 
 	/* System-Level Interrupt Routing */
 
 	if (acpi_table_parse_madt
-	    (ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src,
+	    (ACPI_MADT_TYPE_INTERRUPT_SOURCE, acpi_parse_plat_int_src,
 	     ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
 		printk(KERN_ERR PREFIX
 		       "Error parsing platform interrupt source entry\n");
 
 	if (acpi_table_parse_madt
-	    (ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
+	    (ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, 0) < 0)
 		printk(KERN_ERR PREFIX
 		       "Error parsing interrupt source overrides entry\n");
 
-	if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
+	if (acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, 0) < 0)
 		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
       skip_madt:
 
@@ -721,21 +756,11 @@ int __init acpi_boot_init(void)
 	 * gets interrupts such as power and sleep buttons.  If it's not
 	 * on a Legacy interrupt, it needs to be setup.
 	 */
-	if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1)
+	if (acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt))
 		printk(KERN_ERR PREFIX "Can't find FADT\n");
 
+#ifdef CONFIG_ACPI_NUMA
 #ifdef CONFIG_SMP
-	if (available_cpus == 0) {
-		printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
-		printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
-		smp_boot_data.cpu_phys_id[available_cpus] =
-		    hard_smp_processor_id();
-		available_cpus = 1;	/* We've got at least one of these, no? */
-	}
-	smp_boot_data.cpu_count = available_cpus;
-
-	smp_build_cpu_map();
-# ifdef CONFIG_ACPI_NUMA
 	if (srat_num_cpus == 0) {
 		int cpu, i = 1;
 		for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
@@ -744,54 +769,53 @@ int __init acpi_boot_init(void)
 				node_cpuid[i++].phys_id =
 				    smp_boot_data.cpu_phys_id[cpu];
 	}
-# endif
 #endif
-#ifdef CONFIG_ACPI_NUMA
 	build_cpu_to_node_map();
 #endif
-	/* Make boot-up look pretty */
-	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
-	       total_cpus);
 	return 0;
 }
 
 int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
 {
-	int vector;
+	int tmp;
 
 	if (has_8259 && gsi < 16)
 		*irq = isa_irq_to_vector(gsi);
 	else {
-		vector = gsi_to_vector(gsi);
-		if (vector == -1)
+		tmp = gsi_to_irq(gsi);
+		if (tmp == -1)
 			return -1;
-
-		*irq = vector;
+		*irq = tmp;
 	}
 	return 0;
 }
 
+int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
+{
+	if (isa_irq >= 16)
+		return -1;
+	*gsi = isa_irq;
+	return 0;
+}
+
 /*
  *  ACPI based hotplug CPU support
  */
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
-static
-int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
+static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 {
 #ifdef CONFIG_ACPI_NUMA
-	int pxm_id;
-
-	pxm_id = acpi_get_pxm(handle);
-
 	/*
-	 * Assuming that the container driver would have set the proximity
-	 * domain and would have initialized pxm_to_nid_map[pxm_id] && pxm_flag
+	 * We don't have cpu-only-node hotadd. But if the system equips
+	 * SRAT table, pxm is already found and node is ready.
+  	 * So, just pxm_to_nid(pxm) is OK.
+	 * This code here is for the system which doesn't have full SRAT
+  	 * table for possible cpus.
 	 */
-	node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_nid_map[pxm_id];
-
 	node_cpuid[cpu].phys_id = physid;
+	node_cpuid[cpu].nid = acpi_get_node(handle);
 #endif
-	return (0);
+	return 0;
 }
 
 int additional_cpus __initdata = -1;
@@ -807,11 +831,11 @@ static __init int setup_additional_cpus(char *s)
 early_param("additional_cpus", setup_additional_cpus);
 
 /*
- * cpu_possible_map should be static, it cannot change as cpu's
+ * cpu_possible_mask should be static, it cannot change as CPUs
  * are onlined, or offlined. The reason is per-cpu data-structures
  * are allocated by some modules at init time, and dont expect to
  * do this dynamically on cpu arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
+ * cpu_present_mask on the other hand can change dynamically.
  * In case when cpu_hotplug is not compiled, then we resort to current
  * behaviour, which is cpu_possible == cpu_present.
  * - Ashok Raj
@@ -837,81 +861,48 @@ __init void prefill_possible_map(void)
 
 	possible = available_cpus + additional_cpus;
 
-	if (possible > NR_CPUS)
-		possible = NR_CPUS;
+	if (possible > nr_cpu_ids)
+		possible = nr_cpu_ids;
 
 	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
 		possible, max((possible - available_cpus), 0));
 
 	for (i = 0; i < possible; i++)
-		cpu_set(i, cpu_possible_map);
+		set_cpu_possible(i, true);
 }
 
-int acpi_map_lsapic(acpi_handle handle, int *pcpu)
+static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 {
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *obj;
-	struct acpi_table_lsapic *lsapic;
 	cpumask_t tmp_map;
-	long physid;
 	int cpu;
 
-	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
-		return -EINVAL;
-
-	if (!buffer.length || !buffer.pointer)
-		return -EINVAL;
-
-	obj = buffer.pointer;
-	if (obj->type != ACPI_TYPE_BUFFER ||
-	    obj->buffer.length < sizeof(*lsapic)) {
-		acpi_os_free(buffer.pointer);
-		return -EINVAL;
-	}
-
-	lsapic = (struct acpi_table_lsapic *)obj->buffer.pointer;
-
-	if ((lsapic->header.type != ACPI_MADT_LSAPIC) ||
-	    (!lsapic->flags.enabled)) {
-		acpi_os_free(buffer.pointer);
-		return -EINVAL;
-	}
-
-	physid = ((lsapic->id << 8) | (lsapic->eid));
-
-	acpi_os_free(buffer.pointer);
-	buffer.length = ACPI_ALLOCATE_BUFFER;
-	buffer.pointer = NULL;
-
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
-	if (cpu >= NR_CPUS)
+	cpumask_complement(&tmp_map, cpu_present_mask);
+	cpu = cpumask_first(&tmp_map);
+	if (cpu >= nr_cpu_ids)
 		return -EINVAL;
 
 	acpi_map_cpu2node(handle, cpu, physid);
 
-	cpu_set(cpu, cpu_present_map);
+	set_cpu_present(cpu, true);
 	ia64_cpu_to_sapicid[cpu] = physid;
-	ia64_acpiid_to_sapicid[lsapic->acpi_id] = ia64_cpu_to_sapicid[cpu];
+
+	acpi_processor_set_pdc(handle);
 
 	*pcpu = cpu;
 	return (0);
 }
 
+/* wrapper to silence section mismatch warning */
+int __ref acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
+{
+	return _acpi_map_lsapic(handle, physid, pcpu);
+}
 EXPORT_SYMBOL(acpi_map_lsapic);
 
 int acpi_unmap_lsapic(int cpu)
 {
-	int i;
-
-	for (i = 0; i < MAX_SAPICS; i++) {
-		if (ia64_acpiid_to_sapicid[i] == ia64_cpu_to_sapicid[cpu]) {
-			ia64_acpiid_to_sapicid[i] = -1;
-			break;
-		}
-	}
 	ia64_cpu_to_sapicid[cpu] = -1;
-	cpu_clear(cpu, cpu_present_map);
+	set_cpu_present(cpu, false);
 
 #ifdef CONFIG_ACPI_NUMA
 	/* NUMA specific cleanup's */
@@ -924,14 +915,14 @@ EXPORT_SYMBOL(acpi_unmap_lsapic);
 #endif				/* CONFIG_ACPI_HOTPLUG_CPU */
 
 #ifdef CONFIG_ACPI_NUMA
-static acpi_status __devinit
-acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
+static acpi_status acpi_map_iosapic(acpi_handle handle, u32 depth,
+				    void *context, void **ret)
 {
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
-	struct acpi_table_iosapic *iosapic;
+	struct acpi_madt_io_sapic *iosapic;
 	unsigned int gsi_base;
-	int pxm, node;
+	int node;
 
 	/* Only care about objects w/ a method that returns the MADT */
 	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
@@ -943,33 +934,25 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
 	obj = buffer.pointer;
 	if (obj->type != ACPI_TYPE_BUFFER ||
 	    obj->buffer.length < sizeof(*iosapic)) {
-		acpi_os_free(buffer.pointer);
+		kfree(buffer.pointer);
 		return AE_OK;
 	}
 
-	iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer;
+	iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer;
 
-	if (iosapic->header.type != ACPI_MADT_IOSAPIC) {
-		acpi_os_free(buffer.pointer);
+	if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) {
+		kfree(buffer.pointer);
 		return AE_OK;
 	}
 
 	gsi_base = iosapic->global_irq_base;
 
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
-	/*
-	 * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell
-	 * us which node to associate this with.
-	 */
-	pxm = acpi_get_pxm(handle);
-	if (pxm < 0)
-		return AE_OK;
-
-	node = pxm_to_nid_map[pxm];
-
-	if (node >= MAX_NUMNODES || !node_online(node) ||
-	    cpus_empty(node_to_cpumask(node)))
+	/* OK, it's an IOSAPIC MADT entry; associate it with a node */
+	node = acpi_get_node(handle);
+	if (node == NUMA_NO_NODE || !node_online(node) ||
+	    cpumask_empty(cpumask_of_node(node)))
 		return AE_OK;
 
 	/* We know a gsi to node mapping! */
@@ -987,7 +970,7 @@ acpi_map_iosapics (void)
 fs_initcall(acpi_map_iosapics);
 #endif				/* CONFIG_ACPI_NUMA */
 
-int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+int __ref acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
 {
 	int err;
 
@@ -1010,4 +993,9 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
 
 EXPORT_SYMBOL(acpi_unregister_ioapic);
 
-#endif				/* CONFIG_ACPI */
+/*
+ * acpi_suspend_lowlevel() - save kernel state and suspend.
+ *
+ * TBD when when IA64 starts to support suspend...
+ */
+int acpi_suspend_lowlevel(void) { return 0; }
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 77225659e96..60ef83e6db7 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -5,22 +5,19 @@
  */
 
 #define ASM_OFFSETS_C 1
-#include <linux/config.h>
 
 #include <linux/sched.h>
-
-#include <asm-ia64/processor.h>
-#include <asm-ia64/ptrace.h>
-#include <asm-ia64/siginfo.h>
-#include <asm-ia64/sigcontext.h>
-#include <asm-ia64/mca.h>
+#include <linux/pid.h>
+#include <linux/clocksource.h>
+#include <linux/kbuild.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/siginfo.h>
+#include <asm/sigcontext.h>
+#include <asm/mca.h>
 
 #include "../kernel/sigframe.h"
-
-#define DEFINE(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
+#include "../kernel/fsyscall_gtod_data.h"
 
 void foo(void)
 {
@@ -33,16 +30,29 @@ void foo(void)
 	DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
 	DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
 
+	BUILD_BUG_ON(sizeof(struct upid) != 32);
+	DEFINE(IA64_UPID_SHIFT, 5);
+
 	BLANK();
 
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
+	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
+	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
+	DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
+#endif
 
 	BLANK();
 
 	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
 	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+	DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+	DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
+	DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
 	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
 	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
 	DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
@@ -217,16 +227,24 @@ void foo(void)
 	DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
 	       offsetof (struct ia64_mca_cpu, init_stack));
 	BLANK();
-	DEFINE(IA64_SAL_OS_STATE_COMMON_OFFSET,
-	       offsetof (struct ia64_sal_os_state, sal_ra));
 	DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET,
 	       offsetof (struct ia64_sal_os_state, os_gp));
-	DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
-	       offsetof (struct ia64_sal_os_state, pal_min_state));
 	DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET,
 	       offsetof (struct ia64_sal_os_state, proc_state_param));
+	DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET,
+	       offsetof (struct ia64_sal_os_state, sal_ra));
+	DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET,
+	       offsetof (struct ia64_sal_os_state, sal_gp));
+	DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
+	       offsetof (struct ia64_sal_os_state, pal_min_state));
+	DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET,
+	       offsetof (struct ia64_sal_os_state, os_status));
+	DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET,
+	       offsetof (struct ia64_sal_os_state, context));
 	DEFINE(IA64_SAL_OS_STATE_SIZE,
 	       sizeof (struct ia64_sal_os_state));
+	BLANK();
+
 	DEFINE(IA64_PMSA_GR_OFFSET,
 	       offsetof (struct pal_min_state_area_s, pmsa_gr));
 	DEFINE(IA64_PMSA_BANK1_GR_OFFSET,
@@ -248,17 +266,25 @@ void foo(void)
 	BLANK();
 
 	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
-	DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
-	DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
-	DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift));
-	DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc));
-	DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset));
-	DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle));
-	DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter));
-	DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter));
-	DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask));
-	DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU);
-	DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
-	DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
-	DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
+	DEFINE(IA64_GTOD_SEQ_OFFSET,
+	       offsetof (struct fsyscall_gtod_data_t, seq));
+	DEFINE(IA64_GTOD_WALL_TIME_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, wall_time));
+	DEFINE(IA64_GTOD_MONO_TIME_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, monotonic_time));
+	DEFINE(IA64_CLKSRC_MASK_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_mask));
+	DEFINE(IA64_CLKSRC_MULT_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_mult));
+	DEFINE(IA64_CLKSRC_SHIFT_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_shift));
+	DEFINE(IA64_CLKSRC_MMIO_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_fsys_mmio));
+	DEFINE(IA64_CLKSRC_CYCLE_LAST_OFFSET,
+		offsetof (struct fsyscall_gtod_data_t, clk_cycle_last));
+	DEFINE(IA64_ITC_JITTER_OFFSET,
+		offsetof (struct itc_jitter_data_t, itc_jitter));
+	DEFINE(IA64_ITC_LASTCYCLE_OFFSET,
+		offsetof (struct itc_jitter_data_t, itc_lastcycle));
+
 }
diff --git a/arch/ia64/kernel/audit.c b/arch/ia64/kernel/audit.c
new file mode 100644
index 00000000000..96a9d18ff4c
--- /dev/null
+++ b/arch/ia64/kernel/audit.c
@@ -0,0 +1,60 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+	return 0;
+}
+
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+	switch(syscall) {
+	case __NR_open:
+		return 2;
+	case __NR_openat:
+		return 3;
+	case __NR_execve:
+		return 5;
+	default:
+		return 0;
+	}
+}
+
+static int __init audit_classes_init(void)
+{
+	audit_register_class(AUDIT_CLASS_WRITE, write_class);
+	audit_register_class(AUDIT_CLASS_READ, read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+	return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/ia64/kernel/cpufreq/Kconfig b/arch/ia64/kernel/cpufreq/Kconfig
deleted file mode 100644
index 2d9d5279b98..00000000000
--- a/arch/ia64/kernel/cpufreq/Kconfig
+++ /dev/null
@@ -1,29 +0,0 @@
-
-#
-# CPU Frequency scaling
-#
-
-menu "CPU Frequency scaling"
-
-source "drivers/cpufreq/Kconfig"
-
-if CPU_FREQ
-
-comment "CPUFreq processor drivers"
-
-config IA64_ACPI_CPUFREQ
-	tristate "ACPI Processor P-States driver"
-	select CPU_FREQ_TABLE
-	depends on ACPI_PROCESSOR
-	help
-	This driver adds a CPUFreq driver which utilizes the ACPI
-	Processor Performance States.
-
-	For details, take a look at <file:Documentation/cpu-freq/>.
-
-	If in doubt, say N.
-
-endif   # CPU_FREQ
-
-endmenu
-
diff --git a/arch/ia64/kernel/cpufreq/Makefile b/arch/ia64/kernel/cpufreq/Makefile
deleted file mode 100644
index 4838f2a57c7..00000000000
--- a/arch/ia64/kernel/cpufreq/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-obj-$(CONFIG_IA64_ACPI_CPUFREQ)		+= acpi-cpufreq.o
-
diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
deleted file mode 100644
index 5a1bf815282..00000000000
--- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * arch/ia64/kernel/cpufreq/acpi-cpufreq.c
- * This file provides the ACPI based P-state support. This
- * module works with generic cpufreq infrastructure. Most of
- * the code is based on i386 version
- * (arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c)
- *
- * Copyright (C) 2005 Intel Corp
- *      Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- */
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <asm/io.h>
-#include <asm/uaccess.h>
-#include <asm/pal.h>
-
-#include <linux/acpi.h>
-#include <acpi/processor.h>
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
-
-MODULE_AUTHOR("Venkatesh Pallipadi");
-MODULE_DESCRIPTION("ACPI Processor P-States Driver");
-MODULE_LICENSE("GPL");
-
-
-struct cpufreq_acpi_io {
-	struct acpi_processor_performance	acpi_data;
-	struct cpufreq_frequency_table		*freq_table;
-	unsigned int				resume;
-};
-
-static struct cpufreq_acpi_io	*acpi_io_data[NR_CPUS];
-
-static struct cpufreq_driver acpi_cpufreq_driver;
-
-
-static int
-processor_set_pstate (
-	u32	value)
-{
-	s64 retval;
-
-	dprintk("processor_set_pstate\n");
-
-	retval = ia64_pal_set_pstate((u64)value);
-
-	if (retval) {
-		dprintk("Failed to set freq to 0x%x, with error 0x%x\n",
-		        value, retval);
-		return -ENODEV;
-	}
-	return (int)retval;
-}
-
-
-static int
-processor_get_pstate (
-	u32	*value)
-{
-	u64	pstate_index = 0;
-	s64 	retval;
-
-	dprintk("processor_get_pstate\n");
-
-	retval = ia64_pal_get_pstate(&pstate_index);
-	*value = (u32) pstate_index;
-
-	if (retval)
-		dprintk("Failed to get current freq with "
-		        "error 0x%x, idx 0x%x\n", retval, *value);
-
-	return (int)retval;
-}
-
-
-/* To be used only after data->acpi_data is initialized */
-static unsigned
-extract_clock (
-	struct cpufreq_acpi_io *data,
-	unsigned value,
-	unsigned int cpu)
-{
-	unsigned long i;
-
-	dprintk("extract_clock\n");
-
-	for (i = 0; i < data->acpi_data.state_count; i++) {
-		if (value >= data->acpi_data.states[i].control)
-			return data->acpi_data.states[i].core_frequency;
-	}
-	return data->acpi_data.states[i-1].core_frequency;
-}
-
-
-static unsigned int
-processor_get_freq (
-	struct cpufreq_acpi_io	*data,
-	unsigned int		cpu)
-{
-	int			ret = 0;
-	u32			value = 0;
-	cpumask_t		saved_mask;
-	unsigned long 		clock_freq;
-
-	dprintk("processor_get_freq\n");
-
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	if (smp_processor_id() != cpu) {
-		ret = -EAGAIN;
-		goto migrate_end;
-	}
-
-	/*
-	 * processor_get_pstate gets the average frequency since the
-	 * last get. So, do two PAL_get_freq()...
-	 */
-	ret = processor_get_pstate(&value);
-	ret = processor_get_pstate(&value);
-
-	if (ret) {
-		set_cpus_allowed(current, saved_mask);
-		printk(KERN_WARNING "get performance failed with error %d\n",
-		       ret);
-		ret = -EAGAIN;
-		goto migrate_end;
-	}
-	clock_freq = extract_clock(data, value, cpu);
-	ret = (clock_freq*1000);
-
-migrate_end:
-	set_cpus_allowed(current, saved_mask);
-	return ret;
-}
-
-
-static int
-processor_set_freq (
-	struct cpufreq_acpi_io	*data,
-	unsigned int		cpu,
-	int			state)
-{
-	int			ret = 0;
-	u32			value = 0;
-	struct cpufreq_freqs    cpufreq_freqs;
-	cpumask_t		saved_mask;
-	int			retval;
-
-	dprintk("processor_set_freq\n");
-
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	if (smp_processor_id() != cpu) {
-		retval = -EAGAIN;
-		goto migrate_end;
-	}
-
-	if (state == data->acpi_data.state) {
-		if (unlikely(data->resume)) {
-			dprintk("Called after resume, resetting to P%d\n", state);
-			data->resume = 0;
-		} else {
-			dprintk("Already at target state (P%d)\n", state);
-			retval = 0;
-			goto migrate_end;
-		}
-	}
-
-	dprintk("Transitioning from P%d to P%d\n",
-		data->acpi_data.state, state);
-
-	/* cpufreq frequency struct */
-	cpufreq_freqs.cpu = cpu;
-	cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency;
-	cpufreq_freqs.new = data->freq_table[state].frequency;
-
-	/* notify cpufreq */
-	cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
-
-	/*
-	 * First we write the target state's 'control' value to the
-	 * control_register.
-	 */
-
-	value = (u32) data->acpi_data.states[state].control;
-
-	dprintk("Transitioning to state: 0x%08x\n", value);
-
-	ret = processor_set_pstate(value);
-	if (ret) {
-		unsigned int tmp = cpufreq_freqs.new;
-		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
-		cpufreq_freqs.new = cpufreq_freqs.old;
-		cpufreq_freqs.old = tmp;
-		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
-		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
-		printk(KERN_WARNING "Transition failed with error %d\n", ret);
-		retval = -ENODEV;
-		goto migrate_end;
-	}
-
-	cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
-
-	data->acpi_data.state = state;
-
-	retval = 0;
-
-migrate_end:
-	set_cpus_allowed(current, saved_mask);
-	return (retval);
-}
-
-
-static unsigned int
-acpi_cpufreq_get (
-	unsigned int		cpu)
-{
-	struct cpufreq_acpi_io *data = acpi_io_data[cpu];
-
-	dprintk("acpi_cpufreq_get\n");
-
-	return processor_get_freq(data, cpu);
-}
-
-
-static int
-acpi_cpufreq_target (
-	struct cpufreq_policy   *policy,
-	unsigned int target_freq,
-	unsigned int relation)
-{
-	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-	unsigned int next_state = 0;
-	unsigned int result = 0;
-
-	dprintk("acpi_cpufreq_setpolicy\n");
-
-	result = cpufreq_frequency_table_target(policy,
-			data->freq_table, target_freq, relation, &next_state);
-	if (result)
-		return (result);
-
-	result = processor_set_freq(data, policy->cpu, next_state);
-
-	return (result);
-}
-
-
-static int
-acpi_cpufreq_verify (
-	struct cpufreq_policy   *policy)
-{
-	unsigned int result = 0;
-	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-
-	dprintk("acpi_cpufreq_verify\n");
-
-	result = cpufreq_frequency_table_verify(policy,
-			data->freq_table);
-
-	return (result);
-}
-
-
-static int
-acpi_cpufreq_cpu_init (
-	struct cpufreq_policy   *policy)
-{
-	unsigned int		i;
-	unsigned int		cpu = policy->cpu;
-	struct cpufreq_acpi_io	*data;
-	unsigned int		result = 0;
-
-	dprintk("acpi_cpufreq_cpu_init\n");
-
-	data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
-	if (!data)
-		return (-ENOMEM);
-
-	memset(data, 0, sizeof(struct cpufreq_acpi_io));
-
-	acpi_io_data[cpu] = data;
-
-	result = acpi_processor_register_performance(&data->acpi_data, cpu);
-
-	if (result)
-		goto err_free;
-
-	/* capability check */
-	if (data->acpi_data.state_count <= 1) {
-		dprintk("No P-States\n");
-		result = -ENODEV;
-		goto err_unreg;
-	}
-
-	if ((data->acpi_data.control_register.space_id !=
-					ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-	    (data->acpi_data.status_register.space_id !=
-					ACPI_ADR_SPACE_FIXED_HARDWARE)) {
-		dprintk("Unsupported address space [%d, %d]\n",
-			(u32) (data->acpi_data.control_register.space_id),
-			(u32) (data->acpi_data.status_register.space_id));
-		result = -ENODEV;
-		goto err_unreg;
-	}
-
-	/* alloc freq_table */
-	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
-	                           (data->acpi_data.state_count + 1),
-	                           GFP_KERNEL);
-	if (!data->freq_table) {
-		result = -ENOMEM;
-		goto err_unreg;
-	}
-
-	/* detect transition latency */
-	policy->cpuinfo.transition_latency = 0;
-	for (i=0; i<data->acpi_data.state_count; i++) {
-		if ((data->acpi_data.states[i].transition_latency * 1000) >
-		    policy->cpuinfo.transition_latency) {
-			policy->cpuinfo.transition_latency =
-			    data->acpi_data.states[i].transition_latency * 1000;
-		}
-	}
-	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
-
-	policy->cur = processor_get_freq(data, policy->cpu);
-
-	/* table init */
-	for (i = 0; i <= data->acpi_data.state_count; i++)
-	{
-		data->freq_table[i].index = i;
-		if (i < data->acpi_data.state_count) {
-			data->freq_table[i].frequency =
-			      data->acpi_data.states[i].core_frequency * 1000;
-		} else {
-			data->freq_table[i].frequency = CPUFREQ_TABLE_END;
-		}
-	}
-
-	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
-	if (result) {
-		goto err_freqfree;
-	}
-
-	/* notify BIOS that we exist */
-	acpi_processor_notify_smm(THIS_MODULE);
-
-	printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management "
-	       "activated.\n", cpu);
-
-	for (i = 0; i < data->acpi_data.state_count; i++)
-		dprintk("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
-			(i == data->acpi_data.state?'*':' '), i,
-			(u32) data->acpi_data.states[i].core_frequency,
-			(u32) data->acpi_data.states[i].power,
-			(u32) data->acpi_data.states[i].transition_latency,
-			(u32) data->acpi_data.states[i].bus_master_latency,
-			(u32) data->acpi_data.states[i].status,
-			(u32) data->acpi_data.states[i].control);
-
-	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
-
-	/* the first call to ->target() should result in us actually
-	 * writing something to the appropriate registers. */
-	data->resume = 1;
-
-	return (result);
-
- err_freqfree:
-	kfree(data->freq_table);
- err_unreg:
-	acpi_processor_unregister_performance(&data->acpi_data, cpu);
- err_free:
-	kfree(data);
-	acpi_io_data[cpu] = NULL;
-
-	return (result);
-}
-
-
-static int
-acpi_cpufreq_cpu_exit (
-	struct cpufreq_policy   *policy)
-{
-	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-
-	dprintk("acpi_cpufreq_cpu_exit\n");
-
-	if (data) {
-		cpufreq_frequency_table_put_attr(policy->cpu);
-		acpi_io_data[policy->cpu] = NULL;
-		acpi_processor_unregister_performance(&data->acpi_data,
-		                                      policy->cpu);
-		kfree(data);
-	}
-
-	return (0);
-}
-
-
-static struct freq_attr* acpi_cpufreq_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-
-static struct cpufreq_driver acpi_cpufreq_driver = {
-	.verify 	= acpi_cpufreq_verify,
-	.target 	= acpi_cpufreq_target,
-	.get 		= acpi_cpufreq_get,
-	.init		= acpi_cpufreq_cpu_init,
-	.exit		= acpi_cpufreq_cpu_exit,
-	.name		= "acpi-cpufreq",
-	.owner		= THIS_MODULE,
-	.attr           = acpi_cpufreq_attr,
-};
-
-
-static int __init
-acpi_cpufreq_init (void)
-{
-	dprintk("acpi_cpufreq_init\n");
-
- 	return cpufreq_register_driver(&acpi_cpufreq_driver);
-}
-
-
-static void __exit
-acpi_cpufreq_exit (void)
-{
-	dprintk("acpi_cpufreq_exit\n");
-
-	cpufreq_unregister_driver(&acpi_cpufreq_driver);
-	return;
-}
-
-
-late_initcall(acpi_cpufreq_init);
-module_exit(acpi_cpufreq_exit);
-
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
new file mode 100644
index 00000000000..2955f359e2a
--- /dev/null
+++ b/arch/ia64/kernel/crash.c
@@ -0,0 +1,286 @@
+/*
+ * arch/ia64/kernel/crash.c
+ *
+ * Architecture specific (ia64) functions for kexec based crash dumps.
+ *
+ * Created by: Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Intel Corp	Zou Nan hai <nanhai.zou@intel.com>
+ *
+ */
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
+#include <linux/bootmem.h>
+#include <linux/kexec.h>
+#include <linux/elfcore.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+#include <linux/kdebug.h>
+
+#include <asm/mca.h>
+
+int kdump_status[NR_CPUS];
+static atomic_t kdump_cpu_frozen;
+atomic_t kdump_in_progress;
+static int kdump_freeze_monarch;
+static int kdump_on_init = 1;
+static int kdump_on_fatal_mca = 1;
+
+static inline Elf64_Word
+*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
+		size_t data_len)
+{
+	struct elf_note *note = (struct elf_note *)buf;
+	note->n_namesz = strlen(name) + 1;
+	note->n_descsz = data_len;
+	note->n_type   = type;
+	buf += (sizeof(*note) + 3)/4;
+	memcpy(buf, name, note->n_namesz);
+	buf += (note->n_namesz + 3)/4;
+	memcpy(buf, data, data_len);
+	buf += (data_len + 3)/4;
+	return buf;
+}
+
+static void
+final_note(void *buf)
+{
+	memset(buf, 0, sizeof(struct elf_note));
+}
+
+extern void ia64_dump_cpu_regs(void *);
+
+static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
+
+void
+crash_save_this_cpu(void)
+{
+	void *buf;
+	unsigned long cfm, sof, sol;
+
+	int cpu = smp_processor_id();
+	struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
+
+	elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
+	memset(prstatus, 0, sizeof(*prstatus));
+	prstatus->pr_pid = current->pid;
+
+	ia64_dump_cpu_regs(dst);
+	cfm = dst[43];
+	sol = (cfm >> 7) & 0x7f;
+	sof = cfm & 0x7f;
+	dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
+			sof - sol);
+
+	buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
+	if (!buf)
+		return;
+	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, prstatus,
+			sizeof(*prstatus));
+	final_note(buf);
+}
+
+#ifdef CONFIG_SMP
+static int
+kdump_wait_cpu_freeze(void)
+{
+	int cpu_num = num_online_cpus() - 1;
+	int timeout = 1000;
+	while(timeout-- > 0) {
+		if (atomic_read(&kdump_cpu_frozen) == cpu_num)
+			return 0;
+		udelay(1000);
+	}
+	return 1;
+}
+#endif
+
+void
+machine_crash_shutdown(struct pt_regs *pt)
+{
+	/* This function is only called after the system
+	 * has paniced or is otherwise in a critical state.
+	 * The minimum amount of code to allow a kexec'd kernel
+	 * to run successfully needs to happen here.
+	 *
+	 * In practice this means shooting down the other cpus in
+	 * an SMP system.
+	 */
+	kexec_disable_iosapic();
+#ifdef CONFIG_SMP
+	/*
+	 * If kdump_on_init is set and an INIT is asserted here, kdump will
+	 * be started again via INIT monarch.
+	 */
+	local_irq_disable();
+	ia64_set_psr_mc();	/* mask MCA/INIT */
+	if (atomic_inc_return(&kdump_in_progress) != 1)
+		unw_init_running(kdump_cpu_freeze, NULL);
+
+	/*
+	 * Now this cpu is ready for kdump.
+	 * Stop all others by IPI or INIT.  They could receive INIT from
+	 * outside and might be INIT monarch, but only thing they have to
+	 * do is falling into kdump_cpu_freeze().
+	 *
+	 * If an INIT is asserted here:
+	 * - All receivers might be slaves, since some of cpus could already
+	 *   be frozen and INIT might be masked on monarch.  In this case,
+	 *   all slaves will be frozen soon since kdump_in_progress will let
+	 *   them into DIE_INIT_SLAVE_LEAVE.
+	 * - One might be a monarch, but INIT rendezvous will fail since
+	 *   at least this cpu already have INIT masked so it never join
+	 *   to the rendezvous.  In this case, all slaves and monarch will
+	 *   be frozen soon with no wait since the INIT rendezvous is skipped
+	 *   by kdump_in_progress.
+	 */
+	kdump_smp_send_stop();
+	/* not all cpu response to IPI, send INIT to freeze them */
+	if (kdump_wait_cpu_freeze()) {
+		kdump_smp_send_init();
+		/* wait again, don't go ahead if possible */
+		kdump_wait_cpu_freeze();
+	}
+#endif
+}
+
+static void
+machine_kdump_on_init(void)
+{
+	crash_save_vmcoreinfo();
+	local_irq_disable();
+	kexec_disable_iosapic();
+	machine_kexec(ia64_kimage);
+}
+
+void
+kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
+{
+	int cpuid;
+
+	local_irq_disable();
+	cpuid = smp_processor_id();
+	crash_save_this_cpu();
+	current->thread.ksp = (__u64)info->sw - 16;
+
+	ia64_set_psr_mc();	/* mask MCA/INIT and stop reentrance */
+
+	atomic_inc(&kdump_cpu_frozen);
+	kdump_status[cpuid] = 1;
+	mb();
+	for (;;)
+		cpu_relax();
+}
+
+static int
+kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
+{
+	struct ia64_mca_notify_die *nd;
+	struct die_args *args = data;
+
+	if (atomic_read(&kdump_in_progress)) {
+		switch (val) {
+		case DIE_INIT_MONARCH_LEAVE:
+			if (!kdump_freeze_monarch)
+				break;
+			/* fall through */
+		case DIE_INIT_SLAVE_LEAVE:
+		case DIE_INIT_MONARCH_ENTER:
+		case DIE_MCA_RENDZVOUS_LEAVE:
+			unw_init_running(kdump_cpu_freeze, NULL);
+			break;
+		}
+	}
+
+	if (!kdump_on_init && !kdump_on_fatal_mca)
+		return NOTIFY_DONE;
+
+	if (!ia64_kimage) {
+		if (val == DIE_INIT_MONARCH_LEAVE)
+			ia64_mca_printk(KERN_NOTICE
+					"%s: kdump not configured\n",
+					__func__);
+		return NOTIFY_DONE;
+	}
+
+	if (val != DIE_INIT_MONARCH_LEAVE &&
+	    val != DIE_INIT_MONARCH_PROCESS &&
+	    val != DIE_MCA_MONARCH_LEAVE)
+		return NOTIFY_DONE;
+
+	nd = (struct ia64_mca_notify_die *)args->err;
+
+	switch (val) {
+	case DIE_INIT_MONARCH_PROCESS:
+		/* Reason code 1 means machine check rendezvous*/
+		if (kdump_on_init && (nd->sos->rv_rc != 1)) {
+			if (atomic_inc_return(&kdump_in_progress) != 1)
+				kdump_freeze_monarch = 1;
+		}
+		break;
+	case DIE_INIT_MONARCH_LEAVE:
+		/* Reason code 1 means machine check rendezvous*/
+		if (kdump_on_init && (nd->sos->rv_rc != 1))
+			machine_kdump_on_init();
+		break;
+	case DIE_MCA_MONARCH_LEAVE:
+		/* *(nd->data) indicate if MCA is recoverable */
+		if (kdump_on_fatal_mca && !(*(nd->data))) {
+			if (atomic_inc_return(&kdump_in_progress) == 1)
+				machine_kdump_on_init();
+			/* We got fatal MCA while kdump!? No way!! */
+		}
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table kdump_ctl_table[] = {
+	{
+		.procname = "kdump_on_init",
+		.data = &kdump_on_init,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "kdump_on_fatal_mca",
+		.data = &kdump_on_fatal_mca,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{ }
+};
+
+static struct ctl_table sys_table[] = {
+	{
+	  .procname = "kernel",
+	  .mode = 0555,
+	  .child = kdump_ctl_table,
+	},
+	{ }
+};
+#endif
+
+static int
+machine_crash_setup(void)
+{
+	/* be notified before default_monarch_init_process */
+	static struct notifier_block kdump_init_notifier_nb = {
+		.notifier_call = kdump_init_notifier,
+		.priority = 1,
+	};
+	int ret;
+	if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
+		return ret;
+#ifdef CONFIG_SYSCTL
+	register_sysctl_table(sys_table);
+#endif
+	return 0;
+}
+
+__initcall(machine_crash_setup);
+
diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c
new file mode 100644
index 00000000000..c8c9298666f
--- /dev/null
+++ b/arch/ia64/kernel/crash_dump.c
@@ -0,0 +1,50 @@
+/*
+ *	kernel/crash_dump.c - Memory preserving reboot related code.
+ *
+ *	Created by: Simon Horman <horms@verge.net.au>
+ *	Original code moved from kernel/crash.c
+ *	Original code comment copied from the i386 version of this file
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/uaccess.h>
+
+/**
+ * copy_oldmem_page - copy one page from "oldmem"
+ * @pfn: page frame number to be copied
+ * @buf: target memory address for the copy; this can be in kernel address
+ *	space or user address space (see @userbuf)
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page (based on pfn) to begin the copy
+ * @userbuf: if set, @buf is in user address space, use copy_to_user(),
+ *	otherwise @buf is in kernel address space, use memcpy().
+ *
+ * Copy a page from "oldmem". For this page, there is no pte mapped
+ * in the current kernel. We stitch up a pte, similar to kmap_atomic.
+ *
+ * Calling copy_to_user() in atomic context is not desirable. Hence first
+ * copying the data to a pre-allocated kernel page and then copying to user
+ * space in non-atomic context.
+ */
+ssize_t
+copy_oldmem_page(unsigned long pfn, char *buf,
+		size_t csize, unsigned long offset, int userbuf)
+{
+	void  *vaddr;
+
+	if (!csize)
+		return 0;
+	vaddr = __va(pfn<<PAGE_SHIFT);
+	if (userbuf) {
+		if (copy_to_user(buf, (vaddr + offset), csize)) {
+			return -EFAULT;
+		}
+	} else
+		memcpy(buf, (vaddr + offset), csize);
+	return csize;
+}
+
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index e00b21514f7..4826ff957a3 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -3,6 +3,7 @@
 #include <linux/time.h>
 #include <linux/errno.h>
 #include <linux/timex.h>
+#include <linux/clocksource.h>
 #include <asm/io.h>
 
 /* IBM Summit (EXA) Cyclone counter code*/
@@ -18,22 +19,28 @@ void __init cyclone_setup(void)
 	use_cyclone = 1;
 }
 
+static void __iomem *cyclone_mc;
 
-struct time_interpolator cyclone_interpolator = {
-	.source =	TIME_SOURCE_MMIO64,
-	.shift =	16,
-	.frequency =	CYCLONE_TIMER_FREQ,
-	.drift =	-100,
-	.mask =		(1LL << 40) - 1
+static cycle_t read_cyclone(struct clocksource *cs)
+{
+	return (cycle_t)readq((void __iomem *)cyclone_mc);
+}
+
+static struct clocksource clocksource_cyclone = {
+        .name           = "cyclone",
+        .rating         = 300,
+        .read           = read_cyclone,
+        .mask           = (1LL << 40) - 1,
+        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 int __init init_cyclone_clock(void)
 {
-	u64* reg;
+	u64 __iomem *reg;
 	u64 base;	/* saved cyclone base address */
 	u64 offset;	/* offset from pageaddr to cyclone_timer register */
 	int i;
-	u32* volatile cyclone_timer;	/* Cyclone MPMC0 register */
+	u32 __iomem *cyclone_timer;	/* Cyclone MPMC0 register */
 
 	if (!use_cyclone)
 		return 0;
@@ -42,25 +49,28 @@ int __init init_cyclone_clock(void)
 
 	/* find base address */
 	offset = (CYCLONE_CBAR_ADDR);
-	reg = (u64*)ioremap_nocache(offset, sizeof(u64));
+	reg = ioremap_nocache(offset, sizeof(u64));
 	if(!reg){
-		printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
+		printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
+				" register.\n");
 		use_cyclone = 0;
 		return -ENODEV;
 	}
 	base = readq(reg);
+	iounmap(reg);
 	if(!base){
-		printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
+		printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
+				" value.\n");
 		use_cyclone = 0;
 		return -ENODEV;
 	}
-	iounmap(reg);
 
 	/* setup PMCC */
 	offset = (base + CYCLONE_PMCC_OFFSET);
-	reg = (u64*)ioremap_nocache(offset, sizeof(u64));
+	reg = ioremap_nocache(offset, sizeof(u64));
 	if(!reg){
-		printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
+		printk(KERN_ERR "Summit chipset: Could not find valid PMCC"
+				" register.\n");
 		use_cyclone = 0;
 		return -ENODEV;
 	}
@@ -69,9 +79,10 @@ int __init init_cyclone_clock(void)
 
 	/* setup MPCS */
 	offset = (base + CYCLONE_MPCS_OFFSET);
-	reg = (u64*)ioremap_nocache(offset, sizeof(u64));
+	reg = ioremap_nocache(offset, sizeof(u64));
 	if(!reg){
-		printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
+		printk(KERN_ERR "Summit chipset: Could not find valid MPCS"
+				" register.\n");
 		use_cyclone = 0;
 		return -ENODEV;
 	}
@@ -80,9 +91,10 @@ int __init init_cyclone_clock(void)
 
 	/* map in cyclone_timer */
 	offset = (base + CYCLONE_MPMC_OFFSET);
-	cyclone_timer = (u32*)ioremap_nocache(offset, sizeof(u32));
+	cyclone_timer = ioremap_nocache(offset, sizeof(u32));
 	if(!cyclone_timer){
-		printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
+		printk(KERN_ERR "Summit chipset: Could not find valid MPMC"
+				" register.\n");
 		use_cyclone = 0;
 		return -ENODEV;
 	}
@@ -93,16 +105,18 @@ int __init init_cyclone_clock(void)
 		int stall = 100;
 		while(stall--) barrier();
 		if(readl(cyclone_timer) == old){
-			printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
+			printk(KERN_ERR "Summit chipset: Counter not counting!"
+					" DISABLED\n");
 			iounmap(cyclone_timer);
-			cyclone_timer = 0;
+			cyclone_timer = NULL;
 			use_cyclone = 0;
 			return -ENODEV;
 		}
 	}
 	/* initialize last tick */
-	cyclone_interpolator.addr = cyclone_timer;
-	register_time_interpolator(&cyclone_interpolator);
+	cyclone_mc = cyclone_timer;
+	clocksource_cyclone.archdata.fsys_mmio = cyclone_timer;
+	clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ);
 
 	return 0;
 }
diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c
new file mode 100644
index 00000000000..7f791623820
--- /dev/null
+++ b/arch/ia64/kernel/dma-mapping.c
@@ -0,0 +1,24 @@
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+
+/* Set this to 1 if there is a HW IOMMU in the system */
+int iommu_detected __read_mostly;
+
+struct dma_map_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+static int __init dma_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+
+	return 0;
+}
+fs_initcall(dma_init);
+
+struct dma_map_ops *dma_get_ops(struct device *dev)
+{
+	return dma_ops;
+}
+EXPORT_SYMBOL(dma_get_ops);
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 12cfedce73b..741b99c1a0b 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1,13 +1,16 @@
 /*
  * Extensible Firmware Interface
  *
- * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999
+ * Based on Extensible Firmware Interface Specification version 0.9
+ * April 30, 1999
  *
  * Copyright (C) 1999 VA Linux Systems
  * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  * Copyright (C) 1999-2003 Hewlett-Packard Co.
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  *	Stephane Eranian <eranian@hpl.hp.com>
+ * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
  *
  * All EFI Runtime Services are not implemented yet as EFI only
  * supports physical mode addressing on SoftSDV. This is to be fixed
@@ -18,13 +21,17 @@
  * Goutham Rao: <goutham.rao@intel.com>
  *	Skip non-WB memory and ignore empty memory ranges.
  */
-#include <linux/config.h>
 #include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/crash_dump.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/efi.h>
+#include <linux/kexec.h>
+#include <linux/mm.h>
 
 #include <asm/io.h>
 #include <asm/kregs.h>
@@ -32,157 +39,176 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mca.h>
+#include <asm/setup.h>
+#include <asm/tlbflush.h>
 
 #define EFI_DEBUG	0
 
+static __initdata unsigned long palo_phys;
+
+static __initdata efi_config_table_type_t arch_tables[] = {
+	{PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, "PALO", &palo_phys},
+	{NULL_GUID, NULL, 0},
+};
+
 extern efi_status_t efi_call_phys (void *, ...);
 
-struct efi efi;
-EXPORT_SYMBOL(efi);
 static efi_runtime_services_t *runtime;
-static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
+static u64 mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
 
 #define efi_call_virt(f, args...)	(*(f))(args)
 
-#define STUB_GET_TIME(prefix, adjust_arg)							  \
-static efi_status_t										  \
-prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc)						  \
-{												  \
-	struct ia64_fpreg fr[6];								  \
-	efi_time_cap_t *atc = NULL;								  \
-	efi_status_t ret;									  \
-												  \
-	if (tc)											  \
-		atc = adjust_arg(tc);								  \
-	ia64_save_scratch_fpregs(fr);								  \
-	ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), atc); \
-	ia64_load_scratch_fpregs(fr);								  \
-	return ret;										  \
+#define STUB_GET_TIME(prefix, adjust_arg)				       \
+static efi_status_t							       \
+prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc)			       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_time_cap_t *atc = NULL;					       \
+	efi_status_t ret;						       \
+									       \
+	if (tc)								       \
+		atc = adjust_arg(tc);					       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time),    \
+				adjust_arg(tm), atc);			       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
 }
 
-#define STUB_SET_TIME(prefix, adjust_arg)							\
-static efi_status_t										\
-prefix##_set_time (efi_time_t *tm)								\
-{												\
-	struct ia64_fpreg fr[6];								\
-	efi_status_t ret;									\
-												\
-	ia64_save_scratch_fpregs(fr);								\
-	ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), adjust_arg(tm));	\
-	ia64_load_scratch_fpregs(fr);								\
-	return ret;										\
+#define STUB_SET_TIME(prefix, adjust_arg)				       \
+static efi_status_t							       \
+prefix##_set_time (efi_time_t *tm)					       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_status_t ret;						       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time),    \
+				adjust_arg(tm));			       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
 }
 
-#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg)						\
-static efi_status_t										\
-prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm)		\
-{												\
-	struct ia64_fpreg fr[6];								\
-	efi_status_t ret;									\
-												\
-	ia64_save_scratch_fpregs(fr);								\
-	ret = efi_call_##prefix((efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time),	\
-				adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm));	\
-	ia64_load_scratch_fpregs(fr);								\
-	return ret;										\
+#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg)			       \
+static efi_status_t							       \
+prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending,	       \
+			  efi_time_t *tm)				       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_status_t ret;						       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix(					       \
+		(efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time),      \
+		adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm));     \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
 }
 
-#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg)						\
-static efi_status_t										\
-prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm)					\
-{												\
-	struct ia64_fpreg fr[6];								\
-	efi_time_t *atm = NULL;									\
-	efi_status_t ret;									\
-												\
-	if (tm)											\
-		atm = adjust_arg(tm);								\
-	ia64_save_scratch_fpregs(fr);								\
-	ret = efi_call_##prefix((efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time),	\
-				enabled, atm);							\
-	ia64_load_scratch_fpregs(fr);								\
-	return ret;										\
+#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg)			       \
+static efi_status_t							       \
+prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm)		       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_time_t *atm = NULL;						       \
+	efi_status_t ret;						       \
+									       \
+	if (tm)								       \
+		atm = adjust_arg(tm);					       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix(					       \
+		(efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time),      \
+		enabled, atm);						       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
 }
 
-#define STUB_GET_VARIABLE(prefix, adjust_arg)						\
-static efi_status_t									\
-prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr,		\
-		       unsigned long *data_size, void *data)				\
-{											\
-	struct ia64_fpreg fr[6];							\
-	u32 *aattr = NULL;									\
-	efi_status_t ret;								\
-											\
-	if (attr)									\
-		aattr = adjust_arg(attr);						\
-	ia64_save_scratch_fpregs(fr);							\
-	ret = efi_call_##prefix((efi_get_variable_t *) __va(runtime->get_variable),	\
-				adjust_arg(name), adjust_arg(vendor), aattr,		\
-				adjust_arg(data_size), adjust_arg(data));		\
-	ia64_load_scratch_fpregs(fr);							\
-	return ret;									\
+#define STUB_GET_VARIABLE(prefix, adjust_arg)				       \
+static efi_status_t							       \
+prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr,      \
+		       unsigned long *data_size, void *data)		       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	u32 *aattr = NULL;						       \
+	efi_status_t ret;						       \
+									       \
+	if (attr)							       \
+		aattr = adjust_arg(attr);				       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix(					       \
+		(efi_get_variable_t *) __va(runtime->get_variable),	       \
+		adjust_arg(name), adjust_arg(vendor), aattr,		       \
+		adjust_arg(data_size), adjust_arg(data));		       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
 }
 
-#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg)						\
-static efi_status_t										\
-prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor)	\
-{												\
-	struct ia64_fpreg fr[6];								\
-	efi_status_t ret;									\
-												\
-	ia64_save_scratch_fpregs(fr);								\
-	ret = efi_call_##prefix((efi_get_next_variable_t *) __va(runtime->get_next_variable),	\
-				adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor));	\
-	ia64_load_scratch_fpregs(fr);								\
-	return ret;										\
+#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg)			       \
+static efi_status_t							       \
+prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name,      \
+			    efi_guid_t *vendor)				       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_status_t ret;						       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix(					       \
+		(efi_get_next_variable_t *) __va(runtime->get_next_variable),  \
+		adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor));  \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
 }
 
-#define STUB_SET_VARIABLE(prefix, adjust_arg)						\
-static efi_status_t									\
-prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, unsigned long attr,	\
-		       unsigned long data_size, void *data)				\
-{											\
-	struct ia64_fpreg fr[6];							\
-	efi_status_t ret;								\
-											\
-	ia64_save_scratch_fpregs(fr);							\
-	ret = efi_call_##prefix((efi_set_variable_t *) __va(runtime->set_variable),	\
-				adjust_arg(name), adjust_arg(vendor), attr, data_size,	\
-				adjust_arg(data));					\
-	ia64_load_scratch_fpregs(fr);							\
-	return ret;									\
+#define STUB_SET_VARIABLE(prefix, adjust_arg)				       \
+static efi_status_t							       \
+prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor,		       \
+		       u32 attr, unsigned long data_size,		       \
+		       void *data)					       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_status_t ret;						       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix(					       \
+		(efi_set_variable_t *) __va(runtime->set_variable),	       \
+		adjust_arg(name), adjust_arg(vendor), attr, data_size,	       \
+		adjust_arg(data));					       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
 }
 
-#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg)					\
-static efi_status_t										\
-prefix##_get_next_high_mono_count (u32 *count)							\
-{												\
-	struct ia64_fpreg fr[6];								\
-	efi_status_t ret;									\
-												\
-	ia64_save_scratch_fpregs(fr);								\
-	ret = efi_call_##prefix((efi_get_next_high_mono_count_t *)				\
-				__va(runtime->get_next_high_mono_count), adjust_arg(count));	\
-	ia64_load_scratch_fpregs(fr);								\
-	return ret;										\
+#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg)		       \
+static efi_status_t							       \
+prefix##_get_next_high_mono_count (u32 *count)				       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_status_t ret;						       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	ret = efi_call_##prefix((efi_get_next_high_mono_count_t *)	       \
+				__va(runtime->get_next_high_mono_count),       \
+				adjust_arg(count));			       \
+	ia64_load_scratch_fpregs(fr);					       \
+	return ret;							       \
 }
 
-#define STUB_RESET_SYSTEM(prefix, adjust_arg)					\
-static void									\
-prefix##_reset_system (int reset_type, efi_status_t status,			\
-		       unsigned long data_size, efi_char16_t *data)		\
-{										\
-	struct ia64_fpreg fr[6];						\
-	efi_char16_t *adata = NULL;						\
-										\
-	if (data)								\
-		adata = adjust_arg(data);					\
-										\
-	ia64_save_scratch_fpregs(fr);						\
-	efi_call_##prefix((efi_reset_system_t *) __va(runtime->reset_system),	\
-			  reset_type, status, data_size, adata);		\
-	/* should not return, but just in case... */				\
-	ia64_load_scratch_fpregs(fr);						\
+#define STUB_RESET_SYSTEM(prefix, adjust_arg)				       \
+static void								       \
+prefix##_reset_system (int reset_type, efi_status_t status,		       \
+		       unsigned long data_size, efi_char16_t *data)	       \
+{									       \
+	struct ia64_fpreg fr[6];					       \
+	efi_char16_t *adata = NULL;					       \
+									       \
+	if (data)							       \
+		adata = adjust_arg(data);				       \
+									       \
+	ia64_save_scratch_fpregs(fr);					       \
+	efi_call_##prefix(						       \
+		(efi_reset_system_t *) __va(runtime->reset_system),	       \
+		reset_type, status, data_size, adata);			       \
+	/* should not return, but just in case... */			       \
+	ia64_load_scratch_fpregs(fr);					       \
 }
 
 #define phys_ptr(arg)	((__typeof__(arg)) ia64_tpa(arg))
@@ -214,16 +240,18 @@ efi_gettimeofday (struct timespec *ts)
 {
 	efi_time_t tm;
 
-	memset(ts, 0, sizeof(ts));
-	if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS)
+	if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS) {
+		memset(ts, 0, sizeof(*ts));
 		return;
+	}
 
-	ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
+	ts->tv_sec = mktime(tm.year, tm.month, tm.day,
+			    tm.hour, tm.minute, tm.second);
 	ts->tv_nsec = tm.nanosecond;
 }
 
 static int
-is_available_memory (efi_memory_desc_t *md)
+is_memory_available (efi_memory_desc_t *md)
 {
 	if (!(md->attribute & EFI_MEMORY_WB))
 		return 0;
@@ -292,8 +320,8 @@ walk (efi_freemem_callback_t callback, void *arg, u64 attr)
 }
 
 /*
- * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
- * has memory that is available for OS use.
+ * Walk the EFI memory map and call CALLBACK once for each EFI memory
+ * descriptor that has memory that is available for OS use.
  */
 void
 efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
@@ -302,8 +330,8 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
 }
 
 /*
- * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
- * has memory that is available for uncached allocator.
+ * Walk the EFI memory map and call CALLBACK once for each EFI memory
+ * descriptor that has memory that is available for uncached allocator.
  */
 void
 efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
@@ -312,11 +340,10 @@ efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
 }
 
 /*
- * Look for the PAL_CODE region reported by EFI and maps it using an
+ * Look for the PAL_CODE region reported by EFI and map it using an
  * ITR to enable safe PAL calls in virtual mode.  See IA-64 Processor
  * Abstraction Layer chapter 11 in ADAG
  */
-
 void *
 efi_get_pal_addr (void)
 {
@@ -336,53 +363,90 @@ efi_get_pal_addr (void)
 			continue;
 
 		if (++pal_code_count > 1) {
-			printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n",
-			       md->phys_addr);
+			printk(KERN_ERR "Too many EFI Pal Code memory ranges, "
+			       "dropped @ %llx\n", md->phys_addr);
 			continue;
 		}
 		/*
-		 * The only ITLB entry in region 7 that is used is the one installed by
-		 * __start().  That entry covers a 64MB range.
+		 * The only ITLB entry in region 7 that is used is the one
+		 * installed by __start().  That entry covers a 64MB range.
 		 */
 		mask  = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1);
 		vaddr = PAGE_OFFSET + md->phys_addr;
 
 		/*
-		 * We must check that the PAL mapping won't overlap with the kernel
-		 * mapping.
+		 * We must check that the PAL mapping won't overlap with the
+		 * kernel mapping.
 		 *
-		 * PAL code is guaranteed to be aligned on a power of 2 between 4k and
-		 * 256KB and that only one ITR is needed to map it. This implies that the
-		 * PAL code is always aligned on its size, i.e., the closest matching page
-		 * size supported by the TLB. Therefore PAL code is guaranteed never to
-		 * cross a 64MB unless it is bigger than 64MB (very unlikely!).  So for
-		 * now the following test is enough to determine whether or not we need a
-		 * dedicated ITR for the PAL code.
+		 * PAL code is guaranteed to be aligned on a power of 2 between
+		 * 4k and 256KB and that only one ITR is needed to map it. This
+		 * implies that the PAL code is always aligned on its size,
+		 * i.e., the closest matching page size supported by the TLB.
+		 * Therefore PAL code is guaranteed never to cross a 64MB unless
+		 * it is bigger than 64MB (very unlikely!).  So for now the
+		 * following test is enough to determine whether or not we need
+		 * a dedicated ITR for the PAL code.
 		 */
 		if ((vaddr & mask) == (KERNEL_START & mask)) {
 			printk(KERN_INFO "%s: no need to install ITR for PAL code\n",
-			       __FUNCTION__);
+			       __func__);
 			continue;
 		}
 
-		if (md->num_pages << EFI_PAGE_SHIFT > IA64_GRANULE_SIZE)
-			panic("Woah!  PAL code size bigger than a granule!");
+		if (efi_md_size(md) > IA64_GRANULE_SIZE)
+			panic("Whoa!  PAL code size bigger than a granule!");
 
 #if EFI_DEBUG
 		mask  = ~((1 << IA64_GRANULE_SHIFT) - 1);
 
-		printk(KERN_INFO "CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
-			smp_processor_id(), md->phys_addr,
-			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
-			vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
+		printk(KERN_INFO "CPU %d: mapping PAL code "
+                       "[0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
+                       smp_processor_id(), md->phys_addr,
+                       md->phys_addr + efi_md_size(md),
+                       vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
 #endif
 		return __va(md->phys_addr);
 	}
-	printk(KERN_WARNING "%s: no PAL-code memory-descriptor found",
-	       __FUNCTION__);
+	printk(KERN_WARNING "%s: no PAL-code memory-descriptor found\n",
+	       __func__);
 	return NULL;
 }
 
+
+static u8 __init palo_checksum(u8 *buffer, u32 length)
+{
+	u8 sum = 0;
+	u8 *end = buffer + length;
+
+	while (buffer < end)
+		sum = (u8) (sum + *(buffer++));
+
+	return sum;
+}
+
+/*
+ * Parse and handle PALO table which is published at:
+ * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
+ */
+static void __init handle_palo(unsigned long phys_addr)
+{
+	struct palo_table *palo = __va(phys_addr);
+	u8  checksum;
+
+	if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
+		printk(KERN_INFO "PALO signature incorrect.\n");
+		return;
+	}
+
+	checksum = palo_checksum((u8 *)palo, palo->length);
+	if (checksum) {
+		printk(KERN_INFO "PALO checksum incorrect.\n");
+		return;
+	}
+
+	setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
+}
+
 void
 efi_map_pal_code (void)
 {
@@ -396,30 +460,37 @@ efi_map_pal_code (void)
 	 * Cannot write to CRx with PSR.ic=1
 	 */
 	psr = ia64_clear_ic();
-	ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr),
+	ia64_itr(0x1, IA64_TR_PALCODE,
+		 GRANULEROUNDDOWN((unsigned long) pal_vaddr),
 		 pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
 		 IA64_GRANULE_SHIFT);
+	paravirt_dv_serialize_data();
 	ia64_set_psr(psr);		/* restore psr */
-	ia64_srlz_i();
 }
 
 void __init
 efi_init (void)
 {
 	void *efi_map_start, *efi_map_end;
-	efi_config_table_t *config_tables;
 	efi_char16_t *c16;
 	u64 efi_desc_size;
 	char *cp, vendor[100] = "unknown";
-	extern char saved_command_line[];
 	int i;
 
-	/* it's too early to be able to use the standard kernel command line support... */
-	for (cp = saved_command_line; *cp; ) {
+	set_bit(EFI_BOOT, &efi.flags);
+	set_bit(EFI_64BIT, &efi.flags);
+
+	/*
+	 * It's too early to be able to use the standard kernel command line
+	 * support...
+	 */
+	for (cp = boot_command_line; *cp; ) {
 		if (memcmp(cp, "mem=", 4) == 0) {
 			mem_limit = memparse(cp + 4, &cp);
 		} else if (memcmp(cp, "max_addr=", 9) == 0) {
 			max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
+		} else if (memcmp(cp, "min_addr=", 9) == 0) {
+			min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
 		} else {
 			while (*cp != ' ' && *cp)
 				++cp;
@@ -427,8 +498,12 @@ efi_init (void)
 				++cp;
 		}
 	}
+	if (min_addr != 0UL)
+		printk(KERN_INFO "Ignoring memory below %lluMB\n",
+		       min_addr >> 20);
 	if (max_addr != ~0UL)
-		printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
+		printk(KERN_INFO "Ignoring memory above %lluMB\n",
+		       max_addr >> 20);
 
 	efi.systab = __va(ia64_boot_param->efi_systab);
 
@@ -436,16 +511,14 @@ efi_init (void)
 	 * Verify the EFI Table
 	 */
 	if (efi.systab == NULL)
-		panic("Woah! Can't find EFI system table.\n");
+		panic("Whoa! Can't find EFI system table.\n");
 	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
-		panic("Woah! EFI system table signature incorrect\n");
-	if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
-		printk(KERN_WARNING "Warning: EFI system table major version mismatch: "
-		       "got %d.%02d, expected %d.%02d\n",
-		       efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff,
-		       EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff);
-
-	config_tables = __va(efi.systab->tables);
+		panic("Whoa! EFI system table signature incorrect\n");
+	if ((efi.systab->hdr.revision >> 16) == 0)
+		printk(KERN_WARNING "Warning: EFI system table version "
+		       "%d.%02d, expected 1.00 or greater\n",
+		       efi.systab->hdr.revision >> 16,
+		       efi.systab->hdr.revision & 0xffff);
 
 	/* Show what we know for posterity */
 	c16 = __va(efi.systab->fw_vendor);
@@ -456,39 +529,18 @@ efi_init (void)
 	}
 
 	printk(KERN_INFO "EFI v%u.%.02u by %s:",
-	       efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
-
-	efi.mps        = EFI_INVALID_TABLE_ADDR;
-	efi.acpi       = EFI_INVALID_TABLE_ADDR;
-	efi.acpi20     = EFI_INVALID_TABLE_ADDR;
-	efi.smbios     = EFI_INVALID_TABLE_ADDR;
-	efi.sal_systab = EFI_INVALID_TABLE_ADDR;
-	efi.boot_info  = EFI_INVALID_TABLE_ADDR;
-	efi.hcdp       = EFI_INVALID_TABLE_ADDR;
-	efi.uga        = EFI_INVALID_TABLE_ADDR;
-
-	for (i = 0; i < (int) efi.systab->nr_tables; i++) {
-		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
-			efi.mps = config_tables[i].table;
-			printk(" MPS=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
-			efi.acpi20 = config_tables[i].table;
-			printk(" ACPI 2.0=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
-			efi.acpi = config_tables[i].table;
-			printk(" ACPI=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
-			efi.smbios = config_tables[i].table;
-			printk(" SMBIOS=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
-			efi.sal_systab = config_tables[i].table;
-			printk(" SALsystab=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
-			efi.hcdp = config_tables[i].table;
-			printk(" HCDP=0x%lx", config_tables[i].table);
-		}
-	}
-	printk("\n");
+	       efi.systab->hdr.revision >> 16,
+	       efi.systab->hdr.revision & 0xffff, vendor);
+
+	set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+
+	palo_phys      = EFI_INVALID_TABLE_ADDR;
+
+	if (efi_config_init(arch_tables) != 0)
+		return;
+
+	if (palo_phys != EFI_INVALID_TABLE_ADDR)
+		handle_palo(palo_phys);
 
 	runtime = __va(efi.systab->runtime);
 	efi.get_time = phys_get_time;
@@ -511,12 +563,33 @@ efi_init (void)
 		efi_memory_desc_t *md;
 		void *p;
 
-		for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) {
+		for (i = 0, p = efi_map_start; p < efi_map_end;
+		     ++i, p += efi_desc_size)
+		{
+			const char *unit;
+			unsigned long size;
+
 			md = p;
-			printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
+			size = md->num_pages << EFI_PAGE_SHIFT;
+
+			if ((size >> 40) > 0) {
+				size >>= 40;
+				unit = "TB";
+			} else if ((size >> 30) > 0) {
+				size >>= 30;
+				unit = "GB";
+			} else if ((size >> 20) > 0) {
+				size >>= 20;
+				unit = "MB";
+			} else {
+				size >>= 10;
+				unit = "KB";
+			}
+
+			printk("mem%02d: type=%2u, attr=0x%016lx, "
+			       "range=[0x%016lx-0x%016lx) (%4lu%s)\n",
 			       i, md->type, md->attribute, md->phys_addr,
-			       md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
-			       md->num_pages >> (20 - EFI_PAGE_SHIFT));
+			       md->phys_addr + efi_md_size(md), size, unit);
 		}
 	}
 #endif
@@ -541,8 +614,8 @@ efi_enter_virtual_mode (void)
 		md = p;
 		if (md->attribute & EFI_MEMORY_RUNTIME) {
 			/*
-			 * Some descriptors have multiple bits set, so the order of
-			 * the tests is relevant.
+			 * Some descriptors have multiple bits set, so the
+			 * order of the tests is relevant.
 			 */
 			if (md->attribute & EFI_MEMORY_WB) {
 				md->virt_addr = (u64) __va(md->phys_addr);
@@ -550,21 +623,26 @@ efi_enter_virtual_mode (void)
 				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
 			} else if (md->attribute & EFI_MEMORY_WC) {
 #if 0
-				md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
-									   | _PAGE_D
-									   | _PAGE_MA_WC
-									   | _PAGE_PL_0
-									   | _PAGE_AR_RW));
+				md->virt_addr = ia64_remap(md->phys_addr,
+							   (_PAGE_A |
+							    _PAGE_P |
+							    _PAGE_D |
+							    _PAGE_MA_WC |
+							    _PAGE_PL_0 |
+							    _PAGE_AR_RW));
 #else
 				printk(KERN_INFO "EFI_MEMORY_WC mapping\n");
 				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
 #endif
 			} else if (md->attribute & EFI_MEMORY_WT) {
 #if 0
-				md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
-									   | _PAGE_D | _PAGE_MA_WT
-									   | _PAGE_PL_0
-									   | _PAGE_AR_RW));
+				md->virt_addr = ia64_remap(md->phys_addr,
+							   (_PAGE_A |
+							    _PAGE_P |
+							    _PAGE_D |
+							    _PAGE_MA_WT |
+							    _PAGE_PL_0 |
+							    _PAGE_AR_RW));
 #else
 				printk(KERN_INFO "EFI_MEMORY_WT mapping\n");
 				md->virt_addr = (u64) ioremap(md->phys_addr, 0);
@@ -575,16 +653,20 @@ efi_enter_virtual_mode (void)
 
 	status = efi_call_phys(__va(runtime->set_virtual_address_map),
 			       ia64_boot_param->efi_memmap_size,
-			       efi_desc_size, ia64_boot_param->efi_memdesc_version,
+			       efi_desc_size,
+			       ia64_boot_param->efi_memdesc_version,
 			       ia64_boot_param->efi_memmap);
 	if (status != EFI_SUCCESS) {
-		printk(KERN_WARNING "warning: unable to switch EFI into virtual mode "
-		       "(status=%lu)\n", status);
+		printk(KERN_WARNING "warning: unable to switch EFI into "
+		       "virtual mode (status=%lu)\n", status);
 		return;
 	}
 
+	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
 	/*
-	 * Now that EFI is in virtual mode, we call the EFI functions more efficiently:
+	 * Now that EFI is in virtual mode, we call the EFI functions more
+	 * efficiently:
 	 */
 	efi.get_time = virt_get_time;
 	efi.set_time = virt_set_time;
@@ -598,8 +680,8 @@ efi_enter_virtual_mode (void)
 }
 
 /*
- * Walk the EFI memory map looking for the I/O port range.  There can only be one entry of
- * this type, other I/O port ranges should be described via ACPI.
+ * Walk the EFI memory map looking for the I/O port range.  There can only be
+ * one entry of this type, other I/O port ranges should be described via ACPI.
  */
 u64
 efi_get_iobase (void)
@@ -622,6 +704,18 @@ efi_get_iobase (void)
 	return 0;
 }
 
+static struct kern_memdesc *
+kern_memory_descriptor (unsigned long phys_addr)
+{
+	struct kern_memdesc *md;
+
+	for (md = kern_memmap; md->start != ~0UL; md++) {
+		if (phys_addr - md->start < (md->num_pages << EFI_PAGE_SHIFT))
+			 return md;
+	}
+	return NULL;
+}
+
 static efi_memory_desc_t *
 efi_memory_descriptor (unsigned long phys_addr)
 {
@@ -636,27 +730,29 @@ efi_memory_descriptor (unsigned long phys_addr)
 	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
 		md = p;
 
-		if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
+		if (phys_addr - md->phys_addr < efi_md_size(md))
 			 return md;
 	}
-	return 0;
+	return NULL;
 }
 
 static int
-efi_memmap_has_mmio (void)
+efi_memmap_intersects (unsigned long phys_addr, unsigned long size)
 {
 	void *efi_map_start, *efi_map_end, *p;
 	efi_memory_desc_t *md;
 	u64 efi_desc_size;
+	unsigned long end;
 
 	efi_map_start = __va(ia64_boot_param->efi_memmap);
 	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
 	efi_desc_size = ia64_boot_param->efi_memdesc_size;
 
+	end = phys_addr + size;
+
 	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
 		md = p;
-
-		if (md->type == EFI_MEMORY_MAPPED_IO)
+		if (md->phys_addr < end && efi_md_end(md) > phys_addr)
 			return 1;
 	}
 	return 0;
@@ -683,71 +779,142 @@ efi_mem_attributes (unsigned long phys_addr)
 }
 EXPORT_SYMBOL(efi_mem_attributes);
 
-/*
- * Determines whether the memory at phys_addr supports the desired
- * attribute (WB, UC, etc).  If this returns 1, the caller can safely
- * access size bytes at phys_addr with the specified attribute.
- */
-int
-efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, u64 attr)
+u64
+efi_mem_attribute (unsigned long phys_addr, unsigned long size)
 {
 	unsigned long end = phys_addr + size;
 	efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+	u64 attr;
+
+	if (!md)
+		return 0;
 
 	/*
-	 * Some firmware doesn't report MMIO regions in the EFI memory
-	 * map.  The Intel BigSur (a.k.a. HP i2000) has this problem.
-	 * On those platforms, we have to assume UC is valid everywhere.
+	 * EFI_MEMORY_RUNTIME is not a memory attribute; it just tells
+	 * the kernel that firmware needs this region mapped.
 	 */
-	if (!md || (md->attribute & attr) != attr) {
-		if (attr == EFI_MEMORY_UC && !efi_memmap_has_mmio())
-			return 1;
+	attr = md->attribute & ~EFI_MEMORY_RUNTIME;
+	do {
+		unsigned long md_end = efi_md_end(md);
+
+		if (end <= md_end)
+			return attr;
+
+		md = efi_memory_descriptor(md_end);
+		if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr)
+			return 0;
+	} while (md);
+	return 0;	/* never reached */
+}
+
+u64
+kern_mem_attribute (unsigned long phys_addr, unsigned long size)
+{
+	unsigned long end = phys_addr + size;
+	struct kern_memdesc *md;
+	u64 attr;
+
+	/*
+	 * This is a hack for ioremap calls before we set up kern_memmap.
+	 * Maybe we should do efi_memmap_init() earlier instead.
+	 */
+	if (!kern_memmap) {
+		attr = efi_mem_attribute(phys_addr, size);
+		if (attr & EFI_MEMORY_WB)
+			return EFI_MEMORY_WB;
 		return 0;
 	}
 
+	md = kern_memory_descriptor(phys_addr);
+	if (!md)
+		return 0;
+
+	attr = md->attribute;
 	do {
-		unsigned long md_end = efi_md_end(md);
+		unsigned long md_end = kmd_end(md);
 
 		if (end <= md_end)
-			return 1;
+			return attr;
 
-		md = efi_memory_descriptor(md_end);
-		if (!md || (md->attribute & attr) != attr)
+		md = kern_memory_descriptor(md_end);
+		if (!md || md->attribute != attr)
 			return 0;
 	} while (md);
-	return 0;
+	return 0;	/* never reached */
 }
+EXPORT_SYMBOL(kern_mem_attribute);
 
-/*
- * For /dev/mem, we only allow read & write system calls to access
- * write-back memory, because read & write don't allow the user to
- * control access size.
- */
 int
-valid_phys_addr_range (unsigned long phys_addr, unsigned long size)
+valid_phys_addr_range (phys_addr_t phys_addr, unsigned long size)
 {
-	return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB);
+	u64 attr;
+
+	/*
+	 * /dev/mem reads and writes use copy_to_user(), which implicitly
+	 * uses a granule-sized kernel identity mapping.  It's really
+	 * only safe to do this for regions in kern_memmap.  For more
+	 * details, see Documentation/ia64/aliasing.txt.
+	 */
+	attr = kern_mem_attribute(phys_addr, size);
+	if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC)
+		return 1;
+	return 0;
 }
 
-/*
- * We allow mmap of anything in the EFI memory map that supports
- * either write-back or uncacheable access.  For uncacheable regions,
- * the supported access sizes are system-dependent, and the user is
- * responsible for using the correct size.
- *
- * Note that this doesn't currently allow access to hot-added memory,
- * because that doesn't appear in the boot-time EFI memory map.
- */
 int
-valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long size)
+valid_mmap_phys_addr_range (unsigned long pfn, unsigned long size)
 {
-	if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB))
-		return 1;
+	unsigned long phys_addr = pfn << PAGE_SHIFT;
+	u64 attr;
+
+	attr = efi_mem_attribute(phys_addr, size);
 
-	if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC))
+	/*
+	 * /dev/mem mmap uses normal user pages, so we don't need the entire
+	 * granule, but the entire region we're mapping must support the same
+	 * attribute.
+	 */
+	if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC)
 		return 1;
 
-	return 0;
+	/*
+	 * Intel firmware doesn't tell us about all the MMIO regions, so
+	 * in general we have to allow mmap requests.  But if EFI *does*
+	 * tell us about anything inside this region, we should deny it.
+	 * The user can always map a smaller region to avoid the overlap.
+	 */
+	if (efi_memmap_intersects(phys_addr, size))
+		return 0;
+
+	return 1;
+}
+
+pgprot_t
+phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size,
+		     pgprot_t vma_prot)
+{
+	unsigned long phys_addr = pfn << PAGE_SHIFT;
+	u64 attr;
+
+	/*
+	 * For /dev/mem mmap, we use user mappings, but if the region is
+	 * in kern_memmap (and hence may be covered by a kernel mapping),
+	 * we must use the same attribute as the kernel mapping.
+	 */
+	attr = kern_mem_attribute(phys_addr, size);
+	if (attr & EFI_MEMORY_WB)
+		return pgprot_cacheable(vma_prot);
+	else if (attr & EFI_MEMORY_UC)
+		return pgprot_noncached(vma_prot);
+
+	/*
+	 * Some chipsets don't support UC access to memory.  If
+	 * WB is supported, we prefer that.
+	 */
+	if (efi_mem_attribute(phys_addr, size) & EFI_MEMORY_WB)
+		return pgprot_cacheable(vma_prot);
+
+	return pgprot_noncached(vma_prot);
 }
 
 int __init
@@ -789,7 +956,7 @@ efi_uart_console_only(void)
 				return 1;
 			uart = 0;
 		}
-		hdr = (struct efi_generic_dev_path *) ((u8 *) hdr + hdr->length);
+		hdr = (struct efi_generic_dev_path *)((u8 *) hdr + hdr->length);
 	}
 	printk(KERN_ERR "Malformed %s value\n", name);
 	return 0;
@@ -827,10 +994,12 @@ find_memmap_space (void)
 		if (!efi_wb(md)) {
 			continue;
 		}
-		if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+		if (pmd == NULL || !efi_wb(pmd) ||
+		    efi_md_end(pmd) != md->phys_addr) {
 			contig_low = GRANULEROUNDUP(md->phys_addr);
 			contig_high = efi_md_end(md);
-			for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+			for (q = p + efi_desc_size; q < efi_map_end;
+			     q += efi_desc_size) {
 				check_md = q;
 				if (!efi_wb(check_md))
 					break;
@@ -840,14 +1009,15 @@ find_memmap_space (void)
 			}
 			contig_high = GRANULEROUNDDOWN(contig_high);
 		}
-		if (!is_available_memory(md) || md->type == EFI_LOADER_DATA)
+		if (!is_memory_available(md) || md->type == EFI_LOADER_DATA)
 			continue;
 
 		/* Round ends inward to granule boundaries */
 		as = max(contig_low, md->phys_addr);
 		ae = min(contig_high, efi_md_end(md));
 
-		/* keep within max_addr= command line arg */
+		/* keep within max_addr= and min_addr= command line arg */
+		as = max(as, min_addr);
 		ae = min(ae, max_addr);
 		if (ae <= as)
 			continue;
@@ -873,10 +1043,10 @@ find_memmap_space (void)
  * to use.  We can allocate partial granules only if the unavailable
  * parts exist, and are WB.
  */
-void
-efi_memmap_init(unsigned long *s, unsigned long *e)
+unsigned long
+efi_memmap_init(u64 *s, u64 *e)
 {
-	struct kern_memdesc *k, *prev = 0;
+	struct kern_memdesc *k, *prev = NULL;
 	u64	contig_low=0, contig_high=0;
 	u64	as, ae, lim;
 	void *efi_map_start, *efi_map_end, *p, *q;
@@ -893,8 +1063,9 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
 	for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
 		md = p;
 		if (!efi_wb(md)) {
-			if (efi_uc(md) && (md->type == EFI_CONVENTIONAL_MEMORY ||
-				    	   md->type == EFI_BOOT_SERVICES_DATA)) {
+			if (efi_uc(md) &&
+			    (md->type == EFI_CONVENTIONAL_MEMORY ||
+			     md->type == EFI_BOOT_SERVICES_DATA)) {
 				k->attribute = EFI_MEMORY_UC;
 				k->start = md->phys_addr;
 				k->num_pages = md->num_pages;
@@ -902,10 +1073,12 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
 			}
 			continue;
 		}
-		if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+		if (pmd == NULL || !efi_wb(pmd) ||
+		    efi_md_end(pmd) != md->phys_addr) {
 			contig_low = GRANULEROUNDUP(md->phys_addr);
 			contig_high = efi_md_end(md);
-			for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+			for (q = p + efi_desc_size; q < efi_map_end;
+			     q += efi_desc_size) {
 				check_md = q;
 				if (!efi_wb(check_md))
 					break;
@@ -915,7 +1088,7 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
 			}
 			contig_high = GRANULEROUNDDOWN(contig_high);
 		}
-		if (!is_available_memory(md))
+		if (!is_memory_available(md))
 			continue;
 
 		/*
@@ -925,13 +1098,17 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
 		if (md->phys_addr < contig_low) {
 			lim = min(efi_md_end(md), contig_low);
 			if (efi_uc(md)) {
-				if (k > kern_memmap && (k-1)->attribute == EFI_MEMORY_UC &&
+				if (k > kern_memmap &&
+				    (k-1)->attribute == EFI_MEMORY_UC &&
 				    kmd_end(k-1) == md->phys_addr) {
-					(k-1)->num_pages += (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+					(k-1)->num_pages +=
+						(lim - md->phys_addr)
+						>> EFI_PAGE_SHIFT;
 				} else {
 					k->attribute = EFI_MEMORY_UC;
 					k->start = md->phys_addr;
-					k->num_pages = (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+					k->num_pages = (lim - md->phys_addr)
+						>> EFI_PAGE_SHIFT;
 					k++;
 				}
 			}
@@ -949,7 +1126,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
 				} else {
 					k->attribute = EFI_MEMORY_UC;
 					k->start = lim;
-					k->num_pages = (efi_md_end(md) - lim) >> EFI_PAGE_SHIFT;
+					k->num_pages = (efi_md_end(md) - lim)
+						>> EFI_PAGE_SHIFT;
 					k++;
 				}
 			}
@@ -957,7 +1135,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
 		} else
 			ae = efi_md_end(md);
 
-		/* keep within max_addr= command line arg */
+		/* keep within max_addr= and min_addr= command line arg */
+		as = max(as, min_addr);
 		ae = min(ae, max_addr);
 		if (ae <= as)
 			continue;
@@ -984,11 +1163,14 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
 	/* reserve the memory we are using for kern_memmap */
 	*s = (u64)kern_memmap;
 	*e = (u64)++k;
+
+	return total_mem;
 }
 
 void
 efi_initialize_iomem_resources(struct resource *code_resource,
-			       struct resource *data_resource)
+			       struct resource *data_resource,
+			       struct resource *bss_resource)
 {
 	struct resource *res;
 	void *efi_map_start, *efi_map_end, *p;
@@ -1009,7 +1191,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 		if (md->num_pages == 0) /* should not happen */
 			continue;
 
-		flags = IORESOURCE_MEM;
+		flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 		switch (md->type) {
 
 			case EFI_MEMORY_MAPPED_IO:
@@ -1024,19 +1206,19 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 				if (md->attribute & EFI_MEMORY_WP) {
 					name = "System ROM";
 					flags |= IORESOURCE_READONLY;
-				} else {
+				} else if (md->attribute == EFI_MEMORY_UC)
+					name = "Uncached RAM";
+				else
 					name = "System RAM";
-				}
 				break;
 
 			case EFI_ACPI_MEMORY_NVS:
 				name = "ACPI Non-volatile Storage";
-				flags |= IORESOURCE_BUSY;
 				break;
 
 			case EFI_UNUSABLE_MEMORY:
 				name = "reserved";
-				flags |= IORESOURCE_BUSY | IORESOURCE_DISABLED;
+				flags |= IORESOURCE_DISABLED;
 				break;
 
 			case EFI_RESERVED_TYPE:
@@ -1045,18 +1227,19 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 			case EFI_ACPI_RECLAIM_MEMORY:
 			default:
 				name = "reserved";
-				flags |= IORESOURCE_BUSY;
 				break;
 		}
 
-		if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) {
-			printk(KERN_ERR "failed to alocate resource for iomem\n");
+		if ((res = kzalloc(sizeof(struct resource),
+				   GFP_KERNEL)) == NULL) {
+			printk(KERN_ERR
+			       "failed to allocate resource for iomem\n");
 			return;
 		}
 
 		res->name = name;
 		res->start = md->phys_addr;
-		res->end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+		res->end = md->phys_addr + efi_md_size(md) - 1;
 		res->flags = flags;
 
 		if (insert_resource(&iomem_resource, res) < 0)
@@ -1069,6 +1252,89 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 			 */
 			insert_resource(res, code_resource);
 			insert_resource(res, data_resource);
+			insert_resource(res, bss_resource);
+#ifdef CONFIG_KEXEC
+                        insert_resource(res, &efi_memmap_res);
+                        insert_resource(res, &boot_param_res);
+			if (crashk_res.end > crashk_res.start)
+				insert_resource(res, &crashk_res);
+#endif
+		}
+	}
+}
+
+#ifdef CONFIG_KEXEC
+/* find a block of memory aligned to 64M exclude reserved regions
+   rsvd_regions are sorted
+ */
+unsigned long __init
+kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n)
+{
+	int i;
+	u64 start, end;
+	u64 alignment = 1UL << _PAGE_SIZE_64M;
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+		if (!efi_wb(md))
+			continue;
+		start = ALIGN(md->phys_addr, alignment);
+		end = efi_md_end(md);
+		for (i = 0; i < n; i++) {
+			if (__pa(r[i].start) >= start && __pa(r[i].end) < end) {
+				if (__pa(r[i].start) > start + size)
+					return start;
+				start = ALIGN(__pa(r[i].end), alignment);
+				if (i < n-1 &&
+				    __pa(r[i+1].start) < start + size)
+					continue;
+				else
+					break;
+			}
+		}
+		if (end > start + size)
+			return start;
+	}
+
+	printk(KERN_WARNING
+	       "Cannot reserve 0x%lx byte of memory for crashdump\n", size);
+	return ~0UL;
+}
+#endif
+
+#ifdef CONFIG_CRASH_DUMP
+/* locate the size find a the descriptor at a certain address */
+unsigned long __init
+vmcore_find_descriptor_size (unsigned long address)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size;
+	unsigned long ret = 0;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+		if (efi_wb(md) && md->type == EFI_LOADER_DATA
+		    && md->phys_addr == address) {
+			ret = efi_md_size(md);
+			break;
 		}
 	}
+
+	if (ret == 0)
+		printk(KERN_WARNING "Cannot locate EFI vmcore descriptor\n");
+
+	return ret;
 }
+#endif
diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S
index 5a7fe70212a..a56e161d751 100644
--- a/arch/ia64/kernel/efi_stub.S
+++ b/arch/ia64/kernel/efi_stub.S
@@ -61,7 +61,7 @@ GLOBAL_ENTRY(efi_call_phys)
 	or loc3=loc3,r17
 	mov b6=r2
 	;;
-	andcm r16=loc3,r16		// get psr with IT, DT, and RT bits cleared
+	andcm r16=loc3,r16	// get psr with IT, DT, and RT bits cleared
 	br.call.sptk.many rp=ia64_switch_mode_phys
 .ret0:	mov out4=in5
 	mov out0=in1
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
new file mode 100644
index 00000000000..04bc8fd5f89
--- /dev/null
+++ b/arch/ia64/kernel/elfcore.c
@@ -0,0 +1,76 @@
+#include <linux/elf.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/elf.h>
+
+
+Elf64_Half elf_core_extra_phdrs(void)
+{
+	return GATE_EHDR->e_phnum;
+}
+
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+	Elf64_Off ofs = 0;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		struct elf_phdr phdr = gate_phdrs[i];
+
+		if (phdr.p_type == PT_LOAD) {
+			phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz);
+			phdr.p_filesz = phdr.p_memsz;
+			if (ofs == 0) {
+				ofs = phdr.p_offset = offset;
+				offset += phdr.p_filesz;
+			} else {
+				phdr.p_offset = ofs;
+			}
+		} else {
+			phdr.p_offset += ofs;
+		}
+		phdr.p_paddr = 0; /* match other core phdrs */
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
+			return 0;
+	}
+	return 1;
+}
+
+int elf_core_write_extra_data(struct coredump_params *cprm)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		if (gate_phdrs[i].p_type == PT_LOAD) {
+			void *addr = (void *)gate_phdrs[i].p_vaddr;
+			size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz);
+
+			if (!dump_emit(cprm, addr, memsz))
+				return 0;
+			break;
+		}
+	}
+	return 1;
+}
+
+size_t elf_core_extra_data_size(void)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+	size_t size = 0;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		if (gate_phdrs[i].p_type == PT_LOAD) {
+			size += PAGE_ALIGN(gate_phdrs[i].p_memsz);
+			break;
+		}
+	}
+	return size;
+}
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index bcb80ca5cf4..ba3d03503e8 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1,5 +1,5 @@
 /*
- * ia64/kernel/entry.S
+ * arch/ia64/kernel/entry.S
  *
  * Kernel entry points.
  *
@@ -23,6 +23,11 @@
  * 11/07/2000
  */
 /*
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *                    pv_ops.
+ */
+/*
  * Global (preserved) predicate usage on syscall entry/exit path:
  *
  *	pKStk:		See entry.h.
@@ -31,7 +36,6 @@
  *	pNonSys:	!pSys
  */
 
-#include <linux/config.h>
 
 #include <asm/asmmacro.h>
 #include <asm/cache.h>
@@ -43,9 +47,11 @@
 #include <asm/processor.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
+#include <asm/ftrace.h>
 
 #include "minstate.h"
 
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
 	/*
 	 * execve() is special because in case of success, we need to
 	 * setup a null register window frame.
@@ -55,31 +61,20 @@ ENTRY(ia64_execve)
 	 * Allocate 8 input registers since ptrace() may clobber them
 	 */
 	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,4,0
+	alloc loc1=ar.pfs,8,2,3,0
 	mov loc0=rp
 	.body
 	mov out0=in0			// filename
 	;;				// stop bit between alloc and call
 	mov out1=in1			// argv
 	mov out2=in2			// envp
-	add out3=16,sp			// regs
 	br.call.sptk.many rp=sys_execve
 .ret0:
-#ifdef CONFIG_IA32_SUPPORT
-	/*
-	 * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers
-	 * from pt_regs.
-	 */
-	adds r16=PT(CR_IPSR)+16,sp
-	;;
-	ld8 r16=[r16]
-#endif
 	cmp4.ge p6,p7=r8,r0
 	mov ar.pfs=loc1			// restore ar.pfs
 	sxt4 r8=r8			// return 64-bit result
 	;;
 	stf.spill [sp]=f0
-(p6)	cmp.ne pKStk,pUStk=r0,r0	// a successful execve() lands us in user-mode...
 	mov rp=loc0
 (p6)	mov ar.pfs=r0			// clear ar.pfs on success
 (p7)	br.ret.sptk.many rp
@@ -102,12 +97,6 @@ ENTRY(ia64_execve)
 	ldf.fill f23=[sp];	ldf.fill f24=[sp];	mov f25=f0
 	ldf.fill f26=[sp];	ldf.fill f27=[sp];	mov f28=f0
 	ldf.fill f29=[sp];	ldf.fill f30=[sp];	mov f31=f0
-#ifdef CONFIG_IA32_SUPPORT
-	tbit.nz p6,p0=r16, IA64_PSR_IS_BIT
-	movl loc0=ia64_ret_from_ia32_execve
-	;;
-(p6)	mov rp=loc0
-#endif
 	br.ret.sptk.many rp
 END(ia64_execve)
 
@@ -127,13 +116,12 @@ GLOBAL_ENTRY(sys_clone2)
 	mov loc1=r16				// save ar.pfs across do_fork
 	.body
 	mov out1=in1
-	mov out3=in2
+	mov out2=in2
 	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
-	mov out4=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	mov out3=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
 	;;
 (p6)	st8 [r2]=in5				// store TLS in r16 for copy_thread()
-	mov out5=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out4=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
 	mov out0=in0				// out0 = clone_flags
 	br.call.sptk.many rp=do_fork
 .ret1:	.restore sp
@@ -159,13 +147,12 @@ GLOBAL_ENTRY(sys_clone)
 	mov loc1=r16				// save ar.pfs across do_fork
 	.body
 	mov out1=in1
-	mov out3=16				// stacksize (compensates for 16-byte scratch area)
+	mov out2=16				// stacksize (compensates for 16-byte scratch area)
 	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
-	mov out4=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	mov out3=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
 	;;
 (p6)	st8 [r2]=in4				// store TLS in r13 (tp)
-	mov out5=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out4=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
 	mov out0=in0				// out0 = clone_flags
 	br.call.sptk.many rp=do_fork
 .ret2:	.restore sp
@@ -174,6 +161,7 @@ GLOBAL_ENTRY(sys_clone)
 	mov rp=loc0
 	br.ret.sptk.many rp
 END(sys_clone)
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
 
 /*
  * prev_task <- ia64_switch_to(struct task_struct *next)
@@ -181,7 +169,7 @@ END(sys_clone)
  *	called.  The code starting at .map relies on this.  The rest of the code
  *	doesn't care about the interrupt masking status.
  */
-GLOBAL_ENTRY(ia64_switch_to)
+GLOBAL_ENTRY(__paravirt_switch_to)
 	.prologue
 	alloc r16=ar.pfs,1,0,0,0
 	DO_SAVE_SWITCH_STACK
@@ -205,7 +193,7 @@ GLOBAL_ENTRY(ia64_switch_to)
 	;;
 .done:
 	ld8 sp=[r21]			// load kernel stack pointer of new task
-	mov IA64_KR(CURRENT)=in0	// update "current" application register
+	MOV_TO_KR(CURRENT, in0, r8, r9)		// update "current" application register
 	mov r8=r13			// return pointer to previously running task
 	mov r13=in0			// set "current" pointer
 	;;
@@ -217,26 +205,25 @@ GLOBAL_ENTRY(ia64_switch_to)
 	br.ret.sptk.many rp		// boogie on out in new context
 
 .map:
-	rsm psr.ic			// interrupts (psr.i) are already disabled here
+	RSM_PSR_IC(r25)			// interrupts (psr.i) are already disabled here
 	movl r25=PAGE_KERNEL
 	;;
 	srlz.d
 	or r23=r25,r20			// construct PA | page properties
 	mov r25=IA64_GRANULE_SHIFT<<2
 	;;
-	mov cr.itir=r25
-	mov cr.ifa=in0			// VA of next task...
+	MOV_TO_ITIR(p0, r25, r8)
+	MOV_TO_IFA(in0, r8)		// VA of next task...
 	;;
 	mov r25=IA64_TR_CURRENT_STACK
-	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
+	MOV_TO_KR(CURRENT_STACK, r26, r8, r9)	// remember last page we mapped...
 	;;
 	itr.d dtr[r25]=r23		// wire in new mapping...
-	ssm psr.ic			// reenable the psr.ic bit
-	;;
-	srlz.d
+	SSM_PSR_IC_AND_SRLZ_D(r8, r9)	// reenable the psr.ic bit
 	br.cond.sptk .done
-END(ia64_switch_to)
+END(__paravirt_switch_to)
 
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
 /*
  * Note that interrupts are enabled during save_switch_stack and load_switch_stack.  This
  * means that we may get an interrupt with "sp" pointing to the new kernel stack while
@@ -376,7 +363,7 @@ END(save_switch_stack)
  *	- b7 holds address to return to
  *	- must not touch r8-r11
  */
-ENTRY(load_switch_stack)
+GLOBAL_ENTRY(load_switch_stack)
 	.prologue
 	.altrp b7
 
@@ -493,18 +480,6 @@ GLOBAL_ENTRY(prefetch_stack)
 	br.ret.sptk.many rp
 END(prefetch_stack)
 
-GLOBAL_ENTRY(execve)
-	mov r15=__NR_execve			// put syscall number in place
-	break __BREAK_SYSCALL
-	br.ret.sptk.many rp
-END(execve)
-
-GLOBAL_ENTRY(clone)
-	mov r15=__NR_clone			// put syscall number in place
-	break __BREAK_SYSCALL
-	br.ret.sptk.many rp
-END(clone)
-
 	/*
 	 * Invoke a system call, but do some tracing before and after the call.
 	 * We MUST preserve the current register frame throughout this routine
@@ -529,6 +504,11 @@ GLOBAL_ENTRY(ia64_trace_syscall)
  	stf.spill [r16]=f10
  	stf.spill [r17]=f11
 	br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
+	cmp.lt p6,p0=r8,r0			// check tracehook
+	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
+	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
+	mov r10=0
+(p6)	br.cond.sptk strace_error		// syscall failed ->
 	adds r16=PT(F6)+16,sp
 	adds r17=PT(F7)+16,sp
 	;;
@@ -571,7 +551,8 @@ GLOBAL_ENTRY(ia64_trace_syscall)
 	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
 .ret3:
 (pUStk)	cmp.eq.unc p6,p0=r0,r0			// p6 <- pUStk
-	br.cond.sptk .work_pending_syscall_end
+(pUStk)	rsm psr.i				// disable interrupts
+	br.cond.sptk ia64_work_pending_syscall_end
 
 strace_error:
 	ld8 r3=[r2]				// load pt_regs.r8
@@ -602,6 +583,27 @@ GLOBAL_ENTRY(ia64_strace_leave_kernel)
 .ret4:	br.cond.sptk ia64_leave_kernel
 END(ia64_strace_leave_kernel)
 
+ENTRY(call_payload)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(0)
+	/* call the kernel_thread payload; fn is in r4, arg - in r5 */
+	alloc loc1=ar.pfs,0,3,1,0
+	mov loc0=rp
+	mov loc2=gp
+	mov out0=r5		// arg
+	ld8 r14 = [r4], 8	// fn.address
+	;;
+	mov b6 = r14
+	ld8 gp = [r4]		// fn.gp
+	;;
+	br.call.sptk.many rp=b6	// fn(arg)
+.ret12:	mov gp=loc2
+	mov rp=loc0
+	mov ar.pfs=loc1
+	/* ... and if it has returned, we are going to userland */
+	cmp.ne pKStk,pUStk=r0,r0
+	br.ret.sptk.many rp
+END(call_payload)
+
 GLOBAL_ENTRY(ia64_ret_from_clone)
 	PT_REGS_UNWIND_INFO(0)
 {	/*
@@ -618,6 +620,7 @@ GLOBAL_ENTRY(ia64_ret_from_clone)
 	br.call.sptk.many rp=ia64_invoke_schedule_tail
 }
 .ret8:
+(pKStk)	br.call.sptk.many rp=call_payload
 	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
 	;;
 	ld4 r2=[r2]
@@ -636,8 +639,17 @@ GLOBAL_ENTRY(ia64_ret_from_syscall)
 	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
 	mov r10=r0				// clear error indication in r10
 (p7)	br.cond.spnt handle_syscall_error	// handle potential syscall failure
+#ifdef CONFIG_PARAVIRT
+	;;
+	br.cond.sptk.few ia64_leave_syscall
+	;;
+#endif /* CONFIG_PARAVIRT */
 END(ia64_ret_from_syscall)
+#ifndef CONFIG_PARAVIRT
 	// fall through
+#endif
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
+
 /*
  * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
  *	need to switch to bank 0 and doesn't restore the scratch registers.
@@ -682,7 +694,7 @@ END(ia64_ret_from_syscall)
  *	      ar.csd: cleared
  *	      ar.ssd: cleared
  */
-ENTRY(ia64_leave_syscall)
+GLOBAL_ENTRY(__paravirt_leave_syscall)
 	PT_REGS_UNWIND_INFO(0)
 	/*
 	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
@@ -692,11 +704,11 @@ ENTRY(ia64_leave_syscall)
 	 * extra work.  We always check for extra work when returning to user-level.
 	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
 	 * is 0.  After extra work processing has been completed, execution
-	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
+	 * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check
 	 * needs to be redone.
 	 */
 #ifdef CONFIG_PREEMPT
-	rsm psr.i				// disable interrupts
+	RSM_PSR_I(p0, r2, r18)			// disable interrupts
 	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
 (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
 	;;
@@ -706,11 +718,22 @@ ENTRY(ia64_leave_syscall)
 	;;
 	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
 #else /* !CONFIG_PREEMPT */
-(pUStk)	rsm psr.i
+	RSM_PSR_I(pUStk, r2, r18)
 	cmp.eq pLvSys,p0=r0,r0		// pLvSys=1: leave from syscall
 (pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
 #endif
-.work_processed_syscall:
+.global __paravirt_work_processed_syscall;
+__paravirt_work_processed_syscall:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	adds r2=PT(LOADRS)+16,r12
+	MOV_FROM_ITC(pUStk, p9, r22, r19)	// fetch time at leave
+	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
+	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
+	adds r3=PT(AR_BSPSTORE)+16,r12		// deferred
+	;;
+#else
 	adds r2=PT(LOADRS)+16,r12
 	adds r3=PT(AR_BSPSTORE)+16,r12
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -719,6 +742,7 @@ ENTRY(ia64_leave_syscall)
 	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
 	nop.i 0
 	;;
+#endif
 	mov r16=ar.bsp				// M2  get existing backing store pointer
 	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
 (p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
@@ -733,19 +757,28 @@ ENTRY(ia64_leave_syscall)
 (pNonSys) break 0		//      bug check: we shouldn't be here if pNonSys is TRUE!
 	;;
 	invala			// M0|1 invalidate ALAT
-	rsm psr.i | psr.ic	// M2   turn off interrupts and interruption collection
+	RSM_PSR_I_IC(r28, r29, r30)	// M2   turn off interrupts and interruption collection
 	cmp.eq p9,p0=r0,r0	// A    set p9 to indicate that we should restore cr.ifs
 
 	ld8 r29=[r2],16		// M0|1 load cr.ipsr
 	ld8 r28=[r3],16		// M0|1 load cr.iip
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+	;;
+	ld8 r30=[r2],16		// M0|1 load cr.ifs
+	ld8 r25=[r3],16		// M0|1 load ar.unat
+(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+#else
 	mov r22=r0		// A    clear r22
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
 	ld8 r25=[r3],16		// M0|1 load ar.unat
 (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
 	;;
+#endif
 	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
-(pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
+	MOV_FROM_PSR(pKStk, r22, r21)	// M2   read PSR now that interrupts are disabled
 	nop 0
 	;;
 	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
@@ -760,7 +793,11 @@ ENTRY(ia64_leave_syscall)
 	ld8.fill r1=[r3],16			// M0|1 load r1
 (pUStk) mov r17=1				// A
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+(pUStk) st1 [r15]=r17				// M2|3
+#else
 (pUStk) st1 [r14]=r17				// M2|3
+#endif
 	ld8.fill r13=[r3],16			// M0|1
 	mov f8=f0				// F    clear f8
 	;;
@@ -768,21 +805,30 @@ ENTRY(ia64_leave_syscall)
 	ld8.fill r15=[r3]			// M0|1 restore r15
 	mov b6=r18				// I0   restore b6
 
-	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+	LOAD_PHYS_STACK_REG_SIZE(r17)
 	mov f9=f0					// F    clear f9
 (pKStk) br.cond.dpnt.many skip_rbs_switch		// B
 
 	srlz.d				// M0   ensure interruption collection is off (for cover)
 	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
-	cover				// B    add current frame into dirty partition & set cr.ifs
+	COVER				// B    add current frame into dirty partition & set cr.ifs
+	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	mov r19=ar.bsp			// M2   get new backing store pointer
+	st8 [r14]=r22			// M	save time at leave
+	mov f10=f0			// F    clear f10
+
+	mov r22=r0			// A	clear r22
+	movl r14=__kernel_syscall_via_epc // X
 	;;
-(pUStk) ld4 r17=[r17]			// M0|1 r17 = cpu_data->phys_stacked_size_p8
+#else
 	mov r19=ar.bsp			// M2   get new backing store pointer
 	mov f10=f0			// F    clear f10
 
 	nop.m 0
 	movl r14=__kernel_syscall_via_epc // X
 	;;
+#endif
 	mov.m ar.csd=r0			// M2   clear ar.csd
 	mov.m ar.ccv=r0			// M2   clear ar.ccv
 	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
@@ -790,22 +836,9 @@ ENTRY(ia64_leave_syscall)
 	mov.m ar.ssd=r0			// M2   clear ar.ssd
 	mov f11=f0			// F    clear f11
 	br.cond.sptk.many rbs_switch	// B
-END(ia64_leave_syscall)
+END(__paravirt_leave_syscall)
 
-#ifdef CONFIG_IA32_SUPPORT
-GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
-	PT_REGS_UNWIND_INFO(0)
-	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
-	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
-	;;
-	.mem.offset 0,0
-	st8.spill [r2]=r8	// store return value in slot for r8 and set unat bit
-	.mem.offset 8,0
-	st8.spill [r3]=r0	// clear error indication in slot for r10 and set unat bit
-END(ia64_ret_from_ia32_execve)
-	// fall through
-#endif /* CONFIG_IA32_SUPPORT */
-GLOBAL_ENTRY(ia64_leave_kernel)
+GLOBAL_ENTRY(__paravirt_leave_kernel)
 	PT_REGS_UNWIND_INFO(0)
 	/*
 	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
@@ -819,7 +852,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	 * needs to be redone.
 	 */
 #ifdef CONFIG_PREEMPT
-	rsm psr.i				// disable interrupts
+	RSM_PSR_I(p0, r17, r31)			// disable interrupts
 	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
 (pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
 	;;
@@ -829,7 +862,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	;;
 	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
 #else
-(pUStk)	rsm psr.i
+	RSM_PSR_I(pUStk, r17, r31)
 	cmp.eq p0,pLvSys=r0,r0		// pLvSys=0: leave from kernel
 (pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
 #endif
@@ -877,7 +910,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	mov ar.csd=r30
 	mov ar.ssd=r31
 	;;
-	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
+	RSM_PSR_I_IC(r23, r22, r25)	// initiate turning off of interrupt and interruption collection
 	invala			// invalidate ALAT
 	;;
 	ld8.fill r22=[r2],24
@@ -909,16 +942,24 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	mov ar.ccv=r15
 	;;
 	ldf.fill f11=[r2]
-	bsw.0			// switch back to bank 0 (no stop bit required beforehand...)
+	BSW_0(r2, r3, r15)	// switch back to bank 0 (no stop bit required beforehand...)
 	;;
 (pUStk)	mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 
-(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	.pred.rel.mutex pUStk,pKStk
+	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
+	MOV_FROM_ITC(pUStk, p9, r22, r29)	// M  fetch time at leave
+	nop.i 0
+	;;
+#else
+	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
 	nop.i 0
 	nop.i 0
 	;;
+#endif
 	ld8 r29=[r16],16	// load cr.ipsr
 	ld8 r28=[r17],16	// load cr.iip
 	;;
@@ -940,24 +981,45 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	;;
 	ld8.fill r12=[r16],16
 	ld8.fill r13=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+(pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
+#else
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+#endif
 	;;
 	ld8 r20=[r16],16	// ar.fpsr
 	ld8.fill r15=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
+#endif
 	;;
 	ld8.fill r14=[r16],16
 	ld8.fill r2=[r17]
 (pUStk)	mov r17=1
 	;;
-	ld8.fill r3=[r16]
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
+	//  mib  :  mov add br        ->  mib  : ld8 add br
+	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
+	//
+	//  no one require bsp in r16 if (pKStk) branch is selected.
+(pUStk)	st8 [r3]=r22		// save time at leave
 (pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
 	shr.u r18=r19,16	// get byte size of existing "dirty" partition
 	;;
+	ld8.fill r3=[r16]	// deferred
+	LOAD_PHYS_STACK_REG_SIZE(r17)
+(pKStk)	br.cond.dpnt skip_rbs_switch
 	mov r16=ar.bsp		// get existing backing store pointer
-	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
+#else
+	ld8.fill r3=[r16]
+(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
+	shr.u r18=r19,16	// get byte size of existing "dirty" partition
 	;;
-	ld4 r17=[r17]		// r17 = cpu_data->phys_stacked_size_p8
+	mov r16=ar.bsp		// get existing backing store pointer
+	LOAD_PHYS_STACK_REG_SIZE(r17)
 (pKStk)	br.cond.dpnt skip_rbs_switch
+#endif
 
 	/*
 	 * Restore user backing store.
@@ -965,7 +1027,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	 * NOTE: alloc, loadrs, and cover can't be predicated.
 	 */
 (pNonSys) br.cond.dpnt dont_preserve_current_frame
-	cover				// add current frame into dirty partition and set cr.ifs
+	COVER				// add current frame into dirty partition and set cr.ifs
 	;;
 	mov r19=ar.bsp			// get new backing store pointer
 rbs_switch:
@@ -1068,16 +1130,16 @@ skip_rbs_switch:
 (pKStk)	dep r29=r22,r29,21,1	// I0 update ipsr.pp with psr.pp
 (pLvSys)mov r16=r0		// A  clear r16 for leave_syscall, no-op otherwise
 	;;
-	mov cr.ipsr=r29		// M2
+	MOV_TO_IPSR(p0, r29, r25)	// M2
 	mov ar.pfs=r26		// I0
 (pLvSys)mov r17=r0		// A  clear r17 for leave_syscall, no-op otherwise
 
-(p9)	mov cr.ifs=r30		// M2
+	MOV_TO_IFS(p9, r30, r25)// M2
 	mov b0=r21		// I0
 (pLvSys)mov r18=r0		// A  clear r18 for leave_syscall, no-op otherwise
 
 	mov ar.fpsr=r20		// M2
-	mov cr.iip=r28		// M2
+	MOV_TO_IIP(r28, r25)	// M2
 	nop 0
 	;;
 (pUStk)	mov ar.rnat=r24		// M2 must happen with RSE in lazy mode
@@ -1086,7 +1148,7 @@ skip_rbs_switch:
 
 	mov ar.rsc=r27		// M2
 	mov pr=r31,-1		// I0
-	rfi			// B
+	RFI			// B
 
 	/*
 	 * On entry:
@@ -1094,6 +1156,9 @@ skip_rbs_switch:
 	 *	r31 = current->thread_info->flags
 	 * On exit:
 	 *	p6 = TRUE if work-pending-check needs to be redone
+	 *
+	 * Interrupts are disabled on entry, reenabled depend on work, and
+	 * disabled on exit.
 	 */
 .work_pending_syscall:
 	add r2=-8,r2
@@ -1102,42 +1167,30 @@ skip_rbs_switch:
 	st8 [r2]=r8
 	st8 [r3]=r10
 .work_pending:
-	tbit.z p6,p0=r31,TIF_NEED_RESCHED		// current_thread_info()->need_resched==0?
+	tbit.z p6,p0=r31,TIF_NEED_RESCHED	// is resched not needed?
 (p6)	br.cond.sptk.few .notify
-#ifdef CONFIG_PREEMPT
-(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
-	;;
-(pKStk) st4 [r20]=r21
-	ssm psr.i		// enable interrupts
-#endif
-	br.call.spnt.many rp=schedule
-.ret9:	cmp.eq p6,p0=r0,r0				// p6 <- 1
-	rsm psr.i		// disable interrupts
-	;;
-#ifdef CONFIG_PREEMPT
-(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
-	;;
-(pKStk)	st4 [r20]=r0		// preempt_count() <- 0
-#endif
-(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
-	br.cond.sptk.many .work_processed_kernel	// re-check
+	br.call.spnt.many rp=preempt_schedule_irq
+.ret9:	cmp.eq p6,p0=r0,r0	// p6 <- 1 (re-check)
+(pLvSys)br.cond.sptk.few  __paravirt_pending_syscall_end
+	br.cond.sptk.many .work_processed_kernel
 
 .notify:
 (pUStk)	br.call.spnt.many rp=notify_resume_user
-.ret10:	cmp.ne p6,p0=r0,r0				// p6 <- 0
-(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
-	br.cond.sptk.many .work_processed_kernel	// don't re-check
+.ret10:	cmp.ne p6,p0=r0,r0	// p6 <- 0 (don't re-check)
+(pLvSys)br.cond.sptk.few  __paravirt_pending_syscall_end
+	br.cond.sptk.many .work_processed_kernel
 
-.work_pending_syscall_end:
+.global __paravirt_pending_syscall_end;
+__paravirt_pending_syscall_end:
 	adds r2=PT(R8)+16,r12
 	adds r3=PT(R10)+16,r12
 	;;
 	ld8 r8=[r2]
 	ld8 r10=[r3]
-	br.cond.sptk.many .work_processed_syscall	// re-check
-
-END(ia64_leave_kernel)
+	br.cond.sptk.many __paravirt_work_processed_syscall_target
+END(__paravirt_leave_kernel)
 
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
 ENTRY(handle_syscall_error)
 	/*
 	 * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
@@ -1172,11 +1225,14 @@ GLOBAL_ENTRY(ia64_invoke_schedule_tail)
 END(ia64_invoke_schedule_tail)
 
 	/*
-	 * Setup stack and call do_notify_resume_user().  Note that pSys and pNonSys need to
-	 * be set up by the caller.  We declare 8 input registers so the system call
-	 * args get preserved, in case we need to restart a system call.
+	 * Setup stack and call do_notify_resume_user(), keeping interrupts
+	 * disabled.
+	 *
+	 * Note that pSys and pNonSys need to be set up by the caller.
+	 * We declare 8 input registers so the system call args get preserved,
+	 * in case we need to restart a system call.
 	 */
-ENTRY(notify_resume_user)
+GLOBAL_ENTRY(notify_resume_user)
 	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
 	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
 	mov r9=ar.unat
@@ -1203,32 +1259,6 @@ ENTRY(notify_resume_user)
 	br.ret.sptk.many rp
 END(notify_resume_user)
 
-GLOBAL_ENTRY(sys_rt_sigsuspend)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
-	mov r9=ar.unat
-	mov loc0=rp				// save return address
-	mov out0=in0				// mask
-	mov out1=in1				// sigsetsize
-	adds out2=8,sp				// out2=&sigscratch->ar_pfs
-	;;
-	.fframe 16
-	.spillsp ar.unat, 16
-	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
-	st8 [out2]=loc1,-8			// save ar.pfs, out2=&sigscratch
-	.body
-	br.call.sptk.many rp=ia64_rt_sigsuspend
-.ret17:	.restore sp
-	adds sp=16,sp				// pop scratch stack space
-	;;
-	ld8 r9=[sp]				// load new unat from sw->caller_unat
-	mov rp=loc0
-	;;
-	mov ar.unat=r9
-	mov ar.pfs=loc1
-	br.ret.sptk.many rp
-END(sys_rt_sigsuspend)
-
 ENTRY(sys_rt_sigreturn)
 	PT_REGS_UNWIND_INFO(0)
 	/*
@@ -1264,7 +1294,7 @@ ENTRY(sys_rt_sigreturn)
 	adds sp=16,sp
 	;;
 	ld8 r9=[sp]				// load new ar.unat
-	mov.sptk b7=r8,ia64_leave_kernel
+	mov.sptk b7=r8,ia64_native_leave_kernel
 	;;
 	mov ar.unat=r9
 	br.many b7
@@ -1328,6 +1358,105 @@ GLOBAL_ENTRY(unw_init_running)
 	br.ret.sptk.many rp
 END(unw_init_running)
 
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+GLOBAL_ENTRY(_mcount)
+	br ftrace_stub
+END(_mcount)
+
+.here:
+	br.ret.sptk.many b0
+
+GLOBAL_ENTRY(ftrace_caller)
+	alloc out0 = ar.pfs, 8, 0, 4, 0
+	mov out3 = r0
+	;;
+	mov out2 = b0
+	add r3 = 0x20, r3
+	mov out1 = r1;
+	br.call.sptk.many b0 = ftrace_patch_gp
+	//this might be called from module, so we must patch gp
+ftrace_patch_gp:
+	movl gp=__gp
+	mov b0 = r3
+	;;
+.global ftrace_call;
+ftrace_call:
+{
+	.mlx
+	nop.m 0x0
+	movl r3 = .here;;
+}
+	alloc loc0 = ar.pfs, 4, 4, 2, 0
+	;;
+	mov loc1 = b0
+	mov out0 = b0
+	mov loc2 = r8
+	mov loc3 = r15
+	;;
+	adds out0 = -MCOUNT_INSN_SIZE, out0
+	mov out1 = in2
+	mov b6 = r3
+
+	br.call.sptk.many b0 = b6
+	;;
+	mov ar.pfs = loc0
+	mov b0 = loc1
+	mov r8 = loc2
+	mov r15 = loc3
+	br ftrace_stub
+	;;
+END(ftrace_caller)
+
+#else
+GLOBAL_ENTRY(_mcount)
+	movl r2 = ftrace_stub
+	movl r3 = ftrace_trace_function;;
+	ld8 r3 = [r3];;
+	ld8 r3 = [r3];;
+	cmp.eq p7,p0 = r2, r3
+(p7)	br.sptk.many ftrace_stub
+	;;
+
+	alloc loc0 = ar.pfs, 4, 4, 2, 0
+	;;
+	mov loc1 = b0
+	mov out0 = b0
+	mov loc2 = r8
+	mov loc3 = r15
+	;;
+	adds out0 = -MCOUNT_INSN_SIZE, out0
+	mov out1 = in2
+	mov b6 = r3
+
+	br.call.sptk.many b0 = b6
+	;;
+	mov ar.pfs = loc0
+	mov b0 = loc1
+	mov r8 = loc2
+	mov r15 = loc3
+	br ftrace_stub
+	;;
+END(_mcount)
+#endif
+
+GLOBAL_ENTRY(ftrace_stub)
+	mov r3 = b0
+	movl r2 = _mcount_ret_helper
+	;;
+	mov b6 = r2
+	mov b7 = r3
+	br.ret.sptk.many b6
+
+_mcount_ret_helper:
+	mov b0 = r42
+	mov r1 = r41
+	mov ar.pfs = r40
+	br b7
+END(ftrace_stub)
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
 	.rodata
 	.align 8
 	.globl sys_call_table
@@ -1366,7 +1495,7 @@ sys_call_table:
 	data8 sys_mkdir				// 1055
 	data8 sys_rmdir
 	data8 sys_dup
-	data8 sys_pipe
+	data8 sys_ia64_pipe
 	data8 sys_times
 	data8 ia64_brk				// 1060
 	data8 sys_setgid
@@ -1477,7 +1606,7 @@ sys_call_table:
 	data8 sys_sched_get_priority_min
 	data8 sys_sched_rr_get_interval
 	data8 sys_nanosleep
-	data8 sys_nfsservctl
+	data8 sys_ni_syscall			// old nfsservctl
 	data8 sys_prctl				// 1170
 	data8 sys_getpagesize
 	data8 sys_mmap2
@@ -1576,7 +1705,7 @@ sys_call_table:
 	data8 sys_mq_timedreceive		// 1265
 	data8 sys_mq_notify
 	data8 sys_mq_getsetattr
-	data8 sys_ni_syscall			// reserved for kexec_load
+	data8 sys_kexec_load
 	data8 sys_ni_syscall			// reserved for vserver
 	data8 sys_waitid			// 1270
 	data8 sys_add_key
@@ -1584,7 +1713,7 @@ sys_call_table:
 	data8 sys_keyctl
 	data8 sys_ioprio_set
 	data8 sys_ioprio_get			// 1275
-	data8 sys_ni_syscall
+	data8 sys_move_pages
 	data8 sys_inotify_init
 	data8 sys_inotify_add_watch
 	data8 sys_inotify_rm_watch
@@ -1602,8 +1731,8 @@ sys_call_table:
 	data8 sys_readlinkat
 	data8 sys_fchmodat
 	data8 sys_faccessat
-	data8 sys_ni_syscall			// reserved for pselect
-	data8 sys_ni_syscall			// 1295 reserved for ppoll
+	data8 sys_pselect6
+	data8 sys_ppoll				// 1295
 	data8 sys_unshare
 	data8 sys_splice
 	data8 sys_set_robust_list
@@ -1611,5 +1740,42 @@ sys_call_table:
 	data8 sys_sync_file_range		// 1300
 	data8 sys_tee
 	data8 sys_vmsplice
+	data8 sys_fallocate
+	data8 sys_getcpu
+	data8 sys_epoll_pwait			// 1305
+	data8 sys_utimensat
+	data8 sys_signalfd
+	data8 sys_ni_syscall
+	data8 sys_eventfd
+	data8 sys_timerfd_create		// 1310
+	data8 sys_timerfd_settime
+	data8 sys_timerfd_gettime
+	data8 sys_signalfd4
+	data8 sys_eventfd2
+	data8 sys_epoll_create1			// 1315
+	data8 sys_dup3
+	data8 sys_pipe2
+	data8 sys_inotify_init1
+	data8 sys_preadv
+	data8 sys_pwritev			// 1320
+	data8 sys_rt_tgsigqueueinfo
+	data8 sys_recvmmsg
+	data8 sys_fanotify_init
+	data8 sys_fanotify_mark
+	data8 sys_prlimit64			// 1325
+	data8 sys_name_to_handle_at
+	data8 sys_open_by_handle_at
+	data8 sys_clock_adjtime
+	data8 sys_syncfs
+	data8 sys_setns				// 1330
+	data8 sys_sendmmsg
+	data8 sys_process_vm_readv
+	data8 sys_process_vm_writev
+	data8 sys_accept4
+	data8 sys_finit_module			// 1335
+	data8 sys_sched_setattr
+	data8 sys_sched_getattr
+	data8 sys_renameat2
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h
index 78eeb079341..b83edac0296 100644
--- a/arch/ia64/kernel/entry.h
+++ b/arch/ia64/kernel/entry.h
@@ -1,4 +1,3 @@
-#include <linux/config.h>
 
 /*
  * Preserved registers that are shared between code in ivt.S and
@@ -23,6 +22,7 @@
 
 #define PT(f)		(IA64_PT_REGS_##f##_OFFSET)
 #define SW(f)		(IA64_SWITCH_STACK_##f##_OFFSET)
+#define SOS(f)		(IA64_SAL_OS_STATE_##f##_OFFSET)
 
 #define PT_REGS_SAVES(off)			\
 	.unwabi 3, 'i';				\
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
new file mode 100644
index 00000000000..0c161ed6d18
--- /dev/null
+++ b/arch/ia64/kernel/err_inject.c
@@ -0,0 +1,314 @@
+/*
+ * err_inject.c -
+ *	1.) Inject errors to a processor.
+ *	2.) Query error injection capabilities.
+ * This driver along with user space code can be acting as an error
+ * injection tool.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Written by: Fenghua Yu <fenghua.yu@intel.com>, Intel Corporation
+ * Copyright (C) 2006, Intel Corp.  All rights reserved.
+ *
+ */
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#define ERR_INJ_DEBUG
+
+#define ERR_DATA_BUFFER_SIZE 3 		// Three 8-byte;
+
+#define define_one_ro(name) 						\
+static DEVICE_ATTR(name, 0444, show_##name, NULL)
+
+#define define_one_rw(name) 						\
+static DEVICE_ATTR(name, 0644, show_##name, store_##name)
+
+static u64 call_start[NR_CPUS];
+static u64 phys_addr[NR_CPUS];
+static u64 err_type_info[NR_CPUS];
+static u64 err_struct_info[NR_CPUS];
+static struct {
+	u64 data1;
+	u64 data2;
+	u64 data3;
+} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS];
+static s64 status[NR_CPUS];
+static u64 capabilities[NR_CPUS];
+static u64 resources[NR_CPUS];
+
+#define show(name) 							\
+static ssize_t 								\
+show_##name(struct device *dev, struct device_attribute *attr,	\
+		char *buf)						\
+{									\
+	u32 cpu=dev->id;						\
+	return sprintf(buf, "%lx\n", name[cpu]);			\
+}
+
+#define store(name)							\
+static ssize_t 								\
+store_##name(struct device *dev, struct device_attribute *attr,	\
+					const char *buf, size_t size)	\
+{									\
+	unsigned int cpu=dev->id;					\
+	name[cpu] = simple_strtoull(buf, NULL, 16);			\
+	return size;							\
+}
+
+show(call_start)
+
+/* It's user's responsibility to call the PAL procedure on a specific
+ * processor. The cpu number in driver is only used for storing data.
+ */
+static ssize_t
+store_call_start(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	unsigned long call_start = simple_strtoull(buf, NULL, 16);
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu);
+	printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]);
+	printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]);
+	printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n",
+			  err_data_buffer[cpu].data1,
+			  err_data_buffer[cpu].data2,
+			  err_data_buffer[cpu].data3);
+#endif
+	switch (call_start) {
+	    case 0: /* Do nothing. */
+		break;
+	    case 1: /* Call pal_mc_error_inject in physical mode. */
+		status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu],
+					err_struct_info[cpu],
+					ia64_tpa(&err_data_buffer[cpu]),
+					&capabilities[cpu],
+			 		&resources[cpu]);
+		break;
+	    case 2: /* Call pal_mc_error_inject in virtual mode. */
+		status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu],
+					err_struct_info[cpu],
+					ia64_tpa(&err_data_buffer[cpu]),
+					&capabilities[cpu],
+			 		&resources[cpu]);
+		break;
+	    default:
+		status[cpu] = -EINVAL;
+		break;
+	}
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]);
+	printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]);
+	printk(KERN_DEBUG "resources=%lx\n", resources[cpu]);
+#endif
+	return size;
+}
+
+show(err_type_info)
+store(err_type_info)
+
+static ssize_t
+show_virtual_to_phys(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	unsigned int cpu=dev->id;
+	return sprintf(buf, "%lx\n", phys_addr[cpu]);
+}
+
+static ssize_t
+store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	u64 virt_addr=simple_strtoull(buf, NULL, 16);
+	int ret;
+
+        ret = get_user_pages(current, current->mm, virt_addr,
+                        1, VM_READ, 0, NULL, NULL);
+	if (ret<=0) {
+#ifdef ERR_INJ_DEBUG
+		printk("Virtual address %lx is not existing.\n",virt_addr);
+#endif
+		return -EINVAL;
+	}
+
+	phys_addr[cpu] = ia64_tpa(virt_addr);
+	return size;
+}
+
+show(err_struct_info)
+store(err_struct_info)
+
+static ssize_t
+show_err_data_buffer(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	unsigned int cpu=dev->id;
+
+	return sprintf(buf, "%lx, %lx, %lx\n",
+			err_data_buffer[cpu].data1,
+			err_data_buffer[cpu].data2,
+			err_data_buffer[cpu].data3);
+}
+
+static ssize_t
+store_err_data_buffer(struct device *dev,
+			struct device_attribute *attr,
+			const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	int ret;
+
+#ifdef ERR_INJ_DEBUG
+	printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n",
+		 err_data_buffer[cpu].data1,
+		 err_data_buffer[cpu].data2,
+		 err_data_buffer[cpu].data3,
+		 cpu);
+#endif
+	ret=sscanf(buf, "%lx, %lx, %lx",
+			&err_data_buffer[cpu].data1,
+			&err_data_buffer[cpu].data2,
+			&err_data_buffer[cpu].data3);
+	if (ret!=ERR_DATA_BUFFER_SIZE)
+		return -EINVAL;
+
+	return size;
+}
+
+show(status)
+show(capabilities)
+show(resources)
+
+define_one_rw(call_start);
+define_one_rw(err_type_info);
+define_one_rw(err_struct_info);
+define_one_rw(err_data_buffer);
+define_one_rw(virtual_to_phys);
+define_one_ro(status);
+define_one_ro(capabilities);
+define_one_ro(resources);
+
+static struct attribute *default_attrs[] = {
+	&dev_attr_call_start.attr,
+	&dev_attr_virtual_to_phys.attr,
+	&dev_attr_err_type_info.attr,
+	&dev_attr_err_struct_info.attr,
+	&dev_attr_err_data_buffer.attr,
+	&dev_attr_status.attr,
+	&dev_attr_capabilities.attr,
+	&dev_attr_resources.attr,
+	NULL
+};
+
+static struct attribute_group err_inject_attr_group = {
+	.attrs = default_attrs,
+	.name = "err_inject"
+};
+/* Add/Remove err_inject interface for CPU device */
+static int err_inject_add_dev(struct device *sys_dev)
+{
+	return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group);
+}
+
+static int err_inject_remove_dev(struct device *sys_dev)
+{
+	sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
+	return 0;
+}
+static int err_inject_cpu_callback(struct notifier_block *nfb,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct device *sys_dev;
+
+	sys_dev = get_cpu_device(cpu);
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		err_inject_add_dev(sys_dev);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		err_inject_remove_dev(sys_dev);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block err_inject_cpu_notifier =
+{
+	.notifier_call = err_inject_cpu_callback,
+};
+
+static int __init
+err_inject_init(void)
+{
+	int i;
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_INFO "Enter error injection driver.\n");
+#endif
+
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(i) {
+		err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE,
+				(void *)(long)i);
+	}
+
+	__register_hotcpu_notifier(&err_inject_cpu_notifier);
+
+	cpu_notifier_register_done();
+
+	return 0;
+}
+
+static void __exit
+err_inject_exit(void)
+{
+	int i;
+	struct device *sys_dev;
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_INFO "Exit error injection driver.\n");
+#endif
+
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(i) {
+		sys_dev = get_cpu_device(i);
+		sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
+	}
+
+	__unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+
+	cpu_notifier_register_done();
+}
+
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+
+MODULE_AUTHOR("Fenghua Yu <fenghua.yu@intel.com>");
+MODULE_DESCRIPTION("MC error injection kernel sysfs interface");
+MODULE_LICENSE("GPL");
diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c
new file mode 100644
index 00000000000..b091111270c
--- /dev/null
+++ b/arch/ia64/kernel/esi.c
@@ -0,0 +1,205 @@
+/*
+ * Extensible SAL Interface (ESI) support routines.
+ *
+ * Copyright (C) 2006 Hewlett-Packard Co
+ * 	Alex Williamson <alex.williamson@hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <asm/esi.h>
+#include <asm/sal.h>
+
+MODULE_AUTHOR("Alex Williamson <alex.williamson@hp.com>");
+MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support");
+MODULE_LICENSE("GPL");
+
+#define MODULE_NAME	"esi"
+
+#define ESI_TABLE_GUID					\
+    EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3,		\
+	     0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4)
+
+enum esi_systab_entry_type {
+	ESI_DESC_ENTRY_POINT = 0
+};
+
+/*
+ * Entry type:	Size:
+ *	0	48
+ */
+#define ESI_DESC_SIZE(type)	"\060"[(unsigned) (type)]
+
+typedef struct ia64_esi_desc_entry_point {
+	u8 type;
+	u8 reserved1[15];
+	u64 esi_proc;
+	u64 gp;
+	efi_guid_t guid;
+} ia64_esi_desc_entry_point_t;
+
+struct pdesc {
+	void *addr;
+	void *gp;
+};
+
+static struct ia64_sal_systab *esi_systab;
+
+static int __init esi_init (void)
+{
+	efi_config_table_t *config_tables;
+	struct ia64_sal_systab *systab;
+	unsigned long esi = 0;
+	char *p;
+	int i;
+
+	config_tables = __va(efi.systab->tables);
+
+	for (i = 0; i < (int) efi.systab->nr_tables; ++i) {
+		if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) {
+			esi = config_tables[i].table;
+			break;
+		}
+	}
+
+	if (!esi)
+		return -ENODEV;
+
+	systab = __va(esi);
+
+	if (strncmp(systab->signature, "ESIT", 4) != 0) {
+		printk(KERN_ERR "bad signature in ESI system table!");
+		return -ENODEV;
+	}
+
+	p = (char *) (systab + 1);
+	for (i = 0; i < systab->entry_count; i++) {
+		/*
+		 * The first byte of each entry type contains the type
+		 * descriptor.
+		 */
+		switch (*p) {
+		      case ESI_DESC_ENTRY_POINT:
+			break;
+		      default:
+			printk(KERN_WARNING "Unknown table type %d found in "
+			       "ESI table, ignoring rest of table\n", *p);
+			return -ENODEV;
+		}
+
+		p += ESI_DESC_SIZE(*p);
+	}
+
+	esi_systab = systab;
+	return 0;
+}
+
+
+int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+		   enum esi_proc_type proc_type, u64 func,
+		   u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
+		   u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags = 0;
+	int i;
+	char *p;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if (!efi_guidcmp(guid, esi->guid)) {
+				ia64_sal_handler esi_proc;
+				struct pdesc pdesc;
+
+				pdesc.addr = __va(esi->esi_proc);
+				pdesc.gp = __va(esi->gp);
+
+				esi_proc = (ia64_sal_handler) &pdesc;
+
+				ia64_save_scratch_fpregs(fr);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_lock_irqsave(&sal_lock, flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_save(flags);
+				else
+					preempt_disable();
+				*isrvp = (*esi_proc)(func, arg1, arg2, arg3,
+						     arg4, arg5, arg6, arg7);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_unlock_irqrestore(&sal_lock,
+							       flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_restore(flags);
+				else
+					preempt_enable();
+				ia64_load_scratch_fpregs(fr);
+				return 0;
+			}
+		}
+		p += ESI_DESC_SIZE(*p);
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call);
+
+int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+			u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4,
+			u64 arg5, u64 arg6, u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags;
+	u64 esi_params[8];
+	char *p;
+	int i;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if (!efi_guidcmp(guid, esi->guid)) {
+				ia64_sal_handler esi_proc;
+				struct pdesc pdesc;
+
+				pdesc.addr = (void *)esi->esi_proc;
+				pdesc.gp = (void *)esi->gp;
+
+				esi_proc = (ia64_sal_handler) &pdesc;
+
+				esi_params[0] = func;
+				esi_params[1] = arg1;
+				esi_params[2] = arg2;
+				esi_params[3] = arg3;
+				esi_params[4] = arg4;
+				esi_params[5] = arg5;
+				esi_params[6] = arg6;
+				esi_params[7] = arg7;
+				ia64_save_scratch_fpregs(fr);
+				spin_lock_irqsave(&sal_lock, flags);
+				*isrvp = esi_call_phys(esi_proc, esi_params);
+				spin_unlock_irqrestore(&sal_lock, flags);
+				ia64_load_scratch_fpregs(fr);
+				return 0;
+			}
+		}
+		p += ESI_DESC_SIZE(*p);
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call_phys);
+
+static void __exit esi_exit (void)
+{
+}
+
+module_init(esi_init);
+module_exit(esi_exit);	/* makes module removable... */
diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S
new file mode 100644
index 00000000000..6b3d6c1f99b
--- /dev/null
+++ b/arch/ia64/kernel/esi_stub.S
@@ -0,0 +1,96 @@
+/*
+ * ESI call stub.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Alex Williamson <alex.williamson@hp.com>
+ *
+ * Based on EFI call stub by David Mosberger.  The stub is virtually
+ * identical to the one for EFI phys-mode calls, except that ESI
+ * calls may have up to 8 arguments, so they get passed to this routine
+ * through memory.
+ *
+ * This stub allows us to make ESI calls in physical mode with interrupts
+ * turned off.  ESI calls may not support calling from virtual mode.
+ *
+ * Google for "Extensible SAL specification" for a document describing the
+ * ESI standard.
+ */
+
+/*
+ * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
+ * Abstraction Layer Specification", revision 2.6e).  Note that
+ * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
+ * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
+ * (the br.ia instruction fails unless psr.dfl and psr.dfh are
+ * cleared).  Fortunately, SAL promises not to touch the floating
+ * point regs, so at least we don't have to save f2-f127.
+ */
+#define PSR_BITS_TO_CLEAR						\
+	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT |		\
+	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |	\
+	 IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PSR_BITS_TO_SET							\
+	(IA64_PSR_BN)
+
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+
+/*
+ * Inputs:
+ *	in0 = address of function descriptor of ESI routine to call
+ *	in1 = address of array of ESI parameters
+ *
+ * Outputs:
+ *	r8 = result returned by called function
+ */
+GLOBAL_ENTRY(esi_call_phys)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+	alloc loc1=ar.pfs,2,7,8,0
+	ld8 r2=[in0],8			// load ESI function's entry point
+	mov loc0=rp
+	.body
+	;;
+	ld8 out0=[in1],8		// ESI params loaded from array
+	;;				// passing all as inputs doesn't work
+	ld8 out1=[in1],8
+	;;
+	ld8 out2=[in1],8
+	;;
+	ld8 out3=[in1],8
+	;;
+	ld8 out4=[in1],8
+	;;
+	ld8 out5=[in1],8
+	;;
+	ld8 out6=[in1],8
+	;;
+	ld8 out7=[in1]
+	mov loc2=gp			// save global pointer
+	mov loc4=ar.rsc			// save RSE configuration
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	;;
+	ld8 gp=[in0]			// load ESI function's global pointer
+	movl r16=PSR_BITS_TO_CLEAR
+	mov loc3=psr			// save processor status word
+	movl r17=PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17
+	mov b6=r2
+	;;
+	andcm r16=loc3,r16	// get psr with IT, DT, and RT bits cleared
+	br.call.sptk.many rp=ia64_switch_mode_phys
+.ret0:	mov loc5=r19			// old ar.bsp
+	mov loc6=r20			// old sp
+	br.call.sptk.many rp=b6		// call the ESI function
+.ret1:	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3			// save virtual mode psr
+	mov r19=loc5			// save virtual mode bspstore
+	mov r20=loc6			// save virtual mode sp
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+.ret2:	mov ar.rsc=loc4			// restore RSE configuration
+	mov ar.pfs=loc1
+	mov rp=loc0
+	mov gp=loc2
+	br.ret.sptk.many rp
+END(esi_call_phys)
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 7a05b1cb2ad..abc6dee3799 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -10,6 +10,8 @@
  *			probably broke it along the way... ;-)
  * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
  *                      it capable of using memory based clocks without falling back to C code.
+ * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
+ *
  */
 
 #include <asm/asmmacro.h>
@@ -19,10 +21,10 @@
 #include <asm/thread_info.h>
 #include <asm/sal.h>
 #include <asm/signal.h>
-#include <asm/system.h>
 #include <asm/unistd.h>
 
 #include "entry.h"
+#include "paravirt_inst.h"
 
 /*
  * See Documentation/ia64/fsys.txt for details on fsyscalls.
@@ -59,80 +61,59 @@ ENTRY(fsys_getpid)
 	.prologue
 	.altrp b6
 	.body
-	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
-	;;
-	ld4 r9=[r9]
-	add r8=IA64_TASK_TGID_OFFSET,r16
-	;;
-	and r9=TIF_ALLWORK_MASK,r9
-	ld4 r8=[r8]				// r8 = current->tgid
-	;;
-	cmp.ne p8,p0=0,r9
-(p8)	br.spnt.many fsys_fallback_syscall
-	FSYS_RETURN
-END(fsys_getpid)
-
-ENTRY(fsys_getppid)
-	.prologue
-	.altrp b6
-	.body
 	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
 	;;
 	ld8 r17=[r17]				// r17 = current->group_leader
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	;;
-
 	ld4 r9=[r9]
-	add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
+	add r17=IA64_TASK_TGIDLINK_OFFSET,r17
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
-
-1:	ld8 r18=[r17]				// r18 = current->group_leader->real_parent
+	ld8 r17=[r17]				// r17 = current->group_leader->pids[PIDTYPE_PID].pid
 	;;
-	cmp.ne p8,p0=0,r9
-	add r8=IA64_TASK_TGID_OFFSET,r18	// r8 = &current->group_leader->real_parent->tgid
+	add r8=IA64_PID_LEVEL_OFFSET,r17
 	;;
-
-	/*
-	 * The .acq is needed to ensure that the read of tgid has returned its data before
-	 * we re-check "real_parent".
-	 */
-	ld4.acq r8=[r8]				// r8 = current->group_leader->real_parent->tgid
-#ifdef CONFIG_SMP
-	/*
-	 * Re-read current->group_leader->real_parent.
-	 */
-	ld8 r19=[r17]				// r19 = current->group_leader->real_parent
-(p8)	br.spnt.many fsys_fallback_syscall
+	ld4 r8=[r8]				// r8 = pid->level
+	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
 	;;
-	cmp.ne p6,p0=r18,r19			// did real_parent change?
-	mov r19=0			// i must not leak kernel bits...
-(p6)	br.cond.spnt.few 1b			// yes -> redo the read of tgid and the check
+	shl r8=r8,IA64_UPID_SHIFT
 	;;
-	mov r17=0			// i must not leak kernel bits...
-	mov r18=0			// i must not leak kernel bits...
-#else
-	mov r17=0			// i must not leak kernel bits...
-	mov r18=0			// i must not leak kernel bits...
-	mov r19=0			// i must not leak kernel bits...
-#endif
+	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
+	;;
+	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
+	;;
+	mov r17=0
+	;;
+	cmp.ne p8,p0=0,r9
+(p8)	br.spnt.many fsys_fallback_syscall
 	FSYS_RETURN
-END(fsys_getppid)
+END(fsys_getpid)
 
 ENTRY(fsys_set_tid_address)
 	.prologue
 	.altrp b6
 	.body
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+	add r17=IA64_TASK_TGIDLINK_OFFSET,r16
 	;;
 	ld4 r9=[r9]
 	tnat.z p6,p7=r32		// check argument register for being NaT
+	ld8 r17=[r17]				// r17 = current->pids[PIDTYPE_PID].pid
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
-	add r8=IA64_TASK_PID_OFFSET,r16
+	add r8=IA64_PID_LEVEL_OFFSET,r17
 	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
 	;;
-	ld4 r8=[r8]
+	ld4 r8=[r8]				// r8 = pid->level
+	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
+	;;
+	shl r8=r8,IA64_UPID_SHIFT
+	;;
+	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
+	;;
+	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
+	;;
 	cmp.ne p8,p0=0,r9
 	mov r17=-1
 	;;
@@ -145,12 +126,11 @@ ENTRY(fsys_set_tid_address)
 	FSYS_RETURN
 END(fsys_set_tid_address)
 
-/*
- * Ensure that the time interpolator structure is compatible with the asm code
- */
-#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
-	|| IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
-#error fsys_gettimeofday incompatible with changes to struct time_interpolator
+#if IA64_GTOD_SEQ_OFFSET !=0
+#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
+#endif
+#if IA64_ITC_JITTER_OFFSET !=0
+#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
 #endif
 #define CLOCK_REALTIME 0
 #define CLOCK_MONOTONIC 1
@@ -177,124 +157,120 @@ ENTRY(fsys_gettimeofday)
 	// r11 = preserved: saved ar.pfs
 	// r12 = preserved: memory stack
 	// r13 = preserved: thread pointer
-	// r14 = address of mask / mask
+	// r14 = address of mask / mask value
 	// r15 = preserved: system call number
 	// r16 = preserved: current task pointer
-	// r17 = wall to monotonic use
-	// r18 = time_interpolator->offset
-	// r19 = address of wall_to_monotonic
-	// r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
-	// r21 = shift factor
-	// r22 = address of time interpolator->last_counter
-	// r23 = address of time_interpolator->last_cycle
-	// r24 = adress of time_interpolator->offset
-	// r25 = last_cycle value
-	// r26 = last_counter value
-	// r27 = pointer to xtime
+	// r17 = (not used)
+	// r18 = (not used)
+	// r19 = address of itc_lastcycle
+	// r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
+	// r21 = address of mmio_ptr
+	// r22 = address of wall_time or monotonic_time
+	// r23 = address of shift / value
+	// r24 = address mult factor / cycle_last value
+	// r25 = itc_lastcycle value
+	// r26 = address clocksource cycle_last
+	// r27 = (not used)
 	// r28 = sequence number at the beginning of critcal section
-	// r29 = address of seqlock
+	// r29 = address of itc_jitter
 	// r30 = time processing flags / memory address
 	// r31 = pointer to result
 	// Predicates
 	// p6,p7 short term use
 	// p8 = timesource ar.itc
 	// p9 = timesource mmio64
-	// p10 = timesource mmio32
+	// p10 = timesource mmio32 - not used
 	// p11 = timesource not to be handled by asm code
-	// p12 = memory time source ( = p9 | p10)
-	// p13 = do cmpxchg with time_interpolator_last_cycle
+	// p12 = memory time source ( = p9 | p10) - not used
+	// p13 = do cmpxchg with itc_lastcycle
 	// p14 = Divide by 1000
 	// p15 = Add monotonic
 	//
-	// Note that instructions are optimized for McKinley. McKinley can process two
-	// bundles simultaneously and therefore we continuously try to feed the CPU
-	// two bundles and then a stop.
-	tnat.nz p6,p0 = r31	// branch deferred since it does not fit into bundle structure
-	mov pr = r30,0xc000	// Set predicates according to function
+	// Note that instructions are optimized for McKinley. McKinley can
+	// process two bundles simultaneously and therefore we continuously
+	// try to feed the CPU two bundles and then a stop.
+
 	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
-	movl r20 = time_interpolator
-	;;
-	ld8 r20 = [r20]		// get pointer to time_interpolator structure
-	movl r29 = xtime_lock
-	ld4 r2 = [r2]		// process work pending flags
-	movl r27 = xtime
-	;;	// only one bundle here
-	ld8 r21 = [r20]		// first quad with control information
+	tnat.nz p6,p0 = r31		// guard against Nat argument
+(p6)	br.cond.spnt.few .fail_einval
+	movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
+	;;
+	ld4 r2 = [r2]			// process work pending flags
+	movl r29 = itc_jitter_data	// itc_jitter
+	add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20	// wall_time
+	add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
+	mov pr = r30,0xc000	// Set predicates according to function
+	;;
 	and r2 = TIF_ALLWORK_MASK,r2
-(p6)    br.cond.spnt.few .fail_einval	// deferred branch
+	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
 	;;
-	add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
-	extr r3 = r21,32,32	// time_interpolator->nsec_per_cyc
-	extr r8 = r21,0,16	// time_interpolator->source
+	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20	// clksrc_cycle_last
 	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
-(p6)    br.cond.spnt.many fsys_fallback_syscall
+(p6)	br.cond.spnt.many fsys_fallback_syscall
 	;;
-	cmp.eq p8,p12 = 0,r8	// Check for cpu timer
-	cmp.eq p9,p0 = 1,r8	// MMIO64 ?
-	extr r2 = r21,24,8	// time_interpolator->jitter
-	cmp.eq p10,p0 = 2,r8	// MMIO32 ?
-	cmp.ltu p11,p0 = 2,r8	// function or other clock
-(p11)	br.cond.spnt.many fsys_fallback_syscall
+	// Begin critical section
+.time_redo:
+	ld4.acq r28 = [r20]	// gtod_lock.sequence, Must take first
 	;;
-	setf.sig f7 = r3	// Setup for scaling of counter
-(p15)	movl r19 = wall_to_monotonic
-(p12)	ld8 r30 = [r10]
-	cmp.ne p13,p0 = r2,r0	// need jitter compensation?
-	extr r21 = r21,16,8	// shift factor
+	and r28 = ~1,r28	// And make sequence even to force retry if odd
 	;;
-.time_redo:
-	.pred.rel.mutex p8,p9,p10
-	ld4.acq r28 = [r29]	// xtime_lock.sequence. Must come first for locking purposes
-(p8)	mov r2 = ar.itc		// CPU_TIMER. 36 clocks latency!!!
-	add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
-(p9)	ld8 r2 = [r30]		// readq(ti->address). Could also have latency issues..
-(p10)	ld4 r2 = [r30]		// readw(ti->address)
-(p13)	add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
-	;;			// could be removed by moving the last add upward
-	ld8 r26 = [r22]		// time_interpolator->last_counter
-(p13)	ld8 r25 = [r23]		// time interpolator->last_cycle
-	add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
-(p15)	ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
- 	ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
-	add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
-	;;
-	ld8 r18 = [r24]		// time_interpolator->offset
-	ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET	// xtime.tv_nsec
-(p13)	sub r3 = r25,r2	// Diff needed before comparison (thanks davidm)
-	;;
-	ld8 r14 = [r14]		// time_interpolator->mask
-(p13)	cmp.gt.unc p6,p7 = r3,r0	// check if it is less than last. p6,p7 cleared
-	sub r10 = r2,r26	// current_counter - last_counter
-	;;
-(p6)	sub r10 = r25,r26	// time we got was less than last_cycle
+	ld8 r30 = [r21]		// clocksource->mmio_ptr
+	add r24 = IA64_CLKSRC_MULT_OFFSET,r20
+	ld4 r2 = [r29]		// itc_jitter value
+	add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
+	add r14 = IA64_CLKSRC_MASK_OFFSET,r20
+	;;
+	ld4 r3 = [r24]		// clocksource mult value
+	ld8 r14 = [r14]         // clocksource mask value
+	cmp.eq p8,p9 = 0,r30	// use cpu timer if no mmio_ptr
+	;;
+	setf.sig f7 = r3	// Setup for mult scaling of counter
+(p8)	cmp.ne p13,p0 = r2,r0	// need itc_jitter compensation, set p13
+	ld4 r23 = [r23]		// clocksource shift value
+	ld8 r24 = [r26]		// get clksrc_cycle_last value
+(p9)	cmp.eq p13,p0 = 0,r30	// if mmio_ptr, clear p13 jitter control
+	;;
+	.pred.rel.mutex p8,p9
+	MOV_FROM_ITC(p8, p6, r2, r10)	// CPU_TIMER. 36 clocks latency!!!
+(p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
+(p13)	ld8 r25 = [r19]		// get itc_lastcycle value
+	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
+	;;
+	ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_nsec
+(p13)	sub r3 = r25,r2		// Diff needed before comparison (thanks davidm)
+	;;
+(p13)	cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
+	sub r10 = r2,r24	// current_cycle - last_cycle
+	;;
+(p6)	sub r10 = r25,r24	// time we got was less than last_cycle
 (p7)	mov ar.ccv = r25	// more than last_cycle. Prep for cmpxchg
 	;;
+(p7)	cmpxchg8.rel r3 = [r19],r2,ar.ccv
+	;;
+(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful
+	;;
+(p7)	sub r10 = r3,r24	// then use new last_cycle instead
+	;;
 	and r10 = r10,r14	// Apply mask
 	;;
 	setf.sig f8 = r10
 	nop.i 123
 	;;
-(p7)	cmpxchg8.rel r3 = [r23],r2,ar.ccv
-EX(.fail_efault, probe.w.fault r31, 3)	// This takes 5 cycles and we have spare time
+	// fault check takes 5 cycles and we have spare time
+EX(.fail_efault, probe.w.fault r31, 3)
 	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
-(p15)	add r9 = r9,r17		// Add wall to monotonic.secs to result secs
 	;;
-(p15)	ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
-(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful redo
-	// simulate tbit.nz.or p7,p0 = r28,0
-	and r28 = ~1,r28	// Make sequence even to force retry if odd
 	getf.sig r2 = f8
 	mf
-	add r8 = r8,r18		// Add time interpolator offset
 	;;
-	ld4 r10 = [r29]		// xtime_lock.sequence
-(p15)	add r8 = r8, r17	// Add monotonic.nsecs to nsecs
-	shr.u r2 = r2,r21
-	;;		// overloaded 3 bundles!
-	// End critical section.
+	ld4 r10 = [r20]		// gtod_lock.sequence
+	shr.u r2 = r2,r23	// shift by factor
+	;;
 	add r8 = r8,r2		// Add xtime.nsecs
-	cmp4.ne.or p7,p0 = r28,r10
-(p7)	br.cond.dpnt.few .time_redo	// sequence number changed ?
+	cmp4.ne p7,p0 = r28,r10
+(p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
+	// End critical section.
 	// Now r8=tv->tv_nsec and r9=tv->tv_sec
 	mov r10 = r0
 	movl r2 = 1000000000
@@ -304,23 +280,23 @@ EX(.fail_efault, probe.w.fault r31, 3)	// This takes 5 cycles and we have spare
 .time_normalize:
 	mov r21 = r8
 	cmp.ge p6,p0 = r8,r2
-(p14)	shr.u r20 = r8, 3		// We can repeat this if necessary just wasting some time
+(p14)	shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
 	;;
 (p14)	setf.sig f8 = r20
 (p6)	sub r8 = r8,r2
-(p6)	add r9 = 1,r9			// two nops before the branch.
-(p14)	setf.sig f7 = r3		// Chances for repeats are 1 in 10000 for gettod
+(p6)	add r9 = 1,r9		// two nops before the branch.
+(p14)	setf.sig f7 = r3	// Chances for repeats are 1 in 10000 for gettod
 (p6)	br.cond.dpnt.few .time_normalize
 	;;
 	// Divided by 8 though shift. Now divide by 125
 	// The compiler was able to do that with a multiply
 	// and a shift and we do the same
-EX(.fail_efault, probe.w.fault r23, 3)		// This also costs 5 cycles
-(p14)	xmpy.hu f8 = f8, f7			// xmpy has 5 cycles latency so use it...
+EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
+(p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it
 	;;
-	mov r8 = r0
 (p14)	getf.sig r2 = f8
 	;;
+	mov r8 = r0
 (p14)	shr.u r21 = r2, 4
 	;;
 EX(.fail_efault, st8 [r31] = r9)
@@ -349,161 +325,61 @@ ENTRY(fsys_clock_gettime)
 END(fsys_clock_gettime)
 
 /*
- * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
+ * fsys_getcpu doesn't use the third parameter in this implementation. It reads
+ * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
  */
-#if _NSIG_WORDS != 1
-# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
-#endif
-ENTRY(fsys_rt_sigprocmask)
+ENTRY(fsys_getcpu)
 	.prologue
 	.altrp b6
 	.body
-
-	add r2=IA64_TASK_BLOCKED_OFFSET,r16
-	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
-	cmp4.ltu p6,p0=SIG_SETMASK,r32
-
-	cmp.ne p15,p0=r0,r34			// oset != NULL?
-	tnat.nz p8,p0=r34
-	add r31=IA64_TASK_SIGHAND_OFFSET,r16
 	;;
-	ld8 r3=[r2]				// read/prefetch current->blocked
-	ld4 r9=[r9]
-	tnat.nz.or p6,p0=r35
-
-	cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
-	tnat.nz.or p6,p0=r32
-(p6)	br.spnt.few .fail_einval		// fail with EINVAL
+	add r2=TI_FLAGS+IA64_TASK_SIZE,r16
+	tnat.nz p6,p0 = r32			// guard against NaT argument
+	add r3=TI_CPU+IA64_TASK_SIZE,r16
 	;;
-#ifdef CONFIG_SMP
-	ld8 r31=[r31]				// r31 <- current->sighand
-#endif
-	and r9=TIF_ALLWORK_MASK,r9
-	tnat.nz.or p8,p0=r33
-	;;
-	cmp.ne p7,p0=0,r9
-	cmp.eq p6,p0=r0,r33			// set == NULL?
-	add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31	// r31 <- current->sighand->siglock
-(p8)	br.spnt.few .fail_efault		// fail with EFAULT
-(p7)	br.spnt.many fsys_fallback_syscall	// got pending kernel work...
-(p6)	br.dpnt.many .store_mask		// -> short-circuit to just reading the signal mask
-
-	/* Argh, we actually have to do some work and _update_ the signal mask: */
-
-EX(.fail_efault, probe.r.fault r33, 3)		// verify user has read-access to *set
-EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
-	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
+	ld4 r3=[r3]				// M r3 = thread_info->cpu
+	ld4 r2=[r2]				// M r2 = thread_info->flags
+(p6)    br.cond.spnt.few .fail_einval		// B
 	;;
-
-	rsm psr.i				// mask interrupt delivery
-	mov ar.ccv=0
-	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP
-
-#ifdef CONFIG_SMP
-	mov r17=1
+	tnat.nz p7,p0 = r33			// I guard against NaT argument
+(p7)    br.cond.spnt.few .fail_einval		// B
 	;;
-	cmpxchg4.acq r18=[r31],r17,ar.ccv	// try to acquire the lock
-	mov r8=EINVAL			// default to EINVAL
+	cmp.ne p6,p0=r32,r0
+	cmp.ne p7,p0=r33,r0
 	;;
-	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
-	cmp4.ne p6,p0=r18,r0
-(p6)	br.cond.spnt.many .lock_contention
+#ifdef CONFIG_NUMA
+	movl r17=cpu_to_node_map
 	;;
-#else
-	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
-	mov r8=EINVAL			// default to EINVAL
-#endif
-	add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
-	add r19=IA64_TASK_SIGNAL_OFFSET,r16
-	cmp4.eq p6,p0=SIG_BLOCK,r32
-	;;
-	ld8 r19=[r19]			// r19 <- current->signal
-	cmp4.eq p7,p0=SIG_UNBLOCK,r32
-	cmp4.eq p8,p0=SIG_SETMASK,r32
-	;;
-	ld8 r18=[r18]			// r18 <- current->pending.signal
-	.pred.rel.mutex p6,p7,p8
-(p6)	or r14=r3,r14			// SIG_BLOCK
-(p7)	andcm r14=r3,r14		// SIG_UNBLOCK
-
-(p8)	mov r14=r14			// SIG_SETMASK
-(p6)	mov r8=0			// clear error code
-	// recalc_sigpending()
-	add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
-
-	add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
+EX(.fail_efault, (p6) probe.w.fault r32, 3)		// M This takes 5 cycles
+EX(.fail_efault, (p7) probe.w.fault r33, 3)		// M This takes 5 cycles
+	shladd r18=r3,1,r17
 	;;
-	ld4 r17=[r17]		// r17 <- current->signal->group_stop_count
-(p7)	mov r8=0		// clear error code
-
-	ld8 r19=[r19]		// r19 <- current->signal->shared_pending
+	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu]
+	and r2 = TIF_ALLWORK_MASK,r2
 	;;
-	cmp4.gt p6,p7=r17,r0	// p6/p7 <- (current->signal->group_stop_count > 0)?
-(p8)	mov r8=0		// clear error code
-
-	or r18=r18,r19		// r18 <- current->pending | current->signal->shared_pending
+	cmp.ne p8,p0=0,r2
+(p8)	br.spnt.many fsys_fallback_syscall
 	;;
-	// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
-	andcm r18=r18,r14
-	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	;;
-
-(p7)	cmp.ne.or.andcm p6,p7=r18,r0		// p6/p7 <- signal pending
-	mov r19=0					// i must not leak kernel bits...
-(p6)	br.cond.dpnt.many .sig_pending
+EX(.fail_efault, (p6) st4 [r32] = r3)
+EX(.fail_efault, (p7) st2 [r33] = r20)
+	mov r8=0
 	;;
-
-1:	ld4 r17=[r9]				// r17 <- current->thread_info->flags
+#else
+EX(.fail_efault, (p6) probe.w.fault r32, 3)		// M This takes 5 cycles
+EX(.fail_efault, (p7) probe.w.fault r33, 3)		// M This takes 5 cycles
+	and r2 = TIF_ALLWORK_MASK,r2
 	;;
-	mov ar.ccv=r17
-	and r18=~_TIF_SIGPENDING,r17		// r18 <- r17 & ~(1 << TIF_SIGPENDING)
+	cmp.ne p8,p0=0,r2
+(p8)	br.spnt.many fsys_fallback_syscall
 	;;
-
-	st8 [r2]=r14				// update current->blocked with new mask
-	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
+EX(.fail_efault, (p6) st4 [r32] = r3)
+EX(.fail_efault, (p7) st2 [r33] = r0)
+	mov r8=0
 	;;
-	cmp.ne p6,p0=r17,r8			// update failed?
-(p6)	br.cond.spnt.few 1b			// yes -> retry
-
-#ifdef CONFIG_SMP
-	st4.rel [r31]=r0			// release the lock
 #endif
-	ssm psr.i
-	;;
-
-	srlz.d					// ensure psr.i is set again
-	mov r18=0					// i must not leak kernel bits...
-
-.store_mask:
-EX(.fail_efault, (p15) probe.w.fault r34, 3)	// verify user has write-access to *oset
-EX(.fail_efault, (p15) st8 [r34]=r3)
-	mov r2=0					// i must not leak kernel bits...
-	mov r3=0					// i must not leak kernel bits...
-	mov r8=0				// return 0
-	mov r9=0					// i must not leak kernel bits...
-	mov r14=0					// i must not leak kernel bits...
-	mov r17=0					// i must not leak kernel bits...
-	mov r31=0					// i must not leak kernel bits...
 	FSYS_RETURN
-
-.sig_pending:
-#ifdef CONFIG_SMP
-	st4.rel [r31]=r0			// release the lock
-#endif
-	ssm psr.i
-	;;
-	srlz.d
-	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall
-
-#ifdef CONFIG_SMP
-.lock_contention:
-	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
-	ssm psr.i
-	;;
-	srlz.d
-	br.sptk.many fsys_fallback_syscall
-#endif
-END(fsys_rt_sigprocmask)
+END(fsys_getcpu)
 
 ENTRY(fsys_fallback_syscall)
 	.prologue
@@ -516,17 +392,17 @@ ENTRY(fsys_fallback_syscall)
 	adds r17=-1024,r15
 	movl r14=sys_call_table
 	;;
-	rsm psr.i
+	RSM_PSR_I(p0, r26, r27)
 	shladd r18=r17,3,r14
 	;;
 	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
-	mov r29=psr				// read psr (12 cyc load latency)
+	MOV_FROM_PSR(p0, r29, r26)		// read psr (12 cyc load latency)
 	mov r27=ar.rsc
 	mov r21=ar.fpsr
 	mov r26=ar.pfs
 END(fsys_fallback_syscall)
 	/* FALL THROUGH */
-GLOBAL_ENTRY(fsys_bubble_down)
+GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	.prologue
 	.altrp b6
 	.body
@@ -564,7 +440,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	 *
 	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
 	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
-	 * PSR.I  : already turned off by the time fsys_bubble_down gets
+	 * PSR.I  : already turned off by the time paravirt_fsys_bubble_down gets
 	 *	    invoked
 	 * PSR.DFL: always 0 (kernel never turns it on)
 	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
@@ -574,7 +450,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	 * PSR.DB : don't care --- kernel never enables kernel-level
 	 *	    breakpoints
 	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
-	 *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
+	 *          __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
 	 *          will trigger a taken branch; the taken-trap-handler then
 	 *          converts the syscall into a break-based system-call.
 	 */
@@ -606,7 +482,11 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	nop.i 0
 	;;
 	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
+#else
 	nop.m 0
+#endif
 	nop.i 0
 	;;
 	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
@@ -628,25 +508,47 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	// mov.m r30=ar.itc is called in advance
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave kernel
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r30,r19				// elapsed time in user mode
+	;;
+	add r20=r20,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r20				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+#endif
 	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
 	mov rp=r14				// I0   set the real return addr
 	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
 	;;
-	ssm psr.i				// M2   we're on kernel stacks now, reenable irqs
+	SSM_PSR_I(p0, p6, r22)			// M2   we're on kernel stacks now, reenable irqs
 	cmp.eq p8,p0=r3,r0			// A
 (p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
 
 	nop.m 0
 (p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
 	br.cond.spnt ia64_trace_syscall		// B
-END(fsys_bubble_down)
+END(paravirt_fsys_bubble_down)
 
 	.rodata
 	.align 8
-	.globl fsyscall_table
+	.globl paravirt_fsyscall_table
 
-	data8 fsys_bubble_down
-fsyscall_table:
+	data8 paravirt_fsys_bubble_down
+paravirt_fsyscall_table:
 	data8 fsys_ni_syscall
 	data8 0				// exit			// 1025
 	data8 0				// read
@@ -665,7 +567,7 @@ fsyscall_table:
 	data8 0				// chown
 	data8 0				// lseek		// 1040
 	data8 fsys_getpid		// getpid
-	data8 fsys_getppid		// getppid
+	data8 0				// getppid
 	data8 0				// mount
 	data8 0				// umount
 	data8 0				// setuid		// 1045
@@ -802,7 +704,7 @@ fsyscall_table:
 	data8 0				// sigaltstack
 	data8 0				// rt_sigaction
 	data8 0				// rt_sigpending
-	data8 fsys_rt_sigprocmask	// rt_sigprocmask
+	data8 0				// rt_sigprocmask
 	data8 0				// rt_sigqueueinfo	// 1180
 	data8 0				// rt_sigreturn
 	data8 0				// rt_sigsuspend
@@ -878,7 +780,57 @@ fsyscall_table:
 	data8 0				// timer_delete
 	data8 0				// clock_settime
 	data8 fsys_clock_gettime	// clock_gettime
+	data8 0				// clock_getres		// 1255
+	data8 0				// clock_nanosleep
+	data8 0				// fstatfs64
+	data8 0				// statfs64
+	data8 0				// mbind
+	data8 0				// get_mempolicy	// 1260
+	data8 0				// set_mempolicy
+	data8 0				// mq_open
+	data8 0				// mq_unlink
+	data8 0				// mq_timedsend
+	data8 0				// mq_timedreceive	// 1265
+	data8 0				// mq_notify
+	data8 0				// mq_getsetattr
+	data8 0				// kexec_load
+	data8 0				// vserver
+	data8 0				// waitid		// 1270
+	data8 0				// add_key
+	data8 0				// request_key
+	data8 0				// keyctl
+	data8 0				// ioprio_set
+	data8 0				// ioprio_get		// 1275
+	data8 0				// move_pages
+	data8 0				// inotify_init
+	data8 0				// inotify_add_watch
+	data8 0				// inotify_rm_watch
+	data8 0				// migrate_pages	// 1280
+	data8 0				// openat
+	data8 0				// mkdirat
+	data8 0				// mknodat
+	data8 0				// fchownat
+	data8 0				// futimesat		// 1285
+	data8 0				// newfstatat
+	data8 0				// unlinkat
+	data8 0				// renameat
+	data8 0				// linkat
+	data8 0				// symlinkat		// 1290
+	data8 0				// readlinkat
+	data8 0				// fchmodat
+	data8 0				// faccessat
+	data8 0
+	data8 0							// 1295
+	data8 0				// unshare
+	data8 0				// splice
+	data8 0				// set_robust_list
+	data8 0				// get_robust_list
+	data8 0				// sync_file_range	// 1300
+	data8 0				// tee
+	data8 0				// vmsplice
+	data8 0
+	data8 fsys_getcpu		// getcpu		// 1304
 
 	// fill in zeros for the remaining entries
 	.zero:
-	.space fsyscall_table + 8*NR_syscalls - .zero, 0
+	.space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0
diff --git a/arch/ia64/kernel/fsyscall_gtod_data.h b/arch/ia64/kernel/fsyscall_gtod_data.h
new file mode 100644
index 00000000000..146b15b5fec
--- /dev/null
+++ b/arch/ia64/kernel/fsyscall_gtod_data.h
@@ -0,0 +1,23 @@
+/*
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ *        Contributed by Peter Keilty <peter.keilty@hp.com>
+ *
+ * fsyscall gettimeofday data
+ */
+
+struct fsyscall_gtod_data_t {
+	seqcount_t	seq;
+	struct timespec	wall_time;
+	struct timespec monotonic_time;
+	cycle_t		clk_mask;
+	u32		clk_mult;
+	u32		clk_shift;
+	void		*clk_fsys_mmio;
+	cycle_t		clk_cycle_last;
+} ____cacheline_aligned;
+
+struct itc_jitter_data_t {
+	int		itc_jitter;
+	cycle_t		itc_lastcycle;
+} ____cacheline_aligned;
+
diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
new file mode 100644
index 00000000000..3b0c2aa0785
--- /dev/null
+++ b/arch/ia64/kernel/ftrace.c
@@ -0,0 +1,204 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched lazily with NOP till they are
+ * enabled. All code mutation routines here take effect atomically.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/patch.h>
+
+/* In IA64, each function will be added below two bundles with -pg option */
+static unsigned char __attribute__((aligned(8)))
+ftrace_orig_code[MCOUNT_INSN_SIZE] = {
+	0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
+	0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
+	0x05, 0x00, 0xc4, 0x00,             /* mov r42=b0 */
+	0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
+	0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
+	0x08, 0x00, 0x00, 0x50              /* br.call.sptk.many b0 = _mcount;; */
+};
+
+struct ftrace_orig_insn {
+	u64 dummy1, dummy2, dummy3;
+	u64 dummy4:64-41+13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 sign:1;
+	u64 dummy6:4;
+};
+
+/* mcount stub will be converted below for nop */
+static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
+	0x00, 0x00, 0x04, 0x00
+};
+
+static unsigned char *ftrace_nop_replace(void)
+{
+	return ftrace_nop_code;
+}
+
+/*
+ * mcount stub will be converted below for call
+ * Note: Just the last instruction is changed against nop
+ * */
+static unsigned char __attribute__((aligned(8)))
+ftrace_call_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
+	0xf8, 0xff, 0xff, 0xc8
+};
+
+struct ftrace_call_insn {
+	u64 dummy1, dummy2;
+	u64 dummy3:48;
+	u64 imm39_l:16;
+	u64 imm39_h:23;
+	u64 dummy4:13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 i:1;
+	u64 dummy6:4;
+};
+
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	struct ftrace_call_insn *code = (void *)ftrace_call_code;
+	unsigned long offset = addr - (ip + 0x10);
+
+	code->imm39_l = offset >> 24;
+	code->imm39_h = offset >> 40;
+	code->imm20 = offset >> 4;
+	code->i = offset >> 63;
+	return ftrace_call_code;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+		   unsigned char *new_code, int do_check)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 *  disappear and change, we need to protect against faulting
+	 *  as well as code changing. We do this by using the
+	 *  probe_kernel_* functions.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine, or before SMP starts.
+	 */
+
+	if (!do_check)
+		goto skip_check;
+
+	/* read the text we want to modify */
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+
+skip_check:
+	/* replace the text with the new text */
+	if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE))
+		return -EPERM;
+	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
+	unsigned long ip = rec->ip;
+
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+	if (rec->flags & FTRACE_FL_CONVERTED) {
+		struct ftrace_call_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_call_code;
+		tmp_call = (void *)replaced;
+		call_insn->imm39_l = tmp_call->imm39_l;
+		call_insn->imm39_h = tmp_call->imm39_h;
+		call_insn->imm20 = tmp_call->imm20;
+		call_insn->i = tmp_call->i;
+		if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	} else {
+		struct ftrace_orig_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_orig_code;
+		tmp_call = (void *)replaced;
+		call_insn->sign = tmp_call->sign;
+		call_insn->imm20 = tmp_call->imm20;
+		if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	}
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	int ret;
+	char *new;
+
+	ret = ftrace_make_nop_check(rec, addr);
+	if (ret)
+		return ret;
+	new = ftrace_nop_replace();
+	return ftrace_modify_code(rec->ip, NULL, new, 0);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned char *old, *new;
+
+	old=  ftrace_nop_replace();
+	new = ftrace_call_replace(ip, addr);
+	return ftrace_modify_code(ip, old, new, 1);
+}
+
+/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip;
+	unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
+
+	if (func == ftrace_stub)
+		return 0;
+	ip = ((struct fnptr *)func)->ip;
+
+	ia64_patch_imm64(addr + 2, ip);
+
+	flush_icache_range(addr, addr + 16);
+	return 0;
+}
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S
index 258c0a3238f..b3ef1c72e13 100644
--- a/arch/ia64/kernel/gate-data.S
+++ b/arch/ia64/kernel/gate-data.S
@@ -1,3 +1,3 @@
-	.section .data.gate, "aw"
+	.section .data..gate, "aw"
 
 	.incbin "arch/ia64/kernel/gate.so"
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
index 86064ca9895..b5f8bdd8618 100644
--- a/arch/ia64/kernel/gate.S
+++ b/arch/ia64/kernel/gate.S
@@ -6,14 +6,15 @@
  * 	David Mosberger-Tang <davidm@hpl.hp.com>
  */
 
-#include <linux/config.h>
 
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/sigcontext.h>
-#include <asm/system.h>
 #include <asm/unistd.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include "paravirt_inst.h"
 
 /*
  * We can't easily refer to symbols inside the kernel.  To avoid full runtime relocation,
@@ -21,17 +22,18 @@
  * to targets outside the shared object) and to avoid multi-phase kernel builds, we
  * simply create minimalistic "patch lists" in special ELF sections.
  */
-	.section ".data.patch.fsyscall_table", "a"
+	.section ".data..patch.fsyscall_table", "a"
 	.previous
 #define LOAD_FSYSCALL_TABLE(reg)			\
 [1:]	movl reg=0;					\
-	.xdata4 ".data.patch.fsyscall_table", 1b-.
+	.xdata4 ".data..patch.fsyscall_table", 1b-.
 
-	.section ".data.patch.brl_fsys_bubble_down", "a"
+	.section ".data..patch.brl_fsys_bubble_down", "a"
 	.previous
 #define BRL_COND_FSYS_BUBBLE_DOWN(pr)			\
 [1:](pr)brl.cond.sptk 0;				\
-	.xdata4 ".data.patch.brl_fsys_bubble_down", 1b-.
+	;;						\
+	.xdata4 ".data..patch.brl_fsys_bubble_down", 1b-.
 
 GLOBAL_ENTRY(__kernel_syscall_via_break)
 	.prologue
@@ -48,87 +50,6 @@ GLOBAL_ENTRY(__kernel_syscall_via_break)
 }
 END(__kernel_syscall_via_break)
 
-/*
- * On entry:
- *	r11 = saved ar.pfs
- *	r15 = system call #
- *	b0  = saved return address
- *	b6  = return address
- * On exit:
- *	r11 = saved ar.pfs
- *	r15 = system call #
- *	b0  = saved return address
- *	all other "scratch" registers:	undefined
- *	all "preserved" registers:	same as on entry
- */
-
-GLOBAL_ENTRY(__kernel_syscall_via_epc)
-	.prologue
-	.altrp b6
-	.body
-{
-	/*
-	 * Note: the kernel cannot assume that the first two instructions in this
-	 * bundle get executed.  The remaining code must be safe even if
-	 * they do not get executed.
-	 */
-	adds r17=-1024,r15			// A
-	mov r10=0				// A    default to successful syscall execution
-	epc					// B	causes split-issue
-}
-	;;
-	rsm psr.be | psr.i			// M2 (5 cyc to srlz.d)
-	LOAD_FSYSCALL_TABLE(r14)		// X
-	;;
-	mov r16=IA64_KR(CURRENT)		// M2 (12 cyc)
-	shladd r18=r17,3,r14			// A
-	mov r19=NR_syscalls-1			// A
-	;;
-	lfetch [r18]				// M0|1
-	mov r29=psr				// M2 (12 cyc)
-	// If r17 is a NaT, p6 will be zero
-	cmp.geu p6,p7=r19,r17			// A    (sysnr > 0 && sysnr < 1024+NR_syscalls)?
-	;;
-	mov r21=ar.fpsr				// M2 (12 cyc)
-	tnat.nz p10,p9=r15			// I0
-	mov.i r26=ar.pfs			// I0 (would stall anyhow due to srlz.d...)
-	;;
-	srlz.d					// M0 (forces split-issue) ensure PSR.BE==0
-(p6)	ld8 r18=[r18]				// M0|1
-	nop.i 0
-	;;
-	nop.m 0
-(p6)	tbit.z.unc p8,p0=r18,0			// I0 (dual-issues with "mov b7=r18"!)
-	nop.i 0
-	;;
-(p8)	ssm psr.i
-(p6)	mov b7=r18				// I0
-(p8)	br.dptk.many b7				// B
-
-	mov r27=ar.rsc				// M2 (12 cyc)
-/*
- * brl.cond doesn't work as intended because the linker would convert this branch
- * into a branch to a PLT.  Perhaps there will be a way to avoid this with some
- * future version of the linker.  In the meantime, we just use an indirect branch
- * instead.
- */
-#ifdef CONFIG_ITANIUM
-(p6)	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
-	;;
-(p6)	ld8 r14=[r14]				// r14 <- fsys_bubble_down
-	;;
-(p6)	mov b7=r14
-(p6)	br.sptk.many b7
-#else
-	BRL_COND_FSYS_BUBBLE_DOWN(p6)
-#endif
-	ssm psr.i
-	mov r10=-1
-(p10)	mov r8=EINVAL
-(p9)	mov r8=ENOSYS
-	FSYS_RETURN
-END(__kernel_syscall_via_epc)
-
 #	define ARG0_OFF		(16 + IA64_SIGFRAME_ARG0_OFFSET)
 #	define ARG1_OFF		(16 + IA64_SIGFRAME_ARG1_OFFSET)
 #	define ARG2_OFF		(16 + IA64_SIGFRAME_ARG2_OFFSET)
@@ -374,3 +295,92 @@ restore_rbs:
 	// invala not necessary as that will happen when returning to user-mode
 	br.cond.sptk back_from_restore_rbs
 END(__kernel_sigtramp)
+
+/*
+ * On entry:
+ *	r11 = saved ar.pfs
+ *	r15 = system call #
+ *	b0  = saved return address
+ *	b6  = return address
+ * On exit:
+ *	r11 = saved ar.pfs
+ *	r15 = system call #
+ *	b0  = saved return address
+ *	all other "scratch" registers:	undefined
+ *	all "preserved" registers:	same as on entry
+ */
+
+GLOBAL_ENTRY(__kernel_syscall_via_epc)
+	.prologue
+	.altrp b6
+	.body
+{
+	/*
+	 * Note: the kernel cannot assume that the first two instructions in this
+	 * bundle get executed.  The remaining code must be safe even if
+	 * they do not get executed.
+	 */
+	adds r17=-1024,r15			// A
+	mov r10=0				// A    default to successful syscall execution
+	epc					// B	causes split-issue
+}
+	;;
+	RSM_PSR_BE_I(r20, r22)			// M2 (5 cyc to srlz.d)
+	LOAD_FSYSCALL_TABLE(r14)		// X
+	;;
+	mov r16=IA64_KR(CURRENT)		// M2 (12 cyc)
+	shladd r18=r17,3,r14			// A
+	mov r19=NR_syscalls-1			// A
+	;;
+	lfetch [r18]				// M0|1
+	MOV_FROM_PSR(p0, r29, r8)		// M2 (12 cyc)
+	// If r17 is a NaT, p6 will be zero
+	cmp.geu p6,p7=r19,r17			// A    (sysnr > 0 && sysnr < 1024+NR_syscalls)?
+	;;
+	mov r21=ar.fpsr				// M2 (12 cyc)
+	tnat.nz p10,p9=r15			// I0
+	mov.i r26=ar.pfs			// I0 (would stall anyhow due to srlz.d...)
+	;;
+	srlz.d					// M0 (forces split-issue) ensure PSR.BE==0
+(p6)	ld8 r18=[r18]				// M0|1
+	nop.i 0
+	;;
+	nop.m 0
+(p6)	tbit.z.unc p8,p0=r18,0			// I0 (dual-issues with "mov b7=r18"!)
+	nop.i 0
+	;;
+	SSM_PSR_I(p8, p14, r25)
+(p6)	mov b7=r18				// I0
+(p8)	br.dptk.many b7				// B
+
+	mov r27=ar.rsc				// M2 (12 cyc)
+/*
+ * brl.cond doesn't work as intended because the linker would convert this branch
+ * into a branch to a PLT.  Perhaps there will be a way to avoid this with some
+ * future version of the linker.  In the meantime, we just use an indirect branch
+ * instead.
+ */
+#ifdef CONFIG_ITANIUM
+(p6)	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
+	;;
+(p6)	ld8 r14=[r14]				// r14 <- fsys_bubble_down
+	;;
+(p6)	mov b7=r14
+(p6)	br.sptk.many b7
+#else
+	BRL_COND_FSYS_BUBBLE_DOWN(p6)
+#endif
+	SSM_PSR_I(p0, p14, r10)
+	mov r10=-1
+(p10)	mov r8=EINVAL
+(p9)	mov r8=ENOSYS
+	FSYS_RETURN
+
+#ifdef CONFIG_PARAVIRT
+	/*
+	 * padd to make the size of this symbol constant
+	 * independent of paravirtualization.
+	 */
+	.align PAGE_SIZE / 8
+#endif
+END(__kernel_syscall_via_epc)
diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S
index 7c99e6ec3da..e518f7902af 100644
--- a/arch/ia64/kernel/gate.lds.S
+++ b/arch/ia64/kernel/gate.lds.S
@@ -1,80 +1,92 @@
 /*
- * Linker script for gate DSO.  The gate pages are an ELF shared object prelinked to its
- * virtual address, with only one read-only segment and one execute-only segment (both fit
- * in one page).  This script controls its layout.
+ * Linker script for gate DSO.  The gate pages are an ELF shared object
+ * prelinked to its virtual address, with only one read-only segment and
+ * one execute-only segment (both fit in one page).  This script controls
+ * its layout.
  */
 
-#include <linux/config.h>
-
-#include <asm/system.h>
+#include <asm/page.h>
+#include "paravirt_patchlist.h"
 
 SECTIONS
 {
-  . = GATE_ADDR + SIZEOF_HEADERS;
-
-  .hash				: { *(.hash) }				:readable
-  .dynsym			: { *(.dynsym) }
-  .dynstr			: { *(.dynstr) }
-  .gnu.version			: { *(.gnu.version) }
-  .gnu.version_d		: { *(.gnu.version_d) }
-  .gnu.version_r		: { *(.gnu.version_r) }
-  .dynamic			: { *(.dynamic) }			:readable :dynamic
-
-  /*
-   * This linker script is used both with -r and with -shared.  For the layouts to match,
-   * we need to skip more than enough space for the dynamic symbol table et al.  If this
-   * amount is insufficient, ld -shared will barf.  Just increase it here.
-   */
-  . = GATE_ADDR + 0x500;
-
-  .data.patch			: {
-				    __start_gate_mckinley_e9_patchlist = .;
-				    *(.data.patch.mckinley_e9)
-				    __end_gate_mckinley_e9_patchlist = .;
-
-				    __start_gate_vtop_patchlist = .;
-				    *(.data.patch.vtop)
-				    __end_gate_vtop_patchlist = .;
-
-				    __start_gate_fsyscall_patchlist = .;
-				    *(.data.patch.fsyscall_table)
-				    __end_gate_fsyscall_patchlist = .;
-
-				    __start_gate_brl_fsys_bubble_down_patchlist = .;
-				    *(.data.patch.brl_fsys_bubble_down)
-				    __end_gate_brl_fsys_bubble_down_patchlist = .;
-  }									:readable
-  .IA_64.unwind_info		: { *(.IA_64.unwind_info*) }
-  .IA_64.unwind			: { *(.IA_64.unwind*) }			:readable :unwind
+	. = GATE_ADDR + SIZEOF_HEADERS;
+
+	.hash			: { *(.hash) }		:readable
+	.gnu.hash		: { *(.gnu.hash) }
+	.dynsym			: { *(.dynsym) }
+	.dynstr			: { *(.dynstr) }
+	.gnu.version		: { *(.gnu.version) }
+	.gnu.version_d		: { *(.gnu.version_d) }
+	.gnu.version_r		: { *(.gnu.version_r) }
+
+	.note			: { *(.note*) }		:readable	:note
+
+	.dynamic		: { *(.dynamic) }	:readable	:dynamic
+
+	/*
+	 * This linker script is used both with -r and with -shared.  For
+	 * the layouts to match, we need to skip more than enough space for
+	 * the dynamic symbol table et al.  If this amount is insufficient,
+	 * ld -shared will barf.  Just increase it here.
+	 */
+	. = GATE_ADDR + 0x600;
+
+	.data..patch		: {
+		__paravirt_start_gate_mckinley_e9_patchlist = .;
+		*(.data..patch.mckinley_e9)
+		__paravirt_end_gate_mckinley_e9_patchlist = .;
+
+		__paravirt_start_gate_vtop_patchlist = .;
+		*(.data..patch.vtop)
+		__paravirt_end_gate_vtop_patchlist = .;
+
+		__paravirt_start_gate_fsyscall_patchlist = .;
+		*(.data..patch.fsyscall_table)
+		__paravirt_end_gate_fsyscall_patchlist = .;
+
+		__paravirt_start_gate_brl_fsys_bubble_down_patchlist = .;
+		*(.data..patch.brl_fsys_bubble_down)
+		__paravirt_end_gate_brl_fsys_bubble_down_patchlist = .;
+	}						:readable
+
+	.IA_64.unwind_info	: { *(.IA_64.unwind_info*) }
+	.IA_64.unwind		: { *(.IA_64.unwind*) }	:readable	:unwind
 #ifdef HAVE_BUGGY_SEGREL
-  .text (GATE_ADDR + PAGE_SIZE)	: { *(.text) *(.text.*) }		:readable
+	.text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) }	:readable
 #else
-  . = ALIGN (PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
-  .text				: { *(.text) *(.text.*) }		:epc
+	. = ALIGN(PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
+	.text			: { *(.text) *(.text.*) }	:epc
 #endif
 
-  /DISCARD/			: {
-  	*(.got.plt) *(.got)
-	*(.data .data.* .gnu.linkonce.d.*)
-	*(.dynbss)
-	*(.bss .bss.* .gnu.linkonce.b.*)
-	*(__ex_table)
-	*(__mca_table)
-  }
+	/DISCARD/		: {
+		*(.got.plt) *(.got)
+		*(.data .data.* .gnu.linkonce.d.*)
+		*(.dynbss)
+		*(.bss .bss.* .gnu.linkonce.b.*)
+		*(__ex_table)
+		*(__mca_table)
+	}
 }
 
 /*
+ * ld does not recognize this name token; use the constant.
+ */
+#define	PT_IA_64_UNWIND	0x70000001
+
+/*
  * We must supply the ELF program headers explicitly to get just one
  * PT_LOAD segment, and set the flags explicitly to make segments read-only.
  */
 PHDRS
 {
-  readable  PT_LOAD	FILEHDR	PHDRS	FLAGS(4);	/* PF_R */
+	readable	PT_LOAD	FILEHDR	PHDRS	FLAGS(4);	/* PF_R */
 #ifndef HAVE_BUGGY_SEGREL
-  epc	    PT_LOAD	FILEHDR PHDRS	FLAGS(1);	/* PF_X */
+	epc		PT_LOAD	FILEHDR PHDRS	FLAGS(1);	/* PF_X */
 #endif
-  dynamic   PT_DYNAMIC			FLAGS(4);	/* PF_R */
-  unwind    0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
+	dynamic		PT_DYNAMIC		FLAGS(4);	/* PF_R */
+	note		PT_NOTE			FLAGS(4);	/* PF_R */
+	unwind		PT_IA_64_UNWIND;
 }
 
 /*
@@ -82,14 +94,14 @@ PHDRS
  */
 VERSION
 {
-  LINUX_2.5 {
-    global:
-	__kernel_syscall_via_break;
-	__kernel_syscall_via_epc;
-	__kernel_sigtramp;
-
-    local: *;
-  };
+	LINUX_2.5 {
+	global:
+		__kernel_syscall_via_break;
+		__kernel_syscall_via_epc;
+		__kernel_sigtramp;
+
+	local: *;
+	};
 }
 
 /* The ELF entry point can be used to set the AT_SYSINFO value.  */
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index f1778a84ea6..a4acddad0c7 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -19,7 +19,6 @@
  *   Support for CPU Hotplug
  */
 
-#include <linux/config.h>
 
 #include <asm/asmmacro.h>
 #include <asm/fpu.h>
@@ -27,11 +26,13 @@
 #include <asm/mmu_context.h>
 #include <asm/asm-offsets.h>
 #include <asm/pal.h>
+#include <asm/paravirt.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 #include <asm/mca_asm.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
 
 #ifdef CONFIG_HOTPLUG_CPU
 #define SAL_PSR_BITS_TO_SET				\
@@ -165,7 +166,7 @@ RestRR:											\
 	mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \
 	mov rr[_tmp1]=_tmp2
 
-	.section __special_page_section,"ax"
+	__PAGE_ALIGNED_DATA
 
 	.global empty_zero_page
 empty_zero_page:
@@ -179,7 +180,7 @@ swapper_pg_dir:
 halt_msg:
 	stringz "Halting kernel\n"
 
-	.text
+	__REF
 
 	.global start_ap
 
@@ -198,6 +199,11 @@ start_ap:
 	;;
 	srlz.i
 	;;
+ {
+	flushrs				// must be first insn in group
+	srlz.i
+ }
+	;;
 	/*
 	 * Save the region registers, predicate before they get clobbered
 	 */
@@ -352,7 +358,32 @@ start_ap:
 	mov ar.rsc=0		// place RSE in enforced lazy mode
 	;;
 	loadrs			// clear the dirty partition
-	mov IA64_KR(PER_CPU_DATA)=r0	// clear physical per-CPU base
+	movl r19=__phys_per_cpu_start
+	mov r18=PERCPU_PAGE_SIZE
+	;;
+#ifndef CONFIG_SMP
+	add r19=r19,r18
+	;;
+#else
+(isAP)	br.few 2f
+	movl r20=__cpu0_per_cpu
+	;;
+	shr.u r18=r18,3
+1:
+	ld8 r21=[r19],8;;
+	st8[r20]=r21,8
+	adds r18=-1,r18;;
+	cmp4.lt p7,p6=0,r18
+(p7)	br.cond.dptk.few 1b
+	mov r19=r20
+	;;
+2:
+#endif
+	tpa r19=r19
+	;;
+	.pred.rel.mutex isBP,isAP
+(isBP)	mov IA64_KR(PER_CPU_DATA)=r19	// per-CPU base for cpu0
+(isAP)	mov IA64_KR(PER_CPU_DATA)=r0	// clear physical per-CPU base
 	;;
 	mov ar.bspstore=r2	// establish the new RSE stack
 	;;
@@ -363,6 +394,41 @@ start_ap:
 	;;
 (isBP)	st8 [r2]=r28		// save the address of the boot param area passed by the bootloader
 
+#ifdef CONFIG_PARAVIRT
+
+	movl r14=hypervisor_setup_hooks
+	movl r15=hypervisor_type
+	mov r16=num_hypervisor_hooks
+	;;
+	ld8 r2=[r15]
+	;;
+	cmp.ltu p7,p0=r2,r16	// array size check
+	shladd r8=r2,3,r14
+	;;
+(p7)	ld8 r9=[r8]
+	;;
+(p7)	mov b1=r9
+(p7)	cmp.ne.unc p7,p0=r9,r0	// no actual branch to NULL
+	;;
+(p7)	br.call.sptk.many rp=b1
+
+	__INITDATA
+
+default_setup_hook = 0		// Currently nothing needs to be done.
+
+	.global hypervisor_type
+hypervisor_type:
+	data8		PARAVIRT_HYPERVISOR_TYPE_DEFAULT
+
+	// must have the same order with PARAVIRT_HYPERVISOR_TYPE_xxx
+
+hypervisor_setup_hooks:
+	data8		default_setup_hook
+num_hypervisor_hooks = (. - hypervisor_setup_hooks) / 8
+	.previous
+
+#endif
+
 #ifdef CONFIG_SMP
 (isAP)	br.call.sptk.many rp=start_secondary
 .ret0:
@@ -388,6 +454,8 @@ self:	hint @pause
 	br.sptk.many self		// endless loop
 END(_start)
 
+	.text
+
 GLOBAL_ENTRY(ia64_save_debug_regs)
 	alloc r16=ar.pfs,1,0,0,0
 	mov r20=ar.lc			// preserve ar.lc
@@ -854,7 +922,6 @@ END(__ia64_init_fpu)
  */
 GLOBAL_ENTRY(ia64_switch_mode_phys)
  {
-	alloc r2=ar.pfs,0,0,0,0
 	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
 	mov r15=ip
  }
@@ -903,7 +970,6 @@ END(ia64_switch_mode_phys)
  */
 GLOBAL_ENTRY(ia64_switch_mode_virt)
  {
-	alloc r2=ar.pfs,0,0,0,0
 	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
 	mov r15=ip
  }
@@ -966,7 +1032,7 @@ END(ia64_delay_loop)
  * Return a CPU-local timestamp in nano-seconds.  This timestamp is
  * NOT synchronized across CPUs its return value must never be
  * compared against the values returned on another CPU.  The usage in
- * kernel/sched.c ensures that.
+ * kernel/sched/core.c ensures that.
  *
  * The return-value of sched_clock() is NOT supposed to wrap-around.
  * If it did, it would cause some scheduling hiccups (at the worst).
@@ -980,8 +1046,8 @@ END(ia64_delay_loop)
  * except that the multiplication and the shift are done with 128-bit
  * intermediate precision so that we can produce a full 64-bit result.
  */
-GLOBAL_ENTRY(sched_clock)
-	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+GLOBAL_ENTRY(ia64_native_sched_clock)
+	addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
 	mov.m r9=ar.itc		// fetch cycle-counter				(35 cyc)
 	;;
 	ldf8 f8=[r8]
@@ -996,20 +1062,33 @@ GLOBAL_ENTRY(sched_clock)
 	;;
 	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
 	br.ret.sptk.many rp
-END(sched_clock)
+END(ia64_native_sched_clock)
+#ifndef CONFIG_PARAVIRT
+	//unsigned long long
+	//sched_clock(void) __attribute__((alias("ia64_native_sched_clock")));
+	.global sched_clock
+sched_clock = ia64_native_sched_clock
+#endif
 
-GLOBAL_ENTRY(start_kernel_thread)
-	.prologue
-	.save rp, r0				// this is the end of the call-chain
-	.body
-	alloc r2 = ar.pfs, 0, 0, 2, 0
-	mov out0 = r9
-	mov out1 = r11;;
-	br.call.sptk.many rp = kernel_thread_helper;;
-	mov out0 = r8
-	br.call.sptk.many rp = sys_exit;;
-1:	br.sptk.few 1b				// not reached
-END(start_kernel_thread)
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+GLOBAL_ENTRY(cycle_to_cputime)
+	alloc r16=ar.pfs,1,0,0,0
+	addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+	;;
+	ldf8 f8=[r8]
+	;;
+	setf.sig f9=r32
+	;;
+	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
+	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
+	;;
+	getf.sig r8=f10		//						(5 cyc)
+	getf.sig r9=f11
+	;;
+	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+	br.ret.sptk.many rp
+END(cycle_to_cputime)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_IA64_BRL_EMU
 
@@ -1034,95 +1113,6 @@ SET_REG(b5);
 #endif /* CONFIG_IA64_BRL_EMU */
 
 #ifdef CONFIG_SMP
-	/*
-	 * This routine handles spinlock contention.  It uses a non-standard calling
-	 * convention to avoid converting leaf routines into interior routines.  Because
-	 * of this special convention, there are several restrictions:
-	 *
-	 * - do not use gp relative variables, this code is called from the kernel
-	 *   and from modules, r1 is undefined.
-	 * - do not use stacked registers, the caller owns them.
-	 * - do not use the scratch stack space, the caller owns it.
-	 * - do not use any registers other than the ones listed below
-	 *
-	 * Inputs:
-	 *   ar.pfs - saved CFM of caller
-	 *   ar.ccv - 0 (and available for use)
-	 *   r27    - flags from spin_lock_irqsave or 0.  Must be preserved.
-	 *   r28    - available for use.
-	 *   r29    - available for use.
-	 *   r30    - available for use.
-	 *   r31    - address of lock, available for use.
-	 *   b6     - return address
-	 *   p14    - available for use.
-	 *   p15    - used to track flag status.
-	 *
-	 * If you patch this code to use more registers, do not forget to update
-	 * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
-	 */
-
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-
-GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
-	.prologue
-	.save ar.pfs, r0	// this code effectively has a zero frame size
-	.save rp, r28
-	.body
-	nop 0
-	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
-	.restore sp		// pop existing prologue after next insn
-	mov b6 = r28
-	.prologue
-	.save ar.pfs, r0
-	.altrp b6
-	.body
-	;;
-(p15)	ssm psr.i		// reenable interrupts if they were on
-				// DavidM says that srlz.d is slow and is not required in this case
-.wait:
-	// exponential backoff, kdb, lockmeter etc. go in here
-	hint @pause
-	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
-	nop 0
-	;;
-	cmp4.ne p14,p0=r30,r0
-(p14)	br.cond.sptk.few .wait
-(p15)	rsm psr.i		// disable interrupts if we reenabled them
-	br.cond.sptk.few b6	// lock is now free, try to acquire
-	.global ia64_spinlock_contention_pre3_4_end	// for kernprof
-ia64_spinlock_contention_pre3_4_end:
-END(ia64_spinlock_contention_pre3_4)
-
-#else
-
-GLOBAL_ENTRY(ia64_spinlock_contention)
-	.prologue
-	.altrp b6
-	.body
-	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
-	;;
-.wait:
-(p15)	ssm psr.i		// reenable interrupts if they were on
-				// DavidM says that srlz.d is slow and is not required in this case
-.wait2:
-	// exponential backoff, kdb, lockmeter etc. go in here
-	hint @pause
-	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
-	;;
-	cmp4.ne p14,p0=r30,r0
-	mov r30 = 1
-(p14)	br.cond.sptk.few .wait2
-(p15)	rsm psr.i		// disable interrupts if we reenabled them
-	;;
-	cmpxchg4.acq r30=[r31], r30, ar.ccv
-	;;
-	cmp4.ne p14,p0=r0,r30
-(p14)	br.cond.sptk.few .wait
-
-	br.ret.sptk.many b6	// lock is now taken
-END(ia64_spinlock_contention)
-
-#endif
 
 #ifdef CONFIG_HOTPLUG_CPU
 GLOBAL_ENTRY(ia64_jump_to_sal)
@@ -1146,7 +1136,7 @@ GLOBAL_ENTRY(ia64_jump_to_sal)
 	movl r16=SAL_PSR_BITS_TO_SET;;
 	mov cr.ipsr=r16
 	mov cr.ifs=r0;;
-	rfi;;
+	rfi;;			// note: this unmask MCA/INIT (psr.mc)
 1:
 	/*
 	 * Invalidate all TLB data/inst
@@ -1172,6 +1162,7 @@ tlb_purge_done:
 	RESTORE_REG(cr.dcr, r25, r17);;
 	RESTORE_REG(cr.iva, r25, r17);;
 	RESTORE_REG(cr.pta, r25, r17);;
+	srlz.d;;	// required not to violate RAW dependency
 	RESTORE_REG(cr.itv, r25, r17);;
 	RESTORE_REG(cr.pmv, r25, r17);;
 	RESTORE_REG(cr.cmcv, r25, r17);;
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index bbcfd08378a..5b7791dd396 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -5,7 +5,6 @@
  * All other exports should be put directly after the definition.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 
 #include <linux/string.h>
@@ -13,17 +12,16 @@ EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(strlen);
 
+#include <asm/pgtable.h>
+EXPORT_SYMBOL_GPL(empty_zero_page);
+
 #include <asm/checksum.h>
 EXPORT_SYMBOL(ip_fast_csum);		/* hand-coded assembly */
-
-#include <asm/semaphore.h>
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(csum_ipv6_magic);
 
 #include <asm/page.h>
 EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
 #include <linux/bootmem.h>
@@ -32,9 +30,9 @@ EXPORT_SYMBOL(max_low_pfn);	/* defined by bootmem.c, but not exported by generic
 #endif
 
 #include <asm/processor.h>
-EXPORT_SYMBOL(per_cpu__cpu_info);
+EXPORT_SYMBOL(ia64_cpu_info);
 #ifdef CONFIG_SMP
-EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
+EXPORT_SYMBOL(local_per_cpu_offset);
 #endif
 
 #include <asm/uaccess.h>
@@ -63,7 +61,7 @@ EXPORT_SYMBOL(__udivdi3);
 EXPORT_SYMBOL(__moddi3);
 EXPORT_SYMBOL(__umoddi3);
 
-#if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE)
+#if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE)
 extern void xor_ia64_2(void);
 extern void xor_ia64_3(void);
 extern void xor_ia64_4(void);
@@ -86,25 +84,15 @@ EXPORT_SYMBOL(ia64_save_scratch_fpregs);
 #include <asm/unwind.h>
 EXPORT_SYMBOL(unw_init_running);
 
-#ifdef ASM_SUPPORTED
-# ifdef CONFIG_SMP
-#  if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-/*
- * This is not a normal routine and we don't want a function descriptor for it, so we use
- * a fake declaration here.
- */
-extern char ia64_spinlock_contention_pre3_4;
-EXPORT_SYMBOL(ia64_spinlock_contention_pre3_4);
-#  else
-/*
- * This is not a normal routine and we don't want a function descriptor for it, so we use
- * a fake declaration here.
- */
-extern char ia64_spinlock_contention;
-EXPORT_SYMBOL(ia64_spinlock_contention);
-#  endif
-# endif
+#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE)
+extern void esi_call_phys (void);
+EXPORT_SYMBOL_GPL(esi_call_phys);
 #endif
-
 extern char ia64_ivt[];
 EXPORT_SYMBOL(ia64_ivt);
+
+#include <asm/ftrace.h>
+#ifdef CONFIG_FUNCTION_TRACER
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(_mcount);
+#endif
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index b69c397ed1b..f9efe9739d3 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -8,6 +8,7 @@
 
 #include <linux/init.h>
 #include <linux/mm.h>
+#include <linux/fs.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/init_task.h>
@@ -16,19 +17,13 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial task structure.
  *
  * We need to make sure that this is properly aligned due to the way process stacks are
- * handled. This is done by having a special ".data.init_task" section...
+ * handled. This is done by having a special ".data..init_task" section...
  */
 #define init_thread_info	init_task_mem.s.thread_info
 
@@ -38,7 +33,8 @@ union {
 		struct thread_info thread_info;
 	} s;
 	unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
-} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
+} init_task_mem asm ("init_task") __init_task_data =
+	{{
 	.task =		INIT_TASK(init_task_mem.s.task),
 	.thread_info =	INIT_THREAD_INFO(init_task_mem.s.task)
 }};
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index d58c1c5c903..cd44a57c73b 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -69,17 +69,16 @@
  *     systems, we use one-to-one mapping between IA-64 vector and IRQ.  A
  *     platform can implement platform_irq_to_vector(irq) and
  *     platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
- *     Please see also include/asm-ia64/hw_irq.h for those APIs.
+ *     Please see also arch/ia64/include/asm/hw_irq.h for those APIs.
  *
  * To sum up, there are three levels of mappings involved:
  *
  *	PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
  *
  * Note: The term "IRQ" is loosely used everywhere in Linux kernel to
- * describeinterrupts.  Now we use "IRQ" only for Linux IRQ's.  ISA IRQ
+ * describe interrupts.  Now we use "IRQ" only for Linux IRQ's.  ISA IRQ
  * (isa_irq) is the only exception in this source code.
  */
-#include <linux/config.h>
 
 #include <linux/acpi.h>
 #include <linux/init.h>
@@ -87,8 +86,8 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/pci.h>
+#include <linux/slab.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <linux/bootmem.h>
 
@@ -99,7 +98,6 @@
 #include <asm/machvec.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 
 #undef DEBUG_INTERRUPT_ROUTING
 
@@ -109,10 +107,6 @@
 #define DBG(fmt...)
 #endif
 
-#define NR_PREALLOCATE_RTE_ENTRIES \
-	(PAGE_SIZE / sizeof(struct iosapic_rte_info))
-#define RTE_PREALLOCATED	(1)
-
 static DEFINE_SPINLOCK(iosapic_lock);
 
 /*
@@ -120,21 +114,30 @@ static DEFINE_SPINLOCK(iosapic_lock);
  * vector.
  */
 
-struct iosapic_rte_info {
-	struct list_head rte_list;	/* node in list of RTEs sharing the
-					 * same vector */
+#define NO_REF_RTE	0
+
+static struct iosapic {
 	char __iomem	*addr;		/* base address of IOSAPIC */
-	unsigned int	gsi_base;	/* first GSI assigned to this
-					 * IOSAPIC */
+	unsigned int	gsi_base;	/* GSI base */
+	unsigned short	num_rte;	/* # of RTEs on this IOSAPIC */
+	int		rtes_inuse;	/* # of RTEs in use on this IOSAPIC */
+#ifdef CONFIG_NUMA
+	unsigned short	node;		/* numa node association via pxm */
+#endif
+	spinlock_t	lock;		/* lock for indirect reg access */
+} iosapic_lists[NR_IOSAPICS];
+
+struct iosapic_rte_info {
+	struct list_head rte_list;	/* RTEs sharing the same vector */
 	char		rte_index;	/* IOSAPIC RTE index */
 	int		refcnt;		/* reference counter */
-	unsigned int	flags;		/* flags */
+	struct iosapic	*iosapic;
 } ____cacheline_aligned;
 
 static struct iosapic_intr_info {
 	struct list_head rtes;		/* RTEs using this vector (empty =>
 					 * not an IOSAPIC interrupt) */
-	int		count;		/* # of RTEs that shares this vector */
+	int		count;		/* # of registered RTEs */
 	u32		low32;		/* current value of low word of
 					 * Redirection table entry */
 	unsigned int	dest;		/* destination CPU physical ID */
@@ -142,23 +145,19 @@ static struct iosapic_intr_info {
 	unsigned char 	polarity: 1;	/* interrupt polarity
 					 * (see iosapic.h) */
 	unsigned char	trigger	: 1;	/* trigger mode (see iosapic.h) */
-} iosapic_intr_info[IA64_NUM_VECTORS];
+} iosapic_intr_info[NR_IRQS];
 
-static struct iosapic {
-	char __iomem	*addr;		/* base address of IOSAPIC */
-	unsigned int 	gsi_base;	/* first GSI assigned to this
-					 * IOSAPIC */
-	unsigned short 	num_rte;	/* # of RTEs on this IOSAPIC */
-	int		rtes_inuse;	/* # of RTEs in use on this IOSAPIC */
-#ifdef CONFIG_NUMA
-	unsigned short	node;		/* numa node association via pxm */
-#endif
-} iosapic_lists[NR_IOSAPICS];
+static unsigned char pcat_compat;	/* 8259 compatibility flag */
 
-static unsigned char pcat_compat __devinitdata;	/* 8259 compatibility flag */
+static inline void
+iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val)
+{
+	unsigned long flags;
 
-static int iosapic_kmalloc_ok;
-static LIST_HEAD(free_rte_list);
+	spin_lock_irqsave(&iosapic->lock, flags);
+	__iosapic_write(iosapic->addr, reg, val);
+	spin_unlock_irqrestore(&iosapic->lock, flags);
+}
 
 /*
  * Find an IOSAPIC associated with a GSI
@@ -177,95 +176,68 @@ find_iosapic (unsigned int gsi)
 	return -1;
 }
 
-static inline int
-_gsi_to_vector (unsigned int gsi)
+static inline int __gsi_to_irq(unsigned int gsi)
 {
+	int irq;
 	struct iosapic_intr_info *info;
 	struct iosapic_rte_info *rte;
 
-	for (info = iosapic_intr_info; info <
-		     iosapic_intr_info + IA64_NUM_VECTORS; ++info)
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		info = &iosapic_intr_info[irq];
 		list_for_each_entry(rte, &info->rtes, rte_list)
-			if (rte->gsi_base + rte->rte_index == gsi)
-				return info - iosapic_intr_info;
+			if (rte->iosapic->gsi_base + rte->rte_index == gsi)
+				return irq;
+	}
 	return -1;
 }
 
-/*
- * Translate GSI number to the corresponding IA-64 interrupt vector.  If no
- * entry exists, return -1.
- */
-inline int
-gsi_to_vector (unsigned int gsi)
-{
-	return _gsi_to_vector(gsi);
-}
-
 int
 gsi_to_irq (unsigned int gsi)
 {
 	unsigned long flags;
 	int irq;
-	/*
-	 * XXX fix me: this assumes an identity mapping between IA-64 vector
-	 * and Linux irq numbers...
-	 */
+
 	spin_lock_irqsave(&iosapic_lock, flags);
-	{
-		irq = _gsi_to_vector(gsi);
-	}
+	irq = __gsi_to_irq(gsi);
 	spin_unlock_irqrestore(&iosapic_lock, flags);
-
 	return irq;
 }
 
-static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi,
-						  unsigned int vec)
+static struct iosapic_rte_info *find_rte(unsigned int irq, unsigned int gsi)
 {
 	struct iosapic_rte_info *rte;
 
-	list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
-		if (rte->gsi_base + rte->rte_index == gsi)
+	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
+		if (rte->iosapic->gsi_base + rte->rte_index == gsi)
 			return rte;
 	return NULL;
 }
 
 static void
-set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
+set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask)
 {
 	unsigned long pol, trigger, dmode;
 	u32 low32, high32;
-	char __iomem *addr;
 	int rte_index;
 	char redir;
 	struct iosapic_rte_info *rte;
+	ia64_vector vector = irq_to_vector(irq);
 
 	DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
 
-	rte = gsi_vector_to_rte(gsi, vector);
+	rte = find_rte(irq, gsi);
 	if (!rte)
 		return;		/* not an IOSAPIC interrupt */
 
 	rte_index = rte->rte_index;
-	addr	= rte->addr;
-	pol     = iosapic_intr_info[vector].polarity;
-	trigger = iosapic_intr_info[vector].trigger;
-	dmode   = iosapic_intr_info[vector].dmode;
+	pol     = iosapic_intr_info[irq].polarity;
+	trigger = iosapic_intr_info[irq].trigger;
+	dmode   = iosapic_intr_info[irq].dmode;
 
 	redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
 
 #ifdef CONFIG_SMP
-	{
-		unsigned int irq;
-
-		for (irq = 0; irq < NR_IRQS; ++irq)
-			if (irq_to_vector(irq) == vector) {
-				set_irq_affinity_info(irq,
-						      (int)(dest & 0xffff),
-						      redir);
-				break;
-			}
-	}
+	set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
 #endif
 
 	low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
@@ -277,126 +249,132 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
 	/* dest contains both id and eid */
 	high32 = (dest << IOSAPIC_DEST_SHIFT);
 
-	iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
-	iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
-	iosapic_intr_info[vector].low32 = low32;
-	iosapic_intr_info[vector].dest = dest;
+	iosapic_write(rte->iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
+	iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
+	iosapic_intr_info[irq].low32 = low32;
+	iosapic_intr_info[irq].dest = dest;
 }
 
 static void
-nop (unsigned int irq)
+nop (struct irq_data *data)
 {
 	/* do nothing... */
 }
 
+
+#ifdef CONFIG_KEXEC
+void
+kexec_disable_iosapic(void)
+{
+	struct iosapic_intr_info *info;
+	struct iosapic_rte_info *rte;
+	ia64_vector vec;
+	int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		info = &iosapic_intr_info[irq];
+		vec = irq_to_vector(irq);
+		list_for_each_entry(rte, &info->rtes,
+				rte_list) {
+			iosapic_write(rte->iosapic,
+					IOSAPIC_RTE_LOW(rte->rte_index),
+					IOSAPIC_MASK|vec);
+			iosapic_eoi(rte->iosapic->addr, vec);
+		}
+	}
+}
+#endif
+
 static void
-mask_irq (unsigned int irq)
+mask_irq (struct irq_data *data)
 {
-	unsigned long flags;
-	char __iomem *addr;
+	unsigned int irq = data->irq;
 	u32 low32;
 	int rte_index;
-	ia64_vector vec = irq_to_vector(irq);
 	struct iosapic_rte_info *rte;
 
-	if (list_empty(&iosapic_intr_info[vec].rtes))
+	if (!iosapic_intr_info[irq].count)
 		return;			/* not an IOSAPIC interrupt! */
 
-	spin_lock_irqsave(&iosapic_lock, flags);
-	{
-		/* set only the mask bit */
-		low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
-		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
-				    rte_list) {
-			addr = rte->addr;
-			rte_index = rte->rte_index;
-			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
-		}
+	/* set only the mask bit */
+	low32 = iosapic_intr_info[irq].low32 |= IOSAPIC_MASK;
+	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
+		rte_index = rte->rte_index;
+		iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
 	}
-	spin_unlock_irqrestore(&iosapic_lock, flags);
 }
 
 static void
-unmask_irq (unsigned int irq)
+unmask_irq (struct irq_data *data)
 {
-	unsigned long flags;
-	char __iomem *addr;
+	unsigned int irq = data->irq;
 	u32 low32;
 	int rte_index;
-	ia64_vector vec = irq_to_vector(irq);
 	struct iosapic_rte_info *rte;
 
-	if (list_empty(&iosapic_intr_info[vec].rtes))
+	if (!iosapic_intr_info[irq].count)
 		return;			/* not an IOSAPIC interrupt! */
 
-	spin_lock_irqsave(&iosapic_lock, flags);
-	{
-		low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
-		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
-				    rte_list) {
-			addr = rte->addr;
-			rte_index = rte->rte_index;
-			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
-		}
+	low32 = iosapic_intr_info[irq].low32 &= ~IOSAPIC_MASK;
+	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
+		rte_index = rte->rte_index;
+		iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
 	}
-	spin_unlock_irqrestore(&iosapic_lock, flags);
 }
 
 
-static void
-iosapic_set_affinity (unsigned int irq, cpumask_t mask)
+static int
+iosapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		     bool force)
 {
 #ifdef CONFIG_SMP
-	unsigned long flags;
+	unsigned int irq = data->irq;
 	u32 high32, low32;
-	int dest, rte_index;
-	char __iomem *addr;
+	int cpu, dest, rte_index;
 	int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
-	ia64_vector vec;
 	struct iosapic_rte_info *rte;
+	struct iosapic *iosapic;
 
 	irq &= (~IA64_IRQ_REDIRECTED);
-	vec = irq_to_vector(irq);
 
-	if (cpus_empty(mask))
-		return;
+	cpu = cpumask_first_and(cpu_online_mask, mask);
+	if (cpu >= nr_cpu_ids)
+		return -1;
+
+	if (irq_prepare_move(irq, cpu))
+		return -1;
 
-	dest = cpu_physical_id(first_cpu(mask));
+	dest = cpu_physical_id(cpu);
 
-	if (list_empty(&iosapic_intr_info[vec].rtes))
-		return;			/* not an IOSAPIC interrupt */
+	if (!iosapic_intr_info[irq].count)
+		return -1;			/* not an IOSAPIC interrupt */
 
 	set_irq_affinity_info(irq, dest, redir);
 
 	/* dest contains both id and eid */
 	high32 = dest << IOSAPIC_DEST_SHIFT;
 
-	spin_lock_irqsave(&iosapic_lock, flags);
-	{
-		low32 = iosapic_intr_info[vec].low32 &
-			~(7 << IOSAPIC_DELIVERY_SHIFT);
-
-		if (redir)
-		        /* change delivery mode to lowest priority */
-			low32 |= (IOSAPIC_LOWEST_PRIORITY <<
-				  IOSAPIC_DELIVERY_SHIFT);
-		else
-		        /* change delivery mode to fixed */
-			low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
-
-		iosapic_intr_info[vec].low32 = low32;
-		iosapic_intr_info[vec].dest = dest;
-		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
-				    rte_list) {
-			addr = rte->addr;
-			rte_index = rte->rte_index;
-			iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index),
-				      high32);
-			iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
-		}
+	low32 = iosapic_intr_info[irq].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
+	if (redir)
+		/* change delivery mode to lowest priority */
+		low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
+	else
+		/* change delivery mode to fixed */
+		low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
+	low32 &= IOSAPIC_VECTOR_MASK;
+	low32 |= irq_to_vector(irq);
+
+	iosapic_intr_info[irq].low32 = low32;
+	iosapic_intr_info[irq].dest = dest;
+	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
+		iosapic = rte->iosapic;
+		rte_index = rte->rte_index;
+		iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
+		iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
 	}
-	spin_unlock_irqrestore(&iosapic_lock, flags);
+
 #endif
+	return 0;
 }
 
 /*
@@ -404,21 +382,34 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
  */
 
 static unsigned int
-iosapic_startup_level_irq (unsigned int irq)
+iosapic_startup_level_irq (struct irq_data *data)
 {
-	unmask_irq(irq);
+	unmask_irq(data);
 	return 0;
 }
 
 static void
-iosapic_end_level_irq (unsigned int irq)
+iosapic_unmask_level_irq (struct irq_data *data)
 {
+	unsigned int irq = data->irq;
 	ia64_vector vec = irq_to_vector(irq);
 	struct iosapic_rte_info *rte;
+	int do_unmask_irq = 0;
+
+	irq_complete_move(irq);
+	if (unlikely(irqd_is_setaffinity_pending(data))) {
+		do_unmask_irq = 1;
+		mask_irq(data);
+	} else
+		unmask_irq(data);
 
-	move_native_irq(irq);
-	list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
-		iosapic_eoi(rte->addr, vec);
+	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
+		iosapic_eoi(rte->iosapic->addr, vec);
+
+	if (unlikely(do_unmask_irq)) {
+		irq_move_masked_irq(data);
+		unmask_irq(data);
+	}
 }
 
 #define iosapic_shutdown_level_irq	mask_irq
@@ -426,15 +417,16 @@ iosapic_end_level_irq (unsigned int irq)
 #define iosapic_disable_level_irq	mask_irq
 #define iosapic_ack_level_irq		nop
 
-struct hw_interrupt_type irq_type_iosapic_level = {
-	.typename =	"IO-SAPIC-level",
-	.startup =	iosapic_startup_level_irq,
-	.shutdown =	iosapic_shutdown_level_irq,
-	.enable =	iosapic_enable_level_irq,
-	.disable =	iosapic_disable_level_irq,
-	.ack =		iosapic_ack_level_irq,
-	.end =		iosapic_end_level_irq,
-	.set_affinity =	iosapic_set_affinity
+static struct irq_chip irq_type_iosapic_level = {
+	.name =			"IO-SAPIC-level",
+	.irq_startup =		iosapic_startup_level_irq,
+	.irq_shutdown =		iosapic_shutdown_level_irq,
+	.irq_enable =		iosapic_enable_level_irq,
+	.irq_disable =		iosapic_disable_level_irq,
+	.irq_ack =		iosapic_ack_level_irq,
+	.irq_mask =		mask_irq,
+	.irq_unmask =		iosapic_unmask_level_irq,
+	.irq_set_affinity =	iosapic_set_affinity
 };
 
 /*
@@ -442,9 +434,9 @@ struct hw_interrupt_type irq_type_iosapic_level = {
  */
 
 static unsigned int
-iosapic_startup_edge_irq (unsigned int irq)
+iosapic_startup_edge_irq (struct irq_data *data)
 {
-	unmask_irq(irq);
+	unmask_irq(data);
 	/*
 	 * IOSAPIC simply drops interrupts pended while the
 	 * corresponding pin was masked, so we can't know if an
@@ -454,37 +446,28 @@ iosapic_startup_edge_irq (unsigned int irq)
 }
 
 static void
-iosapic_ack_edge_irq (unsigned int irq)
+iosapic_ack_edge_irq (struct irq_data *data)
 {
-	irq_desc_t *idesc = irq_descp(irq);
-
-	move_native_irq(irq);
-	/*
-	 * Once we have recorded IRQ_PENDING already, we can mask the
-	 * interrupt for real. This prevents IRQ storms from unhandled
-	 * devices.
-	 */
-	if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) ==
-	    (IRQ_PENDING|IRQ_DISABLED))
-		mask_irq(irq);
+	irq_complete_move(data->irq);
+	irq_move_irq(data);
 }
 
 #define iosapic_enable_edge_irq		unmask_irq
 #define iosapic_disable_edge_irq	nop
-#define iosapic_end_edge_irq		nop
-
-struct hw_interrupt_type irq_type_iosapic_edge = {
-	.typename =	"IO-SAPIC-edge",
-	.startup =	iosapic_startup_edge_irq,
-	.shutdown =	iosapic_disable_edge_irq,
-	.enable =	iosapic_enable_edge_irq,
-	.disable =	iosapic_disable_edge_irq,
-	.ack =		iosapic_ack_edge_irq,
-	.end =		iosapic_end_edge_irq,
-	.set_affinity =	iosapic_set_affinity
+
+static struct irq_chip irq_type_iosapic_edge = {
+	.name =			"IO-SAPIC-edge",
+	.irq_startup =		iosapic_startup_edge_irq,
+	.irq_shutdown =		iosapic_disable_edge_irq,
+	.irq_enable =		iosapic_enable_edge_irq,
+	.irq_disable =		iosapic_disable_edge_irq,
+	.irq_ack =		iosapic_ack_edge_irq,
+	.irq_mask =		mask_irq,
+	.irq_unmask =		unmask_irq,
+	.irq_set_affinity =	iosapic_set_affinity
 };
 
-unsigned int
+static unsigned int
 iosapic_version (char __iomem *addr)
 {
 	/*
@@ -496,13 +479,12 @@ iosapic_version (char __iomem *addr)
 	 *	unsigned int reserved2 : 8;
 	 * }
 	 */
-	return iosapic_read(addr, IOSAPIC_VERSION);
+	return __iosapic_read(addr, IOSAPIC_VERSION);
 }
 
-static int iosapic_find_sharable_vector (unsigned long trigger,
-					 unsigned long pol)
+static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol)
 {
-	int i, vector = -1, min_count = -1;
+	int i, irq = -ENOSPC, min_count = -1;
 	struct iosapic_intr_info *info;
 
 	/*
@@ -510,21 +492,21 @@ static int iosapic_find_sharable_vector (unsigned long trigger,
 	 * supported yet
 	 */
 	if (trigger == IOSAPIC_EDGE)
-		return -1;
+		return -EINVAL;
 
-	for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
+	for (i = 0; i < NR_IRQS; i++) {
 		info = &iosapic_intr_info[i];
 		if (info->trigger == trigger && info->polarity == pol &&
-		    (info->dmode == IOSAPIC_FIXED || info->dmode ==
-		     IOSAPIC_LOWEST_PRIORITY)) {
+		    (info->dmode == IOSAPIC_FIXED ||
+		     info->dmode == IOSAPIC_LOWEST_PRIORITY) &&
+		    can_request_irq(i, IRQF_SHARED)) {
 			if (min_count == -1 || info->count < min_count) {
-				vector = i;
+				irq = i;
 				min_count = info->count;
 			}
 		}
 	}
-
-	return vector;
+	return irq;
 }
 
 /*
@@ -532,158 +514,122 @@ static int iosapic_find_sharable_vector (unsigned long trigger,
  *  assign a new vector for the other and make the vector available
  */
 static void __init
-iosapic_reassign_vector (int vector)
+iosapic_reassign_vector (int irq)
 {
-	int new_vector;
+	int new_irq;
 
-	if (!list_empty(&iosapic_intr_info[vector].rtes)) {
-		new_vector = assign_irq_vector(AUTO_ASSIGN);
-		if (new_vector < 0)
-			panic("%s: out of interrupt vectors!\n", __FUNCTION__);
+	if (iosapic_intr_info[irq].count) {
+		new_irq = create_irq();
+		if (new_irq < 0)
+			panic("%s: out of interrupt vectors!\n", __func__);
 		printk(KERN_INFO "Reassigning vector %d to %d\n",
-		       vector, new_vector);
-		memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
+		       irq_to_vector(irq), irq_to_vector(new_irq));
+		memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq],
 		       sizeof(struct iosapic_intr_info));
-		INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
-		list_move(iosapic_intr_info[vector].rtes.next,
-			  &iosapic_intr_info[new_vector].rtes);
-		memset(&iosapic_intr_info[vector], 0,
+		INIT_LIST_HEAD(&iosapic_intr_info[new_irq].rtes);
+		list_move(iosapic_intr_info[irq].rtes.next,
+			  &iosapic_intr_info[new_irq].rtes);
+		memset(&iosapic_intr_info[irq], 0,
 		       sizeof(struct iosapic_intr_info));
-		iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
-		INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
+		iosapic_intr_info[irq].low32 = IOSAPIC_MASK;
+		INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes);
 	}
 }
 
-static struct iosapic_rte_info *iosapic_alloc_rte (void)
+static inline int irq_is_shared (int irq)
 {
-	int i;
-	struct iosapic_rte_info *rte;
-	int preallocated = 0;
-
-	if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
-		rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
-				    NR_PREALLOCATE_RTE_ENTRIES);
-		if (!rte)
-			return NULL;
-		for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
-			list_add(&rte->rte_list, &free_rte_list);
-	}
-
-	if (!list_empty(&free_rte_list)) {
-		rte = list_entry(free_rte_list.next, struct iosapic_rte_info,
-				 rte_list);
-		list_del(&rte->rte_list);
-		preallocated++;
-	} else {
-		rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC);
-		if (!rte)
-			return NULL;
-	}
-
-	memset(rte, 0, sizeof(struct iosapic_rte_info));
-	if (preallocated)
-		rte->flags |= RTE_PREALLOCATED;
-
-	return rte;
+	return (iosapic_intr_info[irq].count > 1);
 }
 
-static void iosapic_free_rte (struct iosapic_rte_info *rte)
+struct irq_chip*
+ia64_native_iosapic_get_irq_chip(unsigned long trigger)
 {
-	if (rte->flags & RTE_PREALLOCATED)
-		list_add_tail(&rte->rte_list, &free_rte_list);
+	if (trigger == IOSAPIC_EDGE)
+		return &irq_type_iosapic_edge;
 	else
-		kfree(rte);
-}
-
-static inline int vector_is_shared (int vector)
-{
-	return (iosapic_intr_info[vector].count > 1);
+		return &irq_type_iosapic_level;
 }
 
 static int
-register_intr (unsigned int gsi, int vector, unsigned char delivery,
+register_intr (unsigned int gsi, int irq, unsigned char delivery,
 	       unsigned long polarity, unsigned long trigger)
 {
-	irq_desc_t *idesc;
-	struct hw_interrupt_type *irq_type;
-	int rte_index;
+	struct irq_chip *chip, *irq_type;
 	int index;
-	unsigned long gsi_base;
-	void __iomem *iosapic_address;
 	struct iosapic_rte_info *rte;
 
 	index = find_iosapic(gsi);
 	if (index < 0) {
 		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
-		       __FUNCTION__, gsi);
+		       __func__, gsi);
 		return -ENODEV;
 	}
 
-	iosapic_address = iosapic_lists[index].addr;
-	gsi_base = iosapic_lists[index].gsi_base;
-
-	rte = gsi_vector_to_rte(gsi, vector);
+	rte = find_rte(irq, gsi);
 	if (!rte) {
-		rte = iosapic_alloc_rte();
+		rte = kzalloc(sizeof (*rte), GFP_ATOMIC);
 		if (!rte) {
 			printk(KERN_WARNING "%s: cannot allocate memory\n",
-			       __FUNCTION__);
+			       __func__);
 			return -ENOMEM;
 		}
 
-		rte_index = gsi - gsi_base;
-		rte->rte_index	= rte_index;
-		rte->addr	= iosapic_address;
-		rte->gsi_base	= gsi_base;
+		rte->iosapic	= &iosapic_lists[index];
+		rte->rte_index	= gsi - rte->iosapic->gsi_base;
 		rte->refcnt++;
-		list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes);
-		iosapic_intr_info[vector].count++;
+		list_add_tail(&rte->rte_list, &iosapic_intr_info[irq].rtes);
+		iosapic_intr_info[irq].count++;
 		iosapic_lists[index].rtes_inuse++;
 	}
-	else if (vector_is_shared(vector)) {
-		struct iosapic_intr_info *info = &iosapic_intr_info[vector];
-		if (info->trigger != trigger || info->polarity != polarity) {
+	else if (rte->refcnt == NO_REF_RTE) {
+		struct iosapic_intr_info *info = &iosapic_intr_info[irq];
+		if (info->count > 0 &&
+		    (info->trigger != trigger || info->polarity != polarity)){
 			printk (KERN_WARNING
 				"%s: cannot override the interrupt\n",
-				__FUNCTION__);
+				__func__);
 			return -EINVAL;
 		}
+		rte->refcnt++;
+		iosapic_intr_info[irq].count++;
+		iosapic_lists[index].rtes_inuse++;
 	}
 
-	iosapic_intr_info[vector].polarity = polarity;
-	iosapic_intr_info[vector].dmode    = delivery;
-	iosapic_intr_info[vector].trigger  = trigger;
+	iosapic_intr_info[irq].polarity = polarity;
+	iosapic_intr_info[irq].dmode    = delivery;
+	iosapic_intr_info[irq].trigger  = trigger;
 
-	if (trigger == IOSAPIC_EDGE)
-		irq_type = &irq_type_iosapic_edge;
-	else
-		irq_type = &irq_type_iosapic_level;
+	irq_type = iosapic_get_irq_chip(trigger);
 
-	idesc = irq_descp(vector);
-	if (idesc->handler != irq_type) {
-		if (idesc->handler != &no_irq_type)
+	chip = irq_get_chip(irq);
+	if (irq_type != NULL && chip != irq_type) {
+		if (chip != &no_irq_chip)
 			printk(KERN_WARNING
 			       "%s: changing vector %d from %s to %s\n",
-			       __FUNCTION__, vector,
-			       idesc->handler->typename, irq_type->typename);
-		idesc->handler = irq_type;
+			       __func__, irq_to_vector(irq),
+			       chip->name, irq_type->name);
+		chip = irq_type;
 	}
+	__irq_set_chip_handler_name_locked(irq, chip, trigger == IOSAPIC_EDGE ?
+					   handle_edge_irq : handle_level_irq,
+					   NULL);
 	return 0;
 }
 
 static unsigned int
-get_target_cpu (unsigned int gsi, int vector)
+get_target_cpu (unsigned int gsi, int irq)
 {
 #ifdef CONFIG_SMP
 	static int cpu = -1;
 	extern int cpe_vector;
+	cpumask_t domain = irq_to_domain(irq);
 
 	/*
 	 * In case of vector shared by multiple RTEs, all RTEs that
 	 * share the vector need to use the same destination CPU.
 	 */
-	if (!list_empty(&iosapic_intr_info[vector].rtes))
-		return iosapic_intr_info[vector].dest;
+	if (iosapic_intr_info[irq].count)
+		return iosapic_intr_info[irq].dest;
 
 	/*
 	 * If the platform supports redirection via XTP, let it
@@ -700,39 +646,38 @@ get_target_cpu (unsigned int gsi, int vector)
 		return cpu_physical_id(smp_processor_id());
 
 #ifdef CONFIG_ACPI
-	if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
+	if (cpe_vector > 0 && irq_to_vector(irq) == IA64_CPEP_VECTOR)
 		return get_cpei_target_cpu();
 #endif
 
 #ifdef CONFIG_NUMA
 	{
 		int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
-		cpumask_t cpu_mask;
+		const struct cpumask *cpu_mask;
 
 		iosapic_index = find_iosapic(gsi);
 		if (iosapic_index < 0 ||
 		    iosapic_lists[iosapic_index].node == MAX_NUMNODES)
 			goto skip_numa_setup;
 
-		cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
-
-		for_each_cpu_mask(numa_cpu, cpu_mask) {
-			if (!cpu_online(numa_cpu))
-				cpu_clear(numa_cpu, cpu_mask);
+		cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node);
+		num_cpus = 0;
+		for_each_cpu_and(numa_cpu, cpu_mask, &domain) {
+			if (cpu_online(numa_cpu))
+				num_cpus++;
 		}
 
-		num_cpus = cpus_weight(cpu_mask);
-
 		if (!num_cpus)
 			goto skip_numa_setup;
 
-		/* Use vector assignment to distribute across cpus in node */
-		cpu_index = vector % num_cpus;
+		/* Use irq assignment to distribute across cpus in node */
+		cpu_index = irq % num_cpus;
 
-		for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
-			numa_cpu = next_cpu(numa_cpu, cpu_mask);
+		for_each_cpu_and(numa_cpu, cpu_mask, &domain)
+			if (cpu_online(numa_cpu) && i++ >= cpu_index)
+				break;
 
-		if (numa_cpu != NR_CPUS)
+		if (numa_cpu < nr_cpu_ids)
 			return cpu_physical_id(numa_cpu);
 	}
 skip_numa_setup:
@@ -743,9 +688,9 @@ skip_numa_setup:
 	 * case of NUMA.)
 	 */
 	do {
-		if (++cpu >= NR_CPUS)
+		if (++cpu >= nr_cpu_ids)
 			cpu = 0;
-	} while (!cpu_online(cpu));
+	} while (!cpu_online(cpu) || !cpu_isset(cpu, domain));
 
 	return cpu_physical_id(cpu);
 #else  /* CONFIG_SMP */
@@ -753,6 +698,15 @@ skip_numa_setup:
 #endif
 }
 
+static inline unsigned char choose_dmode(void)
+{
+#ifdef CONFIG_SMP
+	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
+		return IOSAPIC_LOWEST_PRIORITY;
+#endif
+	return IOSAPIC_FIXED;
+}
+
 /*
  * ACPI can describe IOSAPIC interrupts via static tables and namespace
  * methods.  This provides an interface to register those interrupts and
@@ -762,85 +716,76 @@ int
 iosapic_register_intr (unsigned int gsi,
 		       unsigned long polarity, unsigned long trigger)
 {
-	int vector, mask = 1, err;
+	int irq, mask = 1, err;
 	unsigned int dest;
 	unsigned long flags;
 	struct iosapic_rte_info *rte;
 	u32 low32;
-again:
+	unsigned char dmode;
+	struct irq_desc *desc;
+
 	/*
 	 * If this GSI has already been registered (i.e., it's a
 	 * shared interrupt, or we lost a race to register it),
 	 * don't touch the RTE.
 	 */
 	spin_lock_irqsave(&iosapic_lock, flags);
-	{
-		vector = gsi_to_vector(gsi);
-		if (vector > 0) {
-			rte = gsi_vector_to_rte(gsi, vector);
+	irq = __gsi_to_irq(gsi);
+	if (irq > 0) {
+		rte = find_rte(irq, gsi);
+		if(iosapic_intr_info[irq].count == 0) {
+			assign_irq_vector(irq);
+			irq_init_desc(irq);
+		} else if (rte->refcnt != NO_REF_RTE) {
 			rte->refcnt++;
-			spin_unlock_irqrestore(&iosapic_lock, flags);
-			return vector;
+			goto unlock_iosapic_lock;
 		}
-	}
-	spin_unlock_irqrestore(&iosapic_lock, flags);
+	} else
+		irq = create_irq();
 
 	/* If vector is running out, we try to find a sharable vector */
-	vector = assign_irq_vector(AUTO_ASSIGN);
-	if (vector < 0) {
-		vector = iosapic_find_sharable_vector(trigger, polarity);
-  		if (vector < 0)
-			return -ENOSPC;
+	if (irq < 0) {
+		irq = iosapic_find_sharable_irq(trigger, polarity);
+		if (irq < 0)
+			goto unlock_iosapic_lock;
 	}
 
-	spin_lock_irqsave(&irq_descp(vector)->lock, flags);
-	spin_lock(&iosapic_lock);
-	{
-		if (gsi_to_vector(gsi) > 0) {
-			if (list_empty(&iosapic_intr_info[vector].rtes))
-				free_irq_vector(vector);
-			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&irq_descp(vector)->lock,
-					       flags);
-			goto again;
-		}
-
-		dest = get_target_cpu(gsi, vector);
-		err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
-			      polarity, trigger);
-		if (err < 0) {
-			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&irq_descp(vector)->lock,
-					       flags);
-			return err;
-		}
-
-		/*
-		 * If the vector is shared and already unmasked for
-		 * other interrupt sources, don't mask it.
-		 */
-		low32 = iosapic_intr_info[vector].low32;
-		if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK))
-			mask = 0;
-		set_rte(gsi, vector, dest, mask);
+	desc = irq_to_desc(irq);
+	raw_spin_lock(&desc->lock);
+	dest = get_target_cpu(gsi, irq);
+	dmode = choose_dmode();
+	err = register_intr(gsi, irq, dmode, polarity, trigger);
+	if (err < 0) {
+		raw_spin_unlock(&desc->lock);
+		irq = err;
+		goto unlock_iosapic_lock;
 	}
-	spin_unlock(&iosapic_lock);
-	spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+
+	/*
+	 * If the vector is shared and already unmasked for other
+	 * interrupt sources, don't mask it.
+	 */
+	low32 = iosapic_intr_info[irq].low32;
+	if (irq_is_shared(irq) && !(low32 & IOSAPIC_MASK))
+		mask = 0;
+	set_rte(gsi, irq, dest, mask);
 
 	printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
 	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
 	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
-	       cpu_logical_id(dest), dest, vector);
+	       cpu_logical_id(dest), dest, irq_to_vector(irq));
 
-	return vector;
+	raw_spin_unlock(&desc->lock);
+ unlock_iosapic_lock:
+	spin_unlock_irqrestore(&iosapic_lock, flags);
+	return irq;
 }
 
 void
 iosapic_unregister_intr (unsigned int gsi)
 {
 	unsigned long flags;
-	int irq, vector, index;
-	irq_desc_t *idesc;
+	int irq, index;
 	u32 low32;
 	unsigned long trigger, polarity;
 	unsigned int dest;
@@ -858,73 +803,55 @@ iosapic_unregister_intr (unsigned int gsi)
 		WARN_ON(1);
 		return;
 	}
-	vector = irq_to_vector(irq);
 
-	idesc = irq_descp(irq);
-	spin_lock_irqsave(&idesc->lock, flags);
-	spin_lock(&iosapic_lock);
-	{
-		if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
-			printk(KERN_ERR
-			       "iosapic_unregister_intr(%u) unbalanced\n",
-			       gsi);
-			WARN_ON(1);
-			goto out;
-		}
+	spin_lock_irqsave(&iosapic_lock, flags);
+	if ((rte = find_rte(irq, gsi)) == NULL) {
+		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
+		       gsi);
+		WARN_ON(1);
+		goto out;
+	}
 
-		if (--rte->refcnt > 0)
-			goto out;
+	if (--rte->refcnt > 0)
+		goto out;
 
-		/* Mask the interrupt */
-		low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
-		iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
-			      low32);
-
-		/* Remove the rte entry from the list */
-		list_del(&rte->rte_list);
-		iosapic_intr_info[vector].count--;
-		iosapic_free_rte(rte);
-		index = find_iosapic(gsi);
-		iosapic_lists[index].rtes_inuse--;
-		WARN_ON(iosapic_lists[index].rtes_inuse < 0);
-
-		trigger	 = iosapic_intr_info[vector].trigger;
-		polarity = iosapic_intr_info[vector].polarity;
-		dest     = iosapic_intr_info[vector].dest;
-		printk(KERN_INFO
-		       "GSI %u (%s, %s) -> CPU %d (0x%04x)"
-		       " vector %d unregistered\n",
-		       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
-		       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
-		       cpu_logical_id(dest), dest, vector);
-
-		if (list_empty(&iosapic_intr_info[vector].rtes)) {
-			/* Sanity check */
-			BUG_ON(iosapic_intr_info[vector].count);
-
-			/* Clear the interrupt controller descriptor */
-			idesc->handler = &no_irq_type;
-
-			/* Clear the interrupt information */
-			memset(&iosapic_intr_info[vector], 0,
-			       sizeof(struct iosapic_intr_info));
-			iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
-			INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
-
-			if (idesc->action) {
-				printk(KERN_ERR
-				       "interrupt handlers still exist on"
-				       "IRQ %u\n", irq);
-				WARN_ON(1);
-			}
+	rte->refcnt = NO_REF_RTE;
 
-			/* Free the interrupt vector */
-			free_irq_vector(vector);
-		}
+	/* Mask the interrupt */
+	low32 = iosapic_intr_info[irq].low32 | IOSAPIC_MASK;
+	iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32);
+
+	iosapic_intr_info[irq].count--;
+	index = find_iosapic(gsi);
+	iosapic_lists[index].rtes_inuse--;
+	WARN_ON(iosapic_lists[index].rtes_inuse < 0);
+
+	trigger  = iosapic_intr_info[irq].trigger;
+	polarity = iosapic_intr_info[irq].polarity;
+	dest     = iosapic_intr_info[irq].dest;
+	printk(KERN_INFO
+	       "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
+	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
+	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
+	       cpu_logical_id(dest), dest, irq_to_vector(irq));
+
+	if (iosapic_intr_info[irq].count == 0) {
+#ifdef CONFIG_SMP
+		/* Clear affinity */
+		cpumask_setall(irq_get_irq_data(irq)->affinity);
+#endif
+		/* Clear the interrupt information */
+		iosapic_intr_info[irq].dest = 0;
+		iosapic_intr_info[irq].dmode = 0;
+		iosapic_intr_info[irq].polarity = 0;
+		iosapic_intr_info[irq].trigger = 0;
+		iosapic_intr_info[irq].low32 |= IOSAPIC_MASK;
+
+		/* Destroy and reserve IRQ */
+		destroy_and_reserve_irq(irq);
 	}
  out:
-	spin_unlock(&iosapic_lock);
-	spin_unlock_irqrestore(&idesc->lock, flags);
+	spin_unlock_irqrestore(&iosapic_lock, flags);
 }
 
 /*
@@ -937,37 +864,40 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
 {
 	static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
 	unsigned char delivery;
-	int vector, mask = 0;
+	int irq, vector, mask = 0;
 	unsigned int dest = ((id << 8) | eid) & 0xffff;
 
 	switch (int_type) {
 	      case ACPI_INTERRUPT_PMI:
-		vector = iosapic_vector;
+		irq = vector = iosapic_vector;
+		bind_irq_vector(irq, vector, CPU_MASK_ALL);
 		/*
 		 * since PMI vector is alloc'd by FW(ACPI) not by kernel,
 		 * we need to make sure the vector is available
 		 */
-		iosapic_reassign_vector(vector);
+		iosapic_reassign_vector(irq);
 		delivery = IOSAPIC_PMI;
 		break;
 	      case ACPI_INTERRUPT_INIT:
-		vector = assign_irq_vector(AUTO_ASSIGN);
-		if (vector < 0)
-			panic("%s: out of interrupt vectors!\n", __FUNCTION__);
+		irq = create_irq();
+		if (irq < 0)
+			panic("%s: out of interrupt vectors!\n", __func__);
+		vector = irq_to_vector(irq);
 		delivery = IOSAPIC_INIT;
 		break;
 	      case ACPI_INTERRUPT_CPEI:
-		vector = IA64_CPE_VECTOR;
-		delivery = IOSAPIC_LOWEST_PRIORITY;
+		irq = vector = IA64_CPE_VECTOR;
+		BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
+		delivery = IOSAPIC_FIXED;
 		mask = 1;
 		break;
 	      default:
-		printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__,
+		printk(KERN_ERR "%s: invalid int type 0x%x\n", __func__,
 		       int_type);
 		return -1;
 	}
 
-	register_intr(gsi, vector, delivery, polarity, trigger);
+	register_intr(gsi, irq, delivery, polarity, trigger);
 
 	printk(KERN_INFO
 	       "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
@@ -977,45 +907,36 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
 	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
 	       cpu_logical_id(dest), dest, vector);
 
-	set_rte(gsi, vector, dest, mask);
+	set_rte(gsi, irq, dest, mask);
 	return vector;
 }
 
 /*
  * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
  */
-void __init
-iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
-			  unsigned long polarity,
-			  unsigned long trigger)
+void iosapic_override_isa_irq(unsigned int isa_irq, unsigned int gsi,
+			      unsigned long polarity, unsigned long trigger)
 {
-	int vector;
+	int vector, irq;
 	unsigned int dest = cpu_physical_id(smp_processor_id());
+	unsigned char dmode;
 
-	vector = isa_irq_to_vector(isa_irq);
-
-	register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);
+	irq = vector = isa_irq_to_vector(isa_irq);
+	BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
+	dmode = choose_dmode();
+	register_intr(gsi, irq, dmode, polarity, trigger);
 
 	DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
 	    isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
 	    polarity == IOSAPIC_POL_HIGH ? "high" : "low",
 	    cpu_logical_id(dest), dest, vector);
 
-	set_rte(gsi, vector, dest, 1);
+	set_rte(gsi, irq, dest, 1);
 }
 
 void __init
-iosapic_system_init (int system_pcat_compat)
+ia64_native_iosapic_pcat_compat_init(void)
 {
-	int vector;
-
-	for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
-		iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
-		/* mark as unused */
-		INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
-	}
-
-	pcat_compat = system_pcat_compat;
 	if (pcat_compat) {
 		/*
 		 * Disable the compatibility mode interrupts (8259 style),
@@ -1023,12 +944,30 @@ iosapic_system_init (int system_pcat_compat)
 		 */
 		printk(KERN_INFO
 		       "%s: Disabling PC-AT compatible 8259 interrupts\n",
-		       __FUNCTION__);
+		       __func__);
 		outb(0xff, 0xA1);
 		outb(0xff, 0x21);
 	}
 }
 
+void __init
+iosapic_system_init (int system_pcat_compat)
+{
+	int irq;
+
+	for (irq = 0; irq < NR_IRQS; ++irq) {
+		iosapic_intr_info[irq].low32 = IOSAPIC_MASK;
+		/* mark as unused */
+		INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes);
+
+		iosapic_intr_info[irq].count = 0;
+	}
+
+	pcat_compat = system_pcat_compat;
+	if (pcat_compat)
+		iosapic_pcat_compat_init();
+}
+
 static inline int
 iosapic_alloc (void)
 {
@@ -1038,7 +977,7 @@ iosapic_alloc (void)
 		if (!iosapic_lists[index].addr)
 			return index;
 
-	printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__);
+	printk(KERN_WARNING "%s: failed to allocate iosapic\n", __func__);
 	return -1;
 }
 
@@ -1071,8 +1010,27 @@ iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
 	return 0;
 }
 
-int __devinit
-iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
+static int
+iosapic_delete_rte(unsigned int irq, unsigned int gsi)
+{
+	struct iosapic_rte_info *rte, *temp;
+
+	list_for_each_entry_safe(rte, temp, &iosapic_intr_info[irq].rtes,
+								rte_list) {
+		if (rte->iosapic->gsi_base + rte->rte_index == gsi) {
+			if (rte->refcnt)
+				return -EBUSY;
+
+			list_del(&rte->rte_list);
+			kfree(rte);
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+int iosapic_init(unsigned long phys_addr, unsigned int gsi_base)
 {
 	int num_rte, err, index;
 	unsigned int isa_irq, ver;
@@ -1080,31 +1038,39 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
 	unsigned long flags;
 
 	spin_lock_irqsave(&iosapic_lock, flags);
-	{
-		addr = ioremap(phys_addr, 0);
-		ver = iosapic_version(addr);
+	index = find_iosapic(gsi_base);
+	if (index >= 0) {
+		spin_unlock_irqrestore(&iosapic_lock, flags);
+		return -EBUSY;
+	}
 
-		if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
-			iounmap(addr);
-			spin_unlock_irqrestore(&iosapic_lock, flags);
-			return err;
-		}
+	addr = ioremap(phys_addr, 0);
+	if (addr == NULL) {
+		spin_unlock_irqrestore(&iosapic_lock, flags);
+		return -ENOMEM;
+	}
+	ver = iosapic_version(addr);
+	if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
+		iounmap(addr);
+		spin_unlock_irqrestore(&iosapic_lock, flags);
+		return err;
+	}
 
-		/*
-		 * The MAX_REDIR register holds the highest input pin
-		 * number (starting from 0).
-		 * We add 1 so that we can use it for number of pins (= RTEs)
-		 */
-		num_rte = ((ver >> 16) & 0xff) + 1;
+	/*
+	 * The MAX_REDIR register holds the highest input pin number
+	 * (starting from 0).  We add 1 so that we can use it for
+	 * number of pins (= RTEs)
+	 */
+	num_rte = ((ver >> 16) & 0xff) + 1;
 
-		index = iosapic_alloc();
-		iosapic_lists[index].addr = addr;
-		iosapic_lists[index].gsi_base = gsi_base;
-		iosapic_lists[index].num_rte = num_rte;
+	index = iosapic_alloc();
+	iosapic_lists[index].addr = addr;
+	iosapic_lists[index].gsi_base = gsi_base;
+	iosapic_lists[index].num_rte = num_rte;
 #ifdef CONFIG_NUMA
-		iosapic_lists[index].node = MAX_NUMNODES;
+	iosapic_lists[index].node = MAX_NUMNODES;
 #endif
-	}
+	spin_lock_init(&iosapic_lists[index].lock);
 	spin_unlock_irqrestore(&iosapic_lock, flags);
 
 	if ((gsi_base == 0) && pcat_compat) {
@@ -1121,59 +1087,55 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
 	return 0;
 }
 
-#ifdef CONFIG_HOTPLUG
-int
-iosapic_remove (unsigned int gsi_base)
+int iosapic_remove(unsigned int gsi_base)
 {
-	int index, err = 0;
+	int i, irq, index, err = 0;
 	unsigned long flags;
 
 	spin_lock_irqsave(&iosapic_lock, flags);
-	{
-		index = find_iosapic(gsi_base);
-		if (index < 0) {
-			printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
-			       __FUNCTION__, gsi_base);
-			goto out;
-		}
+	index = find_iosapic(gsi_base);
+	if (index < 0) {
+		printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
+		       __func__, gsi_base);
+		goto out;
+	}
 
-		if (iosapic_lists[index].rtes_inuse) {
-			err = -EBUSY;
-			printk(KERN_WARNING
-			       "%s: IOSAPIC for GSI base %u is busy\n",
-			       __FUNCTION__, gsi_base);
-			goto out;
-		}
+	if (iosapic_lists[index].rtes_inuse) {
+		err = -EBUSY;
+		printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
+		       __func__, gsi_base);
+		goto out;
+	}
+
+	for (i = gsi_base; i < gsi_base + iosapic_lists[index].num_rte; i++) {
+		irq = __gsi_to_irq(i);
+		if (irq < 0)
+			continue;
 
-		iounmap(iosapic_lists[index].addr);
-		iosapic_free(index);
+		err = iosapic_delete_rte(irq, i);
+		if (err)
+			goto out;
 	}
+
+	iounmap(iosapic_lists[index].addr);
+	iosapic_free(index);
  out:
 	spin_unlock_irqrestore(&iosapic_lock, flags);
 	return err;
 }
-#endif /* CONFIG_HOTPLUG */
 
 #ifdef CONFIG_NUMA
-void __devinit
-map_iosapic_to_node(unsigned int gsi_base, int node)
+void map_iosapic_to_node(unsigned int gsi_base, int node)
 {
 	int index;
 
 	index = find_iosapic(gsi_base);
 	if (index < 0) {
 		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
-		       __FUNCTION__, gsi_base);
+		       __func__, gsi_base);
 		return;
 	}
 	iosapic_lists[index].node = node;
 	return;
 }
 #endif
-
-static int __init iosapic_enable_kmalloc (void)
-{
-	iosapic_kmalloc_ok = 1;
-	return 0;
-}
-core_initcall (iosapic_enable_kmalloc);
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 9c72ea3f643..f2c41828113 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -4,7 +4,7 @@
  *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
  *
  * This file contains the code used by various IRQ handling routines:
- * asking for different IRQ's should be done through these routines
+ * asking for different IRQs should be done through these routines
  * instead of just grabbing them. Thus setups with different IRQ numbers
  * shouldn't result in any weird surprises, and installing new handlers
  * should be easier.
@@ -12,7 +12,7 @@
  * Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004
  *
  * 4/14/2004: Added code to handle cpu migration and do safe irq
- *			migration without lossing interrupts for iosapic
+ *			migration without losing interrupts for iosapic
  *			architecture.
  */
 
@@ -23,6 +23,8 @@
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 
+#include <asm/mca.h>
+
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
  * each architecture has to answer this themselves.
@@ -33,9 +35,14 @@ void ack_bad_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_IA64_GENERIC
+ia64_vector __ia64_irq_to_vector(int irq)
+{
+	return irq_cfg[irq].vector;
+}
+
 unsigned int __ia64_local_vector_to_irq (ia64_vector vec)
 {
-	return (unsigned int) vec;
+	return __get_cpu_var(vector_irq)[vec];
 }
 #endif
 
@@ -48,45 +55,9 @@ atomic_t irq_err_count;
 /*
  * /proc/interrupts printing:
  */
-
-int show_interrupts(struct seq_file *p, void *v)
+int arch_show_interrupts(struct seq_file *p, int prec)
 {
-	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
-
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for_each_online_cpu(j) {
-			seq_printf(p, "CPU%d       ",j);
-		}
-		seq_putc(p, '\n');
-	}
-
-	if (i < NR_IRQS) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
-		action = irq_desc[i].action;
-		if (!action)
-			goto skip;
-		seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-		for_each_online_cpu(j) {
-			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-		}
-#endif
-		seq_printf(p, " %14s", irq_desc[i].handler->typename);
-		seq_printf(p, "  %s", action->name);
-
-		for (action=action->next; action; action = action->next)
-			seq_printf(p, ", %s", action->name);
-
-		seq_putc(p, '\n');
-skip:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS)
-		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
 	return 0;
 }
 
@@ -95,32 +66,49 @@ static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
 
 void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
 {
-	cpumask_t mask = CPU_MASK_NONE;
-
-	cpu_set(cpu_logical_id(hwid), mask);
-
 	if (irq < NR_IRQS) {
-		irq_affinity[irq] = mask;
+		cpumask_copy(irq_get_irq_data(irq)->affinity,
+			     cpumask_of(cpu_logical_id(hwid)));
 		irq_redir[irq] = (char) (redir & 0xff);
 	}
 }
+
+bool is_affinity_mask_valid(const struct cpumask *cpumask)
+{
+	if (ia64_platform_is("sn2")) {
+		/* Only allow one CPU to be specified in the smp_affinity mask */
+		if (cpumask_weight(cpumask) != 1)
+			return false;
+	}
+	return true;
+}
+
 #endif /* CONFIG_SMP */
 
+int __init arch_early_irq_init(void)
+{
+	ia64_mca_irq_init();
+	return 0;
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 unsigned int vectors_in_migration[NR_IRQS];
 
 /*
- * Since cpu_online_map is already updated, we just need to check for
+ * Since cpu_online_mask is already updated, we just need to check for
  * affinity that has zeros
  */
 static void migrate_irqs(void)
 {
-	cpumask_t	mask;
-	irq_desc_t *desc;
 	int 		irq, new_cpu;
 
 	for (irq=0; irq < NR_IRQS; irq++) {
-		desc = irq_descp(irq);
+		struct irq_desc *desc = irq_to_desc(irq);
+		struct irq_data *data = irq_desc_get_irq_data(desc);
+		struct irq_chip *chip = irq_data_get_irq_chip(data);
+
+		if (irqd_irq_disabled(data))
+			continue;
 
 		/*
 		 * No handling for now.
@@ -128,31 +116,31 @@ static void migrate_irqs(void)
 		 * tell CPU not to respond to these local intr sources.
 		 * such as ITV,CPEI,MCA etc.
 		 */
-		if (desc->status == IRQ_PER_CPU)
+		if (irqd_is_per_cpu(data))
 			continue;
 
-		cpus_and(mask, irq_affinity[irq], cpu_online_map);
-		if (any_online_cpu(mask) == NR_CPUS) {
+		if (cpumask_any_and(data->affinity, cpu_online_mask)
+		    >= nr_cpu_ids) {
 			/*
 			 * Save it for phase 2 processing
 			 */
 			vectors_in_migration[irq] = irq;
 
-			new_cpu = any_online_cpu(cpu_online_map);
-			mask = cpumask_of_cpu(new_cpu);
+			new_cpu = cpumask_any(cpu_online_mask);
 
 			/*
 			 * Al three are essential, currently WARN_ON.. maybe panic?
 			 */
-			if (desc->handler && desc->handler->disable &&
-				desc->handler->enable && desc->handler->set_affinity) {
-				desc->handler->disable(irq);
-				desc->handler->set_affinity(irq, mask);
-				desc->handler->enable(irq);
+			if (chip && chip->irq_disable &&
+				chip->irq_enable && chip->irq_set_affinity) {
+				chip->irq_disable(data);
+				chip->irq_set_affinity(data,
+						       cpumask_of(new_cpu), false);
+				chip->irq_enable(data);
 			} else {
-				WARN_ON((!(desc->handler) || !(desc->handler->disable) ||
-						!(desc->handler->enable) ||
-						!(desc->handler->set_affinity)));
+				WARN_ON((!chip || !chip->irq_disable ||
+					 !chip->irq_enable ||
+					 !chip->irq_set_affinity));
 			}
 		}
 	}
@@ -162,21 +150,21 @@ void fixup_irqs(void)
 {
 	unsigned int irq;
 	extern void ia64_process_pending_intr(void);
-	extern void ia64_disable_timer(void);
 	extern volatile int time_keeper_id;
 
-	ia64_disable_timer();
+	/* Mask ITV to disable timer */
+	ia64_set_itv(1 << 16);
 
 	/*
 	 * Find a new timesync master
 	 */
 	if (smp_processor_id() == time_keeper_id) {
-		time_keeper_id = first_cpu(cpu_online_map);
+		time_keeper_id = cpumask_first(cpu_online_mask);
 		printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
 	}
 
 	/*
-	 * Phase 1: Locate irq's bound to this cpu and
+	 * Phase 1: Locate IRQs bound to this cpu and
 	 * relocate them for cpu removal.
 	 */
 	migrate_irqs();
@@ -194,8 +182,11 @@ void fixup_irqs(void)
 	 */
 	for (irq=0; irq < NR_IRQS; irq++) {
 		if (vectors_in_migration[irq]) {
+			struct pt_regs *old_regs = set_irq_regs(NULL);
+
 			vectors_in_migration[irq]=0;
-			__do_IRQ(irq, NULL);
+			generic_handle_irq(irq);
+			set_irq_regs(old_regs);
 		}
 	}
 
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 6c4d59fd036..03ea78ed64a 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -1,5 +1,5 @@
 /*
- * linux/arch/ia64/kernel/irq.c
+ * linux/arch/ia64/kernel/irq_ia64.c
  *
  * Copyright (C) 1998-2001 Hewlett-Packard Co
  *	Stephane Eranian <eranian@hpl.hp.com>
@@ -14,7 +14,6 @@
  *						Added CPU Hotplug handling for IPF.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 
 #include <linux/jiffies.h>
@@ -23,14 +22,15 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/kernel_stat.h>
-#include <linux/slab.h>
 #include <linux/ptrace.h>
-#include <linux/random.h>	/* for rand_initialize_irq() */
 #include <linux/signal.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/threads.h>
 #include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/ratelimit.h>
+#include <linux/acpi.h>
+#include <linux/sched.h>
 
 #include <asm/delay.h>
 #include <asm/intrinsics.h>
@@ -38,7 +38,7 @@
 #include <asm/hw_irq.h>
 #include <asm/machvec.h>
 #include <asm/pgtable.h>
-#include <asm/system.h>
+#include <asm/tlbflush.h>
 
 #ifdef CONFIG_PERFMON
 # include <asm/perfmon.h>
@@ -46,10 +46,22 @@
 
 #define IRQ_DEBUG	0
 
+#define IRQ_VECTOR_UNASSIGNED	(0)
+
+#define IRQ_UNUSED		(0)
+#define IRQ_USED		(1)
+#define IRQ_RSVD		(2)
+
+/* These can be overridden in platform_irq_init */
+int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR;
+int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR;
+
 /* default base addr of IPI table */
 void __iomem *ipi_base_addr = ((void __iomem *)
 			       (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
 
+static cpumask_t vector_allocation_domain(int cpu);
+
 /*
  * Legacy IRQ to IA-64 vector translation table.
  */
@@ -60,39 +72,365 @@ __u8 isa_irq_to_vector_map[16] = {
 };
 EXPORT_SYMBOL(isa_irq_to_vector_map);
 
-static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)];
+DEFINE_SPINLOCK(vector_lock);
 
-int
-assign_irq_vector (int irq)
+struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
+	[0 ... NR_IRQS - 1] = {
+		.vector = IRQ_VECTOR_UNASSIGNED,
+		.domain = CPU_MASK_NONE
+	}
+};
+
+DEFINE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq) = {
+	[0 ... IA64_NUM_VECTORS - 1] = -1
+};
+
+static cpumask_t vector_table[IA64_NUM_VECTORS] = {
+	[0 ... IA64_NUM_VECTORS - 1] = CPU_MASK_NONE
+};
+
+static int irq_status[NR_IRQS] = {
+	[0 ... NR_IRQS -1] = IRQ_UNUSED
+};
+
+static inline int find_unassigned_irq(void)
 {
+	int irq;
+
+	for (irq = IA64_FIRST_DEVICE_VECTOR; irq < NR_IRQS; irq++)
+		if (irq_status[irq] == IRQ_UNUSED)
+			return irq;
+	return -ENOSPC;
+}
+
+static inline int find_unassigned_vector(cpumask_t domain)
+{
+	cpumask_t mask;
 	int pos, vector;
- again:
-	pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);
-	vector = IA64_FIRST_DEVICE_VECTOR + pos;
-	if (vector > IA64_LAST_DEVICE_VECTOR)
-		return -ENOSPC;
-	if (test_and_set_bit(pos, ia64_vector_mask))
-		goto again;
+
+	cpumask_and(&mask, &domain, cpu_online_mask);
+	if (cpus_empty(mask))
+		return -EINVAL;
+
+	for (pos = 0; pos < IA64_NUM_DEVICE_VECTORS; pos++) {
+		vector = IA64_FIRST_DEVICE_VECTOR + pos;
+		cpus_and(mask, domain, vector_table[vector]);
+		if (!cpus_empty(mask))
+			continue;
+		return vector;
+	}
+	return -ENOSPC;
+}
+
+static int __bind_irq_vector(int irq, int vector, cpumask_t domain)
+{
+	cpumask_t mask;
+	int cpu;
+	struct irq_cfg *cfg = &irq_cfg[irq];
+
+	BUG_ON((unsigned)irq >= NR_IRQS);
+	BUG_ON((unsigned)vector >= IA64_NUM_VECTORS);
+
+	cpumask_and(&mask, &domain, cpu_online_mask);
+	if (cpus_empty(mask))
+		return -EINVAL;
+	if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain))
+		return 0;
+	if (cfg->vector != IRQ_VECTOR_UNASSIGNED)
+		return -EBUSY;
+	for_each_cpu_mask(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	cfg->vector = vector;
+	cfg->domain = domain;
+	irq_status[irq] = IRQ_USED;
+	cpus_or(vector_table[vector], vector_table[vector], domain);
+	return 0;
+}
+
+int bind_irq_vector(int irq, int vector, cpumask_t domain)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	ret = __bind_irq_vector(irq, vector, domain);
+	spin_unlock_irqrestore(&vector_lock, flags);
+	return ret;
+}
+
+static void __clear_irq_vector(int irq)
+{
+	int vector, cpu;
+	cpumask_t mask;
+	cpumask_t domain;
+	struct irq_cfg *cfg = &irq_cfg[irq];
+
+	BUG_ON((unsigned)irq >= NR_IRQS);
+	BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED);
+	vector = cfg->vector;
+	domain = cfg->domain;
+	cpumask_and(&mask, &cfg->domain, cpu_online_mask);
+	for_each_cpu_mask(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+	cfg->vector = IRQ_VECTOR_UNASSIGNED;
+	cfg->domain = CPU_MASK_NONE;
+	irq_status[irq] = IRQ_UNUSED;
+	cpus_andnot(vector_table[vector], vector_table[vector], domain);
+}
+
+static void clear_irq_vector(int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	__clear_irq_vector(irq);
+	spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+int
+ia64_native_assign_irq_vector (int irq)
+{
+	unsigned long flags;
+	int vector, cpu;
+	cpumask_t domain = CPU_MASK_NONE;
+
+	vector = -ENOSPC;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	for_each_online_cpu(cpu) {
+		domain = vector_allocation_domain(cpu);
+		vector = find_unassigned_vector(domain);
+		if (vector >= 0)
+			break;
+	}
+	if (vector < 0)
+		goto out;
+	if (irq == AUTO_ASSIGN)
+		irq = vector;
+	BUG_ON(__bind_irq_vector(irq, vector, domain));
+ out:
+	spin_unlock_irqrestore(&vector_lock, flags);
 	return vector;
 }
 
 void
-free_irq_vector (int vector)
+ia64_native_free_irq_vector (int vector)
+{
+	if (vector < IA64_FIRST_DEVICE_VECTOR ||
+	    vector > IA64_LAST_DEVICE_VECTOR)
+		return;
+	clear_irq_vector(vector);
+}
+
+int
+reserve_irq_vector (int vector)
+{
+	if (vector < IA64_FIRST_DEVICE_VECTOR ||
+	    vector > IA64_LAST_DEVICE_VECTOR)
+		return -EINVAL;
+	return !!bind_irq_vector(vector, vector, CPU_MASK_ALL);
+}
+
+/*
+ * Initialize vector_irq on a new cpu. This function must be called
+ * with vector_lock held.
+ */
+void __setup_vector_irq(int cpu)
+{
+	int irq, vector;
+
+	/* Clear vector_irq */
+	for (vector = 0; vector < IA64_NUM_VECTORS; ++vector)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+	/* Mark the inuse vectors */
+	for (irq = 0; irq < NR_IRQS; ++irq) {
+		if (!cpu_isset(cpu, irq_cfg[irq].domain))
+			continue;
+		vector = irq_to_vector(irq);
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+}
+
+#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG))
+
+static enum vector_domain_type {
+	VECTOR_DOMAIN_NONE,
+	VECTOR_DOMAIN_PERCPU
+} vector_domain_type = VECTOR_DOMAIN_NONE;
+
+static cpumask_t vector_allocation_domain(int cpu)
 {
-	int pos;
+	if (vector_domain_type == VECTOR_DOMAIN_PERCPU)
+		return cpumask_of_cpu(cpu);
+	return CPU_MASK_ALL;
+}
+
+static int __irq_prepare_move(int irq, int cpu)
+{
+	struct irq_cfg *cfg = &irq_cfg[irq];
+	int vector;
+	cpumask_t domain;
+
+	if (cfg->move_in_progress || cfg->move_cleanup_count)
+		return -EBUSY;
+	if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu))
+		return -EINVAL;
+	if (cpu_isset(cpu, cfg->domain))
+		return 0;
+	domain = vector_allocation_domain(cpu);
+	vector = find_unassigned_vector(domain);
+	if (vector < 0)
+		return -ENOSPC;
+	cfg->move_in_progress = 1;
+	cfg->old_domain = cfg->domain;
+	cfg->vector = IRQ_VECTOR_UNASSIGNED;
+	cfg->domain = CPU_MASK_NONE;
+	BUG_ON(__bind_irq_vector(irq, vector, domain));
+	return 0;
+}
+
+int irq_prepare_move(int irq, int cpu)
+{
+	unsigned long flags;
+	int ret;
 
-	if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR)
+	spin_lock_irqsave(&vector_lock, flags);
+	ret = __irq_prepare_move(irq, cpu);
+	spin_unlock_irqrestore(&vector_lock, flags);
+	return ret;
+}
+
+void irq_complete_move(unsigned irq)
+{
+	struct irq_cfg *cfg = &irq_cfg[irq];
+	cpumask_t cleanup_mask;
+	int i;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	if (unlikely(cpu_isset(smp_processor_id(), cfg->old_domain)))
 		return;
 
-	pos = vector - IA64_FIRST_DEVICE_VECTOR;
-	if (!test_and_clear_bit(pos, ia64_vector_mask))
-		printk(KERN_WARNING "%s: double free!\n", __FUNCTION__);
+	cpumask_and(&cleanup_mask, &cfg->old_domain, cpu_online_mask);
+	cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+	for_each_cpu_mask(i, cleanup_mask)
+		platform_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0);
+	cfg->move_in_progress = 0;
+}
+
+static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id)
+{
+	int me = smp_processor_id();
+	ia64_vector vector;
+	unsigned long flags;
+
+	for (vector = IA64_FIRST_DEVICE_VECTOR;
+	     vector < IA64_LAST_DEVICE_VECTOR; vector++) {
+		int irq;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+		irq = __get_cpu_var(vector_irq)[vector];
+		if (irq < 0)
+			continue;
+
+		desc = irq_to_desc(irq);
+		cfg = irq_cfg + irq;
+		raw_spin_lock(&desc->lock);
+		if (!cfg->move_cleanup_count)
+			goto unlock;
+
+		if (!cpu_isset(me, cfg->old_domain))
+			goto unlock;
+
+		spin_lock_irqsave(&vector_lock, flags);
+		__get_cpu_var(vector_irq)[vector] = -1;
+		cpu_clear(me, vector_table[vector]);
+		spin_unlock_irqrestore(&vector_lock, flags);
+		cfg->move_cleanup_count--;
+	unlock:
+		raw_spin_unlock(&desc->lock);
+	}
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq_move_irqaction = {
+	.handler =	smp_irq_move_cleanup_interrupt,
+	.name =		"irq_move"
+};
+
+static int __init parse_vector_domain(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+	if (!strcmp(arg, "percpu")) {
+		vector_domain_type = VECTOR_DOMAIN_PERCPU;
+		no_int_routing = 1;
+	}
+	return 0;
+}
+early_param("vector", parse_vector_domain);
+#else
+static cpumask_t vector_allocation_domain(int cpu)
+{
+	return CPU_MASK_ALL;
+}
+#endif
+
+
+void destroy_and_reserve_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	irq_init_desc(irq);
+	spin_lock_irqsave(&vector_lock, flags);
+	__clear_irq_vector(irq);
+	irq_status[irq] = IRQ_RSVD;
+	spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+/*
+ * Dynamic irq allocate and deallocation for MSI
+ */
+int create_irq(void)
+{
+	unsigned long flags;
+	int irq, vector, cpu;
+	cpumask_t domain = CPU_MASK_NONE;
+
+	irq = vector = -ENOSPC;
+	spin_lock_irqsave(&vector_lock, flags);
+	for_each_online_cpu(cpu) {
+		domain = vector_allocation_domain(cpu);
+		vector = find_unassigned_vector(domain);
+		if (vector >= 0)
+			break;
+	}
+	if (vector < 0)
+		goto out;
+	irq = find_unassigned_irq();
+	if (irq < 0)
+		goto out;
+	BUG_ON(__bind_irq_vector(irq, vector, domain));
+ out:
+	spin_unlock_irqrestore(&vector_lock, flags);
+	if (irq >= 0)
+		irq_init_desc(irq);
+	return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+	irq_init_desc(irq);
+	clear_irq_vector(irq);
 }
 
 #ifdef CONFIG_SMP
 #	define IS_RESCHEDULE(vec)	(vec == IA64_IPI_RESCHEDULE)
+#	define IS_LOCAL_TLB_FLUSH(vec)	(vec == IA64_IPI_LOCAL_TLB_FLUSH)
 #else
 #	define IS_RESCHEDULE(vec)	(0)
+#	define IS_LOCAL_TLB_FLUSH(vec)	(0)
 #endif
 /*
  * That's where the IVT branches when we get an external
@@ -102,6 +440,7 @@ free_irq_vector (int vector)
 void
 ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 {
+	struct pt_regs *old_regs = set_irq_regs(regs);
 	unsigned long saved_tpr;
 
 #if IRQ_DEBUG
@@ -119,13 +458,9 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 		sp = ia64_getreg(_IA64_REG_SP);
 
 		if ((sp - bsp) < 1024) {
-			static unsigned char count;
-			static long last_time;
+			static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
 
-			if (jiffies - last_time > 5*HZ)
-				count = 0;
-			if (++count < 5) {
-				last_time = jiffies;
+			if (__ratelimit(&ratelimit)) {
 				printk("ia64_handle_irq: DANGER: less than "
 				       "1KB of free stack space!!\n"
 				       "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
@@ -143,11 +478,25 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
 	ia64_srlz_d();
 	while (vector != IA64_SPURIOUS_INT_VECTOR) {
-		if (!IS_RESCHEDULE(vector)) {
+		int irq = local_vector_to_irq(vector);
+
+		if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
+			smp_local_flush_tlb();
+			kstat_incr_irq_this_cpu(irq);
+		} else if (unlikely(IS_RESCHEDULE(vector))) {
+			scheduler_ipi();
+			kstat_incr_irq_this_cpu(irq);
+		} else {
 			ia64_setreg(_IA64_REG_CR_TPR, vector);
 			ia64_srlz_d();
 
-			__do_IRQ(local_vector_to_irq(vector), regs);
+			if (unlikely(irq < 0)) {
+				printk(KERN_ERR "%s: Unexpected interrupt "
+				       "vector %d on CPU %d is not mapped "
+				       "to any IRQ!\n", __func__, vector,
+				       smp_processor_id());
+			} else
+				generic_handle_irq(irq);
 
 			/*
 			 * Disable interrupts and send EOI:
@@ -164,6 +513,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 	 * come through until ia64_eoi() has been done.
 	 */
 	irq_exit();
+	set_irq_regs(old_regs);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -179,15 +529,24 @@ void ia64_process_pending_intr(void)
 
 	vector = ia64_get_ivr();
 
-	 irq_enter();
-	 saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
-	 ia64_srlz_d();
+	irq_enter();
+	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+	ia64_srlz_d();
 
 	 /*
 	  * Perform normal interrupt style processing
 	  */
 	while (vector != IA64_SPURIOUS_INT_VECTOR) {
-		if (!IS_RESCHEDULE(vector)) {
+		int irq = local_vector_to_irq(vector);
+
+		if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
+			smp_local_flush_tlb();
+			kstat_incr_irq_this_cpu(irq);
+		} else if (unlikely(IS_RESCHEDULE(vector))) {
+			kstat_incr_irq_this_cpu(irq);
+		} else {
+			struct pt_regs *old_regs = set_irq_regs(NULL);
+
 			ia64_setreg(_IA64_REG_CR_TPR, vector);
 			ia64_srlz_d();
 
@@ -197,8 +556,16 @@ void ia64_process_pending_intr(void)
 			 * it will work. I hope it works!.
 			 * Probably could shared code.
 			 */
-			vectors_in_migration[local_vector_to_irq(vector)]=0;
-			__do_IRQ(local_vector_to_irq(vector), NULL);
+			if (unlikely(irq < 0)) {
+				printk(KERN_ERR "%s: Unexpected interrupt "
+				       "vector %d on CPU %d not being mapped "
+				       "to any IRQ!!\n", __func__, vector,
+				       smp_processor_id());
+			} else {
+				vectors_in_migration[irq]=0;
+				generic_handle_irq(irq);
+			}
+			set_irq_regs(old_regs);
 
 			/*
 			 * Disable interrupts and send EOI
@@ -215,37 +582,69 @@ void ia64_process_pending_intr(void)
 
 
 #ifdef CONFIG_SMP
-extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs);
+
+static irqreturn_t dummy_handler (int irq, void *dev_id)
+{
+	BUG();
+}
 
 static struct irqaction ipi_irqaction = {
 	.handler =	handle_IPI,
-	.flags =	SA_INTERRUPT,
 	.name =		"IPI"
 };
+
+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
+static struct irqaction resched_irqaction = {
+	.handler =	dummy_handler,
+	.name =		"resched"
+};
+
+static struct irqaction tlb_irqaction = {
+	.handler =	dummy_handler,
+	.name =		"tlb_flush"
+};
+
 #endif
 
 void
-register_percpu_irq (ia64_vector vec, struct irqaction *action)
+ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action)
 {
-	irq_desc_t *desc;
 	unsigned int irq;
 
-	for (irq = 0; irq < NR_IRQS; ++irq)
-		if (irq_to_vector(irq) == vec) {
-			desc = irq_descp(irq);
-			desc->status |= IRQ_PER_CPU;
-			desc->handler = &irq_type_ia64_lsapic;
-			if (action)
-				setup_irq(irq, action);
-		}
+	irq = vec;
+	BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL));
+	irq_set_status_flags(irq, IRQ_PER_CPU);
+	irq_set_chip(irq, &irq_type_ia64_lsapic);
+	if (action)
+		setup_irq(irq, action);
+	irq_set_handler(irq, handle_percpu_irq);
+}
+
+void __init
+ia64_native_register_ipi(void)
+{
+#ifdef CONFIG_SMP
+	register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
+	register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
+	register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction);
+#endif
 }
 
 void __init
 init_IRQ (void)
 {
+#ifdef CONFIG_ACPI
+	acpi_boot_init();
+#endif
+	ia64_register_ipi();
 	register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
 #ifdef CONFIG_SMP
-	register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
+#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)
+	if (vector_domain_type != VECTOR_DOMAIN_NONE)
+		register_percpu_irq(IA64_IRQ_MOVE_VECTOR, &irq_move_irqaction);
+#endif
 #endif
 #ifdef CONFIG_PERFMON
 	pfm_init_percpu();
@@ -260,11 +659,7 @@ ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
 	unsigned long ipi_data;
 	unsigned long phys_cpu_id;
 
-#ifdef CONFIG_SMP
 	phys_cpu_id = cpu_physical_id(cpu);
-#else
-	phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff;
-#endif
 
 	/*
 	 * cpu number is in 8bit ID and 8bit EID
diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c
index ea14e6a0440..1b3a776e516 100644
--- a/arch/ia64/kernel/irq_lsapic.c
+++ b/arch/ia64/kernel/irq_lsapic.c
@@ -15,23 +15,30 @@
 #include <linux/irq.h>
 
 static unsigned int
-lsapic_noop_startup (unsigned int irq)
+lsapic_noop_startup (struct irq_data *data)
 {
 	return 0;
 }
 
 static void
-lsapic_noop (unsigned int irq)
+lsapic_noop (struct irq_data *data)
 {
-	/* nuthing to do... */
+	/* nothing to do... */
 }
 
-struct hw_interrupt_type irq_type_ia64_lsapic = {
-	.typename =	"LSAPIC",
-	.startup =	lsapic_noop_startup,
-	.shutdown =	lsapic_noop,
-	.enable =	lsapic_noop,
-	.disable =	lsapic_noop,
-	.ack =		lsapic_noop,
-	.end =		lsapic_noop
+static int lsapic_retrigger(struct irq_data *data)
+{
+	ia64_resend_irq(data->irq);
+
+	return 1;
+}
+
+struct irq_chip irq_type_ia64_lsapic = {
+	.name =			"LSAPIC",
+	.irq_startup =		lsapic_noop_startup,
+	.irq_shutdown =		lsapic_noop,
+	.irq_enable =		lsapic_noop,
+	.irq_disable =		lsapic_noop,
+	.irq_ack =		lsapic_noop,
+	.irq_retrigger =	lsapic_retrigger,
 };
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 829a43cab79..18e794a5724 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -12,6 +12,14 @@
  *
  * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP
  * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@hp.com>
+ *      Xen paravirtualization
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *                    pv_ops.
+ *      Yaozu (Eddie) Dong <eddie.dong@intel.com>
  */
 /*
  * This file defines the interruption vector table used by the CPU.
@@ -38,22 +46,19 @@
  * Table is based upon EAS2.6 (Oct 1999)
  */
 
-#include <linux/config.h>
 
 #include <asm/asmmacro.h>
 #include <asm/break.h>
-#include <asm/ia32.h>
 #include <asm/kregs.h>
 #include <asm/asm-offsets.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 #include <asm/errno.h>
 
-#if 1
+#if 0
 # define PSR_DEFAULT_BITS	psr.ac
 #else
 # define PSR_DEFAULT_BITS	0
@@ -76,7 +81,7 @@
 	mov r19=n;;			/* prepare to save predicates */		\
 	br.sptk.many dispatch_to_fault_handler
 
-	.section .text.ivt,"ax"
+	.section .text..ivt,"ax"
 
 	.align 32768	// align on 32KB boundary
 	.global ia64_ivt
@@ -103,13 +108,13 @@ ENTRY(vhpt_miss)
 	 *	- the faulting virtual address uses unimplemented address bits
 	 *	- the faulting virtual address has no valid page table mapping
 	 */
-	mov r16=cr.ifa				// get address that caused the TLB miss
+	MOV_FROM_IFA(r16)			// get address that caused the TLB miss
 #ifdef CONFIG_HUGETLB_PAGE
 	movl r18=PAGE_SHIFT
-	mov r25=cr.itir
+	MOV_FROM_ITIR(r25)
 #endif
 	;;
-	rsm psr.dt				// use physical addressing for data
+	RSM_PSR_DT				// use physical addressing for data
 	mov r31=pr				// save the predicate registers
 	mov r19=IA64_KR(PT_BASE)		// get page table base address
 	shl r21=r16,3				// shift bit 60 into sign bit
@@ -169,21 +174,21 @@ ENTRY(vhpt_miss)
 	dep r21=r19,r20,3,(PAGE_SHIFT-3)	// r21=pte_offset(pmd,addr)
 	;;
 (p7)	ld8 r18=[r21]				// read *pte
-	mov r19=cr.isr				// cr.isr bit 32 tells us if this is an insn miss
+	MOV_FROM_ISR(r19)			// cr.isr bit 32 tells us if this is an insn miss
 	;;
 (p7)	tbit.z p6,p7=r18,_PAGE_P_BIT		// page present bit cleared?
-	mov r22=cr.iha				// get the VHPT address that caused the TLB miss
+	MOV_FROM_IHA(r22)			// get the VHPT address that caused the TLB miss
 	;;					// avoid RAW on p7
 (p7)	tbit.nz.unc p10,p11=r19,32		// is it an instruction TLB miss?
 	dep r23=0,r20,0,PAGE_SHIFT		// clear low bits to get page address
 	;;
-(p10)	itc.i r18				// insert the instruction TLB entry
-(p11)	itc.d r18				// insert the data TLB entry
+	ITC_I_AND_D(p10, p11, r18, r24)		// insert the instruction TLB entry and
+						// insert the data TLB entry
 (p6)	br.cond.spnt.many page_fault		// handle bad address/page not present (page fault)
-	mov cr.ifa=r22
+	MOV_TO_IFA(r22, r24)
 
 #ifdef CONFIG_HUGETLB_PAGE
-(p8)	mov cr.itir=r25				// change to default page-size for VHPT
+	MOV_TO_ITIR(p8, r25, r24)		// change to default page-size for VHPT
 #endif
 
 	/*
@@ -193,7 +198,7 @@ ENTRY(vhpt_miss)
 	 */
 	adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
 	;;
-(p7)	itc.d r24
+	ITC_D(p7, r24, r25)
 	;;
 #ifdef CONFIG_SMP
 	/*
@@ -235,7 +240,7 @@ ENTRY(vhpt_miss)
 #endif
 
 	mov pr=r31,-1				// restore predicate registers
-	rfi
+	RFI
 END(vhpt_miss)
 
 	.org ia64_ivt+0x400
@@ -249,11 +254,11 @@ ENTRY(itlb_miss)
 	 * mode, walk the page table, and then re-execute the PTE read and
 	 * go on normally after that.
 	 */
-	mov r16=cr.ifa				// get virtual address
+	MOV_FROM_IFA(r16)			// get virtual address
 	mov r29=b0				// save b0
 	mov r31=pr				// save predicates
 .itlb_fault:
-	mov r17=cr.iha				// get virtual address of PTE
+	MOV_FROM_IHA(r17)			// get virtual address of PTE
 	movl r30=1f				// load nested fault continuation point
 	;;
 1:	ld8 r18=[r17]				// read *pte
@@ -262,7 +267,7 @@ ENTRY(itlb_miss)
 	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
 (p6)	br.cond.spnt page_fault
 	;;
-	itc.i r18
+	ITC_I(p0, r18, r19)
 	;;
 #ifdef CONFIG_SMP
 	/*
@@ -279,7 +284,7 @@ ENTRY(itlb_miss)
 (p7)	ptc.l r16,r20
 #endif
 	mov pr=r31,-1
-	rfi
+	RFI
 END(itlb_miss)
 
 	.org ia64_ivt+0x0800
@@ -293,11 +298,11 @@ ENTRY(dtlb_miss)
 	 * mode, walk the page table, and then re-execute the PTE read and
 	 * go on normally after that.
 	 */
-	mov r16=cr.ifa				// get virtual address
+	MOV_FROM_IFA(r16)			// get virtual address
 	mov r29=b0				// save b0
 	mov r31=pr				// save predicates
 dtlb_fault:
-	mov r17=cr.iha				// get virtual address of PTE
+	MOV_FROM_IHA(r17)			// get virtual address of PTE
 	movl r30=1f				// load nested fault continuation point
 	;;
 1:	ld8 r18=[r17]				// read *pte
@@ -306,7 +311,7 @@ dtlb_fault:
 	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
 (p6)	br.cond.spnt page_fault
 	;;
-	itc.d r18
+	ITC_D(p0, r18, r19)
 	;;
 #ifdef CONFIG_SMP
 	/*
@@ -323,7 +328,7 @@ dtlb_fault:
 (p7)	ptc.l r16,r20
 #endif
 	mov pr=r31,-1
-	rfi
+	RFI
 END(dtlb_miss)
 
 	.org ia64_ivt+0x0c00
@@ -331,9 +336,9 @@ END(dtlb_miss)
 // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
 ENTRY(alt_itlb_miss)
 	DBG_FAULT(3)
-	mov r16=cr.ifa		// get address that caused the TLB miss
+	MOV_FROM_IFA(r16)	// get address that caused the TLB miss
 	movl r17=PAGE_KERNEL
-	mov r21=cr.ipsr
+	MOV_FROM_IPSR(p0, r21)
 	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
 	mov r31=pr
 	;;
@@ -342,9 +347,9 @@ ENTRY(alt_itlb_miss)
 	;;
 	cmp.gt p8,p0=6,r22			// user mode
 	;;
-(p8)	thash r17=r16
+	THASH(p8, r17, r16, r23)
 	;;
-(p8)	mov cr.iha=r17
+	MOV_TO_IHA(p8, r17, r23)
 (p8)	mov r29=b0				// save b0
 (p8)	br.cond.dptk .itlb_fault
 #endif
@@ -359,9 +364,9 @@ ENTRY(alt_itlb_miss)
 	or r19=r19,r18		// set bit 4 (uncached) if the access was to region 6
 (p8)	br.cond.spnt page_fault
 	;;
-	itc.i r19		// insert the TLB entry
+	ITC_I(p0, r19, r18)	// insert the TLB entry
 	mov pr=r31,-1
-	rfi
+	RFI
 END(alt_itlb_miss)
 
 	.org ia64_ivt+0x1000
@@ -369,45 +374,54 @@ END(alt_itlb_miss)
 // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
 ENTRY(alt_dtlb_miss)
 	DBG_FAULT(4)
-	mov r16=cr.ifa		// get address that caused the TLB miss
+	MOV_FROM_IFA(r16)	// get address that caused the TLB miss
 	movl r17=PAGE_KERNEL
-	mov r20=cr.isr
+	MOV_FROM_ISR(r20)
 	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-	mov r21=cr.ipsr
+	MOV_FROM_IPSR(p0, r21)
 	mov r31=pr
+	mov r24=PERCPU_ADDR
 	;;
 #ifdef CONFIG_DISABLE_VHPT
 	shr.u r22=r16,61			// get the region number into r21
 	;;
 	cmp.gt p8,p0=6,r22			// access to region 0-5
 	;;
-(p8)	thash r17=r16
+	THASH(p8, r17, r16, r25)
 	;;
-(p8)	mov cr.iha=r17
+	MOV_TO_IHA(p8, r17, r25)
 (p8)	mov r29=b0				// save b0
 (p8)	br.cond.dptk dtlb_fault
 #endif
+	cmp.ge p10,p11=r16,r24			// access to per_cpu_data?
+	tbit.z p12,p0=r16,61			// access to region 6?
+	mov r25=PERCPU_PAGE_SHIFT << 2
+	mov r26=PERCPU_PAGE_SIZE
+	nop.m 0
+	nop.b 0
+	;;
+(p10)	mov r19=IA64_KR(PER_CPU_DATA)
+(p11)	and r19=r19,r16				// clear non-ppn fields
 	extr.u r23=r21,IA64_PSR_CPL0_BIT,2	// extract psr.cpl
 	and r22=IA64_ISR_CODE_MASK,r20		// get the isr.code field
 	tbit.nz p6,p7=r20,IA64_ISR_SP_BIT	// is speculation bit on?
-	shr.u r18=r16,57			// move address bit 61 to bit 4
-	and r19=r19,r16				// clear ed, reserved bits, and PTE control bits
 	tbit.nz p9,p0=r20,IA64_ISR_NA_BIT	// is non-access bit on?
 	;;
-	andcm r18=0x10,r18	// bit 4=~address-bit(61)
+(p10)	sub r19=r19,r26
+	MOV_TO_ITIR(p10, r25, r24)
 	cmp.ne p8,p0=r0,r23
 (p9)	cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22	// check isr.code field
+(p12)	dep r17=-1,r17,4,1			// set ma=UC for region 6 addr
 (p8)	br.cond.spnt page_fault
 
 	dep r21=-1,r21,IA64_PSR_ED_BIT,1
-	or r19=r19,r17		// insert PTE control bits into r19
 	;;
-	or r19=r19,r18		// set bit 4 (uncached) if the access was to region 6
-(p6)	mov cr.ipsr=r21
+	or r19=r19,r17		// insert PTE control bits into r19
+	MOV_TO_IPSR(p6, r21, r24)
 	;;
-(p7)	itc.d r19		// insert the TLB entry
+	ITC_D(p7, r19, r18)	// insert the TLB entry
 	mov pr=r31,-1
-	rfi
+	RFI
 END(alt_dtlb_miss)
 
 	.org ia64_ivt+0x1400
@@ -436,10 +450,10 @@ ENTRY(nested_dtlb_miss)
 	 *
 	 * Clobbered:	b0, r18, r19, r21, r22, psr.dt (cleared)
 	 */
-	rsm psr.dt				// switch to using physical data addressing
+	RSM_PSR_DT				// switch to using physical data addressing
 	mov r19=IA64_KR(PT_BASE)		// get the page table base address
 	shl r21=r16,3				// shift bit 60 into sign bit
-	mov r18=cr.itir
+	MOV_FROM_ITIR(r18)
 	;;
 	shr.u r17=r16,61			// get the region number into r17
 	extr.u r18=r18,2,6			// get the faulting page size
@@ -499,33 +513,6 @@ ENTRY(ikey_miss)
 	FAULT(6)
 END(ikey_miss)
 
-	//-----------------------------------------------------------------------------------
-	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
-ENTRY(page_fault)
-	ssm psr.dt
-	;;
-	srlz.i
-	;;
-	SAVE_MIN_WITH_COVER
-	alloc r15=ar.pfs,0,0,3,0
-	mov out0=cr.ifa
-	mov out1=cr.isr
-	adds r3=8,r2				// set up second base pointer
-	;;
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collectin is on
-	;;
-(p15)	ssm psr.i				// restore psr.i
-	movl r14=ia64_leave_kernel
-	;;
-	SAVE_REST
-	mov rp=r14
-	;;
-	adds out2=16,r12			// out2 = pointer to pt_regs
-	br.call.sptk.many b6=ia64_do_page_fault	// ignore return address
-END(page_fault)
-
 	.org ia64_ivt+0x1c00
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
@@ -548,10 +535,10 @@ ENTRY(dirty_bit)
 	 * page table TLB entry isn't present, we take a nested TLB miss hit where we look
 	 * up the physical address of the L3 PTE and then continue at label 1 below.
 	 */
-	mov r16=cr.ifa				// get the address that caused the fault
+	MOV_FROM_IFA(r16)			// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	;;
-	thash r17=r16				// compute virtual address of L3 PTE
+	THASH(p0, r17, r16, r18)		// compute virtual address of L3 PTE
 	mov r29=b0				// save b0 in case of nested fault
 	mov r31=pr				// save pr
 #ifdef CONFIG_SMP
@@ -568,7 +555,7 @@ ENTRY(dirty_bit)
 	;;
 (p6)	cmp.eq p6,p7=r26,r18			// Only compare if page is present
 	;;
-(p6)	itc.d r25				// install updated PTE
+	ITC_D(p6, r25, r18)			// install updated PTE
 	;;
 	/*
 	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
@@ -591,10 +578,10 @@ ENTRY(dirty_bit)
 	mov b0=r29				// restore b0
 	;;
 	st8 [r17]=r18				// store back updated PTE
-	itc.d r18				// install updated PTE
+	ITC_D(p0, r18, r16)			// install updated PTE
 #endif
 	mov pr=r31,-1				// restore pr
-	rfi
+	RFI
 END(dirty_bit)
 
 	.org ia64_ivt+0x2400
@@ -603,22 +590,22 @@ END(dirty_bit)
 ENTRY(iaccess_bit)
 	DBG_FAULT(9)
 	// Like Entry 8, except for instruction access
-	mov r16=cr.ifa				// get the address that caused the fault
+	MOV_FROM_IFA(r16)			// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	mov r31=pr				// save predicates
 #ifdef CONFIG_ITANIUM
 	/*
 	 * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
 	 */
-	mov r17=cr.ipsr
+	MOV_FROM_IPSR(p0, r17)
 	;;
-	mov r18=cr.iip
+	MOV_FROM_IIP(r18)
 	tbit.z p6,p0=r17,IA64_PSR_IS_BIT	// IA64 instruction set?
 	;;
 (p6)	mov r16=r18				// if so, use cr.iip instead of cr.ifa
 #endif /* CONFIG_ITANIUM */
 	;;
-	thash r17=r16				// compute virtual address of L3 PTE
+	THASH(p0, r17, r16, r18)		// compute virtual address of L3 PTE
 	mov r29=b0				// save b0 in case of nested fault)
 #ifdef CONFIG_SMP
 	mov r28=ar.ccv				// save ar.ccv
@@ -634,7 +621,7 @@ ENTRY(iaccess_bit)
 	;;
 (p6)	cmp.eq p6,p7=r26,r18			// Only if page present
 	;;
-(p6)	itc.i r25				// install updated PTE
+	ITC_I(p6, r25, r26)			// install updated PTE
 	;;
 	/*
 	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
@@ -657,10 +644,10 @@ ENTRY(iaccess_bit)
 	mov b0=r29				// restore b0
 	;;
 	st8 [r17]=r18				// store back updated PTE
-	itc.i r18				// install updated PTE
+	ITC_I(p0, r18, r16)			// install updated PTE
 #endif /* !CONFIG_SMP */
 	mov pr=r31,-1
-	rfi
+	RFI
 END(iaccess_bit)
 
 	.org ia64_ivt+0x2800
@@ -669,10 +656,10 @@ END(iaccess_bit)
 ENTRY(daccess_bit)
 	DBG_FAULT(10)
 	// Like Entry 8, except for data access
-	mov r16=cr.ifa				// get the address that caused the fault
+	MOV_FROM_IFA(r16)			// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	;;
-	thash r17=r16				// compute virtual address of L3 PTE
+	THASH(p0, r17, r16, r18)		// compute virtual address of L3 PTE
 	mov r31=pr
 	mov r29=b0				// save b0 in case of nested fault)
 #ifdef CONFIG_SMP
@@ -689,7 +676,7 @@ ENTRY(daccess_bit)
 	;;
 (p6)	cmp.eq p6,p7=r26,r18			// Only if page is present
 	;;
-(p6)	itc.d r25				// install updated PTE
+	ITC_D(p6, r25, r26)			// install updated PTE
 	/*
 	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
 	 * cannot possibly affect the following loads:
@@ -709,11 +696,11 @@ ENTRY(daccess_bit)
 	or r18=_PAGE_A,r18			// set the accessed bit
 	;;
 	st8 [r17]=r18				// store back updated PTE
-	itc.d r18				// install updated PTE
+	ITC_D(p0, r18, r16)			// install updated PTE
 #endif
 	mov b0=r29				// restore b0
 	mov pr=r31,-1
-	rfi
+	RFI
 END(daccess_bit)
 
 	.org ia64_ivt+0x2c00
@@ -737,10 +724,10 @@ ENTRY(break_fault)
 	 */
 	DBG_FAULT(11)
 	mov.m r16=IA64_KR(CURRENT)		// M2 r16 <- current task (12 cyc)
-	mov r29=cr.ipsr				// M2 (12 cyc)
+	MOV_FROM_IPSR(p0, r29)			// M2 (12 cyc)
 	mov r31=pr				// I0 (2 cyc)
 
-	mov r17=cr.iim				// M2 (2 cyc)
+	MOV_FROM_IIM(r17)			// M2 (2 cyc)
 	mov.m r27=ar.rsc			// M2 (12 cyc)
 	mov r18=__IA64_BREAK_SYSCALL		// A
 
@@ -759,7 +746,7 @@ ENTRY(break_fault)
 	nop.m 0
 	movl r30=sys_call_table			// X
 
-	mov r28=cr.iip				// M2 (2 cyc)
+	MOV_FROM_IIP(r28)			// M2 (2 cyc)
 	cmp.eq p0,p7=r18,r17			// I0 is this a system call?
 (p7)	br.cond.spnt non_syscall		// B  no ->
 	//
@@ -797,8 +784,13 @@ ENTRY(break_fault)
 
 (p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
 (p9)	adds r8=1,r8				// A    increment ei to next slot
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	;;
+	mov b6=r30				// I0   setup syscall handler branch reg early
+#else
 	nop.i 0
 	;;
+#endif
 
 	mov.m r25=ar.unat			// M2 (5 cyc)
 	dep r29=r8,r29,41,2			// I0   insert new ei into cr.ipsr
@@ -809,7 +801,11 @@ ENTRY(break_fault)
 	//
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	MOV_FROM_ITC(p0, p14, r30, r18)		// M    get cycle for accounting
+#else
 	mov b6=r30				// I0   setup syscall handler branch reg early
+#endif
 	cmp.eq pKStk,pUStk=r0,r17		// A    were we on kernel stacks already?
 
 	and r9=_TIF_SYSCALL_TRACEAUDIT,r9	// A    mask trace or audit
@@ -821,20 +817,43 @@ ENTRY(break_fault)
 	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 1:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	// mov.m r30=ar.itc is called in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13	// A
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13	// A
+(pKStk)	br.cond.spnt .skip_accounting		// B	unlikely skip
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// M  get last stamp
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// M  time at leave
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// M  cumulated stime
+	ld8 r21=[r17]				// M  cumulated utime
+	sub r22=r19,r18				// A  stime before leave
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// M  update stamp
+	sub r18=r30,r19				// A  elapsed time in user
+	;;
+	add r20=r20,r22				// A  sum stime
+	add r21=r21,r18				// A  sum utime
+	;;
+	st8 [r16]=r20				// M  update stime
+	st8 [r17]=r21				// M  update utime
+	;;
+.skip_accounting:
+#endif
 	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
 	nop 0
-	bsw.1					// B (6 cyc) regs are saved, switch to bank 1
+	BSW_1(r2, r14)				// B (6 cyc) regs are saved, switch to bank 1
 	;;
 
-	ssm psr.ic | PSR_DEFAULT_BITS		// M2	now it's safe to re-enable intr.-collection
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r16)	// M2	now it's safe to re-enable intr.-collection
+						// M0   ensure interruption collection is on
 	movl r3=ia64_ret_from_syscall		// X
 	;;
-
-	srlz.i					// M0   ensure interruption collection is on
 	mov rp=r3				// I0   set the real return addr
 (p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
 
-(p15)	ssm psr.i				// M2   restore psr.i
+	SSM_PSR_I(p15, p15, r16)		// M2   restore psr.i
 (p14)	br.call.sptk.many b6=b6			// B    invoke syscall-handker (ignore return addr)
 	br.cond.spnt.many ia64_trace_syscall	// B	do syscall-tracing thingamagic
 	// NOT REACHED
@@ -854,27 +873,8 @@ END(break_fault)
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
 ENTRY(interrupt)
-	DBG_FAULT(12)
-	mov r31=pr		// prepare to save predicates
-	;;
-	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	srlz.i			// ensure everybody knows psr.ic is back on
-	;;
-	SAVE_REST
-	;;
-	MCA_RECOVER_RANGE(interrupt)
-	alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
-	mov out0=cr.ivr		// pass cr.ivr as first arg
-	add out1=16,sp		// pass pointer to pt_regs as second arg
-	;;
-	srlz.d			// make sure we see the effect of cr.ivr
-	movl r14=ia64_leave_kernel
-	;;
-	mov rp=r14
-	br.call.sptk.many b6=ia64_handle_irq
+	/* interrupt handler has become too big to fit this area. */
+	br.sptk.many __interrupt
 END(interrupt)
 
 	.org ia64_ivt+0x3400
@@ -920,6 +920,7 @@ END(interrupt)
 	 *	- r27: saved ar.rsc
 	 *	- r28: saved cr.iip
 	 *	- r29: saved cr.ipsr
+	 *	- r30: ar.itc for accounting (don't touch)
 	 *	- r31: saved pr
 	 *	-  b0: original contents (to be saved)
 	 * On exit:
@@ -936,6 +937,7 @@ END(interrupt)
 	 *	- ar.fpsr: set to kernel settings
 	 *	-  b6: preserved (same as on entry)
 	 */
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
 GLOBAL_ENTRY(ia64_syscall_setup)
 #if PT(B6) != 0
 # error This code assumes that b6 is the first field in pt_regs.
@@ -1027,6 +1029,7 @@ GLOBAL_ENTRY(ia64_syscall_setup)
 (p10)	mov r8=-EINVAL
 	br.ret.sptk.many b7
 END(ia64_syscall_setup)
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
 
 	.org ia64_ivt+0x3c00
 /////////////////////////////////////////////////////////////////////////////////////////
@@ -1034,53 +1037,46 @@ END(ia64_syscall_setup)
 	DBG_FAULT(15)
 	FAULT(15)
 
+	.org ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+	DBG_FAULT(16)
+	FAULT(16)
+
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
 	/*
-	 * Squatting in this space ...
+	 * There is no particular reason for this code to be here, other than
+	 * that there happens to be space here that would go unused otherwise.
+	 * If this fault ever gets "unreserved", simply moved the following
+	 * code to a more suitable spot...
 	 *
-	 * This special case dispatcher for illegal operation faults allows preserved
-	 * registers to be modified through a callback function (asm only) that is handed
-	 * back from the fault handler in r8. Up to three arguments can be passed to the
-	 * callback function by returning an aggregate with the callback as its first
-	 * element, followed by the arguments.
+	 * account_sys_enter is called from SAVE_MIN* macros if accounting is
+	 * enabled and if the macro is entered from user mode.
 	 */
-ENTRY(dispatch_illegal_op_fault)
-	.prologue
-	.body
-	SAVE_MIN_WITH_COVER
-	ssm psr.ic | PSR_DEFAULT_BITS
+GLOBAL_ENTRY(account_sys_enter)
+	// mov.m r20=ar.itc is called in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
 	;;
-	srlz.i		// guarantee that interruption collection is on
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at left from kernel
+        ;;
+	ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
 	;;
-(p15)	ssm psr.i	// restore psr.i
-	adds r3=8,r2	// set up second base pointer for SAVE_REST
+	st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r20,r19				// elapsed time in user mode
 	;;
-	alloc r14=ar.pfs,0,0,1,0	// must be first in insn group
-	mov out0=ar.ec
+	add r23=r23,r22				// sum stime
+	add r21=r21,r18				// sum utime
 	;;
-	SAVE_REST
-	PT_REGS_UNWIND_INFO(0)
+	st8 [r16]=r23				// update stime
+	st8 [r17]=r21				// update utime
 	;;
-	br.call.sptk.many rp=ia64_illegal_op_fault
-.ret0:	;;
-	alloc r14=ar.pfs,0,0,3,0	// must be first in insn group
-	mov out0=r9
-	mov out1=r10
-	mov out2=r11
-	movl r15=ia64_leave_kernel
-	;;
-	mov rp=r15
-	mov b6=r8
-	;;
-	cmp.ne p6,p0=0,r8
-(p6)	br.call.dpnt.many b6=b6		// call returns to ia64_leave_kernel
-	br.sptk.many ia64_leave_kernel
-END(dispatch_illegal_op_fault)
-
-	.org ia64_ivt+0x4000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
-	DBG_FAULT(16)
-	FAULT(16)
+	br.ret.sptk.many rp
+END(account_sys_enter)
+#endif
 
 	.org ia64_ivt+0x4400
 /////////////////////////////////////////////////////////////////////////////////////////
@@ -1088,110 +1084,18 @@ END(dispatch_illegal_op_fault)
 	DBG_FAULT(17)
 	FAULT(17)
 
-ENTRY(non_syscall)
-	mov ar.rsc=r27			// restore ar.rsc before SAVE_MIN_WITH_COVER
-	;;
-	SAVE_MIN_WITH_COVER
-
-	// There is no particular reason for this code to be here, other than that
-	// there happens to be space here that would go unused otherwise.  If this
-	// fault ever gets "unreserved", simply moved the following code to a more
-	// suitable spot...
-
-	alloc r14=ar.pfs,0,0,2,0
-	mov out0=cr.iim
-	add out1=16,sp
-	adds r3=8,r2			// set up second base pointer for SAVE_REST
-
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i				// guarantee that interruption collection is on
-	;;
-(p15)	ssm psr.i			// restore psr.i
-	movl r15=ia64_leave_kernel
-	;;
-	SAVE_REST
-	mov rp=r15
-	;;
-	br.call.sptk.many b6=ia64_bad_break	// avoid WAW on CFM and ignore return addr
-END(non_syscall)
-
 	.org ia64_ivt+0x4800
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x4800 Entry 18 (size 64 bundles) Reserved
 	DBG_FAULT(18)
 	FAULT(18)
 
-	/*
-	 * There is no particular reason for this code to be here, other than that
-	 * there happens to be space here that would go unused otherwise.  If this
-	 * fault ever gets "unreserved", simply moved the following code to a more
-	 * suitable spot...
-	 */
-
-ENTRY(dispatch_unaligned_handler)
-	SAVE_MIN_WITH_COVER
-	;;
-	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
-	mov out0=cr.ifa
-	adds out1=16,sp
-
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collection is on
-	;;
-(p15)	ssm psr.i				// restore psr.i
-	adds r3=8,r2				// set up second base pointer
-	;;
-	SAVE_REST
-	movl r14=ia64_leave_kernel
-	;;
-	mov rp=r14
-	br.sptk.many ia64_prepare_handle_unaligned
-END(dispatch_unaligned_handler)
-
 	.org ia64_ivt+0x4c00
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x4c00 Entry 19 (size 64 bundles) Reserved
 	DBG_FAULT(19)
 	FAULT(19)
 
-	/*
-	 * There is no particular reason for this code to be here, other than that
-	 * there happens to be space here that would go unused otherwise.  If this
-	 * fault ever gets "unreserved", simply moved the following code to a more
-	 * suitable spot...
-	 */
-
-ENTRY(dispatch_to_fault_handler)
-	/*
-	 * Input:
-	 *	psr.ic:	off
-	 *	r19:	fault vector number (e.g., 24 for General Exception)
-	 *	r31:	contains saved predicates (pr)
-	 */
-	SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,5,0
-	mov out0=r15
-	mov out1=cr.isr
-	mov out2=cr.ifa
-	mov out3=cr.iim
-	mov out4=cr.itir
-	;;
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collection is on
-	;;
-(p15)	ssm psr.i				// restore psr.i
-	adds r3=8,r2				// set up second base pointer for SAVE_REST
-	;;
-	SAVE_REST
-	movl r14=ia64_leave_kernel
-	;;
-	mov rp=r14
-	br.call.sptk.many b6=ia64_fault
-END(dispatch_to_fault_handler)
-
 //
 // --- End of long entries, Beginning of short entries
 //
@@ -1201,8 +1105,8 @@ END(dispatch_to_fault_handler)
 // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
 ENTRY(page_not_present)
 	DBG_FAULT(20)
-	mov r16=cr.ifa
-	rsm psr.dt
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
 	/*
 	 * The Linux page fault handler doesn't expect non-present pages to be in
 	 * the TLB.  Flush the existing entry now, so we meet that expectation.
@@ -1221,8 +1125,8 @@ END(page_not_present)
 // 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
 ENTRY(key_permission)
 	DBG_FAULT(21)
-	mov r16=cr.ifa
-	rsm psr.dt
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
 	mov r31=pr
 	;;
 	srlz.d
@@ -1234,8 +1138,8 @@ END(key_permission)
 // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
 ENTRY(iaccess_rights)
 	DBG_FAULT(22)
-	mov r16=cr.ifa
-	rsm psr.dt
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
 	mov r31=pr
 	;;
 	srlz.d
@@ -1247,8 +1151,8 @@ END(iaccess_rights)
 // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
 ENTRY(daccess_rights)
 	DBG_FAULT(23)
-	mov r16=cr.ifa
-	rsm psr.dt
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
 	mov r31=pr
 	;;
 	srlz.d
@@ -1260,7 +1164,7 @@ END(daccess_rights)
 // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
 ENTRY(general_exception)
 	DBG_FAULT(24)
-	mov r16=cr.isr
+	MOV_FROM_ISR(r16)
 	mov r31=pr
 	;;
 	cmp4.eq p6,p0=0,r16
@@ -1289,8 +1193,8 @@ END(disabled_fp_reg)
 ENTRY(nat_consumption)
 	DBG_FAULT(26)
 
-	mov r16=cr.ipsr
-	mov r17=cr.isr
+	MOV_FROM_IPSR(p0, r16)
+	MOV_FROM_ISR(r17)
 	mov r31=pr				// save PR
 	;;
 	and r18=0xf,r17				// r18 = cr.ipsr.code{3:0}
@@ -1300,10 +1204,10 @@ ENTRY(nat_consumption)
 	dep r16=-1,r16,IA64_PSR_ED_BIT,1
 (p6)	br.cond.spnt 1f		// branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH)
 	;;
-	mov cr.ipsr=r16		// set cr.ipsr.na
+	MOV_TO_IPSR(p0, r16, r18)
 	mov pr=r31,-1
 	;;
-	rfi
+	RFI
 
 1:	mov pr=r31,-1
 	;;
@@ -1325,26 +1229,26 @@ ENTRY(speculation_vector)
 	 *
 	 * cr.imm contains zero_ext(imm21)
 	 */
-	mov r18=cr.iim
+	MOV_FROM_IIM(r18)
 	;;
-	mov r17=cr.iip
+	MOV_FROM_IIP(r17)
 	shl r18=r18,43			// put sign bit in position (43=64-21)
 	;;
 
-	mov r16=cr.ipsr
+	MOV_FROM_IPSR(p0, r16)
 	shr r18=r18,39			// sign extend (39=43-4)
 	;;
 
 	add r17=r17,r18			// now add the offset
 	;;
-	mov cr.iip=r17
+	MOV_TO_IIP(r17, r19)
 	dep r16=0,r16,41,2		// clear EI
 	;;
 
-	mov cr.ipsr=r16
+	MOV_TO_IPSR(p0, r16, r19)
 	;;
 
-	rfi				// and go back
+	RFI
 END(speculation_vector)
 
 	.org ia64_ivt+0x5800
@@ -1480,28 +1384,6 @@ END(ia32_exception)
 // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
 ENTRY(ia32_intercept)
 	DBG_FAULT(46)
-#ifdef	CONFIG_IA32_SUPPORT
-	mov r31=pr
-	mov r16=cr.isr
-	;;
-	extr.u r17=r16,16,8	// get ISR.code
-	mov r18=ar.eflag
-	mov r19=cr.iim		// old eflag value
-	;;
-	cmp.ne p6,p0=2,r17
-(p6)	br.cond.spnt 1f		// not a system flag fault
-	xor r16=r18,r19
-	;;
-	extr.u r17=r16,18,1	// get the eflags.ac bit
-	;;
-	cmp.eq p6,p0=0,r17
-(p6)	br.cond.spnt 1f		// eflags.ac bit didn't change
-	;;
-	mov pr=r31,-1		// restore predicate registers
-	rfi
-
-1:
-#endif	// CONFIG_IA32_SUPPORT
 	FAULT(46)
 END(ia32_intercept)
 
@@ -1510,12 +1392,7 @@ END(ia32_intercept)
 // 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt  (74)
 ENTRY(ia32_interrupt)
 	DBG_FAULT(47)
-#ifdef CONFIG_IA32_SUPPORT
-	mov r31=pr
-	br.sptk.many dispatch_to_ia32_handler
-#else
 	FAULT(47)
-#endif
 END(ia32_interrupt)
 
 	.org ia64_ivt+0x6c00
@@ -1638,89 +1515,174 @@ END(ia32_interrupt)
 	DBG_FAULT(67)
 	FAULT(67)
 
-#ifdef CONFIG_IA32_SUPPORT
+	//-----------------------------------------------------------------------------------
+	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
+ENTRY(page_fault)
+	SSM_PSR_DT_AND_SRLZ_I
+	;;
+	SAVE_MIN_WITH_COVER
+	alloc r15=ar.pfs,0,0,3,0
+	MOV_FROM_IFA(out0)
+	MOV_FROM_ISR(out1)
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3)
+	adds r3=8,r2				// set up second base pointer
+	SSM_PSR_I(p15, p15, r14)		// restore psr.i
+	movl r14=ia64_leave_kernel
+	;;
+	SAVE_REST
+	mov rp=r14
+	;;
+	adds out2=16,r12			// out2 = pointer to pt_regs
+	br.call.sptk.many b6=ia64_do_page_fault	// ignore return address
+END(page_fault)
 
-	/*
-	 * There is no particular reason for this code to be here, other than that
-	 * there happens to be space here that would go unused otherwise.  If this
-	 * fault ever gets "unreserved", simply moved the following code to a more
-	 * suitable spot...
-	 */
+ENTRY(non_syscall)
+	mov ar.rsc=r27			// restore ar.rsc before SAVE_MIN_WITH_COVER
+	;;
+	SAVE_MIN_WITH_COVER
+
+	// There is no particular reason for this code to be here, other than that
+	// there happens to be space here that would go unused otherwise.  If this
+	// fault ever gets "unreserved", simply moved the following code to a more
+	// suitable spot...
 
-	// IA32 interrupt entry point
+	alloc r14=ar.pfs,0,0,2,0
+	MOV_FROM_IIM(out0)
+	add out1=16,sp
+	adds r3=8,r2			// set up second base pointer for SAVE_REST
 
-ENTRY(dispatch_to_ia32_handler)
-	SAVE_MIN
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24)
+					// guarantee that interruption collection is on
+	SSM_PSR_I(p15, p15, r15)	// restore psr.i
+	movl r15=ia64_leave_kernel
 	;;
-	mov r14=cr.isr
-	ssm psr.ic | PSR_DEFAULT_BITS
+	SAVE_REST
+	mov rp=r15
 	;;
-	srlz.i					// guarantee that interruption collection is on
+	br.call.sptk.many b6=ia64_bad_break	// avoid WAW on CFM and ignore return addr
+END(non_syscall)
+
+ENTRY(__interrupt)
+	DBG_FAULT(12)
+	mov r31=pr		// prepare to save predicates
 	;;
-(p15)	ssm psr.i
-	adds r3=8,r2		// Base pointer for SAVE_REST
+	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14)
+				// ensure everybody knows psr.ic is back on
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
 	;;
 	SAVE_REST
 	;;
-	mov r15=0x80
-	shr r14=r14,16		// Get interrupt number
-	;;
-	cmp.ne p6,p0=r14,r15
-(p6)	br.call.dpnt.many b6=non_ia32_syscall
-
-	adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp	// 16 byte hole per SW conventions
-	adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
+	MCA_RECOVER_RANGE(interrupt)
+	alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+	MOV_FROM_IVR(out0, r8)	// pass cr.ivr as first arg
+	add out1=16,sp		// pass pointer to pt_regs as second arg
 	;;
-	cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
-	ld8 r8=[r14]		// get r8
+	srlz.d			// make sure we see the effect of cr.ivr
+	movl r14=ia64_leave_kernel
 	;;
-	st8 [r15]=r8		// save original EAX in r1 (IA32 procs don't use the GP)
+	mov rp=r14
+	br.call.sptk.many b6=ia64_handle_irq
+END(__interrupt)
+
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 */
+
+ENTRY(dispatch_unaligned_handler)
+	SAVE_MIN_WITH_COVER
 	;;
-	alloc r15=ar.pfs,0,0,6,0	// must first in an insn group
+	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
+	MOV_FROM_IFA(out0)
+	adds out1=16,sp
+
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24)
+						// guarantee that interruption collection is on
+	SSM_PSR_I(p15, p15, r3)			// restore psr.i
+	adds r3=8,r2				// set up second base pointer
 	;;
-	ld4 r8=[r14],8		// r8 == eax (syscall number)
-	mov r15=IA32_NR_syscalls
+	SAVE_REST
+	movl r14=ia64_leave_kernel
 	;;
-	cmp.ltu.unc p6,p7=r8,r15
-	ld4 out1=[r14],8	// r9 == ecx
+	mov rp=r14
+	br.sptk.many ia64_prepare_handle_unaligned
+END(dispatch_unaligned_handler)
+
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 */
+
+ENTRY(dispatch_to_fault_handler)
+	/*
+	 * Input:
+	 *	psr.ic:	off
+	 *	r19:	fault vector number (e.g., 24 for General Exception)
+	 *	r31:	contains saved predicates (pr)
+	 */
+	SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,5,0
+	MOV_FROM_ISR(out1)
+	MOV_FROM_IFA(out2)
+	MOV_FROM_IIM(out3)
+	MOV_FROM_ITIR(out4)
 	;;
-	ld4 out2=[r14],8	// r10 == edx
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0)
+						// guarantee that interruption collection is on
+	mov out0=r15
 	;;
-	ld4 out0=[r14]		// r11 == ebx
-	adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp
+	SSM_PSR_I(p15, p15, r3)			// restore psr.i
+	adds r3=8,r2				// set up second base pointer for SAVE_REST
 	;;
-	ld4 out5=[r14],PT(R14)-PT(R13)	// r13 == ebp
+	SAVE_REST
+	movl r14=ia64_leave_kernel
 	;;
-	ld4 out3=[r14],PT(R15)-PT(R14)	// r14 == esi
-	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
+	mov rp=r14
+	br.call.sptk.many b6=ia64_fault
+END(dispatch_to_fault_handler)
+
+	/*
+	 * Squatting in this space ...
+	 *
+	 * This special case dispatcher for illegal operation faults allows preserved
+	 * registers to be modified through a callback function (asm only) that is handed
+	 * back from the fault handler in r8. Up to three arguments can be passed to the
+	 * callback function by returning an aggregate with the callback as its first
+	 * element, followed by the arguments.
+	 */
+ENTRY(dispatch_illegal_op_fault)
+	.prologue
+	.body
+	SAVE_MIN_WITH_COVER
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24)
+				// guarantee that interruption collection is on
 	;;
-	ld4 out4=[r14]		// r15 == edi
-	movl r16=ia32_syscall_table
+	SSM_PSR_I(p15, p15, r3)	// restore psr.i
+	adds r3=8,r2	// set up second base pointer for SAVE_REST
 	;;
-(p6)	shladd r16=r8,3,r16	// force ni_syscall if not valid syscall number
-	ld4 r2=[r2]		// r2 = current_thread_info()->flags
+	alloc r14=ar.pfs,0,0,1,0	// must be first in insn group
+	mov out0=ar.ec
 	;;
-	ld8 r16=[r16]
-	and r2=_TIF_SYSCALL_TRACEAUDIT,r2	// mask trace or audit
+	SAVE_REST
+	PT_REGS_UNWIND_INFO(0)
 	;;
-	mov b6=r16
-	movl r15=ia32_ret_from_syscall
-	cmp.eq p8,p0=r2,r0
+	br.call.sptk.many rp=ia64_illegal_op_fault
+.ret0:	;;
+	alloc r14=ar.pfs,0,0,3,0	// must be first in insn group
+	mov out0=r9
+	mov out1=r10
+	mov out2=r11
+	movl r15=ia64_leave_kernel
 	;;
 	mov rp=r15
-(p8)	br.call.sptk.many b6=b6
-	br.cond.sptk ia32_trace_syscall
-
-non_ia32_syscall:
-	alloc r15=ar.pfs,0,0,2,0
-	mov out0=r14				// interrupt #
-	add out1=16,sp				// pointer to pt_regs
-	;;			// avoid WAW on CFM
-	br.call.sptk.many rp=ia32_bad_interrupt
-.ret1:	movl r15=ia64_leave_kernel
+	mov b6=r8
 	;;
-	mov rp=r15
-	br.ret.sptk.many rp
-END(dispatch_to_ia32_handler)
-
-#endif /* CONFIG_IA32_SUPPORT */
+	cmp.ne p6,p0=0,r8
+(p6)	br.call.dpnt.many b6=b6		// call returns to ia64_leave_kernel
+	br.sptk.many ia64_leave_kernel
+END(dispatch_illegal_op_fault)
diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S
index 5cd6226f44f..f69389c7be1 100644
--- a/arch/ia64/kernel/jprobes.S
+++ b/arch/ia64/kernel/jprobes.S
@@ -45,13 +45,14 @@
  * to the correct location.
  */
 #include <asm/asmmacro.h>
+#include <asm/break.h>
 
 	/*
 	 * void jprobe_break(void)
 	 */
 	.section .kprobes.text, "ax"
 ENTRY(jprobe_break)
-	break.m 0x80300
+	break.m __IA64_BREAK_JPROBE
 END(jprobe_break)
 
 	/*
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index f9039f88d01..074fde49c9e 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -23,16 +23,15 @@
  *              <anil.s.keshavamurthy@intel.com> adapted from i386
  */
 
-#include <linux/config.h>
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
 #include <linux/moduleloader.h>
+#include <linux/kdebug.h>
 
 #include <asm/pgtable.h>
-#include <asm/kdebug.h>
 #include <asm/sections.h>
 #include <asm/uaccess.h>
 
@@ -41,6 +40,8 @@ extern void jprobe_inst_return(void);
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
 enum instruction_type {A, I, M, F, B, L, X, u};
 static enum instruction_type bundle_encoding[32][3] = {
   { M, I, I },				/* 00 */
@@ -77,6 +78,20 @@ static enum instruction_type bundle_encoding[32][3] = {
   { u, u, u },  			/* 1F */
 };
 
+/* Insert a long branch code */
+static void __kprobes set_brl_inst(void *from, void *to)
+{
+	s64 rel = ((s64) to - (s64) from) >> 4;
+	bundle_t *brl;
+	brl = (bundle_t *) ((u64) from & ~0xf);
+	brl->quad0.template = 0x05;	/* [MLX](stop) */
+	brl->quad0.slot0 = NOP_M_INST;	/* nop.m 0x0 */
+	brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2;
+	brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46);
+	/* brl.cond.sptk.many.clr rel<<4 (qp=0) */
+	brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
+}
+
 /*
  * In this function we check to see if the instruction
  * is IP relative instruction and update the kprobe
@@ -89,9 +104,10 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint  slot,
 {
 	p->ainsn.inst_flag = 0;
 	p->ainsn.target_br_reg = 0;
+	p->ainsn.slot = slot;
 
 	/* Check for Break instruction
- 	 * Bits 37:40 Major opcode to be zero
+	 * Bits 37:40 Major opcode to be zero
 	 * Bits 27:32 X6 to be zero
 	 * Bits 32:35 X3 to be zero
 	 */
@@ -105,19 +121,19 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint  slot,
 		switch (major_opcode) {
 		  case INDIRECT_CALL_OPCODE:
 	 		p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
- 			p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
- 			break;
+			p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+			break;
 		  case IP_RELATIVE_PREDICT_OPCODE:
 		  case IP_RELATIVE_BRANCH_OPCODE:
 			p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
- 			break;
+			break;
 		  case IP_RELATIVE_CALL_OPCODE:
- 			p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
- 			p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
- 			p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
- 			break;
+			p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
+			p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+			p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+			break;
 		}
- 	} else if (bundle_encoding[template][slot] == X) {
+	} else if (bundle_encoding[template][slot] == X) {
 		switch (major_opcode) {
 		  case LONG_CALL_OPCODE:
 			p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
@@ -130,20 +146,66 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint  slot,
 
 /*
  * In this function we check to see if the instruction
+ * (qp) cmpx.crel.ctype p1,p2=r2,r3
+ * on which we are inserting kprobe is cmp instruction
+ * with ctype as unc.
+ */
+static uint __kprobes is_cmp_ctype_unc_inst(uint template, uint slot,
+					    uint major_opcode,
+					    unsigned long kprobe_inst)
+{
+	cmp_inst_t cmp_inst;
+	uint ctype_unc = 0;
+
+	if (!((bundle_encoding[template][slot] == I) ||
+		(bundle_encoding[template][slot] == M)))
+		goto out;
+
+	if (!((major_opcode == 0xC) || (major_opcode == 0xD) ||
+		(major_opcode == 0xE)))
+		goto out;
+
+	cmp_inst.l = kprobe_inst;
+	if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) {
+		/* Integer compare - Register Register (A6 type)*/
+		if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0)
+				&&(cmp_inst.f.c == 1))
+			ctype_unc = 1;
+	} else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) {
+		/* Integer compare - Immediate Register (A8 type)*/
+		if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1))
+			ctype_unc = 1;
+	}
+out:
+	return ctype_unc;
+}
+
+/*
+ * In this function we check to see if the instruction
  * on which we are inserting kprobe is supported.
- * Returns 0 if supported
+ * Returns qp value if supported
  * Returns -EINVAL if unsupported
  */
 static int __kprobes unsupported_inst(uint template, uint  slot,
 				      uint major_opcode,
 				      unsigned long kprobe_inst,
-				      struct kprobe *p)
+				      unsigned long addr)
 {
-	unsigned long addr = (unsigned long)p->addr;
+	int qp;
+
+	qp = kprobe_inst & 0x3f;
+	if (is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) {
+		if (slot == 1 && qp)  {
+			printk(KERN_WARNING "Kprobes on cmp unc "
+					"instruction on slot 1 at <0x%lx> "
+					"is not supported\n", addr);
+			return -EINVAL;
 
-	if (bundle_encoding[template][slot] == I) {
-		switch (major_opcode) {
-			case 0x0: //I_UNIT_MISC_OPCODE:
+		}
+		qp = 0;
+	}
+	else if (bundle_encoding[template][slot] == I) {
+		if (major_opcode == 0) {
 			/*
 			 * Check for Integer speculation instruction
 			 * - Bit 33-35 to be equal to 0x1
@@ -151,10 +213,9 @@ static int __kprobes unsupported_inst(uint template, uint  slot,
 			if (((kprobe_inst >> 33) & 0x7) == 1) {
 				printk(KERN_WARNING
 					"Kprobes on speculation inst at <0x%lx> not supported\n",
-					addr);
+						addr);
 				return -EINVAL;
 			}
-
 			/*
 			 * IP relative mov instruction
 			 *  - Bit 27-35 to be equal to 0x30
@@ -162,50 +223,74 @@ static int __kprobes unsupported_inst(uint template, uint  slot,
 			if (((kprobe_inst >> 27) & 0x1FF) == 0x30) {
 				printk(KERN_WARNING
 					"Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n",
-					addr);
+						addr);
 				return -EINVAL;
 
 			}
 		}
+		else if ((major_opcode == 5) &&	!(kprobe_inst & (0xFUl << 33)) &&
+				(kprobe_inst & (0x1UL << 12))) {
+			/* test bit instructions, tbit,tnat,tf
+			 * bit 33-36 to be equal to 0
+			 * bit 12 to be equal to 1
+			 */
+			if (slot == 1 && qp) {
+				printk(KERN_WARNING "Kprobes on test bit "
+						"instruction on slot at <0x%lx> "
+						"is not supported\n", addr);
+				return -EINVAL;
+			}
+			qp = 0;
+		}
 	}
-	return 0;
-}
-
-
-/*
- * In this function we check to see if the instruction
- * (qp) cmpx.crel.ctype p1,p2=r2,r3
- * on which we are inserting kprobe is cmp instruction
- * with ctype as unc.
- */
-static uint __kprobes is_cmp_ctype_unc_inst(uint template, uint slot,
-					    uint major_opcode,
-					    unsigned long kprobe_inst)
-{
-	cmp_inst_t cmp_inst;
-	uint ctype_unc = 0;
-
-	if (!((bundle_encoding[template][slot] == I) ||
-		(bundle_encoding[template][slot] == M)))
-		goto out;
-
-	if (!((major_opcode == 0xC) || (major_opcode == 0xD) ||
-		(major_opcode == 0xE)))
-		goto out;
+	else if (bundle_encoding[template][slot] == B) {
+		if (major_opcode == 7) {
+			/* IP-Relative Predict major code is 7 */
+			printk(KERN_WARNING "Kprobes on IP-Relative"
+					"Predict is not supported\n");
+			return -EINVAL;
+		}
+		else if (major_opcode == 2) {
+			/* Indirect Predict, major code is 2
+			 * bit 27-32 to be equal to 10 or 11
+			 */
+			int x6=(kprobe_inst >> 27) & 0x3F;
+			if ((x6 == 0x10) || (x6 == 0x11)) {
+				printk(KERN_WARNING "Kprobes on "
+					"Indirect Predict is not supported\n");
+				return -EINVAL;
+			}
+		}
+	}
+	/* kernel does not use float instruction, here for safety kprobe
+	 * will judge whether it is fcmp/flass/float approximation instruction
+	 */
+	else if (unlikely(bundle_encoding[template][slot] == F)) {
+		if ((major_opcode == 4 || major_opcode == 5) &&
+				(kprobe_inst  & (0x1 << 12))) {
+			/* fcmp/fclass unc instruction */
+			if (slot == 1 && qp) {
+				printk(KERN_WARNING "Kprobes on fcmp/fclass "
+					"instruction on slot at <0x%lx> "
+					"is not supported\n", addr);
+				return -EINVAL;
 
-	cmp_inst.l = kprobe_inst;
-	if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) {
-		/* Integere compare - Register Register (A6 type)*/
-		if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0)
-				&&(cmp_inst.f.c == 1))
-			ctype_unc = 1;
-	} else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) {
-		/* Integere compare - Immediate Register (A8 type)*/
-		if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1))
-			ctype_unc = 1;
+			}
+			qp = 0;
+		}
+		if ((major_opcode == 0 || major_opcode == 1) &&
+			(kprobe_inst & (0x1UL << 33))) {
+			/* float Approximation instruction */
+			if (slot == 1 && qp) {
+				printk(KERN_WARNING "Kprobes on float Approx "
+					"instr at <0x%lx> is not supported\n",
+						addr);
+				return -EINVAL;
+			}
+			qp = 0;
+		}
 	}
-out:
-	return ctype_unc;
+	return qp;
 }
 
 /*
@@ -215,20 +300,17 @@ out:
 static void __kprobes prepare_break_inst(uint template, uint  slot,
 					 uint major_opcode,
 					 unsigned long kprobe_inst,
-					 struct kprobe *p)
+					 struct kprobe *p,
+					 int qp)
 {
 	unsigned long break_inst = BREAK_INST;
-	bundle_t *bundle = &p->ainsn.insn.bundle;
+	bundle_t *bundle = &p->opcode.bundle;
 
 	/*
 	 * Copy the original kprobe_inst qualifying predicate(qp)
-	 * to the break instruction iff !is_cmp_ctype_unc_inst
-	 * because for cmp instruction with ctype equal to unc,
-	 * which is a special instruction always needs to be
-	 * executed regradless of qp
+	 * to the break instruction
 	 */
-	if (!is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst))
-		break_inst |= (0x3f & kprobe_inst);
+	break_inst |= qp;
 
 	switch (slot) {
 	  case 0:
@@ -261,18 +343,18 @@ static void __kprobes get_kprobe_inst(bundle_t *bundle, uint slot,
 
 	switch (slot) {
 	  case 0:
- 		*major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT);
- 		*kprobe_inst = bundle->quad0.slot0;
-		break;
+		*major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT);
+		*kprobe_inst = bundle->quad0.slot0;
+		  break;
 	  case 1:
- 		*major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT);
-  		kprobe_inst_p0 = bundle->quad0.slot1_p0;
-  		kprobe_inst_p1 = bundle->quad1.slot1_p1;
-  		*kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46));
+		*major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT);
+		kprobe_inst_p0 = bundle->quad0.slot1_p0;
+		kprobe_inst_p1 = bundle->quad1.slot1_p1;
+		*kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46));
 		break;
 	  case 2:
- 		*major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT);
- 		*kprobe_inst = bundle->quad1.slot2;
+		*major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT);
+		*kprobe_inst = bundle->quad1.slot2;
 		break;
 	}
 }
@@ -293,15 +375,9 @@ static int __kprobes valid_kprobe_addr(int template, int slot,
 		return -EINVAL;
 	}
 
- 	if (in_ivt_functions(addr)) {
- 		printk(KERN_WARNING "Kprobes can't be inserted inside "
+	if (in_ivt_functions(addr)) {
+		printk(KERN_WARNING "Kprobes can't be inserted inside "
 				"IVT functions at 0x%lx\n", addr);
- 		return -EINVAL;
- 	}
-
-	if (slot == 1 && bundle_encoding[template][1] != L) {
-		printk(KERN_WARNING "Inserting kprobes on slot #1 "
-		       "is not supported\n");
 		return -EINVAL;
 	}
 
@@ -310,14 +386,19 @@ static int __kprobes valid_kprobe_addr(int template, int slot,
 
 static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	kcb->prev_kprobe.kp = kprobe_running();
-	kcb->prev_kprobe.status = kcb->kprobe_status;
+	unsigned int i;
+	i = atomic_add_return(1, &kcb->prev_kprobe_index);
+	kcb->prev_kprobe[i-1].kp = kprobe_running();
+	kcb->prev_kprobe[i-1].status = kcb->kprobe_status;
 }
 
 static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
-	kcb->kprobe_status = kcb->prev_kprobe.status;
+	unsigned int i;
+	i = atomic_read(&kcb->prev_kprobe_index);
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe[i-1].kp;
+	kcb->kprobe_status = kcb->prev_kprobe[i-1].status;
+	atomic_sub(1, &kcb->prev_kprobe_index);
 }
 
 static void __kprobes set_current_kprobe(struct kprobe *p,
@@ -341,19 +422,19 @@ static void kretprobe_trampoline(void)
 int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
-	struct hlist_head *head;
-	struct hlist_node *node, *tmp;
+	struct hlist_head *head, empty_rp;
+	struct hlist_node *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address =
 		((struct fnptr *)kretprobe_trampoline)->ip;
 
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	INIT_HLIST_HEAD(&empty_rp);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
 	 * task either because an multiple functions in the call path
-	 * have a return probe installed on them, and/or more then one return
+	 * have a return probe installed on them, and/or more than one return
 	 * return probe was registered for a target function.
 	 *
 	 * We can handle this because:
@@ -363,7 +444,24 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	 *       real return address, and all the rest will point to
 	 *       kretprobe_trampoline
 	 */
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	regs->cr_iip = orig_ret_address;
+
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -372,7 +470,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 			ri->rp->handler(ri, regs);
 
 		orig_ret_address = (unsigned long)ri->ret_addr;
-		recycle_rp_inst(ri);
+		recycle_rp_inst(ri, &empty_rp);
 
 		if (orig_ret_address != trampoline_address)
 			/*
@@ -383,13 +481,16 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 			break;
 	}
 
-	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
-	regs->cr_iip = orig_ret_address;
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
 
 	reset_current_kprobe();
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+		hlist_del(&ri->hlist);
+		kfree(ri);
+	}
 	/*
 	 * By returning a non-zero value, we are telling
 	 * kprobe_handler() that we don't want the post_handler
@@ -398,23 +499,83 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	return 1;
 }
 
-/* Called with kretprobe_lock held */
-void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
-	struct kretprobe_instance *ri;
+	ri->ret_addr = (kprobe_opcode_t *)regs->b0;
+
+	/* Replace the return addr with trampoline addr */
+	regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
+}
+
+/* Check the instruction in the slot is break */
+static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot)
+{
+	unsigned int major_opcode;
+	unsigned int template = bundle->quad0.template;
+	unsigned long kprobe_inst;
 
-	if ((ri = get_free_rp_inst(rp)) != NULL) {
-		ri->rp = rp;
-		ri->task = current;
-		ri->ret_addr = (kprobe_opcode_t *)regs->b0;
+	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+	if (slot == 1 && bundle_encoding[template][1] == L)
+		slot++;
 
-		/* Replace the return addr with trampoline addr */
-		regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
+	/* Get Kprobe probe instruction at given slot*/
+	get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
 
-		add_rp_inst(ri);
-	} else {
-		rp->nmissed++;
+	/* For break instruction,
+	 * Bits 37:40 Major opcode to be zero
+	 * Bits 27:32 X6 to be zero
+	 * Bits 32:35 X3 to be zero
+	 */
+	if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) {
+		/* Not a break instruction */
+		return 0;
+	}
+
+	/* Is a break instruction */
+	return 1;
+}
+
+/*
+ * In this function, we check whether the target bundle modifies IP or
+ * it triggers an exception. If so, it cannot be boostable.
+ */
+static int __kprobes can_boost(bundle_t *bundle, uint slot,
+			       unsigned long bundle_addr)
+{
+	unsigned int template = bundle->quad0.template;
+
+	do {
+		if (search_exception_tables(bundle_addr + slot) ||
+		    __is_ia64_break_inst(bundle, slot))
+			return 0;	/* exception may occur in this bundle*/
+	} while ((++slot) < 3);
+	template &= 0x1e;
+	if (template >= 0x10 /* including B unit */ ||
+	    template == 0x04 /* including X unit */ ||
+	    template == 0x06) /* undefined */
+		return 0;
+
+	return 1;
+}
+
+/* Prepare long jump bundle and disables other boosters if need */
+static void __kprobes prepare_booster(struct kprobe *p)
+{
+	unsigned long addr = (unsigned long)p->addr & ~0xFULL;
+	unsigned int slot = (unsigned long)p->addr & 0xf;
+	struct kprobe *other_kp;
+
+	if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) {
+		set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1);
+		p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
+	}
+
+	/* disables boosters in previous slots */
+	for (; addr < (unsigned long)p->addr; addr++) {
+		other_kp = get_kprobe((void *)addr);
+		if (other_kp)
+			other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE;
 	}
 }
 
@@ -424,75 +585,123 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL);
 	unsigned long kprobe_inst=0;
 	unsigned int slot = addr & 0xf, template, major_opcode = 0;
-	bundle_t *bundle = &p->ainsn.insn.bundle;
-
-	memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t));
-	memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t));
+	bundle_t *bundle;
+	int qp;
 
- 	template = bundle->quad0.template;
+	bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle;
+	template = bundle->quad0.template;
 
 	if(valid_kprobe_addr(template, slot, addr))
 		return -EINVAL;
 
 	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
- 	if (slot == 1 && bundle_encoding[template][1] == L)
-  		slot++;
+	if (slot == 1 && bundle_encoding[template][1] == L)
+		slot++;
 
 	/* Get kprobe_inst and major_opcode from the bundle */
 	get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
 
-	if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p))
-			return -EINVAL;
+	qp = unsupported_inst(template, slot, major_opcode, kprobe_inst, addr);
+	if (qp < 0)
+		return -EINVAL;
 
-	prepare_break_inst(template, slot, major_opcode, kprobe_inst, p);
+	p->ainsn.insn = get_insn_slot();
+	if (!p->ainsn.insn)
+		return -ENOMEM;
+	memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t));
+	memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t));
+
+	prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp);
+
+	prepare_booster(p);
 
 	return 0;
 }
 
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
-	unsigned long addr = (unsigned long)p->addr;
-	unsigned long arm_addr = addr & ~0xFULL;
+	unsigned long arm_addr;
+	bundle_t *src, *dest;
+
+	arm_addr = ((unsigned long)p->addr) & ~0xFUL;
+	dest = &((kprobe_opcode_t *)arm_addr)->bundle;
+	src = &p->opcode.bundle;
+
+	flush_icache_range((unsigned long)p->ainsn.insn,
+			   (unsigned long)p->ainsn.insn +
+			   sizeof(kprobe_opcode_t) * MAX_INSN_SIZE);
 
-	memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t));
-	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+	switch (p->ainsn.slot) {
+		case 0:
+			dest->quad0.slot0 = src->quad0.slot0;
+			break;
+		case 1:
+			dest->quad1.slot1_p1 = src->quad1.slot1_p1;
+			break;
+		case 2:
+			dest->quad1.slot2 = src->quad1.slot2;
+			break;
+	}
+	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
 }
 
 void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
-	unsigned long addr = (unsigned long)p->addr;
-	unsigned long arm_addr = addr & ~0xFULL;
-
-	/* p->opcode contains the original unaltered bundle */
-	memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t));
-	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+	unsigned long arm_addr;
+	bundle_t *src, *dest;
+
+	arm_addr = ((unsigned long)p->addr) & ~0xFUL;
+	dest = &((kprobe_opcode_t *)arm_addr)->bundle;
+	/* p->ainsn.insn contains the original unaltered kprobe_opcode_t */
+	src = &p->ainsn.insn->bundle;
+	switch (p->ainsn.slot) {
+		case 0:
+			dest->quad0.slot0 = src->quad0.slot0;
+			break;
+		case 1:
+			dest->quad1.slot1_p1 = src->quad1.slot1_p1;
+			break;
+		case 2:
+			dest->quad1.slot2 = src->quad1.slot2;
+			break;
+	}
+	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
 }
 
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn,
+			       p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
+		p->ainsn.insn = NULL;
+	}
+}
 /*
  * We are resuming execution after a single step fault, so the pt_regs
  * structure reflects the register state after we executed the instruction
- * located in the kprobe (p->ainsn.insn.bundle).  We still need to adjust
+ * located in the kprobe (p->ainsn.insn->bundle).  We still need to adjust
  * the ip to point back to the original stack address. To set the IP address
  * to original stack address, handle the case where we need to fixup the
  * relative IP address and/or fixup branch register.
  */
 static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
-  	unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL;
-  	unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
- 	unsigned long template;
- 	int slot = ((unsigned long)p->addr & 0xf);
+	unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle);
+	unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
+	unsigned long template;
+	int slot = ((unsigned long)p->addr & 0xf);
 
-	template = p->opcode.bundle.quad0.template;
+	template = p->ainsn.insn->bundle.quad0.template;
 
- 	if (slot == 1 && bundle_encoding[template][1] == L)
- 		slot = 2;
+	if (slot == 1 && bundle_encoding[template][1] == L)
+		slot = 2;
 
-	if (p->ainsn.inst_flag) {
+	if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {
 
 		if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
 			/* Fix relative IP address */
- 			regs->cr_iip = (regs->cr_iip - bundle_addr) + resume_addr;
+			regs->cr_iip = (regs->cr_iip - bundle_addr) +
+					resume_addr;
 		}
 
 		if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) {
@@ -529,23 +738,23 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 	}
 
 	if (slot == 2) {
- 		if (regs->cr_iip == bundle_addr + 0x10) {
- 			regs->cr_iip = resume_addr + 0x10;
- 		}
- 	} else {
- 		if (regs->cr_iip == bundle_addr) {
- 			regs->cr_iip = resume_addr;
- 		}
+		if (regs->cr_iip == bundle_addr + 0x10) {
+			regs->cr_iip = resume_addr + 0x10;
+		}
+	} else {
+		if (regs->cr_iip == bundle_addr) {
+			regs->cr_iip = resume_addr;
+		}
 	}
 
 turn_ss_off:
-  	/* Turn off Single Step bit */
-  	ia64_psr(regs)->ss = 0;
+	/* Turn off Single Step bit */
+	ia64_psr(regs)->ss = 0;
 }
 
 static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
 {
-	unsigned long bundle_addr = (unsigned long) &p->opcode.bundle;
+	unsigned long bundle_addr = (unsigned long) &p->ainsn.insn->bundle;
 	unsigned long slot = (unsigned long)p->addr & 0xf;
 
 	/* single step inline if break instruction */
@@ -566,33 +775,12 @@ static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
 static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
 {
 	unsigned int slot = ia64_psr(regs)->ri;
-	unsigned int template, major_opcode;
-	unsigned long kprobe_inst;
 	unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
 	bundle_t bundle;
 
 	memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
-	template = bundle.quad0.template;
 
-	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
-	if (slot == 1 && bundle_encoding[template][1] == L)
-  		slot++;
-
-	/* Get Kprobe probe instruction at given slot*/
-	get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
-
-	/* For break instruction,
-	 * Bits 37:40 Major opcode to be zero
-	 * Bits 27:32 X6 to be zero
-	 * Bits 32:35 X3 to be zero
-	 */
-	if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) {
-		/* Not a break instruction */
-		return 0;
-	}
-
-	/* Is a break instruction */
-	return 1;
+	return __is_ia64_break_inst(&bundle, slot);
 }
 
 static int __kprobes pre_kprobes_handler(struct die_args *args)
@@ -616,7 +804,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 		if (p) {
 			if ((kcb->kprobe_status == KPROBE_HIT_SS) &&
 	 		     (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) {
-  				ia64_psr(regs)->ss = 0;
+				ia64_psr(regs)->ss = 0;
 				goto no_kprobe;
 			}
 			/* We have reentered the pre_kprobe_handler(), since
@@ -682,6 +870,19 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 		return 1;
 
 ss_probe:
+#if !defined(CONFIG_PREEMPT)
+	if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
+		/* Boost up -- we can execute copied instructions directly */
+		ia64_psr(regs)->ri = p->ainsn.slot;
+		regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL;
+		/* turn single stepping off */
+		ia64_psr(regs)->ss = 0;
+
+		reset_current_kprobe();
+		preempt_enable_no_resched();
+		return 1;
+	}
+#endif
 	prepare_ss(p, regs);
 	kcb->kprobe_status = KPROBE_HIT_SS;
 	return 1;
@@ -718,7 +919,7 @@ out:
 	return 1;
 }
 
-static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -746,7 +947,7 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
 	case KPROBE_HIT_SSDONE:
 		/*
 		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accouting
+		 * we can also use npre/npostfault count for accounting
 		 * these specific fault cases.
 		 */
 		kprobes_inc_nmissed_count(cur);
@@ -760,6 +961,12 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
 		 */
 		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
 			return 1;
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+		if (ia64_done_with_exception(regs))
+			return 1;
 
 		/*
 		 * Let ia64_do_page_fault() fix it.
@@ -784,7 +991,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 	switch(val) {
 	case DIE_BREAK:
 		/* err is break number from ia64_bad_break() */
-		if (args->err == 0x80200 || args->err == 0x80300 || args->err == 0)
+		if ((args->err >> 12) == (__IA64_BREAK_KPROBE >> 12)
+			|| args->err == __IA64_BREAK_JPROBE
+			|| args->err == 0)
 			if (pre_kprobes_handler(args))
 				ret = NOTIFY_STOP;
 		break;
@@ -794,13 +1003,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 			if (post_kprobes_handler(args->regs))
 				ret = NOTIFY_STOP;
 		break;
-	case DIE_PAGE_FAULT:
-		/* kprobe_running() needs smp_processor_id() */
-		preempt_disable();
-		if (kprobe_running() &&
-			kprobes_fault_handler(args->regs, args->trapnr))
-			ret = NOTIFY_STOP;
-		preempt_enable();
 	default:
 		break;
 	}
@@ -828,15 +1030,20 @@ static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg)
 			return;
 		}
 	} while (unw_unwind(info) >= 0);
-	lp->bsp = 0;
+	lp->bsp = NULL;
 	lp->cfm = 0;
 	return;
 }
 
+unsigned long arch_deref_entry_point(void *entry)
+{
+	return ((struct fnptr *)entry)->ip;
+}
+
 int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
-	unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
+	unsigned long addr = arch_deref_entry_point(jp->entry);
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	struct param_bsp_cfm pa;
 	int bytes;
@@ -844,7 +1051,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	/*
 	 * Callee owns the argument space and could overwrite it, eg
 	 * tail call optimization. So to be absolutely safe
-	 * we save the argument space before transfering the control
+	 * we save the argument space before transferring the control
 	 * to instrumented jprobe function which runs in
 	 * the process context
 	 */
@@ -870,11 +1077,16 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	 * fix the return address to our jprobe_inst_return() function
 	 * in the jprobes.S file
 	 */
- 	regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip;
+	regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip;
 
 	return 1;
 }
 
+/* ia64 does not need this */
+void __kprobes jprobe_return(void)
+{
+}
+
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -906,3 +1118,12 @@ int __init arch_init_kprobes(void)
 		(kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip;
 	return register_kprobe(&trampoline_p);
 }
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+	if (p->addr ==
+		(kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip)
+		return 1;
+
+	return 0;
+}
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
new file mode 100644
index 00000000000..5151a649c96
--- /dev/null
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -0,0 +1,170 @@
+/*
+ * arch/ia64/kernel/machine_kexec.c
+ *
+ * Handle transition of Linux booting another kernel
+ * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <linux/efi.h>
+#include <linux/numa.h>
+#include <linux/mmzone.h>
+
+#include <asm/numa.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/delay.h>
+#include <asm/meminit.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
+
+typedef void (*relocate_new_kernel_t)(
+					unsigned long indirection_page,
+					unsigned long start_address,
+					struct ia64_boot_param *boot_param,
+					unsigned long pal_addr) __noreturn;
+
+struct kimage *ia64_kimage;
+
+struct resource efi_memmap_res = {
+        .name  = "EFI Memory Map",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource boot_param_res = {
+        .name  = "Boot parameter",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+
+/*
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+	void *control_code_buffer;
+	const unsigned long *func;
+
+	func = (unsigned long *)&relocate_new_kernel;
+	/* Pre-load control code buffer to minimize work in kexec path */
+	control_code_buffer = page_address(image->control_code_page);
+	memcpy((void *)control_code_buffer, (const void *)func[0],
+			relocate_new_kernel_size);
+	flush_icache_range((unsigned long)control_code_buffer,
+			(unsigned long)control_code_buffer + relocate_new_kernel_size);
+	ia64_kimage = image;
+
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
+{
+	struct kimage *image = arg;
+	relocate_new_kernel_t rnk;
+	void *pal_addr = efi_get_pal_addr();
+	unsigned long code_addr;
+	int ii;
+	u64 fp, gp;
+	ia64_fptr_t *init_handler = (ia64_fptr_t *)ia64_os_init_on_kdump;
+
+	BUG_ON(!image);
+	code_addr = (unsigned long)page_address(image->control_code_page);
+	if (image->type == KEXEC_TYPE_CRASH) {
+		crash_save_this_cpu();
+		current->thread.ksp = (__u64)info->sw - 16;
+
+		/* Register noop init handler */
+		fp = ia64_tpa(init_handler->fp);
+		gp = ia64_tpa(ia64_getreg(_IA64_REG_GP));
+		ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, fp, gp, 0, fp, gp, 0);
+	} else {
+		/* Unregister init handlers of current kernel */
+		ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0);
+	}
+
+	/* Unregister mca handler - No more recovery on current kernel */
+	ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 0, 0, 0, 0, 0, 0);
+
+	/* Interrupts aren't acceptable while we reboot */
+	local_irq_disable();
+
+	/* Mask CMC and Performance Monitor interrupts */
+	ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+	ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+	/* Mask ITV and Local Redirect Registers */
+	ia64_set_itv(1 << 16);
+	ia64_set_lrr0(1 << 16);
+	ia64_set_lrr1(1 << 16);
+
+	/* terminate possible nested in-service interrupts */
+	for (ii = 0; ii < 16; ii++)
+		ia64_eoi();
+
+	/* unmask TPR and clear any pending interrupts */
+	ia64_setreg(_IA64_REG_CR_TPR, 0);
+	ia64_srlz_d();
+	while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR)
+		ia64_eoi();
+	platform_kernel_launch_event();
+	rnk = (relocate_new_kernel_t)&code_addr;
+	(*rnk)(image->head, image->start, ia64_boot_param,
+		     GRANULEROUNDDOWN((unsigned long) pal_addr));
+	BUG();
+}
+
+void machine_kexec(struct kimage *image)
+{
+	BUG_ON(!image);
+	unw_init_running(ia64_machine_kexec, image);
+	for(;;);
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+#if defined(CONFIG_DISCONTIGMEM) || defined(CONFIG_SPARSEMEM)
+	VMCOREINFO_SYMBOL(pgdat_list);
+	VMCOREINFO_LENGTH(pgdat_list, MAX_NUMNODES);
+#endif
+#ifdef CONFIG_NUMA
+	VMCOREINFO_SYMBOL(node_memblk);
+	VMCOREINFO_LENGTH(node_memblk, NR_NODE_MEMBLKS);
+	VMCOREINFO_STRUCT_SIZE(node_memblk_s);
+	VMCOREINFO_OFFSET(node_memblk_s, start_paddr);
+	VMCOREINFO_OFFSET(node_memblk_s, size);
+#endif
+#ifdef CONFIG_PGTABLE_3
+	VMCOREINFO_CONFIG(PGTABLE_3);
+#elif defined(CONFIG_PGTABLE_4)
+	VMCOREINFO_CONFIG(PGTABLE_4);
+#endif
+}
+
+unsigned long paddr_vmcoreinfo_note(void)
+{
+	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
+}
+
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
index 4b0b71d5aef..f5a1e5246b3 100644
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@@ -1,8 +1,6 @@
-#include <linux/config.h>
 #include <linux/module.h>
-
+#include <linux/dma-mapping.h>
 #include <asm/machvec.h>
-#include <asm/system.h>
 
 #ifdef CONFIG_IA64_GENERIC
 
@@ -14,14 +12,6 @@
 struct ia64_machine_vector ia64_mv;
 EXPORT_SYMBOL(ia64_mv);
 
-static __initdata const char *mvec_name;
-static __init int setup_mvec(char *s)
-{
-	mvec_name = s;
-	return 0;
-}
-early_param("machvec", setup_mvec);
-
 static struct ia64_machine_vector * __init
 lookup_machvec (const char *name)
 {
@@ -36,13 +26,13 @@ lookup_machvec (const char *name)
 	return 0;
 }
 
-void
+void __init
 machvec_init (const char *name)
 {
 	struct ia64_machine_vector *mv;
 
 	if (!name)
-		name = mvec_name ? mvec_name : acpi_get_sysname();
+		name = acpi_get_sysname();
 	mv = lookup_machvec(name);
 	if (!mv)
 		panic("generic kernel failed to find machine vector for"
@@ -52,6 +42,23 @@ machvec_init (const char *name)
 	printk(KERN_INFO "booting generic kernel on platform %s\n", name);
 }
 
+void __init
+machvec_init_from_cmdline(const char *cmdline)
+{
+	char str[64];
+	const char *start;
+	char *end;
+
+	if (! (start = strstr(cmdline, "machvec=")) )
+		return machvec_init(NULL);
+
+	strlcpy(str, start + strlen("machvec="), sizeof(str));
+	if ( (end = strchr(str, ' ')) )
+		*end = '\0';
+
+	return machvec_init(str);
+}
+
 #endif /* CONFIG_IA64_GENERIC */
 
 void
@@ -61,20 +68,22 @@ machvec_setup (char **arg)
 EXPORT_SYMBOL(machvec_setup);
 
 void
-machvec_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+machvec_timer_interrupt (int irq, void *dev_id)
 {
 }
 EXPORT_SYMBOL(machvec_timer_interrupt);
 
 void
-machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir)
+machvec_dma_sync_single(struct device *hwdev, dma_addr_t dma_handle, size_t size,
+			enum dma_data_direction dir)
 {
 	mb();
 }
 EXPORT_SYMBOL(machvec_dma_sync_single);
 
 void
-machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir)
+machvec_dma_sync_sg(struct device *hwdev, struct scatterlist *sg, int n,
+		    enum dma_data_direction dir)
 {
 	mb();
 }
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6a0880639bc..db7b36bb068 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -2,66 +2,79 @@
  * File:	mca.c
  * Purpose:	Generic MCA handling layer
  *
- * Updated for latest kernel
  * Copyright (C) 2003 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  *
  * Copyright (C) 2002 Dell Inc.
- * Copyright (C) Matt Domsch (Matt_Domsch@dell.com)
+ * Copyright (C) Matt Domsch <Matt_Domsch@dell.com>
  *
  * Copyright (C) 2002 Intel
- * Copyright (C) Jenna Hall (jenna.s.hall@intel.com)
+ * Copyright (C) Jenna Hall <jenna.s.hall@intel.com>
  *
  * Copyright (C) 2001 Intel
- * Copyright (C) Fred Lewis (frederick.v.lewis@intel.com)
+ * Copyright (C) Fred Lewis <frederick.v.lewis@intel.com>
  *
  * Copyright (C) 2000 Intel
- * Copyright (C) Chuck Fleckenstein (cfleck@co.intel.com)
+ * Copyright (C) Chuck Fleckenstein <cfleck@co.intel.com>
  *
- * Copyright (C) 1999, 2004 Silicon Graphics, Inc.
- * Copyright (C) Vijay Chander(vijay@engr.sgi.com)
+ * Copyright (C) 1999, 2004-2008 Silicon Graphics, Inc.
+ * Copyright (C) Vijay Chander <vijay@engr.sgi.com>
  *
- * 03/04/15 D. Mosberger Added INIT backtrace support.
- * 02/03/25 M. Domsch	GUID cleanups
+ * Copyright (C) 2006 FUJITSU LIMITED
+ * Copyright (C) Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
  *
- * 02/01/04 J. Hall	Aligned MCA stack to 16 bytes, added platform vs. CPU
- *			error flag, set SAL default return values, changed
- *			error record structure to linked list, added init call
- *			to sal_get_state_info_size().
+ * 2000-03-29 Chuck Fleckenstein <cfleck@co.intel.com>
+ *	      Fixed PAL/SAL update issues, began MCA bug fixes, logging issues,
+ *	      added min save state dump, added INIT handler.
  *
- * 01/01/03 F. Lewis    Added setup of CMCI and CPEI IRQs, logging of corrected
- *                      platform errors, completed code for logging of
- *                      corrected & uncorrected machine check errors, and
- *                      updated for conformance with Nov. 2000 revision of the
- *                      SAL 3.0 spec.
- * 00/03/29 C. Fleckenstein  Fixed PAL/SAL update issues, began MCA bug fixes, logging issues,
- *                           added min save state dump, added INIT handler.
+ * 2001-01-03 Fred Lewis <frederick.v.lewis@intel.com>
+ *	      Added setup of CMCI and CPEI IRQs, logging of corrected platform
+ *	      errors, completed code for logging of corrected & uncorrected
+ *	      machine check errors, and updated for conformance with Nov. 2000
+ *	      revision of the SAL 3.0 spec.
+ *
+ * 2002-01-04 Jenna Hall <jenna.s.hall@intel.com>
+ *	      Aligned MCA stack to 16 bytes, added platform vs. CPU error flag,
+ *	      set SAL default return values, changed error record structure to
+ *	      linked list, added init call to sal_get_state_info_size().
+ *
+ * 2002-03-25 Matt Domsch <Matt_Domsch@dell.com>
+ *	      GUID cleanups.
+ *
+ * 2003-04-15 David Mosberger-Tang <davidm@hpl.hp.com>
+ *	      Added INIT backtrace support.
  *
  * 2003-12-08 Keith Owens <kaos@sgi.com>
- *            smp_call_function() must not be called from interrupt context (can
- *            deadlock on tasklist_lock).  Use keventd to call smp_call_function().
+ *	      smp_call_function() must not be called from interrupt context
+ *	      (can deadlock on tasklist_lock).
+ *	      Use keventd to call smp_call_function().
  *
  * 2004-02-01 Keith Owens <kaos@sgi.com>
- *            Avoid deadlock when using printk() for MCA and INIT records.
- *            Delete all record printing code, moved to salinfo_decode in user space.
- *            Mark variables and functions static where possible.
- *            Delete dead variables and functions.
- *            Reorder to remove the need for forward declarations and to consolidate
- *            related code.
+ *	      Avoid deadlock when using printk() for MCA and INIT records.
+ *	      Delete all record printing code, moved to salinfo_decode in user
+ *	      space.  Mark variables and functions static where possible.
+ *	      Delete dead variables and functions.  Reorder to remove the need
+ *	      for forward declarations and to consolidate related code.
  *
  * 2005-08-12 Keith Owens <kaos@sgi.com>
- *	      Convert MCA/INIT handlers to use per event stacks and SAL/OS state.
+ *	      Convert MCA/INIT handlers to use per event stacks and SAL/OS
+ *	      state.
  *
  * 2005-10-07 Keith Owens <kaos@sgi.com>
  *	      Add notify_die() hooks.
+ *
+ * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ *	      Add printing support for MCA/INIT.
+ *
+ * 2007-04-27 Russ Anderson <rja@sgi.com>
+ *	      Support multiple cpus going through OS_MCA in the same event.
  */
-#include <linux/config.h>
+#include <linux/jiffies.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
-#include <linux/smp_lock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
 #include <linux/timer.h>
@@ -70,19 +83,22 @@
 #include <linux/smp.h>
 #include <linux/workqueue.h>
 #include <linux/cpumask.h>
+#include <linux/kdebug.h>
+#include <linux/cpu.h>
+#include <linux/gfp.h>
 
 #include <asm/delay.h>
-#include <asm/kdebug.h>
 #include <asm/machvec.h>
 #include <asm/meminit.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 #include <asm/sal.h>
 #include <asm/mca.h>
+#include <asm/kexec.h>
 
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
+#include <asm/tlb.h>
 
 #include "mca_drv.h"
 #include "entry.h"
@@ -93,12 +109,26 @@
 # define IA64_MCA_DEBUG(fmt...)
 #endif
 
+#define NOTIFY_INIT(event, regs, arg, spin)				\
+do {									\
+	if ((notify_die((event), "INIT", (regs), (arg), 0, 0)		\
+			== NOTIFY_STOP) && ((spin) == 1))		\
+		ia64_mca_spin(__func__);				\
+} while (0)
+
+#define NOTIFY_MCA(event, regs, arg, spin)				\
+do {									\
+	if ((notify_die((event), "MCA", (regs), (arg), 0, 0)		\
+			== NOTIFY_STOP) && ((spin) == 1))		\
+		ia64_mca_spin(__func__);				\
+} while (0)
+
 /* Used by mca_asm.S */
-u32				ia64_mca_serialize;
 DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
 DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
 DEFINE_PER_CPU(u64, ia64_mca_pal_pte);	    /* PTE to map PAL code */
 DEFINE_PER_CPU(u64, ia64_mca_pal_base);    /* vaddr PAL code granule */
+DEFINE_PER_CPU(u64, ia64_mca_tr_reload);   /* Flag for TR reload */
 
 unsigned long __per_cpu_mca[NR_CPUS];
 
@@ -116,7 +146,9 @@ static ia64_mc_info_t		ia64_mc_info;
 #define CPE_HISTORY_LENGTH    5
 #define CMC_HISTORY_LENGTH    5
 
+#ifdef CONFIG_ACPI
 static struct timer_list cpe_poll_timer;
+#endif
 static struct timer_list cmc_poll_timer;
 /*
  * This variable tells whether we are currently in polling mode.
@@ -137,11 +169,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
 
 static int mca_init __initdata;
 
+/*
+ * limited & delayed printing support for MCA/INIT handler
+ */
+
+#define mprintk(fmt...) ia64_mca_printk(fmt)
+
+#define MLOGBUF_SIZE (512+256*NR_CPUS)
+#define MLOGBUF_MSGMAX 256
+static char mlogbuf[MLOGBUF_SIZE];
+static DEFINE_SPINLOCK(mlogbuf_wlock);	/* mca context only */
+static DEFINE_SPINLOCK(mlogbuf_rlock);	/* normal context only */
+static unsigned long mlogbuf_start;
+static unsigned long mlogbuf_end;
+static unsigned int mlogbuf_finished = 0;
+static unsigned long mlogbuf_timestamp = 0;
+
+static int loglevel_save = -1;
+#define BREAK_LOGLEVEL(__console_loglevel)		\
+	oops_in_progress = 1;				\
+	if (loglevel_save < 0)				\
+		loglevel_save = __console_loglevel;	\
+	__console_loglevel = 15;
+
+#define RESTORE_LOGLEVEL(__console_loglevel)		\
+	if (loglevel_save >= 0) {			\
+		__console_loglevel = loglevel_save;	\
+		loglevel_save = -1;			\
+	}						\
+	mlogbuf_finished = 0;				\
+	oops_in_progress = 0;
+
+/*
+ * Push messages into buffer, print them later if not urgent.
+ */
+void ia64_mca_printk(const char *fmt, ...)
+{
+	va_list args;
+	int printed_len;
+	char temp_buf[MLOGBUF_MSGMAX];
+	char *p;
+
+	va_start(args, fmt);
+	printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args);
+	va_end(args);
+
+	/* Copy the output into mlogbuf */
+	if (oops_in_progress) {
+		/* mlogbuf was abandoned, use printk directly instead. */
+		printk("%s", temp_buf);
+	} else {
+		spin_lock(&mlogbuf_wlock);
+		for (p = temp_buf; *p; p++) {
+			unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE;
+			if (next != mlogbuf_start) {
+				mlogbuf[mlogbuf_end] = *p;
+				mlogbuf_end = next;
+			} else {
+				/* buffer full */
+				break;
+			}
+		}
+		mlogbuf[mlogbuf_end] = '\0';
+		spin_unlock(&mlogbuf_wlock);
+	}
+}
+EXPORT_SYMBOL(ia64_mca_printk);
+
+/*
+ * Print buffered messages.
+ *  NOTE: call this after returning normal context. (ex. from salinfod)
+ */
+void ia64_mlogbuf_dump(void)
+{
+	char temp_buf[MLOGBUF_MSGMAX];
+	char *p;
+	unsigned long index;
+	unsigned long flags;
+	unsigned int printed_len;
+
+	/* Get output from mlogbuf */
+	while (mlogbuf_start != mlogbuf_end) {
+		temp_buf[0] = '\0';
+		p = temp_buf;
+		printed_len = 0;
+
+		spin_lock_irqsave(&mlogbuf_rlock, flags);
+
+		index = mlogbuf_start;
+		while (index != mlogbuf_end) {
+			*p = mlogbuf[index];
+			index = (index + 1) % MLOGBUF_SIZE;
+			if (!*p)
+				break;
+			p++;
+			if (++printed_len >= MLOGBUF_MSGMAX - 1)
+				break;
+		}
+		*p = '\0';
+		if (temp_buf[0])
+			printk("%s", temp_buf);
+		mlogbuf_start = index;
+
+		mlogbuf_timestamp = 0;
+		spin_unlock_irqrestore(&mlogbuf_rlock, flags);
+	}
+}
+EXPORT_SYMBOL(ia64_mlogbuf_dump);
+
+/*
+ * Call this if system is going to down or if immediate flushing messages to
+ * console is required. (ex. recovery was failed, crash dump is going to be
+ * invoked, long-wait rendezvous etc.)
+ *  NOTE: this should be called from monarch.
+ */
+static void ia64_mlogbuf_finish(int wait)
+{
+	BREAK_LOGLEVEL(console_loglevel);
+
+	spin_lock_init(&mlogbuf_rlock);
+	ia64_mlogbuf_dump();
+	printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, "
+		"MCA/INIT might be dodgy or fail.\n");
+
+	if (!wait)
+		return;
+
+	/* wait for console */
+	printk("Delaying for 5 seconds...\n");
+	udelay(5*1000000);
+
+	mlogbuf_finished = 1;
+}
+
+/*
+ * Print buffered messages from INIT context.
+ */
+static void ia64_mlogbuf_dump_from_init(void)
+{
+	if (mlogbuf_finished)
+		return;
+
+	if (mlogbuf_timestamp &&
+			time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
+		printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
+			" and the system seems to be messed up.\n");
+		ia64_mlogbuf_finish(0);
+		return;
+	}
+
+	if (!spin_trylock(&mlogbuf_rlock)) {
+		printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. "
+			"Generated messages other than stack dump will be "
+			"buffered to mlogbuf and will be printed later.\n");
+		printk(KERN_ERR "INIT: If messages would not printed after "
+			"this INIT, wait 30sec and assert INIT again.\n");
+		if (!mlogbuf_timestamp)
+			mlogbuf_timestamp = jiffies;
+		return;
+	}
+	spin_unlock(&mlogbuf_rlock);
+	ia64_mlogbuf_dump();
+}
 
 static void inline
 ia64_mca_spin(const char *func)
 {
-	printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
+	if (monarch_cpu == smp_processor_id())
+		ia64_mlogbuf_finish(0);
+	mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
 	while (1)
 		cpu_relax();
 }
@@ -222,7 +418,7 @@ ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
 {
 	sal_log_record_header_t     *log_buffer;
 	u64                         total_len = 0;
-	int                         s;
+	unsigned long               s;
 
 	IA64_LOG_LOCK(sal_info_type);
 
@@ -235,8 +431,8 @@ ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
 		IA64_LOG_INDEX_INC(sal_info_type);
 		IA64_LOG_UNLOCK(sal_info_type);
 		if (irq_safe) {
-			IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. "
-				       "Record length = %ld\n", __FUNCTION__, sal_info_type, total_len);
+			IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. Record length = %ld\n",
+				       __func__, sal_info_type, total_len);
 		}
 		*buffer = (u8 *) log_buffer;
 		return total_len;
@@ -333,21 +529,18 @@ int cpe_vector = -1;
 int ia64_cpe_irq = -1;
 
 static irqreturn_t
-ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
+ia64_mca_cpe_int_handler (int cpe_irq, void *arg)
 {
 	static unsigned long	cpe_history[CPE_HISTORY_LENGTH];
 	static int		index;
 	static DEFINE_SPINLOCK(cpe_history_lock);
 
 	IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
-		       __FUNCTION__, cpe_irq, smp_processor_id());
+		       __func__, cpe_irq, smp_processor_id());
 
 	/* SAL spec states this should run w/ interrupts enabled */
 	local_irq_enable();
 
-	/* Get the CPE error record and log it */
-	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
-
 	spin_lock(&cpe_history_lock);
 	if (!cpe_poll_enabled && cpe_vector >= 0) {
 
@@ -376,7 +569,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
 			mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL);
 
 			/* lock already released, get out now */
-			return IRQ_HANDLED;
+			goto out;
 		} else {
 			cpe_history[index++] = now;
 			if (index == CPE_HISTORY_LENGTH)
@@ -384,6 +577,12 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
 		}
 	}
 	spin_unlock(&cpe_history_lock);
+out:
+	/* Get the CPE error record and log it */
+	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
+
+	local_irq_disable();
+
 	return IRQ_HANDLED;
 }
 
@@ -401,7 +600,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
  *  Outputs
  *      None
  */
-static void __init
+void
 ia64_mca_register_cpev (int cpev)
 {
 	/* Register the CPE interrupt vector with SAL */
@@ -415,7 +614,7 @@ ia64_mca_register_cpev (int cpev)
 	}
 
 	IA64_MCA_DEBUG("%s: corrected platform error "
-		       "vector %#x registered\n", __FUNCTION__, cpev);
+		       "vector %#x registered\n", __func__, cpev);
 }
 #endif /* CONFIG_ACPI */
 
@@ -432,7 +631,7 @@ ia64_mca_register_cpev (int cpev)
  * Outputs
  *	None
  */
-void __cpuinit
+void
 ia64_mca_cmc_vector_setup (void)
 {
 	cmcv_reg_t	cmcv;
@@ -442,12 +641,11 @@ ia64_mca_cmc_vector_setup (void)
 	cmcv.cmcv_vector	= IA64_CMC_VECTOR;
 	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
 
-	IA64_MCA_DEBUG("%s: CPU %d corrected "
-		       "machine check vector %#x registered.\n",
-		       __FUNCTION__, smp_processor_id(), IA64_CMC_VECTOR);
+	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x registered.\n",
+		       __func__, smp_processor_id(), IA64_CMC_VECTOR);
 
 	IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n",
-		       __FUNCTION__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV));
+		       __func__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV));
 }
 
 /*
@@ -472,9 +670,8 @@ ia64_mca_cmc_vector_disable (void *dummy)
 	cmcv.cmcv_mask = 1; /* Mask/disable interrupt */
 	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
 
-	IA64_MCA_DEBUG("%s: CPU %d corrected "
-		       "machine check vector %#x disabled.\n",
-		       __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector);
+	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x disabled.\n",
+		       __func__, smp_processor_id(), cmcv.cmcv_vector);
 }
 
 /*
@@ -499,9 +696,8 @@ ia64_mca_cmc_vector_enable (void *dummy)
 	cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
 	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
 
-	IA64_MCA_DEBUG("%s: CPU %d corrected "
-		       "machine check vector %#x enabled.\n",
-		       __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector);
+	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x enabled.\n",
+		       __func__, smp_processor_id(), cmcv.cmcv_vector);
 }
 
 /*
@@ -511,9 +707,9 @@ ia64_mca_cmc_vector_enable (void *dummy)
  * disable the cmc interrupt vector.
  */
 static void
-ia64_mca_cmc_vector_disable_keventd(void *unused)
+ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused)
 {
-	on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0);
+	on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 0);
 }
 
 /*
@@ -523,16 +719,15 @@ ia64_mca_cmc_vector_disable_keventd(void *unused)
  * enable the cmc interrupt vector.
  */
 static void
-ia64_mca_cmc_vector_enable_keventd(void *unused)
+ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused)
 {
-	on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0);
+	on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 0);
 }
 
 /*
  * ia64_mca_wakeup
  *
- *	Send an inter-cpu interrupt to wake-up a particular cpu
- *	and mark that cpu to be out of rendez.
+ *	Send an inter-cpu interrupt to wake-up a particular cpu.
  *
  *  Inputs  :   cpuid
  *  Outputs :   None
@@ -541,14 +736,12 @@ static void
 ia64_mca_wakeup(int cpu)
 {
 	platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0);
-	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
-
 }
 
 /*
  * ia64_mca_wakeup_all
  *
- *	Wakeup all the cpus which have rendez'ed previously.
+ *	Wakeup all the slave cpus which have rendez'ed previously.
  *
  *  Inputs  :   None
  *  Outputs :   None
@@ -571,13 +764,16 @@ ia64_mca_wakeup_all(void)
  *
  *	This is handler used to put slave processors into spinloop
  *	while the monarch processor does the mca handling and later
- *	wake each slave up once the monarch is done.
+ *	wake each slave up once the monarch is done.  The state
+ *	IA64_MCA_RENDEZ_CHECKIN_DONE indicates the cpu is rendez'ed
+ *	in SAL.  The state IA64_MCA_RENDEZ_CHECKIN_NOTDONE indicates
+ *	the cpu has come out of OS rendezvous.
  *
  *  Inputs  :   None
  *  Outputs :   None
  */
 static irqreturn_t
-ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs)
+ia64_mca_rendez_int_handler(int rendez_irq, void *arg)
 {
 	unsigned long flags;
 	int cpu = smp_processor_id();
@@ -586,9 +782,8 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs)
 
 	/* Mask all interrupts */
 	local_irq_save(flags);
-	if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__FUNCTION__);
+
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1);
 
 	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE;
 	/* Register with the SAL monarch that the slave has
@@ -596,18 +791,15 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs)
 	 */
 	ia64_sal_mc_rendez();
 
-	if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__FUNCTION__);
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1);
 
 	/* Wait for the monarch cpu to exit. */
 	while (monarch_cpu != -1)
 	       cpu_relax();	/* spin until monarch leaves */
 
-	if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__FUNCTION__);
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1);
 
+	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
 	/* Enable all interrupts */
 	local_irq_restore(flags);
 	return IRQ_HANDLED;
@@ -624,12 +816,11 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs)
  *
  *  Inputs  :   wakeup_irq  (Wakeup-interrupt bit)
  *	arg		(Interrupt handler specific argument)
- *	ptregs		(Exception frame at the time of the interrupt)
  *  Outputs :   None
  *
  */
 static irqreturn_t
-ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs)
+ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg)
 {
 	return IRQ_HANDLED;
 }
@@ -661,7 +852,7 @@ EXPORT_SYMBOL(ia64_unreg_MCA_extension);
 
 
 static inline void
-copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
+copy_reg(const u64 *fr, u64 fnat, unsigned long *tr, unsigned long *tnat)
 {
 	u64 fslot, tslot, nat;
 	*tr = *fr;
@@ -679,7 +870,7 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
  */
 
 static void
-ia64_mca_modify_comm(const task_t *previous_current)
+ia64_mca_modify_comm(const struct task_struct *previous_current)
 {
 	char *p, comm[sizeof(current->comm)];
 	if (previous_current->pid)
@@ -698,6 +889,65 @@ ia64_mca_modify_comm(const task_t *previous_current)
 	memcpy(current->comm, comm, sizeof(current->comm));
 }
 
+static void
+finish_pt_regs(struct pt_regs *regs, struct ia64_sal_os_state *sos,
+		unsigned long *nat)
+{
+	const pal_min_state_area_t *ms = sos->pal_min_state;
+	const u64 *bank;
+
+	/* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
+	 * pmsa_{xip,xpsr,xfs}
+	 */
+	if (ia64_psr(regs)->ic) {
+		regs->cr_iip = ms->pmsa_iip;
+		regs->cr_ipsr = ms->pmsa_ipsr;
+		regs->cr_ifs = ms->pmsa_ifs;
+	} else {
+		regs->cr_iip = ms->pmsa_xip;
+		regs->cr_ipsr = ms->pmsa_xpsr;
+		regs->cr_ifs = ms->pmsa_xfs;
+
+		sos->iip = ms->pmsa_iip;
+		sos->ipsr = ms->pmsa_ipsr;
+		sos->ifs = ms->pmsa_ifs;
+	}
+	regs->pr = ms->pmsa_pr;
+	regs->b0 = ms->pmsa_br0;
+	regs->ar_rsc = ms->pmsa_rsc;
+	copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &regs->r1, nat);
+	copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &regs->r2, nat);
+	copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &regs->r3, nat);
+	copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &regs->r8, nat);
+	copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &regs->r9, nat);
+	copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &regs->r10, nat);
+	copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &regs->r11, nat);
+	copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &regs->r12, nat);
+	copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &regs->r13, nat);
+	copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &regs->r14, nat);
+	copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &regs->r15, nat);
+	if (ia64_psr(regs)->bn)
+		bank = ms->pmsa_bank1_gr;
+	else
+		bank = ms->pmsa_bank0_gr;
+	copy_reg(&bank[16-16], ms->pmsa_nat_bits, &regs->r16, nat);
+	copy_reg(&bank[17-16], ms->pmsa_nat_bits, &regs->r17, nat);
+	copy_reg(&bank[18-16], ms->pmsa_nat_bits, &regs->r18, nat);
+	copy_reg(&bank[19-16], ms->pmsa_nat_bits, &regs->r19, nat);
+	copy_reg(&bank[20-16], ms->pmsa_nat_bits, &regs->r20, nat);
+	copy_reg(&bank[21-16], ms->pmsa_nat_bits, &regs->r21, nat);
+	copy_reg(&bank[22-16], ms->pmsa_nat_bits, &regs->r22, nat);
+	copy_reg(&bank[23-16], ms->pmsa_nat_bits, &regs->r23, nat);
+	copy_reg(&bank[24-16], ms->pmsa_nat_bits, &regs->r24, nat);
+	copy_reg(&bank[25-16], ms->pmsa_nat_bits, &regs->r25, nat);
+	copy_reg(&bank[26-16], ms->pmsa_nat_bits, &regs->r26, nat);
+	copy_reg(&bank[27-16], ms->pmsa_nat_bits, &regs->r27, nat);
+	copy_reg(&bank[28-16], ms->pmsa_nat_bits, &regs->r28, nat);
+	copy_reg(&bank[29-16], ms->pmsa_nat_bits, &regs->r29, nat);
+	copy_reg(&bank[30-16], ms->pmsa_nat_bits, &regs->r30, nat);
+	copy_reg(&bank[31-16], ms->pmsa_nat_bits, &regs->r31, nat);
+}
+
 /* On entry to this routine, we are running on the per cpu stack, see
  * mca_asm.h.  The original stack has not been touched by this event.  Some of
  * the original stack's registers will be in the RBS on this stack.  This stack
@@ -710,7 +960,7 @@ ia64_mca_modify_comm(const task_t *previous_current)
  * that we can do backtrace on the MCA/INIT handler code itself.
  */
 
-static task_t *
+static struct task_struct *
 ia64_mca_modify_original_stack(struct pt_regs *regs,
 		const struct switch_stack *sw,
 		struct ia64_sal_os_state *sos,
@@ -720,19 +970,18 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 	ia64_va va;
 	extern char ia64_leave_kernel[];	/* Need asm address, not function descriptor */
 	const pal_min_state_area_t *ms = sos->pal_min_state;
-	task_t *previous_current;
+	struct task_struct *previous_current;
 	struct pt_regs *old_regs;
 	struct switch_stack *old_sw;
 	unsigned size = sizeof(struct pt_regs) +
 			sizeof(struct switch_stack) + 16;
-	u64 *old_bspstore, *old_bsp;
-	u64 *new_bspstore, *new_bsp;
-	u64 old_unat, old_rnat, new_rnat, nat;
+	unsigned long *old_bspstore, *old_bsp;
+	unsigned long *new_bspstore, *new_bsp;
+	unsigned long old_unat, old_rnat, new_rnat, nat;
 	u64 slots, loadrs = regs->loadrs;
 	u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1];
 	u64 ar_bspstore = regs->ar_bspstore;
 	u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16);
-	const u64 *bank;
 	const char *msg;
 	int cpu = smp_processor_id();
 
@@ -779,10 +1028,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 	 * loadrs for the new stack and save it in the new pt_regs, where
 	 * ia64_old_stack() can get it.
 	 */
-	old_bspstore = (u64 *)ar_bspstore;
-	old_bsp = (u64 *)ar_bsp;
+	old_bspstore = (unsigned long *)ar_bspstore;
+	old_bsp = (unsigned long *)ar_bsp;
 	slots = ia64_rse_num_regs(old_bspstore, old_bsp);
-	new_bspstore = (u64 *)((u64)current + IA64_RBS_OFFSET);
+	new_bspstore = (unsigned long *)((u64)current + IA64_RBS_OFFSET);
 	new_bsp = ia64_rse_skip_regs(new_bspstore, slots);
 	regs->loadrs = (new_bsp - new_bspstore) * 8 << 16;
 
@@ -796,11 +1045,12 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 		goto no_mod;
 	}
 
+	if (r13 != sos->prev_IA64_KR_CURRENT) {
+		msg = "inconsistent previous current and r13";
+		goto no_mod;
+	}
+
 	if (!mca_recover_range(ms->pmsa_iip)) {
-		if (r13 != sos->prev_IA64_KR_CURRENT) {
-			msg = "inconsistent previous current and r13";
-			goto no_mod;
-		}
 		if ((r12 - r13) >= KERNEL_STACK_SIZE) {
 			msg = "inconsistent r12 and r13";
 			goto no_mod;
@@ -834,54 +1084,9 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 	p = (char *)r12 - sizeof(*regs);
 	old_regs = (struct pt_regs *)p;
 	memcpy(old_regs, regs, sizeof(*regs));
-	/* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
-	 * pmsa_{xip,xpsr,xfs}
-	 */
-	if (ia64_psr(regs)->ic) {
-		old_regs->cr_iip = ms->pmsa_iip;
-		old_regs->cr_ipsr = ms->pmsa_ipsr;
-		old_regs->cr_ifs = ms->pmsa_ifs;
-	} else {
-		old_regs->cr_iip = ms->pmsa_xip;
-		old_regs->cr_ipsr = ms->pmsa_xpsr;
-		old_regs->cr_ifs = ms->pmsa_xfs;
-	}
-	old_regs->pr = ms->pmsa_pr;
-	old_regs->b0 = ms->pmsa_br0;
 	old_regs->loadrs = loadrs;
-	old_regs->ar_rsc = ms->pmsa_rsc;
 	old_unat = old_regs->ar_unat;
-	copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &old_regs->r1, &old_unat);
-	copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &old_regs->r2, &old_unat);
-	copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &old_regs->r3, &old_unat);
-	copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &old_regs->r8, &old_unat);
-	copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &old_regs->r9, &old_unat);
-	copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &old_regs->r10, &old_unat);
-	copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &old_regs->r11, &old_unat);
-	copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &old_regs->r12, &old_unat);
-	copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &old_regs->r13, &old_unat);
-	copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &old_regs->r14, &old_unat);
-	copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &old_regs->r15, &old_unat);
-	if (ia64_psr(old_regs)->bn)
-		bank = ms->pmsa_bank1_gr;
-	else
-		bank = ms->pmsa_bank0_gr;
-	copy_reg(&bank[16-16], ms->pmsa_nat_bits, &old_regs->r16, &old_unat);
-	copy_reg(&bank[17-16], ms->pmsa_nat_bits, &old_regs->r17, &old_unat);
-	copy_reg(&bank[18-16], ms->pmsa_nat_bits, &old_regs->r18, &old_unat);
-	copy_reg(&bank[19-16], ms->pmsa_nat_bits, &old_regs->r19, &old_unat);
-	copy_reg(&bank[20-16], ms->pmsa_nat_bits, &old_regs->r20, &old_unat);
-	copy_reg(&bank[21-16], ms->pmsa_nat_bits, &old_regs->r21, &old_unat);
-	copy_reg(&bank[22-16], ms->pmsa_nat_bits, &old_regs->r22, &old_unat);
-	copy_reg(&bank[23-16], ms->pmsa_nat_bits, &old_regs->r23, &old_unat);
-	copy_reg(&bank[24-16], ms->pmsa_nat_bits, &old_regs->r24, &old_unat);
-	copy_reg(&bank[25-16], ms->pmsa_nat_bits, &old_regs->r25, &old_unat);
-	copy_reg(&bank[26-16], ms->pmsa_nat_bits, &old_regs->r26, &old_unat);
-	copy_reg(&bank[27-16], ms->pmsa_nat_bits, &old_regs->r27, &old_unat);
-	copy_reg(&bank[28-16], ms->pmsa_nat_bits, &old_regs->r28, &old_unat);
-	copy_reg(&bank[29-16], ms->pmsa_nat_bits, &old_regs->r29, &old_unat);
-	copy_reg(&bank[30-16], ms->pmsa_nat_bits, &old_regs->r30, &old_unat);
-	copy_reg(&bank[31-16], ms->pmsa_nat_bits, &old_regs->r31, &old_unat);
+	finish_pt_regs(old_regs, sos, &old_unat);
 
 	/* Next stack a struct switch_stack.  mca_asm.S built a partial
 	 * switch_stack, copy it and fill in the blanks using pt_regs and
@@ -949,8 +1154,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 	return previous_current;
 
 no_mod:
-	printk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
+	mprintk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
 			smp_processor_id(), type, msg);
+	old_unat = regs->ar_unat;
+	finish_pt_regs(regs, sos, &old_unat);
 	return previous_current;
 }
 
@@ -965,45 +1172,92 @@ no_mod:
 static void
 ia64_wait_for_slaves(int monarch, const char *type)
 {
-	int c, wait = 0, missing = 0;
-	for_each_online_cpu(c) {
-		if (c == monarch)
-			continue;
-		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
-			udelay(1000);		/* short wait first */
-			wait = 1;
-			break;
-		}
-	}
-	if (!wait)
-		goto all_in;
-	for_each_online_cpu(c) {
-		if (c == monarch)
-			continue;
-		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
-			udelay(5*1000000);	/* wait 5 seconds for slaves (arbitrary) */
-			if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
-				missing = 1;
-			break;
+	int c, i , wait;
+
+	/*
+	 * wait 5 seconds total for slaves (arbitrary)
+	 */
+	for (i = 0; i < 5000; i++) {
+		wait = 0;
+		for_each_online_cpu(c) {
+			if (c == monarch)
+				continue;
+			if (ia64_mc_info.imi_rendez_checkin[c]
+					== IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+				udelay(1000);		/* short wait */
+				wait = 1;
+				break;
+			}
 		}
+		if (!wait)
+			goto all_in;
 	}
-	if (!missing)
-		goto all_in;
-	printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
+
+	/*
+	 * Maybe slave(s) dead. Print buffered messages immediately.
+	 */
+	ia64_mlogbuf_finish(0);
+	mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
 	for_each_online_cpu(c) {
 		if (c == monarch)
 			continue;
 		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
-			printk(" %d", c);
+			mprintk(" %d", c);
 	}
-	printk("\n");
+	mprintk("\n");
 	return;
 
 all_in:
-	printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
+	mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
 	return;
 }
 
+/*  mca_insert_tr
+ *
+ *  Switch rid when TR reload and needed!
+ *  iord: 1: itr, 2: itr;
+ *
+*/
+static void mca_insert_tr(u64 iord)
+{
+
+	int i;
+	u64 old_rr;
+	struct ia64_tr_entry *p;
+	unsigned long psr;
+	int cpu = smp_processor_id();
+
+	if (!ia64_idtrs[cpu])
+		return;
+
+	psr = ia64_clear_ic();
+	for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
+		p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX;
+		if (p->pte & 0x1) {
+			old_rr = ia64_get_rr(p->ifa);
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, p->rr);
+				ia64_srlz_d();
+			}
+			ia64_ptr(iord, p->ifa, p->itir >> 2);
+			ia64_srlz_i();
+			if (iord & 0x1) {
+				ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (iord & 0x2) {
+				ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, old_rr);
+				ia64_srlz_d();
+			}
+		}
+	}
+	ia64_set_psr(psr);
+}
+
 /*
  * ia64_mca_handler
  *
@@ -1016,48 +1270,63 @@ all_in:
  *	further MCA logging is enabled by clearing logs.
  *	Monarch also has the duty of sending wakeup-IPIs to pull the
  *	slave processors out of rendezvous spinloop.
+ *
+ *	If multiple processors call into OS_MCA, the first will become
+ *	the monarch.  Subsequent cpus will be recorded in the mca_cpu
+ *	bitmask.  After the first monarch has processed its MCA, it
+ *	will wake up the next cpu in the mca_cpu bitmask and then go
+ *	into the rendezvous loop.  When all processors have serviced
+ *	their MCA, the last monarch frees up the rest of the processors.
  */
 void
 ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 		 struct ia64_sal_os_state *sos)
 {
-	pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
-		&sos->proc_state_param;
 	int recover, cpu = smp_processor_id();
-	task_t *previous_current;
+	struct task_struct *previous_current;
 	struct ia64_mca_notify_die nd =
-		{ .sos = sos, .monarch_cpu = &monarch_cpu };
+		{ .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover };
+	static atomic_t mca_count;
+	static cpumask_t mca_cpu;
 
-	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
-	console_loglevel = 15;	/* make sure printks make it to console */
-	printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n",
-		sos->proc_state_param, cpu, sos->monarch);
+	if (atomic_add_return(1, &mca_count) == 1) {
+		monarch_cpu = cpu;
+		sos->monarch = 1;
+	} else {
+		cpu_set(cpu, mca_cpu);
+		sos->monarch = 0;
+	}
+	mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
+		"monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch);
 
 	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
-	monarch_cpu = cpu;
-	if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__FUNCTION__);
-	ia64_wait_for_slaves(cpu, "MCA");
-
-	/* Wakeup all the processors which are spinning in the rendezvous loop.
-	 * They will leave SAL, then spin in the OS with interrupts disabled
-	 * until this monarch cpu leaves the MCA handler.  That gets control
-	 * back to the OS so we can backtrace the other cpus, backtrace when
-	 * spinning in SAL does not work.
-	 */
-	ia64_mca_wakeup_all();
-	if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__FUNCTION__);
+
+	NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1);
+
+	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
+	if (sos->monarch) {
+		ia64_wait_for_slaves(cpu, "MCA");
+
+		/* Wakeup all the processors which are spinning in the
+		 * rendezvous loop.  They will leave SAL, then spin in the OS
+		 * with interrupts disabled until this monarch cpu leaves the
+		 * MCA handler.  That gets control back to the OS so we can
+		 * backtrace the other cpus, backtrace when spinning in SAL
+		 * does not work.
+		 */
+		ia64_mca_wakeup_all();
+	} else {
+		while (cpu_isset(cpu, mca_cpu))
+			cpu_relax();	/* spin until monarch wakes us */
+	}
+
+	NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1);
 
 	/* Get the MCA error record and log it */
 	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
 
-	/* TLB error is only exist in this SAL error record */
-	recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
-	/* other error recovery */
-	   || (ia64_mca_ucmc_extension
+	/* MCA error recovery */
+	recover = (ia64_mca_ucmc_extension
 		&& ia64_mca_ucmc_extension(
 			IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
 			sos));
@@ -1067,17 +1336,44 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 		rh->severity = sal_log_severity_corrected;
 		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
 		sos->os_status = IA64_MCA_CORRECTED;
+	} else {
+		/* Dump buffered message to console */
+		ia64_mlogbuf_finish(1);
 	}
-	if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__FUNCTION__);
 
+	if (__get_cpu_var(ia64_mca_tr_reload)) {
+		mca_insert_tr(0x1); /*Reload dynamic itrs*/
+		mca_insert_tr(0x2); /*Reload dynamic itrs*/
+	}
+
+	NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1);
+
+	if (atomic_dec_return(&mca_count) > 0) {
+		int i;
+
+		/* wake up the next monarch cpu,
+		 * and put this cpu in the rendez loop.
+		 */
+		for_each_online_cpu(i) {
+			if (cpu_isset(i, mca_cpu)) {
+				monarch_cpu = i;
+				cpu_clear(i, mca_cpu);	/* wake next cpu */
+				while (monarch_cpu != -1)
+					cpu_relax();	/* spin until last cpu leaves */
+				set_curr_task(cpu, previous_current);
+				ia64_mc_info.imi_rendez_checkin[cpu]
+						= IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+				return;
+			}
+		}
+	}
 	set_curr_task(cpu, previous_current);
-	monarch_cpu = -1;
+	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+	monarch_cpu = -1;	/* This frees the slaves and previous monarchs */
 }
 
-static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL);
-static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd, NULL);
+static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd);
+static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd);
 
 /*
  * ia64_mca_cmc_int_handler
@@ -1089,27 +1385,23 @@ static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd, NULL);
  * Inputs
  *      interrupt number
  *      client data arg ptr
- *      saved registers ptr
  *
  * Outputs
  *	None
  */
 static irqreturn_t
-ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
+ia64_mca_cmc_int_handler(int cmc_irq, void *arg)
 {
 	static unsigned long	cmc_history[CMC_HISTORY_LENGTH];
 	static int		index;
 	static DEFINE_SPINLOCK(cmc_history_lock);
 
 	IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
-		       __FUNCTION__, cmc_irq, smp_processor_id());
+		       __func__, cmc_irq, smp_processor_id());
 
 	/* SAL spec states this should run w/ interrupts enabled */
 	local_irq_enable();
 
-	/* Get the CMC error record and log it */
-	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
-
 	spin_lock(&cmc_history_lock);
 	if (!cmc_polling_enabled) {
 		int i, count = 1; /* we know 1 happened now */
@@ -1142,7 +1434,7 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
 			mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
 
 			/* lock already released, get out now */
-			return IRQ_HANDLED;
+			goto out;
 		} else {
 			cmc_history[index++] = now;
 			if (index == CMC_HISTORY_LENGTH)
@@ -1150,6 +1442,12 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
 		}
 	}
 	spin_unlock(&cmc_history_lock);
+out:
+	/* Get the CMC error record and log it */
+	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
+
+	local_irq_disable();
+
 	return IRQ_HANDLED;
 }
 
@@ -1163,12 +1461,11 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
  * Inputs
  *	interrupt number
  *	client data arg ptr
- *	saved registers ptr
  * Outputs
  * 	handled
  */
 static irqreturn_t
-ia64_mca_cmc_int_caller(int cmc_irq, void *arg, struct pt_regs *ptregs)
+ia64_mca_cmc_int_caller(int cmc_irq, void *arg)
 {
 	static int start_count = -1;
 	unsigned int cpuid;
@@ -1179,11 +1476,11 @@ ia64_mca_cmc_int_caller(int cmc_irq, void *arg, struct pt_regs *ptregs)
 	if (start_count == -1)
 		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
 
-	ia64_mca_cmc_int_handler(cmc_irq, arg, ptregs);
+	ia64_mca_cmc_int_handler(cmc_irq, arg);
 
-	for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);
+	cpuid = cpumask_next(cpuid+1, cpu_online_mask);
 
-	if (cpuid < NR_CPUS) {
+	if (cpuid < nr_cpu_ids) {
 		platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
 	} else {
 		/* If no log record, switch out of polling mode */
@@ -1217,7 +1514,8 @@ static void
 ia64_mca_cmc_poll (unsigned long dummy)
 {
 	/* Trigger a CMC interrupt cascade  */
-	platform_send_ipi(first_cpu(cpu_online_map), IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
+	platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CMCP_VECTOR,
+							IA64_IPI_DM_INT, 0);
 }
 
 /*
@@ -1230,14 +1528,13 @@ ia64_mca_cmc_poll (unsigned long dummy)
  * Inputs
  *	interrupt number
  *	client data arg ptr
- *	saved registers ptr
  * Outputs
  * 	handled
  */
 #ifdef CONFIG_ACPI
 
 static irqreturn_t
-ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs)
+ia64_mca_cpe_int_caller(int cpe_irq, void *arg)
 {
 	static int start_count = -1;
 	static int poll_time = MIN_CPE_POLL_INTERVAL;
@@ -1249,9 +1546,9 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs)
 	if (start_count == -1)
 		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);
 
-	ia64_mca_cpe_int_handler(cpe_irq, arg, ptregs);
+	ia64_mca_cpe_int_handler(cpe_irq, arg);
 
-	for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);
+	cpuid = cpumask_next(cpuid+1, cpu_online_mask);
 
 	if (cpuid < NR_CPUS) {
 		platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
@@ -1294,7 +1591,8 @@ static void
 ia64_mca_cpe_poll (unsigned long dummy)
 {
 	/* Trigger a CPE interrupt cascade  */
-	platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
+	platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CPEP_VECTOR,
+							IA64_IPI_DM_INT, 0);
 }
 
 #endif /* CONFIG_ACPI */
@@ -1306,6 +1604,19 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
 	struct task_struct *g, *t;
 	if (val != DIE_INIT_MONARCH_PROCESS)
 		return NOTIFY_DONE;
+#ifdef CONFIG_KEXEC
+	if (atomic_read(&kdump_in_progress))
+		return NOTIFY_DONE;
+#endif
+
+	/*
+	 * FIXME: mlogbuf will brim over with INIT stack dumps.
+	 * To enable show_stack from INIT, we use oops_in_progress which should
+	 * be used in real oops. This would cause something wrong after INIT.
+	 */
+	BREAK_LOGLEVEL(console_loglevel);
+	ia64_mlogbuf_dump_from_init();
+
 	printk(KERN_ERR "Processes interrupted by INIT -");
 	for_each_online_cpu(c) {
 		struct ia64_sal_os_state *s;
@@ -1327,6 +1638,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
 		} while_each_thread (g, t);
 		read_unlock(&tasklist_lock);
 	}
+	/* FIXME: This will not restore zapped printk locks. */
+	RESTORE_LOGLEVEL(console_loglevel);
 	return NOTIFY_DONE;
 }
 
@@ -1353,17 +1666,14 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 {
 	static atomic_t slaves;
 	static atomic_t monarchs;
-	task_t *previous_current;
+	struct task_struct *previous_current;
 	int cpu = smp_processor_id();
 	struct ia64_mca_notify_die nd =
 		{ .sos = sos, .monarch_cpu = &monarch_cpu };
 
-	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
-	console_loglevel = 15;	/* make sure printks make it to console */
+	NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0);
 
-	(void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0);
-
-	printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
+	mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
 		sos->proc_state_param, cpu, sos->monarch);
 	salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
 
@@ -1376,8 +1686,8 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	 * fix their proms and get their customers updated.
 	 */
 	if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
-		printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
-		       __FUNCTION__, cpu);
+		mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
+		        __func__, cpu);
 		atomic_dec(&slaves);
 		sos->monarch = 1;
 	}
@@ -1388,28 +1698,37 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	 * fix their proms and get their customers updated.
 	 */
 	if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
-		printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
-			       __FUNCTION__, cpu);
+		mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
+			       __func__, cpu);
 		atomic_dec(&monarchs);
 		sos->monarch = 0;
 	}
 
 	if (!sos->monarch) {
 		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
+
+#ifdef CONFIG_KEXEC
+		while (monarch_cpu == -1 && !atomic_read(&kdump_in_progress))
+			udelay(1000);
+#else
 		while (monarch_cpu == -1)
-		       cpu_relax();	/* spin until monarch enters */
-		if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, (long)&nd, 0, 0)
-				== NOTIFY_STOP)
-			ia64_mca_spin(__FUNCTION__);
-		if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, (long)&nd, 0, 0)
-				== NOTIFY_STOP)
-			ia64_mca_spin(__FUNCTION__);
+			cpu_relax();	/* spin until monarch enters */
+#endif
+
+		NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1);
+		NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1);
+
+#ifdef CONFIG_KEXEC
+		while (monarch_cpu != -1 && !atomic_read(&kdump_in_progress))
+			udelay(1000);
+#else
 		while (monarch_cpu != -1)
-		       cpu_relax();	/* spin until monarch leaves */
-		if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0)
-				== NOTIFY_STOP)
-			ia64_mca_spin(__FUNCTION__);
-		printk("Slave on cpu %d returning to normal service.\n", cpu);
+			cpu_relax();	/* spin until monarch leaves */
+#endif
+
+		NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1);
+
+		mprintk("Slave on cpu %d returning to normal service.\n", cpu);
 		set_curr_task(cpu, previous_current);
 		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
 		atomic_dec(&slaves);
@@ -1417,9 +1736,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	}
 
 	monarch_cpu = cpu;
-	if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__FUNCTION__);
+	NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1);
 
 	/*
 	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000, INIT can be
@@ -1427,20 +1744,17 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 	 * same serial line, the user will need some time to switch out of the BMC before
 	 * the dump begins.
 	 */
-	printk("Delaying for 5 seconds...\n");
+	mprintk("Delaying for 5 seconds...\n");
 	udelay(5*1000000);
 	ia64_wait_for_slaves(cpu, "INIT");
 	/* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through
 	 * to default_monarch_init_process() above and just print all the
 	 * tasks.
 	 */
-	if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__FUNCTION__);
-	if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0)
-			== NOTIFY_STOP)
-		ia64_mca_spin(__FUNCTION__);
-	printk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
+	NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1);
+	NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1);
+
+	mprintk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
 	atomic_dec(&monarchs);
 	set_curr_task(cpu, previous_current);
 	monarch_cpu = -1;
@@ -1458,38 +1772,32 @@ __setup("disable_cpe_poll", ia64_mca_disable_cpe_polling);
 
 static struct irqaction cmci_irqaction = {
 	.handler =	ia64_mca_cmc_int_handler,
-	.flags =	SA_INTERRUPT,
 	.name =		"cmc_hndlr"
 };
 
 static struct irqaction cmcp_irqaction = {
 	.handler =	ia64_mca_cmc_int_caller,
-	.flags =	SA_INTERRUPT,
 	.name =		"cmc_poll"
 };
 
 static struct irqaction mca_rdzv_irqaction = {
 	.handler =	ia64_mca_rendez_int_handler,
-	.flags =	SA_INTERRUPT,
 	.name =		"mca_rdzv"
 };
 
 static struct irqaction mca_wkup_irqaction = {
 	.handler =	ia64_mca_wakeup_int_handler,
-	.flags =	SA_INTERRUPT,
 	.name =		"mca_wkup"
 };
 
 #ifdef CONFIG_ACPI
 static struct irqaction mca_cpe_irqaction = {
 	.handler =	ia64_mca_cpe_int_handler,
-	.flags =	SA_INTERRUPT,
 	.name =		"cpe_hndlr"
 };
 
 static struct irqaction mca_cpep_irqaction = {
 	.handler =	ia64_mca_cpe_int_caller,
-	.flags =	SA_INTERRUPT,
 	.name =		"cpe_poll"
 };
 #endif /* CONFIG_ACPI */
@@ -1500,7 +1808,7 @@ static struct irqaction mca_cpep_irqaction = {
  * format most of the fields.
  */
 
-static void __cpuinit
+static void
 format_mca_init_stack(void *mca_data, unsigned long offset,
 		const char *type, int cpu)
 {
@@ -1512,7 +1820,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
 	ti->preempt_count = 1;
 	ti->task = p;
 	ti->cpu = cpu;
-	p->thread_info = ti;
+	p->stack = ti;
 	p->state = TASK_UNINTERRUPTIBLE;
 	cpu_set(cpu, p->cpus_allowed);
 	INIT_LIST_HEAD(&p->tasks);
@@ -1522,43 +1830,45 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
 	strncpy(p->comm, type, sizeof(p->comm)-1);
 }
 
-/* Do per-CPU MCA-related initialization.  */
+/* Caller prevents this from being called after init */
+static void * __init_refok mca_bootmem(void)
+{
+	return __alloc_bootmem(sizeof(struct ia64_mca_cpu),
+	                    KERNEL_STACK_SIZE, 0);
+}
 
-void __cpuinit
+/* Do per-CPU MCA-related initialization.  */
+void
 ia64_mca_cpu_init(void *cpu_data)
 {
 	void *pal_vaddr;
+	void *data;
+	long sz = sizeof(struct ia64_mca_cpu);
+	int cpu = smp_processor_id();
 	static int first_time = 1;
 
-	if (first_time) {
-		void *mca_data;
-		int cpu;
-
-		first_time = 0;
-		mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
-					 * NR_CPUS + KERNEL_STACK_SIZE);
-		mca_data = (void *)(((unsigned long)mca_data +
-					KERNEL_STACK_SIZE - 1) &
-				(-KERNEL_STACK_SIZE));
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			format_mca_init_stack(mca_data,
-					offsetof(struct ia64_mca_cpu, mca_stack),
-					"MCA", cpu);
-			format_mca_init_stack(mca_data,
-					offsetof(struct ia64_mca_cpu, init_stack),
-					"INIT", cpu);
-			__per_cpu_mca[cpu] = __pa(mca_data);
-			mca_data += sizeof(struct ia64_mca_cpu);
-		}
-	}
-
 	/*
-	 * The MCA info structure was allocated earlier and its
-	 * physical address saved in __per_cpu_mca[cpu].  Copy that
-	 * address * to ia64_mca_data so we can access it as a per-CPU
-	 * variable.
+	 * Structure will already be allocated if cpu has been online,
+	 * then offlined.
 	 */
-	__get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()];
+	if (__per_cpu_mca[cpu]) {
+		data = __va(__per_cpu_mca[cpu]);
+	} else {
+		if (first_time) {
+			data = mca_bootmem();
+			first_time = 0;
+		} else
+			data = (void *)__get_free_pages(GFP_KERNEL,
+							get_order(sz));
+		if (!data)
+			panic("Could not allocate MCA memory for cpu %d\n",
+					cpu);
+	}
+	format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, mca_stack),
+		"MCA", cpu);
+	format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack),
+		"INIT", cpu);
+	__get_cpu_var(ia64_mca_data) = __per_cpu_mca[cpu] = __pa(data);
 
 	/*
 	 * Stash away a copy of the PTE needed to map the per-CPU page.
@@ -1580,6 +1890,36 @@ ia64_mca_cpu_init(void *cpu_data)
 							      PAGE_KERNEL));
 }
 
+static void ia64_mca_cmc_vector_adjust(void *dummy)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (!cmc_polling_enabled)
+		ia64_mca_cmc_vector_enable(NULL);
+	local_irq_restore(flags);
+}
+
+static int mca_cpu_callback(struct notifier_block *nfb,
+				      unsigned long action,
+				      void *hcpu)
+{
+	int hotcpu = (unsigned long) hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		smp_call_function_single(hotcpu, ia64_mca_cmc_vector_adjust,
+					 NULL, 0);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mca_cpu_notifier = {
+	.notifier_call = mca_cpu_callback
+};
+
 /*
  * ia64_mca_init
  *
@@ -1607,15 +1947,15 @@ ia64_mca_init(void)
 	ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave;
 	ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
 	int i;
-	s64 rc;
+	long rc;
 	struct ia64_sal_retval isrv;
-	u64 timeout = IA64_MCA_RENDEZ_TIMEOUT;	/* platform specific */
+	unsigned long timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */
 	static struct notifier_block default_init_monarch_nb = {
 		.notifier_call = default_monarch_init_process,
 		.priority = 0/* we need to notified last */
 	};
 
-	IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__);
+	IA64_MCA_DEBUG("%s: begin\n", __func__);
 
 	/* Clear the Rendez checkin flag for all cpus */
 	for(i = 0 ; i < NR_CPUS; i++)
@@ -1639,7 +1979,7 @@ ia64_mca_init(void)
 			printk(KERN_INFO "Increasing MCA rendezvous timeout from "
 				"%ld to %ld milliseconds\n", timeout, isrv.v0);
 			timeout = isrv.v0;
-			(void) notify_die(DIE_MCA_NEW_TIMEOUT, "MCA", NULL, timeout, 0, 0);
+			NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0);
 			continue;
 		}
 		printk(KERN_ERR "Failed to register rendezvous interrupt "
@@ -1659,7 +1999,7 @@ ia64_mca_init(void)
 		return;
 	}
 
-	IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __FUNCTION__);
+	IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __func__);
 
 	ia64_mc_info.imi_mca_handler        = ia64_tpa(mca_hldlr_ptr->fp);
 	/*
@@ -1680,7 +2020,7 @@ ia64_mca_init(void)
 		return;
 	}
 
-	IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __FUNCTION__,
+	IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __func__,
 		       ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp));
 
 	/*
@@ -1692,7 +2032,7 @@ ia64_mca_init(void)
 	ia64_mc_info.imi_slave_init_handler		= ia64_tpa(init_hldlr_ptr_slave->fp);
 	ia64_mc_info.imi_slave_init_handler_size	= 0;
 
-	IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__,
+	IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __func__,
 		       ia64_mc_info.imi_monarch_init_handler);
 
 	/* Register the os init handler with SAL */
@@ -1713,8 +2053,31 @@ ia64_mca_init(void)
 		return;
 	}
 
-	IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__);
+	IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __func__);
 
+	/* Initialize the areas set aside by the OS to buffer the
+	 * platform/processor error states for MCA/INIT/CMC
+	 * handling.
+	 */
+	ia64_log_init(SAL_INFO_TYPE_MCA);
+	ia64_log_init(SAL_INFO_TYPE_INIT);
+	ia64_log_init(SAL_INFO_TYPE_CMC);
+	ia64_log_init(SAL_INFO_TYPE_CPE);
+
+	mca_init = 1;
+	printk(KERN_INFO "MCA related initialization done\n");
+}
+
+
+/*
+ * These pieces cannot be done in ia64_mca_init() because it is called before
+ * early_irq_init() which would wipe out our percpu irq registrations. But we
+ * cannot leave them until ia64_mca_late_init() because by then all the other
+ * processors have been brought online and have set their own CMC vectors to
+ * point at a non-existant action. Called from arch_early_irq_init().
+ */
+void __init ia64_mca_irq_init(void)
+{
 	/*
 	 *  Configure the CMCI/P vector and handler. Interrupts for CMC are
 	 *  per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c).
@@ -1733,18 +2096,6 @@ ia64_mca_init(void)
 	/* Setup the CPEI/P handler */
 	register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
 #endif
-
-	/* Initialize the areas set aside by the OS to buffer the
-	 * platform/processor error states for MCA/INIT/CMC
-	 * handling.
-	 */
-	ia64_log_init(SAL_INFO_TYPE_MCA);
-	ia64_log_init(SAL_INFO_TYPE_INIT);
-	ia64_log_init(SAL_INFO_TYPE_CMC);
-	ia64_log_init(SAL_INFO_TYPE_CPE);
-
-	mca_init = 1;
-	printk(KERN_INFO "MCA related initialization done\n");
 }
 
 /*
@@ -1763,6 +2114,8 @@ ia64_mca_late_init(void)
 	if (!mca_init)
 		return 0;
 
+	register_hotcpu_notifier(&mca_cpu_notifier);
+
 	/* Setup the CMCI/P vector and handler */
 	init_timer(&cmc_poll_timer);
 	cmc_poll_timer.function = ia64_mca_cmc_poll;
@@ -1771,7 +2124,7 @@ ia64_mca_late_init(void)
 	cmc_polling_enabled = 0;
 	schedule_work(&cmc_enable_work);
 
-	IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __FUNCTION__);
+	IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __func__);
 
 #ifdef CONFIG_ACPI
 	/* Setup the CPEI/P vector and handler */
@@ -1780,27 +2133,29 @@ ia64_mca_late_init(void)
 	cpe_poll_timer.function = ia64_mca_cpe_poll;
 
 	{
-		irq_desc_t *desc;
 		unsigned int irq;
 
 		if (cpe_vector >= 0) {
 			/* If platform supports CPEI, enable the irq. */
-			cpe_poll_enabled = 0;
-			for (irq = 0; irq < NR_IRQS; ++irq)
-				if (irq_to_vector(irq) == cpe_vector) {
-					desc = irq_descp(irq);
-					desc->status |= IRQ_PER_CPU;
-					setup_irq(irq, &mca_cpe_irqaction);
-					ia64_cpe_irq = irq;
-				}
-			ia64_mca_register_cpev(cpe_vector);
-			IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
-		} else {
-			/* If platform doesn't support CPEI, get the timer going. */
-			if (cpe_poll_enabled) {
-				ia64_mca_cpe_poll(0UL);
-				IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __FUNCTION__);
+			irq = local_vector_to_irq(cpe_vector);
+			if (irq > 0) {
+				cpe_poll_enabled = 0;
+				irq_set_status_flags(irq, IRQ_PER_CPU);
+				setup_irq(irq, &mca_cpe_irqaction);
+				ia64_cpe_irq = irq;
+				ia64_mca_register_cpev(cpe_vector);
+				IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n",
+					__func__);
+				return 0;
 			}
+			printk(KERN_ERR "%s: Failed to find irq for CPE "
+					"interrupt handler, vector %d\n",
+					__func__, cpe_vector);
+		}
+		/* If platform doesn't support CPEI, get the timer going. */
+		if (cpe_poll_enabled) {
+			ia64_mca_cpe_poll(0UL);
+			IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __func__);
 		}
 	}
 #endif
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 6dff024cd62..d5bdf9de36b 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -1,25 +1,28 @@
-//
-// assembly portion of the IA64 MCA handling
-//
-// Mods by cfleck to integrate into kernel build
-// 00/03/15 davidm Added various stop bits to get a clean compile
-//
-// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp
-//		   kstack, switch modes, jump to C INIT handler
-//
-// 02/01/04 J.Hall <jenna.s.hall@intel.com>
-//		   Before entering virtual mode code:
-//		   1. Check for TLB CPU error
-//		   2. Restore current thread pointer to kr6
-//		   3. Move stack ptr 16 bytes to conform to C calling convention
-//
-// 04/11/12 Russ Anderson <rja@sgi.com>
-//		   Added per cpu MCA/INIT stack save areas.
-//
-// 12/08/05 Keith Owens <kaos@sgi.com>
-//		   Use per cpu MCA/INIT stacks for all data.
-//
-#include <linux/config.h>
+/*
+ * File:	mca_asm.S
+ * Purpose:	assembly portion of the IA64 MCA handling
+ *
+ * Mods by cfleck to integrate into kernel build
+ *
+ * 2000-03-15 David Mosberger-Tang <davidm@hpl.hp.com>
+ *		Added various stop bits to get a clean compile
+ *
+ * 2000-03-29 Chuck Fleckenstein <cfleck@co.intel.com>
+ *		Added code to save INIT handoff state in pt_regs format,
+ *		switch to temp kstack, switch modes, jump to C INIT handler
+ *
+ * 2002-01-04 J.Hall <jenna.s.hall@intel.com>
+ *		Before entering virtual mode code:
+ *		 1. Check for TLB CPU error
+ *		 2. Restore current thread pointer to kr6
+ *		 3. Move stack ptr 16 bytes to conform to C calling convention
+ *
+ * 2004-11-12 Russ Anderson <rja@sgi.com>
+ *		Added per cpu MCA/INIT stack save areas.
+ *
+ * 2005-12-08 Keith Owens <kaos@sgi.com>
+ *		Use per cpu MCA/INIT stacks for all data.
+ */
 #include <linux/threads.h>
 
 #include <asm/asmmacro.h>
@@ -37,6 +40,7 @@
 
 	.global ia64_do_tlb_purge
 	.global ia64_os_mca_dispatch
+	.global ia64_os_init_on_kdump
 	.global ia64_os_init_dispatch_monarch
 	.global ia64_os_init_dispatch_slave
 
@@ -55,7 +59,7 @@
 ia64_do_tlb_purge:
 #define O(member)	IA64_CPUINFO_##member##_OFFSET
 
-	GET_THIS_PADDR(r2, cpu_info)	// load phys addr of cpu_info into r2
+	GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2
 	;;
 	addl r17=O(PTCE_STRIDE),r2
 	addl r2=O(PTCE_BASE),r2
@@ -102,14 +106,6 @@ ia64_do_tlb_purge:
 	;;
 	srlz.d
 	;;
-	// 2. Purge DTR for PERCPU data.
-	movl r16=PERCPU_ADDR
-	mov r18=PERCPU_PAGE_SHIFT<<2
-	;;
-	ptr.d r16,r18
-	;;
-	srlz.d
-	;;
 	// 3. Purge ITR for PAL code.
 	GET_THIS_PADDR(r2, ia64_mca_pal_base)
 	;;
@@ -142,14 +138,6 @@ ia64_do_tlb_purge:
 //StartMain////////////////////////////////////////////////////////////////////
 
 ia64_os_mca_dispatch:
-	// Serialize all MCA processing
-	mov	r3=1;;
-	LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
-ia64_os_mca_spin:
-	xchg4	r4=[r2],r3;;
-	cmp.ne	p6,p0=r4,r0
-(p6)	br ia64_os_mca_spin
-
 	mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET	// use the MCA stack
 	LOAD_PHYSICAL(p0,r2,1f)			// return address
 	mov r19=1				// All MCA events are treated as monarch (for now)
@@ -159,7 +147,7 @@ ia64_os_mca_spin:
 	GET_IA64_MCA_DATA(r2)
 	// Using MCA stack, struct ia64_sal_os_state, variable proc_state_param
 	;;
-	add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, r2
+	add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+SOS(PROC_STATE_PARAM), r2
 	;;
 	ld8 r18=[r3]				// Get processor state parameter on existing PALE_CHECK.
 	;;
@@ -197,22 +185,6 @@ ia64_reload_tr:
 	srlz.i
 	srlz.d
 	;;
-	// 2. Reload DTR register for PERCPU data.
-	GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte)
-	;;
-	movl r16=PERCPU_ADDR		// vaddr
-	movl r18=PERCPU_PAGE_SHIFT<<2
-	;;
-	mov cr.itir=r18
-	mov cr.ifa=r16
-	;;
-	ld8 r18=[r2]			// load per-CPU PTE
-	mov r16=IA64_TR_PERCPU_DATA;
-	;;
-	itr.d dtr[r16]=r18
-	;;
-	srlz.d
-	;;
 	// 3. Reload ITR for PAL code.
 	GET_THIS_PADDR(r2, ia64_mca_pal_pte)
 	;;
@@ -248,8 +220,13 @@ ia64_reload_tr:
 	mov r20=IA64_TR_CURRENT_STACK
 	;;
 	itr.d dtr[r20]=r16
+	GET_THIS_PADDR(r2, ia64_mca_tr_reload)
+	mov r18 = 1
 	;;
 	srlz.d
+	;;
+	st8 [r2] =r18
+	;;
 
 done_tlb_purge_and_reload:
 
@@ -316,10 +293,6 @@ END(ia64_os_mca_virtual_begin)
 
 	mov		b0=r12			// SAL_CHECK return address
 
-	// release lock
-	LOAD_PHYSICAL(p0,r3,ia64_mca_serialize);;
-	st4.rel		[r3]=r0
-
 	br		b0
 
 //EndMain//////////////////////////////////////////////////////////////////////
@@ -327,6 +300,25 @@ END(ia64_os_mca_virtual_begin)
 //StartMain////////////////////////////////////////////////////////////////////
 
 //
+// NOP init handler for kdump.  In panic situation, we may receive INIT
+// while kernel transition.  Since we initialize registers on leave from
+// current kernel, no longer monarch/slave handlers of current kernel in
+// virtual mode are called safely.
+// We can unregister these init handlers from SAL, however then the INIT
+// will result in warmboot by SAL and we cannot retrieve the crashdump.
+// Therefore register this NOP function to SAL, to prevent entering virtual
+// mode and resulting warmboot by SAL.
+//
+ia64_os_init_on_kdump:
+	mov		r8=r0		// IA64_INIT_RESUME
+	mov             r9=r10		// SAL_GP
+	mov		r22=r17		// *minstate
+	;;
+	mov		r10=r0		// return to same context
+	mov		b0=r12		// SAL_CHECK return address
+	br		b0
+
+//
 // SAL to OS entry point for INIT on all processors.  This has been defined for
 // registration purposes with SAL as a part of ia64_mca_init.  Monarch and
 // slave INIT have identical processing, except for the value of the
@@ -479,9 +471,11 @@ ia64_state_save:
 	st8 [temp2]=r11,16	// rv_rc
 	mov r11=cr.iipa
 	;;
-	st8 [temp1]=r18,16	// proc_state_param
-	st8 [temp2]=r19,16	// monarch
+	st8 [temp1]=r18		// proc_state_param
+	st8 [temp2]=r19		// monarch
 	mov r6=IA64_KR(CURRENT)
+	add temp1=SOS(SAL_RA), regs
+	add temp2=SOS(SAL_GP), regs
 	;;
 	st8 [temp1]=r12,16	// sal_ra
 	st8 [temp2]=r10,16	// sal_gp
@@ -503,12 +497,14 @@ ia64_state_save:
 	st8 [temp2]=r11,16	// cr.iipa
 	mov r12=cr.iim
 	;;
-	st8 [temp1]=r12,16	// cr.iim
+	st8 [temp1]=r12		// cr.iim
 (p1)	mov r12=IA64_MCA_COLD_BOOT
 (p2)	mov r12=IA64_INIT_WARM_BOOT
 	mov r6=cr.iha
+	add temp1=SOS(OS_STATUS), regs
 	;;
-	st8 [temp2]=r6,16	// cr.iha
+	st8 [temp2]=r6		// cr.iha
+	add temp2=SOS(CONTEXT), regs
 	st8 [temp1]=r12		// os_status, default is cold boot
 	mov r6=IA64_MCA_SAME_CONTEXT
 	;;
@@ -820,8 +816,8 @@ ia64_state_restore:
 	// Restore the SAL to OS state. The previous code left regs at pt_regs.
 	add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs
 	;;
-	add temp1=IA64_SAL_OS_STATE_COMMON_OFFSET, regs
-	add temp2=IA64_SAL_OS_STATE_COMMON_OFFSET+8, regs
+	add temp1=SOS(SAL_RA), regs
+	add temp2=SOS(SAL_GP), regs
 	;;
 	ld8 r12=[temp1],16	// sal_ra
 	ld8 r9=[temp2],16	// sal_gp
@@ -842,8 +838,10 @@ ia64_state_restore:
 	;;
 	mov cr.itir=temp3
 	mov cr.iipa=temp4
-	ld8 temp3=[temp1],16	// cr.iim
-	ld8 temp4=[temp2],16	// cr.iha
+	ld8 temp3=[temp1]	// cr.iim
+	ld8 temp4=[temp2]		// cr.iha
+	add temp1=SOS(OS_STATUS), regs
+	add temp2=SOS(CONTEXT), regs
 	;;
 	mov cr.iim=temp3
 	mov cr.iha=temp4
@@ -916,7 +914,7 @@ ia64_state_restore:
 
 ia64_new_stack:
 	add regs=MCA_PT_REGS_OFFSET, r3
-	add temp2=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, r3
+	add temp2=MCA_SOS_OFFSET+SOS(PAL_MIN_STATE), r3
 	mov b0=r2			// save return address
 	GET_IA64_MCA_DATA(temp1)
 	invala
@@ -1020,18 +1018,13 @@ ia64_old_stack:
 
 ia64_set_kernel_registers:
 	add temp3=MCA_SP_OFFSET, r3
-	add temp4=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_OS_GP_OFFSET, r3
 	mov b0=r2		// save return address
 	GET_IA64_MCA_DATA(temp1)
 	;;
-	add temp4=temp4, temp1	// &struct ia64_sal_os_state.os_gp
 	add r12=temp1, temp3	// kernel stack pointer on MCA/INIT stack
 	add r13=temp1, r3	// set current to start of MCA/INIT stack
 	add r20=temp1, r3	// physical start of MCA/INIT stack
 	;;
-	ld8 r1=[temp4]		// OS GP from SAL OS state
-	;;
-	DATA_PA_TO_VA(r1,temp1)
 	DATA_PA_TO_VA(r12,temp2)
 	DATA_PA_TO_VA(r13,temp3)
 	;;
@@ -1062,6 +1055,10 @@ ia64_set_kernel_registers:
 	mov cr.itir=r18
 	mov cr.ifa=r13
 	mov r20=IA64_TR_CURRENT_STACK
+
+	movl r17=FPSR_DEFAULT
+	;;
+	mov.m ar.fpsr=r17			// set ar.fpsr to kernel default value
 	;;
 	itr.d dtr[r20]=r21
 	;;
@@ -1096,3 +1093,30 @@ GLOBAL_ENTRY(ia64_get_rnat)
 	mov ar.rsc=3
 	br.ret.sptk.many rp
 END(ia64_get_rnat)
+
+
+// void ia64_set_psr_mc(void)
+//
+// Set psr.mc bit to mask MCA/INIT.
+GLOBAL_ENTRY(ia64_set_psr_mc)
+	rsm psr.i | psr.ic		// disable interrupts
+	;;
+	srlz.d
+	;;
+	mov r14 = psr			// get psr{36:35,31:0}
+	movl r15 = 1f
+	;;
+	dep r14 = -1, r14, PSR_MC, 1	// set psr.mc
+	;;
+	dep r14 = -1, r14, PSR_IC, 1	// set psr.ic
+	;;
+	dep r14 = -1, r14, PSR_BN, 1	// keep bank1 in use
+	;;
+	mov cr.ipsr = r14
+	mov cr.ifs = r0
+	mov cr.iip = r15
+	;;
+	rfi
+1:
+	br.ret.sptk.many rp
+END(ia64_set_psr_mc)
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index ca6666b51cc..94f8bf777af 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -3,19 +3,17 @@
  * Purpose:	Generic MCA handling layer
  *
  * Copyright (C) 2004 FUJITSU LIMITED
- * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
  * Copyright (C) 2005 Silicon Graphics, Inc
  * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
  * Copyright (C) 2006 Russ Anderson <rja@sgi.com>
  */
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kallsyms.h>
-#include <linux/smp_lock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
 #include <linux/timer.h>
@@ -24,12 +22,12 @@
 #include <linux/smp.h>
 #include <linux/workqueue.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
 
 #include <asm/delay.h>
 #include <asm/machvec.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 #include <asm/sal.h>
 #include <asm/mca.h>
 
@@ -80,14 +78,30 @@ static int
 fatal_mca(const char *fmt, ...)
 {
 	va_list args;
+	char buf[256];
 
 	va_start(args, fmt);
-	vprintk(fmt, args);
+	vsnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
+	ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf);
 
 	return MCA_NOT_RECOVERED;
 }
 
+static int
+mca_recovered(const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	ia64_mca_printk(KERN_INFO "MCA: %s\n", buf);
+
+	return MCA_RECOVERED;
+}
+
 /**
  * mca_page_isolate - isolate a poisoned page in order not to use it later
  * @paddr:	poisoned memory location
@@ -141,9 +155,11 @@ mca_page_isolate(unsigned long paddr)
 void
 mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
 {
+	ia64_mlogbuf_dump();
 	printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
 		"iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
-		raw_smp_processor_id(), current->pid, current->uid,
+	       raw_smp_processor_id(), current->pid,
+		from_kuid(&init_user_ns, current_uid()),
 		iip, ipsr, paddr, current->comm);
 
 	spin_lock(&mca_bh_lock);
@@ -333,7 +349,7 @@ init_record_index_pools(void)
 
 	/* - 3 - */
 	slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
-	slidx_pool.buffer = (slidx_list_t *)
+	slidx_pool.buffer =
 		kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
 
 	return slidx_pool.buffer ? 0 : -ENOMEM;
@@ -419,6 +435,50 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
 }
 
 /**
+ * get_target_identifier - Get the valid Cache or Bus check target identifier.
+ * @peidx:	pointer of index of processor error section
+ *
+ * Return value:
+ *	target address on Success / 0 on Failure
+ */
+static u64
+get_target_identifier(peidx_table_t *peidx)
+{
+	u64 target_address = 0;
+	sal_log_mod_error_info_t *smei;
+	pal_cache_check_info_t *pcci;
+	int i, level = 9;
+
+	/*
+	 * Look through the cache checks for a valid target identifier
+	 * If more than one valid target identifier, return the one
+	 * with the lowest cache level.
+	 */
+	for (i = 0; i < peidx_cache_check_num(peidx); i++) {
+		smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i);
+		if (smei->valid.target_identifier && smei->target_identifier) {
+			pcci = (pal_cache_check_info_t *)&(smei->check_info);
+			if (!target_address || (pcci->level < level)) {
+				target_address = smei->target_identifier;
+				level = pcci->level;
+				continue;
+			}
+		}
+	}
+	if (target_address)
+		return target_address;
+
+	/*
+	 * Look at the bus check for a valid target identifier
+	 */
+	smei = peidx_bus_check(peidx, 0);
+	if (smei && smei->valid.target_identifier)
+		return smei->target_identifier;
+
+	return 0;
+}
+
+/**
  * recover_from_read_error - Try to recover the errors which type are "read"s.
  * @slidx:	pointer of index of SAL error record
  * @peidx:	pointer of index of processor error section
@@ -434,14 +494,15 @@ recover_from_read_error(slidx_table_t *slidx,
 			peidx_table_t *peidx, pal_bus_check_info_t *pbci,
 			struct ia64_sal_os_state *sos)
 {
-	sal_log_mod_error_info_t *smei;
+	u64 target_identifier;
 	pal_min_state_area_t *pmsa;
 	struct ia64_psr *psr1, *psr2;
 	ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;
 
 	/* Is target address valid? */
-	if (!pbci->tv)
-		return fatal_mca(KERN_ALERT "MCA: target address not valid\n");
+	target_identifier = get_target_identifier(peidx);
+	if (!target_identifier)
+		return fatal_mca("target address not valid");
 
 	/*
 	 * cpu read or memory-mapped io read
@@ -459,7 +520,7 @@ recover_from_read_error(slidx_table_t *slidx,
 
 	/* Is minstate valid? */
 	if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
-		return fatal_mca(KERN_ALERT "MCA: minstate not valid\n");
+		return fatal_mca("minstate not valid");
 	psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
 	psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
 
@@ -471,35 +532,32 @@ recover_from_read_error(slidx_table_t *slidx,
 	pmsa = sos->pal_min_state;
 	if (psr1->cpl != 0 ||
 	   ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
-		smei = peidx_bus_check(peidx, 0);
-		if (smei->valid.target_identifier) {
-			/*
-			 *  setup for resume to bottom half of MCA,
-			 * "mca_handler_bhhook"
-			 */
-			/* pass to bhhook as argument (gr8, ...) */
-			pmsa->pmsa_gr[8-1] = smei->target_identifier;
-			pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
-			pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
-			/* set interrupted return address (but no use) */
-			pmsa->pmsa_br0 = pmsa->pmsa_iip;
-			/* change resume address to bottom half */
-			pmsa->pmsa_iip = mca_hdlr_bh->fp;
-			pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
-			/* set cpl with kernel mode */
-			psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
-			psr2->cpl = 0;
-			psr2->ri  = 0;
-			psr2->bn  = 1;
-			psr2->i  = 0;
-
-			return MCA_RECOVERED;
-		}
-
+		/*
+		 *  setup for resume to bottom half of MCA,
+		 * "mca_handler_bhhook"
+		 */
+		/* pass to bhhook as argument (gr8, ...) */
+		pmsa->pmsa_gr[8-1] = target_identifier;
+		pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
+		pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
+		/* set interrupted return address (but no use) */
+		pmsa->pmsa_br0 = pmsa->pmsa_iip;
+		/* change resume address to bottom half */
+		pmsa->pmsa_iip = mca_hdlr_bh->fp;
+		pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
+		/* set cpl with kernel mode */
+		psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
+		psr2->cpl = 0;
+		psr2->ri  = 0;
+		psr2->bn  = 1;
+		psr2->i  = 0;
+
+		return mca_recovered("user memory corruption. "
+				"kill affected process - recovered.");
 	}
 
-	return fatal_mca(KERN_ALERT "MCA: kernel context not recovered,"
-			  " iip 0x%lx\n", pmsa->pmsa_iip);
+	return fatal_mca("kernel context not recovered, iip 0x%lx\n",
+			 pmsa->pmsa_iip);
 }
 
 /**
@@ -544,11 +602,40 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
 		default:
 			break;
 		}
+	} else if (psp->cc && !psp->bc) {	/* Cache error */
+		status = recover_from_read_error(slidx, peidx, pbci, sos);
 	}
 
 	return status;
 }
 
+/*
+ * recover_from_tlb_check
+ * @peidx:	pointer of index of processor error section
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+static int
+recover_from_tlb_check(peidx_table_t *peidx)
+{
+	sal_log_mod_error_info_t *smei;
+	pal_tlb_check_info_t *ptci;
+
+	smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0);
+	ptci = (pal_tlb_check_info_t *)&(smei->check_info);
+
+	/*
+	 * Look for signature of a duplicate TLB DTC entry, which is
+	 * a SW bug and always fatal.
+	 */
+	if (ptci->op == PAL_TLB_CHECK_OP_PURGE
+	    && !(ptci->itr || ptci->dtc || ptci->itc))
+		return fatal_mca("Duplicate TLB entry");
+
+	return mca_recovered("TLB check recovered");
+}
+
 /**
  * recover_from_processor_error
  * @platform:	whether there are some platform error section or not
@@ -560,13 +647,6 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
  * Return value:
  *	1 on Success / 0 on Failure
  */
-/*
- *  Later we try to recover when below all conditions are satisfied.
- *   1. Only one processor error section is exist.
- *   2. BUS_CHECK is exist and the others are not exist.(Except TLB_CHECK)
- *   3. The entry of BUS_CHECK_INFO is 1.
- *   4. "External bus error" flag is set and the others are not set.
- */
 
 static int
 recover_from_processor_error(int platform, slidx_table_t *slidx,
@@ -585,51 +665,52 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
 	 * The machine check is corrected.
 	 */
 	if (psp->cm == 1)
-		return MCA_RECOVERED;
+		return mca_recovered("machine check is already corrected.");
 
 	/*
 	 * The error was not contained.  Software must be reset.
 	 */
 	if (psp->us || psp->ci == 0)
-		return fatal_mca(KERN_ALERT "MCA: error not contained\n");
+		return fatal_mca("error not contained");
+
+	/*
+	 * Look for recoverable TLB check
+	 */
+	if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
+		return recover_from_tlb_check(peidx);
 
 	/*
 	 * The cache check and bus check bits have four possible states
 	 *   cc bc
-	 *    0  0	Weird record, not recovered
-	 *    1  0	Cache error, not recovered
-	 *    0  1	I/O error, attempt recovery
 	 *    1  1	Memory error, attempt recovery
+	 *    1  0	Cache error, attempt recovery
+	 *    0  1	I/O error, attempt recovery
+	 *    0  0	Other error type, not recovered
 	 */
-	if (psp->bc == 0 || pbci == NULL)
-		return fatal_mca(KERN_ALERT "MCA: No bus check\n");
+	if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL))
+		return fatal_mca("No cache or bus check");
 
 	/*
-	 * Sorry, we cannot handle so many.
+	 * Cannot handle more than one bus check.
 	 */
 	if (peidx_bus_check_num(peidx) > 1)
-		return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n");
-	/*
-	 * Well, here is only one bus error.
-	 */
+		return fatal_mca("Too many bus checks");
+
 	if (pbci->ib)
-		return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n");
-	if (pbci->cc)
-		return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n");
+		return fatal_mca("Internal Bus error");
 	if (pbci->eb && pbci->bsi > 0)
-		return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n");
+		return fatal_mca("External bus check fatal status");
 
 	/*
-	 * This is a local MCA and estimated as recoverble external bus error.
-	 * (e.g. a load from poisoned memory)
-	 * This means "there are some platform errors".
+	 * This is a local MCA and estimated as a recoverable error.
 	 */
 	if (platform)
 		return recover_from_platform_error(slidx, peidx, pbci, sos);
+
 	/*
 	 * On account of strange SAL error record, we cannot recover.
 	 */
-	return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n");
+	return fatal_mca("Strange SAL record");
 }
 
 /**
@@ -658,10 +739,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
 
 	 /* Now, OS can recover when there is one processor error section */
 	if (n_proc_err > 1)
-		return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n");
+		return fatal_mca("Too Many Errors");
 	else if (n_proc_err == 0)
-		/* Weird SAL record ... We need not to recover */
-		return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n");
+		/* Weird SAL record ... We can't do anything */
+		return fatal_mca("Weird SAL record");
 
 	/* Make index of processor error section */
 	mca_make_peidx((sal_log_processor_info_t*)
@@ -672,7 +753,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
 
 	/* Check whether MCA is global or not */
 	if (is_mca_global(&peidx, &pbci, sos))
-		return fatal_mca(KERN_ALERT "MCA: global MCA\n");
+		return fatal_mca("global MCA");
 	
 	/* Try to recover a processor error */
 	return recover_from_processor_error(platform_err, &slidx, &peidx,
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h
index 31a2e52bb16..53b8ecb5b4b 100644
--- a/arch/ia64/kernel/mca_drv.h
+++ b/arch/ia64/kernel/mca_drv.h
@@ -3,7 +3,7 @@
  * Purpose:	Define helpers for Generic MCA handling
  *
  * Copyright (C) 2004 FUJITSU LIMITED
- * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
  */
 /*
  * Processor error section:
@@ -118,3 +118,5 @@ struct mca_table_entry {
 
 extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
 extern int mca_recover_range(unsigned long);
+extern void ia64_mlogbuf_dump(void);
+
diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S
index e6a580d354b..767ac2c20d1 100644
--- a/arch/ia64/kernel/mca_drv_asm.S
+++ b/arch/ia64/kernel/mca_drv_asm.S
@@ -3,9 +3,8 @@
  * Purpose:     Assembly portion of Generic MCA handling
  *
  * Copyright (C) 2004 FUJITSU LIMITED
- * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
  */
-#include <linux/config.h>
 #include <linux/threads.h>
 
 #include <asm/asmmacro.h>
@@ -41,7 +40,11 @@ GLOBAL_ENTRY(mca_handler_bhhook)
 	mov	b6=loc1
 	;;
 	mov	loc1=rp
-	ssm	psr.i | psr.ic
+	ssm	psr.ic
+	;;
+	srlz.i
+	;;
+	ssm	psr.i
 	br.call.sptk.many rp=b6		// does not return ...
 	;;
 	mov	ar.pfs=loc0
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index 85ed54179af..cc82a7d744c 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -1,8 +1,23 @@
-#include <linux/config.h>
 
 #include <asm/cache.h>
 
 #include "entry.h"
+#include "paravirt_inst.h"
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define ACCOUNT_GET_STAMP				\
+(pUStk) mov.m r20=ar.itc;
+#define ACCOUNT_SYS_ENTER				\
+(pUStk) br.call.spnt rp=account_sys_enter		\
+	;;
+#else
+#define ACCOUNT_GET_STAMP
+#define ACCOUNT_SYS_ENTER
+#endif
+
+.section ".data..patch.rse", "a"
+.previous
 
 /*
  * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
@@ -29,16 +44,16 @@
  * Note that psr.ic is NOT turned on by this macro.  This is so that
  * we can pass interruption state as arguments to a handler.
  */
-#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)							\
+#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND)				\
 	mov r16=IA64_KR(CURRENT);	/* M */							\
 	mov r27=ar.rsc;			/* M */							\
 	mov r20=r1;			/* A */							\
 	mov r25=ar.unat;		/* M */							\
-	mov r29=cr.ipsr;		/* M */							\
+	MOV_FROM_IPSR(p0,r29);		/* M */							\
 	mov r26=ar.pfs;			/* I */							\
-	mov r28=cr.iip;			/* M */							\
+	MOV_FROM_IIP(r28);			/* M */						\
 	mov r21=ar.fpsr;		/* M */							\
-	COVER;				/* B;; (or nothing) */					\
+	__COVER;				/* B;; (or nothing) */				\
 	;;											\
 	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16;						\
 	;;											\
@@ -76,6 +91,7 @@
 	tbit.nz p15,p0=r29,IA64_PSR_I_BIT;							\
 	mov r29=b0										\
 	;;											\
+	WORKAROUND;										\
 	adds r16=PT(R8),r1;	/* initialize first base pointer */				\
 	adds r17=PT(R9),r1;	/* initialize second base pointer */				\
 (pKStk)	mov r18=r0;		/* make sure r18 isn't NaT */					\
@@ -123,11 +139,13 @@
 	;;											\
 .mem.offset 0,0; st8.spill [r16]=r2,16;								\
 .mem.offset 8,0; st8.spill [r17]=r3,16;								\
+	ACCOUNT_GET_STAMP									\
 	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
 	;;											\
 	EXTRA;											\
 	movl r1=__gp;		/* establish kernel global pointer */				\
 	;;											\
+	ACCOUNT_SYS_ENTER									\
 	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
 	;;
 
@@ -193,6 +211,40 @@
 	st8 [r25]=r10;      	/* ar.ssd */	\
 	;;
 
-#define SAVE_MIN_WITH_COVER	DO_SAVE_MIN(cover, mov r30=cr.ifs,)
-#define SAVE_MIN_WITH_COVER_R19	DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
-#define SAVE_MIN		DO_SAVE_MIN(     , mov r30=r0, )
+#define RSE_WORKAROUND				\
+(pUStk) extr.u r17=r18,3,6;			\
+(pUStk)	sub r16=r18,r22;			\
+[1:](pKStk)	br.cond.sptk.many 1f;		\
+	.xdata4 ".data..patch.rse",1b-.		\
+	;;					\
+	cmp.ge p6,p7 = 33,r17;			\
+	;;					\
+(p6)	mov r17=0x310;				\
+(p7)	mov r17=0x308;				\
+	;;					\
+	cmp.leu p1,p0=r16,r17;			\
+(p1)	br.cond.sptk.many 1f;			\
+	dep.z r17=r26,0,62;			\
+	movl r16=2f;				\
+	;;					\
+	mov ar.pfs=r17;				\
+	dep r27=r0,r27,16,14;			\
+	mov b0=r16;				\
+	;;					\
+	br.ret.sptk b0;				\
+	;;					\
+2:						\
+	mov ar.rsc=r0				\
+	;;					\
+	flushrs;				\
+	;;					\
+	mov ar.bspstore=r22			\
+	;;					\
+	mov r18=ar.bsp;				\
+	;;					\
+1:						\
+	.pred.rel "mutex", pKStk, pUStk
+
+#define SAVE_MIN_WITH_COVER	DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND)
+#define SAVE_MIN_WITH_COVER_R19	DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND)
+#define SAVE_MIN			DO_SAVE_MIN(     , mov r30=r0, , )
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index 3a30cfc9574..24603be24c1 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -25,7 +25,6 @@
    SEGREL64LSB
  */
 
-#include <linux/config.h>
 
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -136,15 +135,6 @@ static const char *reloc_name[256] = {
 
 #undef N
 
-struct got_entry {
-	uint64_t val;
-};
-
-struct fdesc {
-	uint64_t ip;
-	uint64_t gp;
-};
-
 /* Opaque struct for insns, to protect against derefs. */
 struct insn;
 
@@ -181,7 +171,8 @@ apply_imm60 (struct module *mod, struct insn *insn, uint64_t val)
 		return 0;
 	}
 	if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) {
-		printk(KERN_ERR "%s: value %ld out of IMM60 range\n", mod->name, (int64_t) val);
+		printk(KERN_ERR "%s: value %ld out of IMM60 range\n",
+			mod->name, (long) val);
 		return 0;
 	}
 	ia64_patch_imm60((u64) insn, val);
@@ -192,7 +183,8 @@ static int
 apply_imm22 (struct module *mod, struct insn *insn, uint64_t val)
 {
 	if (val + (1 << 21) >= (1 << 22)) {
-		printk(KERN_ERR "%s: value %li out of IMM22 range\n", mod->name, (int64_t)val);
+		printk(KERN_ERR "%s: value %li out of IMM22 range\n",
+			mod->name, (long)val);
 		return 0;
 	}
 	ia64_patch((u64) insn, 0x01fffcfe000UL, (  ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
@@ -206,7 +198,8 @@ static int
 apply_imm21b (struct module *mod, struct insn *insn, uint64_t val)
 {
 	if (val + (1 << 20) >= (1 << 21)) {
-		printk(KERN_ERR "%s: value %li out of IMM21b range\n", mod->name, (int64_t)val);
+		printk(KERN_ERR "%s: value %li out of IMM21b range\n",
+			mod->name, (long)val);
 		return 0;
 	}
 	ia64_patch((u64) insn, 0x11ffffe000UL, (  ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
@@ -311,18 +304,11 @@ plt_target (struct plt_entry *plt)
 
 #endif /* !USE_BRL */
 
-void *
-module_alloc (unsigned long size)
-{
-	if (!size)
-		return NULL;
-	return vmalloc(size);
-}
-
 void
 module_free (struct module *mod, void *module_region)
 {
-	if (mod->arch.init_unw_table && module_region == mod->module_init) {
+	if (mod && mod->arch.init_unw_table &&
+	    module_region == mod->module_init) {
 		unw_remove_unwind_table(mod->arch.init_unw_table);
 		mod->arch.init_unw_table = NULL;
 	}
@@ -455,6 +441,14 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
 			mod->arch.opd = s;
 		else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0)
 			mod->arch.unwind = s;
+#ifdef CONFIG_PARAVIRT
+		else if (strcmp(".paravirt_bundles",
+				secstrings + s->sh_name) == 0)
+			mod->arch.paravirt_bundles = s;
+		else if (strcmp(".paravirt_insts",
+				secstrings + s->sh_name) == 0)
+			mod->arch.paravirt_insts = s;
+#endif
 
 	if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) {
 		printk(KERN_ERR "%s: sections missing\n", mod->name);
@@ -494,7 +488,7 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
 	mod->arch.opd->sh_addralign = 8;
 	mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc);
 	DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n",
-	       __FUNCTION__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size,
+	       __func__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size,
 	       mod->arch.got->sh_size, mod->arch.opd->sh_size);
 	return 0;
 }
@@ -534,8 +528,7 @@ get_ltoff (struct module *mod, uint64_t value, int *okp)
 			goto found;
 
 	/* Not enough GOT entries? */
-	if (e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size))
-		BUG();
+	BUG_ON(e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size));
 
 	e->val = value;
 	++mod->arch.next_got_entry;
@@ -586,7 +579,7 @@ get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp)
 #if ARCH_MODULE_DEBUG
 	if (plt_target(plt) != target_ip) {
 		printk("%s: mistargeted PLT: wanted %lx, got %lx\n",
-		       __FUNCTION__, target_ip, plt_target(plt));
+		       __func__, target_ip, plt_target(plt));
 		*okp = 0;
 		return 0;
 	}
@@ -703,8 +696,9 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
 	      case RV_PCREL2:
 		if (r_type == R_IA64_PCREL21BI) {
 			if (!is_internal(mod, val)) {
-				printk(KERN_ERR "%s: %s reloc against non-local symbol (%lx)\n",
-				       __FUNCTION__, reloc_name[r_type], val);
+				printk(KERN_ERR "%s: %s reloc against "
+					"non-local symbol (%lx)\n", __func__,
+					reloc_name[r_type], (unsigned long)val);
 				return -ENOEXEC;
 			}
 			format = RF_INSN21B;
@@ -738,7 +732,7 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
 		      case R_IA64_LDXMOV:
 			if (gp_addressable(mod, val)) {
 				/* turn "ld8" into "mov": */
-				DEBUGP("%s: patching ld8 at %p to mov\n", __FUNCTION__, location);
+				DEBUGP("%s: patching ld8 at %p to mov\n", __func__, location);
 				ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL);
 			}
 			return 0;
@@ -772,7 +766,7 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
 	if (!ok)
 		return -ENOEXEC;
 
-	DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __FUNCTION__, location, val,
+	DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __func__, location, val,
 	       reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend);
 
 	switch (format) {
@@ -808,7 +802,7 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind
 	Elf64_Shdr *target_sec;
 	int ret;
 
-	DEBUGP("%s: applying section %u (%u relocs) to %u\n", __FUNCTION__,
+	DEBUGP("%s: applying section %u (%u relocs) to %u\n", __func__,
 	       relsec, n, sechdrs[relsec].sh_info);
 
 	target_sec = sechdrs + sechdrs[relsec].sh_info;
@@ -836,7 +830,7 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind
 			gp = mod->core_size / 2;
 		gp = (uint64_t) mod->module_core + ((gp + 7) & -8);
 		mod->arch.gp = gp;
-		DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp);
+		DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp);
 	}
 
 	for (i = 0; i < n; i++) {
@@ -851,18 +845,10 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind
 	return 0;
 }
 
-int
-apply_relocate (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
-		unsigned int relsec, struct module *mod)
-{
-	printk(KERN_ERR "module %s: REL relocs in section %u unsupported\n", mod->name, relsec);
-	return -ENOEXEC;
-}
-
 /*
  * Modules contain a single unwind table which covers both the core and the init text
  * sections but since the two are not contiguous, we need to split this table up such that
- * we can register (and unregister) each "segment" seperately.  Fortunately, this sounds
+ * we can register (and unregister) each "segment" separately.  Fortunately, this sounds
  * more complicated than it really is.
  */
 static void
@@ -904,7 +890,7 @@ register_unwind_table (struct module *mod)
 		init = start + num_core;
 	}
 
-	DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __FUNCTION__,
+	DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __func__,
 	       mod->name, mod->arch.gp, num_init, num_core);
 
 	/*
@@ -913,13 +899,13 @@ register_unwind_table (struct module *mod)
 	if (num_core > 0) {
 		mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
 								core, core + num_core);
-		DEBUGP("%s:  core: handle=%p [%p-%p)\n", __FUNCTION__,
+		DEBUGP("%s:  core: handle=%p [%p-%p)\n", __func__,
 		       mod->arch.core_unw_table, core, core + num_core);
 	}
 	if (num_init > 0) {
 		mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
 								init, init + num_init);
-		DEBUGP("%s:  init: handle=%p [%p-%p)\n", __FUNCTION__,
+		DEBUGP("%s:  init: handle=%p [%p-%p)\n", __func__,
 		       mod->arch.init_unw_table, init, init + num_init);
 	}
 }
@@ -927,9 +913,33 @@ register_unwind_table (struct module *mod)
 int
 module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod)
 {
-	DEBUGP("%s: init: entry=%p\n", __FUNCTION__, mod->init);
+	DEBUGP("%s: init: entry=%p\n", __func__, mod->init);
 	if (mod->arch.unwind)
 		register_unwind_table(mod);
+#ifdef CONFIG_PARAVIRT
+        if (mod->arch.paravirt_bundles) {
+                struct paravirt_patch_site_bundle *start =
+                        (struct paravirt_patch_site_bundle *)
+                        mod->arch.paravirt_bundles->sh_addr;
+                struct paravirt_patch_site_bundle *end =
+                        (struct paravirt_patch_site_bundle *)
+                        (mod->arch.paravirt_bundles->sh_addr +
+                         mod->arch.paravirt_bundles->sh_size);
+
+                paravirt_patch_apply_bundle(start, end);
+        }
+        if (mod->arch.paravirt_insts) {
+                struct paravirt_patch_site_inst *start =
+                        (struct paravirt_patch_site_inst *)
+                        mod->arch.paravirt_insts->sh_addr;
+                struct paravirt_patch_site_inst *end =
+                        (struct paravirt_patch_site_inst *)
+                        (mod->arch.paravirt_insts->sh_addr +
+                         mod->arch.paravirt_insts->sh_size);
+
+                paravirt_patch_apply_inst(start, end);
+        }
+#endif
 	return 0;
 }
 
@@ -941,14 +951,3 @@ module_arch_cleanup (struct module *mod)
 	if (mod->arch.core_unw_table)
 		unw_remove_unwind_table(mod->arch.core_unw_table);
 }
-
-#ifdef CONFIG_SMP
-void
-percpu_modcopy (void *pcpudst, const void *src, unsigned long size)
-{
-	unsigned int i;
-	for_each_possible_cpu(i) {
-		memcpy(pcpudst + __per_cpu_offset[i], src, size);
-	}
-}
-#endif /* CONFIG_SMP */
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
new file mode 100644
index 00000000000..c430f9198d1
--- /dev/null
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -0,0 +1,208 @@
+/*
+ * MSI hooks for standard x86 apic
+ */
+
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <linux/dmar.h>
+#include <asm/smp.h>
+#include <asm/msidef.h>
+
+static struct irq_chip	ia64_msi_chip;
+
+#ifdef CONFIG_SMP
+static int ia64_set_msi_irq_affinity(struct irq_data *idata,
+				     const cpumask_t *cpu_mask, bool force)
+{
+	struct msi_msg msg;
+	u32 addr, data;
+	int cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
+	unsigned int irq = idata->irq;
+
+	if (irq_prepare_move(irq, cpu))
+		return -1;
+
+	get_cached_msi_msg(irq, &msg);
+
+	addr = msg.address_lo;
+	addr &= MSI_ADDR_DEST_ID_MASK;
+	addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
+	msg.address_lo = addr;
+
+	data = msg.data;
+	data &= MSI_DATA_VECTOR_MASK;
+	data |= MSI_DATA_VECTOR(irq_to_vector(irq));
+	msg.data = data;
+
+	write_msi_msg(irq, &msg);
+	cpumask_copy(idata->affinity, cpumask_of(cpu));
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+{
+	struct msi_msg	msg;
+	unsigned long	dest_phys_id;
+	int	irq, vector;
+	cpumask_t mask;
+
+	irq = create_irq();
+	if (irq < 0)
+		return irq;
+
+	irq_set_msi_desc(irq, desc);
+	cpumask_and(&mask, &(irq_to_domain(irq)), cpu_online_mask);
+	dest_phys_id = cpu_physical_id(first_cpu(mask));
+	vector = irq_to_vector(irq);
+
+	msg.address_hi = 0;
+	msg.address_lo =
+		MSI_ADDR_HEADER |
+		MSI_ADDR_DEST_MODE_PHYS |
+		MSI_ADDR_REDIRECTION_CPU |
+		MSI_ADDR_DEST_ID_CPU(dest_phys_id);
+
+	msg.data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		MSI_DATA_DELIVERY_FIXED |
+		MSI_DATA_VECTOR(vector);
+
+	write_msi_msg(irq, &msg);
+	irq_set_chip_and_handler(irq, &ia64_msi_chip, handle_edge_irq);
+
+	return 0;
+}
+
+void ia64_teardown_msi_irq(unsigned int irq)
+{
+	destroy_irq(irq);
+}
+
+static void ia64_ack_msi_irq(struct irq_data *data)
+{
+	irq_complete_move(data->irq);
+	irq_move_irq(data);
+	ia64_eoi();
+}
+
+static int ia64_msi_retrigger_irq(struct irq_data *data)
+{
+	unsigned int vector = irq_to_vector(data->irq);
+	ia64_resend_irq(vector);
+
+	return 1;
+}
+
+/*
+ * Generic ops used on most IA64 platforms.
+ */
+static struct irq_chip ia64_msi_chip = {
+	.name			= "PCI-MSI",
+	.irq_mask		= mask_msi_irq,
+	.irq_unmask		= unmask_msi_irq,
+	.irq_ack		= ia64_ack_msi_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= ia64_set_msi_irq_affinity,
+#endif
+	.irq_retrigger		= ia64_msi_retrigger_irq,
+};
+
+
+int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+{
+	if (platform_setup_msi_irq)
+		return platform_setup_msi_irq(pdev, desc);
+
+	return ia64_setup_msi_irq(pdev, desc);
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+	if (platform_teardown_msi_irq)
+		return platform_teardown_msi_irq(irq);
+
+	return ia64_teardown_msi_irq(irq);
+}
+
+#ifdef CONFIG_INTEL_IOMMU
+#ifdef CONFIG_SMP
+static int dmar_msi_set_affinity(struct irq_data *data,
+				 const struct cpumask *mask, bool force)
+{
+	unsigned int irq = data->irq;
+	struct irq_cfg *cfg = irq_cfg + irq;
+	struct msi_msg msg;
+	int cpu = cpumask_first_and(mask, cpu_online_mask);
+
+	if (irq_prepare_move(irq, cpu))
+		return -1;
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
+
+	dmar_msi_write(irq, &msg);
+	cpumask_copy(data->affinity, mask);
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+static struct irq_chip dmar_msi_type = {
+	.name = "DMAR_MSI",
+	.irq_unmask = dmar_msi_unmask,
+	.irq_mask = dmar_msi_mask,
+	.irq_ack = ia64_ack_msi_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = dmar_msi_set_affinity,
+#endif
+	.irq_retrigger = ia64_msi_retrigger_irq,
+};
+
+static int
+msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	unsigned dest;
+	cpumask_t mask;
+
+	cpumask_and(&mask, &(irq_to_domain(irq)), cpu_online_mask);
+	dest = cpu_physical_id(first_cpu(mask));
+
+	msg->address_hi = 0;
+	msg->address_lo =
+		MSI_ADDR_HEADER |
+		MSI_ADDR_DEST_MODE_PHYS |
+		MSI_ADDR_REDIRECTION_CPU |
+		MSI_ADDR_DEST_ID_CPU(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		MSI_DATA_DELIVERY_FIXED |
+		MSI_DATA_VECTOR(cfg->vector);
+	return 0;
+}
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+				      "edge");
+	return 0;
+}
+#endif /* CONFIG_INTEL_IOMMU */
+
diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c
new file mode 100644
index 00000000000..f6769cd54bd
--- /dev/null
+++ b/arch/ia64/kernel/nr-irqs.c
@@ -0,0 +1,21 @@
+/*
+ * calculate
+ * NR_IRQS = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, FOO_NR_IRQS...)
+ * depending on config.
+ * This must be calculated before processing asm-offset.c.
+ */
+
+#define ASM_OFFSETS_C 1
+
+#include <linux/kbuild.h>
+#include <linux/threads.h>
+#include <asm/native/irq.h>
+
+void foo(void)
+{
+	union paravirt_nr_irqs_max {
+		char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS];
+	};
+
+	DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max));
+}
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index 0766493d4d0..d288cde9360 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -19,7 +19,6 @@
  * Copyright (C) 2004 Silicon Graphics, Inc.
  *   Jesse Barnes <jbarnes@sgi.com>
  */
-#include <linux/config.h>
 #include <linux/topology.h>
 #include <linux/module.h>
 #include <asm/processor.h>
@@ -29,6 +28,37 @@ u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_to_node_map);
 
 cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+EXPORT_SYMBOL(node_to_cpu_mask);
+
+void map_cpu_to_node(int cpu, int nid)
+{
+	int oldnid;
+	if (nid < 0) { /* just initialize by zero */
+		cpu_to_node_map[cpu] = 0;
+		return;
+	}
+	/* sanity check first */
+	oldnid = cpu_to_node_map[cpu];
+	if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) {
+		return; /* nothing to do */
+	}
+	/* we don't have cpu-driven node hot add yet...
+	   In usual case, node is created from SRAT at boot time. */
+	if (!node_online(nid))
+		nid = first_online_node;
+	cpu_to_node_map[cpu] = nid;
+	cpu_set(cpu, node_to_cpu_mask[nid]);
+	return;
+}
+
+void unmap_cpu_from_node(int cpu, int nid)
+{
+	WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid]));
+	WARN_ON(cpu_to_node_map[cpu] != nid);
+	cpu_to_node_map[cpu] = 0;
+	cpu_clear(cpu, node_to_cpu_mask[nid]);
+}
+
 
 /**
  * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
@@ -43,15 +73,13 @@ void __init build_cpu_to_node_map(void)
 	for(node=0; node < MAX_NUMNODES; node++)
 		cpus_clear(node_to_cpu_mask[node]);
 
-	for(cpu = 0; cpu < NR_CPUS; ++cpu) {
+	for_each_possible_early_cpu(cpu) {
 		node = -1;
 		for (i = 0; i < NR_CPUS; ++i)
 			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
 				node = node_cpuid[i].nid;
 				break;
 			}
-		cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
-		if (node >= 0)
-			cpu_set(cpu, node_to_cpu_mask[node]);
+		map_cpu_to_node(cpu, node);
 	}
 }
diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
index 5018c7f2e7a..0b533441c3c 100644
--- a/arch/ia64/kernel/pal.S
+++ b/arch/ia64/kernel/pal.S
@@ -21,11 +21,12 @@ pal_entry_point:
 	.text
 
 /*
- * Set the PAL entry point address.  This could be written in C code, but we do it here
- * to keep it all in one module (besides, it's so trivial that it's
+ * Set the PAL entry point address.  This could be written in C code, but we
+ * do it here to keep it all in one module (besides, it's so trivial that it's
  * not a big deal).
  *
- * in0		Address of the PAL entry point (text address, NOT a function descriptor).
+ * in0		Address of the PAL entry point (text address, NOT a function
+ *		descriptor).
  */
 GLOBAL_ENTRY(ia64_pal_handler_init)
 	alloc r3=ar.pfs,1,0,0,0
@@ -36,9 +37,9 @@ GLOBAL_ENTRY(ia64_pal_handler_init)
 END(ia64_pal_handler_init)
 
 /*
- * Default PAL call handler.  This needs to be coded in assembly because it uses
- * the static calling convention, i.e., the RSE may not be used and calls are
- * done via "br.cond" (not "br.call").
+ * Default PAL call handler.  This needs to be coded in assembly because it
+ * uses the static calling convention, i.e., the RSE may not be used and
+ * calls are done via "br.cond" (not "br.call").
  */
 GLOBAL_ENTRY(ia64_pal_default_handler)
 	mov r8=-1
@@ -50,12 +51,10 @@ END(ia64_pal_default_handler)
  *
  * in0         Index of PAL service
  * in1 - in3   Remaining PAL arguments
- * in4	       1 ==> clear psr.ic,  0 ==> don't clear psr.ic
- *
  */
 GLOBAL_ENTRY(ia64_pal_call_static)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
-	alloc loc1 = ar.pfs,5,5,0,0
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+	alloc loc1 = ar.pfs,4,5,0,0
 	movl loc2 = pal_entry_point
 1:	{
 	  mov r28 = in0
@@ -64,7 +63,6 @@ GLOBAL_ENTRY(ia64_pal_call_static)
 	}
 	;;
 	ld8 loc2 = [loc2]		// loc2 <- entry point
-	tbit.nz p6,p7 = in4, 0
 	adds r8 = 1f-1b,r8
 	mov loc4=ar.rsc			// save RSE configuration
 	;;
@@ -74,13 +72,11 @@ GLOBAL_ENTRY(ia64_pal_call_static)
 	.body
 	mov r30 = in2
 
-(p6)	rsm psr.i | psr.ic
 	mov r31 = in3
 	mov b7 = loc2
 
-(p7)	rsm psr.i
+	rsm psr.i
 	;;
-(p6)	srlz.i
 	mov rp = r8
 	br.cond.sptk.many b7
 1:	mov psr.l = loc3
@@ -96,8 +92,8 @@ END(ia64_pal_call_static)
  * Make a PAL call using the stacked registers calling convention.
  *
  * Inputs:
- * 	in0         Index of PAL service
- * 	in2 - in3   Remaning PAL arguments
+ *	in0         Index of PAL service
+ *	in2 - in3   Remaining PAL arguments
  */
 GLOBAL_ENTRY(ia64_pal_call_stacked)
 	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
@@ -131,18 +127,18 @@ END(ia64_pal_call_stacked)
  * Make a physical mode PAL call using the static registers calling convention.
  *
  * Inputs:
- * 	in0         Index of PAL service
- * 	in2 - in3   Remaning PAL arguments
+ *	in0         Index of PAL service
+ *	in2 - in3   Remaining PAL arguments
  *
  * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
  * So we don't need to clear them.
  */
-#define PAL_PSR_BITS_TO_CLEAR							\
-	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT  | IA64_PSR_DB | IA64_PSR_RT |	\
-	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |		\
+#define PAL_PSR_BITS_TO_CLEAR						      \
+	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT  | IA64_PSR_DB | IA64_PSR_RT |\
+	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |	      \
 	 IA64_PSR_DFL | IA64_PSR_DFH)
 
-#define PAL_PSR_BITS_TO_SET							\
+#define PAL_PSR_BITS_TO_SET						      \
 	(IA64_PSR_BN)
 
 
@@ -178,7 +174,7 @@ GLOBAL_ENTRY(ia64_pal_call_phys_static)
 	;;
 	andcm r16=loc3,r16		// removes bits to clear from psr
 	br.call.sptk.many rp=ia64_switch_mode_phys
-.ret1:	mov rp = r8			// install return address (physical)
+	mov rp = r8			// install return address (physical)
 	mov loc5 = r19
 	mov loc6 = r20
 	br.cond.sptk.many b7
@@ -188,7 +184,6 @@ GLOBAL_ENTRY(ia64_pal_call_phys_static)
 	mov r19=loc5
 	mov r20=loc6
 	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
-.ret2:
 	mov psr.l = loc3		// restore init PSR
 
 	mov ar.pfs = loc1
@@ -203,8 +198,8 @@ END(ia64_pal_call_phys_static)
  * Make a PAL call using the stacked registers in physical mode.
  *
  * Inputs:
- * 	in0         Index of PAL service
- * 	in2 - in3   Remaning PAL arguments
+ *	in0         Index of PAL service
+ *	in2 - in3   Remaining PAL arguments
  */
 GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
 	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
@@ -212,17 +207,12 @@ GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
 	movl	loc2 = pal_entry_point
 1:	{
 	  mov r28  = in0		// copy procedure index
-	  mov loc0 = rp		// save rp
+	  mov loc0 = rp			// save rp
 	}
 	.body
 	;;
 	ld8 loc2 = [loc2]		// loc2 <- entry point
-	mov out0 = in0		// first argument
-	mov out1 = in1		// copy arg2
-	mov out2 = in2		// copy arg3
-	mov out3 = in3		// copy arg3
-	;;
-	mov loc3 = psr		// save psr
+	mov loc3 = psr			// save psr
 	;;
 	mov loc4=ar.rsc			// save RSE configuration
 	dep.z loc2=loc2,0,61		// convert pal entry point to physical
@@ -236,18 +226,23 @@ GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
 	;;
 	andcm r16=loc3,r16		// removes bits to clear from psr
 	br.call.sptk.many rp=ia64_switch_mode_phys
-.ret6:
+
+	mov out0 = in0			// first argument
+	mov out1 = in1			// copy arg2
+	mov out2 = in2			// copy arg3
+	mov out3 = in3			// copy arg3
 	mov loc5 = r19
 	mov loc6 = r20
+
 	br.call.sptk.many rp=b7		// now make the call
-.ret7:
+
 	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
 	mov r16=loc3			// r16= original psr
 	mov r19=loc5
 	mov r20=loc6
-	br.call.sptk.many rp=ia64_switch_mode_virt	// return to virtual mode
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
 
-.ret8:	mov psr.l  = loc3		// restore init PSR
+	mov psr.l  = loc3		// restore init PSR
 	mov ar.pfs = loc1
 	mov rp = loc0
 	;;
@@ -257,10 +252,11 @@ GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
 END(ia64_pal_call_phys_stacked)
 
 /*
- * Save scratch fp scratch regs which aren't saved in pt_regs already (fp10-fp15).
+ * Save scratch fp scratch regs which aren't saved in pt_regs already
+ * (fp10-fp15).
  *
- * NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch
- * regs fp-low partition.
+ * NOTE: We need to do this since firmware (SAL and PAL) may use any of the
+ * scratch regs fp-low partition.
  *
  * Inputs:
  *      in0	Address of stack storage for fp regs
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index 859fb37ff49..c39c3cd3ac3 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -16,12 +16,13 @@
  * 02/05/2001   S.Eranian	fixed module support
  * 10/23/2001	S.Eranian	updated pal_perf_mon_info bug fixes
  * 03/24/2004	Ashok Raj	updated to work with CPU Hotplug
+ * 10/26/2006   Russ Anderson	updated processor features to rev 2.2 spec
  */
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/efi.h>
@@ -41,7 +42,7 @@ MODULE_LICENSE("GPL");
 
 #define PALINFO_VERSION "0.5"
 
-typedef int (*palinfo_func_t)(char*);
+typedef int (*palinfo_func_t)(struct seq_file *);
 
 typedef struct {
 	const char		*name;		/* name of the proc entry */
@@ -54,7 +55,7 @@ typedef struct {
  *  A bunch of string array to get pretty printing
  */
 
-static char *cache_types[] = {
+static const char *cache_types[] = {
 	"",			/* not used */
 	"Instruction",
 	"Data",
@@ -122,19 +123,16 @@ static const char *mem_attrib[]={
  *	- a pointer to the end of the buffer
  *
  */
-static char *
-bitvector_process(char *p, u64 vector)
+static void bitvector_process(struct seq_file *m, u64 vector)
 {
 	int i,j;
-	const char *units[]={ "", "K", "M", "G", "T" };
+	static const char *units[]={ "", "K", "M", "G", "T" };
 
 	for (i=0, j=0; i < 64; i++ , j=i/10) {
-		if (vector & 0x1) {
-			p += sprintf(p, "%d%s ", 1 << (i-j*10), units[j]);
-		}
+		if (vector & 0x1)
+			seq_printf(m, "%d%s ", 1 << (i-j*10), units[j]);
 		vector >>= 1;
 	}
-	return p;
 }
 
 /*
@@ -149,8 +147,7 @@ bitvector_process(char *p, u64 vector)
  *	- a pointer to the end of the buffer
  *
  */
-static char *
-bitregister_process(char *p, u64 *reg_info, int max)
+static void bitregister_process(struct seq_file *m, u64 *reg_info, int max)
 {
 	int i, begin, skip = 0;
 	u64 value = reg_info[0];
@@ -163,9 +160,9 @@ bitregister_process(char *p, u64 *reg_info, int max)
 
 		if ((value & 0x1) == 0 && skip == 0) {
 			if (begin  <= i - 2)
-				p += sprintf(p, "%d-%d ", begin, i-1);
+				seq_printf(m, "%d-%d ", begin, i-1);
 			else
-				p += sprintf(p, "%d ", i-1);
+				seq_printf(m, "%d ", i-1);
 			skip  = 1;
 			begin = -1;
 		} else if ((value & 0x1) && skip == 1) {
@@ -176,19 +173,15 @@ bitregister_process(char *p, u64 *reg_info, int max)
 	}
 	if (begin > -1) {
 		if (begin < 127)
-			p += sprintf(p, "%d-127", begin);
+			seq_printf(m, "%d-127", begin);
 		else
-			p += sprintf(p, "127");
+			seq_puts(m, "127");
 	}
-
-	return p;
 }
 
-static int
-power_info(char *page)
+static int power_info(struct seq_file *m)
 {
 	s64 status;
-	char *p = page;
 	u64 halt_info_buffer[8];
 	pal_power_mgmt_info_u_t *halt_info =(pal_power_mgmt_info_u_t *)halt_info_buffer;
 	int i;
@@ -198,103 +191,103 @@ power_info(char *page)
 
 	for (i=0; i < 8 ; i++ ) {
 		if (halt_info[i].pal_power_mgmt_info_s.im == 1) {
-			p += sprintf(p,	"Power level %d:\n"
-				     "\tentry_latency       : %d cycles\n"
-				     "\texit_latency        : %d cycles\n"
-				     "\tpower consumption   : %d mW\n"
-				     "\tCache+TLB coherency : %s\n", i,
-				     halt_info[i].pal_power_mgmt_info_s.entry_latency,
-				     halt_info[i].pal_power_mgmt_info_s.exit_latency,
-				     halt_info[i].pal_power_mgmt_info_s.power_consumption,
-				     halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No");
+			seq_printf(m,
+				   "Power level %d:\n"
+				   "\tentry_latency       : %d cycles\n"
+				   "\texit_latency        : %d cycles\n"
+				   "\tpower consumption   : %d mW\n"
+				   "\tCache+TLB coherency : %s\n", i,
+				   halt_info[i].pal_power_mgmt_info_s.entry_latency,
+				   halt_info[i].pal_power_mgmt_info_s.exit_latency,
+				   halt_info[i].pal_power_mgmt_info_s.power_consumption,
+				   halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No");
 		} else {
-			p += sprintf(p,"Power level %d: not implemented\n",i);
+			seq_printf(m,"Power level %d: not implemented\n", i);
 		}
 	}
-	return p - page;
+	return 0;
 }
 
-static int
-cache_info(char *page)
+static int cache_info(struct seq_file *m)
 {
-	char *p = page;
-	u64 i, levels, unique_caches;
+	unsigned long i, levels, unique_caches;
 	pal_cache_config_info_t cci;
 	int j, k;
-	s64 status;
+	long status;
 
 	if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) {
 		printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status);
 		return 0;
 	}
 
-	p += sprintf(p, "Cache levels  : %ld\nUnique caches : %ld\n\n", levels, unique_caches);
+	seq_printf(m, "Cache levels  : %ld\nUnique caches : %ld\n\n",
+		   levels, unique_caches);
 
 	for (i=0; i < levels; i++) {
-
 		for (j=2; j >0 ; j--) {
-
 			/* even without unification some level may not be present */
-			if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) {
+			if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0)
 				continue;
-			}
-			p += sprintf(p,
-				     "%s Cache level %lu:\n"
-				     "\tSize           : %u bytes\n"
-				     "\tAttributes     : ",
-				     cache_types[j+cci.pcci_unified], i+1,
-				     cci.pcci_cache_size);
-
-			if (cci.pcci_unified) p += sprintf(p, "Unified ");
-
-			p += sprintf(p, "%s\n", cache_mattrib[cci.pcci_cache_attr]);
-
-			p += sprintf(p,
-				     "\tAssociativity  : %d\n"
-				     "\tLine size      : %d bytes\n"
-				     "\tStride         : %d bytes\n",
-				     cci.pcci_assoc, 1<<cci.pcci_line_size, 1<<cci.pcci_stride);
+
+			seq_printf(m,
+				   "%s Cache level %lu:\n"
+				   "\tSize           : %u bytes\n"
+				   "\tAttributes     : ",
+				   cache_types[j+cci.pcci_unified], i+1,
+				   cci.pcci_cache_size);
+
+			if (cci.pcci_unified)
+				seq_puts(m, "Unified ");
+
+			seq_printf(m, "%s\n", cache_mattrib[cci.pcci_cache_attr]);
+
+			seq_printf(m,
+				   "\tAssociativity  : %d\n"
+				   "\tLine size      : %d bytes\n"
+				   "\tStride         : %d bytes\n",
+				   cci.pcci_assoc,
+				   1<<cci.pcci_line_size,
+				   1<<cci.pcci_stride);
 			if (j == 1)
-				p += sprintf(p, "\tStore latency  : N/A\n");
+				seq_puts(m, "\tStore latency  : N/A\n");
 			else
-				p += sprintf(p, "\tStore latency  : %d cycle(s)\n",
-						cci.pcci_st_latency);
+				seq_printf(m, "\tStore latency  : %d cycle(s)\n",
+					   cci.pcci_st_latency);
 
-			p += sprintf(p,
-				     "\tLoad latency   : %d cycle(s)\n"
-				     "\tStore hints    : ", cci.pcci_ld_latency);
+			seq_printf(m,
+				   "\tLoad latency   : %d cycle(s)\n"
+				   "\tStore hints    : ", cci.pcci_ld_latency);
 
 			for(k=0; k < 8; k++ ) {
 				if ( cci.pcci_st_hints & 0x1)
-					p += sprintf(p, "[%s]", cache_st_hints[k]);
+					seq_printf(m, "[%s]", cache_st_hints[k]);
 				cci.pcci_st_hints >>=1;
 			}
-			p += sprintf(p, "\n\tLoad hints     : ");
+			seq_puts(m, "\n\tLoad hints     : ");
 
 			for(k=0; k < 8; k++ ) {
 				if (cci.pcci_ld_hints & 0x1)
-					p += sprintf(p, "[%s]", cache_ld_hints[k]);
+					seq_printf(m, "[%s]", cache_ld_hints[k]);
 				cci.pcci_ld_hints >>=1;
 			}
-			p += sprintf(p,
-				     "\n\tAlias boundary : %d byte(s)\n"
-				     "\tTag LSB        : %d\n"
-				     "\tTag MSB        : %d\n",
-				     1<<cci.pcci_alias_boundary, cci.pcci_tag_lsb,
-				     cci.pcci_tag_msb);
+			seq_printf(m,
+				   "\n\tAlias boundary : %d byte(s)\n"
+				   "\tTag LSB        : %d\n"
+				   "\tTag MSB        : %d\n",
+				   1<<cci.pcci_alias_boundary, cci.pcci_tag_lsb,
+				   cci.pcci_tag_msb);
 
 			/* when unified, data(j=2) is enough */
-			if (cci.pcci_unified) break;
+			if (cci.pcci_unified)
+				break;
 		}
 	}
-	return p - page;
+	return 0;
 }
 
 
-static int
-vm_info(char *page)
+static int vm_info(struct seq_file *m)
 {
-	char *p = page;
 	u64 tr_pages =0, vw_pages=0, tc_pages;
 	u64 attrib;
 	pal_vm_info_1_u_t vm_info_1;
@@ -303,63 +296,70 @@ vm_info(char *page)
 	ia64_ptce_info_t ptce;
 	const char *sep;
 	int i, j;
-	s64 status;
+	long status;
 
 	if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
 		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
 	} else {
 
-		p += sprintf(p,
+		seq_printf(m,
 		     "Physical Address Space         : %d bits\n"
 		     "Virtual Address Space          : %d bits\n"
 		     "Protection Key Registers(PKR)  : %d\n"
 		     "Implemented bits in PKR.key    : %d\n"
 		     "Hash Tag ID                    : 0x%x\n"
-		     "Size of RR.rid                 : %d\n",
+		     "Size of RR.rid                 : %d\n"
+		     "Max Purges                     : ",
 		     vm_info_1.pal_vm_info_1_s.phys_add_size,
 		     vm_info_2.pal_vm_info_2_s.impl_va_msb+1,
 		     vm_info_1.pal_vm_info_1_s.max_pkr+1,
 		     vm_info_1.pal_vm_info_1_s.key_size,
 		     vm_info_1.pal_vm_info_1_s.hash_tag_id,
 		     vm_info_2.pal_vm_info_2_s.rid_size);
+		if (vm_info_2.pal_vm_info_2_s.max_purges == PAL_MAX_PURGES)
+			seq_puts(m, "unlimited\n");
+		else
+			seq_printf(m, "%d\n",
+		     		vm_info_2.pal_vm_info_2_s.max_purges ?
+				vm_info_2.pal_vm_info_2_s.max_purges : 1);
 	}
 
 	if (ia64_pal_mem_attrib(&attrib) == 0) {
-		p += sprintf(p, "Supported memory attributes    : ");
+		seq_puts(m, "Supported memory attributes    : ");
 		sep = "";
 		for (i = 0; i < 8; i++) {
 			if (attrib & (1 << i)) {
-				p += sprintf(p, "%s%s", sep, mem_attrib[i]);
+				seq_printf(m, "%s%s", sep, mem_attrib[i]);
 				sep = ", ";
 			}
 		}
-		p += sprintf(p, "\n");
+		seq_putc(m, '\n');
 	}
 
 	if ((status = ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) {
 		printk(KERN_ERR "ia64_pal_vm_page_size=%ld\n", status);
 	} else {
 
-		p += sprintf(p,
-			     "\nTLB walker                     : %simplemented\n"
-			     "Number of DTR                  : %d\n"
-			     "Number of ITR                  : %d\n"
-			     "TLB insertable page sizes      : ",
-			     vm_info_1.pal_vm_info_1_s.vw ? "" : "not ",
-			     vm_info_1.pal_vm_info_1_s.max_dtr_entry+1,
-			     vm_info_1.pal_vm_info_1_s.max_itr_entry+1);
-
+		seq_printf(m,
+			   "\nTLB walker                     : %simplemented\n"
+			   "Number of DTR                  : %d\n"
+			   "Number of ITR                  : %d\n"
+			   "TLB insertable page sizes      : ",
+			   vm_info_1.pal_vm_info_1_s.vw ? "" : "not ",
+			   vm_info_1.pal_vm_info_1_s.max_dtr_entry+1,
+			   vm_info_1.pal_vm_info_1_s.max_itr_entry+1);
 
-		p = bitvector_process(p, tr_pages);
+		bitvector_process(m, tr_pages);
 
-		p += sprintf(p, "\nTLB purgeable page sizes       : ");
+		seq_puts(m, "\nTLB purgeable page sizes       : ");
 
-		p = bitvector_process(p, vw_pages);
+		bitvector_process(m, vw_pages);
 	}
-	if ((status=ia64_get_ptce(&ptce)) != 0) {
+
+	if ((status = ia64_get_ptce(&ptce)) != 0) {
 		printk(KERN_ERR "ia64_get_ptce=%ld\n", status);
 	} else {
-		p += sprintf(p,
+		seq_printf(m,
 		     "\nPurge base address             : 0x%016lx\n"
 		     "Purge outer loop count         : %d\n"
 		     "Purge inner loop count         : %d\n"
@@ -368,7 +368,7 @@ vm_info(char *page)
 		     ptce.base, ptce.count[0], ptce.count[1],
 		     ptce.stride[0], ptce.stride[1]);
 
-		p += sprintf(p,
+		seq_printf(m,
 		     "TC Levels                      : %d\n"
 		     "Unique TC(s)                   : %d\n",
 		     vm_info_1.pal_vm_info_1_s.num_tc_levels,
@@ -378,13 +378,11 @@ vm_info(char *page)
 			for (j=2; j>0 ; j--) {
 				tc_pages = 0; /* just in case */
 
-
 				/* even without unification, some levels may not be present */
-				if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) {
+				if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0)
 					continue;
-				}
 
-				p += sprintf(p,
+				seq_printf(m,
 				     "\n%s Translation Cache Level %d:\n"
 				     "\tHash sets           : %d\n"
 				     "\tAssociativity       : %d\n"
@@ -396,15 +394,15 @@ vm_info(char *page)
 				     tc_info.tc_num_entries);
 
 				if (tc_info.tc_pf)
-					p += sprintf(p, "PreferredPageSizeOptimized ");
+					seq_puts(m, "PreferredPageSizeOptimized ");
 				if (tc_info.tc_unified)
-					p += sprintf(p, "Unified ");
+					seq_puts(m, "Unified ");
 				if (tc_info.tc_reduce_tr)
-					p += sprintf(p, "TCReduction");
+					seq_puts(m, "TCReduction");
 
-				p += sprintf(p, "\n\tSupported page sizes: ");
+				seq_puts(m, "\n\tSupported page sizes: ");
 
-				p = bitvector_process(p, tc_pages);
+				bitvector_process(m, tc_pages);
 
 				/* when unified date (j=2) is enough */
 				if (tc_info.tc_unified)
@@ -412,22 +410,20 @@ vm_info(char *page)
 			}
 		}
 	}
-	p += sprintf(p, "\n");
 
-	return p - page;
+	seq_putc(m, '\n');
+	return 0;
 }
 
 
-static int
-register_info(char *page)
+static int register_info(struct seq_file *m)
 {
-	char *p = page;
 	u64 reg_info[2];
 	u64 info;
-	u64 phys_stacked;
+	unsigned long phys_stacked;
 	pal_hints_u_t hints;
-	u64 iregs, dregs;
-	char *info_type[]={
+	unsigned long iregs, dregs;
+	static const char * const info_type[] = {
 		"Implemented AR(s)",
 		"AR(s) with read side-effects",
 		"Implemented CR(s)",
@@ -435,40 +431,40 @@ register_info(char *page)
 	};
 
 	for(info=0; info < 4; info++) {
-
-		if (ia64_pal_register_info(info, &reg_info[0], &reg_info[1]) != 0) return 0;
-
-		p += sprintf(p, "%-32s : ", info_type[info]);
-
-		p = bitregister_process(p, reg_info, 128);
-
-		p += sprintf(p, "\n");
+		if (ia64_pal_register_info(info, &reg_info[0], &reg_info[1]) != 0)
+			return 0;
+		seq_printf(m, "%-32s : ", info_type[info]);
+		bitregister_process(m, reg_info, 128);
+		seq_putc(m, '\n');
 	}
 
-	if (ia64_pal_rse_info(&phys_stacked, &hints) == 0) {
+	if (ia64_pal_rse_info(&phys_stacked, &hints) == 0)
+		seq_printf(m,
+			   "RSE stacked physical registers   : %ld\n"
+			   "RSE load/store hints             : %ld (%s)\n",
+			   phys_stacked, hints.ph_data,
+			   hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)");
 
-	p += sprintf(p,
-		     "RSE stacked physical registers   : %ld\n"
-		     "RSE load/store hints             : %ld (%s)\n",
-		     phys_stacked, hints.ph_data,
-		     hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)");
-	}
 	if (ia64_pal_debug_info(&iregs, &dregs))
 		return 0;
 
-	p += sprintf(p,
-		     "Instruction debug register pairs : %ld\n"
-		     "Data debug register pairs        : %ld\n", iregs, dregs);
+	seq_printf(m,
+		   "Instruction debug register pairs : %ld\n"
+		   "Data debug register pairs        : %ld\n", iregs, dregs);
 
-	return p - page;
+	return 0;
 }
 
-static const char *proc_features[]={
+static const char *const proc_features_0[]={		/* Feature set 0 */
 	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
 	NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
 	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
 	NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL,
-	NULL,NULL,NULL,NULL,NULL,
+	"Unimplemented instruction address fault",
+	"INIT, PMI, and LINT pins",
+	"Simple unimplemented instr addresses",
+	"Variable P-state performance",
+	"Virtual machine features implemented",
 	"XIP,XPSR,XFS implemented",
 	"XR1-XR3 implemented",
 	"Disable dynamic predicate prediction",
@@ -476,7 +472,11 @@ static const char *proc_features[]={
 	"Disable dynamic data cache prefetch",
 	"Disable dynamic inst cache prefetch",
 	"Disable dynamic branch prediction",
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL,
+	"Disable P-states",
+	"Enable MCA on Data Poisoning",
+	"Enable vmsw instruction",
+	"Enable extern environmental notification",
 	"Disable BINIT on processor time-out",
 	"Disable dynamic power management (DPM)",
 	"Disable coherency",
@@ -487,29 +487,91 @@ static const char *proc_features[]={
 	"Enable BERR promotion"
 };
 
+static const char *const proc_features_16[]={		/* Feature set 16 */
+	"Disable ETM",
+	"Enable ETM",
+	"Enable MCA on half-way timer",
+	"Enable snoop WC",
+	NULL,
+	"Enable Fast Deferral",
+	"Disable MCA on memory aliasing",
+	"Enable RSB",
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	"DP system processor",
+	"Low Voltage",
+	"HT supported",
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL
+};
 
-static int
-processor_info(char *page)
+static const char *const *const proc_features[]={
+	proc_features_0,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL,
+	proc_features_16,
+	NULL, NULL, NULL, NULL,
+};
+
+static void feature_set_info(struct seq_file *m, u64 avail, u64 status, u64 control,
+			     unsigned long set)
 {
-	char *p = page;
-	const char **v = proc_features;
-	u64 avail=1, status=1, control=1;
+	const char *const *vf, *const *v;
 	int i;
-	s64 ret;
 
-	if ((ret=ia64_pal_proc_get_features(&avail, &status, &control)) != 0) return 0;
+	vf = v = proc_features[set];
+	for(i=0; i < 64; i++, avail >>=1, status >>=1, control >>=1) {
 
-	for(i=0; i < 64; i++, v++,avail >>=1, status >>=1, control >>=1) {
-		if ( ! *v ) continue;
-		p += sprintf(p, "%-40s : %s%s %s\n", *v,
-				avail & 0x1 ? "" : "NotImpl",
-				avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "",
-				avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): "");
+		if (!(control))		/* No remaining bits set */
+			break;
+		if (!(avail & 0x1))	/* Print only bits that are available */
+			continue;
+		if (vf)
+			v = vf + i;
+		if ( v && *v ) {
+			seq_printf(m, "%-40s : %s %s\n", *v,
+				avail & 0x1 ? (status & 0x1 ?
+					      "On " : "Off"): "",
+				avail & 0x1 ? (control & 0x1 ?
+						"Ctrl" : "NoCtrl"): "");
+		} else {
+			seq_printf(m, "Feature set %2ld bit %2d\t\t\t"
+					" : %s %s\n",
+				set, i,
+				avail & 0x1 ? (status & 0x1 ?
+						"On " : "Off"): "",
+				avail & 0x1 ? (control & 0x1 ?
+						"Ctrl" : "NoCtrl"): "");
+		}
 	}
-	return p - page;
 }
 
-static const char *bus_features[]={
+static int processor_info(struct seq_file *m)
+{
+	u64 avail=1, status=1, control=1, feature_set=0;
+	s64 ret;
+
+	do {
+		ret = ia64_pal_proc_get_features(&avail, &status, &control,
+						feature_set);
+		if (ret < 0)
+			return 0;
+
+		if (ret == 1) {
+			feature_set++;
+			continue;
+		}
+
+		feature_set_info(m, avail, status, control, feature_set);
+		feature_set++;
+	} while(1);
+
+	return 0;
+}
+
+static const char *const bus_features[]={
 	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
 	NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
 	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
@@ -535,168 +597,155 @@ static const char *bus_features[]={
 };
 
 
-static int
-bus_info(char *page)
+static int bus_info(struct seq_file *m)
 {
-	char *p = page;
-	const char **v = bus_features;
+	const char *const *v = bus_features;
 	pal_bus_features_u_t av, st, ct;
 	u64 avail, status, control;
 	int i;
 	s64 ret;
 
-	if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) return 0;
+	if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0)
+		return 0;
 
 	avail   = av.pal_bus_features_val;
 	status  = st.pal_bus_features_val;
 	control = ct.pal_bus_features_val;
 
 	for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) {
-		if ( ! *v ) continue;
-		p += sprintf(p, "%-48s : %s%s %s\n", *v,
-				avail & 0x1 ? "" : "NotImpl",
-				avail & 0x1 ? (status  & 0x1 ? "On" : "Off"): "",
-				avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): "");
+		if ( ! *v )
+			continue;
+		seq_printf(m, "%-48s : %s%s %s\n", *v,
+			   avail & 0x1 ? "" : "NotImpl",
+			   avail & 0x1 ? (status  & 0x1 ? "On" : "Off"): "",
+			   avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): "");
 	}
-	return p - page;
+	return 0;
 }
 
-static int
-version_info(char *page)
+static int version_info(struct seq_file *m)
 {
 	pal_version_u_t min_ver, cur_ver;
-	char *p = page;
 
-	/* The PAL_VERSION call is advertised as being able to support
-	 * both physical and virtual mode calls. This seems to be a documentation
-	 * bug rather than firmware bug. In fact, it does only support physical mode.
-	 * So now the code reflects this fact and the pal_version() has been updated
-	 * accordingly.
-	 */
-	if (ia64_pal_version(&min_ver, &cur_ver) != 0) return 0;
-
-	p += sprintf(p,
-		     "PAL_vendor : 0x%02x (min=0x%02x)\n"
-		     "PAL_A      : %x.%x.%x (min=%x.%x.%x)\n"
-		     "PAL_B      : %x.%x.%x (min=%x.%x.%x)\n",
-		     cur_ver.pal_version_s.pv_pal_vendor, min_ver.pal_version_s.pv_pal_vendor,
-
-		     cur_ver.pal_version_s.pv_pal_a_model>>4,
-		     cur_ver.pal_version_s.pv_pal_a_model&0xf, cur_ver.pal_version_s.pv_pal_a_rev,
-		     min_ver.pal_version_s.pv_pal_a_model>>4,
-		     min_ver.pal_version_s.pv_pal_a_model&0xf, min_ver.pal_version_s.pv_pal_a_rev,
-
-		     cur_ver.pal_version_s.pv_pal_b_model>>4,
-		     cur_ver.pal_version_s.pv_pal_b_model&0xf, cur_ver.pal_version_s.pv_pal_b_rev,
-		     min_ver.pal_version_s.pv_pal_b_model>>4,
-		     min_ver.pal_version_s.pv_pal_b_model&0xf, min_ver.pal_version_s.pv_pal_b_rev);
-	return p - page;
+	if (ia64_pal_version(&min_ver, &cur_ver) != 0)
+		return 0;
+
+	seq_printf(m,
+		   "PAL_vendor : 0x%02x (min=0x%02x)\n"
+		   "PAL_A      : %02x.%02x (min=%02x.%02x)\n"
+		   "PAL_B      : %02x.%02x (min=%02x.%02x)\n",
+		   cur_ver.pal_version_s.pv_pal_vendor,
+		   min_ver.pal_version_s.pv_pal_vendor,
+		   cur_ver.pal_version_s.pv_pal_a_model,
+		   cur_ver.pal_version_s.pv_pal_a_rev,
+		   min_ver.pal_version_s.pv_pal_a_model,
+		   min_ver.pal_version_s.pv_pal_a_rev,
+		   cur_ver.pal_version_s.pv_pal_b_model,
+		   cur_ver.pal_version_s.pv_pal_b_rev,
+		   min_ver.pal_version_s.pv_pal_b_model,
+		   min_ver.pal_version_s.pv_pal_b_rev);
+	return 0;
 }
 
-static int
-perfmon_info(char *page)
+static int perfmon_info(struct seq_file *m)
 {
-	char *p = page;
 	u64 pm_buffer[16];
 	pal_perf_mon_info_u_t pm_info;
 
-	if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0) return 0;
-
-	p += sprintf(p,
-		     "PMC/PMD pairs                 : %d\n"
-		     "Counter width                 : %d bits\n"
-		     "Cycle event number            : %d\n"
-		     "Retired event number          : %d\n"
-		     "Implemented PMC               : ",
-		     pm_info.pal_perf_mon_info_s.generic, pm_info.pal_perf_mon_info_s.width,
-		     pm_info.pal_perf_mon_info_s.cycles, pm_info.pal_perf_mon_info_s.retired);
+	if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0)
+		return 0;
 
-	p = bitregister_process(p, pm_buffer, 256);
-	p += sprintf(p, "\nImplemented PMD               : ");
-	p = bitregister_process(p, pm_buffer+4, 256);
-	p += sprintf(p, "\nCycles count capable          : ");
-	p = bitregister_process(p, pm_buffer+8, 256);
-	p += sprintf(p, "\nRetired bundles count capable : ");
+	seq_printf(m,
+		   "PMC/PMD pairs                 : %d\n"
+		   "Counter width                 : %d bits\n"
+		   "Cycle event number            : %d\n"
+		   "Retired event number          : %d\n"
+		   "Implemented PMC               : ",
+		   pm_info.pal_perf_mon_info_s.generic,
+		   pm_info.pal_perf_mon_info_s.width,
+		   pm_info.pal_perf_mon_info_s.cycles,
+		   pm_info.pal_perf_mon_info_s.retired);
+
+	bitregister_process(m, pm_buffer, 256);
+	seq_puts(m, "\nImplemented PMD               : ");
+	bitregister_process(m, pm_buffer+4, 256);
+	seq_puts(m, "\nCycles count capable          : ");
+	bitregister_process(m, pm_buffer+8, 256);
+	seq_puts(m, "\nRetired bundles count capable : ");
 
 #ifdef CONFIG_ITANIUM
 	/*
 	 * PAL_PERF_MON_INFO reports that only PMC4 can be used to count CPU_CYCLES
 	 * which is wrong, both PMC4 and PMD5 support it.
 	 */
-	if (pm_buffer[12] == 0x10) pm_buffer[12]=0x30;
+	if (pm_buffer[12] == 0x10)
+		pm_buffer[12]=0x30;
 #endif
 
-	p = bitregister_process(p, pm_buffer+12, 256);
-
-	p += sprintf(p, "\n");
-
-	return p - page;
+	bitregister_process(m, pm_buffer+12, 256);
+	seq_putc(m, '\n');
+	return 0;
 }
 
-static int
-frequency_info(char *page)
+static int frequency_info(struct seq_file *m)
 {
-	char *p = page;
 	struct pal_freq_ratio proc, itc, bus;
-	u64 base;
+	unsigned long base;
 
 	if (ia64_pal_freq_base(&base) == -1)
-		p += sprintf(p, "Output clock            : not implemented\n");
+		seq_puts(m, "Output clock            : not implemented\n");
 	else
-		p += sprintf(p, "Output clock            : %ld ticks/s\n", base);
+		seq_printf(m, "Output clock            : %ld ticks/s\n", base);
 
 	if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0;
 
-	p += sprintf(p,
+	seq_printf(m,
 		     "Processor/Clock ratio   : %d/%d\n"
 		     "Bus/Clock ratio         : %d/%d\n"
 		     "ITC/Clock ratio         : %d/%d\n",
 		     proc.num, proc.den, bus.num, bus.den, itc.num, itc.den);
-
-	return p - page;
+	return 0;
 }
 
-static int
-tr_info(char *page)
+static int tr_info(struct seq_file *m)
 {
-	char *p = page;
-	s64 status;
+	long status;
 	pal_tr_valid_u_t tr_valid;
 	u64 tr_buffer[4];
 	pal_vm_info_1_u_t vm_info_1;
 	pal_vm_info_2_u_t vm_info_2;
-	u64 i, j;
-	u64 max[3], pgm;
+	unsigned long i, j;
+	unsigned long max[3], pgm;
 	struct ifa_reg {
-		u64 valid:1;
-		u64 ig:11;
-		u64 vpn:52;
+		unsigned long valid:1;
+		unsigned long ig:11;
+		unsigned long vpn:52;
 	} *ifa_reg;
 	struct itir_reg {
-		u64 rv1:2;
-		u64 ps:6;
-		u64 key:24;
-		u64 rv2:32;
+		unsigned long rv1:2;
+		unsigned long ps:6;
+		unsigned long key:24;
+		unsigned long rv2:32;
 	} *itir_reg;
 	struct gr_reg {
-		u64 p:1;
-		u64 rv1:1;
-		u64 ma:3;
-		u64 a:1;
-		u64 d:1;
-		u64 pl:2;
-		u64 ar:3;
-		u64 ppn:38;
-		u64 rv2:2;
-		u64 ed:1;
-		u64 ig:11;
+		unsigned long p:1;
+		unsigned long rv1:1;
+		unsigned long ma:3;
+		unsigned long a:1;
+		unsigned long d:1;
+		unsigned long pl:2;
+		unsigned long ar:3;
+		unsigned long ppn:38;
+		unsigned long rv2:2;
+		unsigned long ed:1;
+		unsigned long ig:11;
 	} *gr_reg;
 	struct rid_reg {
-		u64 ig1:1;
-		u64 rv1:1;
-		u64 ig2:6;
-		u64 rid:24;
-		u64 rv2:32;
+		unsigned long ig1:1;
+		unsigned long rv1:1;
+		unsigned long ig2:6;
+		unsigned long rid:24;
+		unsigned long rv2:32;
 	} *rid_reg;
 
 	if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
@@ -718,39 +767,40 @@ tr_info(char *page)
 
 		ifa_reg  = (struct ifa_reg *)&tr_buffer[2];
 
-		if (ifa_reg->valid == 0) continue;
+		if (ifa_reg->valid == 0)
+			continue;
 
 		gr_reg   = (struct gr_reg *)tr_buffer;
 		itir_reg = (struct itir_reg *)&tr_buffer[1];
 		rid_reg  = (struct rid_reg *)&tr_buffer[3];
 
 		pgm	 = -1 << (itir_reg->ps - 12);
-		p += sprintf(p,
-			     "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n"
-			     "\tppn  : 0x%lx\n"
-			     "\tvpn  : 0x%lx\n"
-			     "\tps   : ",
-			     "ID"[i], j,
-			     tr_valid.pal_tr_valid_s.access_rights_valid,
-			     tr_valid.pal_tr_valid_s.priv_level_valid,
-			     tr_valid.pal_tr_valid_s.dirty_bit_valid,
-			     tr_valid.pal_tr_valid_s.mem_attr_valid,
-			     (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12);
-
-		p = bitvector_process(p, 1<< itir_reg->ps);
-
-		p += sprintf(p,
-			     "\n\tpl   : %d\n"
-			     "\tar   : %d\n"
-			     "\trid  : %x\n"
-			     "\tp    : %d\n"
-			     "\tma   : %d\n"
-			     "\td    : %d\n",
-			     gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma,
-			     gr_reg->d);
+		seq_printf(m,
+			   "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n"
+			   "\tppn  : 0x%lx\n"
+			   "\tvpn  : 0x%lx\n"
+			   "\tps   : ",
+			   "ID"[i], j,
+			   tr_valid.pal_tr_valid_s.access_rights_valid,
+			   tr_valid.pal_tr_valid_s.priv_level_valid,
+			   tr_valid.pal_tr_valid_s.dirty_bit_valid,
+			   tr_valid.pal_tr_valid_s.mem_attr_valid,
+			   (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12);
+
+		bitvector_process(m, 1<< itir_reg->ps);
+
+		seq_printf(m,
+			   "\n\tpl   : %d\n"
+			   "\tar   : %d\n"
+			   "\trid  : %x\n"
+			   "\tp    : %d\n"
+			   "\tma   : %d\n"
+			   "\td    : %d\n",
+			   gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma,
+			   gr_reg->d);
 		}
 	}
-	return p - page;
+	return 0;
 }
 
 
@@ -758,7 +808,7 @@ tr_info(char *page)
 /*
  * List {name,function} pairs for every entry in /proc/palinfo/cpu*
  */
-static palinfo_entry_t palinfo_entries[]={
+static const palinfo_entry_t palinfo_entries[]={
 	{ "version_info",	version_info, },
 	{ "vm_info",		vm_info, },
 	{ "cache_info",		cache_info, },
@@ -773,17 +823,6 @@ static palinfo_entry_t palinfo_entries[]={
 
 #define NR_PALINFO_ENTRIES	(int) ARRAY_SIZE(palinfo_entries)
 
-/*
- * this array is used to keep track of the proc entries we create. This is
- * required in the module mode when we need to remove all entries. The procfs code
- * does not do recursion of deletion
- *
- * Notes:
- *	- +1 accounts for the cpuN directory entry in /proc/pal
- */
-#define NR_PALINFO_PROC_ENTRIES	(NR_CPUS*(NR_PALINFO_ENTRIES+1))
-
-static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES];
 static struct proc_dir_entry *palinfo_dir;
 
 /*
@@ -811,7 +850,7 @@ typedef union {
  */
 typedef struct {
 	palinfo_func_t	func;	/* pointer to function to call */
-	char		*page;	/* buffer to store results */
+	struct seq_file *m;	/* buffer to store results */
 	int		ret;	/* return value from call */
 } palinfo_smp_data_t;
 
@@ -824,13 +863,7 @@ static void
 palinfo_smp_call(void *info)
 {
 	palinfo_smp_data_t *data = (palinfo_smp_data_t *)info;
-	if (data == NULL) {
-		printk(KERN_ERR "palinfo: data pointer is NULL\n");
-		data->ret = 0; /* no output */
-		return;
-	}
-	/* does this actual call */
-	data->ret = (*data->func)(data->page);
+	data->ret = (*data->func)(data->m);
 }
 
 /*
@@ -840,18 +873,18 @@ palinfo_smp_call(void *info)
  *	otherwise how many bytes in the "page" buffer were written
  */
 static
-int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
+int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f)
 {
 	palinfo_smp_data_t ptr;
 	int ret;
 
 	ptr.func = palinfo_entries[f->func_id].proc_read;
-	ptr.page = page;
+	ptr.m = m;
 	ptr.ret  = 0; /* just in case */
 
 
 	/* will send IPI to other CPU and wait for completion of remote call */
-	if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 0, 1))) {
+	if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 1))) {
 		printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: "
 		       "error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret);
 		return 0;
@@ -860,7 +893,7 @@ int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
 }
 #else /* ! CONFIG_SMP */
 static
-int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
+int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f)
 {
 	printk(KERN_ERR "palinfo: should not be called with non SMP kernel\n");
 	return 0;
@@ -870,115 +903,84 @@ int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
 /*
  * Entry point routine: all calls go through this function
  */
-static int
-palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data)
+static int proc_palinfo_show(struct seq_file *m, void *v)
 {
-	int len=0;
-	pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&data;
+	pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&m->private;
 
 	/*
 	 * in SMP mode, we may need to call another CPU to get correct
 	 * information. PAL, by definition, is processor specific
 	 */
 	if (f->req_cpu == get_cpu())
-		len = (*palinfo_entries[f->func_id].proc_read)(page);
+		(*palinfo_entries[f->func_id].proc_read)(m);
 	else
-		len = palinfo_handle_smp(f, page);
+		palinfo_handle_smp(m, f);
 
 	put_cpu();
+	return 0;
+}
 
-	if (len <= off+count) *eof = 1;
-
-	*start = page + off;
-	len   -= off;
-
-	if (len>count) len = count;
-	if (len<0) len = 0;
-
-	return len;
+static int proc_palinfo_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_palinfo_show, PDE_DATA(inode));
 }
 
+static const struct file_operations proc_palinfo_fops = {
+	.open		= proc_palinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static void
 create_palinfo_proc_entries(unsigned int cpu)
 {
-#	define CPUSTR	"cpu%d"
-
 	pal_func_cpu_u_t f;
-	struct proc_dir_entry **pdir;
 	struct proc_dir_entry *cpu_dir;
 	int j;
-	char cpustr[sizeof(CPUSTR)];
-
-
-	/*
-	 * we keep track of created entries in a depth-first order for
-	 * cleanup purposes. Each entry is stored into palinfo_proc_entries
-	 */
-	sprintf(cpustr,CPUSTR, cpu);
+	char cpustr[3+4+1];	/* cpu numbers are up to 4095 on itanic */
+	sprintf(cpustr, "cpu%d", cpu);
 
 	cpu_dir = proc_mkdir(cpustr, palinfo_dir);
+	if (!cpu_dir)
+		return;
 
 	f.req_cpu = cpu;
 
-	/*
-	 * Compute the location to store per cpu entries
-	 * We dont store the top level entry in this list, but
-	 * remove it finally after removing all cpu entries.
-	 */
-	pdir = &palinfo_proc_entries[cpu*(NR_PALINFO_ENTRIES+1)];
-	*pdir++ = cpu_dir;
 	for (j=0; j < NR_PALINFO_ENTRIES; j++) {
 		f.func_id = j;
-		*pdir = create_proc_read_entry(
-				palinfo_entries[j].name, 0, cpu_dir,
-				palinfo_read_entry, (void *)f.value);
-		if (*pdir)
-			(*pdir)->owner = THIS_MODULE;
-		pdir++;
+		proc_create_data(palinfo_entries[j].name, 0, cpu_dir,
+				 &proc_palinfo_fops, (void *)f.value);
 	}
 }
 
 static void
 remove_palinfo_proc_entries(unsigned int hcpu)
 {
-	int j;
-	struct proc_dir_entry *cpu_dir, **pdir;
-
-	pdir = &palinfo_proc_entries[hcpu*(NR_PALINFO_ENTRIES+1)];
-	cpu_dir = *pdir;
-	*pdir++=NULL;
-	for (j=0; j < (NR_PALINFO_ENTRIES); j++) {
-		if ((*pdir)) {
-			remove_proc_entry ((*pdir)->name, cpu_dir);
-			*pdir ++= NULL;
-		}
-	}
-
-	if (cpu_dir) {
-		remove_proc_entry(cpu_dir->name, palinfo_dir);
-	}
+	char cpustr[3+4+1];	/* cpu numbers are up to 4095 on itanic */
+	sprintf(cpustr, "cpu%d", hcpu);
+	remove_proc_subtree(cpustr, palinfo_dir);
 }
 
 static int palinfo_cpu_callback(struct notifier_block *nfb,
-								unsigned long action,
-								void *hcpu)
+					unsigned long action, void *hcpu)
 {
 	unsigned int hotcpu = (unsigned long)hcpu;
 
 	switch (action) {
 	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
 		create_palinfo_proc_entries(hotcpu);
 		break;
-#ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
 		remove_palinfo_proc_entries(hotcpu);
 		break;
-#endif
 	}
 	return NOTIFY_OK;
 }
 
-static struct notifier_block palinfo_cpu_notifier =
+static struct notifier_block __refdata palinfo_cpu_notifier =
 {
 	.notifier_call = palinfo_cpu_callback,
 	.priority = 0,
@@ -991,6 +993,10 @@ palinfo_init(void)
 
 	printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION);
 	palinfo_dir = proc_mkdir("pal", NULL);
+	if (!palinfo_dir)
+		return -ENOMEM;
+
+	cpu_notifier_register_begin();
 
 	/* Create palinfo dirs in /proc for all online cpus */
 	for_each_online_cpu(i) {
@@ -998,7 +1004,9 @@ palinfo_init(void)
 	}
 
 	/* Register for future delivery via notify registration */
-	register_cpu_notifier(&palinfo_cpu_notifier);
+	__register_hotcpu_notifier(&palinfo_cpu_notifier);
+
+	cpu_notifier_register_done();
 
 	return 0;
 }
@@ -1006,22 +1014,8 @@ palinfo_init(void)
 static void __exit
 palinfo_exit(void)
 {
-	int i = 0;
-
-	/* remove all nodes: depth first pass. Could optimize this  */
-	for_each_online_cpu(i) {
-		remove_palinfo_proc_entries(i);
-	}
-
-	/*
-	 * Remove the top level entry finally
-	 */
-	remove_proc_entry(palinfo_dir->name, NULL);
-
-	/*
-	 * Unregister from cpu notifier callbacks
-	 */
-	unregister_cpu_notifier(&palinfo_cpu_notifier);
+	unregister_hotcpu_notifier(&palinfo_cpu_notifier);
+	remove_proc_subtree("pal", NULL);
 }
 
 module_init(palinfo_init);
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
new file mode 100644
index 00000000000..1b22f6de293
--- /dev/null
+++ b/arch/ia64/kernel/paravirt.c
@@ -0,0 +1,902 @@
+/******************************************************************************
+ * arch/ia64/kernel/paravirt.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *     Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+
+#include <linux/compiler.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <asm/iosapic.h>
+#include <asm/paravirt.h>
+
+/***************************************************************************
+ * general info
+ */
+struct pv_info pv_info = {
+	.kernel_rpl = 0,
+	.paravirt_enabled = 0,
+	.name = "bare hardware"
+};
+
+/***************************************************************************
+ * pv_init_ops
+ * initialization hooks.
+ */
+
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type);
+
+struct pv_init_ops pv_init_ops =
+{
+#ifdef ASM_SUPPORTED
+	.patch_bundle = ia64_native_patch_bundle,
+#endif
+	.patch_branch = ia64_native_patch_branch,
+};
+
+/***************************************************************************
+ * pv_cpu_ops
+ * intrinsics hooks.
+ */
+
+#ifndef ASM_SUPPORTED
+/* ia64_native_xxx are macros so that we have to make them real functions */
+
+#define DEFINE_VOID_FUNC1(name)					\
+	static void						\
+	ia64_native_ ## name ## _func(unsigned long arg)	\
+	{							\
+		ia64_native_ ## name(arg);			\
+	}
+
+#define DEFINE_VOID_FUNC1_VOID(name)				\
+	static void						\
+	ia64_native_ ## name ## _func(void *arg)		\
+	{							\
+		ia64_native_ ## name(arg);			\
+	}
+
+#define DEFINE_VOID_FUNC2(name)					\
+	static void						\
+	ia64_native_ ## name ## _func(unsigned long arg0,	\
+				      unsigned long arg1)	\
+	{							\
+		ia64_native_ ## name(arg0, arg1);		\
+	}
+
+#define DEFINE_FUNC0(name)			\
+	static unsigned long			\
+	ia64_native_ ## name ## _func(void)	\
+	{					\
+		return ia64_native_ ## name();	\
+	}
+
+#define DEFINE_FUNC1(name, type)			\
+	static unsigned long				\
+	ia64_native_ ## name ## _func(type arg)		\
+	{						\
+		return ia64_native_ ## name(arg);	\
+	}						\
+
+DEFINE_VOID_FUNC1_VOID(fc);
+DEFINE_VOID_FUNC1(intrin_local_irq_restore);
+
+DEFINE_VOID_FUNC2(ptcga);
+DEFINE_VOID_FUNC2(set_rr);
+
+DEFINE_FUNC0(get_psr_i);
+
+DEFINE_FUNC1(thash, unsigned long);
+DEFINE_FUNC1(get_cpuid, int);
+DEFINE_FUNC1(get_pmd, int);
+DEFINE_FUNC1(get_rr, unsigned long);
+
+static void
+ia64_native_ssm_i_func(void)
+{
+	ia64_native_ssm(IA64_PSR_I);
+}
+
+static void
+ia64_native_rsm_i_func(void)
+{
+	ia64_native_rsm(IA64_PSR_I);
+}
+
+static void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+				unsigned long val2, unsigned long val3,
+				unsigned long val4)
+{
+	ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4);
+}
+
+#define CASE_GET_REG(id)				\
+	case _IA64_REG_ ## id:				\
+	res = ia64_native_getreg(_IA64_REG_ ## id);	\
+	break;
+#define CASE_GET_AR(id) CASE_GET_REG(AR_ ## id)
+#define CASE_GET_CR(id) CASE_GET_REG(CR_ ## id)
+
+unsigned long
+ia64_native_getreg_func(int regnum)
+{
+	unsigned long res = -1;
+	switch (regnum) {
+	CASE_GET_REG(GP);
+	/*CASE_GET_REG(IP);*/ /* returned ip value shouldn't be constant */
+	CASE_GET_REG(PSR);
+	CASE_GET_REG(TP);
+	CASE_GET_REG(SP);
+
+	CASE_GET_AR(KR0);
+	CASE_GET_AR(KR1);
+	CASE_GET_AR(KR2);
+	CASE_GET_AR(KR3);
+	CASE_GET_AR(KR4);
+	CASE_GET_AR(KR5);
+	CASE_GET_AR(KR6);
+	CASE_GET_AR(KR7);
+	CASE_GET_AR(RSC);
+	CASE_GET_AR(BSP);
+	CASE_GET_AR(BSPSTORE);
+	CASE_GET_AR(RNAT);
+	CASE_GET_AR(FCR);
+	CASE_GET_AR(EFLAG);
+	CASE_GET_AR(CSD);
+	CASE_GET_AR(SSD);
+	CASE_GET_AR(CFLAG);
+	CASE_GET_AR(FSR);
+	CASE_GET_AR(FIR);
+	CASE_GET_AR(FDR);
+	CASE_GET_AR(CCV);
+	CASE_GET_AR(UNAT);
+	CASE_GET_AR(FPSR);
+	CASE_GET_AR(ITC);
+	CASE_GET_AR(PFS);
+	CASE_GET_AR(LC);
+	CASE_GET_AR(EC);
+
+	CASE_GET_CR(DCR);
+	CASE_GET_CR(ITM);
+	CASE_GET_CR(IVA);
+	CASE_GET_CR(PTA);
+	CASE_GET_CR(IPSR);
+	CASE_GET_CR(ISR);
+	CASE_GET_CR(IIP);
+	CASE_GET_CR(IFA);
+	CASE_GET_CR(ITIR);
+	CASE_GET_CR(IIPA);
+	CASE_GET_CR(IFS);
+	CASE_GET_CR(IIM);
+	CASE_GET_CR(IHA);
+	CASE_GET_CR(LID);
+	CASE_GET_CR(IVR);
+	CASE_GET_CR(TPR);
+	CASE_GET_CR(EOI);
+	CASE_GET_CR(IRR0);
+	CASE_GET_CR(IRR1);
+	CASE_GET_CR(IRR2);
+	CASE_GET_CR(IRR3);
+	CASE_GET_CR(ITV);
+	CASE_GET_CR(PMV);
+	CASE_GET_CR(CMCV);
+	CASE_GET_CR(LRR0);
+	CASE_GET_CR(LRR1);
+
+	default:
+		printk(KERN_CRIT "wrong_getreg %d\n", regnum);
+		break;
+	}
+	return res;
+}
+
+#define CASE_SET_REG(id)				\
+	case _IA64_REG_ ## id:				\
+	ia64_native_setreg(_IA64_REG_ ## id, val);	\
+	break;
+#define CASE_SET_AR(id) CASE_SET_REG(AR_ ## id)
+#define CASE_SET_CR(id) CASE_SET_REG(CR_ ## id)
+
+void
+ia64_native_setreg_func(int regnum, unsigned long val)
+{
+	switch (regnum) {
+	case _IA64_REG_PSR_L:
+		ia64_native_setreg(_IA64_REG_PSR_L, val);
+		ia64_dv_serialize_data();
+		break;
+	CASE_SET_REG(SP);
+	CASE_SET_REG(GP);
+
+	CASE_SET_AR(KR0);
+	CASE_SET_AR(KR1);
+	CASE_SET_AR(KR2);
+	CASE_SET_AR(KR3);
+	CASE_SET_AR(KR4);
+	CASE_SET_AR(KR5);
+	CASE_SET_AR(KR6);
+	CASE_SET_AR(KR7);
+	CASE_SET_AR(RSC);
+	CASE_SET_AR(BSP);
+	CASE_SET_AR(BSPSTORE);
+	CASE_SET_AR(RNAT);
+	CASE_SET_AR(FCR);
+	CASE_SET_AR(EFLAG);
+	CASE_SET_AR(CSD);
+	CASE_SET_AR(SSD);
+	CASE_SET_AR(CFLAG);
+	CASE_SET_AR(FSR);
+	CASE_SET_AR(FIR);
+	CASE_SET_AR(FDR);
+	CASE_SET_AR(CCV);
+	CASE_SET_AR(UNAT);
+	CASE_SET_AR(FPSR);
+	CASE_SET_AR(ITC);
+	CASE_SET_AR(PFS);
+	CASE_SET_AR(LC);
+	CASE_SET_AR(EC);
+
+	CASE_SET_CR(DCR);
+	CASE_SET_CR(ITM);
+	CASE_SET_CR(IVA);
+	CASE_SET_CR(PTA);
+	CASE_SET_CR(IPSR);
+	CASE_SET_CR(ISR);
+	CASE_SET_CR(IIP);
+	CASE_SET_CR(IFA);
+	CASE_SET_CR(ITIR);
+	CASE_SET_CR(IIPA);
+	CASE_SET_CR(IFS);
+	CASE_SET_CR(IIM);
+	CASE_SET_CR(IHA);
+	CASE_SET_CR(LID);
+	CASE_SET_CR(IVR);
+	CASE_SET_CR(TPR);
+	CASE_SET_CR(EOI);
+	CASE_SET_CR(IRR0);
+	CASE_SET_CR(IRR1);
+	CASE_SET_CR(IRR2);
+	CASE_SET_CR(IRR3);
+	CASE_SET_CR(ITV);
+	CASE_SET_CR(PMV);
+	CASE_SET_CR(CMCV);
+	CASE_SET_CR(LRR0);
+	CASE_SET_CR(LRR1);
+	default:
+		printk(KERN_CRIT "wrong setreg %d\n", regnum);
+		break;
+	}
+}
+#else
+
+#define __DEFINE_FUNC(name, code)					\
+	extern const char ia64_native_ ## name ## _direct_start[];	\
+	extern const char ia64_native_ ## name ## _direct_end[];	\
+	asm (".align 32\n"						\
+	     ".proc ia64_native_" #name "_func\n"			\
+	     "ia64_native_" #name "_func:\n"				\
+	     "ia64_native_" #name "_direct_start:\n"			\
+	     code							\
+	     "ia64_native_" #name "_direct_end:\n"			\
+	     "br.cond.sptk.many b6\n"					\
+	     ".endp ia64_native_" #name "_func\n")
+
+#define DEFINE_VOID_FUNC0(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(void);			\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(unsigned long arg);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1_VOID(name, code)			\
+	extern void						\
+	ia64_native_ ## name ## _func(void *arg);		\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC2(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(unsigned long arg0,	\
+				      unsigned long arg1);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC0(name, code)		\
+	extern unsigned long			\
+	ia64_native_ ## name ## _func(void);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC1(name, type, code)			\
+	extern unsigned long				\
+	ia64_native_ ## name ## _func(type arg);	\
+	__DEFINE_FUNC(name, code)
+
+DEFINE_VOID_FUNC1_VOID(fc,
+		       "fc r8\n");
+DEFINE_VOID_FUNC1(intrin_local_irq_restore,
+		  ";;\n"
+		  "     cmp.ne p6, p7 = r8, r0\n"
+		  ";;\n"
+		  "(p6) ssm psr.i\n"
+		  "(p7) rsm psr.i\n"
+		  ";;\n"
+		  "(p6) srlz.d\n");
+
+DEFINE_VOID_FUNC2(ptcga,
+		  "ptc.ga r8, r9\n");
+DEFINE_VOID_FUNC2(set_rr,
+		  "mov rr[r8] = r9\n");
+
+/* ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I */
+DEFINE_FUNC0(get_psr_i,
+	     "mov r2 = " __stringify(1 << IA64_PSR_I_BIT) "\n"
+	     "mov r8 = psr\n"
+	     ";;\n"
+	     "and r8 = r2, r8\n");
+
+DEFINE_FUNC1(thash, unsigned long,
+	     "thash r8 = r8\n");
+DEFINE_FUNC1(get_cpuid, int,
+	     "mov r8 = cpuid[r8]\n");
+DEFINE_FUNC1(get_pmd, int,
+	     "mov r8 = pmd[r8]\n");
+DEFINE_FUNC1(get_rr, unsigned long,
+	     "mov r8 = rr[r8]\n");
+
+DEFINE_VOID_FUNC0(ssm_i,
+		  "ssm psr.i\n");
+DEFINE_VOID_FUNC0(rsm_i,
+		  "rsm psr.i\n");
+
+extern void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+				unsigned long val2, unsigned long val3,
+				unsigned long val4);
+__DEFINE_FUNC(set_rr0_to_rr4,
+	      "mov rr[r0] = r8\n"
+	      "movl r2 = 0x2000000000000000\n"
+	      ";;\n"
+	      "mov rr[r2] = r9\n"
+	      "shl r3 = r2, 1\n"	/* movl r3 = 0x4000000000000000 */
+	      ";;\n"
+	      "add r2 = r2, r3\n"	/* movl r2 = 0x6000000000000000 */
+	      "mov rr[r3] = r10\n"
+	      ";;\n"
+	      "mov rr[r2] = r11\n"
+	      "shl r3 = r3, 1\n"	/* movl r3 = 0x8000000000000000 */
+	      ";;\n"
+	      "mov rr[r3] = r14\n");
+
+extern unsigned long ia64_native_getreg_func(int regnum);
+asm(".global ia64_native_getreg_func\n");
+#define __DEFINE_GET_REG(id, reg)			\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"	\
+	";;\n"						\
+	"cmp.eq p6, p0 = r2, r8\n"			\
+	";;\n"						\
+	"(p6) mov r8 = " #reg "\n"			\
+	"(p6) br.cond.sptk.many b6\n"			\
+	";;\n"
+#define __DEFINE_GET_AR(id, reg)	__DEFINE_GET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_GET_CR(id, reg)	__DEFINE_GET_REG(CR_ ## id, cr.reg)
+
+__DEFINE_FUNC(getreg,
+	      __DEFINE_GET_REG(GP, gp)
+	      /*__DEFINE_GET_REG(IP, ip)*/ /* returned ip value shouldn't be constant */
+	      __DEFINE_GET_REG(PSR, psr)
+	      __DEFINE_GET_REG(TP, tp)
+	      __DEFINE_GET_REG(SP, sp)
+
+	      __DEFINE_GET_REG(AR_KR0, ar0)
+	      __DEFINE_GET_REG(AR_KR1, ar1)
+	      __DEFINE_GET_REG(AR_KR2, ar2)
+	      __DEFINE_GET_REG(AR_KR3, ar3)
+	      __DEFINE_GET_REG(AR_KR4, ar4)
+	      __DEFINE_GET_REG(AR_KR5, ar5)
+	      __DEFINE_GET_REG(AR_KR6, ar6)
+	      __DEFINE_GET_REG(AR_KR7, ar7)
+	      __DEFINE_GET_AR(RSC, rsc)
+	      __DEFINE_GET_AR(BSP, bsp)
+	      __DEFINE_GET_AR(BSPSTORE, bspstore)
+	      __DEFINE_GET_AR(RNAT, rnat)
+	      __DEFINE_GET_AR(FCR, fcr)
+	      __DEFINE_GET_AR(EFLAG, eflag)
+	      __DEFINE_GET_AR(CSD, csd)
+	      __DEFINE_GET_AR(SSD, ssd)
+	      __DEFINE_GET_REG(AR_CFLAG, ar27)
+	      __DEFINE_GET_AR(FSR, fsr)
+	      __DEFINE_GET_AR(FIR, fir)
+	      __DEFINE_GET_AR(FDR, fdr)
+	      __DEFINE_GET_AR(CCV, ccv)
+	      __DEFINE_GET_AR(UNAT, unat)
+	      __DEFINE_GET_AR(FPSR, fpsr)
+	      __DEFINE_GET_AR(ITC, itc)
+	      __DEFINE_GET_AR(PFS, pfs)
+	      __DEFINE_GET_AR(LC, lc)
+	      __DEFINE_GET_AR(EC, ec)
+
+	      __DEFINE_GET_CR(DCR, dcr)
+	      __DEFINE_GET_CR(ITM, itm)
+	      __DEFINE_GET_CR(IVA, iva)
+	      __DEFINE_GET_CR(PTA, pta)
+	      __DEFINE_GET_CR(IPSR, ipsr)
+	      __DEFINE_GET_CR(ISR, isr)
+	      __DEFINE_GET_CR(IIP, iip)
+	      __DEFINE_GET_CR(IFA, ifa)
+	      __DEFINE_GET_CR(ITIR, itir)
+	      __DEFINE_GET_CR(IIPA, iipa)
+	      __DEFINE_GET_CR(IFS, ifs)
+	      __DEFINE_GET_CR(IIM, iim)
+	      __DEFINE_GET_CR(IHA, iha)
+	      __DEFINE_GET_CR(LID, lid)
+	      __DEFINE_GET_CR(IVR, ivr)
+	      __DEFINE_GET_CR(TPR, tpr)
+	      __DEFINE_GET_CR(EOI, eoi)
+	      __DEFINE_GET_CR(IRR0, irr0)
+	      __DEFINE_GET_CR(IRR1, irr1)
+	      __DEFINE_GET_CR(IRR2, irr2)
+	      __DEFINE_GET_CR(IRR3, irr3)
+	      __DEFINE_GET_CR(ITV, itv)
+	      __DEFINE_GET_CR(PMV, pmv)
+	      __DEFINE_GET_CR(CMCV, cmcv)
+	      __DEFINE_GET_CR(LRR0, lrr0)
+	      __DEFINE_GET_CR(LRR1, lrr1)
+
+	      "mov r8 = -1\n"	/* unsupported case */
+	);
+
+extern void ia64_native_setreg_func(int regnum, unsigned long val);
+asm(".global ia64_native_setreg_func\n");
+#define __DEFINE_SET_REG(id, reg)			\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"	\
+	";;\n"						\
+	"cmp.eq p6, p0 = r2, r9\n"			\
+	";;\n"						\
+	"(p6) mov " #reg " = r8\n"			\
+	"(p6) br.cond.sptk.many b6\n"			\
+	";;\n"
+#define __DEFINE_SET_AR(id, reg)	__DEFINE_SET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_SET_CR(id, reg)	__DEFINE_SET_REG(CR_ ## id, cr.reg)
+__DEFINE_FUNC(setreg,
+	      "mov r2 = " __stringify(_IA64_REG_PSR_L) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r9\n"
+	      ";;\n"
+	      "(p6) mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+	      ".serialize.data\n"
+#endif
+	      "(p6) br.cond.sptk.many b6\n"
+	      __DEFINE_SET_REG(GP, gp)
+	      __DEFINE_SET_REG(SP, sp)
+
+	      __DEFINE_SET_REG(AR_KR0, ar0)
+	      __DEFINE_SET_REG(AR_KR1, ar1)
+	      __DEFINE_SET_REG(AR_KR2, ar2)
+	      __DEFINE_SET_REG(AR_KR3, ar3)
+	      __DEFINE_SET_REG(AR_KR4, ar4)
+	      __DEFINE_SET_REG(AR_KR5, ar5)
+	      __DEFINE_SET_REG(AR_KR6, ar6)
+	      __DEFINE_SET_REG(AR_KR7, ar7)
+	      __DEFINE_SET_AR(RSC, rsc)
+	      __DEFINE_SET_AR(BSP, bsp)
+	      __DEFINE_SET_AR(BSPSTORE, bspstore)
+	      __DEFINE_SET_AR(RNAT, rnat)
+	      __DEFINE_SET_AR(FCR, fcr)
+	      __DEFINE_SET_AR(EFLAG, eflag)
+	      __DEFINE_SET_AR(CSD, csd)
+	      __DEFINE_SET_AR(SSD, ssd)
+	      __DEFINE_SET_REG(AR_CFLAG, ar27)
+	      __DEFINE_SET_AR(FSR, fsr)
+	      __DEFINE_SET_AR(FIR, fir)
+	      __DEFINE_SET_AR(FDR, fdr)
+	      __DEFINE_SET_AR(CCV, ccv)
+	      __DEFINE_SET_AR(UNAT, unat)
+	      __DEFINE_SET_AR(FPSR, fpsr)
+	      __DEFINE_SET_AR(ITC, itc)
+	      __DEFINE_SET_AR(PFS, pfs)
+	      __DEFINE_SET_AR(LC, lc)
+	      __DEFINE_SET_AR(EC, ec)
+
+	      __DEFINE_SET_CR(DCR, dcr)
+	      __DEFINE_SET_CR(ITM, itm)
+	      __DEFINE_SET_CR(IVA, iva)
+	      __DEFINE_SET_CR(PTA, pta)
+	      __DEFINE_SET_CR(IPSR, ipsr)
+	      __DEFINE_SET_CR(ISR, isr)
+	      __DEFINE_SET_CR(IIP, iip)
+	      __DEFINE_SET_CR(IFA, ifa)
+	      __DEFINE_SET_CR(ITIR, itir)
+	      __DEFINE_SET_CR(IIPA, iipa)
+	      __DEFINE_SET_CR(IFS, ifs)
+	      __DEFINE_SET_CR(IIM, iim)
+	      __DEFINE_SET_CR(IHA, iha)
+	      __DEFINE_SET_CR(LID, lid)
+	      __DEFINE_SET_CR(IVR, ivr)
+	      __DEFINE_SET_CR(TPR, tpr)
+	      __DEFINE_SET_CR(EOI, eoi)
+	      __DEFINE_SET_CR(IRR0, irr0)
+	      __DEFINE_SET_CR(IRR1, irr1)
+	      __DEFINE_SET_CR(IRR2, irr2)
+	      __DEFINE_SET_CR(IRR3, irr3)
+	      __DEFINE_SET_CR(ITV, itv)
+	      __DEFINE_SET_CR(PMV, pmv)
+	      __DEFINE_SET_CR(CMCV, cmcv)
+	      __DEFINE_SET_CR(LRR0, lrr0)
+	      __DEFINE_SET_CR(LRR1, lrr1)
+	);
+#endif
+
+struct pv_cpu_ops pv_cpu_ops = {
+	.fc		= ia64_native_fc_func,
+	.thash		= ia64_native_thash_func,
+	.get_cpuid	= ia64_native_get_cpuid_func,
+	.get_pmd	= ia64_native_get_pmd_func,
+	.ptcga		= ia64_native_ptcga_func,
+	.get_rr		= ia64_native_get_rr_func,
+	.set_rr		= ia64_native_set_rr_func,
+	.set_rr0_to_rr4	= ia64_native_set_rr0_to_rr4_func,
+	.ssm_i		= ia64_native_ssm_i_func,
+	.getreg		= ia64_native_getreg_func,
+	.setreg		= ia64_native_setreg_func,
+	.rsm_i		= ia64_native_rsm_i_func,
+	.get_psr_i	= ia64_native_get_psr_i_func,
+	.intrin_local_irq_restore
+			= ia64_native_intrin_local_irq_restore_func,
+};
+EXPORT_SYMBOL(pv_cpu_ops);
+
+/******************************************************************************
+ * replacement of hand written assembly codes.
+ */
+
+void
+paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch)
+{
+	extern unsigned long paravirt_switch_to_targ;
+	extern unsigned long paravirt_leave_syscall_targ;
+	extern unsigned long paravirt_work_processed_syscall_targ;
+	extern unsigned long paravirt_leave_kernel_targ;
+
+	paravirt_switch_to_targ = cpu_asm_switch->switch_to;
+	paravirt_leave_syscall_targ = cpu_asm_switch->leave_syscall;
+	paravirt_work_processed_syscall_targ =
+		cpu_asm_switch->work_processed_syscall;
+	paravirt_leave_kernel_targ = cpu_asm_switch->leave_kernel;
+}
+
+/***************************************************************************
+ * pv_iosapic_ops
+ * iosapic read/write hooks.
+ */
+
+static unsigned int
+ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg)
+{
+	return __ia64_native_iosapic_read(iosapic, reg);
+}
+
+static void
+ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
+{
+	__ia64_native_iosapic_write(iosapic, reg, val);
+}
+
+struct pv_iosapic_ops pv_iosapic_ops = {
+	.pcat_compat_init = ia64_native_iosapic_pcat_compat_init,
+	.__get_irq_chip = ia64_native_iosapic_get_irq_chip,
+
+	.__read = ia64_native_iosapic_read,
+	.__write = ia64_native_iosapic_write,
+};
+
+/***************************************************************************
+ * pv_irq_ops
+ * irq operations
+ */
+
+struct pv_irq_ops pv_irq_ops = {
+	.register_ipi = ia64_native_register_ipi,
+
+	.assign_irq_vector = ia64_native_assign_irq_vector,
+	.free_irq_vector = ia64_native_free_irq_vector,
+	.register_percpu_irq = ia64_native_register_percpu_irq,
+
+	.resend_irq = ia64_native_resend_irq,
+};
+
+/***************************************************************************
+ * pv_time_ops
+ * time operations
+ */
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static int
+ia64_native_do_steal_accounting(unsigned long *new_itm)
+{
+	return 0;
+}
+
+struct pv_time_ops pv_time_ops = {
+	.do_steal_accounting = ia64_native_do_steal_accounting,
+	.sched_clock = ia64_native_sched_clock,
+};
+
+/***************************************************************************
+ * binary pacthing
+ * pv_init_ops.patch_bundle
+ */
+
+#ifdef ASM_SUPPORTED
+#define IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg)	\
+	__DEFINE_FUNC(get_ ## name,			\
+		      ";;\n"				\
+		      "mov r8 = " #reg "\n"		\
+		      ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg)	\
+	__DEFINE_FUNC(set_ ## name,			\
+		      ";;\n"				\
+		      "mov " #reg " = r8\n"		\
+		      ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_REG(name, reg)		\
+	IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg);	\
+	IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg)	\
+
+#define IA64_NATIVE_PATCH_DEFINE_AR(name, reg)			\
+	IA64_NATIVE_PATCH_DEFINE_REG(ar_ ## name, ar.reg)
+
+#define IA64_NATIVE_PATCH_DEFINE_CR(name, reg)			\
+	IA64_NATIVE_PATCH_DEFINE_REG(cr_ ## name, cr.reg)
+
+
+IA64_NATIVE_PATCH_DEFINE_GET_REG(psr, psr);
+IA64_NATIVE_PATCH_DEFINE_GET_REG(tp, tp);
+
+/* IA64_NATIVE_PATCH_DEFINE_SET_REG(psr_l, psr.l); */
+__DEFINE_FUNC(set_psr_l,
+	      ";;\n"
+	      "mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+	      ".serialize.data\n"
+#endif
+	      ";;\n");
+
+IA64_NATIVE_PATCH_DEFINE_REG(gp, gp);
+IA64_NATIVE_PATCH_DEFINE_REG(sp, sp);
+
+IA64_NATIVE_PATCH_DEFINE_REG(kr0, ar0);
+IA64_NATIVE_PATCH_DEFINE_REG(kr1, ar1);
+IA64_NATIVE_PATCH_DEFINE_REG(kr2, ar2);
+IA64_NATIVE_PATCH_DEFINE_REG(kr3, ar3);
+IA64_NATIVE_PATCH_DEFINE_REG(kr4, ar4);
+IA64_NATIVE_PATCH_DEFINE_REG(kr5, ar5);
+IA64_NATIVE_PATCH_DEFINE_REG(kr6, ar6);
+IA64_NATIVE_PATCH_DEFINE_REG(kr7, ar7);
+
+IA64_NATIVE_PATCH_DEFINE_AR(rsc, rsc);
+IA64_NATIVE_PATCH_DEFINE_AR(bsp, bsp);
+IA64_NATIVE_PATCH_DEFINE_AR(bspstore, bspstore);
+IA64_NATIVE_PATCH_DEFINE_AR(rnat, rnat);
+IA64_NATIVE_PATCH_DEFINE_AR(fcr, fcr);
+IA64_NATIVE_PATCH_DEFINE_AR(eflag, eflag);
+IA64_NATIVE_PATCH_DEFINE_AR(csd, csd);
+IA64_NATIVE_PATCH_DEFINE_AR(ssd, ssd);
+IA64_NATIVE_PATCH_DEFINE_REG(ar27, ar27);
+IA64_NATIVE_PATCH_DEFINE_AR(fsr, fsr);
+IA64_NATIVE_PATCH_DEFINE_AR(fir, fir);
+IA64_NATIVE_PATCH_DEFINE_AR(fdr, fdr);
+IA64_NATIVE_PATCH_DEFINE_AR(ccv, ccv);
+IA64_NATIVE_PATCH_DEFINE_AR(unat, unat);
+IA64_NATIVE_PATCH_DEFINE_AR(fpsr, fpsr);
+IA64_NATIVE_PATCH_DEFINE_AR(itc, itc);
+IA64_NATIVE_PATCH_DEFINE_AR(pfs, pfs);
+IA64_NATIVE_PATCH_DEFINE_AR(lc, lc);
+IA64_NATIVE_PATCH_DEFINE_AR(ec, ec);
+
+IA64_NATIVE_PATCH_DEFINE_CR(dcr, dcr);
+IA64_NATIVE_PATCH_DEFINE_CR(itm, itm);
+IA64_NATIVE_PATCH_DEFINE_CR(iva, iva);
+IA64_NATIVE_PATCH_DEFINE_CR(pta, pta);
+IA64_NATIVE_PATCH_DEFINE_CR(ipsr, ipsr);
+IA64_NATIVE_PATCH_DEFINE_CR(isr, isr);
+IA64_NATIVE_PATCH_DEFINE_CR(iip, iip);
+IA64_NATIVE_PATCH_DEFINE_CR(ifa, ifa);
+IA64_NATIVE_PATCH_DEFINE_CR(itir, itir);
+IA64_NATIVE_PATCH_DEFINE_CR(iipa, iipa);
+IA64_NATIVE_PATCH_DEFINE_CR(ifs, ifs);
+IA64_NATIVE_PATCH_DEFINE_CR(iim, iim);
+IA64_NATIVE_PATCH_DEFINE_CR(iha, iha);
+IA64_NATIVE_PATCH_DEFINE_CR(lid, lid);
+IA64_NATIVE_PATCH_DEFINE_CR(ivr, ivr);
+IA64_NATIVE_PATCH_DEFINE_CR(tpr, tpr);
+IA64_NATIVE_PATCH_DEFINE_CR(eoi, eoi);
+IA64_NATIVE_PATCH_DEFINE_CR(irr0, irr0);
+IA64_NATIVE_PATCH_DEFINE_CR(irr1, irr1);
+IA64_NATIVE_PATCH_DEFINE_CR(irr2, irr2);
+IA64_NATIVE_PATCH_DEFINE_CR(irr3, irr3);
+IA64_NATIVE_PATCH_DEFINE_CR(itv, itv);
+IA64_NATIVE_PATCH_DEFINE_CR(pmv, pmv);
+IA64_NATIVE_PATCH_DEFINE_CR(cmcv, cmcv);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr0, lrr0);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr1, lrr1);
+
+static const struct paravirt_patch_bundle_elem ia64_native_patch_bundle_elems[]
+__initdata_or_module =
+{
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM(name, type)		\
+	{							\
+		(void*)ia64_native_ ## name ## _direct_start,	\
+		(void*)ia64_native_ ## name ## _direct_end,	\
+		PARAVIRT_PATCH_TYPE_ ## type,			\
+	}
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(fc, FC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(thash, THASH),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(ptcga, PTCGA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_rr, GET_RR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr, SET_RR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(ssm_i, SSM_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(rsm_i, RSM_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(intrin_local_irq_restore,
+				      INTRIN_LOCAL_IRQ_RESTORE),
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg)			\
+	{								\
+		(void*)ia64_native_get_ ## name ## _direct_start,	\
+		(void*)ia64_native_get_ ## name ## _direct_end,		\
+		PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg,		\
+	}
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg)			\
+	{								\
+		(void*)ia64_native_set_ ## name ## _direct_start,	\
+		(void*)ia64_native_set_ ## name ## _direct_end,		\
+		PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg,		\
+	}
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg),	\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg)		\
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar_ ## name, AR_ ## reg)
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(cr_ ## name, CR_ ## reg)
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(psr, PSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(tp, TP),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(psr_l, PSR_L),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(gp, GP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(sp, SP),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr0, AR_KR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr1, AR_KR1),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr2, AR_KR2),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr3, AR_KR3),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr4, AR_KR4),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr5, AR_KR5),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr6, AR_KR6),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr7, AR_KR7),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rsc, RSC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bsp, BSP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bspstore, BSPSTORE),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rnat, RNAT),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fcr, FCR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(eflag, EFLAG),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(csd, CSD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ssd, SSD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar27, AR_CFLAG),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fsr, FSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fir, FIR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fdr, FDR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ccv, CCV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(unat, UNAT),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fpsr, FPSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(itc, ITC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(pfs, PFS),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(lc, LC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ec, EC),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(dcr, DCR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itm, ITM),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iva, IVA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pta, PTA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ipsr, IPSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(isr, ISR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iip, IIP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifa, IFA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itir, ITIR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iipa, IIPA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifs, IFS),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iim, IIM),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iha, IHA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lid, LID),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ivr, IVR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(tpr, TPR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(eoi, EOI),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr0, IRR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr1, IRR1),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr2, IRR2),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr3, IRR3),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itv, ITV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pmv, PMV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(cmcv, CMCV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr0, LRR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr1, LRR1),
+};
+
+unsigned long __init_or_module
+ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type)
+{
+	const unsigned long nelems = sizeof(ia64_native_patch_bundle_elems) /
+		sizeof(ia64_native_patch_bundle_elems[0]);
+
+	return __paravirt_patch_apply_bundle(sbundle, ebundle, type,
+					      ia64_native_patch_bundle_elems,
+					      nelems, NULL);
+}
+#endif /* ASM_SUPPOTED */
+
+extern const char ia64_native_switch_to[];
+extern const char ia64_native_leave_syscall[];
+extern const char ia64_native_work_processed_syscall[];
+extern const char ia64_native_leave_kernel[];
+
+const struct paravirt_patch_branch_target ia64_native_branch_target[]
+__initconst = {
+#define PARAVIRT_BR_TARGET(name, type)			\
+	{						\
+		ia64_native_ ## name,			\
+		PARAVIRT_PATCH_TYPE_BR_ ## type,	\
+	}
+	PARAVIRT_BR_TARGET(switch_to, SWITCH_TO),
+	PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL),
+	PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL),
+	PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL),
+};
+
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type)
+{
+	const unsigned long nelem =
+		sizeof(ia64_native_branch_target) /
+		sizeof(ia64_native_branch_target[0]);
+	__paravirt_patch_apply_branch(tag, type,
+				      ia64_native_branch_target, nelem);
+}
diff --git a/arch/ia64/kernel/paravirt_inst.h b/arch/ia64/kernel/paravirt_inst.h
new file mode 100644
index 00000000000..1ad7512b5f6
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_inst.h
@@ -0,0 +1,28 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirt_inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifdef __IA64_ASM_PARAVIRTUALIZED_PVCHECK
+#include <asm/native/pvchk_inst.h>
+#else
+#include <asm/native/inst.h>
+#endif
+
diff --git a/arch/ia64/kernel/paravirt_patch.c b/arch/ia64/kernel/paravirt_patch.c
new file mode 100644
index 00000000000..bfdfef1b1ff
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patch.c
@@ -0,0 +1,514 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirt_patch.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/intrinsics.h>
+#include <asm/kprobes.h>
+#include <asm/paravirt.h>
+#include <asm/paravirt_patch.h>
+
+typedef union ia64_inst {
+        struct {
+		unsigned long long qp : 6;
+		unsigned long long : 31;
+		unsigned long long opcode : 4;
+		unsigned long long reserved : 23;
+        } generic;
+        unsigned long long l;
+} ia64_inst_t;
+
+/*
+ * flush_icache_range() can't be used here.
+ * we are here before cpu_init() which initializes
+ * ia64_i_cache_stride_shift. flush_icache_range() uses it.
+ */
+void __init_or_module
+paravirt_flush_i_cache_range(const void *instr, unsigned long size)
+{
+	extern void paravirt_fc_i(const void *addr);
+	unsigned long i;
+
+	for (i = 0; i < size; i += sizeof(bundle_t))
+		paravirt_fc_i(instr + i);
+}
+
+bundle_t* __init_or_module
+paravirt_get_bundle(unsigned long tag)
+{
+	return (bundle_t *)(tag & ~3UL);
+}
+
+unsigned long __init_or_module
+paravirt_get_slot(unsigned long tag)
+{
+	return tag & 3UL;
+}
+
+unsigned long __init_or_module
+paravirt_get_num_inst(unsigned long stag, unsigned long etag)
+{
+	bundle_t *sbundle = paravirt_get_bundle(stag);
+	unsigned long sslot = paravirt_get_slot(stag);
+	bundle_t *ebundle = paravirt_get_bundle(etag);
+	unsigned long eslot = paravirt_get_slot(etag);
+
+	return (ebundle - sbundle) * 3 + eslot - sslot + 1;
+}
+
+unsigned long __init_or_module
+paravirt_get_next_tag(unsigned long tag)
+{
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+	case 1:
+		return tag + 1;
+	case 2: {
+		bundle_t *bundle = paravirt_get_bundle(tag);
+		return (unsigned long)(bundle + 1);
+	}
+	default:
+		BUG();
+	}
+	/* NOTREACHED */
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot0(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad0.slot0;
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot1(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad0.slot1_p0 |
+		((unsigned long long)bundle->quad1.slot1_p1 << 18UL);
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot2(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad1.slot2;
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_inst(unsigned long tag)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+		return paravirt_read_slot0(bundle);
+	case 1:
+		return paravirt_read_slot1(bundle);
+	case 2:
+		return paravirt_read_slot2(bundle);
+	default:
+		BUG();
+	}
+	/* NOTREACHED */
+}
+
+void __init_or_module
+paravirt_write_slot0(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad0.slot0 = inst.l;
+}
+
+void __init_or_module
+paravirt_write_slot1(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad0.slot1_p0 = inst.l;
+	bundle->quad1.slot1_p1 = inst.l >> 18UL;
+}
+
+void __init_or_module
+paravirt_write_slot2(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad1.slot2 = inst.l;
+}
+
+void __init_or_module
+paravirt_write_inst(unsigned long tag, ia64_inst_t inst)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+		paravirt_write_slot0(bundle, inst);
+		break;
+	case 1:
+		paravirt_write_slot1(bundle, inst);
+		break;
+	case 2:
+		paravirt_write_slot2(bundle, inst);
+		break;
+	default:
+		BUG();
+		break;
+	}
+	paravirt_flush_i_cache_range(bundle, sizeof(*bundle));
+}
+
+/* for debug */
+void
+paravirt_print_bundle(const bundle_t *bundle)
+{
+	const unsigned long *quad = (const unsigned long *)bundle;
+	ia64_inst_t slot0 = paravirt_read_slot0(bundle);
+	ia64_inst_t slot1 = paravirt_read_slot1(bundle);
+	ia64_inst_t slot2 = paravirt_read_slot2(bundle);
+
+	printk(KERN_DEBUG
+	       "bundle 0x%p 0x%016lx 0x%016lx\n", bundle, quad[0], quad[1]);
+	printk(KERN_DEBUG
+	       "bundle template 0x%x\n",
+	       bundle->quad0.template);
+	printk(KERN_DEBUG
+	       "slot0 0x%lx slot1_p0 0x%lx slot1_p1 0x%lx slot2 0x%lx\n",
+	       (unsigned long)bundle->quad0.slot0,
+	       (unsigned long)bundle->quad0.slot1_p0,
+	       (unsigned long)bundle->quad1.slot1_p1,
+	       (unsigned long)bundle->quad1.slot2);
+	printk(KERN_DEBUG
+	       "slot0 0x%016llx slot1 0x%016llx slot2 0x%016llx\n",
+	       slot0.l, slot1.l, slot2.l);
+}
+
+static int noreplace_paravirt __init_or_module = 0;
+
+static int __init setup_noreplace_paravirt(char *str)
+{
+	noreplace_paravirt = 1;
+	return 1;
+}
+__setup("noreplace-paravirt", setup_noreplace_paravirt);
+
+#ifdef ASM_SUPPORTED
+static void __init_or_module
+fill_nop_bundle(void *sbundle, void *ebundle)
+{
+	extern const char paravirt_nop_bundle[];
+	extern const unsigned long paravirt_nop_bundle_size;
+
+	void *bundle = sbundle;
+
+	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+	while (bundle < ebundle) {
+		memcpy(bundle, paravirt_nop_bundle, paravirt_nop_bundle_size);
+
+		bundle += paravirt_nop_bundle_size;
+	}
+}
+
+/* helper function */
+unsigned long __init_or_module
+__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type,
+			      const struct paravirt_patch_bundle_elem *elems,
+			      unsigned long nelems,
+			      const struct paravirt_patch_bundle_elem **found)
+{
+	unsigned long used = 0;
+	unsigned long i;
+
+	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+	found = NULL;
+	for (i = 0; i < nelems; i++) {
+		const struct paravirt_patch_bundle_elem *p = &elems[i];
+		if (p->type == type) {
+			unsigned long need = p->ebundle - p->sbundle;
+			unsigned long room = ebundle - sbundle;
+
+			if (found != NULL)
+				*found = p;
+
+			if (room < need) {
+				/* no room to replace. skip it */
+				printk(KERN_DEBUG
+				       "the space is too small to put "
+				       "bundles. type %ld need %ld room %ld\n",
+				       type, need, room);
+				break;
+			}
+
+			used = need;
+			memcpy(sbundle, p->sbundle, used);
+			break;
+		}
+	}
+
+	return used;
+}
+
+void __init_or_module
+paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
+			    const struct paravirt_patch_site_bundle *end)
+{
+	const struct paravirt_patch_site_bundle *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_bundle == NULL)
+		return;
+
+	for (p = start; p < end; p++) {
+		unsigned long used;
+
+		used = (*pv_init_ops.patch_bundle)(p->sbundle, p->ebundle,
+						   p->type);
+		if (used == 0)
+			continue;
+
+		fill_nop_bundle(p->sbundle + used, p->ebundle);
+		paravirt_flush_i_cache_range(p->sbundle,
+					     p->ebundle - p->sbundle);
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+/*
+ * nop.i, nop.m, nop.f instruction are same format.
+ * but nop.b has differennt format.
+ * This doesn't support nop.b for now.
+ */
+static void __init_or_module
+fill_nop_inst(unsigned long stag, unsigned long etag)
+{
+	extern const bundle_t paravirt_nop_mfi_inst_bundle[];
+	unsigned long tag;
+	const ia64_inst_t nop_inst =
+		paravirt_read_slot0(paravirt_nop_mfi_inst_bundle);
+
+	for (tag = stag; tag < etag; tag = paravirt_get_next_tag(tag))
+		paravirt_write_inst(tag, nop_inst);
+}
+
+void __init_or_module
+paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start,
+			  const struct paravirt_patch_site_inst *end)
+{
+	const struct paravirt_patch_site_inst *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_inst == NULL)
+		return;
+
+	for (p = start; p < end; p++) {
+		unsigned long tag;
+		bundle_t *sbundle;
+		bundle_t *ebundle;
+
+		tag = (*pv_init_ops.patch_inst)(p->stag, p->etag, p->type);
+		if (tag == p->stag)
+			continue;
+
+		fill_nop_inst(tag, p->etag);
+		sbundle = paravirt_get_bundle(p->stag);
+		ebundle = paravirt_get_bundle(p->etag) + 1;
+		paravirt_flush_i_cache_range(sbundle, (ebundle - sbundle) *
+					     sizeof(bundle_t));
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+#endif /* ASM_SUPPOTED */
+
+/* brl.cond.sptk.many <target64> X3 */
+typedef union inst_x3_op {
+	ia64_inst_t inst;
+	struct {
+		unsigned long qp: 6;
+		unsigned long btyp: 3;
+		unsigned long unused: 3;
+		unsigned long p: 1;
+		unsigned long imm20b: 20;
+		unsigned long wh: 2;
+		unsigned long d: 1;
+		unsigned long i: 1;
+		unsigned long opcode: 4;
+	};
+	unsigned long l;
+} inst_x3_op_t;
+
+typedef union inst_x3_imm {
+	ia64_inst_t inst;
+	struct {
+		unsigned long unused: 2;
+		unsigned long imm39: 39;
+	};
+	unsigned long l;
+} inst_x3_imm_t;
+
+void __init_or_module
+paravirt_patch_reloc_brl(unsigned long tag, const void *target)
+{
+	unsigned long tag_op = paravirt_get_next_tag(tag);
+	unsigned long tag_imm = tag;
+	bundle_t *bundle = paravirt_get_bundle(tag);
+
+	ia64_inst_t inst_op = paravirt_read_inst(tag_op);
+	ia64_inst_t inst_imm = paravirt_read_inst(tag_imm);
+
+	inst_x3_op_t inst_x3_op = { .l = inst_op.l };
+	inst_x3_imm_t inst_x3_imm = { .l = inst_imm.l };
+
+	unsigned long imm60 =
+		((unsigned long)target - (unsigned long)bundle) >> 4;
+
+	BUG_ON(paravirt_get_slot(tag) != 1); /* MLX */
+	BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+	/* imm60[59] 1bit */
+	inst_x3_op.i = (imm60 >> 59) & 1;
+	/* imm60[19:0] 20bit */
+	inst_x3_op.imm20b = imm60 & ((1UL << 20) - 1);
+	/* imm60[58:20] 39bit */
+	inst_x3_imm.imm39 = (imm60 >> 20) & ((1UL << 39) - 1);
+
+	inst_op.l = inst_x3_op.l;
+	inst_imm.l = inst_x3_imm.l;
+
+	paravirt_write_inst(tag_op, inst_op);
+	paravirt_write_inst(tag_imm, inst_imm);
+}
+
+/* br.cond.sptk.many <target25>	B1 */
+typedef union inst_b1 {
+	ia64_inst_t inst;
+	struct {
+		unsigned long qp: 6;
+		unsigned long btype: 3;
+		unsigned long unused: 3;
+		unsigned long p: 1;
+		unsigned long imm20b: 20;
+		unsigned long wh: 2;
+		unsigned long d: 1;
+		unsigned long s: 1;
+		unsigned long opcode: 4;
+	};
+	unsigned long l;
+} inst_b1_t;
+
+void __init
+paravirt_patch_reloc_br(unsigned long tag, const void *target)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	ia64_inst_t inst = paravirt_read_inst(tag);
+	unsigned long target25 = (unsigned long)target - (unsigned long)bundle;
+	inst_b1_t inst_b1;
+
+	BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+	inst_b1.l = inst.l;
+	if (target25 & (1UL << 63))
+		inst_b1.s = 1;
+	else
+		inst_b1.s = 0;
+
+	inst_b1.imm20b = target25 >> 4;
+	inst.l = inst_b1.l;
+
+	paravirt_write_inst(tag, inst);
+}
+
+void __init
+__paravirt_patch_apply_branch(
+	unsigned long tag, unsigned long type,
+	const struct paravirt_patch_branch_target *entries,
+	unsigned int nr_entries)
+{
+	unsigned int i;
+	for (i = 0; i < nr_entries; i++) {
+		if (entries[i].type == type) {
+			paravirt_patch_reloc_br(tag, entries[i].entry);
+			break;
+		}
+	}
+}
+
+static void __init
+paravirt_patch_apply_branch(const struct paravirt_patch_site_branch *start,
+			    const struct paravirt_patch_site_branch *end)
+{
+	const struct paravirt_patch_site_branch *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_branch == NULL)
+		return;
+
+	for (p = start; p < end; p++)
+		(*pv_init_ops.patch_branch)(p->tag, p->type);
+
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+void __init
+paravirt_patch_apply(void)
+{
+	extern const char __start_paravirt_bundles[];
+	extern const char __stop_paravirt_bundles[];
+	extern const char __start_paravirt_insts[];
+	extern const char __stop_paravirt_insts[];
+	extern const char __start_paravirt_branches[];
+	extern const char __stop_paravirt_branches[];
+
+	paravirt_patch_apply_bundle((const struct paravirt_patch_site_bundle *)
+				    __start_paravirt_bundles,
+				    (const struct paravirt_patch_site_bundle *)
+				    __stop_paravirt_bundles);
+	paravirt_patch_apply_inst((const struct paravirt_patch_site_inst *)
+				  __start_paravirt_insts,
+				  (const struct paravirt_patch_site_inst *)
+				  __stop_paravirt_insts);
+	paravirt_patch_apply_branch((const struct paravirt_patch_site_branch *)
+				    __start_paravirt_branches,
+				    (const struct paravirt_patch_site_branch *)
+				    __stop_paravirt_branches);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "linux"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/arch/ia64/kernel/paravirt_patchlist.c b/arch/ia64/kernel/paravirt_patchlist.c
new file mode 100644
index 00000000000..0a70720662e
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patchlist.c
@@ -0,0 +1,81 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/paravirt.h>
+
+#define DECLARE(name)						\
+	extern unsigned long					\
+		__ia64_native_start_gate_##name##_patchlist[];	\
+	extern unsigned long					\
+		__ia64_native_end_gate_##name##_patchlist[]
+
+DECLARE(fsyscall);
+DECLARE(brl_fsys_bubble_down);
+DECLARE(vtop);
+DECLARE(mckinley_e9);
+
+extern unsigned long __start_gate_section[];
+
+#define ASSIGN(name)							    \
+	.start_##name##_patchlist =					    \
+		(unsigned long)__ia64_native_start_gate_##name##_patchlist, \
+	.end_##name##_patchlist =					    \
+		(unsigned long)__ia64_native_end_gate_##name##_patchlist
+
+struct pv_patchdata pv_patchdata __initdata = {
+	ASSIGN(fsyscall),
+	ASSIGN(brl_fsys_bubble_down),
+	ASSIGN(vtop),
+	ASSIGN(mckinley_e9),
+
+	.gate_section = (void*)__start_gate_section,
+};
+
+
+unsigned long __init
+paravirt_get_gate_patchlist(enum pv_gate_patchlist type)
+{
+
+#define CASE(NAME, name)					\
+	case PV_GATE_START_##NAME:				\
+		return pv_patchdata.start_##name##_patchlist;	\
+	case PV_GATE_END_##NAME:				\
+		return pv_patchdata.end_##name##_patchlist;	\
+
+	switch (type) {
+		CASE(FSYSCALL, fsyscall);
+		CASE(BRL_FSYS_BUBBLE_DOWN, brl_fsys_bubble_down);
+		CASE(VTOP, vtop);
+		CASE(MCKINLEY_E9, mckinley_e9);
+	default:
+		BUG();
+		break;
+	}
+	return 0;
+}
+
+void * __init
+paravirt_get_gate_section(void)
+{
+	return pv_patchdata.gate_section;
+}
diff --git a/arch/ia64/kernel/paravirt_patchlist.h b/arch/ia64/kernel/paravirt_patchlist.h
new file mode 100644
index 00000000000..67cffc3643a
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patchlist.h
@@ -0,0 +1,24 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirt_patchlist.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <asm/native/patchlist.h>
+
diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S
new file mode 100644
index 00000000000..92d880c4d3d
--- /dev/null
+++ b/arch/ia64/kernel/paravirtentry.S
@@ -0,0 +1,121 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirtentry.S
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-offsets.h>
+#include <asm/paravirt_privop.h>
+#include <asm/paravirt_patch.h>
+#include "entry.h"
+
+#define DATA8(sym, init_value)			\
+	.pushsection .data..read_mostly ;	\
+	.align 8 ;				\
+	.global sym ;				\
+	sym: ;					\
+	data8 init_value ;			\
+	.popsection
+
+#define BRANCH(targ, reg, breg, type)					\
+	PARAVIRT_PATCH_SITE_BR(PARAVIRT_PATCH_TYPE_BR_ ## type) ;	\
+	;;								\
+	movl reg=targ ;							\
+	;;								\
+	ld8 reg=[reg] ;							\
+	;;								\
+	mov breg=reg ;							\
+	br.cond.sptk.many breg
+
+#define BRANCH_PROC(sym, reg, breg, type)				\
+	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ;		\
+	GLOBAL_ENTRY(paravirt_ ## sym) ;				\
+		BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ;	\
+	END(paravirt_ ## sym)
+
+#define BRANCH_PROC_UNWINFO(sym, reg, breg, type)			\
+	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ;		\
+	GLOBAL_ENTRY(paravirt_ ## sym) ;				\
+		PT_REGS_UNWIND_INFO(0) ;				\
+		BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ;	\
+	END(paravirt_ ## sym)
+
+
+BRANCH_PROC(switch_to, r22, b7, SWITCH_TO)
+BRANCH_PROC_UNWINFO(leave_syscall, r22, b7, LEAVE_SYSCALL)
+BRANCH_PROC(work_processed_syscall, r2, b7, WORK_PROCESSED_SYSCALL)
+BRANCH_PROC_UNWINFO(leave_kernel, r22, b7, LEAVE_KERNEL)
+
+
+#ifdef CONFIG_MODULES
+#define __INIT_OR_MODULE	.text
+#define __INITDATA_OR_MODULE	.data
+#else
+#define __INIT_OR_MODULE	__INIT
+#define __INITDATA_OR_MODULE	__INITDATA
+#endif /* CONFIG_MODULES */
+
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_fc_i)
+	fc.i r32
+	br.ret.sptk.many rp
+	END(paravirt_fc_i)
+	__FINIT
+
+	__INIT_OR_MODULE
+	.align 32
+	GLOBAL_ENTRY(paravirt_nop_b_inst_bundle)
+	{
+		nop.b 0
+		nop.b 0
+		nop.b 0
+	}
+	END(paravirt_nop_b_inst_bundle)
+	__FINIT
+
+	/* NOTE: nop.[mfi] has same format */
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_nop_mfi_inst_bundle)
+	{
+		nop.m 0
+		nop.f 0
+		nop.i 0
+	}
+	END(paravirt_nop_mfi_inst_bundle)
+	__FINIT
+
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_nop_bundle)
+paravirt_nop_bundle_start:
+	{
+		nop 0
+		nop 0
+		nop 0
+	}
+paravirt_nop_bundle_end:
+	END(paravirt_nop_bundle)
+	__FINIT
+
+	__INITDATA_OR_MODULE
+	.align 8
+	.global paravirt_nop_bundle_size
+paravirt_nop_bundle_size:
+	data8	paravirt_nop_bundle_end - paravirt_nop_bundle_start
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index bc11bb096f5..1cf09179371 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -7,10 +7,10 @@
 #include <linux/init.h>
 #include <linux/string.h>
 
+#include <asm/paravirt.h>
 #include <asm/patch.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
-#include <asm/system.h>
 #include <asm/unistd.h>
 
 /*
@@ -115,6 +115,29 @@ ia64_patch_vtop (unsigned long start, unsigned long end)
 	ia64_srlz_i();
 }
 
+/*
+ * Disable the RSE workaround by turning the conditional branch
+ * that we tagged in each place the workaround was used into an
+ * unconditional branch.
+ */
+void __init
+ia64_patch_rse (unsigned long start, unsigned long end)
+{
+	s32 *offp = (s32 *) start;
+	u64 ip, *b;
+
+	while (offp < (s32 *) end) {
+		ip = (u64) offp + *offp;
+
+		b = (u64 *)(ip & -16);
+		b[1] &= ~0xf800000L;
+		ia64_fc((void *) ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
 void __init
 ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 {
@@ -129,19 +152,16 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 		first_time = 0;
 		if (need_workaround)
 			printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");
-		else
-			printk(KERN_INFO "McKinley Errata 9 workaround not needed; "
-			       "disabling it\n");
 	}
 	if (need_workaround)
 		return;
 
 	while (offp < (s32 *) end) {
 		wp = (u64 *) ia64_imva((char *) offp + *offp);
-		wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
-		wp[1] = 0x0004000000000200UL;
-		wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
-		wp[3] = 0x0084006880000200UL;
+		wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+		wp[1] = 0x0084006880000200UL;
+		wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+		wp[3] = 0x0004000000000200UL;
 		ia64_fc(wp); ia64_fc(wp + 2);
 		++offp;
 	}
@@ -149,16 +169,35 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 	ia64_srlz_i();
 }
 
+extern unsigned long ia64_native_fsyscall_table[NR_syscalls];
+extern char ia64_native_fsys_bubble_down[];
+struct pv_fsys_data pv_fsys_data __initdata = {
+	.fsyscall_table = (unsigned long *)ia64_native_fsyscall_table,
+	.fsys_bubble_down = (void *)ia64_native_fsys_bubble_down,
+};
+
+unsigned long * __init
+paravirt_get_fsyscall_table(void)
+{
+	return pv_fsys_data.fsyscall_table;
+}
+
+char * __init
+paravirt_get_fsys_bubble_down(void)
+{
+	return pv_fsys_data.fsys_bubble_down;
+}
+
 static void __init
 patch_fsyscall_table (unsigned long start, unsigned long end)
 {
-	extern unsigned long fsyscall_table[NR_syscalls];
+	u64 fsyscall_table = (u64)paravirt_get_fsyscall_table();
 	s32 *offp = (s32 *) start;
 	u64 ip;
 
 	while (offp < (s32 *) end) {
 		ip = (u64) ia64_imva((char *) offp + *offp);
-		ia64_patch_imm64(ip, (u64) fsyscall_table);
+		ia64_patch_imm64(ip, fsyscall_table);
 		ia64_fc((void *) ip);
 		++offp;
 	}
@@ -169,7 +208,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end)
 static void __init
 patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
 {
-	extern char fsys_bubble_down[];
+	u64 fsys_bubble_down = (u64)paravirt_get_fsys_bubble_down();
 	s32 *offp = (s32 *) start;
 	u64 ip;
 
@@ -187,11 +226,31 @@ patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
 void __init
 ia64_patch_gate (void)
 {
-#	define START(name)	((unsigned long) __start_gate_##name##_patchlist)
-#	define END(name)	((unsigned long)__end_gate_##name##_patchlist)
+#	define START(name)	paravirt_get_gate_patchlist(PV_GATE_START_##name)
+#	define END(name)	paravirt_get_gate_patchlist(PV_GATE_END_##name)
+
+	patch_fsyscall_table(START(FSYSCALL), END(FSYSCALL));
+	patch_brl_fsys_bubble_down(START(BRL_FSYS_BUBBLE_DOWN), END(BRL_FSYS_BUBBLE_DOWN));
+	ia64_patch_vtop(START(VTOP), END(VTOP));
+	ia64_patch_mckinley_e9(START(MCKINLEY_E9), END(MCKINLEY_E9));
+}
+
+void ia64_patch_phys_stack_reg(unsigned long val)
+{
+	s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;
+	s32 * end = (s32 *) __end___phys_stack_reg_patchlist;
+	u64 ip, mask, imm;
 
-	patch_fsyscall_table(START(fsyscall), END(fsyscall));
-	patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down));
-	ia64_patch_vtop(START(vtop), END(vtop));
-	ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
+	/* see instruction format A4: adds r1 = imm13, r3 */
+	mask = (0x3fUL << 27) | (0x7f << 13);
+	imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13;
+
+	while (offp < end) {
+		ip = (u64) offp + *offp;
+		ia64_patch(ip, mask, imm);
+		ia64_fc((void *)ip);
+		++offp;
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
 }
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
new file mode 100644
index 00000000000..992c1098c52
--- /dev/null
+++ b/arch/ia64/kernel/pci-dma.c
@@ -0,0 +1,110 @@
+/*
+ * Dynamic DMA mapping support.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/dmar.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+#include <linux/dma-mapping.h>
+
+
+#ifdef CONFIG_INTEL_IOMMU
+
+#include <linux/kernel.h>
+
+#include <asm/page.h>
+
+dma_addr_t bad_dma_address __read_mostly;
+EXPORT_SYMBOL(bad_dma_address);
+
+static int iommu_sac_force __read_mostly;
+
+int no_iommu __read_mostly;
+#ifdef CONFIG_IOMMU_DEBUG
+int force_iommu __read_mostly = 1;
+#else
+int force_iommu __read_mostly;
+#endif
+
+int iommu_pass_through;
+
+extern struct dma_map_ops intel_dma_ops;
+
+static int __init pci_iommu_init(void)
+{
+	if (iommu_detected)
+		intel_iommu_init();
+
+	return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
+
+void pci_iommu_shutdown(void)
+{
+	return;
+}
+
+void __init
+iommu_dma_init(void)
+{
+	return;
+}
+
+int iommu_dma_supported(struct device *dev, u64 mask)
+{
+	/* Copied from i386. Doesn't make much sense, because it will
+	   only work for pci_alloc_coherent.
+	   The caller just has to use GFP_DMA in this case. */
+	if (mask < DMA_BIT_MASK(24))
+		return 0;
+
+	/* Tell the device to use SAC when IOMMU force is on.  This
+	   allows the driver to use cheaper accesses in some cases.
+
+	   Problem with this is that if we overflow the IOMMU area and
+	   return DAC as fallback address the device may not handle it
+	   correctly.
+
+	   As a special case some controllers have a 39bit address
+	   mode that is as efficient as 32bit (aic79xx). Don't force
+	   SAC for these.  Assume all masks <= 40 bits are of this
+	   type. Normally this doesn't make any difference, but gives
+	   more gentle handling of IOMMU overflow. */
+	if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
+		dev_info(dev, "Force SAC with mask %llx\n", mask);
+		return 0;
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(iommu_dma_supported);
+
+void __init pci_iommu_alloc(void)
+{
+	dma_ops = &intel_dma_ops;
+
+	dma_ops->sync_single_for_cpu = machvec_dma_sync_single;
+	dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg;
+	dma_ops->sync_single_for_device = machvec_dma_sync_single;
+	dma_ops->sync_sg_for_device = machvec_dma_sync_sg;
+	dma_ops->dma_supported = iommu_dma_supported;
+
+	/*
+	 * The order of these functions is important for
+	 * fall-back/fail-over reasons
+	 */
+	detect_intel_iommu();
+
+#ifdef CONFIG_SWIOTLB
+	pci_swiotlb_init();
+#endif
+}
+
+#endif
diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
new file mode 100644
index 00000000000..939260aeac9
--- /dev/null
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -0,0 +1,67 @@
+/* Glue code to lib/swiotlb.c */
+
+#include <linux/pci.h>
+#include <linux/gfp.h>
+#include <linux/cache.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+
+int swiotlb __read_mostly;
+EXPORT_SYMBOL(swiotlb);
+
+static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size,
+					 dma_addr_t *dma_handle, gfp_t gfp,
+					 struct dma_attrs *attrs)
+{
+	if (dev->coherent_dma_mask != DMA_BIT_MASK(64))
+		gfp |= GFP_DMA;
+	return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
+}
+
+static void ia64_swiotlb_free_coherent(struct device *dev, size_t size,
+				       void *vaddr, dma_addr_t dma_addr,
+				       struct dma_attrs *attrs)
+{
+	swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+}
+
+struct dma_map_ops swiotlb_dma_ops = {
+	.alloc = ia64_swiotlb_alloc_coherent,
+	.free = ia64_swiotlb_free_coherent,
+	.map_page = swiotlb_map_page,
+	.unmap_page = swiotlb_unmap_page,
+	.map_sg = swiotlb_map_sg_attrs,
+	.unmap_sg = swiotlb_unmap_sg_attrs,
+	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+	.sync_single_for_device = swiotlb_sync_single_for_device,
+	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = swiotlb_sync_sg_for_device,
+	.dma_supported = swiotlb_dma_supported,
+	.mapping_error = swiotlb_dma_mapping_error,
+};
+
+void __init swiotlb_dma_init(void)
+{
+	dma_ops = &swiotlb_dma_ops;
+	swiotlb_init(1);
+}
+
+void __init pci_swiotlb_init(void)
+{
+	if (!iommu_detected) {
+#ifdef CONFIG_IA64_GENERIC
+		swiotlb = 1;
+		printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
+		machvec_init("dig");
+		swiotlb_init(1);
+		dma_ops = &swiotlb_dma_ops;
+#else
+		panic("Unable to find Intel IOMMU");
+#endif
+	}
+}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 077f21216b6..5845ffea67c 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -19,12 +19,10 @@
  * 	http://www.hpl.hp.com/research/linux/perfmon
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
-#include <linux/smp_lock.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
@@ -35,12 +33,16 @@
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/vfs.h>
+#include <linux/smp.h>
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/bitops.h>
 #include <linux/capability.h>
 #include <linux/rcupdate.h>
 #include <linux/completion.h>
+#include <linux/tracehook.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
 
 #include <asm/errno.h>
 #include <asm/intrinsics.h>
@@ -48,7 +50,6 @@
 #include <asm/perfmon.h>
 #include <asm/processor.h>
 #include <asm/signal.h>
-#include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/delay.h>
 
@@ -63,6 +64,9 @@
 
 #define PFM_INVALID_ACTIVATION	(~0UL)
 
+#define PFM_NUM_PMC_REGS	64	/* PMC save area for ctxsw */
+#define PFM_NUM_PMD_REGS	64	/* PMD save area for ctxsw */
+
 /*
  * depth of message queue
  */
@@ -144,7 +148,7 @@
  * in UP:
  * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
  *
- * spin_lock_irqsave()/spin_lock_irqrestore():
+ * spin_lock_irqsave()/spin_unlock_irqrestore():
  * 	in SMP: local_irq_disable + spin_lock
  * 	in UP : local_irq_disable
  *
@@ -156,14 +160,14 @@
  */
 #define PROTECT_CTX(c, f) \
 	do {  \
-		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
+		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \
 		spin_lock_irqsave(&(c)->ctx_lock, f); \
-		DPRINT(("spinlocked ctx %p  by [%d]\n", c, current->pid)); \
+		DPRINT(("spinlocked ctx %p  by [%d]\n", c, task_pid_nr(current))); \
 	} while(0)
 
 #define UNPROTECT_CTX(c, f) \
 	do { \
-		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
+		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \
 		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
 	} while(0)
 
@@ -225,12 +229,12 @@
 #ifdef PFM_DEBUGGING
 #define DPRINT(a) \
 	do { \
-		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
 	} while (0)
 
 #define DPRINT_ovfl(a) \
 	do { \
-		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
 	} while (0)
 #endif
 
@@ -297,16 +301,19 @@ typedef struct pfm_context {
 	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
 	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */
 
-	unsigned long		ctx_pmcs[IA64_NUM_PMC_REGS];	/*  saved copies of PMC values */
+	unsigned long		ctx_pmcs[PFM_NUM_PMC_REGS];	/*  saved copies of PMC values */
 
 	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
 	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
 	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
 	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */
 
-	pfm_counter_t		ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
+	pfm_counter_t		ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */
+
+	unsigned long		th_pmcs[PFM_NUM_PMC_REGS];	/* PMC thread save state */
+	unsigned long		th_pmds[PFM_NUM_PMD_REGS];	/* PMD thread save state */
 
-	u64			ctx_saved_psr_up;	/* only contains psr.up value */
+	unsigned long		ctx_saved_psr_up;	/* only contains psr.up value */
 
 	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
 	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
@@ -514,25 +521,56 @@ static pmu_config_t		*pmu_conf;
 pfm_sysctl_t pfm_sysctl;
 EXPORT_SYMBOL(pfm_sysctl);
 
-static ctl_table pfm_ctl_table[]={
-	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
-	{2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
-	{3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
-	{4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
-	{ 0, },
+static struct ctl_table pfm_ctl_table[] = {
+	{
+		.procname	= "debug",
+		.data		= &pfm_sysctl.debug,
+		.maxlen		= sizeof(int),
+		.mode		= 0666,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "debug_ovfl",
+		.data		= &pfm_sysctl.debug_ovfl,
+		.maxlen		= sizeof(int),
+		.mode		= 0666,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "fastctxsw",
+		.data		= &pfm_sysctl.fastctxsw,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "expert_mode",
+		.data		= &pfm_sysctl.expert_mode,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec,
+	},
+	{}
 };
-static ctl_table pfm_sysctl_dir[] = {
-	{1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
- 	{0,},
+static struct ctl_table pfm_sysctl_dir[] = {
+	{
+		.procname	= "perfmon",
+		.mode		= 0555,
+		.child		= pfm_ctl_table,
+	},
+ 	{}
 };
-static ctl_table pfm_sysctl_root[] = {
-	{1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
- 	{0,},
+static struct ctl_table pfm_sysctl_root[] = {
+	{
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= pfm_sysctl_dir,
+	},
+ 	{}
 };
 static struct ctl_table_header *pfm_sysctl_header;
 
 static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
-static int pfm_flush(struct file *filp);
 
 #define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
 #define pfm_get_cpu_data(a,b)		per_cpu(a, b)
@@ -544,21 +582,6 @@ pfm_put_task(struct task_struct *task)
 }
 
 static inline void
-pfm_set_task_notify(struct task_struct *task)
-{
-	struct thread_info *info;
-
-	info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
-	set_bit(TIF_NOTIFY_RESUME, &info->flags);
-}
-
-static inline void
-pfm_clear_task_notify(void)
-{
-	clear_thread_flag(TIF_NOTIFY_RESUME);
-}
-
-static inline void
 pfm_reserve_page(unsigned long a)
 {
 	SetPageReserved(vmalloc_to_page((void *)a));
@@ -582,30 +605,22 @@ pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
 	spin_unlock(&(x)->ctx_lock);
 }
 
-static inline unsigned int
-pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
-{
-	return do_munmap(mm, addr, len);
-}
-
-static inline unsigned long 
-pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
-{
-	return get_unmapped_area(file, addr, len, pgoff, flags);
-}
-
+/* forward declaration */
+static const struct dentry_operations pfmfs_dentry_operations;
 
-static struct super_block *
-pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
+static struct dentry *
+pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
 {
-	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
+	return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations,
+			PFMFS_MAGIC);
 }
 
 static struct file_system_type pfm_fs_type = {
 	.name     = "pfmfs",
-	.get_sb   = pfmfs_get_sb,
+	.mount    = pfmfs_mount,
 	.kill_sb  = kill_anon_super,
 };
+MODULE_ALIAS_FS("pfmfs");
 
 DEFINE_PER_CPU(unsigned long, pfm_syst_info);
 DEFINE_PER_CPU(struct task_struct *, pmu_owner);
@@ -615,7 +630,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);
 
 
 /* forward declaration */
-static struct file_operations pfm_file_ops;
+static const struct file_operations pfm_file_ops;
 
 /*
  * forward declarations
@@ -806,10 +821,9 @@ pfm_rvmalloc(unsigned long size)
 	unsigned long addr;
 
 	size = PAGE_ALIGN(size);
-	mem  = vmalloc(size);
+	mem  = vzalloc(size);
 	if (mem) {
 		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
-		memset(mem, 0, size);
 		addr = (unsigned long)mem;
 		while (size > 0) {
 			pfm_reserve_page(addr);
@@ -839,7 +853,7 @@ pfm_rvfree(void *mem, unsigned long size)
 }
 
 static pfm_context_t *
-pfm_context_alloc(void)
+pfm_context_alloc(int ctx_flags)
 {
 	pfm_context_t *ctx;
 
@@ -847,10 +861,49 @@ pfm_context_alloc(void)
 	 * allocate context descriptor 
 	 * must be able to free with interrupts disabled
 	 */
-	ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
+	ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL);
 	if (ctx) {
-		memset(ctx, 0, sizeof(pfm_context_t));
 		DPRINT(("alloc ctx @%p\n", ctx));
+
+		/*
+		 * init context protection lock
+		 */
+		spin_lock_init(&ctx->ctx_lock);
+
+		/*
+		 * context is unloaded
+		 */
+		ctx->ctx_state = PFM_CTX_UNLOADED;
+
+		/*
+		 * initialization of context's flags
+		 */
+		ctx->ctx_fl_block       = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
+		ctx->ctx_fl_system      = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
+		ctx->ctx_fl_no_msg      = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
+		/*
+		 * will move to set properties
+		 * ctx->ctx_fl_excl_idle   = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
+		 */
+
+		/*
+		 * init restart semaphore to locked
+		 */
+		init_completion(&ctx->ctx_restart_done);
+
+		/*
+		 * activation is used in SMP only
+		 */
+		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
+		SET_LAST_CPU(ctx, -1);
+
+		/*
+		 * initialize notification message queue
+		 */
+		ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
+		init_waitqueue_head(&ctx->ctx_msgq_wait);
+		init_waitqueue_head(&ctx->ctx_zombieq);
+
 	}
 	return ctx;
 }
@@ -868,11 +921,10 @@ static void
 pfm_mask_monitoring(struct task_struct *task)
 {
 	pfm_context_t *ctx = PFM_GET_CTX(task);
-	struct thread_struct *th = &task->thread;
 	unsigned long mask, val, ovfl_mask;
 	int i;
 
-	DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
+	DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task)));
 
 	ovfl_mask = pmu_conf->ovfl_val;
 	/*
@@ -889,7 +941,7 @@ pfm_mask_monitoring(struct task_struct *task)
 	 * So in both cases, the live register contains the owner's
 	 * state. We can ONLY touch the PMU registers and NOT the PSR.
 	 *
-	 * As a consequence to this call, the thread->pmds[] array
+	 * As a consequence to this call, the ctx->th_pmds[] array
 	 * contains stale information which must be ignored
 	 * when context is reloaded AND monitoring is active (see
 	 * pfm_restart).
@@ -924,9 +976,9 @@ pfm_mask_monitoring(struct task_struct *task)
 	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
 	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
 		if ((mask & 0x1) == 0UL) continue;
-		ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
-		th->pmcs[i] &= ~0xfUL;
-		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
+		ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL);
+		ctx->th_pmcs[i] &= ~0xfUL;
+		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
 	}
 	/*
 	 * make all of this visible
@@ -943,7 +995,6 @@ static void
 pfm_restore_monitoring(struct task_struct *task)
 {
 	pfm_context_t *ctx = PFM_GET_CTX(task);
-	struct thread_struct *th = &task->thread;
 	unsigned long mask, ovfl_mask;
 	unsigned long psr, val;
 	int i, is_system;
@@ -952,12 +1003,12 @@ pfm_restore_monitoring(struct task_struct *task)
 	ovfl_mask = pmu_conf->ovfl_val;
 
 	if (task != current) {
-		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
+		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current));
 		return;
 	}
 	if (ctx->ctx_state != PFM_CTX_MASKED) {
 		printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
-			task->pid, current->pid, ctx->ctx_state);
+			task_pid_nr(task), task_pid_nr(current), ctx->ctx_state);
 		return;
 	}
 	psr = pfm_get_psr();
@@ -1009,9 +1060,10 @@ pfm_restore_monitoring(struct task_struct *task)
 	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
 	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
 		if ((mask & 0x1) == 0UL) continue;
-		th->pmcs[i] = ctx->ctx_pmcs[i];
-		ia64_set_pmc(i, th->pmcs[i]);
-		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
+		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
+		ia64_set_pmc(i, ctx->th_pmcs[i]);
+		DPRINT(("[%d] pmc[%d]=0x%lx\n",
+					task_pid_nr(task), i, ctx->th_pmcs[i]));
 	}
 	ia64_srlz_d();
 
@@ -1070,7 +1122,6 @@ pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
 static inline void
 pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
 {
-	struct thread_struct *thread = &task->thread;
 	unsigned long ovfl_val = pmu_conf->ovfl_val;
 	unsigned long mask = ctx->ctx_all_pmds[0];
 	unsigned long val;
@@ -1092,11 +1143,11 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
 			ctx->ctx_pmds[i].val = val & ~ovfl_val;
 			 val &= ovfl_val;
 		}
-		thread->pmds[i] = val;
+		ctx->th_pmds[i] = val;
 
 		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
 			i,
-			thread->pmds[i],
+			ctx->th_pmds[i],
 			ctx->ctx_pmds[i].val));
 	}
 }
@@ -1107,7 +1158,6 @@ pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
 static inline void
 pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
 {
-	struct thread_struct *thread = &task->thread;
 	unsigned long mask = ctx->ctx_all_pmcs[0];
 	int i;
 
@@ -1115,8 +1165,8 @@ pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
 
 	for (i=0; mask; i++, mask>>=1) {
 		/* masking 0 with ovfl_val yields 0 */
-		thread->pmcs[i] = ctx->ctx_pmcs[i];
-		DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
+		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
+		DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
 	}
 }
 
@@ -1273,14 +1323,12 @@ out:
 }
 EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
 
-extern void update_pal_halt_status(int);
-
 static int
 pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 {
 	unsigned long flags;
 	/*
-	 * validy checks on cpu_mask have been done upstream
+	 * validity checks on cpu_mask have been done upstream
 	 */
 	LOCK_PFS(flags);
 
@@ -1322,9 +1370,9 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 		cpu));
 
 	/*
-	 * disable default_idle() to go to PAL_HALT
+	 * Force idle() into poll mode
 	 */
-	update_pal_halt_status(0);
+	cpu_idle_poll_ctrl(true);
 
 	UNLOCK_PFS(flags);
 
@@ -1332,7 +1380,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 
 error_conflict:
 	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
-  		pfm_sessions.pfs_sys_session[cpu]->pid,
+  		task_pid_nr(pfm_sessions.pfs_sys_session[cpu]),
 		cpu));
 abort:
 	UNLOCK_PFS(flags);
@@ -1346,7 +1394,7 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
 {
 	unsigned long flags;
 	/*
-	 * validy checks on cpu_mask have been done upstream
+	 * validity checks on cpu_mask have been done upstream
 	 */
 	LOCK_PFS(flags);
 
@@ -1381,11 +1429,8 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
 		is_syswide,
 		cpu));
 
-	/*
-	 * if possible, enable default_idle() to go into PAL_HALT
-	 */
-	if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0)
-		update_pal_halt_status(1);
+	/* Undo forced polling. Last session reenables pal_halt */
+	cpu_idle_poll_ctrl(false);
 
 	UNLOCK_PFS(flags);
 
@@ -1398,13 +1443,14 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
  * a PROTECT_CTX() section.
  */
 static int
-pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
+pfm_remove_smpl_mapping(void *vaddr, unsigned long size)
 {
+	struct task_struct *task = current;
 	int r;
 
 	/* sanity checks */
 	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
-		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
+		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm);
 		return -EINVAL;
 	}
 
@@ -1413,15 +1459,10 @@ pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long siz
 	/*
 	 * does the actual unmapping
 	 */
-	down_write(&task->mm->mmap_sem);
-
-	DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));
+	r = vm_munmap((unsigned long)vaddr, size);
 
-	r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);
-
-	up_write(&task->mm->mmap_sem);
 	if (r !=0) {
-		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
+		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size);
 	}
 
 	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
@@ -1463,7 +1504,7 @@ pfm_free_smpl_buffer(pfm_context_t *ctx)
 	return 0;
 
 invalid_free:
-	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
+	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current));
 	return -EINVAL;
 }
 #endif
@@ -1483,7 +1524,7 @@ pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
  * any operations on the root directory. However, we need a non-trivial
  * d_name - pfm: will go nicely and kill the special-casing in procfs.
  */
-static struct vfsmount *pfmfs_mnt;
+static struct vfsmount *pfmfs_mnt __read_mostly;
 
 static int __init
 init_pfm_fs(void)
@@ -1500,13 +1541,6 @@ init_pfm_fs(void)
 	return err;
 }
 
-static void __exit
-exit_pfm_fs(void)
-{
-	unregister_filesystem(&pfm_fs_type);
-	mntput(pfmfs_mnt);
-}
-
 static ssize_t
 pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
 {
@@ -1516,13 +1550,13 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
 	unsigned long flags;
   	DECLARE_WAITQUEUE(wait, current);
 	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
 		return -EINVAL;
 	}
 
-	ctx = (pfm_context_t *)filp->private_data;
+	ctx = filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current));
 		return -EINVAL;
 	}
 
@@ -1576,7 +1610,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
 
 		PROTECT_CTX(ctx, flags);
 	}
-	DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
+	DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret));
   	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
 
@@ -1585,7 +1619,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
 	ret = -EINVAL;
 	msg = pfm_get_next_msg(ctx);
 	if (msg == NULL) {
-		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
+		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current));
 		goto abort_locked;
 	}
 
@@ -1616,13 +1650,13 @@ pfm_poll(struct file *filp, poll_table * wait)
 	unsigned int mask = 0;
 
 	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
 		return 0;
 	}
 
-	ctx = (pfm_context_t *)filp->private_data;
+	ctx = filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current));
 		return 0;
 	}
 
@@ -1643,8 +1677,8 @@ pfm_poll(struct file *filp, poll_table * wait)
 	return mask;
 }
 
-static int
-pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static long
+pfm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	DPRINT(("pfm_ioctl called\n"));
 	return -EINVAL;
@@ -1661,7 +1695,7 @@ pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
 	ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
 
 	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
-		current->pid,
+		task_pid_nr(current),
 		fd,
 		on,
 		ctx->ctx_async_queue, ret));
@@ -1676,13 +1710,13 @@ pfm_fasync(int fd, struct file *filp, int on)
 	int ret;
 
 	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 
-	ctx = (pfm_context_t *)filp->private_data;
+	ctx = filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 	/*
@@ -1728,7 +1762,7 @@ pfm_syswide_force_stop(void *info)
 	if (owner != ctx->ctx_task) {
 		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
 			smp_processor_id(),
-			owner->pid, ctx->ctx_task->pid);
+			task_pid_nr(owner), task_pid_nr(ctx->ctx_task));
 		return;
 	}
 	if (GET_PMU_CTX() != ctx) {
@@ -1738,7 +1772,7 @@ pfm_syswide_force_stop(void *info)
 		return;
 	}
 
-	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));	
+	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task)));
 	/*
 	 * the context is already protected in pfm_close(), we simply
 	 * need to mask interrupts to avoid a PMU interrupt race on
@@ -1763,7 +1797,7 @@ pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
 	int ret;
 
 	DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
-	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
+	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 1);
 	DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
 }
 #endif /* CONFIG_SMP */
@@ -1773,7 +1807,7 @@ pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
  * When caller is self-monitoring, the context is unloaded.
  */
 static int
-pfm_flush(struct file *filp)
+pfm_flush(struct file *filp, fl_owner_t id)
 {
 	pfm_context_t *ctx;
 	struct task_struct *task;
@@ -1788,16 +1822,16 @@ pfm_flush(struct file *filp)
 		return -EBADF;
 	}
 
-	ctx = (pfm_context_t *)filp->private_data;
+	ctx = filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 
 	/*
 	 * remove our file from the async queue, if we use this mode.
 	 * This can be done without the context being protected. We come
-	 * here when the context has become unreacheable by other tasks.
+	 * here when the context has become unreachable by other tasks.
 	 *
 	 * We may still have active monitoring at this point and we may
 	 * end up in pfm_overflow_handler(). However, fasync_helper()
@@ -1807,11 +1841,6 @@ pfm_flush(struct file *filp)
 	 * invoked after, it will find an empty queue and no
 	 * signal will be sent. In both case, we are safe
 	 */
-	if (filp->f_flags & FASYNC) {
-		DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
-		pfm_do_fasync (-1, filp, ctx, 0);
-	}
-
 	PROTECT_CTX(ctx, flags);
 
 	state     = ctx->ctx_state;
@@ -1897,7 +1926,7 @@ pfm_flush(struct file *filp)
 	 * because some VM function reenables interrupts.
 	 *
 	 */
-	if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size);
+	if (smpl_buf_vaddr) pfm_remove_smpl_mapping(smpl_buf_vaddr, smpl_buf_size);
 
 	return 0;
 }
@@ -1936,9 +1965,9 @@ pfm_close(struct inode *inode, struct file *filp)
 		return -EBADF;
 	}
 	
-	ctx = (pfm_context_t *)filp->private_data;
+	ctx = filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 
@@ -2035,7 +2064,7 @@ pfm_close(struct inode *inode, struct file *filp)
 	 	 */
 		ctx->ctx_state = PFM_CTX_ZOMBIE;
 
-		DPRINT(("zombie ctx for [%d]\n", task->pid));
+		DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task)));
 		/*
 		 * cannot free the context on the spot. deferred until
 		 * the task notices the ZOMBIE state
@@ -2094,7 +2123,7 @@ doit:
 	filp->private_data = NULL;
 
 	/*
-	 * if we free on the spot, the context is now completely unreacheable
+	 * if we free on the spot, the context is now completely unreachable
 	 * from the callers side. The monitored task side is also cut, so we
 	 * can freely cut.
 	 *
@@ -2125,116 +2154,73 @@ pfm_no_open(struct inode *irrelevant, struct file *dontcare)
 
 
 
-static struct file_operations pfm_file_ops = {
-	.llseek   = no_llseek,
-	.read     = pfm_read,
-	.write    = pfm_write,
-	.poll     = pfm_poll,
-	.ioctl    = pfm_ioctl,
-	.open     = pfm_no_open,	/* special open code to disallow open via /proc */
-	.fasync   = pfm_fasync,
-	.release  = pfm_close,
-	.flush	  = pfm_flush
+static const struct file_operations pfm_file_ops = {
+	.llseek		= no_llseek,
+	.read		= pfm_read,
+	.write		= pfm_write,
+	.poll		= pfm_poll,
+	.unlocked_ioctl = pfm_ioctl,
+	.open		= pfm_no_open,	/* special open code to disallow open via /proc */
+	.fasync		= pfm_fasync,
+	.release	= pfm_close,
+	.flush		= pfm_flush
 };
 
-static int
-pfmfs_delete_dentry(struct dentry *dentry)
+static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
-	return 1;
+	return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
+			     dentry->d_inode->i_ino);
 }
 
-static struct dentry_operations pfmfs_dentry_operations = {
-	.d_delete = pfmfs_delete_dentry,
+static const struct dentry_operations pfmfs_dentry_operations = {
+	.d_delete = always_delete_dentry,
+	.d_dname = pfmfs_dname,
 };
 
 
-static int
-pfm_alloc_fd(struct file **cfile)
+static struct file *
+pfm_alloc_file(pfm_context_t *ctx)
 {
-	int fd, ret = 0;
-	struct file *file = NULL;
-	struct inode * inode;
-	char name[32];
-	struct qstr this;
-
-	fd = get_unused_fd();
-	if (fd < 0) return -ENFILE;
-
-	ret = -ENFILE;
-
-	file = get_empty_filp();
-	if (!file) goto out;
+	struct file *file;
+	struct inode *inode;
+	struct path path;
+	struct qstr this = { .name = "" };
 
 	/*
 	 * allocate a new inode
 	 */
 	inode = new_inode(pfmfs_mnt->mnt_sb);
-	if (!inode) goto out;
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
 
 	DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));
 
 	inode->i_mode = S_IFCHR|S_IRUGO;
-	inode->i_uid  = current->fsuid;
-	inode->i_gid  = current->fsgid;
-
-	sprintf(name, "[%lu]", inode->i_ino);
-	this.name = name;
-	this.len  = strlen(name);
-	this.hash = inode->i_ino;
-
-	ret = -ENOMEM;
+	inode->i_uid  = current_fsuid();
+	inode->i_gid  = current_fsgid();
 
 	/*
 	 * allocate a new dcache entry
 	 */
-	file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
-	if (!file->f_dentry) goto out;
+	path.dentry = d_alloc(pfmfs_mnt->mnt_root, &this);
+	if (!path.dentry) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	path.mnt = mntget(pfmfs_mnt);
 
-	file->f_dentry->d_op = &pfmfs_dentry_operations;
+	d_add(path.dentry, inode);
 
-	d_add(file->f_dentry, inode);
-	file->f_vfsmnt = mntget(pfmfs_mnt);
-	file->f_mapping = inode->i_mapping;
+	file = alloc_file(&path, FMODE_READ, &pfm_file_ops);
+	if (IS_ERR(file)) {
+		path_put(&path);
+		return file;
+	}
 
-	file->f_op    = &pfm_file_ops;
-	file->f_mode  = FMODE_READ;
 	file->f_flags = O_RDONLY;
-	file->f_pos   = 0;
+	file->private_data = ctx;
 
-	/*
-	 * may have to delay until context is attached?
-	 */
-	fd_install(fd, file);
-
-	/*
-	 * the file structure we will use
-	 */
-	*cfile = file;
-
-	return fd;
-out:
-	if (file) put_filp(file);
-	put_unused_fd(fd);
-	return ret;
-}
-
-static void
-pfm_free_fd(int fd, struct file *file)
-{
-	struct files_struct *files = current->files;
-	struct fdtable *fdt;
-
-	/* 
-	 * there ie no fd_uninstall(), so we do it here
-	 */
-	spin_lock(&files->file_lock);
-	fdt = files_fdtable(files);
-	rcu_assign_pointer(fdt->fd[fd], NULL);
-	spin_unlock(&files->file_lock);
-
-	if (file)
-		put_filp(file);
-	put_unused_fd(fd);
+	return file;
 }
 
 static int
@@ -2260,7 +2246,7 @@ pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long ad
  * allocate a sampling buffer and remaps it into the user address space of the task
  */
 static int
-pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
+pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
 {
 	struct mm_struct *mm = task->mm;
 	struct vm_area_struct *vma = NULL;
@@ -2283,7 +2269,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
 	 * 	return -ENOMEM;
 	 */
-	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
+	if (size > task_rlimit(task, RLIMIT_MEMLOCK))
 		return -ENOMEM;
 
 	/*
@@ -2300,18 +2286,19 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	DPRINT(("smpl_buf @%p\n", smpl_buf));
 
 	/* allocate vma */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (!vma) {
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
 	}
-	memset(vma, 0, sizeof(*vma));
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	/*
 	 * partially initialize the vma for the sampling buffer
 	 */
 	vma->vm_mm	     = mm;
-	vma->vm_flags	     = VM_READ| VM_MAYREAD |VM_RESERVED;
+	vma->vm_file	     = get_file(filp);
+	vma->vm_flags	     = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP;
 	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
 
 	/*
@@ -2331,8 +2318,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	down_write(&task->mm->mmap_sem);
 
 	/* find some free area in address space, must have mmap sem held */
-	vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
-	if (vma->vm_start == 0UL) {
+	vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS);
+	if (IS_ERR_VALUE(vma->vm_start)) {
 		DPRINT(("Cannot find unmapped area for size %ld\n", size));
 		up_write(&task->mm->mmap_sem);
 		goto error;
@@ -2355,7 +2342,6 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	 */
 	insert_vm_struct(mm, vma);
 
-	mm->total_vm  += size >> PAGE_SHIFT;
 	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
 							vma_pages(vma));
 	up_write(&task->mm->mmap_sem);
@@ -2382,22 +2368,33 @@ error_kmem:
 static int
 pfm_bad_permissions(struct task_struct *task)
 {
+	const struct cred *tcred;
+	kuid_t uid = current_uid();
+	kgid_t gid = current_gid();
+	int ret;
+
+	rcu_read_lock();
+	tcred = __task_cred(task);
+
 	/* inspired by ptrace_attach() */
 	DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
-		current->uid,
-		current->gid,
-		task->euid,
-		task->suid,
-		task->uid,
-		task->egid,
-		task->sgid));
-
-	return ((current->uid != task->euid)
-	    || (current->uid != task->suid)
-	    || (current->uid != task->uid)
-	    || (current->gid != task->egid)
-	    || (current->gid != task->sgid)
-	    || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE);
+		from_kuid(&init_user_ns, uid),
+		from_kgid(&init_user_ns, gid),
+		from_kuid(&init_user_ns, tcred->euid),
+		from_kuid(&init_user_ns, tcred->suid),
+		from_kuid(&init_user_ns, tcred->uid),
+		from_kgid(&init_user_ns, tcred->egid),
+		from_kgid(&init_user_ns, tcred->sgid)));
+
+	ret = ((!uid_eq(uid, tcred->euid))
+	       || (!uid_eq(uid, tcred->suid))
+	       || (!uid_eq(uid, tcred->uid))
+	       || (!gid_eq(gid, tcred->egid))
+	       || (!gid_eq(gid, tcred->sgid))
+	       || (!gid_eq(gid, tcred->gid))) && !capable(CAP_SYS_PTRACE);
+
+	rcu_read_unlock();
+	return ret;
 }
 
 static int
@@ -2426,7 +2423,7 @@ pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx)
 }
 
 static int
-pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags,
+pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned int ctx_flags,
 		     unsigned int cpu, pfarg_context_t *arg)
 {
 	pfm_buffer_fmt_t *fmt = NULL;
@@ -2439,7 +2436,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int
 	/* invoke and lock buffer format, if found */
 	fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
 	if (fmt == NULL) {
-		DPRINT(("[%d] cannot find buffer format\n", task->pid));
+		DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task)));
 		return -EINVAL;
 	}
 
@@ -2450,12 +2447,13 @@ pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int
 
 	ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
 
-	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret));
+	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret));
 
 	if (ret) goto error;
 
 	/* link buffer format and context */
 	ctx->ctx_buf_fmt = fmt;
+	ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */
 
 	/*
 	 * check if buffer format wants to use perfmon buffer allocation/mapping service
@@ -2467,7 +2465,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int
 		/*
 		 * buffer is always remapped into the caller's address space
 		 */
-		ret = pfm_smpl_buffer_alloc(current, ctx, size, &uaddr);
+		ret = pfm_smpl_buffer_alloc(current, filp, ctx, size, &uaddr);
 		if (ret) goto error;
 
 		/* keep track of user address of buffer */
@@ -2522,7 +2520,7 @@ pfm_reset_pmu_state(pfm_context_t *ctx)
 	ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1;
 
 	/*
-	 * bitmask of all PMDs that are accesible to this context
+	 * bitmask of all PMDs that are accessible to this context
 	 */
 	ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0];
 
@@ -2572,23 +2570,23 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
 	 * no kernel task or task not owner by caller
 	 */
 	if (task->mm == NULL) {
-		DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid));
+		DPRINT(("task [%d] has not memory context (kernel thread)\n", task_pid_nr(task)));
 		return -EPERM;
 	}
 	if (pfm_bad_permissions(task)) {
-		DPRINT(("no permission to attach to  [%d]\n", task->pid));
+		DPRINT(("no permission to attach to  [%d]\n", task_pid_nr(task)));
 		return -EPERM;
 	}
 	/*
 	 * cannot block in self-monitoring mode
 	 */
 	if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
-		DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
+		DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task)));
 		return -EINVAL;
 	}
 
 	if (task->exit_state == EXIT_ZOMBIE) {
-		DPRINT(("cannot attach to  zombie task [%d]\n", task->pid));
+		DPRINT(("cannot attach to  zombie task [%d]\n", task_pid_nr(task)));
 		return -EBUSY;
 	}
 
@@ -2597,14 +2595,14 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
 	 */
 	if (task == current) return 0;
 
-	if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
-		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
+	if (!task_is_stopped_or_traced(task)) {
+		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state));
 		return -EBUSY;
 	}
 	/*
 	 * make sure the task is off any CPU
 	 */
-	wait_task_inactive(task);
+	wait_task_inactive(task, 0);
 
 	/* more to come... */
 
@@ -2620,11 +2618,11 @@ pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
 	/* XXX: need to add more checks here */
 	if (pid < 2) return -EPERM;
 
-	if (pid != current->pid) {
+	if (pid != task_pid_vnr(current)) {
 
 		read_lock(&tasklist_lock);
 
-		p = find_task_by_pid(pid);
+		p = find_task_by_vpid(pid);
 
 		/* make sure task cannot go away while we operate on it */
 		if (p) get_task_struct(p);
@@ -2650,79 +2648,46 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 {
 	pfarg_context_t *req = (pfarg_context_t *)arg;
 	struct file *filp;
+	struct path path;
 	int ctx_flags;
+	int fd;
 	int ret;
 
 	/* let's check the arguments first */
 	ret = pfarg_is_sane(current, req);
-	if (ret < 0) return ret;
+	if (ret < 0)
+		return ret;
 
 	ctx_flags = req->ctx_flags;
 
 	ret = -ENOMEM;
 
-	ctx = pfm_context_alloc();
-	if (!ctx) goto error;
+	fd = get_unused_fd();
+	if (fd < 0)
+		return fd;
 
-	ret = pfm_alloc_fd(&filp);
-	if (ret < 0) goto error_file;
+	ctx = pfm_context_alloc(ctx_flags);
+	if (!ctx)
+		goto error;
 
-	req->ctx_fd = ctx->ctx_fd = ret;
+	filp = pfm_alloc_file(ctx);
+	if (IS_ERR(filp)) {
+		ret = PTR_ERR(filp);
+		goto error_file;
+	}
 
-	/*
-	 * attach context to file
-	 */
-	filp->private_data = ctx;
+	req->ctx_fd = ctx->ctx_fd = fd;
 
 	/*
 	 * does the user want to sample?
 	 */
 	if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
-		ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req);
-		if (ret) goto buffer_error;
+		ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req);
+		if (ret)
+			goto buffer_error;
 	}
 
-	/*
-	 * init context protection lock
-	 */
-	spin_lock_init(&ctx->ctx_lock);
-
-	/*
-	 * context is unloaded
-	 */
-	ctx->ctx_state = PFM_CTX_UNLOADED;
-
-	/*
-	 * initialization of context's flags
-	 */
-	ctx->ctx_fl_block       = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
-	ctx->ctx_fl_system      = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
-	ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */
-	ctx->ctx_fl_no_msg      = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
-	/*
-	 * will move to set properties
-	 * ctx->ctx_fl_excl_idle   = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
-	 */
-
-	/*
-	 * init restart semaphore to locked
-	 */
-	init_completion(&ctx->ctx_restart_done);
-
-	/*
-	 * activation is used in SMP only
-	 */
-	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
-	SET_LAST_CPU(ctx, -1);
-
-	/*
-	 * initialize notification message queue
-	 */
-	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
-	init_waitqueue_head(&ctx->ctx_msgq_wait);
-	init_waitqueue_head(&ctx->ctx_zombieq);
-
-	DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n",
+	DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d\n",
 		ctx,
 		ctx_flags,
 		ctx->ctx_fl_system,
@@ -2736,10 +2701,14 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 	 */
 	pfm_reset_pmu_state(ctx);
 
+	fd_install(fd, filp);
+
 	return 0;
 
 buffer_error:
-	pfm_free_fd(ctx->ctx_fd, filp);
+	path = filp->f_path;
+	put_filp(filp);
+	path_put(&path);
 
 	if (ctx->ctx_buf_fmt) {
 		pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
@@ -2748,6 +2717,7 @@ error_file:
 	pfm_context_free(ctx);
 
 error:
+	put_unused_fd(fd);
 	return ret;
 }
 
@@ -2860,7 +2830,6 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
 static int
 pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 {
-	struct thread_struct *thread = NULL;
 	struct task_struct *task;
 	pfarg_reg_t *req = (pfarg_reg_t *)arg;
 	unsigned long value, pmc_pm;
@@ -2881,7 +2850,6 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
 
 	if (is_loaded) {
-		thread = &task->thread;
 		/*
 		 * In system wide and when the context is loaded, access can only happen
 		 * when the caller is running on the CPU being monitored by the session.
@@ -3036,7 +3004,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		 *
 		 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
 		 *
-		 * The value in thread->pmcs[] may be modified on overflow, i.e.,  when
+		 * The value in th_pmcs[] may be modified on overflow, i.e.,  when
 		 * monitoring needs to be stopped.
 		 */
 		if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);
@@ -3050,7 +3018,7 @@ pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 			/*
 			 * write thread state
 			 */
-			if (is_system == 0) thread->pmcs[cnum] = value;
+			if (is_system == 0) ctx->th_pmcs[cnum] = value;
 
 			/*
 			 * write hardware register if we can
@@ -3102,7 +3070,6 @@ error:
 static int
 pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 {
-	struct thread_struct *thread = NULL;
 	struct task_struct *task;
 	pfarg_reg_t *req = (pfarg_reg_t *)arg;
 	unsigned long value, hw_value, ovfl_mask;
@@ -3126,7 +3093,6 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	 * the owner of the local PMU.
 	 */
 	if (likely(is_loaded)) {
-		thread = &task->thread;
 		/*
 		 * In system wide and when the context is loaded, access can only happen
 		 * when the caller is running on the CPU being monitored by the session.
@@ -3234,7 +3200,7 @@ pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 			/*
 		 	 * write thread state
 		 	 */
-			if (is_system == 0) thread->pmds[cnum] = hw_value;
+			if (is_system == 0) ctx->th_pmds[cnum] = hw_value;
 
 			/*
 			 * write hardware register if we can
@@ -3300,7 +3266,6 @@ abort_mission:
 static int
 pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 {
-	struct thread_struct *thread = NULL;
 	struct task_struct *task;
 	unsigned long val = 0UL, lval, ovfl_mask, sval;
 	pfarg_reg_t *req = (pfarg_reg_t *)arg;
@@ -3324,7 +3289,6 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
 
 	if (likely(is_loaded)) {
-		thread = &task->thread;
 		/*
 		 * In system wide and when the context is loaded, access can only happen
 		 * when the caller is running on the CPU being monitored by the session.
@@ -3361,7 +3325,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
 		/*
 		 * we can only read the register that we use. That includes
-		 * the one we explicitely initialize AND the one we want included
+		 * the one we explicitly initialize AND the one we want included
 		 * in the sampling buffer (smpl_regs).
 		 *
 		 * Having this restriction allows optimization in the ctxsw routine
@@ -3386,7 +3350,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 			 * if context is zombie, then task does not exist anymore.
 			 * In this case, we use the full value saved in the context (pfm_flush_regs()).
 			 */
-			val = is_loaded ? thread->pmds[cnum] : 0UL;
+			val = is_loaded ? ctx->th_pmds[cnum] : 0UL;
 		}
 		rd_func = pmu_conf->pmd_desc[cnum].read_check;
 
@@ -3485,7 +3449,7 @@ pfm_use_debug_registers(struct task_struct *task)
 
 	if (pmu_conf->use_rr_dbregs == 0) return 0;
 
-	DPRINT(("called for [%d]\n", task->pid));
+	DPRINT(("called for [%d]\n", task_pid_nr(task)));
 
 	/*
 	 * do it only once
@@ -3516,7 +3480,7 @@ pfm_use_debug_registers(struct task_struct *task)
 	DPRINT(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
 		  pfm_sessions.pfs_ptrace_use_dbregs,
 		  pfm_sessions.pfs_sys_use_dbregs,
-		  task->pid, ret));
+		  task_pid_nr(task), ret));
 
 	UNLOCK_PFS(flags);
 
@@ -3528,7 +3492,7 @@ pfm_use_debug_registers(struct task_struct *task)
  * IA64_THREAD_DBG_VALID set. This indicates a task which was
  * able to use the debug registers for debugging purposes via
  * ptrace(). Therefore we know it was not using them for
- * perfmormance monitoring, so we only decrement the number
+ * performance monitoring, so we only decrement the number
  * of "ptraced" debug register users to keep the count up to date
  */
 int
@@ -3541,7 +3505,7 @@ pfm_release_debug_registers(struct task_struct *task)
 
 	LOCK_PFS(flags);
 	if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
-		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
+		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task));
 		ret = -1;
 	}  else {
 		pfm_sessions.pfs_ptrace_use_dbregs--;
@@ -3593,7 +3557,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
 	/* sanity check */
 	if (unlikely(task == NULL)) {
-		printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
+		printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current));
 		return -EINVAL;
 	}
 
@@ -3602,7 +3566,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		fmt = ctx->ctx_buf_fmt;
 
 		DPRINT(("restarting self %d ovfl=0x%lx\n",
-			task->pid,
+			task_pid_nr(task),
 			ctx->ctx_ovfl_regs[0]));
 
 		if (CTX_HAS_SMPL(ctx)) {
@@ -3626,11 +3590,11 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 				pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
 
 			if (rst_ctrl.bits.mask_monitoring == 0) {
-				DPRINT(("resuming monitoring for [%d]\n", task->pid));
+				DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task)));
 
 				if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
 			} else {
-				DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));
+				DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task)));
 
 				// cannot use pfm_stop_monitoring(task, regs);
 			}
@@ -3681,22 +3645,22 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	 * if non-blocking, then we ensure that the task will go into
 	 * pfm_handle_work() before returning to user mode.
 	 *
-	 * We cannot explicitely reset another task, it MUST always
+	 * We cannot explicitly reset another task, it MUST always
 	 * be done by the task itself. This works for system wide because
 	 * the tool that is controlling the session is logically doing 
 	 * "self-monitoring".
 	 */
 	if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
-		DPRINT(("unblocking [%d] \n", task->pid));
+		DPRINT(("unblocking [%d]\n", task_pid_nr(task)));
 		complete(&ctx->ctx_restart_done);
 	} else {
-		DPRINT(("[%d] armed exit trap\n", task->pid));
+		DPRINT(("[%d] armed exit trap\n", task_pid_nr(task)));
 
 		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
 
 		PFM_SET_WORK_PENDING(task, 1);
 
-		pfm_set_task_notify(task);
+		set_notify_resume(task);
 
 		/*
 		 * XXX: send reschedule if task runs on another CPU
@@ -3778,7 +3742,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
 	 * don't bother if we are loaded and task is being debugged
 	 */
 	if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
-		DPRINT(("debug registers already in use for [%d]\n", task->pid));
+		DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task)));
 		return -EBUSY;
 	}
 
@@ -3819,7 +3783,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
 	 * is shared by all processes running on it
  	 */
 	if (first_time && can_access_pmu) {
-		DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
+		DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task)));
 		for (i=0; i < pmu_conf->num_ibrs; i++) {
 			ia64_set_ibr(i, 0UL);
 			ia64_dv_serialize_instruction();
@@ -4008,7 +3972,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		return -EBUSY;
 	}
 	DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
-		PFM_CTX_TASK(ctx)->pid,
+		task_pid_nr(PFM_CTX_TASK(ctx)),
 		state,
 		is_system));
 	/*
@@ -4066,7 +4030,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		 * monitoring disabled in kernel at next reschedule
 		 */
 		ctx->ctx_saved_psr_up = 0;
-		DPRINT(("task=[%d]\n", task->pid));
+		DPRINT(("task=[%d]\n", task_pid_nr(task)));
 	}
 	return 0;
 }
@@ -4191,10 +4155,10 @@ pfm_check_task_exist(pfm_context_t *ctx)
 	do_each_thread (g, t) {
 		if (t->thread.pfm_context == ctx) {
 			ret = 0;
-			break;
+			goto out;
 		}
 	} while_each_thread (g, t);
-
+out:
 	read_unlock(&tasklist_lock);
 
 	DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
@@ -4271,11 +4235,12 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
 		if (is_system) {
 			if (pfm_sessions.pfs_ptrace_use_dbregs) {
-				DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
+				DPRINT(("cannot load [%d] dbregs in use\n",
+							task_pid_nr(task)));
 				ret = -EBUSY;
 			} else {
 				pfm_sessions.pfs_sys_use_dbregs++;
-				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
+				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs));
 				set_dbregs = 1;
 			}
 		}
@@ -4355,8 +4320,8 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	pfm_copy_pmds(task, ctx);
 	pfm_copy_pmcs(task, ctx);
 
-	pmcs_source = thread->pmcs;
-	pmds_source = thread->pmds;
+	pmcs_source = ctx->th_pmcs;
+	pmds_source = ctx->th_pmds;
 
 	/*
 	 * always the case for system-wide
@@ -4367,7 +4332,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
 			/* allow user level control */
 			ia64_psr(regs)->sp = 0;
-			DPRINT(("clearing psr.sp for [%d]\n", task->pid));
+			DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task)));
 
 			SET_LAST_CPU(ctx, smp_processor_id());
 			INC_ACTIVATION();
@@ -4402,7 +4367,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		 */
 		SET_PMU_OWNER(task, ctx);
 
-		DPRINT(("context loaded on PMU for [%d]\n", task->pid));
+		DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task)));
 	} else {
 		/*
 		 * when not current, task MUST be stopped, so this is safe
@@ -4466,7 +4431,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 	int prev_state, is_system;
 	int ret;
 
-	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
+	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1));
 
 	prev_state = ctx->ctx_state;
 	is_system  = ctx->ctx_fl_system;
@@ -4541,7 +4506,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 		 */
 		ia64_psr(regs)->sp = 1;
 
-		DPRINT(("setting psr.sp for [%d]\n", task->pid));
+		DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task)));
 	}
 	/*
 	 * save PMDs to context
@@ -4581,7 +4546,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 	ctx->ctx_fl_can_restart  = 0;
 	ctx->ctx_fl_going_zombie = 0;
 
-	DPRINT(("disconnected [%d] from context\n", task->pid));
+	DPRINT(("disconnected [%d] from context\n", task_pid_nr(task)));
 
 	return 0;
 }
@@ -4604,22 +4569,22 @@ pfm_exit_thread(struct task_struct *task)
 
 	PROTECT_CTX(ctx, flags);
 
-	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));
+	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task)));
 
 	state = ctx->ctx_state;
 	switch(state) {
 		case PFM_CTX_UNLOADED:
 			/*
-	 		 * only comes to thios function if pfm_context is not NULL, i.e., cannot
+	 		 * only comes to this function if pfm_context is not NULL, i.e., cannot
 			 * be in unloaded state
 	 		 */
-			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
+			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task));
 			break;
 		case PFM_CTX_LOADED:
 		case PFM_CTX_MASKED:
 			ret = pfm_context_unload(ctx, NULL, 0, regs);
 			if (ret) {
-				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
 			}
 			DPRINT(("ctx unloaded for current state was %d\n", state));
 
@@ -4628,12 +4593,12 @@ pfm_exit_thread(struct task_struct *task)
 		case PFM_CTX_ZOMBIE:
 			ret = pfm_context_unload(ctx, NULL, 0, regs);
 			if (ret) {
-				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
 			}
 			free_ok = 1;
 			break;
 		default:
-			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
+			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state);
 			break;
 	}
 	UNPROTECT_CTX(ctx, flags);
@@ -4717,7 +4682,7 @@ recheck:
 	DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
 		ctx->ctx_fd,
 		state,
-		task->pid,
+		task_pid_nr(task),
 		task->state, PFM_CMD_STOPPED(cmd)));
 
 	/*
@@ -4763,8 +4728,8 @@ recheck:
 	 * the task must be stopped.
 	 */
 	if (PFM_CMD_STOPPED(cmd)) {
-		if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
-			DPRINT(("[%d] task not in stopped state\n", task->pid));
+		if (!task_is_stopped_or_traced(task)) {
+			DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task)));
 			return -EBUSY;
 		}
 		/*
@@ -4785,7 +4750,7 @@ recheck:
 
 		UNPROTECT_CTX(ctx, flags);
 
-		wait_task_inactive(task);
+		wait_task_inactive(task, 0);
 
 		PROTECT_CTX(ctx, flags);
 
@@ -4806,7 +4771,7 @@ recheck:
 asmlinkage long
 sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
 {
-	struct file *file = NULL;
+	struct fd f = {NULL, 0};
 	pfm_context_t *ctx = NULL;
 	unsigned long flags = 0UL;
 	void *args_k = NULL;
@@ -4857,7 +4822,7 @@ restart_args:
 	 * limit abuse to min page size
 	 */
 	if (unlikely(sz > PFM_MAX_ARGSIZE)) {
-		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
+		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz);
 		return -E2BIG;
 	}
 
@@ -4903,17 +4868,17 @@ restart_args:
 
 	ret = -EBADF;
 
-	file = fget(fd);
-	if (unlikely(file == NULL)) {
+	f = fdget(fd);
+	if (unlikely(f.file == NULL)) {
 		DPRINT(("invalid fd %d\n", fd));
 		goto error_args;
 	}
-	if (unlikely(PFM_IS_FILE(file) == 0)) {
+	if (unlikely(PFM_IS_FILE(f.file) == 0)) {
 		DPRINT(("fd %d not related to perfmon\n", fd));
 		goto error_args;
 	}
 
-	ctx = (pfm_context_t *)file->private_data;
+	ctx = f.file->private_data;
 	if (unlikely(ctx == NULL)) {
 		DPRINT(("no context for fd %d\n", fd));
 		goto error_args;
@@ -4937,13 +4902,15 @@ abort_locked:
 	if (likely(ctx)) {
 		DPRINT(("context unlocked\n"));
 		UNPROTECT_CTX(ctx, flags);
-		fput(file);
 	}
 
 	/* copy argument back to user, if needed */
 	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
 
 error_args:
+	if (f.file)
+		fdput(f);
+
 	kfree(args_k);
 
 	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
@@ -5002,11 +4969,11 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
 {
 	int ret;
 
-	DPRINT(("entering for [%d]\n", current->pid));
+	DPRINT(("entering for [%d]\n", task_pid_nr(current)));
 
 	ret = pfm_context_unload(ctx, NULL, 0, regs);
 	if (ret) {
-		printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret);
+		printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret);
 	}
 
 	/*
@@ -5022,12 +4989,13 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
 }
 
 static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
+
  /*
   * pfm_handle_work() can be called with interrupts enabled
   * (TIF_NEED_RESCHED) or disabled. The down_interruptible
   * call may sleep, therefore we must re-enable interrupts
   * to avoid deadlocks. It is safe to do so because this function
-  * is called ONLY when returning to user level (PUStk=1), in which case
+  * is called ONLY when returning to user level (pUStk=1), in which case
   * there is no risk of kernel stack overflow due to deep
   * interrupt nesting.
   */
@@ -5043,7 +5011,8 @@ pfm_handle_work(void)
 
 	ctx = PFM_GET_CTX(current);
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
+		printk(KERN_ERR "perfmon: [%d] has no PFM context\n",
+			task_pid_nr(current));
 		return;
 	}
 
@@ -5051,8 +5020,6 @@ pfm_handle_work(void)
 
 	PFM_SET_WORK_PENDING(current, 0);
 
-	pfm_clear_task_notify();
-
 	regs = task_pt_regs(current);
 
 	/*
@@ -5067,11 +5034,12 @@ pfm_handle_work(void)
 	/*
 	 * must be done before we check for simple-reset mode
 	 */
-	if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie;
-
+	if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE)
+		goto do_zombie;
 
 	//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
-	if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
+	if (reason == PFM_TRAP_REASON_RESET)
+		goto skip_blocking;
 
 	/*
 	 * restore interrupt mask to what it was on entry.
@@ -5119,7 +5087,8 @@ do_zombie:
 	/*
 	 * in case of interruption of down() we don't restart anything
 	 */
-	if (ret < 0) goto nothing_to_do;
+	if (ret < 0)
+		goto nothing_to_do;
 
 skip_blocking:
 	pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
@@ -5211,10 +5180,10 @@ pfm_end_notify_user(pfm_context_t *ctx)
 
 /*
  * main overflow processing routine.
- * it can be called from the interrupt path or explicitely during the context switch code
+ * it can be called from the interrupt path or explicitly during the context switch code
  */
-static void
-pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
+static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx,
+				unsigned long pmc0, struct pt_regs *regs)
 {
 	pfm_ovfl_arg_t *ovfl_arg;
 	unsigned long mask;
@@ -5240,7 +5209,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
 	DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
 		     "used_pmds=0x%lx\n",
 			pmc0,
-			task ? task->pid: -1,
+			task ? task_pid_nr(task): -1,
 			(regs ? regs->cr_iip : 0),
 			CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
 			ctx->ctx_used_pmds[0]));
@@ -5419,7 +5388,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
 			 * when coming from ctxsw, current still points to the
 			 * previous task, therefore we must work with task and not current.
 			 */
-			pfm_set_task_notify(task);
+			set_notify_resume(task);
 		}
 		/*
 		 * defer until state is changed (shorten spin window). the context is locked
@@ -5429,7 +5398,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
 	}
 
 	DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
-			GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
+			GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1,
 			PFM_GET_WORK_PENDING(task),
 			ctx->ctx_fl_trap_reason,
 			ovfl_pmds,
@@ -5454,7 +5423,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
 sanity_check:
 	printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
 			smp_processor_id(),
-			task ? task->pid : -1,
+			task ? task_pid_nr(task) : -1,
 			pmc0);
 	return;
 
@@ -5487,7 +5456,7 @@ stop_monitoring:
 	 *
 	 * Overall pretty hairy stuff....
 	 */
-	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
+	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task_pid_nr(task): -1));
 	pfm_clear_psr_up();
 	ia64_psr(regs)->up = 0;
 	ia64_psr(regs)->sp = 1;
@@ -5495,7 +5464,7 @@ stop_monitoring:
 }
 
 static int
-pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
+pfm_do_interrupt_handler(void *arg, struct pt_regs *regs)
 {
 	struct task_struct *task;
 	pfm_context_t *ctx;
@@ -5548,24 +5517,25 @@ pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
 
 report_spurious1:
 	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
-		this_cpu, task->pid);
+		this_cpu, task_pid_nr(task));
 	pfm_unfreeze_pmu();
 	return -1;
 report_spurious2:
 	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 
 		this_cpu, 
-		task->pid);
+		task_pid_nr(task));
 	pfm_unfreeze_pmu();
 	return -1;
 }
 
 static irqreturn_t
-pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
+pfm_interrupt_handler(int irq, void *arg)
 {
 	unsigned long start_cycles, total_cycles;
 	unsigned long min, max;
 	int this_cpu;
 	int ret;
+	struct pt_regs *regs = get_irq_regs();
 
 	this_cpu = get_cpu();
 	if (likely(!pfm_alt_intr_handler)) {
@@ -5574,7 +5544,7 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
 
 		start_cycles = ia64_get_itc();
 
-		ret = pfm_do_interrupt_handler(irq, arg, regs);
+		ret = pfm_do_interrupt_handler(arg, regs);
 
 		total_cycles = ia64_get_itc();
 
@@ -5594,7 +5564,7 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
 		(*pfm_alt_intr_handler->handler)(irq, arg, regs);
 	}
 
-	put_cpu_no_resched();
+	put_cpu();
 	return IRQ_HANDLED;
 }
 
@@ -5602,7 +5572,7 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
  * /proc/perfmon interface, for debug only
  */
 
-#define PFM_PROC_SHOW_HEADER	((void *)NR_CPUS+1)
+#define PFM_PROC_SHOW_HEADER	((void *)(long)nr_cpu_ids+1)
 
 static void *
 pfm_proc_start(struct seq_file *m, loff_t *pos)
@@ -5611,7 +5581,7 @@ pfm_proc_start(struct seq_file *m, loff_t *pos)
 		return PFM_PROC_SHOW_HEADER;
 	}
 
-	while (*pos <= NR_CPUS) {
+	while (*pos <= nr_cpu_ids) {
 		if (cpu_online(*pos - 1)) {
 			return (void *)*pos;
 		}
@@ -5671,24 +5641,8 @@ pfm_proc_show_header(struct seq_file *m)
 
 	list_for_each(pos, &pfm_buffer_fmt_list) {
 		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
-		seq_printf(m, "format                    : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
-			entry->fmt_uuid[0],
-			entry->fmt_uuid[1],
-			entry->fmt_uuid[2],
-			entry->fmt_uuid[3],
-			entry->fmt_uuid[4],
-			entry->fmt_uuid[5],
-			entry->fmt_uuid[6],
-			entry->fmt_uuid[7],
-			entry->fmt_uuid[8],
-			entry->fmt_uuid[9],
-			entry->fmt_uuid[10],
-			entry->fmt_uuid[11],
-			entry->fmt_uuid[12],
-			entry->fmt_uuid[13],
-			entry->fmt_uuid[14],
-			entry->fmt_uuid[15],
-			entry->fmt_name);
+		seq_printf(m, "format                    : %16phD %s\n",
+			   entry->fmt_uuid, entry->fmt_name);
 	}
 	spin_unlock(&pfm_buffer_fmt_lock);
 
@@ -5763,7 +5717,7 @@ pfm_proc_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-struct seq_operations pfm_seq_ops = {
+const struct seq_operations pfm_seq_ops = {
 	.start =	pfm_proc_start,
  	.next =		pfm_proc_next,
  	.stop =		pfm_proc_stop,
@@ -5840,7 +5794,8 @@ pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
 	ia64_psr(regs)->sp = 1;
 
 	if (GET_PMU_OWNER() == task) {
-		DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
+		DPRINT(("cleared ownership for [%d]\n",
+					task_pid_nr(ctx->ctx_task)));
 		SET_PMU_OWNER(NULL, NULL);
 	}
 
@@ -5852,7 +5807,7 @@ pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
 	task->thread.pfm_context  = NULL;
 	task->thread.flags       &= ~IA64_THREAD_PM_VALID;
 
-	DPRINT(("force cleanup for [%d]\n",  task->pid));
+	DPRINT(("force cleanup for [%d]\n",  task_pid_nr(task)));
 }
 
 
@@ -5863,14 +5818,12 @@ void
 pfm_save_regs(struct task_struct *task)
 {
 	pfm_context_t *ctx;
-	struct thread_struct *t;
 	unsigned long flags;
 	u64 psr;
 
 
 	ctx = PFM_GET_CTX(task);
 	if (ctx == NULL) return;
-	t = &task->thread;
 
 	/*
  	 * we always come here with interrupts ALREADY disabled by
@@ -5928,19 +5881,19 @@ pfm_save_regs(struct task_struct *task)
 	 * guarantee we will be schedule at that same
 	 * CPU again.
 	 */
-	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
+	pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);
 
 	/*
 	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
 	 * we will need it on the restore path to check
 	 * for pending overflow.
 	 */
-	t->pmcs[0] = ia64_get_pmc(0);
+	ctx->th_pmcs[0] = ia64_get_pmc(0);
 
 	/*
 	 * unfreeze PMU if had pending overflows
 	 */
-	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+	if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
 
 	/*
 	 * finally, allow context access.
@@ -5985,7 +5938,6 @@ static void
 pfm_lazy_save_regs (struct task_struct *task)
 {
 	pfm_context_t *ctx;
-	struct thread_struct *t;
 	unsigned long flags;
 
 	{ u64 psr  = pfm_get_psr();
@@ -5993,7 +5945,6 @@ pfm_lazy_save_regs (struct task_struct *task)
 	}
 
 	ctx = PFM_GET_CTX(task);
-	t   = &task->thread;
 
 	/*
 	 * we need to mask PMU overflow here to
@@ -6018,19 +5969,19 @@ pfm_lazy_save_regs (struct task_struct *task)
 	/*
 	 * save all the pmds we use
 	 */
-	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
+	pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);
 
 	/*
 	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
 	 * it is needed to check for pended overflow
 	 * on the restore path
 	 */
-	t->pmcs[0] = ia64_get_pmc(0);
+	ctx->th_pmcs[0] = ia64_get_pmc(0);
 
 	/*
 	 * unfreeze PMU if had pending overflows
 	 */
-	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
+	if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
 
 	/*
 	 * now get can unmask PMU interrupts, they will
@@ -6049,7 +6000,6 @@ void
 pfm_load_regs (struct task_struct *task)
 {
 	pfm_context_t *ctx;
-	struct thread_struct *t;
 	unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
 	unsigned long flags;
 	u64 psr, psr_up;
@@ -6060,11 +6010,10 @@ pfm_load_regs (struct task_struct *task)
 
 	BUG_ON(GET_PMU_OWNER());
 
-	t     = &task->thread;
 	/*
 	 * possible on unload
 	 */
-	if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return;
+	if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return;
 
 	/*
  	 * we always come here with interrupts ALREADY disabled by
@@ -6146,26 +6095,26 @@ pfm_load_regs (struct task_struct *task)
 	 *
 	 * XXX: optimize here
 	 */
-	if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask);
-	if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask);
+	if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask);
+	if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask);
 
 	/*
 	 * check for pending overflow at the time the state
 	 * was saved.
 	 */
-	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
+	if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) {
 		/*
 		 * reload pmc0 with the overflow information
 		 * On McKinley PMU, this will trigger a PMU interrupt
 		 */
-		ia64_set_pmc(0, t->pmcs[0]);
+		ia64_set_pmc(0, ctx->th_pmcs[0]);
 		ia64_srlz_d();
-		t->pmcs[0] = 0UL;
+		ctx->th_pmcs[0] = 0UL;
 
 		/*
 		 * will replay the PMU interrupt
 		 */
-		if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
+		if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR);
 
 		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
 	}
@@ -6213,7 +6162,6 @@ pfm_load_regs (struct task_struct *task)
 void
 pfm_load_regs (struct task_struct *task)
 {
-	struct thread_struct *t;
 	pfm_context_t *ctx;
 	struct task_struct *owner;
 	unsigned long pmd_mask, pmc_mask;
@@ -6222,7 +6170,6 @@ pfm_load_regs (struct task_struct *task)
 
 	owner = GET_PMU_OWNER();
 	ctx   = PFM_GET_CTX(task);
-	t     = &task->thread;
 	psr   = pfm_get_psr();
 
 	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
@@ -6285,27 +6232,27 @@ pfm_load_regs (struct task_struct *task)
 	 */
 	pmc_mask = ctx->ctx_all_pmcs[0];
 
-	pfm_restore_pmds(t->pmds, pmd_mask);
-	pfm_restore_pmcs(t->pmcs, pmc_mask);
+	pfm_restore_pmds(ctx->th_pmds, pmd_mask);
+	pfm_restore_pmcs(ctx->th_pmcs, pmc_mask);
 
 	/*
 	 * check for pending overflow at the time the state
 	 * was saved.
 	 */
-	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
+	if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) {
 		/*
 		 * reload pmc0 with the overflow information
 		 * On McKinley PMU, this will trigger a PMU interrupt
 		 */
-		ia64_set_pmc(0, t->pmcs[0]);
+		ia64_set_pmc(0, ctx->th_pmcs[0]);
 		ia64_srlz_d();
 
-		t->pmcs[0] = 0UL;
+		ctx->th_pmcs[0] = 0UL;
 
 		/*
 		 * will replay the PMU interrupt
 		 */
-		if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
+		if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR);
 
 		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
 	}
@@ -6375,11 +6322,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 		 */
 		pfm_unfreeze_pmu();
 	} else {
-		pmc0 = task->thread.pmcs[0];
+		pmc0 = ctx->th_pmcs[0];
 		/*
 		 * clear whatever overflow status bits there were
 		 */
-		task->thread.pmcs[0] = 0;
+		ctx->th_pmcs[0] = 0;
 	}
 	ovfl_val = pmu_conf->ovfl_val;
 	/*
@@ -6400,11 +6347,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 		/*
 		 * can access PMU always true in system wide mode
 		 */
-		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i];
+		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i];
 
 		if (PMD_IS_COUNTING(i)) {
 			DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
-				task->pid,
+				task_pid_nr(task),
 				i,
 				ctx->ctx_pmds[i].val,
 				val & ovfl_val));
@@ -6426,13 +6373,13 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 			 */
 			if (pmc0 & (1UL << i)) {
 				val += 1 + ovfl_val;
-				DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
+				DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i));
 			}
 		}
 
-		DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
+		DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val));
 
-		if (is_self) task->thread.pmds[i] = pmd_val;
+		if (is_self) ctx->th_pmds[i] = pmd_val;
 
 		ctx->ctx_pmds[i].val = val;
 	}
@@ -6440,7 +6387,6 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 
 static struct irqaction perfmon_irqaction = {
 	.handler = pfm_interrupt_handler,
-	.flags   = SA_INTERRUPT,
 	.name    = "perfmon"
 };
 
@@ -6519,7 +6465,7 @@ pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
 	}
 
 	/* save the current system wide pmu states */
-	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1);
+	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
 	if (ret) {
 		DPRINT(("on_each_cpu() failed: %d\n", ret));
 		goto cleanup_reserve;
@@ -6564,7 +6510,7 @@ pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
 
 	pfm_alt_intr_handler = NULL;
 
-	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1);
+	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
 	if (ret) {
 		DPRINT(("on_each_cpu() failed: %d\n", ret));
 	}
@@ -6607,7 +6553,7 @@ found:
 	return 0;
 }
 
-static struct file_operations pfm_proc_fops = {
+static const struct file_operations pfm_proc_fops = {
 	.open		= pfm_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -6676,7 +6622,7 @@ pfm_init(void)
 	       ffz(pmu_conf->ovfl_val));
 
 	/* sanity check */
-	if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) {
+	if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) {
 		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
 		pmu_conf = NULL;
 		return -1;
@@ -6685,21 +6631,17 @@ pfm_init(void)
 	/*
 	 * create /proc/perfmon (mostly for debugging purposes)
 	 */
- 	perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
+	perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops);
 	if (perfmon_dir == NULL) {
 		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
 		pmu_conf = NULL;
 		return -1;
 	}
-  	/*
- 	 * install customized file operations for /proc/perfmon entry
- 	 */
- 	perfmon_dir->proc_fops = &pfm_proc_fops;
 
 	/*
 	 * create /proc/sys/kernel/perfmon (for debugging purposes)
 	 */
-	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
+	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root);
 
 	/*
 	 * initialize all our spinlocks
@@ -6751,7 +6693,6 @@ void
 dump_pmu_state(const char *from)
 {
 	struct task_struct *task;
-	struct thread_struct *t;
 	struct pt_regs *regs;
 	pfm_context_t *ctx;
 	unsigned long psr, dcr, info, flags;
@@ -6772,14 +6713,14 @@ dump_pmu_state(const char *from)
 	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", 
 		this_cpu, 
 		from, 
-		current->pid, 
+		task_pid_nr(current),
 		regs->cr_iip,
 		current->comm);
 
 	task = GET_PMU_OWNER();
 	ctx  = GET_PMU_CTX();
 
-	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx);
+	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task_pid_nr(task) : -1, ctx);
 
 	psr = pfm_get_psr();
 
@@ -6796,16 +6737,14 @@ dump_pmu_state(const char *from)
 	ia64_psr(regs)->up = 0;
 	ia64_psr(regs)->pp = 0;
 
-	t = &current->thread;
-
 	for (i=1; PMC_IS_LAST(i) == 0; i++) {
 		if (PMC_IS_IMPL(i) == 0) continue;
-		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]);
+		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]);
 	}
 
 	for (i=1; PMD_IS_LAST(i) == 0; i++) {
 		if (PMD_IS_IMPL(i) == 0) continue;
-		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]);
+		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]);
 	}
 
 	if (ctx) {
@@ -6829,7 +6768,7 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs)
 {
 	struct thread_struct *thread;
 
-	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid));
+	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task)));
 
 	thread = &task->thread;
 
diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
index 344941db0a9..30c644ea44c 100644
--- a/arch/ia64/kernel/perfmon_default_smpl.c
+++ b/arch/ia64/kernel/perfmon_default_smpl.c
@@ -8,7 +8,6 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/module.h>
-#include <linux/config.h>
 #include <linux/init.h>
 #include <asm/delay.h>
 #include <linux/smp.h>
@@ -25,12 +24,12 @@ MODULE_LICENSE("GPL");
 #ifdef DEFAULT_DEBUG
 #define DPRINT(a) \
 	do { \
-		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
 	} while (0)
 
 #define DPRINT_ovfl(a) \
 	do { \
-		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
 	} while (0)
 
 #else
@@ -45,11 +44,11 @@ default_validate(struct task_struct *task, unsigned int flags, int cpu, void *da
 	int ret = 0;
 
 	if (data == NULL) {
-		DPRINT(("[%d] no argument passed\n", task->pid));
+		DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
 		return -EINVAL;
 	}
 
-	DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu));
+	DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));
 
 	/*
 	 * must hold at least the buffer header + one minimally sized entry
@@ -89,7 +88,7 @@ default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, v
 	hdr->hdr_count        = 0UL;
 
 	DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
-		task->pid,
+		task_pid_nr(task),
 		buf,
 		hdr->hdr_buf_size,
 		sizeof(*hdr),
@@ -151,7 +150,7 @@ default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct
 	 * current = task running at the time of the overflow.
 	 *
 	 * per-task mode:
-	 * 	- this is ususally the task being monitored.
+	 * 	- this is usually the task being monitored.
 	 * 	  Under certain conditions, it might be a different task
 	 *
 	 * system-wide:
@@ -246,7 +245,7 @@ default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, stru
 static int
 default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
 {
-	DPRINT(("[%d] exit(%p)\n", task->pid, buf));
+	DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));
 	return 0;
 }
 
diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h
index 9becccda289..c4bec7a9d18 100644
--- a/arch/ia64/kernel/perfmon_mckinley.h
+++ b/arch/ia64/kernel/perfmon_mckinley.h
@@ -181,7 +181,7 @@ static pmu_config_t pmu_conf_mck={
 	.pmc_desc      = pfm_mck_pmc_desc,
 	.num_ibrs       = 8,
 	.num_dbrs       = 8,
-	.use_rr_dbregs = 1 /* debug register are use for range retrictions */
+	.use_rr_dbregs = 1 /* debug register are use for range restrictions */
 };
 
 
diff --git a/arch/ia64/kernel/perfmon_montecito.h b/arch/ia64/kernel/perfmon_montecito.h
index cd06ac6a686..7f8da4c7ca6 100644
--- a/arch/ia64/kernel/perfmon_montecito.h
+++ b/arch/ia64/kernel/perfmon_montecito.h
@@ -45,16 +45,16 @@ static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={
 /* pmc29 */ { PFM_REG_NOTIMPL, },
 /* pmc30 */ { PFM_REG_NOTIMPL, },
 /* pmc31 */ { PFM_REG_NOTIMPL, },
-/* pmc32 */ { PFM_REG_CONFIG,  0, 0x30f01ffffffffff, 0x30f01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
-/* pmc33 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
-/* pmc34 */ { PFM_REG_CONFIG,  0, 0xf01ffffffffff, 0xf01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
-/* pmc35 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc32 */ { PFM_REG_CONFIG,  0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc33 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc34 */ { PFM_REG_CONFIG,  0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc35 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
 /* pmc36 */ { PFM_REG_CONFIG,  0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
 /* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}},
 /* pmc38 */ { PFM_REG_CONFIG,  0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
 /* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
 /* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}},
-/* pmc41 */ { PFM_REG_CONFIG,  0, 0x00002078fefefefe, 0x1e00018181818, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
+/* pmc41 */ { PFM_REG_CONFIG,  0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
 /* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
 	    { PFM_REG_END    , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
 };
@@ -185,7 +185,7 @@ pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cn
 	DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded));
 
 	if (cnum == 41 && is_loaded 
-	    && (tmpval & 0x1e00000000000) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
+	    && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
 
 		DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval));
 
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 355d57970ba..55d4ba47a90 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -8,9 +8,6 @@
  * 2005-10-07 Keith Owens <kaos@sgi.com>
  *	      Add notify_die() hooks.
  */
-#define __KERNEL_SYSCALLS__	/* see <asm/unistd.h> */
-#include <linux/config.h>
-
 #include <linux/cpu.h>
 #include <linux/pm.h>
 #include <linux/elf.h>
@@ -18,28 +15,31 @@
 #include <linux/kallsyms.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/personality.h>
 #include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/thread_info.h>
 #include <linux/unistd.h>
 #include <linux/efi.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
+#include <linux/kdebug.h>
+#include <linux/utsname.h>
+#include <linux/tracehook.h>
+#include <linux/rcupdate.h>
 
 #include <asm/cpu.h>
 #include <asm/delay.h>
 #include <asm/elf.h>
-#include <asm/ia32.h>
 #include <asm/irq.h>
-#include <asm/kdebug.h>
+#include <asm/kexec.h>
 #include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/sal.h>
+#include <asm/switch_to.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 #include <asm/unwind.h>
@@ -54,10 +54,11 @@
 #include "sigframe.h"
 
 void (*ia64_mark_idle)(int);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
-unsigned long boot_option_idle_override = 0;
+unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
+void (*pm_power_off) (void);
+EXPORT_SYMBOL(pm_power_off);
 
 void
 ia64_do_show_stack (struct unw_frame_info *info, void *arg)
@@ -95,22 +96,16 @@ show_stack (struct task_struct *task, unsigned long *sp)
 }
 
 void
-dump_stack (void)
-{
-	show_stack(NULL, NULL);
-}
-
-EXPORT_SYMBOL(dump_stack);
-
-void
 show_regs (struct pt_regs *regs)
 {
 	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
 
 	print_modules();
-	printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
-	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s\n",
-	       regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
+	printk("\n");
+	show_regs_print_info(KERN_DEFAULT);
+	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s (%s)\n",
+	       regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(),
+	       init_utsname()->release);
 	print_symbol("ip is at %s\n", ip);
 	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
 	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
@@ -157,11 +152,21 @@ show_regs (struct pt_regs *regs)
 		show_stack(NULL, NULL);
 }
 
+/* local support for deprecated console_print */
 void
-do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
+console_print(const char *s)
+{
+	printk(KERN_EMERG "%s", s);
+}
+
+void
+do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
 {
 	if (fsys_mode(current, &scr->pt)) {
-		/* defer signal-handling etc. until we return to privilege-level 0.  */
+		/*
+		 * defer signal-handling etc. until we return to
+		 * privilege-level 0.
+		 */
 		if (!ia64_psr(&scr->pt)->lp)
 			ia64_psr(&scr->pt)->lp = 1;
 		return;
@@ -169,50 +174,44 @@ do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall
 
 #ifdef CONFIG_PERFMON
 	if (current->thread.pfm_needs_checking)
+		/*
+		 * Note: pfm_handle_work() allow us to call it with interrupts
+		 * disabled, and may enable interrupts within the function.
+		 */
 		pfm_handle_work();
 #endif
 
 	/* deal with pending signal delivery */
-	if (test_thread_flag(TIF_SIGPENDING))
-		ia64_do_signal(oldset, scr, in_syscall);
-}
+	if (test_thread_flag(TIF_SIGPENDING)) {
+		local_irq_enable();	/* force interrupt enable */
+		ia64_do_signal(scr, in_syscall);
+	}
+
+	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) {
+		local_irq_enable();	/* force interrupt enable */
+		tracehook_notify_resume(&scr->pt);
+	}
 
-static int pal_halt        = 1;
-static int can_do_pal_halt = 1;
+	/* copy user rbs to kernel rbs */
+	if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) {
+		local_irq_enable();	/* force interrupt enable */
+		ia64_sync_krbs();
+	}
+
+	local_irq_disable();	/* force interrupt disable */
+}
 
 static int __init nohalt_setup(char * str)
 {
-	pal_halt = can_do_pal_halt = 0;
+	cpu_idle_poll_ctrl(true);
 	return 1;
 }
 __setup("nohalt", nohalt_setup);
 
-void
-update_pal_halt_status(int status)
-{
-	can_do_pal_halt = pal_halt && status;
-}
-
-/*
- * We use this if we don't have any better idle routine..
- */
-void
-default_idle (void)
-{
-	local_irq_enable();
-	while (!need_resched()) {
-		if (can_do_pal_halt)
-			safe_halt();
-		else
-			cpu_relax();
-	}
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 /* We don't actually take CPU down, just spin without interrupts. */
 static inline void play_dead(void)
 {
-	extern void ia64_cpu_local_tick (void);
 	unsigned int this_cpu = smp_processor_id();
 
 	/* Ack it */
@@ -235,76 +234,29 @@ static inline void play_dead(void)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-void cpu_idle_wait(void)
+void arch_cpu_idle_dead(void)
 {
-	unsigned int cpu, this_cpu = get_cpu();
-	cpumask_t map;
-
-	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
-	put_cpu();
-
-	cpus_clear(map);
-	for_each_online_cpu(cpu) {
-		per_cpu(cpu_idle_state, cpu) = 1;
-		cpu_set(cpu, map);
-	}
-
-	__get_cpu_var(cpu_idle_state) = 0;
-
-	wmb();
-	do {
-		ssleep(1);
-		for_each_online_cpu(cpu) {
-			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
-				cpu_clear(cpu, map);
-		}
-		cpus_and(map, map, cpu_online_map);
-	} while (!cpus_empty(map));
+	play_dead();
 }
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
-void __attribute__((noreturn))
-cpu_idle (void)
+void arch_cpu_idle(void)
 {
 	void (*mark_idle)(int) = ia64_mark_idle;
-  	int cpu = smp_processor_id();
-
-	/* endless idle loop with no priority at all */
-	while (1) {
-		if (can_do_pal_halt)
-			clear_thread_flag(TIF_POLLING_NRFLAG);
-		else
-			set_thread_flag(TIF_POLLING_NRFLAG);
 
-		if (!need_resched()) {
-			void (*idle)(void);
 #ifdef CONFIG_SMP
-			min_xtp();
+	min_xtp();
 #endif
-			if (__get_cpu_var(cpu_idle_state))
-				__get_cpu_var(cpu_idle_state) = 0;
-
-			rmb();
-			if (mark_idle)
-				(*mark_idle)(1);
-
-			idle = pm_idle;
-			if (!idle)
-				idle = default_idle;
-			(*idle)();
-			if (mark_idle)
-				(*mark_idle)(0);
+	rmb();
+	if (mark_idle)
+		(*mark_idle)(1);
+
+	safe_halt();
+
+	if (mark_idle)
+		(*mark_idle)(0);
 #ifdef CONFIG_SMP
-			normal_xtp();
+	normal_xtp();
 #endif
-		}
-		preempt_enable_no_resched();
-		schedule();
-		preempt_disable();
-		check_pgt_cache();
-		if (cpu_is_offline(cpu))
-			play_dead();
-	}
 }
 
 void
@@ -325,11 +277,6 @@ ia64_save_extra (struct task_struct *task)
 	if (info & PFM_CPUINFO_SYST_WIDE)
 		pfm_syst_wide_update_task(task, info, 0);
 #endif
-
-#ifdef CONFIG_IA32_SUPPORT
-	if (IS_IA32_PROCESS(task_pt_regs(task)))
-		ia32_save_state(task);
-#endif
 }
 
 void
@@ -350,11 +297,6 @@ ia64_load_extra (struct task_struct *task)
 	if (info & PFM_CPUINFO_SYST_WIDE) 
 		pfm_syst_wide_update_task(task, info, 1);
 #endif
-
-#ifdef CONFIG_IA32_SUPPORT
-	if (IS_IA32_PROCESS(task_pt_regs(task)))
-		ia32_load_state(task);
-#endif
 }
 
 /*
@@ -389,77 +331,26 @@ ia64_load_extra (struct task_struct *task)
  * so there is nothing to worry about.
  */
 int
-copy_thread (int nr, unsigned long clone_flags,
+copy_thread(unsigned long clone_flags,
 	     unsigned long user_stack_base, unsigned long user_stack_size,
-	     struct task_struct *p, struct pt_regs *regs)
+	     struct task_struct *p)
 {
-	extern char ia64_ret_from_clone, ia32_ret_from_clone;
+	extern char ia64_ret_from_clone;
 	struct switch_stack *child_stack, *stack;
 	unsigned long rbs, child_rbs, rbs_size;
 	struct pt_regs *child_ptregs;
+	struct pt_regs *regs = current_pt_regs();
 	int retval = 0;
 
-#ifdef CONFIG_SMP
-	/*
-	 * For SMP idle threads, fork_by_hand() calls do_fork with
-	 * NULL regs.
-	 */
-	if (!regs)
-		return 0;
-#endif
-
-	stack = ((struct switch_stack *) regs) - 1;
-
 	child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
 	child_stack = (struct switch_stack *) child_ptregs - 1;
 
-	/* copy parent's switch_stack & pt_regs to child: */
-	memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
-
 	rbs = (unsigned long) current + IA64_RBS_OFFSET;
 	child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
-	rbs_size = stack->ar_bspstore - rbs;
-
-	/* copy the parent's register backing store to the child: */
-	memcpy((void *) child_rbs, (void *) rbs, rbs_size);
-
-	if (likely(user_mode(child_ptregs))) {
-		if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs))
-			child_ptregs->r13 = regs->r16;	/* see sys_clone2() in entry.S */
-		if (user_stack_base) {
-			child_ptregs->r12 = user_stack_base + user_stack_size - 16;
-			child_ptregs->ar_bspstore = user_stack_base;
-			child_ptregs->ar_rnat = 0;
-			child_ptregs->loadrs = 0;
-		}
-	} else {
-		/*
-		 * Note: we simply preserve the relative position of
-		 * the stack pointer here.  There is no need to
-		 * allocate a scratch area here, since that will have
-		 * been taken care of by the caller of sys_clone()
-		 * already.
-		 */
-		child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */
-		child_ptregs->r13 = (unsigned long) p;		/* set `current' pointer */
-	}
-	child_stack->ar_bspstore = child_rbs + rbs_size;
-	if (IS_IA32_PROCESS(regs))
-		child_stack->b0 = (unsigned long) &ia32_ret_from_clone;
-	else
-		child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
 
 	/* copy parts of thread_struct: */
 	p->thread.ksp = (unsigned long) child_stack - 16;
 
-	/* stop some PSR bits from being inherited.
-	 * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
-	 * therefore we must specify them explicitly here and not include them in
-	 * IA64_PSR_BITS_TO_CLEAR.
-	 */
-	child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
-				 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
-
 	/*
 	 * NOTE: The calling convention considers all floating point
 	 * registers in the high partition (fph) to be scratch.  Since
@@ -481,22 +372,65 @@ copy_thread (int nr, unsigned long clone_flags,
 #	define THREAD_FLAGS_TO_SET	0
 	p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
 			   | THREAD_FLAGS_TO_SET);
+
 	ia64_drop_fpu(p);	/* don't pick up stale state from a CPU's fph */
-#ifdef CONFIG_IA32_SUPPORT
-	/*
-	 * If we're cloning an IA32 task then save the IA32 extra
-	 * state from the current task to the new task
-	 */
-	if (IS_IA32_PROCESS(task_pt_regs(current))) {
-		ia32_save_state(p);
-		if (clone_flags & CLONE_SETTLS)
-			retval = ia32_clone_tls(p, child_ptregs);
-
-		/* Copy partially mapped page list */
-		if (!retval)
-			retval = ia32_copy_partial_page_list(p, clone_flags);
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		if (unlikely(!user_stack_base)) {
+			/* fork_idle() called us */
+			return 0;
+		}
+		memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack));
+		child_stack->r4 = user_stack_base;	/* payload */
+		child_stack->r5 = user_stack_size;	/* argument */
+		/*
+		 * Preserve PSR bits, except for bits 32-34 and 37-45,
+		 * which we can't read.
+		 */
+		child_ptregs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+		/* mark as valid, empty frame */
+		child_ptregs->cr_ifs = 1UL << 63;
+		child_stack->ar_fpsr = child_ptregs->ar_fpsr
+			= ia64_getreg(_IA64_REG_AR_FPSR);
+		child_stack->pr = (1 << PRED_KERNEL_STACK);
+		child_stack->ar_bspstore = child_rbs;
+		child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+		/* stop some PSR bits from being inherited.
+		 * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+		 * therefore we must specify them explicitly here and not include them in
+		 * IA64_PSR_BITS_TO_CLEAR.
+		 */
+		child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+				 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
+		return 0;
 	}
-#endif
+	stack = ((struct switch_stack *) regs) - 1;
+	/* copy parent's switch_stack & pt_regs to child: */
+	memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
+
+	/* copy the parent's register backing store to the child: */
+	rbs_size = stack->ar_bspstore - rbs;
+	memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+	if (clone_flags & CLONE_SETTLS)
+		child_ptregs->r13 = regs->r16;	/* see sys_clone2() in entry.S */
+	if (user_stack_base) {
+		child_ptregs->r12 = user_stack_base + user_stack_size - 16;
+		child_ptregs->ar_bspstore = user_stack_base;
+		child_ptregs->ar_rnat = 0;
+		child_ptregs->loadrs = 0;
+	}
+	child_stack->ar_bspstore = child_rbs + rbs_size;
+	child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+	/* stop some PSR bits from being inherited.
+	 * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+	 * therefore we must specify them explicitly here and not include them in
+	 * IA64_PSR_BITS_TO_CLEAR.
+	 */
+	child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+				 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
 
 #ifdef CONFIG_PERFMON
 	if (current->thread.pfm_context)
@@ -508,7 +442,8 @@ copy_thread (int nr, unsigned long clone_flags,
 static void
 do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg)
 {
-	unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm;
+	unsigned long mask, sp, nat_bits = 0, ar_rnat, urbs_end, cfm;
+	unsigned long uninitialized_var(ip);	/* GCC be quiet */
 	elf_greg_t *dst = arg;
 	struct pt_regs *pt;
 	char nat;
@@ -610,21 +545,6 @@ do_dump_fpu (struct unw_frame_info *info, void *arg)
 	do_dump_task_fpu(current, info, arg);
 }
 
-int
-dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
-{
-	struct unw_frame_info tcore_info;
-
-	if (current == task) {
-		unw_init_running(do_copy_regs, regs);
-	} else {
-		memset(&tcore_info, 0, sizeof(tcore_info));
-		unw_init_from_blocked_task(&tcore_info, task);
-		do_copy_task_regs(task, &tcore_info, regs);
-	}
-	return 1;
-}
-
 void
 ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
 {
@@ -632,85 +552,12 @@ ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
 }
 
 int
-dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
-{
-	struct unw_frame_info tcore_info;
-
-	if (current == task) {
-		unw_init_running(do_dump_fpu, dst);
-	} else {
-		memset(&tcore_info, 0, sizeof(tcore_info));
-		unw_init_from_blocked_task(&tcore_info, task);
-		do_dump_task_fpu(task, &tcore_info, dst);
-	}
-	return 1;
-}
-
-int
 dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
 {
 	unw_init_running(do_dump_fpu, dst);
 	return 1;	/* f0-f31 are always valid so we always return 1 */
 }
 
-long
-sys_execve (char __user *filename, char __user * __user *argv, char __user * __user *envp,
-	    struct pt_regs *regs)
-{
-	char *fname;
-	int error;
-
-	fname = getname(filename);
-	error = PTR_ERR(fname);
-	if (IS_ERR(fname))
-		goto out;
-	error = do_execve(fname, argv, envp, regs);
-	putname(fname);
-out:
-	return error;
-}
-
-pid_t
-kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
-{
-	extern void start_kernel_thread (void);
-	unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
-	struct {
-		struct switch_stack sw;
-		struct pt_regs pt;
-	} regs;
-
-	memset(&regs, 0, sizeof(regs));
-	regs.pt.cr_iip = helper_fptr[0];	/* set entry point (IP) */
-	regs.pt.r1 = helper_fptr[1];		/* set GP */
-	regs.pt.r9 = (unsigned long) fn;	/* 1st argument */
-	regs.pt.r11 = (unsigned long) arg;	/* 2nd argument */
-	/* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read.  */
-	regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
-	regs.pt.cr_ifs = 1UL << 63;		/* mark as valid, empty frame */
-	regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
-	regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
-	regs.sw.pr = (1 << PRED_KERNEL_STACK);
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/* This gets called from kernel_thread() via ia64_invoke_thread_helper().  */
-int
-kernel_thread_helper (int (*fn)(void *), void *arg)
-{
-#ifdef CONFIG_IA32_SUPPORT
-	if (IS_IA32_PROCESS(task_pt_regs(current))) {
-		/* A kernel thread is always a 64-bit process. */
-		current->thread.map_base  = DEFAULT_MAP_BASE;
-		current->thread.task_size = DEFAULT_TASK_SIZE;
-		ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
-		ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
-	}
-#endif
-	return (*fn)(arg);
-}
-
 /*
  * Flush thread state.  This is called when a thread does an execve().
  */
@@ -720,13 +567,6 @@ flush_thread (void)
 	/* drop floating-point and debug-register state if it exists: */
 	current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
 	ia64_drop_fpu(current);
-#ifdef CONFIG_IA32_SUPPORT
-	if (IS_IA32_PROCESS(task_pt_regs(current))) {
-		ia32_drop_partial_page_list(current);
-		current->thread.task_size = IA32_PAGE_OFFSET;
-		set_fs(USER_DS);
-	}
-#endif
 }
 
 /*
@@ -747,8 +587,6 @@ exit_thread (void)
 	if (current->thread.flags & IA64_THREAD_DBG_VALID)
 		pfm_release_debug_registers(current);
 #endif
-	if (IS_IA32_PROCESS(task_pt_regs(current)))
-		ia32_drop_partial_page_list(current);
 }
 
 unsigned long
@@ -758,6 +596,9 @@ get_wchan (struct task_struct *p)
 	unsigned long ip;
 	int count = 0;
 
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
 	/*
 	 * Note: p may not be a blocked task (it could be current or
 	 * another process running on some other CPU.  Rather than
@@ -768,6 +609,8 @@ get_wchan (struct task_struct *p)
 	 */
 	unw_init_from_blocked_task(&info, p);
 	do {
+		if (p->state == TASK_RUNNING)
+			return 0;
 		if (unw_unwind(&info) < 0)
 			return 0;
 		unw_get_ip(&info, &ip);
@@ -800,6 +643,21 @@ cpu_halt (void)
 		ia64_pal_halt(min_power_state);
 }
 
+void machine_shutdown(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu != smp_processor_id())
+			cpu_down(cpu);
+	}
+#endif
+#ifdef CONFIG_KEXEC
+	kexec_disable_iosapic();
+#endif
+}
+
 void
 machine_restart (char *restart_cmd)
 {
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index e61e15e28d8..b7a5fffe092 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -3,27 +3,29 @@
  *
  * Copyright (C) 1999-2005 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2006 Intel Co
+ *  2006-08-12	- IA64 Native Utrace implementation support added by
+ *	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  *
  * Derived from the x86 and Alpha versions.
  */
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/errno.h>
 #include <linux/ptrace.h>
-#include <linux/smp_lock.h>
 #include <linux/user.h>
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/ptrace_offsets.h>
 #include <asm/rse.h>
-#include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/unwind.h>
 #ifdef CONFIG_PERFMON
@@ -548,77 +550,126 @@ ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw,
 	return 0;
 }
 
-static inline int
-thread_matches (struct task_struct *thread, unsigned long addr)
+static long
+ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw,
+		unsigned long user_rbs_start, unsigned long user_rbs_end)
 {
-	unsigned long thread_rbs_end;
-	struct pt_regs *thread_regs;
+	unsigned long addr, val;
+	long ret;
 
-	if (ptrace_check_attach(thread, 0) < 0)
-		/*
-		 * If the thread is not in an attachable state, we'll
-		 * ignore it.  The net effect is that if ADDR happens
-		 * to overlap with the portion of the thread's
-		 * register backing store that is currently residing
-		 * on the thread's kernel stack, then ptrace() may end
-		 * up accessing a stale value.  But if the thread
-		 * isn't stopped, that's a problem anyhow, so we're
-		 * doing as well as we can...
-		 */
-		return 0;
+	/* now copy word for word from user rbs to kernel rbs: */
+	for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) {
+		if (access_process_vm(child, addr, &val, sizeof(val), 0)
+				!= sizeof(val))
+			return -EIO;
 
-	thread_regs = task_pt_regs(thread);
-	thread_rbs_end = ia64_get_user_rbs_end(thread, thread_regs, NULL);
-	if (!on_kernel_rbs(addr, thread_regs->ar_bspstore, thread_rbs_end))
-		return 0;
+		ret = ia64_poke(child, sw, user_rbs_end, addr, val);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+typedef long (*syncfunc_t)(struct task_struct *, struct switch_stack *,
+			    unsigned long, unsigned long);
+
+static void do_sync_rbs(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	unsigned long urbs_end;
+	syncfunc_t fn = arg;
 
-	return 1;	/* looks like we've got a winner */
+	if (unw_unwind_to_user(info) < 0)
+		return;
+	pt = task_pt_regs(info->task);
+	urbs_end = ia64_get_user_rbs_end(info->task, pt, NULL);
+
+	fn(info->task, info->sw, pt->ar_bspstore, urbs_end);
 }
 
 /*
- * GDB apparently wants to be able to read the register-backing store
- * of any thread when attached to a given process.  If we are peeking
- * or poking an address that happens to reside in the kernel-backing
- * store of another thread, we need to attach to that thread, because
- * otherwise we end up accessing stale data.
- *
- * task_list_lock must be read-locked before calling this routine!
+ * when a thread is stopped (ptraced), debugger might change thread's user
+ * stack (change memory directly), and we must avoid the RSE stored in kernel
+ * to override user stack (user space's RSE is newer than kernel's in the
+ * case). To workaround the issue, we copy kernel RSE to user RSE before the
+ * task is stopped, so user RSE has updated data.  we then copy user RSE to
+ * kernel after the task is resummed from traced stop and kernel will use the
+ * newer RSE to return to user. TIF_RESTORE_RSE is the flag to indicate we need
+ * synchronize user RSE to kernel.
  */
-static struct task_struct *
-find_thread_for_addr (struct task_struct *child, unsigned long addr)
+void ia64_ptrace_stop(void)
 {
-	struct task_struct *p;
-	struct mm_struct *mm;
-	struct list_head *this, *next;
-	int mm_users;
+	if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE))
+		return;
+	set_notify_resume(current);
+	unw_init_running(do_sync_rbs, ia64_sync_user_rbs);
+}
 
-	if (!(mm = get_task_mm(child)))
-		return child;
+/*
+ * This is called to read back the register backing store.
+ */
+void ia64_sync_krbs(void)
+{
+	clear_tsk_thread_flag(current, TIF_RESTORE_RSE);
 
-	/* -1 because of our get_task_mm(): */
-	mm_users = atomic_read(&mm->mm_users) - 1;
-	if (mm_users <= 1)
-		goto out;		/* not multi-threaded */
+	unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs);
+}
+
+/*
+ * After PTRACE_ATTACH, a thread's register backing store area in user
+ * space is assumed to contain correct data whenever the thread is
+ * stopped.  arch_ptrace_stop takes care of this on tracing stops.
+ * But if the child was already stopped for job control when we attach
+ * to it, then it might not ever get into ptrace_stop by the time we
+ * want to examine the user memory containing the RBS.
+ */
+void
+ptrace_attach_sync_user_rbs (struct task_struct *child)
+{
+	int stopped = 0;
+	struct unw_frame_info info;
 
 	/*
-	 * Traverse the current process' children list.  Every task that
-	 * one attaches to becomes a child.  And it is only attached children
-	 * of the debugger that are of interest (ptrace_check_attach checks
-	 * for this).
+	 * If the child is in TASK_STOPPED, we need to change that to
+	 * TASK_TRACED momentarily while we operate on it.  This ensures
+	 * that the child won't be woken up and return to user mode while
+	 * we are doing the sync.  (It can only be woken up for SIGKILL.)
 	 */
- 	list_for_each_safe(this, next, &current->children) {
-		p = list_entry(this, struct task_struct, sibling);
-		if (p->mm != mm)
-			continue;
-		if (thread_matches(p, addr)) {
-			child = p;
-			goto out;
+
+	read_lock(&tasklist_lock);
+	if (child->sighand) {
+		spin_lock_irq(&child->sighand->siglock);
+		if (child->state == TASK_STOPPED &&
+		    !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) {
+			set_notify_resume(child);
+
+			child->state = TASK_TRACED;
+			stopped = 1;
 		}
+		spin_unlock_irq(&child->sighand->siglock);
 	}
+	read_unlock(&tasklist_lock);
+
+	if (!stopped)
+		return;
+
+	unw_init_from_blocked_task(&info, child);
+	do_sync_rbs(&info, ia64_sync_user_rbs);
 
-  out:
-	mmput(mm);
-	return child;
+	/*
+	 * Now move the child back into TASK_STOPPED if it should be in a
+	 * job control stop, so that SIGCONT can be used to wake it up.
+	 */
+	read_lock(&tasklist_lock);
+	if (child->sighand) {
+		spin_lock_irq(&child->sighand->siglock);
+		if (child->state == TASK_TRACED &&
+		    (child->signal->flags & SIGNAL_STOP_STOPPED)) {
+			child->state = TASK_STOPPED;
+		}
+		spin_unlock_irq(&child->sighand->siglock);
+	}
+	read_unlock(&tasklist_lock);
 }
 
 /*
@@ -664,25 +715,6 @@ ia64_sync_fph (struct task_struct *task)
 	psr->dfh = 1;
 }
 
-static int
-access_fr (struct unw_frame_info *info, int regnum, int hi,
-	   unsigned long *data, int write_access)
-{
-	struct ia64_fpreg fpval;
-	int ret;
-
-	ret = unw_get_fr(info, regnum, &fpval);
-	if (ret < 0)
-		return ret;
-
-	if (write_access) {
-		fpval.u.bits[hi] = *data;
-		ret = unw_set_fr(info, regnum, fpval);
-	} else
-		*data = fpval.u.bits[hi];
-	return ret;
-}
-
 /*
  * Change the machine-state of CHILD such that it will return via the normal
  * kernel exit-path, rather than the syscall-exit path.
@@ -704,14 +736,14 @@ convert_to_non_syscall (struct task_struct *child, struct pt_regs  *pt,
 		if ((long)((unsigned long)child + IA64_STK_OFFSET - sp)
 		    < IA64_PT_REGS_SIZE) {
 			dprintk("ptrace.%s: ran off the top of the kernel "
-				"stack\n", __FUNCTION__);
+				"stack\n", __func__);
 			return;
 		}
 		if (unw_get_pr (&prev_info, &pr) < 0) {
 			unw_get_rp(&prev_info, &ip);
 			dprintk("ptrace.%s: failed to read "
 				"predicate register (ip=0x%lx)\n",
-				__FUNCTION__, ip);
+				__func__, ip);
 			return;
 		}
 		if (unw_is_intr_frame(&info)
@@ -784,326 +816,7 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt,
 
 static int
 access_uarea (struct task_struct *child, unsigned long addr,
-	      unsigned long *data, int write_access)
-{
-	unsigned long *ptr, regnum, urbs_end, rnat_addr, cfm;
-	struct switch_stack *sw;
-	struct pt_regs *pt;
-#	define pt_reg_addr(pt, reg)	((void *)			    \
-					 ((unsigned long) (pt)		    \
-					  + offsetof(struct pt_regs, reg)))
-
-
-	pt = task_pt_regs(child);
-	sw = (struct switch_stack *) (child->thread.ksp + 16);
-
-	if ((addr & 0x7) != 0) {
-		dprintk("ptrace: unaligned register address 0x%lx\n", addr);
-		return -1;
-	}
-
-	if (addr < PT_F127 + 16) {
-		/* accessing fph */
-		if (write_access)
-			ia64_sync_fph(child);
-		else
-			ia64_flush_fph(child);
-		ptr = (unsigned long *)
-			((unsigned long) &child->thread.fph + addr);
-	} else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
-		/* scratch registers untouched by kernel (saved in pt_regs) */
-		ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
-	} else if (addr >= PT_F12 && addr < PT_F15 + 16) {
-		/*
-		 * Scratch registers untouched by kernel (saved in
-		 * switch_stack).
-		 */
-		ptr = (unsigned long *) ((long) sw
-					 + (addr - PT_NAT_BITS - 32));
-	} else if (addr < PT_AR_LC + 8) {
-		/* preserved state: */
-		struct unw_frame_info info;
-		char nat = 0;
-		int ret;
-
-		unw_init_from_blocked_task(&info, child);
-		if (unw_unwind_to_user(&info) < 0)
-			return -1;
-
-		switch (addr) {
-		      case PT_NAT_BITS:
-			return access_nat_bits(child, pt, &info,
-					       data, write_access);
-
-		      case PT_R4: case PT_R5: case PT_R6: case PT_R7:
-			if (write_access) {
-				/* read NaT bit first: */
-				unsigned long dummy;
-
-				ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
-						 &dummy, &nat);
-				if (ret < 0)
-					return ret;
-			}
-			return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
-					     &nat, write_access);
-
-		      case PT_B1: case PT_B2: case PT_B3:
-		      case PT_B4: case PT_B5:
-			return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
-					     write_access);
-
-		      case PT_AR_EC:
-			return unw_access_ar(&info, UNW_AR_EC, data,
-					     write_access);
-
-		      case PT_AR_LC:
-			return unw_access_ar(&info, UNW_AR_LC, data,
-					     write_access);
-
-		      default:
-			if (addr >= PT_F2 && addr < PT_F5 + 16)
-				return access_fr(&info, (addr - PT_F2)/16 + 2,
-						 (addr & 8) != 0, data,
-						 write_access);
-			else if (addr >= PT_F16 && addr < PT_F31 + 16)
-				return access_fr(&info,
-						 (addr - PT_F16)/16 + 16,
-						 (addr & 8) != 0,
-						 data, write_access);
-			else {
-				dprintk("ptrace: rejecting access to register "
-					"address 0x%lx\n", addr);
-				return -1;
-			}
-		}
-	} else if (addr < PT_F9+16) {
-		/* scratch state */
-		switch (addr) {
-		      case PT_AR_BSP:
-			/*
-			 * By convention, we use PT_AR_BSP to refer to
-			 * the end of the user-level backing store.
-			 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
-			 * to get the real value of ar.bsp at the time
-			 * the kernel was entered.
-			 *
-			 * Furthermore, when changing the contents of
-			 * PT_AR_BSP (or PT_CFM) we MUST copy any
-			 * users-level stacked registers that are
-			 * stored on the kernel stack back to
-			 * user-space because otherwise, we might end
-			 * up clobbering kernel stacked registers.
-			 * Also, if this happens while the task is
-			 * blocked in a system call, which convert the
-			 * state such that the non-system-call exit
-			 * path is used.  This ensures that the proper
-			 * state will be picked up when resuming
-			 * execution.  However, it *also* means that
-			 * once we write PT_AR_BSP/PT_CFM, it won't be
-			 * possible to modify the syscall arguments of
-			 * the pending system call any longer.  This
-			 * shouldn't be an issue because modifying
-			 * PT_AR_BSP/PT_CFM generally implies that
-			 * we're either abandoning the pending system
-			 * call or that we defer it's re-execution
-			 * (e.g., due to GDB doing an inferior
-			 * function call).
-			 */
-			urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
-			if (write_access) {
-				if (*data != urbs_end) {
-					if (ia64_sync_user_rbs(child, sw,
-							       pt->ar_bspstore,
-							       urbs_end) < 0)
-						return -1;
-					if (in_syscall(pt))
-						convert_to_non_syscall(child,
-								       pt,
-								       cfm);
-					/*
-					 * Simulate user-level write
-					 * of ar.bsp:
-					 */
-					pt->loadrs = 0;
-					pt->ar_bspstore = *data;
-				}
-			} else
-				*data = urbs_end;
-			return 0;
-
-		      case PT_CFM:
-			urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
-			if (write_access) {
-				if (((cfm ^ *data) & PFM_MASK) != 0) {
-					if (ia64_sync_user_rbs(child, sw,
-							       pt->ar_bspstore,
-							       urbs_end) < 0)
-						return -1;
-					if (in_syscall(pt))
-						convert_to_non_syscall(child,
-								       pt,
-								       cfm);
-					pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
-						      | (*data & PFM_MASK));
-				}
-			} else
-				*data = cfm;
-			return 0;
-
-		      case PT_CR_IPSR:
-			if (write_access)
-				pt->cr_ipsr = ((*data & IPSR_MASK)
-					       | (pt->cr_ipsr & ~IPSR_MASK));
-			else
-				*data = (pt->cr_ipsr & IPSR_MASK);
-			return 0;
-
-		      case PT_AR_RSC:
-			if (write_access)
-				pt->ar_rsc = *data | (3 << 2); /* force PL3 */
-			else
-				*data = pt->ar_rsc;
-			return 0;
-
-		      case PT_AR_RNAT:
-			urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
-			rnat_addr = (long) ia64_rse_rnat_addr((long *)
-							      urbs_end);
-			if (write_access)
-				return ia64_poke(child, sw, urbs_end,
-						 rnat_addr, *data);
-			else
-				return ia64_peek(child, sw, urbs_end,
-						 rnat_addr, data);
-
-		      case PT_R1:
-			ptr = pt_reg_addr(pt, r1);
-			break;
-		      case PT_R2:  case PT_R3:
-			ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
-			break;
-		      case PT_R8:  case PT_R9:  case PT_R10: case PT_R11:
-			ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
-			break;
-		      case PT_R12: case PT_R13:
-			ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
-			break;
-		      case PT_R14:
-			ptr = pt_reg_addr(pt, r14);
-			break;
-		      case PT_R15:
-			ptr = pt_reg_addr(pt, r15);
-			break;
-		      case PT_R16: case PT_R17: case PT_R18: case PT_R19:
-		      case PT_R20: case PT_R21: case PT_R22: case PT_R23:
-		      case PT_R24: case PT_R25: case PT_R26: case PT_R27:
-		      case PT_R28: case PT_R29: case PT_R30: case PT_R31:
-			ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
-			break;
-		      case PT_B0:
-			ptr = pt_reg_addr(pt, b0);
-			break;
-		      case PT_B6:
-			ptr = pt_reg_addr(pt, b6);
-			break;
-		      case PT_B7:
-			ptr = pt_reg_addr(pt, b7);
-			break;
-		      case PT_F6:  case PT_F6+8: case PT_F7: case PT_F7+8:
-		      case PT_F8:  case PT_F8+8: case PT_F9: case PT_F9+8:
-			ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
-			break;
-		      case PT_AR_BSPSTORE:
-			ptr = pt_reg_addr(pt, ar_bspstore);
-			break;
-		      case PT_AR_UNAT:
-			ptr = pt_reg_addr(pt, ar_unat);
-			break;
-		      case PT_AR_PFS:
-			ptr = pt_reg_addr(pt, ar_pfs);
-			break;
-		      case PT_AR_CCV:
-			ptr = pt_reg_addr(pt, ar_ccv);
-			break;
-		      case PT_AR_FPSR:
-			ptr = pt_reg_addr(pt, ar_fpsr);
-			break;
-		      case PT_CR_IIP:
-			ptr = pt_reg_addr(pt, cr_iip);
-			break;
-		      case PT_PR:
-			ptr = pt_reg_addr(pt, pr);
-			break;
-			/* scratch register */
-
-		      default:
-			/* disallow accessing anything else... */
-			dprintk("ptrace: rejecting access to register "
-				"address 0x%lx\n", addr);
-			return -1;
-		}
-	} else if (addr <= PT_AR_SSD) {
-		ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
-	} else {
-		/* access debug registers */
-
-		if (addr >= PT_IBR) {
-			regnum = (addr - PT_IBR) >> 3;
-			ptr = &child->thread.ibr[0];
-		} else {
-			regnum = (addr - PT_DBR) >> 3;
-			ptr = &child->thread.dbr[0];
-		}
-
-		if (regnum >= 8) {
-			dprintk("ptrace: rejecting access to register "
-				"address 0x%lx\n", addr);
-			return -1;
-		}
-#ifdef CONFIG_PERFMON
-		/*
-		 * Check if debug registers are used by perfmon. This
-		 * test must be done once we know that we can do the
-		 * operation, i.e. the arguments are all valid, but
-		 * before we start modifying the state.
-		 *
-		 * Perfmon needs to keep a count of how many processes
-		 * are trying to modify the debug registers for system
-		 * wide monitoring sessions.
-		 *
-		 * We also include read access here, because they may
-		 * cause the PMU-installed debug register state
-		 * (dbr[], ibr[]) to be reset. The two arrays are also
-		 * used by perfmon, but we do not use
-		 * IA64_THREAD_DBG_VALID. The registers are restored
-		 * by the PMU context switch code.
-		 */
-		if (pfm_use_debug_registers(child)) return -1;
-#endif
-
-		if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
-			child->thread.flags |= IA64_THREAD_DBG_VALID;
-			memset(child->thread.dbr, 0,
-			       sizeof(child->thread.dbr));
-			memset(child->thread.ibr, 0,
-			       sizeof(child->thread.ibr));
-		}
-
-		ptr += regnum;
-
-		if ((regnum & 1) && write_access) {
-			/* don't let the user set kernel-level breakpoints: */
-			*ptr = *data & ~(7UL << 56);
-			return 0;
-		}
-	}
-	if (write_access)
-		*ptr = *data;
-	else
-		*data = *ptr;
-	return 0;
-}
+	      unsigned long *data, int write_access);
 
 static long
 ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
@@ -1395,277 +1108,1087 @@ ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
 	return ret;
 }
 
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure the single step bit is not set.
- */
 void
-ptrace_disable (struct task_struct *child)
+user_enable_single_step (struct task_struct *child)
 {
 	struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
 
-	/* make sure the single step/taken-branch trap bits are not set: */
-	child_psr->ss = 0;
-	child_psr->tb = 0;
+	set_tsk_thread_flag(child, TIF_SINGLESTEP);
+	child_psr->ss = 1;
 }
 
-asmlinkage long
-sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
+void
+user_enable_block_step (struct task_struct *child)
 {
-	struct pt_regs *pt;
-	unsigned long urbs_end, peek_or_poke;
-	struct task_struct *child;
-	struct switch_stack *sw;
-	long ret;
+	struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
 
-	lock_kernel();
-	ret = -EPERM;
-	if (request == PTRACE_TRACEME) {
-		ret = ptrace_traceme();
-		goto out;
-	}
+	set_tsk_thread_flag(child, TIF_SINGLESTEP);
+	child_psr->tb = 1;
+}
 
-	peek_or_poke = (request == PTRACE_PEEKTEXT
-			|| request == PTRACE_PEEKDATA
-			|| request == PTRACE_POKETEXT
-			|| request == PTRACE_POKEDATA);
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	{
-		child = find_task_by_pid(pid);
-		if (child) {
-			if (peek_or_poke)
-				child = find_thread_for_addr(child, addr);
-			get_task_struct(child);
-		}
-	}
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
-	ret = -EPERM;
-	if (pid == 1)		/* no messing around with init! */
-		goto out_tsk;
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out_tsk;
-	}
+void
+user_disable_single_step (struct task_struct *child)
+{
+	struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
 
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out_tsk;
+	/* make sure the single step/taken-branch trap bits are not set: */
+	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+	child_psr->ss = 0;
+	child_psr->tb = 0;
+}
 
-	pt = task_pt_regs(child);
-	sw = (struct switch_stack *) (child->thread.ksp + 16);
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure the single step bit is not set.
+ */
+void
+ptrace_disable (struct task_struct *child)
+{
+	user_disable_single_step(child);
+}
 
+long
+arch_ptrace (struct task_struct *child, long request,
+	     unsigned long addr, unsigned long data)
+{
 	switch (request) {
-	      case PTRACE_PEEKTEXT:
-	      case PTRACE_PEEKDATA:
+	case PTRACE_PEEKTEXT:
+	case PTRACE_PEEKDATA:
 		/* read word at location addr */
-		urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
-		ret = ia64_peek(child, sw, urbs_end, addr, &data);
-		if (ret == 0) {
-			ret = data;
-			/* ensure "ret" is not mistaken as an error code: */
-			force_successful_syscall_return();
-		}
-		goto out_tsk;
+		if (access_process_vm(child, addr, &data, sizeof(data), 0)
+		    != sizeof(data))
+			return -EIO;
+		/* ensure return value is not mistaken for error code */
+		force_successful_syscall_return();
+		return data;
 
-	      case PTRACE_POKETEXT:
-	      case PTRACE_POKEDATA:
-		/* write the word at location addr */
-		urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
-		ret = ia64_poke(child, sw, urbs_end, addr, data);
-		goto out_tsk;
+	/* PTRACE_POKETEXT and PTRACE_POKEDATA is handled
+	 * by the generic ptrace_request().
+	 */
 
-	      case PTRACE_PEEKUSR:
+	case PTRACE_PEEKUSR:
 		/* read the word at addr in the USER area */
-		if (access_uarea(child, addr, &data, 0) < 0) {
-			ret = -EIO;
-			goto out_tsk;
-		}
-		ret = data;
-		/* ensure "ret" is not mistaken as an error code */
+		if (access_uarea(child, addr, &data, 0) < 0)
+			return -EIO;
+		/* ensure return value is not mistaken for error code */
 		force_successful_syscall_return();
-		goto out_tsk;
+		return data;
 
-	      case PTRACE_POKEUSR:
+	case PTRACE_POKEUSR:
 		/* write the word at addr in the USER area */
-		if (access_uarea(child, addr, &data, 1) < 0) {
-			ret = -EIO;
-			goto out_tsk;
-		}
-		ret = 0;
-		goto out_tsk;
+		if (access_uarea(child, addr, &data, 1) < 0)
+			return -EIO;
+		return 0;
 
-	      case PTRACE_OLD_GETSIGINFO:
+	case PTRACE_OLD_GETSIGINFO:
 		/* for backwards-compatibility */
-		ret = ptrace_request(child, PTRACE_GETSIGINFO, addr, data);
-		goto out_tsk;
+		return ptrace_request(child, PTRACE_GETSIGINFO, addr, data);
 
-	      case PTRACE_OLD_SETSIGINFO:
+	case PTRACE_OLD_SETSIGINFO:
 		/* for backwards-compatibility */
-		ret = ptrace_request(child, PTRACE_SETSIGINFO, addr, data);
-		goto out_tsk;
-
-	      case PTRACE_SYSCALL:
-		/* continue and stop at next (return from) syscall */
-	      case PTRACE_CONT:
-		/* restart after signal. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->exit_code = data;
+		return ptrace_request(child, PTRACE_SETSIGINFO, addr, data);
 
-		/*
-		 * Make sure the single step/taken-branch trap bits
-		 * are not set:
-		 */
-		ia64_psr(pt)->ss = 0;
-		ia64_psr(pt)->tb = 0;
+	case PTRACE_GETREGS:
+		return ptrace_getregs(child,
+				      (struct pt_all_user_regs __user *) data);
 
-		wake_up_process(child);
-		ret = 0;
-		goto out_tsk;
+	case PTRACE_SETREGS:
+		return ptrace_setregs(child,
+				      (struct pt_all_user_regs __user *) data);
 
-	      case PTRACE_KILL:
-		/*
-		 * Make the child exit.  Best I can do is send it a
-		 * sigkill.  Perhaps it should be put in the status
-		 * that it wants to exit.
-		 */
-		if (child->exit_state == EXIT_ZOMBIE)
-			/* already dead */
-			goto out_tsk;
-		child->exit_code = SIGKILL;
-
-		ptrace_disable(child);
-		wake_up_process(child);
-		ret = 0;
-		goto out_tsk;
-
-	      case PTRACE_SINGLESTEP:
-		/* let child execute for one instruction */
-	      case PTRACE_SINGLEBLOCK:
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		if (request == PTRACE_SINGLESTEP) {
-			ia64_psr(pt)->ss = 1;
-		} else {
-			ia64_psr(pt)->tb = 1;
+	default:
+		return ptrace_request(child, request, addr, data);
+	}
+}
+
+
+/* "asmlinkage" so the input arguments are preserved... */
+
+asmlinkage long
+syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
+		     long arg4, long arg5, long arg6, long arg7,
+		     struct pt_regs regs)
+{
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		if (tracehook_report_syscall_entry(&regs))
+			return -ENOSYS;
+
+	/* copy user rbs to kernel rbs */
+	if (test_thread_flag(TIF_RESTORE_RSE))
+		ia64_sync_krbs();
+
+
+	audit_syscall_entry(AUDIT_ARCH_IA64, regs.r15, arg0, arg1, arg2, arg3);
+
+	return 0;
+}
+
+/* "asmlinkage" so the input arguments are preserved... */
+
+asmlinkage void
+syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
+		     long arg4, long arg5, long arg6, long arg7,
+		     struct pt_regs regs)
+{
+	int step;
+
+	audit_syscall_exit(&regs);
+
+	step = test_thread_flag(TIF_SINGLESTEP);
+	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall_exit(&regs, step);
+
+	/* copy user rbs to kernel rbs */
+	if (test_thread_flag(TIF_RESTORE_RSE))
+		ia64_sync_krbs();
+}
+
+/* Utrace implementation starts here */
+struct regset_get {
+	void *kbuf;
+	void __user *ubuf;
+};
+
+struct regset_set {
+	const void *kbuf;
+	const void __user *ubuf;
+};
+
+struct regset_getset {
+	struct task_struct *target;
+	const struct user_regset *regset;
+	union {
+		struct regset_get get;
+		struct regset_set set;
+	} u;
+	unsigned int pos;
+	unsigned int count;
+	int ret;
+};
+
+static int
+access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long *ptr = NULL;
+	int ret;
+	char nat = 0;
+
+	pt = task_pt_regs(target);
+	switch (addr) {
+	case ELF_GR_OFFSET(1):
+		ptr = &pt->r1;
+		break;
+	case ELF_GR_OFFSET(2):
+	case ELF_GR_OFFSET(3):
+		ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2));
+		break;
+	case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7):
+		if (write_access) {
+			/* read NaT bit first: */
+			unsigned long dummy;
+
+			ret = unw_get_gr(info, addr/8, &dummy, &nat);
+			if (ret < 0)
+				return ret;
 		}
-		child->exit_code = data;
-
-		/* give it a chance to run. */
-		wake_up_process(child);
-		ret = 0;
-		goto out_tsk;
-
-	      case PTRACE_DETACH:
-		/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		goto out_tsk;
-
-	      case PTRACE_GETREGS:
-		ret = ptrace_getregs(child,
-				     (struct pt_all_user_regs __user *) data);
-		goto out_tsk;
-
-	      case PTRACE_SETREGS:
-		ret = ptrace_setregs(child,
-				     (struct pt_all_user_regs __user *) data);
-		goto out_tsk;
-
-	      default:
-		ret = ptrace_request(child, request, addr, data);
-		goto out_tsk;
+		return unw_access_gr(info, addr/8, data, &nat, write_access);
+	case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11):
+		ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8);
+		break;
+	case ELF_GR_OFFSET(12):
+	case ELF_GR_OFFSET(13):
+		ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12);
+		break;
+	case ELF_GR_OFFSET(14):
+		ptr = &pt->r14;
+		break;
+	case ELF_GR_OFFSET(15):
+		ptr = &pt->r15;
 	}
-  out_tsk:
-	put_task_struct(child);
-  out:
-	unlock_kernel();
-	return ret;
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
 }
 
+static int
+access_elf_breg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long *ptr = NULL;
+
+	pt = task_pt_regs(target);
+	switch (addr) {
+	case ELF_BR_OFFSET(0):
+		ptr = &pt->b0;
+		break;
+	case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5):
+		return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8,
+				     data, write_access);
+	case ELF_BR_OFFSET(6):
+		ptr = &pt->b6;
+		break;
+	case ELF_BR_OFFSET(7):
+		ptr = &pt->b7;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
 
-void
-syscall_trace (void)
+static int
+access_elf_areg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
 {
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		return;
-	if (!(current->ptrace & PT_PTRACED))
+	struct pt_regs *pt;
+	unsigned long cfm, urbs_end;
+	unsigned long *ptr = NULL;
+
+	pt = task_pt_regs(target);
+	if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) {
+		switch (addr) {
+		case ELF_AR_RSC_OFFSET:
+			/* force PL3 */
+			if (write_access)
+				pt->ar_rsc = *data | (3 << 2);
+			else
+				*data = pt->ar_rsc;
+			return 0;
+		case ELF_AR_BSP_OFFSET:
+			/*
+			 * By convention, we use PT_AR_BSP to refer to
+			 * the end of the user-level backing store.
+			 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
+			 * to get the real value of ar.bsp at the time
+			 * the kernel was entered.
+			 *
+			 * Furthermore, when changing the contents of
+			 * PT_AR_BSP (or PT_CFM) while the task is
+			 * blocked in a system call, convert the state
+			 * so that the non-system-call exit
+			 * path is used.  This ensures that the proper
+			 * state will be picked up when resuming
+			 * execution.  However, it *also* means that
+			 * once we write PT_AR_BSP/PT_CFM, it won't be
+			 * possible to modify the syscall arguments of
+			 * the pending system call any longer.  This
+			 * shouldn't be an issue because modifying
+			 * PT_AR_BSP/PT_CFM generally implies that
+			 * we're either abandoning the pending system
+			 * call or that we defer it's re-execution
+			 * (e.g., due to GDB doing an inferior
+			 * function call).
+			 */
+			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+			if (write_access) {
+				if (*data != urbs_end) {
+					if (in_syscall(pt))
+						convert_to_non_syscall(target,
+								       pt,
+								       cfm);
+					/*
+					 * Simulate user-level write
+					 * of ar.bsp:
+					 */
+					pt->loadrs = 0;
+					pt->ar_bspstore = *data;
+				}
+			} else
+				*data = urbs_end;
+			return 0;
+		case ELF_AR_BSPSTORE_OFFSET:
+			ptr = &pt->ar_bspstore;
+			break;
+		case ELF_AR_RNAT_OFFSET:
+			ptr = &pt->ar_rnat;
+			break;
+		case ELF_AR_CCV_OFFSET:
+			ptr = &pt->ar_ccv;
+			break;
+		case ELF_AR_UNAT_OFFSET:
+			ptr = &pt->ar_unat;
+			break;
+		case ELF_AR_FPSR_OFFSET:
+			ptr = &pt->ar_fpsr;
+			break;
+		case ELF_AR_PFS_OFFSET:
+			ptr = &pt->ar_pfs;
+			break;
+		case ELF_AR_LC_OFFSET:
+			return unw_access_ar(info, UNW_AR_LC, data,
+					     write_access);
+		case ELF_AR_EC_OFFSET:
+			return unw_access_ar(info, UNW_AR_EC, data,
+					     write_access);
+		case ELF_AR_CSD_OFFSET:
+			ptr = &pt->ar_csd;
+			break;
+		case ELF_AR_SSD_OFFSET:
+			ptr = &pt->ar_ssd;
+		}
+	} else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) {
+		switch (addr) {
+		case ELF_CR_IIP_OFFSET:
+			ptr = &pt->cr_iip;
+			break;
+		case ELF_CFM_OFFSET:
+			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+			if (write_access) {
+				if (((cfm ^ *data) & PFM_MASK) != 0) {
+					if (in_syscall(pt))
+						convert_to_non_syscall(target,
+								       pt,
+								       cfm);
+					pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
+						      | (*data & PFM_MASK));
+				}
+			} else
+				*data = cfm;
+			return 0;
+		case ELF_CR_IPSR_OFFSET:
+			if (write_access) {
+				unsigned long tmp = *data;
+				/* psr.ri==3 is a reserved value: SDM 2:25 */
+				if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
+					tmp &= ~IA64_PSR_RI;
+				pt->cr_ipsr = ((tmp & IPSR_MASK)
+					       | (pt->cr_ipsr & ~IPSR_MASK));
+			} else
+				*data = (pt->cr_ipsr & IPSR_MASK);
+			return 0;
+		}
+	} else if (addr == ELF_NAT_OFFSET)
+		return access_nat_bits(target, pt, info,
+				       data, write_access);
+	else if (addr == ELF_PR_OFFSET)
+		ptr = &pt->pr;
+	else
+		return -1;
+
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+
+	return 0;
+}
+
+static int
+access_elf_reg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15))
+		return access_elf_gpreg(target, info, addr, data, write_access);
+	else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7))
+		return access_elf_breg(target, info, addr, data, write_access);
+	else
+		return access_elf_areg(target, info, addr, data, write_access);
+}
+
+void do_gpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	struct regset_getset *dst = arg;
+	elf_greg_t tmp[16];
+	unsigned int i, index, min_copy;
+
+	if (unw_unwind_to_user(info) < 0)
 		return;
+
 	/*
-	 * The 0x80 provides a way for the tracing parent to
-	 * distinguish between a syscall stop and SIGTRAP delivery.
+	 * coredump format:
+	 *      r0-r31
+	 *      NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+	 *      predicate registers (p0-p63)
+	 *      b0-b7
+	 *      ip cfm user-mask
+	 *      ar.rsc ar.bsp ar.bspstore ar.rnat
+	 *      ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
 	 */
-	ptrace_notify(SIGTRAP
-		      | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
 
-	/*
-	 * This isn't the same as continuing with a signal, but it
-	 * will do for normal use.  strace only continues with a
-	 * signal if the stopping signal is not SIGTRAP.  -brl
+
+	/* Skip r0 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+		dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+						      &dst->u.get.kbuf,
+						      &dst->u.get.ubuf,
+						      0, ELF_GR_OFFSET(1));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* gr1 - gr15 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+		index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+		min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_GR_OFFSET(16);
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* r16-r31 */
+	if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+		pt = task_pt_regs(dst->target);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16,
+				ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* nat, pr, b0 - b7 */
+	if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+		index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+		min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_CR_IIP_OFFSET;
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+	 * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
 	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
+	if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+		index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+		min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_AR_END_OFFSET;
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
 	}
 }
 
-/* "asmlinkage" so the input arguments are preserved... */
+void do_gpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	struct regset_getset *dst = arg;
+	elf_greg_t tmp[16];
+	unsigned int i, index;
 
-asmlinkage void
-syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
-		     long arg4, long arg5, long arg6, long arg7,
-		     struct pt_regs regs)
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip r0 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+		dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+						       &dst->u.set.kbuf,
+						       &dst->u.set.ubuf,
+						       0, ELF_GR_OFFSET(1));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* gr1-gr15 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+		i = dst->pos;
+		index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+		if (dst->ret)
+			return;
+		for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		if (dst->count == 0)
+			return;
+	}
+
+	/* gr16-gr31 */
+	if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+		pt = task_pt_regs(dst->target);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16,
+				ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* nat, pr, b0 - b7 */
+	if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+		i = dst->pos;
+		index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+		if (dst->ret)
+			return;
+		for (; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		if (dst->count == 0)
+			return;
+	}
+
+	/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+	 * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+	 */
+	if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+		i = dst->pos;
+		index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+		if (dst->ret)
+			return;
+		for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+	}
+}
+
+#define ELF_FP_OFFSET(i)	(i * sizeof(elf_fpreg_t))
+
+void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	elf_fpreg_t tmp[30];
+	int index, min_copy, i;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip pos 0 and 1 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+		dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+						      &dst->u.get.kbuf,
+						      &dst->u.get.ubuf,
+						      0, ELF_FP_OFFSET(2));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fr2-fr31 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+		index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t);
+
+		min_copy = min(((unsigned int)ELF_FP_OFFSET(32)),
+				dst->pos + dst->count);
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t),
+				index++)
+			if (unw_get_fr(info, i / sizeof(elf_fpreg_t),
+					 &tmp[index])) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fph */
+	if (dst->count > 0) {
+		ia64_flush_fph(dst->target);
+		if (task->thread.flags & IA64_THREAD_FPH_VALID)
+			dst->ret = user_regset_copyout(
+				&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				&dst->target->thread.fph,
+				ELF_FP_OFFSET(32), -1);
+		else
+			/* Zero fill instead.  */
+			dst->ret = user_regset_copyout_zero(
+				&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				ELF_FP_OFFSET(32), -1);
+	}
+}
+
+void do_fpregs_set(struct unw_frame_info *info, void *arg)
 {
-	if (test_thread_flag(TIF_SYSCALL_TRACE) 
-	    && (current->ptrace & PT_PTRACED))
-		syscall_trace();
-
-	if (unlikely(current->audit_context)) {
-		long syscall;
-		int arch;
-
-		if (IS_IA32_PROCESS(&regs)) {
-			syscall = regs.r1;
-			arch = AUDIT_ARCH_I386;
-		} else {
-			syscall = regs.r15;
-			arch = AUDIT_ARCH_IA64;
+	struct regset_getset *dst = arg;
+	elf_fpreg_t fpreg, tmp[30];
+	int index, start, end;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip pos 0 and 1 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+		dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+						       &dst->u.set.kbuf,
+						       &dst->u.set.ubuf,
+						       0, ELF_FP_OFFSET(2));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fr2-fr31 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+		start = dst->pos;
+		end = min(((unsigned int)ELF_FP_OFFSET(32)),
+			 dst->pos + dst->count);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+		if (dst->ret)
+			return;
+
+		if (start & 0xF) { /* only write high part */
+			if (unw_get_fr(info, start / sizeof(elf_fpreg_t),
+					 &fpreg)) {
+				dst->ret = -EIO;
+				return;
+			}
+			tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0]
+				= fpreg.u.bits[0];
+			start &= ~0xFUL;
+		}
+		if (end & 0xF) { /* only write low part */
+			if (unw_get_fr(info, end / sizeof(elf_fpreg_t),
+					&fpreg)) {
+				dst->ret = -EIO;
+				return;
+			}
+			tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1]
+				= fpreg.u.bits[1];
+			end = (end + 0xF) & ~0xFUL;
 		}
 
-		audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3);
+		for ( ;	start < end ; start += sizeof(elf_fpreg_t)) {
+			index = start / sizeof(elf_fpreg_t);
+			if (unw_set_fr(info, index, tmp[index - 2])) {
+				dst->ret = -EIO;
+				return;
+			}
+		}
+		if (dst->ret || dst->count == 0)
+			return;
 	}
 
+	/* fph */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) {
+		ia64_sync_fph(dst->target);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+						&dst->u.set.kbuf,
+						&dst->u.set.ubuf,
+						&dst->target->thread.fph,
+						ELF_FP_OFFSET(32), -1);
+	}
 }
 
-/* "asmlinkage" so the input arguments are preserved... */
+static int
+do_regset_call(void (*call)(struct unw_frame_info *, void *),
+	       struct task_struct *target,
+	       const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf)
+{
+	struct regset_getset info = { .target = target, .regset = regset,
+				 .pos = pos, .count = count,
+				 .u.set = { .kbuf = kbuf, .ubuf = ubuf },
+				 .ret = 0 };
 
-asmlinkage void
-syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
-		     long arg4, long arg5, long arg6, long arg7,
-		     struct pt_regs regs)
+	if (target == current)
+		unw_init_running(call, &info);
+	else {
+		struct unw_frame_info ufi;
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, target);
+		(*call)(&ufi, &info);
+	}
+
+	return info.ret;
+}
+
+static int
+gpregs_get(struct task_struct *target,
+	   const struct user_regset *regset,
+	   unsigned int pos, unsigned int count,
+	   void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_gpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int gpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_gpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static void do_gpregs_writeback(struct unw_frame_info *info, void *arg)
+{
+	do_sync_rbs(info, ia64_sync_user_rbs);
+}
+
+/*
+ * This is called to write back the register backing store.
+ * ptrace does this before it stops, so that a tracer reading the user
+ * memory after the thread stops will get the current register data.
+ */
+static int
+gpregs_writeback(struct task_struct *target,
+		 const struct user_regset *regset,
+		 int now)
+{
+	if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE))
+		return 0;
+	set_notify_resume(target);
+	return do_regset_call(do_gpregs_writeback, target, regset, 0, 0,
+		NULL, NULL);
+}
+
+static int
+fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+	return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32;
+}
+
+static int fpregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int fpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int
+access_uarea(struct task_struct *child, unsigned long addr,
+	      unsigned long *data, int write_access)
 {
-	if (unlikely(current->audit_context)) {
-		int success = AUDITSC_RESULT(regs.r10);
-		long result = regs.r8;
+	unsigned int pos = -1; /* an invalid value */
+	int ret;
+	unsigned long *ptr, regnum;
+
+	if ((addr & 0x7) != 0) {
+		dprintk("ptrace: unaligned register address 0x%lx\n", addr);
+		return -1;
+	}
+	if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) ||
+		(addr >= PT_R7 + 8 && addr < PT_B1) ||
+		(addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) ||
+		(addr >= PT_AR_SSD + 8 && addr < PT_DBR)) {
+		dprintk("ptrace: rejecting access to register "
+					"address 0x%lx\n", addr);
+		return -1;
+	}
+
+	switch (addr) {
+	case PT_F32 ... (PT_F127 + 15):
+		pos = addr - PT_F32 + ELF_FP_OFFSET(32);
+		break;
+	case PT_F2 ... (PT_F5 + 15):
+		pos = addr - PT_F2 + ELF_FP_OFFSET(2);
+		break;
+	case PT_F10 ... (PT_F31 + 15):
+		pos = addr - PT_F10 + ELF_FP_OFFSET(10);
+		break;
+	case PT_F6 ... (PT_F9 + 15):
+		pos = addr - PT_F6 + ELF_FP_OFFSET(6);
+		break;
+	}
+
+	if (pos != -1) {
+		if (write_access)
+			ret = fpregs_set(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		else
+			ret = fpregs_get(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		if (ret != 0)
+			return -1;
+		return 0;
+	}
 
-		if (success != AUDITSC_SUCCESS)
-			result = -result;
-		audit_syscall_exit(success, result);
+	switch (addr) {
+	case PT_NAT_BITS:
+		pos = ELF_NAT_OFFSET;
+		break;
+	case PT_R4 ... PT_R7:
+		pos = addr - PT_R4 + ELF_GR_OFFSET(4);
+		break;
+	case PT_B1 ... PT_B5:
+		pos = addr - PT_B1 + ELF_BR_OFFSET(1);
+		break;
+	case PT_AR_EC:
+		pos = ELF_AR_EC_OFFSET;
+		break;
+	case PT_AR_LC:
+		pos = ELF_AR_LC_OFFSET;
+		break;
+	case PT_CR_IPSR:
+		pos = ELF_CR_IPSR_OFFSET;
+		break;
+	case PT_CR_IIP:
+		pos = ELF_CR_IIP_OFFSET;
+		break;
+	case PT_CFM:
+		pos = ELF_CFM_OFFSET;
+		break;
+	case PT_AR_UNAT:
+		pos = ELF_AR_UNAT_OFFSET;
+		break;
+	case PT_AR_PFS:
+		pos = ELF_AR_PFS_OFFSET;
+		break;
+	case PT_AR_RSC:
+		pos = ELF_AR_RSC_OFFSET;
+		break;
+	case PT_AR_RNAT:
+		pos = ELF_AR_RNAT_OFFSET;
+		break;
+	case PT_AR_BSPSTORE:
+		pos = ELF_AR_BSPSTORE_OFFSET;
+		break;
+	case PT_PR:
+		pos = ELF_PR_OFFSET;
+		break;
+	case PT_B6:
+		pos = ELF_BR_OFFSET(6);
+		break;
+	case PT_AR_BSP:
+		pos = ELF_AR_BSP_OFFSET;
+		break;
+	case PT_R1 ... PT_R3:
+		pos = addr - PT_R1 + ELF_GR_OFFSET(1);
+		break;
+	case PT_R12 ... PT_R15:
+		pos = addr - PT_R12 + ELF_GR_OFFSET(12);
+		break;
+	case PT_R8 ... PT_R11:
+		pos = addr - PT_R8 + ELF_GR_OFFSET(8);
+		break;
+	case PT_R16 ... PT_R31:
+		pos = addr - PT_R16 + ELF_GR_OFFSET(16);
+		break;
+	case PT_AR_CCV:
+		pos = ELF_AR_CCV_OFFSET;
+		break;
+	case PT_AR_FPSR:
+		pos = ELF_AR_FPSR_OFFSET;
+		break;
+	case PT_B0:
+		pos = ELF_BR_OFFSET(0);
+		break;
+	case PT_B7:
+		pos = ELF_BR_OFFSET(7);
+		break;
+	case PT_AR_CSD:
+		pos = ELF_AR_CSD_OFFSET;
+		break;
+	case PT_AR_SSD:
+		pos = ELF_AR_SSD_OFFSET;
+		break;
+	}
+
+	if (pos != -1) {
+		if (write_access)
+			ret = gpregs_set(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		else
+			ret = gpregs_get(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		if (ret != 0)
+			return -1;
+		return 0;
 	}
 
-	if (test_thread_flag(TIF_SYSCALL_TRACE)
-	    && (current->ptrace & PT_PTRACED))
-		syscall_trace();
+	/* access debug registers */
+	if (addr >= PT_IBR) {
+		regnum = (addr - PT_IBR) >> 3;
+		ptr = &child->thread.ibr[0];
+	} else {
+		regnum = (addr - PT_DBR) >> 3;
+		ptr = &child->thread.dbr[0];
+	}
+
+	if (regnum >= 8) {
+		dprintk("ptrace: rejecting access to register "
+				"address 0x%lx\n", addr);
+		return -1;
+	}
+#ifdef CONFIG_PERFMON
+	/*
+	 * Check if debug registers are used by perfmon. This
+	 * test must be done once we know that we can do the
+	 * operation, i.e. the arguments are all valid, but
+	 * before we start modifying the state.
+	 *
+	 * Perfmon needs to keep a count of how many processes
+	 * are trying to modify the debug registers for system
+	 * wide monitoring sessions.
+	 *
+	 * We also include read access here, because they may
+	 * cause the PMU-installed debug register state
+	 * (dbr[], ibr[]) to be reset. The two arrays are also
+	 * used by perfmon, but we do not use
+	 * IA64_THREAD_DBG_VALID. The registers are restored
+	 * by the PMU context switch code.
+	 */
+	if (pfm_use_debug_registers(child))
+		return -1;
+#endif
+
+	if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+		child->thread.flags |= IA64_THREAD_DBG_VALID;
+		memset(child->thread.dbr, 0,
+				sizeof(child->thread.dbr));
+		memset(child->thread.ibr, 0,
+				sizeof(child->thread.ibr));
+	}
+
+	ptr += regnum;
+
+	if ((regnum & 1) && write_access) {
+		/* don't let the user set kernel-level breakpoints: */
+		*ptr = *data & ~(7UL << 56);
+		return 0;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static const struct user_regset native_regsets[] = {
+	{
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t),
+		.get = gpregs_get, .set = gpregs_set,
+		.writeback = gpregs_writeback
+	},
+	{
+		.core_note_type = NT_PRFPREG,
+		.n = ELF_NFPREG,
+		.size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t),
+		.get = fpregs_get, .set = fpregs_set, .active = fpregs_active
+	},
+};
+
+static const struct user_regset_view user_ia64_view = {
+	.name = "ia64",
+	.e_machine = EM_IA_64,
+	.regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *tsk)
+{
+	return &user_ia64_view;
+}
+
+struct syscall_get_set_args {
+	unsigned int i;
+	unsigned int n;
+	unsigned long *args;
+	struct pt_regs *regs;
+	int rw;
+};
+
+static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
+{
+	struct syscall_get_set_args *args = data;
+	struct pt_regs *pt = args->regs;
+	unsigned long *krbs, cfm, ndirty;
+	int i, count;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	cfm = pt->cr_ifs;
+	krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8;
+	ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
+
+	count = 0;
+	if (in_syscall(pt))
+		count = min_t(int, args->n, cfm & 0x7f);
+
+	for (i = 0; i < count; i++) {
+		if (args->rw)
+			*ia64_rse_skip_regs(krbs, ndirty + i + args->i) =
+				args->args[i];
+		else
+			args->args[i] = *ia64_rse_skip_regs(krbs,
+				ndirty + i + args->i);
+	}
+
+	if (!args->rw) {
+		while (i < args->n) {
+			args->args[i] = 0;
+			i++;
+		}
+	}
+}
+
+void ia64_syscall_get_set_arguments(struct task_struct *task,
+	struct pt_regs *regs, unsigned int i, unsigned int n,
+	unsigned long *args, int rw)
+{
+	struct syscall_get_set_args data = {
+		.i = i,
+		.n = n,
+		.args = args,
+		.regs = regs,
+		.rw = rw,
+	};
+
+	if (task == current)
+		unw_init_running(syscall_get_set_args_cb, &data);
+	else {
+		struct unw_frame_info ufi;
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, task);
+		syscall_get_set_args_cb(&ufi, &data);
+	}
 }
diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S
new file mode 100644
index 00000000000..c370e02f006
--- /dev/null
+++ b/arch/ia64/kernel/relocate_kernel.S
@@ -0,0 +1,325 @@
+/*
+ * arch/ia64/kernel/relocate_kernel.S
+ *
+ * Relocate kexec'able kernel and start it
+ *
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Khalid Aziz  <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Intel Corp,  Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mca_asm.h>
+
+       /* Must be relocatable PIC code callable as a C function
+        */
+GLOBAL_ENTRY(relocate_new_kernel)
+	.prologue
+	alloc r31=ar.pfs,4,0,0,0
+        .body
+.reloc_entry:
+{
+	rsm psr.i| psr.ic
+	mov r2=ip
+}
+	;;
+{
+        flushrs                         // must be first insn in group
+        srlz.i
+}
+	;;
+	dep r2=0,r2,61,3		//to physical address
+	;;
+	//first switch to physical mode
+	add r3=1f-.reloc_entry, r2
+	movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC
+	mov ar.rsc=0	          	// put RSE in enforced lazy mode
+	;;
+	add sp=(memory_stack_end - 16 - .reloc_entry),r2
+	add r8=(register_stack - .reloc_entry),r2
+	;;
+	mov r18=ar.rnat
+	mov ar.bspstore=r8
+	;;
+        mov cr.ipsr=r16
+        mov cr.iip=r3
+        mov cr.ifs=r0
+	srlz.i
+	;;
+	mov ar.rnat=r18
+	rfi				// note: this unmask MCA/INIT (psr.mc)
+	;;
+1:
+	//physical mode code begin
+	mov b6=in1
+	dep r28=0,in2,61,3	//to physical address
+
+	// purge all TC entries
+#define O(member)       IA64_CPUINFO_##member##_OFFSET
+        GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2
+        ;;
+        addl r17=O(PTCE_STRIDE),r2
+        addl r2=O(PTCE_BASE),r2
+        ;;
+        ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));;    	// r18=ptce_base
+        ld4 r19=[r2],4                                  // r19=ptce_count[0]
+        ld4 r21=[r17],4                                 // r21=ptce_stride[0]
+        ;;
+        ld4 r20=[r2]                                    // r20=ptce_count[1]
+        ld4 r22=[r17]                                   // r22=ptce_stride[1]
+        mov r24=r0
+        ;;
+        adds r20=-1,r20
+        ;;
+#undef O
+2:
+        cmp.ltu p6,p7=r24,r19
+(p7)    br.cond.dpnt.few 4f
+        mov ar.lc=r20
+3:
+        ptc.e r18
+        ;;
+        add r18=r22,r18
+        br.cloop.sptk.few 3b
+        ;;
+        add r18=r21,r18
+        add r24=1,r24
+        ;;
+        br.sptk.few 2b
+4:
+        srlz.i
+        ;;
+	// purge TR entry for kernel text and data
+        movl r16=KERNEL_START
+        mov r18=KERNEL_TR_PAGE_SHIFT<<2
+        ;;
+        ptr.i r16, r18
+        ptr.d r16, r18
+        ;;
+        srlz.i
+        ;;
+
+        // purge TR entry for pal code
+        mov r16=in3
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.i r16,r18
+        ;;
+        srlz.i
+	;;
+
+        // purge TR entry for stack
+        mov r16=IA64_KR(CURRENT_STACK)
+        ;;
+        shl r16=r16,IA64_GRANULE_SHIFT
+        movl r19=PAGE_OFFSET
+        ;;
+        add r16=r19,r16
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.d r16,r18
+        ;;
+        srlz.i
+	;;
+
+	//copy segments
+	movl r16=PAGE_MASK
+        mov  r30=in0                    // in0 is page_list
+        br.sptk.few .dest_page
+	;;
+.loop:
+	ld8  r30=[in0], 8;;
+.dest_page:
+	tbit.z p0, p6=r30, 0;;    	// 0x1 dest page
+(p6)	and r17=r30, r16
+(p6)	br.cond.sptk.few .loop;;
+
+	tbit.z p0, p6=r30, 1;;		// 0x2 indirect page
+(p6)	and in0=r30, r16
+(p6)	br.cond.sptk.few .loop;;
+
+	tbit.z p0, p6=r30, 2;;		// 0x4 end flag
+(p6)	br.cond.sptk.few .end_loop;;
+
+	tbit.z p6, p0=r30, 3;;		// 0x8 source page
+(p6)	br.cond.sptk.few .loop
+
+	and r18=r30, r16
+
+	// simple copy page, may optimize later
+	movl r14=PAGE_SIZE/8 - 1;;
+	mov ar.lc=r14;;
+1:
+	ld8 r14=[r18], 8;;
+	st8 [r17]=r14;;
+	fc.i r17
+	add r17=8, r17
+	br.ctop.sptk.few 1b
+	br.sptk.few .loop
+	;;
+
+.end_loop:
+	sync.i			// for fc.i
+	;;
+	srlz.i
+	;;
+	srlz.d
+	;;
+	br.call.sptk.many b0=b6;;
+
+.align  32
+memory_stack:
+	.fill           8192, 1, 0
+memory_stack_end:
+register_stack:
+	.fill           8192, 1, 0
+register_stack_end:
+relocate_new_kernel_end:
+END(relocate_new_kernel)
+
+.global relocate_new_kernel_size
+relocate_new_kernel_size:
+	data8	relocate_new_kernel_end - relocate_new_kernel
+
+GLOBAL_ENTRY(ia64_dump_cpu_regs)
+        .prologue
+        alloc loc0=ar.pfs,1,2,0,0
+        .body
+        mov     ar.rsc=0                // put RSE in enforced lazy mode
+        add     loc1=4*8, in0           // save r4 and r5 first
+        ;;
+{
+        flushrs                         // flush dirty regs to backing store
+        srlz.i
+}
+        st8 [loc1]=r4, 8
+        ;;
+        st8 [loc1]=r5, 8
+        ;;
+        add loc1=32*8, in0
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r0, 8			// r0
+        st8 [loc1]=r4, 8		// rnat
+        mov r5=pr
+        ;;
+        st8 [in0]=r1, 8			// r1
+        st8 [loc1]=r5, 8		// pr
+        mov r4=b0
+        ;;
+        st8 [in0]=r2, 8			// r2
+        st8 [loc1]=r4, 8		// b0
+        mov r5=b1;
+        ;;
+        st8 [in0]=r3, 24		// r3
+        st8 [loc1]=r5, 8		// b1
+        mov r4=b2
+        ;;
+        st8 [in0]=r6, 8			// r6
+        st8 [loc1]=r4, 8		// b2
+	mov r5=b3
+        ;;
+        st8 [in0]=r7, 8			// r7
+        st8 [loc1]=r5, 8		// b3
+        mov r4=b4
+        ;;
+        st8 [in0]=r8, 8			// r8
+        st8 [loc1]=r4, 8		// b4
+        mov r5=b5
+        ;;
+        st8 [in0]=r9, 8			// r9
+        st8 [loc1]=r5, 8		// b5
+        mov r4=b6
+        ;;
+        st8 [in0]=r10, 8		// r10
+        st8 [loc1]=r5, 8		// b6
+        mov r5=b7
+        ;;
+        st8 [in0]=r11, 8		// r11
+        st8 [loc1]=r5, 8		// b7
+        mov r4=b0
+        ;;
+        st8 [in0]=r12, 8		// r12
+        st8 [loc1]=r4, 8		// ip
+        mov r5=loc0
+	;;
+        st8 [in0]=r13, 8		// r13
+        extr.u r5=r5, 0, 38		// ar.pfs.pfm
+	mov r4=r0			// user mask
+        ;;
+        st8 [in0]=r14, 8		// r14
+        st8 [loc1]=r5, 8		// cfm
+        ;;
+        st8 [in0]=r15, 8		// r15
+        st8 [loc1]=r4, 8        	// user mask
+	mov r5=ar.rsc
+        ;;
+        st8 [in0]=r16, 8		// r16
+        st8 [loc1]=r5, 8        	// ar.rsc
+        mov r4=ar.bsp
+        ;;
+        st8 [in0]=r17, 8		// r17
+        st8 [loc1]=r4, 8        	// ar.bsp
+        mov r5=ar.bspstore
+        ;;
+        st8 [in0]=r18, 8		// r18
+        st8 [loc1]=r5, 8        	// ar.bspstore
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r19, 8		// r19
+        st8 [loc1]=r4, 8        	// ar.rnat
+        mov r5=ar.ccv
+        ;;
+        st8 [in0]=r20, 8		// r20
+	st8 [loc1]=r5, 8        	// ar.ccv
+        mov r4=ar.unat
+        ;;
+        st8 [in0]=r21, 8		// r21
+        st8 [loc1]=r4, 8        	// ar.unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r22, 8		// r22
+        st8 [loc1]=r5, 8        	// ar.fpsr
+        mov r4 = ar.unat
+        ;;
+        st8 [in0]=r23, 8		// r23
+        st8 [loc1]=r4, 8        	// unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r24, 8		// r24
+        st8 [loc1]=r5, 8        	// fpsr
+        mov r4 = ar.pfs
+        ;;
+        st8 [in0]=r25, 8		// r25
+        st8 [loc1]=r4, 8        	// ar.pfs
+        mov r5 = ar.lc
+        ;;
+        st8 [in0]=r26, 8		// r26
+        st8 [loc1]=r5, 8        	// ar.lc
+        mov r4 = ar.ec
+        ;;
+        st8 [in0]=r27, 8		// r27
+        st8 [loc1]=r4, 8        	// ar.ec
+        mov r5 = ar.csd
+        ;;
+        st8 [in0]=r28, 8		// r28
+        st8 [loc1]=r5, 8        	// ar.csd
+        mov r4 = ar.ssd
+        ;;
+        st8 [in0]=r29, 8		// r29
+        st8 [loc1]=r4, 8        	// ar.ssd
+        ;;
+        st8 [in0]=r30, 8		// r30
+        ;;
+	st8 [in0]=r31, 8		// r31
+        mov ar.pfs=loc0
+        ;;
+        br.ret.sptk.many rp
+END(ia64_dump_cpu_regs)
+
+
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
index 056f7a6eedc..0464173ea56 100644
--- a/arch/ia64/kernel/sal.c
+++ b/arch/ia64/kernel/sal.c
@@ -6,7 +6,6 @@
  * Copyright (C) 1999 VA Linux Systems
  * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  */
-#include <linux/config.h>
 
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -110,6 +109,13 @@ check_versions (struct ia64_sal_systab *systab)
 		sal_revision = SAL_VERSION_CODE(2, 8);
 		sal_version = SAL_VERSION_CODE(0, 0);
 	}
+
+	if (ia64_platform_is("sn2") && (sal_revision == SAL_VERSION_CODE(2, 9)))
+		/*
+		 * SGI Altix has hard-coded version 2.9 in their prom
+		 * but they actually implement 3.2, so let's fix it here.
+		 */
+		sal_revision = SAL_VERSION_CODE(3, 2);
 }
 
 static void __init
@@ -135,7 +141,7 @@ set_smp_redirect (int flag)
 	 * interrupt redirection. The reason is this would require that
 	 * All interrupts be stopped and hard bind the irq to a cpu.
 	 * Later when the interrupt is fired we need to set the redir hint
-	 * on again in the vector. This is combersome for something that the
+	 * on again in the vector. This is cumbersome for something that the
 	 * user mode irq balancer will solve anyways.
 	 */
 	no_int_routing=1;
@@ -195,9 +201,8 @@ static void __init
 chk_nointroute_opt(void)
 {
 	char *cp;
-	extern char saved_command_line[];
 
-	for (cp = saved_command_line; *cp; ) {
+	for (cp = boot_command_line; *cp; ) {
 		if (memcmp(cp, "nointroute", 10) == 0) {
 			no_int_routing = 1;
 			printk ("no_int_routing on\n");
@@ -224,30 +229,41 @@ static void __init sal_desc_ap_wakeup(void *p) { }
  */
 static int sal_cache_flush_drops_interrupts;
 
-static void __init
+static int __init
+force_pal_cache_flush(char *str)
+{
+	sal_cache_flush_drops_interrupts = 1;
+	return 0;
+}
+early_param("force_pal_cache_flush", force_pal_cache_flush);
+
+void __init
 check_sal_cache_flush (void)
 {
-	unsigned long flags, itv;
+	unsigned long flags;
 	int cpu;
-	u64 vector;
+	u64 vector, cache_type = 3;
+	struct ia64_sal_retval isrv;
+
+	if (sal_cache_flush_drops_interrupts)
+		return;
 
 	cpu = get_cpu();
 	local_irq_save(flags);
 
 	/*
-	 * Schedule a timer interrupt, wait until it's reported, and see if
-	 * SAL_CACHE_FLUSH drops it.
+	 * Send ourselves a timer interrupt, wait until it's reported, and see
+	 * if SAL_CACHE_FLUSH drops it.
 	 */
-	itv = ia64_get_itv();
-	BUG_ON((itv & (1 << 16)) == 0);
-
-	ia64_set_itv(IA64_TIMER_VECTOR);
-	ia64_set_itm(ia64_get_itc() + 1000);
+	platform_send_ipi(cpu, IA64_TIMER_VECTOR, IA64_IPI_DM_INT, 0);
 
 	while (!ia64_get_irr(IA64_TIMER_VECTOR))
 		cpu_relax();
 
-	ia64_sal_cache_flush(3);
+	SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0);
+
+	if (isrv.status)
+		printk(KERN_ERR "SAL_CAL_FLUSH failed with %ld\n", isrv.status);
 
 	if (ia64_get_irr(IA64_TIMER_VECTOR)) {
 		vector = ia64_get_ivr();
@@ -260,7 +276,6 @@ check_sal_cache_flush (void)
 		ia64_eoi();
 	}
 
-	ia64_set_itv(itv);
 	local_irq_restore(flags);
 	put_cpu();
 }
@@ -286,6 +301,7 @@ ia64_sal_cache_flush (u64 cache_type)
 	SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0);
 	return isrv.status;
 }
+EXPORT_SYMBOL_GPL(ia64_sal_cache_flush);
 
 void __init
 ia64_sal_init (struct ia64_sal_systab *systab)
@@ -336,7 +352,6 @@ ia64_sal_init (struct ia64_sal_systab *systab)
 		p += SAL_DESC_SIZE(*p);
 	}
 
-	check_sal_cache_flush();
 }
 
 int
@@ -375,3 +390,16 @@ ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc,
 	return 0;
 }
 EXPORT_SYMBOL(ia64_sal_oemcall_reentrant);
+
+long
+ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second,
+		    unsigned long *drift_info)
+{
+	struct ia64_sal_retval isrv;
+
+	SAL_CALL(isrv, SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
+	*ticks_per_second = isrv.v0;
+	*drift_info = isrv.v1;
+	return isrv.status;
+}
+EXPORT_SYMBOL_GPL(ia64_sal_freq_base);
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 663a186ad19..ee9719eebb1 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -40,13 +40,13 @@
 #include <linux/cpu.h>
 #include <linux/types.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/module.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/timer.h>
 #include <linux/vmalloc.h>
+#include <linux/semaphore.h>
 
-#include <asm/semaphore.h>
 #include <asm/sal.h>
 #include <asm/uaccess.h>
 
@@ -54,7 +54,7 @@ MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
 MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
 MODULE_LICENSE("GPL");
 
-static int salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data);
+static const struct file_operations proc_salinfo_fops;
 
 typedef struct {
 	const char		*name;		/* name of the proc entry */
@@ -66,7 +66,7 @@ typedef struct {
  * List {name,feature} pairs for every entry in /proc/sal/<feature>
  * that this module exports
  */
-static salinfo_entry_t salinfo_entries[]={
+static const salinfo_entry_t salinfo_entries[]={
 	{ "bus_lock",           IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, },
 	{ "irq_redirection",	IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, },
 	{ "ipi_redirection",	IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, },
@@ -163,7 +163,7 @@ static DEFINE_SPINLOCK(data_saved_lock);
 /** salinfo_platform_oemdata - optional callback to decode oemdata from an error
  * record.
  * @sect_header: pointer to the start of the section to decode.
- * @oemdata: returns vmalloc area containing the decded output.
+ * @oemdata: returns vmalloc area containing the decoded output.
  * @oemdata_size: returns length of decoded output (strlen).
  *
  * Description: If user space asks for oem data to be decoded by the kernel
@@ -193,7 +193,7 @@ struct salinfo_platform_oemdata_parms {
 static void
 salinfo_work_to_do(struct salinfo_data *data)
 {
-	down_trylock(&data->mutex);
+	(void)(down_trylock(&data->mutex) ?: 0);
 	up(&data->mutex);
 }
 
@@ -266,6 +266,7 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
 /* Check for outstanding MCA/INIT records every minute (arbitrary) */
 #define SALINFO_TIMER_DELAY (60*HZ)
 static struct timer_list salinfo_timer;
+extern void ia64_mlogbuf_dump(void);
 
 static void
 salinfo_timeout_check(struct salinfo_data *data)
@@ -283,6 +284,7 @@ salinfo_timeout_check(struct salinfo_data *data)
 static void
 salinfo_timeout (unsigned long arg)
 {
+	ia64_mlogbuf_dump();
 	salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA);
 	salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT);
 	salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
@@ -300,9 +302,7 @@ salinfo_event_open(struct inode *inode, struct file *file)
 static ssize_t
 salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_dentry->d_inode;
-	struct proc_dir_entry *entry = PDE(inode);
-	struct salinfo_data *data = entry->data;
+	struct salinfo_data *data = PDE_DATA(file_inode(file));
 	char cmd[32];
 	size_t size;
 	int i, n, cpu = -1;
@@ -316,7 +316,7 @@ retry:
 	}
 
 	n = data->cpu_check;
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		if (cpu_isset(n, data->cpu_event)) {
 			if (!cpu_online(n)) {
 				cpu_clear(n, data->cpu_event);
@@ -325,16 +325,18 @@ retry:
 			cpu = n;
 			break;
 		}
-		if (++n == NR_CPUS)
+		if (++n == nr_cpu_ids)
 			n = 0;
 	}
 
 	if (cpu == -1)
 		goto retry;
 
+	ia64_mlogbuf_dump();
+
 	/* for next read, start checking at next CPU */
 	data->cpu_check = cpu;
-	if (++data->cpu_check == NR_CPUS)
+	if (++data->cpu_check == nr_cpu_ids)
 		data->cpu_check = 0;
 
 	snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
@@ -348,16 +350,16 @@ retry:
 	return size;
 }
 
-static struct file_operations salinfo_event_fops = {
+static const struct file_operations salinfo_event_fops = {
 	.open  = salinfo_event_open,
 	.read  = salinfo_event_read,
+	.llseek = noop_llseek,
 };
 
 static int
 salinfo_log_open(struct inode *inode, struct file *file)
 {
-	struct proc_dir_entry *entry = PDE(inode);
-	struct salinfo_data *data = entry->data;
+	struct salinfo_data *data = PDE_DATA(inode);
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -382,8 +384,7 @@ salinfo_log_open(struct inode *inode, struct file *file)
 static int
 salinfo_log_release(struct inode *inode, struct file *file)
 {
-	struct proc_dir_entry *entry = PDE(inode);
-	struct salinfo_data *data = entry->data;
+	struct salinfo_data *data = PDE_DATA(inode);
 
 	if (data->state == STATE_NO_DATA) {
 		vfree(data->log_buffer);
@@ -401,10 +402,9 @@ static void
 call_on_cpu(int cpu, void (*fn)(void *), void *arg)
 {
 	cpumask_t save_cpus_allowed = current->cpus_allowed;
-	cpumask_t new_cpus_allowed = cpumask_of_cpu(cpu);
-	set_cpus_allowed(current, new_cpus_allowed);
+	set_cpus_allowed_ptr(current, cpumask_of(cpu));
 	(*fn)(arg);
-	set_cpus_allowed(current, save_cpus_allowed);
+	set_cpus_allowed_ptr(current, &save_cpus_allowed);
 }
 
 static void
@@ -460,9 +460,7 @@ retry:
 static ssize_t
 salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_dentry->d_inode;
-	struct proc_dir_entry *entry = PDE(inode);
-	struct salinfo_data *data = entry->data;
+	struct salinfo_data *data = PDE_DATA(file_inode(file));
 	u8 *buf;
 	u64 bufsize;
 
@@ -521,9 +519,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu)
 static ssize_t
 salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_dentry->d_inode;
-	struct proc_dir_entry *entry = PDE(inode);
-	struct salinfo_data *data = entry->data;
+	struct salinfo_data *data = PDE_DATA(file_inode(file));
 	char cmd[32];
 	size_t size;
 	u32 offset;
@@ -564,14 +560,14 @@ salinfo_log_write(struct file *file, const char __user *buffer, size_t count, lo
 	return count;
 }
 
-static struct file_operations salinfo_data_fops = {
+static const struct file_operations salinfo_data_fops = {
 	.open    = salinfo_log_open,
 	.release = salinfo_log_release,
 	.read    = salinfo_log_read,
 	.write   = salinfo_log_write,
+	.llseek  = default_llseek,
 };
 
-#ifdef	CONFIG_HOTPLUG_CPU
 static int
 salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 {
@@ -580,6 +576,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu
 	struct salinfo_data *data;
 	switch (action) {
 	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
 		spin_lock_irqsave(&data_saved_lock, flags);
 		for (i = 0, data = salinfo_data;
 		     i < ARRAY_SIZE(salinfo_data);
@@ -590,6 +587,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu
 		spin_unlock_irqrestore(&data_saved_lock, flags);
 		break;
 	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
 		spin_lock_irqsave(&data_saved_lock, flags);
 		for (i = 0, data = salinfo_data;
 		     i < ARRAY_SIZE(salinfo_data);
@@ -616,7 +614,6 @@ static struct notifier_block salinfo_cpu_notifier =
 	.notifier_call = salinfo_cpu_callback,
 	.priority = 0,
 };
-#endif	/* CONFIG_HOTPLUG_CPU */
 
 static int __init
 salinfo_init(void)
@@ -633,30 +630,31 @@ salinfo_init(void)
 
 	for (i=0; i < NR_SALINFO_ENTRIES; i++) {
 		/* pass the feature bit in question as misc data */
-		*sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir,
-						  salinfo_read, (void *)salinfo_entries[i].feature);
+		*sdir++ = proc_create_data(salinfo_entries[i].name, 0, salinfo_dir,
+					   &proc_salinfo_fops,
+					   (void *)salinfo_entries[i].feature);
 	}
 
+	cpu_notifier_register_begin();
+
 	for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
 		data = salinfo_data + i;
 		data->type = i;
-		init_MUTEX(&data->mutex);
+		sema_init(&data->mutex, 1);
 		dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
 		if (!dir)
 			continue;
 
-		entry = create_proc_entry("event", S_IRUSR, dir);
+		entry = proc_create_data("event", S_IRUSR, dir,
+					 &salinfo_event_fops, data);
 		if (!entry)
 			continue;
-		entry->data = data;
-		entry->proc_fops = &salinfo_event_fops;
 		*sdir++ = entry;
 
-		entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
+		entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir,
+					 &salinfo_data_fops, data);
 		if (!entry)
 			continue;
-		entry->data = data;
-		entry->proc_fops = &salinfo_data_fops;
 		*sdir++ = entry;
 
 		/* we missed any events before now */
@@ -673,9 +671,9 @@ salinfo_init(void)
 	salinfo_timer.function = &salinfo_timeout;
 	add_timer(&salinfo_timer);
 
-#ifdef	CONFIG_HOTPLUG_CPU
-	register_cpu_notifier(&salinfo_cpu_notifier);
-#endif
+	__register_hotcpu_notifier(&salinfo_cpu_notifier);
+
+	cpu_notifier_register_done();
 
 	return 0;
 }
@@ -684,22 +682,23 @@ salinfo_init(void)
  * 'data' contains an integer that corresponds to the feature we're
  * testing
  */
-static int
-salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data)
+static int proc_salinfo_show(struct seq_file *m, void *v)
 {
-	int len = 0;
-
-	len = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n");
-
-	if (len <= off+count) *eof = 1;
-
-	*start = page + off;
-	len   -= off;
-
-	if (len>count) len = count;
-	if (len<0) len = 0;
+	unsigned long data = (unsigned long)v;
+	seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n");
+	return 0;
+}
 
-	return len;
+static int proc_salinfo_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_salinfo_show, PDE_DATA(inode));
 }
 
+static const struct file_operations proc_salinfo_fops = {
+	.open		= proc_salinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 module_init(salinfo_init);
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
deleted file mode 100644
index 2724ef3fbae..00000000000
--- a/arch/ia64/kernel/semaphore.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * IA-64 semaphore implementation (derived from x86 version).
- *
- * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- */
-
-/*
- * Semaphores are implemented using a two-way counter: The "count"
- * variable is decremented for each process that tries to acquire the
- * semaphore, while the "sleepers" variable is a count of such
- * acquires.
- *
- * Notably, the inline "up()" and "down()" functions can efficiently
- * test if they need to do any extra work (up needs to do something
- * only if count was negative before the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is contention
- * on the lock, and as such all this is the "non-critical" part of the
- * whole semaphore business. The critical part is the inline stuff in
- * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
- */
-#include <linux/sched.h>
-#include <linux/init.h>
-
-#include <asm/errno.h>
-#include <asm/semaphore.h>
-
-/*
- * Logic:
- *  - Only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - When we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleepers" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-void
-__up (struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-void __sched __down (struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * the wait_queue_head.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_UNINTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	tsk->state = TASK_RUNNING;
-}
-
-int __sched __down_interruptible (struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers ++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * wait_queue_head. The "-1" is because we're
-		 * still hoping to get the semaphore.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_INTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-	tsk->state = TASK_RUNNING;
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for having decremented the
- * count.
- */
-int
-__down_trylock (struct semaphore *sem)
-{
-	unsigned long flags;
-	int sleepers;
-
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock in the
-	 * wait_queue_head.
-	 */
-	if (!atomic_add_negative(sleepers, &sem->count)) {
-		wake_up_locked(&sem->wait);
-	}
-
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	return 1;
-}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index e4dfda1eb7d..d86669bcdfb 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -22,7 +22,6 @@
  * 06/24/99 W.Drummond	added boot_cpu_data.
  * 05/28/05 Z. Menyhart	Dynamic stride size for "flush_icache_range()"
  */
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/init.h>
 
@@ -36,7 +35,7 @@
 #include <linux/seq_file.h>
 #include <linux/string.h>
 #include <linux/threads.h>
-#include <linux/tty.h>
+#include <linux/screen_info.h>
 #include <linux/dmi.h>
 #include <linux/serial.h>
 #include <linux/serial_core.h>
@@ -44,23 +43,25 @@
 #include <linux/initrd.h>
 #include <linux/pm.h>
 #include <linux/cpufreq.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
 
-#include <asm/ia32.h>
 #include <asm/machvec.h>
 #include <asm/mca.h>
 #include <asm/meminit.h>
 #include <asm/page.h>
+#include <asm/paravirt.h>
+#include <asm/paravirt_patch.h>
 #include <asm/patch.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/sal.h>
 #include <asm/sections.h>
-#include <asm/serial.h>
 #include <asm/setup.h>
 #include <asm/smp.h>
-#include <asm/system.h>
+#include <asm/tlbflush.h>
 #include <asm/unistd.h>
-#include <asm/system.h>
+#include <asm/hpsim.h>
 
 #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
 # error "struct cpuinfo_ia64 too big!"
@@ -71,11 +72,8 @@ unsigned long __per_cpu_offset[NR_CPUS];
 EXPORT_SYMBOL(__per_cpu_offset);
 #endif
 
-extern void ia64_setup_printk_clock(void);
-
-DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+DEFINE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info);
 DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
-DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
 unsigned long ia64_cycles_per_usec;
 struct ia64_boot_param *ia64_boot_param;
 struct screen_info screen_info;
@@ -91,17 +89,13 @@ static struct resource code_resource = {
 	.name	= "Kernel code",
 	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
 };
-extern void efi_initialize_iomem_resources(struct resource *,
-		struct resource *);
-extern char _text[], _end[], _etext[];
 
-unsigned long ia64_max_cacheline_size;
+static struct resource bss_resource = {
+	.name	= "Kernel bss",
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
 
-int dma_get_cache_alignment(void)
-{
-        return ia64_max_cacheline_size;
-}
-EXPORT_SYMBOL(dma_get_cache_alignment);
+unsigned long ia64_max_cacheline_size;
 
 unsigned long ia64_iobase;	/* virtual address for I/O accesses */
 EXPORT_SYMBOL(ia64_iobase);
@@ -115,6 +109,13 @@ unsigned int num_io_spaces;
  */
 #define	I_CACHE_STRIDE_SHIFT	5	/* Safest way to go: 32 bytes by 32 bytes */
 unsigned long ia64_i_cache_stride_shift = ~0;
+/*
+ * "clflush_cache_range()" needs to know what processor dependent stride size to
+ * use when it flushes cache lines including both d-cache and i-cache.
+ */
+/* Safest way to go: 32 bytes by 32 bytes */
+#define	CACHE_STRIDE_SHIFT	5
+unsigned long ia64_cache_stride_shift = ~0;
 
 /*
  * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
@@ -142,9 +143,9 @@ int num_rsvd_regions __initdata;
  * This routine does not assume the incoming segments are sorted.
  */
 int __init
-filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
+filter_rsvd_memory (u64 start, u64 end, void *arg)
 {
-	unsigned long range_start, range_end, prev_start;
+	u64 range_start, range_end, prev_start;
 	void (*func)(unsigned long, unsigned long, int);
 	int i;
 
@@ -177,6 +178,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
 	return 0;
 }
 
+/*
+ * Similar to "filter_rsvd_memory()", but the reserved memory ranges
+ * are not filtered out.
+ */
+int __init
+filter_memory(u64 start, u64 end, void *arg)
+{
+	void (*func)(unsigned long, unsigned long, int);
+
+#if IGNORE_PFN0
+	if (start == PAGE_OFFSET) {
+		printk(KERN_WARNING "warning: skipping physical page 0\n");
+		start += PAGE_SIZE;
+		if (start >= end)
+			return 0;
+	}
+#endif
+	func = arg;
+	if (start < end)
+		call_pernode_memory(__pa(start), end - start, func);
+	return 0;
+}
+
 static void __init
 sort_regions (struct rsvd_region *rsvd_region, int max)
 {
@@ -195,6 +219,23 @@ sort_regions (struct rsvd_region *rsvd_region, int max)
 	}
 }
 
+/* merge overlaps */
+static int __init
+merge_regions (struct rsvd_region *rsvd_region, int max)
+{
+	int i;
+	for (i = 1; i < max; ++i) {
+		if (rsvd_region[i].start >= rsvd_region[i-1].end)
+			continue;
+		if (rsvd_region[i].end > rsvd_region[i-1].end)
+			rsvd_region[i-1].end = rsvd_region[i].end;
+		--max;
+		memmove(&rsvd_region[i], &rsvd_region[i+1],
+			(max - i) * sizeof(struct rsvd_region));
+	}
+	return max;
+}
+
 /*
  * Request address space for all standard resources
  */
@@ -203,25 +244,101 @@ static int __init register_memory(void)
 	code_resource.start = ia64_tpa(_text);
 	code_resource.end   = ia64_tpa(_etext) - 1;
 	data_resource.start = ia64_tpa(_etext);
-	data_resource.end   = ia64_tpa(_end) - 1;
-	efi_initialize_iomem_resources(&code_resource, &data_resource);
+	data_resource.end   = ia64_tpa(_edata) - 1;
+	bss_resource.start  = ia64_tpa(__bss_start);
+	bss_resource.end    = ia64_tpa(_end) - 1;
+	efi_initialize_iomem_resources(&code_resource, &data_resource,
+			&bss_resource);
 
 	return 0;
 }
 
 __initcall(register_memory);
 
+
+#ifdef CONFIG_KEXEC
+
+/*
+ * This function checks if the reserved crashkernel is allowed on the specific
+ * IA64 machine flavour. Machines without an IO TLB use swiotlb and require
+ * some memory below 4 GB (i.e. in 32 bit area), see the implementation of
+ * lib/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that
+ * in kdump case. See the comment in sba_init() in sba_iommu.c.
+ *
+ * So, the only machvec that really supports loading the kdump kernel
+ * over 4 GB is "sn2".
+ */
+static int __init check_crashkernel_memory(unsigned long pbase, size_t size)
+{
+	if (ia64_platform_is("sn2") || ia64_platform_is("uv"))
+		return 1;
+	else
+		return pbase < (1UL << 32);
+}
+
+static void __init setup_crashkernel(unsigned long total, int *n)
+{
+	unsigned long long base = 0, size = 0;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, total,
+			&size, &base);
+	if (ret == 0 && size > 0) {
+		if (!base) {
+			sort_regions(rsvd_region, *n);
+			*n = merge_regions(rsvd_region, *n);
+			base = kdump_find_rsvd_region(size,
+					rsvd_region, *n);
+		}
+
+		if (!check_crashkernel_memory(base, size)) {
+			pr_warning("crashkernel: There would be kdump memory "
+				"at %ld GB but this is unusable because it "
+				"must\nbe below 4 GB. Change the memory "
+				"configuration of the machine.\n",
+				(unsigned long)(base >> 30));
+			return;
+		}
+
+		if (base != ~0UL) {
+			printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+					"for crashkernel (System RAM: %ldMB)\n",
+					(unsigned long)(size >> 20),
+					(unsigned long)(base >> 20),
+					(unsigned long)(total >> 20));
+			rsvd_region[*n].start =
+				(unsigned long)__va(base);
+			rsvd_region[*n].end =
+				(unsigned long)__va(base + size);
+			(*n)++;
+			crashk_res.start = base;
+			crashk_res.end = base + size - 1;
+		}
+	}
+	efi_memmap_res.start = ia64_boot_param->efi_memmap;
+	efi_memmap_res.end = efi_memmap_res.start +
+		ia64_boot_param->efi_memmap_size;
+	boot_param_res.start = __pa(ia64_boot_param);
+	boot_param_res.end = boot_param_res.start +
+		sizeof(*ia64_boot_param);
+}
+#else
+static inline void __init setup_crashkernel(unsigned long total, int *n)
+{}
+#endif
+
 /**
  * reserve_memory - setup reserved memory areas
  *
  * Setup the reserved memory areas set aside for the boot parameters,
  * initrd, etc.  There are currently %IA64_MAX_RSVD_REGIONS defined,
- * see include/asm-ia64/meminit.h if you need to define more.
+ * see arch/ia64/include/asm/meminit.h if you need to define more.
  */
 void __init
 reserve_memory (void)
 {
 	int n = 0;
+	unsigned long total_memory;
 
 	/*
 	 * none of the entries in this table overlap
@@ -243,6 +360,8 @@ reserve_memory (void)
 	rsvd_region[n].end   = (unsigned long) ia64_imva(_end);
 	n++;
 
+	n += paravirt_reserve_memory(&rsvd_region[n]);
+
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (ia64_boot_param->initrd_start) {
 		rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
@@ -251,19 +370,30 @@ reserve_memory (void)
 	}
 #endif
 
-	efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
+#ifdef CONFIG_CRASH_DUMP
+	if (reserve_elfcorehdr(&rsvd_region[n].start,
+			       &rsvd_region[n].end) == 0)
+		n++;
+#endif
+
+	total_memory = efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
 	n++;
 
+	setup_crashkernel(total_memory, &n);
+
 	/* end of memory marker */
 	rsvd_region[n].start = ~0UL;
 	rsvd_region[n].end   = ~0UL;
 	n++;
 
 	num_rsvd_regions = n;
+	BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n);
 
 	sort_regions(rsvd_region, num_rsvd_regions);
+	num_rsvd_regions = merge_regions(rsvd_region, num_rsvd_regions);
 }
 
+
 /**
  * find_initrd - get initrd parameters from the boot parameter structure
  *
@@ -278,7 +408,7 @@ find_initrd (void)
 		initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
 		initrd_end   = initrd_start+ia64_boot_param->initrd_size;
 
-		printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
+		printk(KERN_INFO "Initial ramdisk at: 0x%lx (%llu bytes)\n",
 		       initrd_start, ia64_boot_param->initrd_size);
 	}
 #endif
@@ -344,10 +474,8 @@ early_console_setup (char *cmdline)
 	if (!efi_setup_pcdp_console(cmdline))
 		earlycons++;
 #endif
-#ifdef CONFIG_SERIAL_8250_CONSOLE
-	if (!early_serial_console_init(cmdline))
+	if (!simcons_register())
 		earlycons++;
-#endif
 
 	return (earlycons) ? 0 : -1;
 }
@@ -357,38 +485,10 @@ mark_bsp_online (void)
 {
 #ifdef CONFIG_SMP
 	/* If we register an early console, allow CPU 0 to printk */
-	cpu_set(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), true);
 #endif
 }
 
-#ifdef CONFIG_SMP
-static void __init
-check_for_logical_procs (void)
-{
-	pal_logical_to_physical_t info;
-	s64 status;
-
-	status = ia64_pal_logical_to_phys(0, &info);
-	if (status == -1) {
-		printk(KERN_INFO "No logical to physical processor mapping "
-		       "available\n");
-		return;
-	}
-	if (status) {
-		printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
-		       status);
-		return;
-	}
-	/*
-	 * Total number of siblings that BSP has.  Though not all of them 
-	 * may have booted successfully. The correct number of siblings 
-	 * booted is in info.overview_num_log.
-	 */
-	smp_num_siblings = info.overview_tpc;
-	smp_num_cpucores = info.overview_cpp;
-}
-#endif
-
 static __initdata int nomca;
 static __init int setup_nomca(char *s)
 {
@@ -397,70 +497,105 @@ static __init int setup_nomca(char *s)
 }
 early_param("nomca", setup_nomca);
 
+#ifdef CONFIG_CRASH_DUMP
+int __init reserve_elfcorehdr(u64 *start, u64 *end)
+{
+	u64 length;
+
+	/* We get the address using the kernel command line,
+	 * but the size is extracted from the EFI tables.
+	 * Both address and size are required for reservation
+	 * to work properly.
+	 */
+
+	if (!is_vmcore_usable())
+		return -EINVAL;
+
+	if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
+		vmcore_unusable();
+		return -EINVAL;
+	}
+
+	*start = (unsigned long)__va(elfcorehdr_addr);
+	*end = *start + length;
+	return 0;
+}
+
+#endif /* CONFIG_PROC_VMCORE */
+
 void __init
 setup_arch (char **cmdline_p)
 {
 	unw_init();
 
+	paravirt_arch_setup_early();
+
 	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
+	paravirt_patch_apply();
 
 	*cmdline_p = __va(ia64_boot_param->command_line);
-	strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+	strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
 
 	efi_init();
 	io_port_init();
 
-	parse_early_param();
-
 #ifdef CONFIG_IA64_GENERIC
-	machvec_init(NULL);
+	/* machvec needs to be parsed from the command line
+	 * before parse_early_param() is called to ensure
+	 * that ia64_mv is initialised before any command line
+	 * settings may cause console setup to occur
+	 */
+	machvec_init_from_cmdline(*cmdline_p);
 #endif
 
+	parse_early_param();
+
 	if (early_console_setup(*cmdline_p) == 0)
 		mark_bsp_online();
 
 #ifdef CONFIG_ACPI
 	/* Initialize the ACPI boot-time table parser */
 	acpi_table_init();
+	early_acpi_boot_init();
 # ifdef CONFIG_ACPI_NUMA
 	acpi_numa_init();
-# endif
-#else
-# ifdef CONFIG_SMP
-	smp_build_cpu_map();	/* happens, e.g., with the Ski simulator */
+#  ifdef CONFIG_ACPI_HOTPLUG_CPU
+	prefill_possible_map();
+#  endif
+	per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
+		32 : cpus_weight(early_cpu_possible_map)),
+		additional_cpus > 0 ? additional_cpus : 0);
 # endif
 #endif /* CONFIG_APCI_BOOT */
 
+#ifdef CONFIG_SMP
+	smp_build_cpu_map();
+#endif
 	find_memory();
 
 	/* process SAL system table: */
 	ia64_sal_init(__va(efi.sal_systab));
 
-	ia64_setup_printk_clock();
+#ifdef CONFIG_ITANIUM
+	ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
+#else
+	{
+		unsigned long num_phys_stacked;
+
+		if (ia64_pal_rse_info(&num_phys_stacked, 0) == 0 && num_phys_stacked > 96)
+			ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
+	}
+#endif
 
 #ifdef CONFIG_SMP
 	cpu_physical_id(0) = hard_smp_processor_id();
-
-	cpu_set(0, cpu_sibling_map[0]);
-	cpu_set(0, cpu_core_map[0]);
-
-	check_for_logical_procs();
-	if (smp_num_cpucores > 1)
-		printk(KERN_INFO
-		       "cpu package is Multi-Core capable: number of cores=%d\n",
-		       smp_num_cpucores);
-	if (smp_num_siblings > 1)
-		printk(KERN_INFO
-		       "cpu package is Multi-Threading capable: number of siblings=%d\n",
-		       smp_num_siblings);
 #endif
 
 	cpu_init();	/* initialize the bootstrap CPU */
 	mmu_context_init();	/* initialize context_id bitmap */
 
-#ifdef CONFIG_ACPI
-	acpi_boot_init();
-#endif
+	paravirt_banner();
+	paravirt_arch_setup_console(cmdline_p);
 
 #ifdef CONFIG_VT
 	if (!conswitchp) {
@@ -481,15 +616,20 @@ setup_arch (char **cmdline_p)
 #endif
 
 	/* enable IA-64 Machine Check Abort Handling unless disabled */
+	if (paravirt_arch_setup_nomca())
+		nomca = 1;
 	if (!nomca)
 		ia64_mca_init();
 
 	platform_setup(cmdline_p);
+#ifndef CONFIG_IA64_HP_SIM
+	check_sal_cache_flush();
+#endif
 	paging_init();
 }
 
 /*
- * Display cpu info for all cpu's.
+ * Display cpu info for all CPUs.
  */
 static int
 show_cpuinfo (struct seq_file *m, void *v)
@@ -509,40 +649,31 @@ show_cpuinfo (struct seq_file *m, void *v)
 		{ 1UL << 1, "spontaneous deferral"},
 		{ 1UL << 2, "16-byte atomic ops" }
 	};
-	char family[32], features[128], *cp, sep;
+	char features[128], *cp, *sep;
 	struct cpuinfo_ia64 *c = v;
 	unsigned long mask;
 	unsigned long proc_freq;
-	int i;
+	int i, size;
 
 	mask = c->features;
 
-	switch (c->family) {
-	      case 0x07:	memcpy(family, "Itanium", 8); break;
-	      case 0x1f:	memcpy(family, "Itanium 2", 10); break;
-	      default:		sprintf(family, "%u", c->family); break;
-	}
-
 	/* build the feature string: */
-	memcpy(features, " standard", 10);
+	memcpy(features, "standard", 9);
 	cp = features;
-	sep = 0;
-	for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) {
+	size = sizeof(features);
+	sep = "";
+	for (i = 0; i < ARRAY_SIZE(feature_bits) && size > 1; ++i) {
 		if (mask & feature_bits[i].mask) {
-			if (sep)
-				*cp++ = sep;
-			sep = ',';
-			*cp++ = ' ';
-			strcpy(cp, feature_bits[i].feature_name);
-			cp += strlen(feature_bits[i].feature_name);
+			cp += snprintf(cp, size, "%s%s", sep,
+				       feature_bits[i].feature_name),
+			sep = ", ";
 			mask &= ~feature_bits[i].mask;
+			size = sizeof(features) - (cp - features);
 		}
 	}
-	if (mask) {
-		/* print unknown features as a hex value: */
-		if (sep)
-			*cp++ = sep;
-		sprintf(cp, " 0x%lx", mask);
+	if (mask && size > 1) {
+		/* print unknown features as a hex value */
+		snprintf(cp, size, "%s0x%lx", sep, mask);
 	}
 
 	proc_freq = cpufreq_quick_get(cpunum);
@@ -553,29 +684,32 @@ show_cpuinfo (struct seq_file *m, void *v)
 		   "processor  : %d\n"
 		   "vendor     : %s\n"
 		   "arch       : IA-64\n"
-		   "family     : %s\n"
+		   "family     : %u\n"
 		   "model      : %u\n"
+		   "model name : %s\n"
 		   "revision   : %u\n"
 		   "archrev    : %u\n"
-		   "features   :%s\n"	/* don't change this---it _is_ right! */
+		   "features   : %s\n"
 		   "cpu number : %lu\n"
 		   "cpu regs   : %u\n"
-		   "cpu MHz    : %lu.%06lu\n"
+		   "cpu MHz    : %lu.%03lu\n"
 		   "itc MHz    : %lu.%06lu\n"
 		   "BogoMIPS   : %lu.%02lu\n",
-		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
+		   cpunum, c->vendor, c->family, c->model,
+		   c->model_name, c->revision, c->archrev,
 		   features, c->ppn, c->number,
 		   proc_freq / 1000, proc_freq % 1000,
 		   c->itc_freq / 1000000, c->itc_freq % 1000000,
 		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
 #ifdef CONFIG_SMP
 	seq_printf(m, "siblings   : %u\n", cpus_weight(cpu_core_map[cpunum]));
+	if (c->socket_id != -1)
+		seq_printf(m, "physical id: %u\n", c->socket_id);
 	if (c->threads_per_core > 1 || c->cores_per_socket > 1)
 		seq_printf(m,
-		   	   "physical id: %u\n"
-		   	   "core id    : %u\n"
-		   	   "thread id  : %u\n",
-		   	   c->socket_id, c->core_id, c->thread_id);
+			   "core id    : %u\n"
+			   "thread id  : %u\n",
+			   c->core_id, c->thread_id);
 #endif
 	seq_printf(m,"\n");
 
@@ -586,10 +720,10 @@ static void *
 c_start (struct seq_file *m, loff_t *pos)
 {
 #ifdef CONFIG_SMP
-	while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
+	while (*pos < nr_cpu_ids && !cpu_online(*pos))
 		++*pos;
 #endif
-	return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
+	return *pos < nr_cpu_ids ? cpu_data(*pos) : NULL;
 }
 
 static void *
@@ -604,14 +738,47 @@ c_stop (struct seq_file *m, void *v)
 {
 }
 
-struct seq_operations cpuinfo_op = {
+const struct seq_operations cpuinfo_op = {
 	.start =	c_start,
 	.next =		c_next,
 	.stop =		c_stop,
 	.show =		show_cpuinfo
 };
 
-static void __cpuinit
+#define MAX_BRANDS	8
+static char brandname[MAX_BRANDS][128];
+
+static char *
+get_model_name(__u8 family, __u8 model)
+{
+	static int overflow;
+	char brand[128];
+	int i;
+
+	memcpy(brand, "Unknown", 8);
+	if (ia64_pal_get_brand_info(brand)) {
+		if (family == 0x7)
+			memcpy(brand, "Merced", 7);
+		else if (family == 0x1f) switch (model) {
+			case 0: memcpy(brand, "McKinley", 9); break;
+			case 1: memcpy(brand, "Madison", 8); break;
+			case 2: memcpy(brand, "Madison up to 9M cache", 23); break;
+		}
+	}
+	for (i = 0; i < MAX_BRANDS; i++)
+		if (strcmp(brandname[i], brand) == 0)
+			return brandname[i];
+	for (i = 0; i < MAX_BRANDS; i++)
+		if (brandname[i][0] == '\0')
+			return strcpy(brandname[i], brand);
+	if (overflow++ == 0)
+		printk(KERN_ERR
+		       "%s: Table overflow. Some processor model information will be missing\n",
+		       __func__);
+	return "Unknown";
+}
+
+static void
 identify_cpu (struct cpuinfo_ia64 *c)
 {
 	union {
@@ -640,7 +807,6 @@ identify_cpu (struct cpuinfo_ia64 *c)
 	pal_status_t status;
 	unsigned long impl_va_msb = 50, phys_addr_size = 44;	/* Itanium defaults */
 	int i;
-
 	for (i = 0; i < 5; ++i)
 		cpuid.bits[i] = ia64_get_cpuid(i);
 
@@ -649,12 +815,15 @@ identify_cpu (struct cpuinfo_ia64 *c)
 	c->cpu = smp_processor_id();
 
 	/* below default values will be overwritten  by identify_siblings() 
-	 * for Multi-Threading/Multi-Core capable cpu's
+	 * for Multi-Threading/Multi-Core capable CPUs
 	 */
 	c->threads_per_core = c->cores_per_socket = c->num_log = 1;
 	c->socket_id = -1;
 
 	identify_siblings(c);
+
+	if (c->threads_per_core > smp_num_siblings)
+		smp_num_siblings = c->threads_per_core;
 #endif
 	c->ppn = cpuid.field.ppn;
 	c->number = cpuid.field.number;
@@ -663,6 +832,7 @@ identify_cpu (struct cpuinfo_ia64 *c)
 	c->family = cpuid.field.family;
 	c->archrev = cpuid.field.archrev;
 	c->features = cpuid.field.features;
+	c->model_name = get_model_name(c->family, c->model);
 
 	status = ia64_pal_vm_summary(&vm1, &vm2);
 	if (status == PAL_STATUS_SUCCESS) {
@@ -673,66 +843,63 @@ identify_cpu (struct cpuinfo_ia64 *c)
 	c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
 }
 
-void
-setup_per_cpu_areas (void)
-{
-	/* start_kernel() requires this... */
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-	prefill_possible_map();
-#endif
-}
-
 /*
- * Calculate the max. cache line size.
+ * Do the following calculations:
  *
- * In addition, the minimum of the i-cache stride sizes is calculated for
- * "flush_icache_range()".
+ * 1. the max. cache line size.
+ * 2. the minimum of the i-cache stride sizes for "flush_icache_range()".
+ * 3. the minimum of the cache stride sizes for "clflush_cache_range()".
  */
-static void __cpuinit
-get_max_cacheline_size (void)
+static void
+get_cache_info(void)
 {
 	unsigned long line_size, max = 1;
-	unsigned int cache_size = 0;
-	u64 l, levels, unique_caches;
-        pal_cache_config_info_t cci;
-        s64 status;
+	unsigned long l, levels, unique_caches;
+	pal_cache_config_info_t cci;
+	long status;
 
         status = ia64_pal_cache_summary(&levels, &unique_caches);
         if (status != 0) {
                 printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
-                       __FUNCTION__, status);
+                       __func__, status);
                 max = SMP_CACHE_BYTES;
 		/* Safest setup for "flush_icache_range()" */
 		ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
+		/* Safest setup for "clflush_cache_range()" */
+		ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
 		goto out;
         }
 
 	for (l = 0; l < levels; ++l) {
-		status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
-						    &cci);
+		/* cache_type (data_or_unified)=2 */
+		status = ia64_pal_cache_config_info(l, 2, &cci);
 		if (status != 0) {
-			printk(KERN_ERR
-			       "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
-			       __FUNCTION__, l, status);
+			printk(KERN_ERR "%s: ia64_pal_cache_config_info"
+				"(l=%lu, 2) failed (status=%ld)\n",
+				__func__, l, status);
 			max = SMP_CACHE_BYTES;
 			/* The safest setup for "flush_icache_range()" */
 			cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+			/* The safest setup for "clflush_cache_range()" */
+			ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
 			cci.pcci_unified = 1;
+		} else {
+			if (cci.pcci_stride < ia64_cache_stride_shift)
+				ia64_cache_stride_shift = cci.pcci_stride;
+
+			line_size = 1 << cci.pcci_line_size;
+			if (line_size > max)
+				max = line_size;
 		}
-		line_size = 1 << cci.pcci_line_size;
-		if (line_size > max)
-			max = line_size;
-		if (cache_size < cci.pcci_cache_size)
-			cache_size = cci.pcci_cache_size;
+
 		if (!cci.pcci_unified) {
-			status = ia64_pal_cache_config_info(l,
-						    /* cache_type (instruction)= */ 1,
-						    &cci);
+			/* cache_type (instruction)=1*/
+			status = ia64_pal_cache_config_info(l, 1, &cci);
 			if (status != 0) {
-				printk(KERN_ERR
-				"%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
-					__FUNCTION__, l, status);
-				/* The safest setup for "flush_icache_range()" */
+				printk(KERN_ERR "%s: ia64_pal_cache_config_info"
+					"(l=%lu, 1) failed (status=%ld)\n",
+					__func__, l, status);
+				/* The safest setup for flush_icache_range() */
 				cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
 			}
 		}
@@ -740,9 +907,6 @@ get_max_cacheline_size (void)
 			ia64_i_cache_stride_shift = cci.pcci_stride;
 	}
   out:
-#ifdef CONFIG_SMP
-	max_cache_size = max(max_cache_size, cache_size);
-#endif
 	if (max > ia64_max_cacheline_size)
 		ia64_max_cacheline_size = max;
 }
@@ -751,10 +915,11 @@ get_max_cacheline_size (void)
  * cpu_init() initializes state that is per-CPU.  This function acts
  * as a 'CPU state barrier', nothing should get across.
  */
-void __cpuinit
+void
 cpu_init (void)
 {
-	extern void __cpuinit ia64_mmu_init (void *);
+	extern void ia64_mmu_init(void *);
+	static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
 	unsigned long num_phys_stacked;
 	pal_vm_info_2_u_t vmi;
 	unsigned int max_ctx;
@@ -762,16 +927,28 @@ cpu_init (void)
 	void *cpu_data;
 
 	cpu_data = per_cpu_init();
-
+#ifdef CONFIG_SMP
 	/*
-	 * We set ar.k3 so that assembly code in MCA handler can compute
-	 * physical addresses of per cpu variables with a simple:
-	 *   phys = ar.k3 + &per_cpu_var
+	 * insert boot cpu into sibling and core mapes
+	 * (must be done after per_cpu area is setup)
 	 */
-	ia64_set_kr(IA64_KR_PER_CPU_DATA,
-		    ia64_tpa(cpu_data) - (long) __per_cpu_start);
+	if (smp_processor_id() == 0) {
+		cpu_set(0, per_cpu(cpu_sibling_map, 0));
+		cpu_set(0, cpu_core_map[0]);
+	} else {
+		/*
+		 * Set ar.k3 so that assembly code in MCA handler can compute
+		 * physical addresses of per cpu variables with a simple:
+		 *   phys = ar.k3 + &per_cpu_var
+		 * and the alt-dtlb-miss handler can set per-cpu mapping into
+		 * the TLB when needed. head.S already did this for cpu0.
+		 */
+		ia64_set_kr(IA64_KR_PER_CPU_DATA,
+			    ia64_tpa(cpu_data) - (long) __per_cpu_start);
+	}
+#endif
 
-	get_max_cacheline_size();
+	get_cache_info();
 
 	/*
 	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
@@ -779,7 +956,7 @@ cpu_init (void)
 	 * depends on the data returned by identify_cpu().  We break the dependency by
 	 * accessing cpu_data() through the canonical per-CPU address.
 	 */
-	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
+	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(ia64_cpu_info) - __per_cpu_start);
 	identify_cpu(cpu_info);
 
 #ifdef CONFIG_MCKINLEY
@@ -823,17 +1000,12 @@ cpu_init (void)
 					| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
-	if (current->mm)
-		BUG();
+	BUG_ON(current->mm);
 
 	ia64_mmu_init(ia64_imva(cpu_data));
 	ia64_mca_cpu_init(ia64_imva(cpu_data));
 
-#ifdef CONFIG_IA32_SUPPORT
-	ia32_cpu_init();
-#endif
-
-	/* Clear ITC to eliminiate sched_clock() overflows in human time.  */
+	/* Clear ITC to eliminate sched_clock() overflows in human time.  */
 	ia64_set_itc(0);
 
 	/* disable all local interrupt sources: */
@@ -845,14 +1017,20 @@ cpu_init (void)
 
 	/* clear TPR & XTP to enable all interrupt classes: */
 	ia64_setreg(_IA64_REG_CR_TPR, 0);
+
+	/* Clear any pending interrupts left by SAL/EFI */
+	while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR)
+		ia64_eoi();
+
 #ifdef CONFIG_SMP
 	normal_xtp();
 #endif
 
 	/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
-	if (ia64_pal_vm_summary(NULL, &vmi) == 0)
+	if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
 		max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
-	else {
+		setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL);
+	} else {
 		printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
 		max_ctx = (1U << 15) - 1;	/* use architected minimum */
 	}
@@ -868,18 +1046,11 @@ cpu_init (void)
 		num_phys_stacked = 96;
 	}
 	/* size of physical stacked register partition plus 8 bytes: */
-	__get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
+	if (num_phys_stacked > max_num_phys_stacked) {
+		ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8);
+		max_num_phys_stacked = num_phys_stacked;
+	}
 	platform_cpu_init();
-	pm_idle = default_idle;
-}
-
-/*
- * On SMP systems, when the scheduler does migration-cost autodetection,
- * it needs a way to flush as much of the CPU's caches as possible.
- */
-void sched_cacheflush(void)
-{
-	ia64_sal_cache_flush(3);
 }
 
 void __init
@@ -892,6 +1063,8 @@ check_bugs (void)
 static int __init run_dmi_scan(void)
 {
 	dmi_scan_machine();
+	dmi_memdev_walk();
+	dmi_set_dump_stack_arch_desc();
 	return 0;
 }
 core_initcall(run_dmi_scan);
diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h
index 37b986cb86e..9fd9a1933b3 100644
--- a/arch/ia64/kernel/sigframe.h
+++ b/arch/ia64/kernel/sigframe.h
@@ -22,4 +22,4 @@ struct sigframe {
 	struct sigcontext sc;
 };
 
-extern long ia64_do_signal (sigset_t *, struct sigscratch *, long);
+extern void ia64_do_signal (struct sigscratch *, long);
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 1d7903ee212..33cab9a8adf 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -7,22 +7,20 @@
  * Derived from i386 and Alpha versions.
  */
 
-#include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/tty.h>
 #include <linux/binfmts.h>
 #include <linux/unistd.h>
 #include <linux/wait.h>
 
-#include <asm/ia32.h>
 #include <asm/intrinsics.h>
 #include <asm/uaccess.h>
 #include <asm/rse.h>
@@ -32,7 +30,6 @@
 
 #define DEBUG_SIG	0
 #define STACK_ALIGN	16		/* minimal alignment for stack pointer */
-#define _BLOCKABLE	(~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
 #if _NSIG_WORDS > 1
 # define PUT_SIGSET(k,u)	__copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
@@ -42,55 +39,6 @@
 # define GET_SIGSET(k,u)	__get_user((k)->sig[0], &(u)->sig[0])
 #endif
 
-long
-ia64_rt_sigsuspend (sigset_t __user *uset, size_t sigsetsize, struct sigscratch *scr)
-{
-	sigset_t oldset, set;
-
-	/* XXX: Don't preclude handling different sized sigset_t's.  */
-	if (sigsetsize != sizeof(sigset_t))
-		return -EINVAL;
-
-	if (!access_ok(VERIFY_READ, uset, sigsetsize))
-		return -EFAULT;
-
-	if (GET_SIGSET(&set, uset))
-		return -EFAULT;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-
-	spin_lock_irq(&current->sighand->siglock);
-	{
-		oldset = current->blocked;
-		current->blocked = set;
-		recalc_sigpending();
-	}
-	spin_unlock_irq(&current->sighand->siglock);
-
-	/*
-	 * The return below usually returns to the signal handler.  We need to
-	 * pre-set the correct error code here to ensure that the right values
-	 * get saved in sigcontext by ia64_do_signal.
-	 */
-	scr->pt.r8 = EINTR;
-	scr->pt.r10 = -1;
-
-	while (1) {
-		current->state = TASK_INTERRUPTIBLE;
-		schedule();
-		if (ia64_do_signal(&oldset, scr, 1))
-			return -EINTR;
-	}
-}
-
-asmlinkage long
-sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2,
-		 long arg3, long arg4, long arg5, long arg6, long arg7,
-		 struct pt_regs regs)
-{
-	return do_sigaltstack(uss, uoss, regs.r12);
-}
-
 static long
 restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
 {
@@ -141,7 +89,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
 	if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) {
 		struct ia64_psr *psr = ia64_psr(&scr->pt);
 
-		__copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
+		err |= __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
 		psr->mfh = 0;	/* drop signal handler's fph contents... */
 		preempt_disable();
 		if (psr->dfh)
@@ -157,7 +105,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
 }
 
 int
-copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from)
+copy_siginfo_to_user (siginfo_t __user *to, const siginfo_t *from)
 {
 	if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
 		return -EFAULT;
@@ -243,14 +191,7 @@ ia64_rt_sigreturn (struct sigscratch *scr)
 	if (GET_SIGSET(&set, &sc->sc_mask))
 		goto give_sigsegv;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
-
-	spin_lock_irq(&current->sighand->siglock);
-	{
-		current->blocked = set;
-		recalc_sigpending();
-	}
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 
 	if (restore_sigcontext(sc, scr))
 		goto give_sigsegv;
@@ -259,19 +200,16 @@ ia64_rt_sigreturn (struct sigscratch *scr)
 	printk("SIG return (%s:%d): sp=%lx ip=%lx\n",
 	       current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip);
 #endif
-	/*
-	 * It is more difficult to avoid calling this function than to
-	 * call it and ignore errors.
-	 */
-	do_sigaltstack(&sc->sc_stack, NULL, scr->pt.r12);
+	if (restore_altstack(&sc->sc_stack))
+		goto give_sigsegv;
 	return retval;
 
   give_sigsegv:
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
-	si.si_uid = current->uid;
+	si.si_pid = task_pid_vnr(current);
+	si.si_uid = from_kuid_munged(current_user_ns(), current_uid());
 	si.si_addr = sc;
 	force_sig_info(SIGSEGV, &si, current);
 	return retval;
@@ -287,7 +225,7 @@ static long
 setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr)
 {
 	unsigned long flags = 0, ifs, cfm, nat;
-	long err;
+	long err = 0;
 
 	ifs = scr->pt.cr_ifs;
 
@@ -300,12 +238,12 @@ setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratc
 	ia64_flush_fph(current);
 	if ((current->thread.flags & IA64_THREAD_FPH_VALID)) {
 		flags |= IA64_SC_FLAG_FPH_VALID;
-		__copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
+		err = __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
 	}
 
 	nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat);
 
-	err  = __put_user(flags, &sc->sc_flags);
+	err |= __put_user(flags, &sc->sc_flags);
 	err |= __put_user(nat, &sc->sc_nat);
 	err |= PUT_SIGSET(mask, &sc->sc_mask);
 	err |= __put_user(cfm, &sc->sc_cfm);
@@ -323,15 +261,7 @@ setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratc
 	err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8);		/* r15 */
 	err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip);
 
-	if (flags & IA64_SC_FLAG_IN_SYSCALL) {
-		/* Clear scratch registers if the signal interrupted a system call. */
-		err |= __put_user(0, &sc->sc_ar_ccv);				/* ar.ccv */
-		err |= __put_user(0, &sc->sc_br[7]);				/* b7 */
-		err |= __put_user(0, &sc->sc_gr[14]);				/* r14 */
-		err |= __clear_user(&sc->sc_ar25, 2*8);			/* ar.csd & ar.ssd */
-		err |= __clear_user(&sc->sc_gr[2], 2*8);			/* r2-r3 */
-		err |= __clear_user(&sc->sc_gr[16], 16*8);			/* r16-r31 */
-	} else {
+	if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
 		/* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. */
 		err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv);		/* ar.ccv */
 		err |= __put_user(scr->pt.b7, &sc->sc_br[7]);			/* b7 */
@@ -375,8 +305,8 @@ force_sigsegv_info (int sig, void __user *addr)
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
-	si.si_uid = current->uid;
+	si.si_pid = task_pid_vnr(current);
+	si.si_uid = from_kuid_munged(current_user_ns(), current_uid());
 	si.si_addr = addr;
 	force_sig_info(SIGSEGV, &si, current);
 	return 0;
@@ -393,15 +323,33 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
 
 	new_sp = scr->pt.r12;
 	tramp_addr = (unsigned long) __kernel_sigtramp;
-	if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags(new_sp) == 0) {
-		new_sp = current->sas_ss_sp + current->sas_ss_size;
-		/*
-		 * We need to check for the register stack being on the signal stack
-		 * separately, because it's switched separately (memory stack is switched
-		 * in the kernel, register stack is switched in the signal trampoline).
-		 */
-		if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
-			new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1);
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		int onstack = sas_ss_flags(new_sp);
+
+		if (onstack == 0) {
+			new_sp = current->sas_ss_sp + current->sas_ss_size;
+			/*
+			 * We need to check for the register stack being on the
+			 * signal stack separately, because it's switched
+			 * separately (memory stack is switched in the kernel,
+			 * register stack is switched in the signal trampoline).
+			 */
+			if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
+				new_rbs = ALIGN(current->sas_ss_sp,
+						sizeof(long));
+		} else if (onstack == SS_ONSTACK) {
+			unsigned long check_sp;
+
+			/*
+			 * If we are on the alternate signal stack and would
+			 * overflow it, don't. Return an always-bogus address
+			 * instead so we will die with SIGSEGV.
+			 */
+			check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN;
+			if (!likely(on_sig_stack(check_sp)))
+				return force_sigsegv_info(sig, (void __user *)
+							  check_sp);
+		}
 	}
 	frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN);
 
@@ -417,9 +365,7 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
 
 	err |= copy_siginfo_to_user(&frame->info, info);
 
-	err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp);
-	err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size);
-	err |= __put_user(sas_ss_flags(scr->pt.r12), &frame->sc.sc_stack.ss_flags);
+	err |= __save_altstack(&frame->sc.sc_stack, scr->pt.r12);
 	err |= setup_sigcontext(&frame->sc, set, scr);
 
 	if (unlikely(err))
@@ -454,24 +400,15 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
 }
 
 static long
-handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset,
+handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 	       struct sigscratch *scr)
 {
-	if (IS_IA32_PROCESS(&scr->pt)) {
-		/* send signal to IA-32 process */
-		if (!ia32_setup_frame1(sig, ka, info, oldset, &scr->pt))
-			return 0;
-	} else
-		/* send signal to IA-64 process */
-		if (!setup_frame(sig, ka, info, oldset, scr))
-			return 0;
-
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
-	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&current->blocked, sig);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	if (!setup_frame(sig, ka, info, sigmask_to_save(), scr))
+		return 0;
+
+	signal_delivered(sig, info, ka, &scr->pt,
+				 test_thread_flag(TIF_SINGLESTEP));
+
 	return 1;
 }
 
@@ -479,25 +416,13 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse
  * Note that `init' is a special process: it doesn't get signals it doesn't want to
  * handle.  Thus you cannot kill init even with a SIGKILL even by mistake.
  */
-long
-ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
+void
+ia64_do_signal (struct sigscratch *scr, long in_syscall)
 {
 	struct k_sigaction ka;
 	siginfo_t info;
 	long restart = in_syscall;
 	long errno = scr->pt.r8;
-#	define ERR_CODE(c)	(IS_IA32_PROCESS(&scr->pt) ? -(c) : (c))
-
-	/*
-	 * In the ia64_leave_kernel code path, we want the common case to go fast, which
-	 * is why we may in certain cases get here from kernel mode. Just return without
-	 * doing anything if so.
-	 */
-	if (!user_mode(&scr->pt))
-		return 0;
-
-	if (!oldset)
-		oldset = &current->blocked;
 
 	/*
 	 * This only loops in the rare cases of handle_signal() failing, in which case we
@@ -512,14 +437,7 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
 		 * inferior call), thus it's important to check for restarting _after_
 		 * get_signal_to_deliver().
 		 */
-		if (IS_IA32_PROCESS(&scr->pt)) {
-			if (in_syscall) {
-				if (errno >= 0)
-					restart = 0;
-				else
-					errno = -errno;
-			}
-		} else if ((long) scr->pt.r10 != -1)
+		if ((long) scr->pt.r10 != -1)
 			/*
 			 * A system calls has to be restarted only if one of the error codes
 			 * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned.  If r10
@@ -535,22 +453,18 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
 			switch (errno) {
 			      case ERESTART_RESTARTBLOCK:
 			      case ERESTARTNOHAND:
-				scr->pt.r8 = ERR_CODE(EINTR);
+				scr->pt.r8 = EINTR;
 				/* note: scr->pt.r10 is already -1 */
 				break;
 
 			      case ERESTARTSYS:
 				if ((ka.sa.sa_flags & SA_RESTART) == 0) {
-					scr->pt.r8 = ERR_CODE(EINTR);
+					scr->pt.r8 = EINTR;
 					/* note: scr->pt.r10 is already -1 */
 					break;
 				}
 			      case ERESTARTNOINTR:
-				if (IS_IA32_PROCESS(&scr->pt)) {
-					scr->pt.r8 = scr->pt.r1;
-					scr->pt.cr_iip -= 2;
-				} else
-					ia64_decrement_ip(&scr->pt);
+				ia64_decrement_ip(&scr->pt);
 				restart = 0; /* don't restart twice if handle_signal() fails... */
 			}
 		}
@@ -559,8 +473,8 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
 		 * Whee!  Actually deliver the signal.  If the delivery failed, we need to
 		 * continue to iterate in this loop so we can deliver the SIGSEGV...
 		 */
-		if (handle_signal(signr, &ka, &info, oldset, scr))
-			return 1;
+		if (handle_signal(signr, &ka, &info, scr))
+			return;
 	}
 
 	/* Did we come from a system call? */
@@ -569,22 +483,18 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
 		if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR
 		    || errno == ERESTART_RESTARTBLOCK)
 		{
-			if (IS_IA32_PROCESS(&scr->pt)) {
-				scr->pt.r8 = scr->pt.r1;
-				scr->pt.cr_iip -= 2;
-				if (errno == ERESTART_RESTARTBLOCK)
-					scr->pt.r8 = 0;	/* x86 version of __NR_restart_syscall */
-			} else {
-				/*
-				 * Note: the syscall number is in r15 which is saved in
-				 * pt_regs so all we need to do here is adjust ip so that
-				 * the "break" instruction gets re-executed.
-				 */
-				ia64_decrement_ip(&scr->pt);
-				if (errno == ERESTART_RESTARTBLOCK)
-					scr->pt.r15 = __NR_restart_syscall;
-			}
+			/*
+			 * Note: the syscall number is in r15 which is saved in
+			 * pt_regs so all we need to do here is adjust ip so that
+			 * the "break" instruction gets re-executed.
+			 */
+			ia64_decrement_ip(&scr->pt);
+			if (errno == ERESTART_RESTARTBLOCK)
+				scr->pt.r15 = __NR_restart_syscall;
 		}
 	}
-	return 0;
+
+	/* if there's no signal to deliver, we just put the saved sigmask
+	 * back */
+	restore_saved_sigmask();
 }
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 657ac99a451..9fcd4e63048 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -30,8 +30,9 @@
 #include <linux/delay.h>
 #include <linux/efi.h>
 #include <linux/bitops.h>
+#include <linux/kexec.h>
 
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/current.h>
 #include <asm/delay.h>
 #include <asm/machvec.h>
@@ -43,54 +44,39 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/sal.h>
-#include <asm/system.h>
 #include <asm/tlbflush.h>
 #include <asm/unistd.h>
 #include <asm/mca.h>
 
 /*
- * Structure and data for smp_call_function(). This is designed to minimise static memory
- * requirements. It also looks cleaner.
+ * Note: alignment of 4 entries/cacheline was empirically determined
+ * to be a good tradeoff between hot cachelines & spreading the array
+ * across too many cacheline.
  */
-static  __cacheline_aligned DEFINE_SPINLOCK(call_lock);
+static struct local_tlb_flush_counts {
+	unsigned int count;
+} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];
 
-struct call_data_struct {
-	void (*func) (void *info);
-	void *info;
-	long wait;
-	atomic_t started;
-	atomic_t finished;
-};
-
-static volatile struct call_data_struct *call_data;
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS],
+				     shadow_flush_counts);
 
 #define IPI_CALL_FUNC		0
 #define IPI_CPU_STOP		1
+#define IPI_CALL_FUNC_SINGLE	2
+#define IPI_KDUMP_CPU_STOP	3
 
 /* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
-static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation);
 
 extern void cpu_halt (void);
 
-void
-lock_ipi_calllock(void)
-{
-	spin_lock_irq(&call_lock);
-}
-
-void
-unlock_ipi_calllock(void)
-{
-	spin_unlock_irq(&call_lock);
-}
-
 static void
-stop_this_cpu (void)
+stop_this_cpu(void)
 {
 	/*
 	 * Remove this CPU:
 	 */
-	cpu_clear(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), false);
 	max_xtp();
 	local_irq_disable();
 	cpu_halt();
@@ -108,7 +94,7 @@ cpu_die(void)
 }
 
 irqreturn_t
-handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
+handle_IPI (int irq, void *dev_id)
 {
 	int this_cpu = get_cpu();
 	unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
@@ -124,40 +110,23 @@ handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
 			ops &= ~(1 << which);
 
 			switch (which) {
-			      case IPI_CALL_FUNC:
-			      {
-				      struct call_data_struct *data;
-				      void (*func)(void *info);
-				      void *info;
-				      int wait;
-
-				      /* release the 'pointer lock' */
-				      data = (struct call_data_struct *) call_data;
-				      func = data->func;
-				      info = data->info;
-				      wait = data->wait;
-
-				      mb();
-				      atomic_inc(&data->started);
-				      /*
-				       * At this point the structure may be gone unless
-				       * wait is true.
-				       */
-				      (*func)(info);
-
-				      /* Notify the sending CPU that the task is done.  */
-				      mb();
-				      if (wait)
-					      atomic_inc(&data->finished);
-			      }
-			      break;
-
-			      case IPI_CPU_STOP:
+			case IPI_CPU_STOP:
 				stop_this_cpu();
 				break;
-
-			      default:
-				printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
+			case IPI_CALL_FUNC:
+				generic_smp_call_function_interrupt();
+				break;
+			case IPI_CALL_FUNC_SINGLE:
+				generic_smp_call_function_single_interrupt();
+				break;
+#ifdef CONFIG_KEXEC
+			case IPI_KDUMP_CPU_STOP:
+				unw_init_running(kdump_cpu_freeze, NULL);
+				break;
+#endif
+			default:
+				printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n",
+						this_cpu, which);
 				break;
 			}
 		} while (ops);
@@ -167,8 +136,10 @@ handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
 	return IRQ_HANDLED;
 }
 
+
+
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_single (int dest_cpu, int op)
@@ -178,7 +149,7 @@ send_IPI_single (int dest_cpu, int op)
 }
 
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_allbutself (int op)
@@ -192,7 +163,20 @@ send_IPI_allbutself (int op)
 }
 
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_mask(const struct cpumask *mask, int op)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask) {
+			send_IPI_single(cpu, op);
+	}
+}
+
+/*
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_all (int op)
@@ -205,7 +189,7 @@ send_IPI_all (int op)
 }
 
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_self (int op)
@@ -213,155 +197,134 @@ send_IPI_self (int op)
 	send_IPI_single(smp_processor_id(), op);
 }
 
+#ifdef CONFIG_KEXEC
+void
+kdump_smp_send_stop(void)
+{
+ 	send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
+}
+
+void
+kdump_smp_send_init(void)
+{
+	unsigned int cpu, self_cpu;
+	self_cpu = smp_processor_id();
+	for_each_online_cpu(cpu) {
+		if (cpu != self_cpu) {
+			if(kdump_status[cpu] == 0)
+				platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
+		}
+	}
+}
+#endif
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 void
 smp_send_reschedule (int cpu)
 {
 	platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
 }
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
 
-void
-smp_flush_tlb_all (void)
+/*
+ * Called with preemption disabled.
+ */
+static void
+smp_send_local_flush_tlb (int cpu)
 {
-	on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
+	platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
 }
 
 void
-smp_flush_tlb_mm (struct mm_struct *mm)
+smp_local_flush_tlb(void)
 {
-	preempt_disable();
-	/* this happens for the common case of a single-threaded fork():  */
-	if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
-	{
-		local_finish_flush_tlb_mm(mm);
-		preempt_enable();
-		return;
-	}
-
-	preempt_enable();
 	/*
-	 * We could optimize this further by using mm->cpu_vm_mask to track which CPUs
-	 * have been running in the address space.  It's not clear that this is worth the
-	 * trouble though: to avoid races, we have to raise the IPI on the target CPU
-	 * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
-	 * rather trivial.
+	 * Use atomic ops. Otherwise, the load/increment/store sequence from
+	 * a "++" operation can have the line stolen between the load & store.
+	 * The overhead of the atomic op in negligible in this case & offers
+	 * significant benefit for the brief periods where lots of cpus
+	 * are simultaneously flushing TLBs.
 	 */
-	on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
+	ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq);
+	local_flush_tlb_all();
 }
 
-/*
- * Run a function on another CPU
- *  <func>	The function to run. This must be fast and non-blocking.
- *  <info>	An arbitrary pointer to pass to the function.
- *  <nonatomic>	Currently unused.
- *  <wait>	If true, wait until function has completed on other CPUs.
- *  [RETURNS]   0 on success, else a negative status code.
- *
- * Does not return until the remote CPU is nearly ready to execute <func>
- * or is or has executed.
- */
+#define FLUSH_DELAY	5 /* Usec backoff to eliminate excessive cacheline bouncing */
 
-int
-smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
-			  int wait)
+void
+smp_flush_tlb_cpumask(cpumask_t xcpumask)
 {
-	struct call_data_struct data;
-	int cpus = 1;
-	int me = get_cpu(); /* prevent preemption and reschedule on another processor */
-
-	if (cpuid == me) {
-		printk(KERN_INFO "%s: trying to call self\n", __FUNCTION__);
-		put_cpu();
-		return -EBUSY;
-	}
+	unsigned short *counts = __ia64_per_cpu_var(shadow_flush_counts);
+	cpumask_t cpumask = xcpumask;
+	int mycpu, cpu, flush_mycpu = 0;
 
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
+	preempt_disable();
+	mycpu = smp_processor_id();
 
-	spin_lock_bh(&call_lock);
+	for_each_cpu_mask(cpu, cpumask)
+		counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff;
 
-	call_data = &data;
-	mb();	/* ensure store to call_data precedes setting of IPI_CALL_FUNC */
-  	send_IPI_single(cpuid, IPI_CALL_FUNC);
+	mb();
+	for_each_cpu_mask(cpu, cpumask) {
+		if (cpu == mycpu)
+			flush_mycpu = 1;
+		else
+			smp_send_local_flush_tlb(cpu);
+	}
 
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
-		cpu_relax();
+	if (flush_mycpu)
+		smp_local_flush_tlb();
 
-	if (wait)
-		while (atomic_read(&data.finished) != cpus)
-			cpu_relax();
-	call_data = NULL;
+	for_each_cpu_mask(cpu, cpumask)
+		while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff))
+			udelay(FLUSH_DELAY);
 
-	spin_unlock_bh(&call_lock);
-	put_cpu();
-	return 0;
+	preempt_enable();
 }
-EXPORT_SYMBOL(smp_call_function_single);
 
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-/*
- *  [SUMMARY]	Run a function on all other CPUs.
- *  <func>	The function to run. This must be fast and non-blocking.
- *  <info>	An arbitrary pointer to pass to the function.
- *  <nonatomic>	currently unused.
- *  <wait>	If true, wait (atomically) until function has completed on other CPUs.
- *  [RETURNS]   0 on success, else a negative status code.
- *
- * Does not return until remote CPUs are nearly ready to execute <func> or are or have
- * executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int
-smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
+void
+smp_flush_tlb_all (void)
 {
-	struct call_data_struct data;
-	int cpus = num_online_cpus()-1;
-
-	if (!cpus)
-		return 0;
-
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	spin_lock(&call_lock);
-
-	call_data = &data;
-	mb();	/* ensure store to call_data precedes setting of IPI_CALL_FUNC */
-	send_IPI_allbutself(IPI_CALL_FUNC);
+	on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
+}
 
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
-		cpu_relax();
+void
+smp_flush_tlb_mm (struct mm_struct *mm)
+{
+	cpumask_var_t cpus;
+	preempt_disable();
+	/* this happens for the common case of a single-threaded fork():  */
+	if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
+	{
+		local_finish_flush_tlb_mm(mm);
+		preempt_enable();
+		return;
+	}
+	if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) {
+		smp_call_function((void (*)(void *))local_finish_flush_tlb_mm,
+			mm, 1);
+	} else {
+		cpumask_copy(cpus, mm_cpumask(mm));
+		smp_call_function_many(cpus,
+			(void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
+		free_cpumask_var(cpus);
+	}
+	local_irq_disable();
+	local_finish_flush_tlb_mm(mm);
+	local_irq_enable();
+	preempt_enable();
+}
 
-	if (wait)
-		while (atomic_read(&data.finished) != cpus)
-			cpu_relax();
-	call_data = NULL;
+void arch_send_call_function_single_ipi(int cpu)
+{
+	send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
+}
 
-	spin_unlock(&call_lock);
-	return 0;
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	send_IPI_mask(mask, IPI_CALL_FUNC);
 }
-EXPORT_SYMBOL(smp_call_function);
 
 /*
  * this function calls the 'stop' function on all other CPUs in the system.
@@ -372,7 +335,7 @@ smp_send_stop (void)
 	send_IPI_allbutself(IPI_CPU_STOP);
 }
 
-int __init
+int
 setup_profiling_timer (unsigned int multiplier)
 {
 	return -EINVAL;
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 44e9547878a..547a48d78bd 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -21,7 +21,6 @@
  * 05/01/30 Suresh Siddha <suresh.b.siddha@intel.com>
  *						Setup cpu_sibling_map and cpu_core_map
  */
-#include <linux/config.h>
 
 #include <linux/module.h>
 #include <linux/acpi.h>
@@ -36,30 +35,29 @@
 #include <linux/mm.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 #include <linux/efi.h>
 #include <linux/percpu.h>
 #include <linux/bitops.h>
 
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/cache.h>
 #include <asm/current.h>
 #include <asm/delay.h>
-#include <asm/ia32.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/machvec.h>
 #include <asm/mca.h>
 #include <asm/page.h>
+#include <asm/paravirt.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/sal.h>
-#include <asm/system.h>
 #include <asm/tlbflush.h>
 #include <asm/unistd.h>
+#include <asm/sn/arch.h>
 
 #define SMP_DEBUG 0
 
@@ -77,13 +75,6 @@
 #endif
 
 /*
- * Store all idle threads, this can be reused instead of creating
- * a new thread. Also avoids complicated thread destroy functionality
- * for idle threads.
- */
-struct task_struct *idle_thread_array[NR_CPUS];
-
-/*
  * Global array allocated for NR_CPUS at boot time
  */
 struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS];
@@ -96,13 +87,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
 
 #define set_brendez_area(x) (sal_state_for_booting_cpu = &sal_boot_rendez_state[(x)]);
 
-#define get_idle_for_cpu(x)		(idle_thread_array[(x)])
-#define set_idle_for_cpu(x,p)	(idle_thread_array[(x)] = (p))
-
 #else
-
-#define get_idle_for_cpu(x)		(NULL)
-#define set_idle_for_cpu(x,p)
 #define set_brendez_area(x)
 #endif
 
@@ -121,27 +106,22 @@ static volatile unsigned long go[SLAVE + 1];
 
 #define DEBUG_ITC_SYNC	0
 
-extern void __devinit calibrate_delay (void);
 extern void start_ap (void);
 extern unsigned long ia64_iobase;
 
-task_t *task_for_booting_cpu;
+struct task_struct *task_for_booting_cpu;
 
 /*
  * State for each CPU
  */
 DEFINE_PER_CPU(int, cpu_state);
 
-/* Bitmasks of currently online, and possible CPUs */
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_possible_map);
-
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+
 int smp_num_siblings = 1;
-int smp_num_cpucores = 1;
 
 /* which logical CPU number maps to which CPU (physical APIC ID) */
 volatile int ia64_cpu_to_sapicid[NR_CPUS];
@@ -318,7 +298,7 @@ ia64_sync_itc (unsigned int master)
 
 	go[MASTER] = 1;
 
-	if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) {
+	if (smp_call_function_single(master, sync_master, NULL, 0) < 0) {
 		printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master);
 		return;
 	}
@@ -367,15 +347,15 @@ ia64_sync_itc (unsigned int master)
 /*
  * Ideally sets up per-cpu profiling hooks.  Doesn't do much now...
  */
-static inline void __devinit
-smp_setup_percpu_timer (void)
+static inline void smp_setup_percpu_timer(void)
 {
 }
 
-static void __devinit
+static void
 smp_callin (void)
 {
 	int cpuid, phys_id, itc_master;
+	struct cpuinfo_ia64 *last_cpuinfo, *this_cpuinfo;
 	extern void ia64_init_itm(void);
 	extern volatile int time_keeper_id;
 
@@ -395,10 +375,19 @@ smp_callin (void)
 
 	fix_b0_for_bsp();
 
-	lock_ipi_calllock();
-	cpu_set(cpuid, cpu_online_map);
-	unlock_ipi_calllock();
+	/*
+	 * numa_node_id() works after this.
+	 */
+	set_numa_node(cpu_to_node_map[cpuid]);
+	set_numa_mem(local_memory_node(cpu_to_node_map[cpuid]));
+
+	spin_lock(&vector_lock);
+	/* Setup the per cpu irq handling data structures */
+	__setup_vector_irq(cpuid);
+	notify_cpu_starting(cpuid);
+	set_cpu_online(cpuid, true);
 	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
+	spin_unlock(&vector_lock);
 
 	smp_setup_percpu_timer();
 
@@ -425,12 +414,22 @@ smp_callin (void)
 	 * Get our bogomips.
 	 */
 	ia64_init_itm();
-	calibrate_delay();
-	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
 
-#ifdef CONFIG_IA32_SUPPORT
-	ia32_gdt_init();
-#endif
+	/*
+	 * Delay calibration can be skipped if new processor is identical to the
+	 * previous processor.
+	 */
+	last_cpuinfo = cpu_data(cpuid - 1);
+	this_cpuinfo = local_cpu_data;
+	if (last_cpuinfo->itc_freq != this_cpuinfo->itc_freq ||
+	    last_cpuinfo->proc_freq != this_cpuinfo->proc_freq ||
+	    last_cpuinfo->features != this_cpuinfo->features ||
+	    last_cpuinfo->revision != this_cpuinfo->revision ||
+	    last_cpuinfo->family != this_cpuinfo->family ||
+	    last_cpuinfo->archrev != this_cpuinfo->archrev ||
+	    last_cpuinfo->model != this_cpuinfo->model)
+		calibrate_delay();
+	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
 
 	/*
 	 * Allow the master to continue.
@@ -443,75 +442,29 @@ smp_callin (void)
 /*
  * Activate a secondary processor.  head.S calls this.
  */
-int __devinit
+int
 start_secondary (void *unused)
 {
 	/* Early console may use I/O ports */
 	ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
+#ifndef CONFIG_PRINTK_TIME
 	Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
+#endif
 	efi_map_pal_code();
 	cpu_init();
 	preempt_disable();
 	smp_callin();
 
-	cpu_idle();
+	cpu_startup_entry(CPUHP_ONLINE);
 	return 0;
 }
 
-struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
-{
-	return NULL;
-}
-
-struct create_idle {
-	struct task_struct *idle;
-	struct completion done;
-	int cpu;
-};
-
-void
-do_fork_idle(void *_c_idle)
-{
-	struct create_idle *c_idle = _c_idle;
-
-	c_idle->idle = fork_idle(c_idle->cpu);
-	complete(&c_idle->done);
-}
-
-static int __devinit
-do_boot_cpu (int sapicid, int cpu)
+static int
+do_boot_cpu (int sapicid, int cpu, struct task_struct *idle)
 {
 	int timeout;
-	struct create_idle c_idle = {
-		.cpu	= cpu,
-		.done	= COMPLETION_INITIALIZER(c_idle.done),
-	};
-	DECLARE_WORK(work, do_fork_idle, &c_idle);
-
- 	c_idle.idle = get_idle_for_cpu(cpu);
- 	if (c_idle.idle) {
-		init_idle(c_idle.idle, cpu);
- 		goto do_rest;
-	}
-
-	/*
-	 * We can't use kernel_thread since we must avoid to reschedule the child.
-	 */
-	if (!keventd_up() || current_is_keventd())
-		work.func(work.data);
-	else {
-		schedule_work(&work);
-		wait_for_completion(&c_idle.done);
-	}
-
-	if (IS_ERR(c_idle.idle))
-		panic("failed fork for CPU %d", cpu);
-
-	set_idle_for_cpu(cpu, c_idle.idle);
-
-do_rest:
-	task_for_booting_cpu = c_idle.idle;
 
+	task_for_booting_cpu = idle;
 	Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);
 
 	set_brendez_area(cpu);
@@ -531,7 +484,7 @@ do_rest:
 	if (!cpu_isset(cpu, cpu_callin_map)) {
 		printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
 		ia64_cpu_to_sapicid[cpu] = -1;
-		cpu_clear(cpu, cpu_online_map);  /* was set in smp_callin() */
+		set_cpu_online(cpu, false);  /* was set in smp_callin() */
 		return -EINVAL;
 	}
 	return 0;
@@ -561,15 +514,14 @@ smp_build_cpu_map (void)
 	}
 
 	ia64_cpu_to_sapicid[0] = boot_cpu_id;
-	cpus_clear(cpu_present_map);
-	cpu_set(0, cpu_present_map);
-	cpu_set(0, cpu_possible_map);
+	init_cpu_present(cpumask_of(0));
+	set_cpu_possible(0, true);
 	for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) {
 		sapicid = smp_boot_data.cpu_phys_id[i];
 		if (sapicid == boot_cpu_id)
 			continue;
-		cpu_set(cpu, cpu_present_map);
-		cpu_set(cpu, cpu_possible_map);
+		set_cpu_present(cpu, true);
+		set_cpu_possible(cpu, true);
 		ia64_cpu_to_sapicid[cpu] = sapicid;
 		cpu++;
 	}
@@ -589,10 +541,6 @@ smp_prepare_cpus (unsigned int max_cpus)
 
 	smp_setup_percpu_timer();
 
-	/*
-	 * We have the boot CPU online for sure.
-	 */
-	cpu_set(0, cpu_online_map);
 	cpu_set(0, cpu_callin_map);
 
 	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
@@ -607,21 +555,20 @@ smp_prepare_cpus (unsigned int max_cpus)
 	 */
 	if (!max_cpus) {
 		printk(KERN_INFO "SMP mode deactivated.\n");
-		cpus_clear(cpu_online_map);
-		cpus_clear(cpu_present_map);
-		cpus_clear(cpu_possible_map);
-		cpu_set(0, cpu_online_map);
-		cpu_set(0, cpu_present_map);
-		cpu_set(0, cpu_possible_map);
+		init_cpu_online(cpumask_of(0));
+		init_cpu_present(cpumask_of(0));
+		init_cpu_possible(cpumask_of(0));
 		return;
 	}
 }
 
-void __devinit smp_prepare_boot_cpu(void)
+void smp_prepare_boot_cpu(void)
 {
-	cpu_set(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), true);
 	cpu_set(smp_processor_id(), cpu_callin_map);
+	set_numa_node(cpu_to_node_map[smp_processor_id()]);
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+	paravirt_post_smp_prepare_boot_cpu();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -630,12 +577,12 @@ clear_cpu_sibling_map(int cpu)
 {
 	int i;
 
-	for_each_cpu_mask(i, cpu_sibling_map[cpu])
-		cpu_clear(cpu, cpu_sibling_map[i]);
+	for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
+		cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
 	for_each_cpu_mask(i, cpu_core_map[cpu])
 		cpu_clear(cpu, cpu_core_map[i]);
 
-	cpu_sibling_map[cpu] = cpu_core_map[cpu] = CPU_MASK_NONE;
+	per_cpu(cpu_sibling_map, cpu) = cpu_core_map[cpu] = CPU_MASK_NONE;
 }
 
 static void
@@ -646,7 +593,7 @@ remove_siblinginfo(int cpu)
 	if (cpu_data(cpu)->threads_per_core == 1 &&
 	    cpu_data(cpu)->cores_per_socket == 1) {
 		cpu_clear(cpu, cpu_core_map[cpu]);
-		cpu_clear(cpu, cpu_sibling_map[cpu]);
+		cpu_clear(cpu, per_cpu(cpu_sibling_map, cpu));
 		return;
 	}
 
@@ -661,8 +608,8 @@ extern void fixup_irqs(void);
 int migrate_platform_irqs(unsigned int cpu)
 {
 	int new_cpei_cpu;
-	irq_desc_t *desc = NULL;
-	cpumask_t 	mask;
+	struct irq_data *data = NULL;
+	const struct cpumask *mask;
 	int 		retval = 0;
 
 	/*
@@ -674,23 +621,23 @@ int migrate_platform_irqs(unsigned int cpu)
 			/*
 			 * Now re-target the CPEI to a different processor
 			 */
-			new_cpei_cpu = any_online_cpu(cpu_online_map);
-			mask = cpumask_of_cpu(new_cpei_cpu);
+			new_cpei_cpu = cpumask_any(cpu_online_mask);
+			mask = cpumask_of(new_cpei_cpu);
 			set_cpei_target_cpu(new_cpei_cpu);
-			desc = irq_descp(ia64_cpe_irq);
+			data = irq_get_irq_data(ia64_cpe_irq);
 			/*
-			 * Switch for now, immediatly, we need to do fake intr
+			 * Switch for now, immediately, we need to do fake intr
 			 * as other interrupts, but need to study CPEI behaviour with
 			 * polling before making changes.
 			 */
-			if (desc) {
-				desc->handler->disable(ia64_cpe_irq);
-				desc->handler->set_affinity(ia64_cpe_irq, mask);
-				desc->handler->enable(ia64_cpe_irq);
-				printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu);
+			if (data && data->chip) {
+				data->chip->irq_disable(data);
+				data->chip->irq_set_affinity(data, mask, false);
+				data->chip->irq_enable(data);
+				printk ("Re-targeting CPEI to cpu %d\n", new_cpei_cpu);
 			}
 		}
-		if (!desc) {
+		if (!data) {
 			printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
 			retval = -EBUSY;
 		}
@@ -711,15 +658,19 @@ int __cpu_disable(void)
 		return (-EBUSY);
 	}
 
-	cpu_clear(cpu, cpu_online_map);
+	if (ia64_platform_is("sn2")) {
+		if (!sn_cpu_disable_allowed(cpu))
+			return -EBUSY;
+	}
+
+	set_cpu_online(cpu, false);
 
 	if (migrate_platform_irqs(cpu)) {
-		cpu_set(cpu, cpu_online_map);
-		return (-EBUSY);
+		set_cpu_online(cpu, true);
+		return -EBUSY;
 	}
 
 	remove_siblinginfo(cpu);
-	cpu_clear(cpu, cpu_online_map);
 	fixup_irqs();
 	local_flush_tlb_all();
 	cpu_clear(cpu, cpu_callin_map);
@@ -741,17 +692,6 @@ void __cpu_die(unsigned int cpu)
 	}
  	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 }
-#else /* !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
-{
-	return -ENOSYS;
-}
-
-void __cpu_die(unsigned int cpu)
-{
-	/* We said "no" in __cpu_disable */
-	BUG();
-}
 #endif /* CONFIG_HOTPLUG_CPU */
 
 void
@@ -772,8 +712,7 @@ smp_cpus_done (unsigned int dummy)
 	       (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
 }
 
-static inline void __devinit
-set_cpu_sibling_map(int cpu)
+static inline void set_cpu_sibling_map(int cpu)
 {
 	int i;
 
@@ -782,15 +721,15 @@ set_cpu_sibling_map(int cpu)
 			cpu_set(i, cpu_core_map[cpu]);
 			cpu_set(cpu, cpu_core_map[i]);
 			if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) {
-				cpu_set(i, cpu_sibling_map[cpu]);
-				cpu_set(cpu, cpu_sibling_map[i]);
+				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
 			}
 		}
 	}
 }
 
-int __devinit
-__cpu_up (unsigned int cpu)
+int
+__cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int ret;
 	int sapicid;
@@ -808,13 +747,13 @@ __cpu_up (unsigned int cpu)
 
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 	/* Processor goes to start_secondary(), sets online flag */
-	ret = do_boot_cpu(sapicid, cpu);
+	ret = do_boot_cpu(sapicid, cpu, tidle);
 	if (ret < 0)
 		return ret;
 
 	if (cpu_data(cpu)->threads_per_core == 1 &&
 	    cpu_data(cpu)->cores_per_socket == 1) {
-		cpu_set(cpu, cpu_sibling_map[cpu]);
+		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
 		cpu_set(cpu, cpu_core_map[cpu]);
 		return 0;
 	}
@@ -825,7 +764,7 @@ __cpu_up (unsigned int cpu)
 }
 
 /*
- * Assume that CPU's have been discovered by some platform-dependent interface.  For
+ * Assume that CPUs have been discovered by some platform-dependent interface.  For
  * SoftSDV/Lion, that would be ACPI.
  *
  * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
@@ -839,7 +778,7 @@ init_smp_config(void)
 	} *ap_startup;
 	long sal_ret;
 
-	/* Tell SAL where to drop the AP's.  */
+	/* Tell SAL where to drop the APs.  */
 	ap_startup = (struct fptr *) start_ap;
 	sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
 				       ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0);
@@ -852,27 +791,40 @@ init_smp_config(void)
  * identify_siblings(cpu) gets called from identify_cpu. This populates the 
  * information related to logical execution units in per_cpu_data structure.
  */
-void __devinit
-identify_siblings(struct cpuinfo_ia64 *c)
+void identify_siblings(struct cpuinfo_ia64 *c)
 {
-	s64 status;
+	long status;
 	u16 pltid;
 	pal_logical_to_physical_t info;
 
-	if (smp_num_cpucores == 1 && smp_num_siblings == 1)
-		return;
+	status = ia64_pal_logical_to_phys(-1, &info);
+	if (status != PAL_STATUS_SUCCESS) {
+		if (status != PAL_STATUS_UNIMPLEMENTED) {
+			printk(KERN_ERR
+				"ia64_pal_logical_to_phys failed with %ld\n",
+				status);
+			return;
+		}
 
-	if ((status = ia64_pal_logical_to_phys(-1, &info)) != PAL_STATUS_SUCCESS) {
-		printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
-		       status);
-		return;
+		info.overview_ppid = 0;
+		info.overview_cpp  = 1;
+		info.overview_tpc  = 1;
 	}
-	if ((status = ia64_sal_physical_id_info(&pltid)) != PAL_STATUS_SUCCESS) {
-		printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status);
+
+	status = ia64_sal_physical_id_info(&pltid);
+	if (status != PAL_STATUS_SUCCESS) {
+		if (status != PAL_STATUS_UNIMPLEMENTED)
+			printk(KERN_ERR
+				"ia64_sal_pltid failed with %ld\n",
+				status);
 		return;
 	}
 
 	c->socket_id =  (pltid << 8) | info.overview_ppid;
+
+	if (info.overview_cpp == 1 && info.overview_tpc == 1)
+		return;
+
 	c->cores_per_socket = info.overview_cpp;
 	c->threads_per_core = info.overview_tpc;
 	c->num_log = info.overview_num_log;
@@ -880,3 +832,27 @@ identify_siblings(struct cpuinfo_ia64 *c)
 	c->core_id = info.log1_cid;
 	c->thread_id = info.log1_tid;
 }
+
+/*
+ * returns non zero, if multi-threading is enabled
+ * on at least one physical package. Due to hotplug cpu
+ * and (maxcpus=), all threads may not necessarily be enabled
+ * even though the processor supports multi-threading.
+ */
+int is_multithreading_enabled(void)
+{
+	int i, j;
+
+	for_each_present_cpu(i) {
+		for_each_present_cpu(j) {
+			if (j == i)
+				continue;
+			if ((cpu_data(j)->socket_id == cpu_data(i)->socket_id)) {
+				if (cpu_data(j)->core_id == cpu_data(i)->core_id)
+					return 1;
+			}
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(is_multithreading_enabled);
diff --git a/arch/ia64/kernel/stacktrace.c b/arch/ia64/kernel/stacktrace.c
new file mode 100644
index 00000000000..5af2783a87f
--- /dev/null
+++ b/arch/ia64/kernel/stacktrace.c
@@ -0,0 +1,39 @@
+/*
+ * arch/ia64/kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ */
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <linux/module.h>
+
+static void
+ia64_do_save_stack(struct unw_frame_info *info, void *arg)
+{
+	struct stack_trace *trace = arg;
+	unsigned long ip;
+	int skip = trace->skip;
+
+	trace->nr_entries = 0;
+	do {
+		unw_get_ip(info, &ip);
+		if (ip == 0)
+			break;
+		if (skip == 0) {
+			trace->entries[trace->nr_entries++] = ip;
+			if (trace->nr_entries == trace->max_entries)
+				break;
+		} else
+			skip--;
+	} while (unw_unwind(info) >= 0);
+}
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ */
+void save_stack_trace(struct stack_trace *trace)
+{
+	unw_init_running(ia64_do_save_stack, trace);
+}
+EXPORT_SYMBOL(save_stack_trace);
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index c7b943f1019..41e33f84c18 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -5,7 +5,6 @@
  * Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
-#include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
@@ -14,7 +13,6 @@
 #include <linux/shm.h>
 #include <linux/file.h>		/* doh, must come after sched.h... */
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/syscalls.h>
 #include <linux/highuid.h>
 #include <linux/hugetlb.h>
@@ -27,19 +25,26 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
 			unsigned long pgoff, unsigned long flags)
 {
 	long map_shared = (flags & MAP_SHARED);
-	unsigned long start_addr, align_mask = PAGE_SIZE - 1;
+	unsigned long align_mask = 0;
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_unmapped_area_info info;
 
 	if (len > RGN_MAP_LIMIT)
 		return -ENOMEM;
 
+	/* handle fixed mapping: prevent overlap with huge pages */
+	if (flags & MAP_FIXED) {
+		if (is_hugepage_only_range(mm, addr, len))
+			return -EINVAL;
+		return addr;
+	}
+
 #ifdef CONFIG_HUGETLB_PAGE
 	if (REGION_NUMBER(addr) == RGN_HPAGE)
 		addr = 0;
 #endif
 	if (!addr)
-		addr = mm->free_area_cache;
+		addr = TASK_UNMAPPED_BASE;
 
 	if (map_shared && (TASK_SIZE > 0xfffffffful))
 		/*
@@ -48,28 +53,15 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
 		 * tasks, we prefer to avoid exhausting the address space too quickly by
 		 * limiting alignment to a single page.
 		 */
-		align_mask = SHMLBA - 1;
-
-  full_search:
-	start_addr = addr = (addr + align_mask) & ~align_mask;
-
-	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
-		/* At this point:  (!vma || addr < vma->vm_end). */
-		if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) {
-			if (start_addr != TASK_UNMAPPED_BASE) {
-				/* Start a new search --- just in case we missed some holes.  */
-				addr = TASK_UNMAPPED_BASE;
-				goto full_search;
-			}
-			return -ENOMEM;
-		}
-		if (!vma || addr + len <= vma->vm_start) {
-			/* Remember the address where we stopped this search:  */
-			mm->free_area_cache = addr + len;
-			return addr;
-		}
-		addr = (vma->vm_end + align_mask) & ~align_mask;
-	}
+		align_mask = PAGE_MASK & (SHMLBA - 1);
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = addr;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = align_mask;
+	info.align_offset = 0;
+	return vm_unmapped_area(&info);
 }
 
 asmlinkage long
@@ -95,51 +87,7 @@ sys_getpagesize (void)
 asmlinkage unsigned long
 ia64_brk (unsigned long brk)
 {
-	unsigned long rlim, retval, newbrk, oldbrk;
-	struct mm_struct *mm = current->mm;
-
-	/*
-	 * Most of this replicates the code in sys_brk() except for an additional safety
-	 * check and the clearing of r8.  However, we can't call sys_brk() because we need
-	 * to acquire the mmap_sem before we can do the test...
-	 */
-	down_write(&mm->mmap_sem);
-
-	if (brk < mm->end_code)
-		goto out;
-	newbrk = PAGE_ALIGN(brk);
-	oldbrk = PAGE_ALIGN(mm->brk);
-	if (oldbrk == newbrk)
-		goto set_brk;
-
-	/* Always allow shrinking brk. */
-	if (brk <= mm->brk) {
-		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
-			goto set_brk;
-		goto out;
-	}
-
-	/* Check against unimplemented/unmapped addresses: */
-	if ((newbrk - oldbrk) > RGN_MAP_LIMIT || REGION_OFFSET(newbrk) > RGN_MAP_LIMIT)
-		goto out;
-
-	/* Check against rlimit.. */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
-	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
-		goto out;
-
-	/* Check against existing mmap mappings. */
-	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
-		goto out;
-
-	/* Ok, looks good - let it rip. */
-	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
-		goto out;
-set_brk:
-	mm->brk = brk;
-out:
-	retval = mm->brk;
-	up_write(&mm->mmap_sem);
+	unsigned long retval = sys_brk(brk);
 	force_successful_syscall_return();
 	return retval;
 }
@@ -149,13 +97,13 @@ out:
  * and r9) as this is faster than doing a copy_to_user().
  */
 asmlinkage long
-sys_pipe (void)
+sys_ia64_pipe (void)
 {
 	struct pt_regs *regs = task_pt_regs(current);
 	int fd[2];
 	int retval;
 
-	retval = do_pipe(fd);
+	retval = do_pipe_flags(fd, 0);
 	if (retval)
 		goto out;
 	retval = fd[0];
@@ -164,49 +112,20 @@ sys_pipe (void)
 	return retval;
 }
 
-static inline unsigned long
-do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff)
+int ia64_mmap_check(unsigned long addr, unsigned long len,
+		unsigned long flags)
 {
 	unsigned long roff;
-	struct file *file = NULL;
-
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
-	if (!(flags & MAP_ANONYMOUS)) {
-		file = fget(fd);
-		if (!file)
-			return -EBADF;
-
-		if (!file->f_op || !file->f_op->mmap) {
-			addr = -ENODEV;
-			goto out;
-		}
-	}
-
-	/* Careful about overflows.. */
-	len = PAGE_ALIGN(len);
-	if (!len || len > TASK_SIZE) {
-		addr = -EINVAL;
-		goto out;
-	}
 
 	/*
-	 * Don't permit mappings into unmapped space, the virtual page table of a region,
-	 * or across a region boundary.  Note: RGN_MAP_LIMIT is equal to 2^n-PAGE_SIZE
-	 * (for some integer n <= 61) and len > 0.
+	 * Don't permit mappings into unmapped space, the virtual page table
+	 * of a region, or across a region boundary.  Note: RGN_MAP_LIMIT is
+	 * equal to 2^n-PAGE_SIZE (for some integer n <= 61) and len > 0.
 	 */
 	roff = REGION_OFFSET(addr);
-	if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) {
-		addr = -EINVAL;
-		goto out;
-	}
-
-	down_write(&current->mm->mmap_sem);
-	addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
-
-out:	if (file)
-		fput(file);
-	return addr;
+	if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len)))
+		return -EINVAL;
+	return 0;
 }
 
 /*
@@ -217,7 +136,7 @@ out:	if (file)
 asmlinkage unsigned long
 sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff)
 {
-	addr = do_mmap2(addr, len, prot, flags, fd, pgoff);
+	addr = sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
 	if (!IS_ERR((void *) addr))
 		force_successful_syscall_return();
 	return addr;
@@ -229,7 +148,7 @@ sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, lo
 	if (offset_in_page(off) != 0)
 		return -EINVAL;
 
-	addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+	addr = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
 	if (!IS_ERR((void *) addr))
 		force_successful_syscall_return();
 	return addr;
@@ -239,22 +158,9 @@ asmlinkage unsigned long
 ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags,
 	     unsigned long new_addr)
 {
-	extern unsigned long do_mremap (unsigned long addr,
-					unsigned long old_len,
-					unsigned long new_len,
-					unsigned long flags,
-					unsigned long new_addr);
-
-	down_write(&current->mm->mmap_sem);
-	{
-		addr = do_mremap(addr, old_len, new_len, flags, new_addr);
-	}
-	up_write(&current->mm->mmap_sem);
-
-	if (IS_ERR((void *) addr))
-		return addr;
-
-	force_successful_syscall_return();
+	addr = sys_mremap(addr, old_len, new_len, flags, new_addr);
+	if (!IS_ERR((void *) addr))
+		force_successful_syscall_return();
 	return addr;
 }
 
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 49958904045..71c52bc7c28 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -8,7 +8,6 @@
  * Copyright (C) 1999-2000 VA Linux Systems
  * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
  */
-#include <linux/config.h>
 
 #include <linux/cpu.h>
 #include <linux/init.h>
@@ -19,18 +18,25 @@
 #include <linux/time.h>
 #include <linux/interrupt.h>
 #include <linux/efi.h>
-#include <linux/profile.h>
 #include <linux/timex.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/platform_device.h>
 
 #include <asm/machvec.h>
 #include <asm/delay.h>
 #include <asm/hw_irq.h>
+#include <asm/paravirt.h>
 #include <asm/ptrace.h>
 #include <asm/sal.h>
 #include <asm/sections.h>
-#include <asm/system.h>
 
-extern unsigned long wall_jiffies;
+#include "fsyscall_gtod_data.h"
+
+static cycle_t itc_get_cycles(struct clocksource *cs);
+
+struct fsyscall_gtod_data_t fsyscall_gtod_data;
+
+struct itc_jitter_data_t itc_jitter_data;
 
 volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
 
@@ -41,22 +47,114 @@ EXPORT_SYMBOL(last_cli_ip);
 
 #endif
 
-static struct time_interpolator itc_interpolator = {
-	.shift = 16,
-	.mask = 0xffffffffffffffffLL,
-	.source = TIME_SOURCE_CPU
+#ifdef CONFIG_PARAVIRT
+/* We need to define a real function for sched_clock, to override the
+   weak default version */
+unsigned long long sched_clock(void)
+{
+        return paravirt_sched_clock();
+}
+#endif
+
+#ifdef CONFIG_PARAVIRT
+static void
+paravirt_clocksource_resume(struct clocksource *cs)
+{
+	if (pv_time_ops.clocksource_resume)
+		pv_time_ops.clocksource_resume();
+}
+#endif
+
+static struct clocksource clocksource_itc = {
+	.name           = "itc",
+	.rating         = 350,
+	.read           = itc_get_cycles,
+	.mask           = CLOCKSOURCE_MASK(64),
+	.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
+#ifdef CONFIG_PARAVIRT
+	.resume		= paravirt_clocksource_resume,
+#endif
 };
+static struct clocksource *itc_clocksource;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
+#include <linux/kernel_stat.h>
+
+extern cputime_t cycle_to_cputime(u64 cyc);
+
+void vtime_account_user(struct task_struct *tsk)
+{
+	cputime_t delta_utime;
+	struct thread_info *ti = task_thread_info(tsk);
+
+	if (ti->ac_utime) {
+		delta_utime = cycle_to_cputime(ti->ac_utime);
+		account_user_time(tsk, delta_utime, delta_utime);
+		ti->ac_utime = 0;
+	}
+}
+
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void arch_vtime_task_switch(struct task_struct *prev)
+{
+	struct thread_info *pi = task_thread_info(prev);
+	struct thread_info *ni = task_thread_info(current);
+
+	pi->ac_stamp = ni->ac_stamp;
+	ni->ac_stime = ni->ac_utime = 0;
+}
+
+/*
+ * Account time for a transition between system, hard irq or soft irq state.
+ * Note that this function is called with interrupts enabled.
+ */
+static cputime_t vtime_delta(struct task_struct *tsk)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	cputime_t delta_stime;
+	__u64 now;
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
+	ti->ac_stime = 0;
+	ti->ac_stamp = now;
+
+	return delta_stime;
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+	cputime_t delta = vtime_delta(tsk);
+
+	account_system_time(tsk, 0, delta, delta);
+}
+EXPORT_SYMBOL_GPL(vtime_account_system);
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+	account_idle_time(vtime_delta(tsk));
+}
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static irqreturn_t
-timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+timer_interrupt (int irq, void *dev_id)
 {
 	unsigned long new_itm;
 
-	if (unlikely(cpu_is_offline(smp_processor_id()))) {
+	if (cpu_is_offline(smp_processor_id())) {
 		return IRQ_HANDLED;
 	}
 
-	platform_timer_interrupt(irq, dev_id, regs);
+	platform_timer_interrupt(irq, dev_id);
 
 	new_itm = local_cpu_data->itm_next;
 
@@ -64,38 +162,40 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
 		printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
 		       ia64_get_itc(), new_itm);
 
-	profile_tick(CPU_PROFILING, regs);
+	profile_tick(CPU_PROFILING);
+
+	if (paravirt_do_steal_accounting(&new_itm))
+		goto skip_process_time_accounting;
 
 	while (1) {
-		update_process_times(user_mode(regs));
+		update_process_times(user_mode(get_irq_regs()));
 
 		new_itm += local_cpu_data->itm_delta;
 
-		if (smp_processor_id() == time_keeper_id) {
-			/*
-			 * Here we are in the timer irq handler. We have irqs locally
-			 * disabled, but we don't know if the timer_bh is running on
-			 * another CPU. We need to avoid to SMP race by acquiring the
-			 * xtime_lock.
-			 */
-			write_seqlock(&xtime_lock);
-			do_timer(regs);
-			local_cpu_data->itm_next = new_itm;
-			write_sequnlock(&xtime_lock);
-		} else
-			local_cpu_data->itm_next = new_itm;
+		if (smp_processor_id() == time_keeper_id)
+			xtime_update(1);
+
+		local_cpu_data->itm_next = new_itm;
 
 		if (time_after(new_itm, ia64_get_itc()))
 			break;
+
+		/*
+		 * Allow IPIs to interrupt the timer loop.
+		 */
+		local_irq_enable();
+		local_irq_disable();
 	}
 
+skip_process_time_accounting:
+
 	do {
 		/*
 		 * If we're too close to the next clock tick for
 		 * comfort, we increase the safety margin by
 		 * intentionally dropping the next tick(s).  We do NOT
 		 * update itm.next because that would force us to call
-		 * do_timer() which in turn would let our clock run
+		 * xtime_update() which in turn would let our clock run
 		 * too fast (with the potentially devastating effect
 		 * of losing monotony of time).
 		 */
@@ -144,8 +244,7 @@ static int __init nojitter_setup(char *str)
 __setup("nojitter", nojitter_setup);
 
 
-void __devinit
-ia64_init_itm (void)
+void ia64_init_itm(void)
 {
 	unsigned long platform_base_freq, itc_freq;
 	struct pal_freq_ratio itc_ratio, proc_ratio;
@@ -208,8 +307,6 @@ ia64_init_itm (void)
 					+ itc_freq/2)/itc_freq;
 
 	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
-		itc_interpolator.frequency = local_cpu_data->itc_freq;
-		itc_interpolator.drift = itc_drift;
 #ifdef CONFIG_SMP
 		/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
 		 * Jitter compensation requires a cmpxchg which may limit
@@ -221,38 +318,97 @@ ia64_init_itm (void)
 		 * even going backward) if the ITC offsets between the individual CPUs
 		 * are too large.
 		 */
-		if (!nojitter) itc_interpolator.jitter = 1;
+		if (!nojitter)
+			itc_jitter_data.itc_jitter = 1;
 #endif
-		register_time_interpolator(&itc_interpolator);
-	}
+	} else
+		/*
+		 * ITC is drifty and we have not synchronized the ITCs in smpboot.c.
+		 * ITC values may fluctuate significantly between processors.
+		 * Clock should not be used for hrtimers. Mark itc as only
+		 * useful for boot and testing.
+		 *
+		 * Note that jitter compensation is off! There is no point of
+		 * synchronizing ITCs since they may be large differentials
+		 * that change over time.
+		 *
+		 * The only way to fix this would be to repeatedly sync the
+		 * ITCs. Until that time we have to avoid ITC.
+		 */
+		clocksource_itc.rating = 50;
+
+	paravirt_init_missing_ticks_accounting(smp_processor_id());
+
+	/* avoid softlock up message when cpu is unplug and plugged again. */
+	touch_softlockup_watchdog();
 
 	/* Setup the CPU local timer tick */
 	ia64_cpu_local_tick();
+
+	if (!itc_clocksource) {
+		clocksource_register_hz(&clocksource_itc,
+						local_cpu_data->itc_freq);
+		itc_clocksource = &clocksource_itc;
+	}
 }
 
+static cycle_t itc_get_cycles(struct clocksource *cs)
+{
+	unsigned long lcycle, now, ret;
+
+	if (!itc_jitter_data.itc_jitter)
+		return get_cycles();
+
+	lcycle = itc_jitter_data.itc_lastcycle;
+	now = get_cycles();
+	if (lcycle && time_after(lcycle, now))
+		return lcycle;
+
+	/*
+	 * Keep track of the last timer value returned.
+	 * In an SMP environment, you could lose out in contention of
+	 * cmpxchg. If so, your cmpxchg returns new value which the
+	 * winner of contention updated to. Use the new value instead.
+	 */
+	ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, now);
+	if (unlikely(ret != lcycle))
+		return ret;
+
+	return now;
+}
+
+
 static struct irqaction timer_irqaction = {
 	.handler =	timer_interrupt,
-	.flags =	SA_INTERRUPT,
+	.flags =	IRQF_IRQPOLL,
 	.name =		"timer"
 };
 
-void __devinit ia64_disable_timer(void)
+static struct platform_device rtc_efi_dev = {
+	.name = "rtc-efi",
+	.id = -1,
+};
+
+static int __init rtc_init(void)
 {
-	ia64_set_itv(1 << 16);
+	if (platform_device_register(&rtc_efi_dev) < 0)
+		printk(KERN_ERR "unable to register rtc device...\n");
+
+	/* not necessarily an error */
+	return 0;
+}
+module_init(rtc_init);
+
+void read_persistent_clock(struct timespec *ts)
+{
+	efi_gettimeofday(ts);
 }
 
 void __init
 time_init (void)
 {
 	register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
-	efi_gettimeofday(&xtime);
 	ia64_init_itm();
-
-	/*
-	 * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the
-	 * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
-	 */
-	set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
 }
 
 /*
@@ -279,29 +435,37 @@ udelay (unsigned long usecs)
 }
 EXPORT_SYMBOL(udelay);
 
-static unsigned long long ia64_itc_printk_clock(void)
+/* IA64 doesn't cache the timezone */
+void update_vsyscall_tz(void)
 {
-	if (ia64_get_kr(IA64_KR_PER_CPU_DATA))
-		return sched_clock();
-	return 0;
 }
 
-static unsigned long long ia64_default_printk_clock(void)
+void update_vsyscall_old(struct timespec *wall, struct timespec *wtm,
+			struct clocksource *c, u32 mult)
 {
-	return (unsigned long long)(jiffies_64 - INITIAL_JIFFIES) *
-		(1000000000/HZ);
-}
-
-unsigned long long (*ia64_printk_clock)(void) = &ia64_default_printk_clock;
+	write_seqcount_begin(&fsyscall_gtod_data.seq);
+
+        /* copy fsyscall clock data */
+        fsyscall_gtod_data.clk_mask = c->mask;
+        fsyscall_gtod_data.clk_mult = mult;
+        fsyscall_gtod_data.clk_shift = c->shift;
+        fsyscall_gtod_data.clk_fsys_mmio = c->archdata.fsys_mmio;
+        fsyscall_gtod_data.clk_cycle_last = c->cycle_last;
+
+	/* copy kernel time structures */
+        fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec;
+        fsyscall_gtod_data.wall_time.tv_nsec = wall->tv_nsec;
+	fsyscall_gtod_data.monotonic_time.tv_sec = wtm->tv_sec
+							+ wall->tv_sec;
+	fsyscall_gtod_data.monotonic_time.tv_nsec = wtm->tv_nsec
+							+ wall->tv_nsec;
+
+	/* normalize */
+	while (fsyscall_gtod_data.monotonic_time.tv_nsec >= NSEC_PER_SEC) {
+		fsyscall_gtod_data.monotonic_time.tv_nsec -= NSEC_PER_SEC;
+		fsyscall_gtod_data.monotonic_time.tv_sec++;
+	}
 
-unsigned long long printk_clock(void)
-{
-	return ia64_printk_clock();
+	write_seqcount_end(&fsyscall_gtod_data.seq);
 }
 
-void __init
-ia64_setup_printk_clock(void)
-{
-	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT))
-		ia64_printk_clock = ia64_itc_printk_clock;
-}
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 4f3a16b37f8..f295f9abba4 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -13,59 +13,61 @@
  *	Populate cpu cache entries in sysfs for cpu cache info
  */
 
-#include <linux/config.h>
 #include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/node.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
 #include <linux/nodemask.h>
 #include <linux/notifier.h>
+#include <linux/export.h>
 #include <asm/mmzone.h>
 #include <asm/numa.h>
 #include <asm/cpu.h>
 
-#ifdef CONFIG_NUMA
-static struct node *sysfs_nodes;
-#endif
 static struct ia64_cpu *sysfs_cpus;
 
-int arch_register_cpu(int num)
+void arch_fix_phys_package_id(int num, u32 slot)
 {
-	struct node *parent = NULL;
-	
-#ifdef CONFIG_NUMA
-	parent = &sysfs_nodes[cpu_to_node(num)];
-#endif /* CONFIG_NUMA */
+#ifdef CONFIG_SMP
+	if (cpu_data(num)->socket_id == -1)
+		cpu_data(num)->socket_id = slot;
+#endif
+}
+EXPORT_SYMBOL_GPL(arch_fix_phys_package_id);
 
-#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
+
+#ifdef CONFIG_HOTPLUG_CPU
+int __ref arch_register_cpu(int num)
+{
+#ifdef CONFIG_ACPI
 	/*
-	 * If CPEI cannot be re-targetted, and this is
-	 * CPEI target, then dont create the control file
+	 * If CPEI can be re-targeted or if this is not
+	 * CPEI target, then it is hotpluggable
 	 */
-	if (!can_cpei_retarget() && is_cpu_cpei_target(num))
-		sysfs_cpus[num].cpu.no_control = 1;
+	if (can_cpei_retarget() || !is_cpu_cpei_target(num))
+		sysfs_cpus[num].cpu.hotpluggable = 1;
+	map_cpu_to_node(num, node_cpuid[num].nid);
 #endif
-
-	return register_cpu(&sysfs_cpus[num].cpu, num, parent);
+	return register_cpu(&sysfs_cpus[num].cpu, num);
 }
+EXPORT_SYMBOL(arch_register_cpu);
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-void arch_unregister_cpu(int num)
+void __ref arch_unregister_cpu(int num)
 {
-	struct node *parent = NULL;
-
-#ifdef CONFIG_NUMA
-	int node = cpu_to_node(num);
-	parent = &sysfs_nodes[node];
-#endif /* CONFIG_NUMA */
-
-	return unregister_cpu(&sysfs_cpus[num].cpu, parent);
+	unregister_cpu(&sysfs_cpus[num].cpu);
+#ifdef CONFIG_ACPI
+	unmap_cpu_from_node(num, cpu_to_node(num));
+#endif
 }
-EXPORT_SYMBOL(arch_register_cpu);
 EXPORT_SYMBOL(arch_unregister_cpu);
+#else
+static int __init arch_register_cpu(int num)
+{
+	return register_cpu(&sysfs_cpus[num].cpu, num);
+}
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 
@@ -74,26 +76,18 @@ static int __init topology_init(void)
 	int i, err = 0;
 
 #ifdef CONFIG_NUMA
-	sysfs_nodes = kzalloc(sizeof(struct node) * MAX_NUMNODES, GFP_KERNEL);
-	if (!sysfs_nodes) {
-		err = -ENOMEM;
-		goto out;
-	}
-
 	/*
 	 * MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes?
 	 */
 	for_each_online_node(i) {
-		if ((err = register_node(&sysfs_nodes[i], i, 0)))
+		if ((err = register_one_node(i)))
 			goto out;
 	}
 #endif
 
 	sysfs_cpus = kzalloc(sizeof(struct ia64_cpu) * NR_CPUS, GFP_KERNEL);
-	if (!sysfs_cpus) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!sysfs_cpus)
+		panic("kzalloc in topology_init failed - NR_CPUS too big?");
 
 	for_each_present_cpu(i) {
 		if((err = arch_register_cpu(i)))
@@ -145,7 +139,7 @@ static struct cpu_cache_info	all_cpu_cache_info[NR_CPUS];
 #define LEAF_KOBJECT_PTR(x,y)    (&all_cpu_cache_info[x].cache_leaves[y])
 
 #ifdef CONFIG_SMP
-static void cache_shared_cpu_map_setup( unsigned int cpu,
+static void cache_shared_cpu_map_setup(unsigned int cpu,
 		struct cache_info * this_leaf)
 {
 	pal_cache_shared_info_t	csi;
@@ -166,7 +160,7 @@ static void cache_shared_cpu_map_setup( unsigned int cpu,
 
 	num_shared = (int) csi.num_shared;
 	do {
-		for_each_cpu(j)
+		for_each_possible_cpu(j)
 			if (cpu_data(cpu)->socket_id == cpu_data(j)->socket_id
 				&& cpu_data(j)->core_id == csi.log1_cid
 				&& cpu_data(j)->thread_id == csi.log1_tid)
@@ -226,8 +220,9 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
 	ssize_t	len;
 	cpumask_t shared_cpu_map;
 
-	cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
-	len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map);
+	cpumask_and(&shared_cpu_map,
+				&this_leaf->shared_cpu_map, cpu_online_mask);
+	len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
 	len += sprintf(buf+len, "\n");
 	return len;
 }
@@ -280,7 +275,7 @@ static struct attribute * cache_default_attrs[] = {
 #define to_object(k) container_of(k, struct cache_info, kobj)
 #define to_attr(a) container_of(a, struct cache_attr, attr)
 
-static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char * buf)
+static ssize_t ia64_cache_show(struct kobject * kobj, struct attribute * attr, char * buf)
 {
 	struct cache_attr *fattr = to_attr(attr);
 	struct cache_info *this_leaf = to_object(kobj);
@@ -290,8 +285,8 @@ static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char *
 	return ret;
 }
 
-static struct sysfs_ops cache_sysfs_ops = {
-	.show   = cache_show
+static const struct sysfs_ops cache_sysfs_ops = {
+	.show   = ia64_cache_show
 };
 
 static struct kobj_type cache_ktype = {
@@ -303,7 +298,7 @@ static struct kobj_type cache_ktype_percpu_entry = {
 	.sysfs_ops	= &cache_sysfs_ops,
 };
 
-static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu)
+static void cpu_cache_sysfs_exit(unsigned int cpu)
 {
 	kfree(all_cpu_cache_info[cpu].cache_leaves);
 	all_cpu_cache_info[cpu].cache_leaves = NULL;
@@ -312,12 +307,12 @@ static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu)
 	return;
 }
 
-static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu)
+static int cpu_cache_sysfs_init(unsigned int cpu)
 {
-	u64 i, levels, unique_caches;
+	unsigned long i, levels, unique_caches;
 	pal_cache_config_info_t cci;
 	int j;
-	s64 status;
+	long status;
 	struct cache_info *this_cache;
 	int num_cache_leaves = 0;
 
@@ -356,7 +351,7 @@ static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu)
 }
 
 /* Add cache interface for CPU device */
-static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
+static int cache_add_dev(struct device *sys_dev)
 {
 	unsigned int cpu = sys_dev->id;
 	unsigned long i, j;
@@ -368,50 +363,54 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 		return 0;
 
 	oldmask = current->cpus_allowed;
-	retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	retval = set_cpus_allowed_ptr(current, cpumask_of(cpu));
 	if (unlikely(retval))
 		return retval;
 
 	retval = cpu_cache_sysfs_init(cpu);
-	set_cpus_allowed(current, oldmask);
+	set_cpus_allowed_ptr(current, &oldmask);
 	if (unlikely(retval < 0))
 		return retval;
 
-	all_cpu_cache_info[cpu].kobj.parent = &sys_dev->kobj;
-	kobject_set_name(&all_cpu_cache_info[cpu].kobj, "%s", "cache");
-	all_cpu_cache_info[cpu].kobj.ktype = &cache_ktype_percpu_entry;
-	retval = kobject_register(&all_cpu_cache_info[cpu].kobj);
+	retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj,
+				      &cache_ktype_percpu_entry, &sys_dev->kobj,
+				      "%s", "cache");
+	if (unlikely(retval < 0)) {
+		cpu_cache_sysfs_exit(cpu);
+		return retval;
+	}
 
 	for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) {
 		this_object = LEAF_KOBJECT_PTR(cpu,i);
-		this_object->kobj.parent = &all_cpu_cache_info[cpu].kobj;
-		kobject_set_name(&(this_object->kobj), "index%1lu", i);
-		this_object->kobj.ktype = &cache_ktype;
-		retval = kobject_register(&(this_object->kobj));
+		retval = kobject_init_and_add(&(this_object->kobj),
+					      &cache_ktype,
+					      &all_cpu_cache_info[cpu].kobj,
+					      "index%1lu", i);
 		if (unlikely(retval)) {
 			for (j = 0; j < i; j++) {
-				kobject_unregister(
-					&(LEAF_KOBJECT_PTR(cpu,j)->kobj));
+				kobject_put(&(LEAF_KOBJECT_PTR(cpu,j)->kobj));
 			}
-			kobject_unregister(&all_cpu_cache_info[cpu].kobj);
+			kobject_put(&all_cpu_cache_info[cpu].kobj);
 			cpu_cache_sysfs_exit(cpu);
-			break;
+			return retval;
 		}
+		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
 	}
+	kobject_uevent(&all_cpu_cache_info[cpu].kobj, KOBJ_ADD);
 	return retval;
 }
 
 /* Remove cache interface for CPU device */
-static int __cpuinit cache_remove_dev(struct sys_device * sys_dev)
+static int cache_remove_dev(struct device *sys_dev)
 {
 	unsigned int cpu = sys_dev->id;
 	unsigned long i;
 
 	for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++)
-		kobject_unregister(&(LEAF_KOBJECT_PTR(cpu,i)->kobj));
+		kobject_put(&(LEAF_KOBJECT_PTR(cpu,i)->kobj));
 
 	if (all_cpu_cache_info[cpu].kobj.parent) {
-		kobject_unregister(&all_cpu_cache_info[cpu].kobj);
+		kobject_put(&all_cpu_cache_info[cpu].kobj);
 		memset(&all_cpu_cache_info[cpu].kobj,
 			0,
 			sizeof(struct kobject));
@@ -430,14 +429,16 @@ static int cache_cpu_callback(struct notifier_block *nfb,
 		unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
-	struct sys_device *sys_dev;
+	struct device *sys_dev;
 
-	sys_dev = get_cpu_sysdev(cpu);
+	sys_dev = get_cpu_device(cpu);
 	switch (action) {
 	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
 		cache_add_dev(sys_dev);
 		break;
 	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
 		cache_remove_dev(sys_dev);
 		break;
 	}
@@ -449,16 +450,20 @@ static struct notifier_block cache_cpu_notifier =
 	.notifier_call = cache_cpu_callback
 };
 
-static int __cpuinit cache_sysfs_init(void)
+static int __init cache_sysfs_init(void)
 {
 	int i;
 
+	cpu_notifier_register_begin();
+
 	for_each_online_cpu(i) {
-		cache_cpu_callback(&cache_cpu_notifier, CPU_ONLINE,
-				(void *)(long)i);
+		struct device *sys_dev = get_cpu_device((unsigned int)i);
+		cache_add_dev(sys_dev);
 	}
 
-	register_cpu_notifier(&cache_cpu_notifier);
+	__register_hotcpu_notifier(&cache_cpu_notifier);
+
+	cpu_notifier_register_done();
 
 	return 0;
 }
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 7c1ddc8ac44..d3636e67a98 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -7,7 +7,6 @@
  * 05/12/00 grao <goutham.rao@intel.com> : added isr in siginfo for SIGFPE
  */
 
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/sched.h>
@@ -17,35 +16,17 @@
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
 #include <linux/delay.h>		/* for ssleep() */
+#include <linux/kdebug.h>
 
 #include <asm/fpswa.h>
-#include <asm/ia32.h>
 #include <asm/intrinsics.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
-#include <asm/kdebug.h>
-
-extern spinlock_t timerlist_lock;
+#include <asm/setup.h>
 
 fpswa_interface_t *fpswa_interface;
 EXPORT_SYMBOL(fpswa_interface);
 
-ATOMIC_NOTIFIER_HEAD(ia64die_chain);
-
-int
-register_die_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_register(&ia64die_chain, nb);
-}
-EXPORT_SYMBOL_GPL(register_die_notifier);
-
-int
-unregister_die_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&ia64die_chain, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_die_notifier);
-
 void __init
 trap_init (void)
 {
@@ -54,35 +35,7 @@ trap_init (void)
 		fpswa_interface = __va(ia64_boot_param->fpswa);
 }
 
-/*
- * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock
- * is acquired through the console unblank code)
- */
-void
-bust_spinlocks (int yes)
-{
-	int loglevel_save = console_loglevel;
-
-	if (yes) {
-		oops_in_progress = 1;
-		return;
-	}
-
-#ifdef CONFIG_VT
-	unblank_screen();
-#endif
-	oops_in_progress = 0;
-	/*
-	 * OK, the message is on the console.  Now we call printk() without
-	 * oops_in_progress set so that printk will give klogd a poke.  Hold onto
-	 * your hats...
-	 */
-	console_loglevel = 15;		/* NMI oopser may have shut the console up */
-	printk(" ");
-	console_loglevel = loglevel_save;
-}
-
-void
+int
 die (const char *str, struct pt_regs *regs, long err)
 {
 	static struct {
@@ -90,9 +43,9 @@ die (const char *str, struct pt_regs *regs, long err)
 		u32 lock_owner;
 		int lock_owner_depth;
 	} die = {
-		.lock =			SPIN_LOCK_UNLOCKED,
-		.lock_owner =		-1,
-		.lock_owner_depth =	0
+		.lock =	__SPIN_LOCK_UNLOCKED(die.lock),
+		.lock_owner = -1,
+		.lock_owner_depth = 0
 	};
 	static int die_counter;
 	int cpu = get_cpu();
@@ -108,30 +61,36 @@ die (const char *str, struct pt_regs *regs, long err)
 
 	if (++die.lock_owner_depth < 3) {
 		printk("%s[%d]: %s %ld [%d]\n",
-			current->comm, current->pid, str, err, ++die_counter);
-		(void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
-		show_regs(regs);
+		current->comm, task_pid_nr(current), str, err, ++die_counter);
+		if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
+	            != NOTIFY_STOP)
+			show_regs(regs);
+		else
+			regs = NULL;
   	} else
 		printk(KERN_ERR "Recursive die() failure, output suppressed\n");
 
 	bust_spinlocks(0);
 	die.lock_owner = -1;
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	spin_unlock_irq(&die.lock);
 
-	if (panic_on_oops) {
-		printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
-		ssleep(5);
+	if (!regs)
+		return 1;
+
+	if (panic_on_oops)
 		panic("Fatal exception");
-	}
 
   	do_exit(SIGSEGV);
+	return 0;
 }
 
-void
+int
 die_if_kernel (char *str, struct pt_regs *regs, long err)
 {
 	if (!user_mode(regs))
-		die(str, regs, err);
+		return die(str, regs, err);
+	return 0;
 }
 
 void
@@ -151,7 +110,8 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
 		if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP)
 			       	== NOTIFY_STOP)
 			return;
-		die_if_kernel("bugcheck!", regs, break_num);
+		if (die_if_kernel("bugcheck!", regs, break_num))
+			return;
 		sig = SIGILL; code = ILL_ILLOPC;
 		break;
 
@@ -204,8 +164,9 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
 		break;
 
 	      default:
-		if (break_num < 0x40000 || break_num > 0x100000)
-			die_if_kernel("Bad break", regs, break_num);
+		if ((break_num < 0x40000 || break_num > 0x100000)
+		    && die_if_kernel("Bad break", regs, break_num))
+			return;
 
 		if (break_num < 0x80000) {
 			sig = SIGILL; code = __ILL_BREAK;
@@ -311,6 +272,15 @@ fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long
 	return ret.status;
 }
 
+struct fpu_swa_msg {
+	unsigned long count;
+	unsigned long time;
+};
+static DEFINE_PER_CPU(struct fpu_swa_msg, cpulast);
+DECLARE_PER_CPU(struct fpu_swa_msg, cpulast);
+static struct fpu_swa_msg last __cacheline_aligned;
+
+
 /*
  * Handle floating-point assist faults and traps.
  */
@@ -320,8 +290,6 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
 	long exception, bundle[2];
 	unsigned long fault_ip;
 	struct siginfo siginfo;
-	static int fpu_swa_count = 0;
-	static unsigned long last_time;
 
 	fault_ip = regs->cr_iip;
 	if (!fp_fault && (ia64_psr(regs)->ri == 0))
@@ -329,14 +297,37 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
 	if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle)))
 		return -1;
 
-	if (jiffies - last_time > 5*HZ)
-		fpu_swa_count = 0;
-	if ((fpu_swa_count < 4) && !(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) {
-		last_time = jiffies;
-		++fpu_swa_count;
-		printk(KERN_WARNING
-		       "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
-		       current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr);
+	if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT))  {
+		unsigned long count, current_jiffies = jiffies;
+		struct fpu_swa_msg *cp = &__get_cpu_var(cpulast);
+
+		if (unlikely(current_jiffies > cp->time))
+			cp->count = 0;
+		if (unlikely(cp->count < 5)) {
+			cp->count++;
+			cp->time = current_jiffies + 5 * HZ;
+
+			/* minimize races by grabbing a copy of count BEFORE checking last.time. */
+			count = last.count;
+			barrier();
+
+			/*
+			 * Lower 4 bits are used as a count. Upper bits are a sequence
+			 * number that is updated when count is reset. The cmpxchg will
+			 * fail is seqno has changed. This minimizes mutiple cpus
+			 * resetting the count.
+			 */
+			if (current_jiffies > last.time)
+				(void) cmpxchg_acq(&last.count, count, 16 + (count & ~15));
+
+			/* used fetchadd to atomically update the count */
+			if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
+				last.time = current_jiffies + 5 * HZ;
+				printk(KERN_WARNING
+		       			"%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
+		       			current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
+			}
+		}
 	}
 
 	exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,
@@ -421,14 +412,15 @@ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
 #endif
 
 	sprintf(buf, "IA-64 Illegal operation fault");
-	die_if_kernel(buf, &regs, 0);
+	rv.fkt = 0;
+	if (die_if_kernel(buf, &regs, 0))
+		return rv;
 
 	memset(&si, 0, sizeof(si));
 	si.si_signo = SIGILL;
 	si.si_code = ILL_ILLOPC;
 	si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(&regs)->ri);
 	force_sig_info(SIGILL, &si, current);
-	rv.fkt = 0;
 	return rv;
 }
 
@@ -472,7 +464,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
 		if (code == 8) {
 # ifdef CONFIG_IA64_PRINT_HAZARDS
 			printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
-			       current->comm, current->pid,
+			       current->comm, task_pid_nr(current),
 			       regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
 # endif
 			return;
@@ -634,21 +626,13 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
 		break;
 
 	      case 45:
-#ifdef CONFIG_IA32_SUPPORT
-		if (ia32_exception(&regs, isr) == 0)
-			return;
-#endif
 		printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
 		printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
 		       iip, ifa, isr);
 		force_sig(SIGSEGV, current);
-		break;
+		return;
 
 	      case 46:
-#ifdef CONFIG_IA32_SUPPORT
-		if (ia32_intercept(&regs, isr) == 0)
-			return;
-#endif
 		printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
 		printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
 		       iip, ifa, isr, iim);
@@ -663,6 +647,6 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
 		sprintf(buf, "Fault %lu", vector);
 		break;
 	}
-	die_if_kernel(buf, &regs, error);
-	force_sig(SIGILL, current);
+	if (!die_if_kernel(buf, &regs, error))
+		force_sig(SIGILL, current);
 }
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 1e357550c77..622772b7fb6 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -13,10 +13,11 @@
  * 2001/08/13	Correct size of extended floats (float_fsz) from 16 to 10 bytes.
  * 2001/01/17	Add support emulation of unaligned kernel accesses.
  */
+#include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/tty.h>
+#include <linux/ratelimit.h>
 
 #include <asm/intrinsics.h>
 #include <asm/processor.h>
@@ -24,12 +25,12 @@
 #include <asm/uaccess.h>
 #include <asm/unaligned.h>
 
-extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
+extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
 
 #undef DEBUG_UNALIGNED_TRAP
 
 #ifdef DEBUG_UNALIGNED_TRAP
-# define DPRINT(a...)	do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
+# define DPRINT(a...)	do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0)
 # define DDUMP(str,vp,len)	dump(str, vp, len)
 
 static void
@@ -59,7 +60,7 @@ dump (const char *str, void *vp, size_t len)
  *  (i.e. don't allow attacker to fill up logs with unaligned accesses).
  */
 int no_unaligned_warning;
-static int noprint_warning;
+int unaligned_dump_stack;
 
 /*
  * For M-unit:
@@ -675,9 +676,10 @@ emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsi
 	 * just in case.
 	 */
 	if (ld.x6_op == 1 || ld.x6_op == 3) {
-		printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
-		die_if_kernel("unaligned reference on speculative load with register update\n",
-			      regs, 30);
+		printk(KERN_ERR "%s: register update on speculative load, error\n", __func__);
+		if (die_if_kernel("unaligned reference on speculative load with register update\n",
+				  regs, 30))
+			return;
 	}
 
 
@@ -1104,7 +1106,7 @@ emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs
 		 */
 		if (ld.x6_op == 1 || ld.x6_op == 3)
 			printk(KERN_ERR "%s: register update on speculative load pair, error\n",
-			       __FUNCTION__);
+			       __func__);
 
 		setreg(ld.r3, ifa, 0, regs);
 	}
@@ -1282,24 +1284,9 @@ emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 /*
  * Make sure we log the unaligned access, so that user/sysadmin can notice it and
  * eventually fix the program.  However, we don't want to do that for every access so we
- * pace it with jiffies.  This isn't really MP-safe, but it doesn't really have to be
- * either...
+ * pace it with jiffies.
  */
-static int
-within_logging_rate_limit (void)
-{
-	static unsigned long count, last_time;
-
-	if (jiffies - last_time > 5*HZ)
-		count = 0;
-	if (count < 5) {
-		last_time = jiffies;
-		count++;
-		return 1;
-	}
-	return 0;
-
-}
+static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5);
 
 void
 ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
@@ -1318,7 +1305,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 
 	if (ia64_psr(regs)->be) {
 		/* we don't support big-endian accesses */
-		die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
+		if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0))
+			return;
 		goto force_sigbus;
 	}
 
@@ -1335,13 +1323,14 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 
 		if (!no_unaligned_warning &&
 		    !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
-		    within_logging_rate_limit())
+		    __ratelimit(&logging_rate_limit))
 		{
 			char buf[200];	/* comm[] is at most 16 bytes... */
 			size_t len;
 
 			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
-				      "ip=0x%016lx\n\r", current->comm, current->pid,
+				      "ip=0x%016lx\n\r", current->comm,
+				      task_pid_nr(current),
 				      ifa, regs->cr_iip + ipsr->ri);
 			/*
 			 * Don't call tty_write_message() if we're in the kernel; we might
@@ -1353,9 +1342,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 			/* watch for command names containing %s */
 			printk(KERN_WARNING "%s", buf);
 		} else {
-			if (no_unaligned_warning && !noprint_warning) {
-				noprint_warning = 1;
-				printk(KERN_WARNING "%s(%d) encountered an "
+			if (no_unaligned_warning) {
+				printk_once(KERN_WARNING "%s(%d) encountered an "
 				       "unaligned exception which required\n"
 				       "kernel assistance, which degrades "
 				       "the performance of the application.\n"
@@ -1364,13 +1352,16 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 				       "administrator\n"
 				       "echo 0 > /proc/sys/kernel/ignore-"
 				       "unaligned-usertrap to re-enable\n",
-				       current->comm, current->pid);
+				       current->comm, task_pid_nr(current));
 			}
 		}
 	} else {
-		if (within_logging_rate_limit())
+		if (__ratelimit(&logging_rate_limit)) {
 			printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
 			       ifa, regs->cr_iip + ipsr->ri);
+			if (unaligned_dump_stack)
+				dump_stack();
+		}
 		set_fs(KERNEL_DS);
 	}
 
@@ -1488,16 +1479,19 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 	      case LDFA_OP:
 	      case LDFCCLR_OP:
 	      case LDFCNC_OP:
-	      case LDF_IMM_OP:
-	      case LDFA_IMM_OP:
-	      case LDFCCLR_IMM_OP:
-	      case LDFCNC_IMM_OP:
 		if (u.insn.x)
 			ret = emulate_load_floatpair(ifa, u.insn, regs);
 		else
 			ret = emulate_load_float(ifa, u.insn, regs);
 		break;
 
+	      case LDF_IMM_OP:
+	      case LDFA_IMM_OP:
+	      case LDFCCLR_IMM_OP:
+	      case LDFCNC_IMM_OP:
+		ret = emulate_load_float(ifa, u.insn, regs);
+		break;
+
 	      case STF_OP:
 	      case STF_IMM_OP:
 		ret = emulate_store_float(ifa, u.insn, regs);
@@ -1531,7 +1525,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 			ia64_handle_exception(regs, eh);
 			goto done;
 		}
-		die_if_kernel("error during unaligned kernel access\n", regs, ret);
+		if (die_if_kernel("error during unaligned kernel access\n", regs, ret))
+			return;
 		/* NOT_REACHED */
 	}
   force_sigbus:
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index fcd2bad0286..20e8a9b21d7 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2001-2005 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (C) 2001-2008 Silicon Graphics, Inc.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License
@@ -18,79 +18,97 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/string.h>
-#include <linux/slab.h>
 #include <linux/efi.h>
 #include <linux/genalloc.h>
+#include <linux/gfp.h>
 #include <asm/page.h>
 #include <asm/pal.h>
-#include <asm/system.h>
 #include <asm/pgtable.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/tlbflush.h>
 #include <asm/sn/arch.h>
 
-#define DEBUG	0
 
-#if DEBUG
-#define dprintk			printk
-#else
-#define dprintk(x...)		do { } while (0)
-#endif
+extern void __init efi_memmap_walk_uc(efi_freemem_callback_t, void *);
 
-void __init efi_memmap_walk_uc (efi_freemem_callback_t callback);
+struct uncached_pool {
+	struct gen_pool *pool;
+	struct mutex add_chunk_mutex;	/* serialize adding a converted chunk */
+	int nchunks_added;		/* #of converted chunks added to pool */
+	atomic_t status;		/* smp called function's return status*/
+};
 
-#define MAX_UNCACHED_GRANULES	5
-static int allocated_granules;
+#define MAX_CONVERTED_CHUNKS_PER_NODE	2
 
-struct gen_pool *uncached_pool[MAX_NUMNODES];
+struct uncached_pool uncached_pools[MAX_NUMNODES];
 
 
 static void uncached_ipi_visibility(void *data)
 {
 	int status;
+	struct uncached_pool *uc_pool = (struct uncached_pool *)data;
 
 	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
 	if ((status != PAL_VISIBILITY_OK) &&
 	    (status != PAL_VISIBILITY_OK_REMOTE_NEEDED))
-		printk(KERN_DEBUG "pal_prefetch_visibility() returns %i on "
-		       "CPU %i\n", status, raw_smp_processor_id());
+		atomic_inc(&uc_pool->status);
 }
 
 
 static void uncached_ipi_mc_drain(void *data)
 {
 	int status;
+	struct uncached_pool *uc_pool = (struct uncached_pool *)data;
+
 	status = ia64_pal_mc_drain();
-	if (status)
-		printk(KERN_WARNING "ia64_pal_mc_drain() failed with %i on "
-		       "CPU %i\n", status, raw_smp_processor_id());
+	if (status != PAL_STATUS_SUCCESS)
+		atomic_inc(&uc_pool->status);
 }
 
 
-static unsigned long
-uncached_get_new_chunk(struct gen_pool *poolp)
+/*
+ * Add a new chunk of uncached memory pages to the specified pool.
+ *
+ * @pool: pool to add new chunk of uncached memory to
+ * @nid: node id of node to allocate memory from, or -1
+ *
+ * This is accomplished by first allocating a granule of cached memory pages
+ * and then converting them to uncached memory pages.
+ */
+static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
 {
 	struct page *page;
-	void *tmp;
-	int status, i;
-	unsigned long addr, node;
+	int status, i, nchunks_added = uc_pool->nchunks_added;
+	unsigned long c_addr, uc_addr;
 
-	if (allocated_granules >= MAX_UNCACHED_GRANULES)
+	if (mutex_lock_interruptible(&uc_pool->add_chunk_mutex) != 0)
+		return -1;	/* interrupted by a signal */
+
+	if (uc_pool->nchunks_added > nchunks_added) {
+		/* someone added a new chunk while we were waiting */
+		mutex_unlock(&uc_pool->add_chunk_mutex);
 		return 0;
+	}
+
+	if (uc_pool->nchunks_added >= MAX_CONVERTED_CHUNKS_PER_NODE) {
+		mutex_unlock(&uc_pool->add_chunk_mutex);
+		return -1;
+	}
+
+	/* attempt to allocate a granule's worth of cached memory pages */
 
-	node = poolp->private;
-	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO,
+	page = alloc_pages_exact_node(nid,
+				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
 				IA64_GRANULE_SHIFT-PAGE_SHIFT);
+	if (!page) {
+		mutex_unlock(&uc_pool->add_chunk_mutex);
+		return -1;
+	}
 
-	dprintk(KERN_INFO "get_new_chunk page %p, addr %lx\n",
-		page, (unsigned long)(page-vmem_map) << PAGE_SHIFT);
+	/* convert the memory pages from cached to uncached */
 
-	/*
-	 * Do magic if no mem on local node! XXX
-	 */
-	if (!page)
-		return 0;
-	tmp = page_address(page);
+	c_addr = (unsigned long)page_address(page);
+	uc_addr = c_addr - PAGE_OFFSET + __IA64_UNCACHED_OFFSET;
 
 	/*
 	 * There's a small race here where it's possible for someone to
@@ -100,76 +118,99 @@ uncached_get_new_chunk(struct gen_pool *poolp)
 	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
 		SetPageUncached(&page[i]);
 
-	flush_tlb_kernel_range(tmp, tmp + IA64_GRANULE_SIZE);
+	flush_tlb_kernel_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);
 
 	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
+	if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
+		atomic_set(&uc_pool->status, 0);
+		status = smp_call_function(uncached_ipi_visibility, uc_pool, 1);
+		if (status || atomic_read(&uc_pool->status))
+			goto failed;
+	} else if (status != PAL_VISIBILITY_OK)
+		goto failed;
 
-	dprintk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n",
-		status, raw_smp_processor_id());
-
-	if (!status) {
-		status = smp_call_function(uncached_ipi_visibility, NULL, 0, 1);
-		if (status)
-			printk(KERN_WARNING "smp_call_function failed for "
-			       "uncached_ipi_visibility! (%i)\n", status);
-	}
+	preempt_disable();
 
 	if (ia64_platform_is("sn2"))
-		sn_flush_all_caches((unsigned long)tmp, IA64_GRANULE_SIZE);
+		sn_flush_all_caches(uc_addr, IA64_GRANULE_SIZE);
 	else
-		flush_icache_range((unsigned long)tmp,
-				   (unsigned long)tmp+IA64_GRANULE_SIZE);
+		flush_icache_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);
 
-	ia64_pal_mc_drain();
-	status = smp_call_function(uncached_ipi_mc_drain, NULL, 0, 1);
+	/* flush the just introduced uncached translation from the TLB */
+	local_flush_tlb_all();
+
+	preempt_enable();
+
+	status = ia64_pal_mc_drain();
+	if (status != PAL_STATUS_SUCCESS)
+		goto failed;
+	atomic_set(&uc_pool->status, 0);
+	status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
+	if (status || atomic_read(&uc_pool->status))
+		goto failed;
+
+	/*
+	 * The chunk of memory pages has been converted to uncached so now we
+	 * can add it to the pool.
+	 */
+	status = gen_pool_add(uc_pool->pool, uc_addr, IA64_GRANULE_SIZE, nid);
 	if (status)
-		printk(KERN_WARNING "smp_call_function failed for "
-		       "uncached_ipi_mc_drain! (%i)\n", status);
+		goto failed;
+
+	uc_pool->nchunks_added++;
+	mutex_unlock(&uc_pool->add_chunk_mutex);
+	return 0;
 
-	addr = (unsigned long)tmp - PAGE_OFFSET + __IA64_UNCACHED_OFFSET;
+	/* failed to convert or add the chunk so give it back to the kernel */
+failed:
+	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
+		ClearPageUncached(&page[i]);
 
-	allocated_granules++;
-	return addr;
+	free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT);
+	mutex_unlock(&uc_pool->add_chunk_mutex);
+	return -1;
 }
 
 
 /*
  * uncached_alloc_page
  *
- * Allocate 1 uncached page. Allocates on the requested node. If no
- * uncached pages are available on the requested node, roundrobin starting
- * with higher nodes.
+ * @starting_nid: node id of node to start with, or -1
+ * @n_pages: number of contiguous pages to allocate
+ *
+ * Allocate the specified number of contiguous uncached pages on the
+ * the requested node. If not enough contiguous uncached pages are available
+ * on the requested node, roundrobin starting with the next higher node.
  */
-unsigned long
-uncached_alloc_page(int nid)
+unsigned long uncached_alloc_page(int starting_nid, int n_pages)
 {
-	unsigned long maddr;
+	unsigned long uc_addr;
+	struct uncached_pool *uc_pool;
+	int nid;
 
-	maddr = gen_pool_alloc(uncached_pool[nid], PAGE_SIZE);
+	if (unlikely(starting_nid >= MAX_NUMNODES))
+		return 0;
 
-	dprintk(KERN_DEBUG "uncached_alloc_page returns %lx on node %i\n",
-		maddr, nid);
+	if (starting_nid < 0)
+		starting_nid = numa_node_id();
+	nid = starting_nid;
 
-	/*
-	 * If no memory is availble on our local node, try the
-	 * remaining nodes in the system.
-	 */
-	if (!maddr) {
-		int i;
-
-		for (i = MAX_NUMNODES - 1; i >= 0; i--) {
-			if (i == nid || !node_online(i))
-				continue;
-			maddr = gen_pool_alloc(uncached_pool[i], PAGE_SIZE);
-			dprintk(KERN_DEBUG "uncached_alloc_page alternate search "
-				"returns %lx on node %i\n", maddr, i);
-			if (maddr) {
-				break;
-			}
-		}
-	}
+	do {
+		if (!node_state(nid, N_HIGH_MEMORY))
+			continue;
+		uc_pool = &uncached_pools[nid];
+		if (uc_pool->pool == NULL)
+			continue;
+		do {
+			uc_addr = gen_pool_alloc(uc_pool->pool,
+						 n_pages * PAGE_SIZE);
+			if (uc_addr != 0)
+				return uc_addr;
+		} while (uncached_add_chunk(uc_pool, nid) == 0);
 
-	return maddr;
+	} while ((nid = (nid + 1) % MAX_NUMNODES) != starting_nid);
+
+	return 0;
 }
 EXPORT_SYMBOL(uncached_alloc_page);
 
@@ -177,21 +218,23 @@ EXPORT_SYMBOL(uncached_alloc_page);
 /*
  * uncached_free_page
  *
- * Free a single uncached page.
+ * @uc_addr: uncached address of first page to free
+ * @n_pages: number of contiguous pages to free
+ *
+ * Free the specified number of uncached pages.
  */
-void
-uncached_free_page(unsigned long maddr)
+void uncached_free_page(unsigned long uc_addr, int n_pages)
 {
-	int node;
+	int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET);
+	struct gen_pool *pool = uncached_pools[nid].pool;
 
-	node = paddr_to_nid(maddr - __IA64_UNCACHED_OFFSET);
+	if (unlikely(pool == NULL))
+		return;
 
-	dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node);
+	if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
+		panic("uncached_free_page invalid address %lx\n", uc_addr);
 
-	if ((maddr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
-		panic("uncached_free_page invalid address %lx\n", maddr);
-
-	gen_pool_free(uncached_pool[node], maddr, PAGE_SIZE);
+	gen_pool_free(pool, uc_addr, n_pages * PAGE_SIZE);
 }
 EXPORT_SYMBOL(uncached_free_page);
 
@@ -199,43 +242,39 @@ EXPORT_SYMBOL(uncached_free_page);
 /*
  * uncached_build_memmap,
  *
+ * @uc_start: uncached starting address of a chunk of uncached memory
+ * @uc_end: uncached ending address of a chunk of uncached memory
+ * @arg: ignored, (NULL argument passed in on call to efi_memmap_walk_uc())
+ *
  * Called at boot time to build a map of pages that can be used for
  * memory special operations.
  */
-static int __init
-uncached_build_memmap(unsigned long start, unsigned long end, void *arg)
+static int __init uncached_build_memmap(u64 uc_start, u64 uc_end, void *arg)
 {
-	long length = end - start;
-	int node;
-
-	dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end);
+	int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET);
+	struct gen_pool *pool = uncached_pools[nid].pool;
+	size_t size = uc_end - uc_start;
 
 	touch_softlockup_watchdog();
-	memset((char *)start, 0, length);
 
-	node = paddr_to_nid(start - __IA64_UNCACHED_OFFSET);
-
-	for (; start < end ; start += PAGE_SIZE) {
-		dprintk(KERN_INFO "sticking %lx into the pool!\n", start);
-		gen_pool_free(uncached_pool[node], start, PAGE_SIZE);
+	if (pool != NULL) {
+		memset((char *)uc_start, 0, size);
+		(void) gen_pool_add(pool, uc_start, size, nid);
 	}
-
 	return 0;
 }
 
 
-static int __init uncached_init(void) {
-	int i;
+static int __init uncached_init(void)
+{
+	int nid;
 
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		if (!node_online(i))
-			continue;
-		uncached_pool[i] = gen_pool_create(0, IA64_GRANULE_SHIFT,
-						   &uncached_get_new_chunk, i);
+	for_each_node_state(nid, N_ONLINE) {
+		uncached_pools[nid].pool = gen_pool_create(PAGE_SHIFT, nid);
+		mutex_init(&uncached_pools[nid].add_chunk_mutex);
 	}
 
-	efi_memmap_walk_uc(uncached_build_memmap);
-
+	efi_memmap_walk_uc(uncached_build_memmap, NULL);
 	return 0;
 }
 
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index 93d5a3b41f6..8f66195999e 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -2,7 +2,7 @@
  * Copyright (C) 1999-2004 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  * Copyright (C) 2003 Fenghua Yu <fenghua.yu@intel.com>
- * 	- Change pt_regs_off() to make it less dependant on pt_regs structure.
+ * 	- Change pt_regs_off() to make it less dependent on pt_regs structure.
  */
 /*
  * This file implements call frame unwind support for the Linux
@@ -41,7 +41,6 @@
 #include <asm/ptrace_offsets.h>
 #include <asm/rse.h>
 #include <asm/sections.h>
-#include <asm/system.h>
 #include <asm/uaccess.h>
 
 #include "entry.h"
@@ -60,6 +59,7 @@
 #  define UNW_DEBUG_ON(n)	unw_debug_level >= n
    /* Do not code a printk level, not all debug lines end in newline */
 #  define UNW_DPRINT(n, ...)  if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__)
+#  undef inline
 #  define inline
 #else /* !UNW_DEBUG */
 #  define UNW_DEBUG_ON(n)  0
@@ -145,7 +145,7 @@ static struct {
 # endif
 } unw = {
 	.tables = &unw.kernel_table,
-	.lock = SPIN_LOCK_UNLOCKED,
+	.lock = __SPIN_LOCK_UNLOCKED(unw.lock),
 	.save_order = {
 		UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR,
 		UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR
@@ -256,7 +256,7 @@ pt_regs_off (unsigned long reg)
 		off = unw.pt_regs_offsets[reg];
 
 	if (off < 0) {
-		UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __FUNCTION__, reg);
+		UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __func__, reg);
 		off = 0;
 	}
 	return (unsigned long) off;
@@ -267,13 +267,13 @@ get_scratch_regs (struct unw_frame_info *info)
 {
 	if (!info->pt) {
 		/* This should not happen with valid unwind info.  */
-		UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __FUNCTION__);
+		UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __func__);
 		if (info->flags & UNW_FLAG_INTERRUPT_FRAME)
 			info->pt = (unsigned long) ((struct pt_regs *) info->psp - 1);
 		else
 			info->pt = info->sp - 16;
 	}
-	UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __FUNCTION__, info->sp, info->pt);
+	UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __func__, info->sp, info->pt);
 	return (struct pt_regs *) info->pt;
 }
 
@@ -293,7 +293,7 @@ unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char
 			return 0;
 		}
 		UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n",
-			   __FUNCTION__, regnum);
+			   __func__, regnum);
 		return -1;
 	}
 
@@ -340,7 +340,7 @@ unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char
 					{
 						UNW_DPRINT(0, "unwind.%s: %p outside of regstk "
 							"[0x%lx-0x%lx)\n",
-							__FUNCTION__, (void *) addr,
+							__func__, (void *) addr,
 							info->regstk.limit,
 							info->regstk.top);
 						return -1;
@@ -373,7 +373,7 @@ unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char
 		    || (unsigned long) addr >= info->regstk.top)
 		{
 			UNW_DPRINT(0, "unwind.%s: ignoring attempt to access register outside "
-				   "of rbs\n",  __FUNCTION__);
+				   "of rbs\n",  __func__);
 			return -1;
 		}
 		if ((unsigned long) nat_addr >= info->regstk.top)
@@ -384,7 +384,7 @@ unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char
 	if (write) {
 		if (read_only(addr)) {
 			UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
-				__FUNCTION__);
+				__func__);
 		} else {
 			*addr = *val;
 			if (*nat)
@@ -426,13 +426,13 @@ unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int
 
 	      default:
 		UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n",
-			   __FUNCTION__, regnum);
+			   __func__, regnum);
 		return -1;
 	}
 	if (write)
 		if (read_only(addr)) {
 			UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
-				__FUNCTION__);
+				__func__);
 		} else
 			*addr = *val;
 	else
@@ -449,7 +449,7 @@ unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val,
 
 	if ((unsigned) (regnum - 2) >= 126) {
 		UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n",
-			   __FUNCTION__, regnum);
+			   __func__, regnum);
 		return -1;
 	}
 
@@ -481,7 +481,7 @@ unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val,
 	if (write)
 		if (read_only(addr)) {
 			UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
-				__FUNCTION__);
+				__func__);
 		} else
 			*addr = *val;
 	else
@@ -571,14 +571,14 @@ unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int
 
 	      default:
 		UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n",
-			   __FUNCTION__, regnum);
+			   __func__, regnum);
 		return -1;
 	}
 
 	if (write) {
 		if (read_only(addr)) {
 			UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
-				__FUNCTION__);
+				__func__);
 		} else
 			*addr = *val;
 	} else
@@ -599,7 +599,7 @@ unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write)
 	if (write) {
 		if (read_only(addr)) {
 			UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
-				__FUNCTION__);
+				__func__);
 		} else
 			*addr = *val;
 	} else
@@ -698,7 +698,7 @@ decode_abreg (unsigned char abreg, int memory)
 	      default:
 		break;
 	}
-	UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __FUNCTION__, abreg);
+	UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __func__, abreg);
 	return UNW_REG_LC;
 }
 
@@ -738,7 +738,7 @@ spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word
 			return;
 		}
 	}
-	UNW_DPRINT(0, "unwind.%s: excess spill!\n",  __FUNCTION__);
+	UNW_DPRINT(0, "unwind.%s: excess spill!\n",  __func__);
 }
 
 static inline void
@@ -854,11 +854,11 @@ desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr)
 {
 	if (abi == 3 && context == 'i') {
 		sr->flags |= UNW_FLAG_INTERRUPT_FRAME;
-		UNW_DPRINT(3, "unwind.%s: interrupt frame\n",  __FUNCTION__);
+		UNW_DPRINT(3, "unwind.%s: interrupt frame\n",  __func__);
 	}
 	else
 		UNW_DPRINT(0, "unwind%s: ignoring unwabi(abi=0x%x,context=0x%x)\n",
-				__FUNCTION__, abi, context);
+				__func__, abi, context);
 }
 
 static inline void
@@ -1203,10 +1203,10 @@ desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word
 static inline unw_hash_index_t
 hash (unsigned long ip)
 {
-#	define hashmagic	0x9e3779b97f4a7c16UL	/* based on (sqrt(5)/2-1)*2^64 */
+	/* magic number = ((sqrt(5)-1)/2)*2^64 */
+	static const unsigned long hashmagic = 0x9e3779b97f4a7c16UL;
 
-	return (ip >> 4)*hashmagic >> (64 - UNW_LOG_HASH_SIZE);
-#undef hashmagic
+	return (ip >> 4) * hashmagic >> (64 - UNW_LOG_HASH_SIZE);
 }
 
 static inline long
@@ -1346,7 +1346,7 @@ script_emit (struct unw_script *script, struct unw_insn insn)
 {
 	if (script->count >= UNW_MAX_SCRIPT_LEN) {
 		UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u instructions!\n",
-			__FUNCTION__, UNW_MAX_SCRIPT_LEN);
+			__func__, UNW_MAX_SCRIPT_LEN);
 		return;
 	}
 	script->insn[script->count++] = insn;
@@ -1388,7 +1388,7 @@ emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script)
 
 	      default:
 		UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n",
-			   __FUNCTION__, r->where);
+			   __func__, r->where);
 		return;
 	}
 	insn.opc = opc;
@@ -1445,7 +1445,7 @@ compile_reg (struct unw_state_record *sr, int i, struct unw_script *script)
 				val = offsetof(struct pt_regs, f6) + 16*(rval - 6);
 			else
 				UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n",
-					   __FUNCTION__, rval);
+					   __func__, rval);
 		}
 		break;
 
@@ -1473,7 +1473,7 @@ compile_reg (struct unw_state_record *sr, int i, struct unw_script *script)
 
 	      default:
 		UNW_DPRINT(0, "unwind%s: register %u has unexpected `where' value of %u\n",
-			   __FUNCTION__, i, r->where);
+			   __func__, i, r->where);
 		break;
 	}
 	insn.opc = opc;
@@ -1530,7 +1530,7 @@ build_script (struct unw_frame_info *info)
 	struct unw_labeled_state *ls, *next;
 	unsigned long ip = info->ip;
 	struct unw_state_record sr;
-	struct unw_table *table;
+	struct unw_table *table, *prev;
 	struct unw_reg_info *r;
 	struct unw_insn insn;
 	u8 *dp, *desc_end;
@@ -1546,10 +1546,10 @@ build_script (struct unw_frame_info *info)
 		r->when = UNW_WHEN_NEVER;
 	sr.pr_val = info->pr;
 
-	UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __FUNCTION__, ip);
+	UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __func__, ip);
 	script = script_new(ip);
 	if (!script) {
-		UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n",  __FUNCTION__);
+		UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n",  __func__);
 		STAT(unw.stat.script.build_time += ia64_get_itc() - start);
 		return NULL;
 	}
@@ -1559,16 +1559,31 @@ build_script (struct unw_frame_info *info)
 
 	STAT(parse_start = ia64_get_itc());
 
+	prev = NULL;
 	for (table = unw.tables; table; table = table->next) {
 		if (ip >= table->start && ip < table->end) {
+			/*
+			 * Leave the kernel unwind table at the very front,
+			 * lest moving it breaks some assumption elsewhere.
+			 * Otherwise, move the matching table to the second
+			 * position in the list so that traversals can benefit
+			 * from commonality in backtrace paths.
+			 */
+			if (prev && prev != unw.tables) {
+				/* unw is safe - we're already spinlocked */
+				prev->next = table->next;
+				table->next = unw.tables->next;
+				unw.tables->next = table;
+			}
 			e = lookup(table, ip - table->segment_base);
 			break;
 		}
+		prev = table;
 	}
 	if (!e) {
 		/* no info, return default unwinder (leaf proc, no mem stack, no saved regs)  */
 		UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n",
-			__FUNCTION__, ip, unw.cache[info->prev_script].ip);
+			__func__, ip, unw.cache[info->prev_script].ip);
 		sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
 		sr.curr.reg[UNW_REG_RP].when = -1;
 		sr.curr.reg[UNW_REG_RP].val = 0;
@@ -1617,13 +1632,13 @@ build_script (struct unw_frame_info *info)
 		sr.curr.reg[UNW_REG_RP].when = -1;
 		sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg;
 		UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n",
-			   __FUNCTION__, ip, sr.curr.reg[UNW_REG_RP].where,
+			   __func__, ip, sr.curr.reg[UNW_REG_RP].where,
 			   sr.curr.reg[UNW_REG_RP].val);
 	}
 
 #ifdef UNW_DEBUG
 	UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n",
-		__FUNCTION__, table->segment_base + e->start_offset, sr.when_target);
+		__func__, table->segment_base + e->start_offset, sr.when_target);
 	for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) {
 		if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) {
 			UNW_DPRINT(1, "  %s <- ", unw.preg_name[r - sr.curr.reg]);
@@ -1745,7 +1760,7 @@ run_script (struct unw_script *script, struct unw_frame_info *state)
 			} else {
 				s[dst] = 0;
 				UNW_DPRINT(0, "unwind.%s: no state->pt, dst=%ld, val=%ld\n",
-					   __FUNCTION__, dst, val);
+					   __func__, dst, val);
 			}
 			break;
 
@@ -1755,7 +1770,7 @@ run_script (struct unw_script *script, struct unw_frame_info *state)
 			else {
 				s[dst] = 0;
 				UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST bad val=%ld\n",
-					   __FUNCTION__, val);
+					   __func__, val);
 			}
 			break;
 
@@ -1790,7 +1805,7 @@ run_script (struct unw_script *script, struct unw_frame_info *state)
 			    || s[val] < TASK_SIZE)
 			{
 				UNW_DPRINT(0, "unwind.%s: rejecting bad psp=0x%lx\n",
-					   __FUNCTION__, s[val]);
+					   __func__, s[val]);
 				break;
 			}
 #endif
@@ -1824,7 +1839,7 @@ find_save_locs (struct unw_frame_info *info)
 	if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf)) || info->ip < TASK_SIZE) {
 		/* don't let obviously bad addresses pollute the cache */
 		/* FIXME: should really be level 0 but it occurs too often. KAO */
-		UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __FUNCTION__, info->ip);
+		UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __func__, info->ip);
 		info->rp_loc = NULL;
 		return -1;
 	}
@@ -1837,7 +1852,7 @@ find_save_locs (struct unw_frame_info *info)
 			spin_unlock_irqrestore(&unw.lock, flags);
 			UNW_DPRINT(0,
 				   "unwind.%s: failed to locate/build unwind script for ip %lx\n",
-				   __FUNCTION__, info->ip);
+				   __func__, info->ip);
 			return -1;
 		}
 		have_write_lock = 1;
@@ -1855,6 +1870,14 @@ find_save_locs (struct unw_frame_info *info)
 	return 0;
 }
 
+static int
+unw_valid(const struct unw_frame_info *info, unsigned long* p)
+{
+	unsigned long loc = (unsigned long)p;
+	return (loc >= info->regstk.limit && loc < info->regstk.top) ||
+	       (loc >= info->memstk.top && loc < info->memstk.limit);
+}
+
 int
 unw_unwind (struct unw_frame_info *info)
 {
@@ -1869,27 +1892,29 @@ unw_unwind (struct unw_frame_info *info)
 	prev_sp = info->sp;
 	prev_bsp = info->bsp;
 
-	/* restore the ip */
-	if (!info->rp_loc) {
+	/* validate the return IP pointer */
+	if (!unw_valid(info, info->rp_loc)) {
 		/* FIXME: should really be level 0 but it occurs too often. KAO */
 		UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n",
-			   __FUNCTION__, info->ip);
+			   __func__, info->ip);
 		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
 		return -1;
 	}
+	/* restore the ip */
 	ip = info->ip = *info->rp_loc;
 	if (ip < GATE_ADDR) {
-		UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __FUNCTION__, ip);
+		UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __func__, ip);
 		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
 		return -1;
 	}
 
-	/* restore the cfm: */
-	if (!info->pfs_loc) {
-		UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __FUNCTION__);
+	/* validate the previous stack frame pointer */
+	if (!unw_valid(info, info->pfs_loc)) {
+		UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __func__);
 		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
 		return -1;
 	}
+	/* restore the cfm: */
 	info->cfm_loc = info->pfs_loc;
 
 	/* restore the bsp: */
@@ -1901,13 +1926,13 @@ unw_unwind (struct unw_frame_info *info)
 			num_regs = *info->cfm_loc & 0x7f;		/* size of frame */
 		info->pfs_loc =
 			(unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs));
-		UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __FUNCTION__, info->pt);
+		UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __func__, info->pt);
 	} else
 		num_regs = (*info->cfm_loc >> 7) & 0x7f;	/* size of locals */
 	info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs);
 	if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) {
 		UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range [0x%lx-0x%lx]\n",
-			__FUNCTION__, info->bsp, info->regstk.limit, info->regstk.top);
+			__func__, info->bsp, info->regstk.limit, info->regstk.top);
 		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
 		return -1;
 	}
@@ -1916,14 +1941,14 @@ unw_unwind (struct unw_frame_info *info)
 	info->sp = info->psp;
 	if (info->sp < info->memstk.top || info->sp > info->memstk.limit) {
 		UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range [0x%lx-0x%lx]\n",
-			__FUNCTION__, info->sp, info->memstk.top, info->memstk.limit);
+			__func__, info->sp, info->memstk.top, info->memstk.limit);
 		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
 		return -1;
 	}
 
 	if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) {
 		UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n",
-			   __FUNCTION__, ip);
+			   __func__, ip);
 		STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
 		return -1;
 	}
@@ -1943,14 +1968,14 @@ EXPORT_SYMBOL(unw_unwind);
 int
 unw_unwind_to_user (struct unw_frame_info *info)
 {
-	unsigned long ip, sp, pr = 0;
+	unsigned long ip, sp, pr = info->pr;
 
-	while (unw_unwind(info) >= 0) {
+	do {
 		unw_get_sp(info, &sp);
 		if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
 		    < IA64_PT_REGS_SIZE) {
 			UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n",
-				   __FUNCTION__);
+				   __func__);
 			break;
 		}
 		if (unw_is_intr_frame(info) &&
@@ -1960,13 +1985,13 @@ unw_unwind_to_user (struct unw_frame_info *info)
 			unw_get_rp(info, &ip);
 			UNW_DPRINT(0, "unwind.%s: failed to read "
 				   "predicate register (ip=0x%lx)\n",
-				__FUNCTION__, ip);
+				__func__, ip);
 			return -1;
 		}
-	}
+	} while (unw_unwind(info) >= 0);
 	unw_get_ip(info, &ip);
 	UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
-		   __FUNCTION__, ip);
+		   __func__, ip);
 	return -1;
 }
 EXPORT_SYMBOL(unw_unwind_to_user);
@@ -1991,13 +2016,16 @@ init_frame_info (struct unw_frame_info *info, struct task_struct *t,
 	memset(info, 0, sizeof(*info));
 
 	rbslimit = (unsigned long) t + IA64_RBS_OFFSET;
+	stklimit = (unsigned long) t + IA64_STK_OFFSET;
+
 	rbstop   = sw->ar_bspstore;
-	if (rbstop - (unsigned long) t >= IA64_STK_OFFSET)
+	if (rbstop > stklimit || rbstop < rbslimit)
 		rbstop = rbslimit;
 
-	stklimit = (unsigned long) t + IA64_STK_OFFSET;
 	if (stktop <= rbstop)
 		stktop = rbstop;
+	if (stktop > stklimit)
+		stktop = stklimit;
 
 	info->regstk.limit = rbslimit;
 	info->regstk.top   = rbstop;
@@ -2014,7 +2042,7 @@ init_frame_info (struct unw_frame_info *info, struct task_struct *t,
 		   "  pr     0x%lx\n"
 		   "  sw     0x%lx\n"
 		   "  sp     0x%lx\n",
-		   __FUNCTION__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit,
+		   __func__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit,
 		   info->pr, (unsigned long) info->sw, info->sp);
 	STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags));
 }
@@ -2033,7 +2061,7 @@ unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct
 		   "  bsp    0x%lx\n"
 		   "  sol    0x%lx\n"
 		   "  ip     0x%lx\n",
-		   __FUNCTION__, info->bsp, sol, info->ip);
+		   __func__, info->bsp, sol, info->ip);
 	find_save_locs(info);
 }
 
@@ -2044,7 +2072,7 @@ unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t)
 {
 	struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16);
 
-	UNW_DPRINT(1, "unwind.%s\n", __FUNCTION__);
+	UNW_DPRINT(1, "unwind.%s\n", __func__);
 	unw_init_frame_info(info, t, sw);
 }
 EXPORT_SYMBOL(unw_init_from_blocked_task);
@@ -2074,7 +2102,7 @@ unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned lon
 
 	if (end - start <= 0) {
 		UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty unwind table\n",
-			   __FUNCTION__);
+			   __func__);
 		return NULL;
 	}
 
@@ -2105,14 +2133,14 @@ unw_remove_unwind_table (void *handle)
 
 	if (!handle) {
 		UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove non-existent unwind table\n",
-			   __FUNCTION__);
+			   __func__);
 		return;
 	}
 
 	table = handle;
 	if (table == &unw.kernel_table) {
 		UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind table is a "
-			   "no-can-do!\n", __FUNCTION__);
+			   "no-can-do!\n", __func__);
 		return;
 	}
 
@@ -2125,7 +2153,7 @@ unw_remove_unwind_table (void *handle)
 				break;
 		if (!prev) {
 			UNW_DPRINT(0, "unwind.%s: failed to find unwind table %p\n",
-				   __FUNCTION__, (void *) table);
+				   __func__, (void *) table);
 			spin_unlock_irqrestore(&unw.lock, flags);
 			return;
 		}
@@ -2135,7 +2163,7 @@ unw_remove_unwind_table (void *handle)
 
 	/* next, remove hash table entries for this table */
 
-	for (index = 0; index <= UNW_HASH_SIZE; ++index) {
+	for (index = 0; index < UNW_HASH_SIZE; ++index) {
 		tmp = unw.cache + unw.hash[index];
 		if (unw.hash[index] >= UNW_CACHE_SIZE
 		    || tmp->ip < table->start || tmp->ip >= table->end)
@@ -2171,7 +2199,7 @@ create_gate_table (void)
 		}
 
 	if (!punw) {
-		printk("%s: failed to find gate DSO's unwind table!\n", __FUNCTION__);
+		printk("%s: failed to find gate DSO's unwind table!\n", __func__);
 		return 0;
 	}
 
@@ -2188,7 +2216,7 @@ create_gate_table (void)
 	unw.gate_table = kmalloc(size, GFP_KERNEL);
 	if (!unw.gate_table) {
 		unw.gate_table_size = 0;
-		printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __FUNCTION__);
+		printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __func__);
 		return 0;
 	}
 	unw.gate_table_size = size;
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 783600fe52b..84f8a52ac5a 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -1,270 +1,248 @@
-#include <linux/config.h>
 
 #include <asm/cache.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 #include <asm/pgtable.h>
 
-#define LOAD_OFFSET	(KERNEL_START - KERNEL_TR_PAGE_SIZE)
 #include <asm-generic/vmlinux.lds.h>
 
-#define IVT_TEXT							\
-		VMLINUX_SYMBOL(__start_ivt_text) = .;			\
-		*(.text.ivt)						\
-		VMLINUX_SYMBOL(__end_ivt_text) = .;
-
 OUTPUT_FORMAT("elf64-ia64-little")
 OUTPUT_ARCH(ia64)
 ENTRY(phys_start)
 jiffies = jiffies_64;
+
 PHDRS {
-  code   PT_LOAD;
-  percpu PT_LOAD;
-  data   PT_LOAD;
+	code   PT_LOAD;
+	percpu PT_LOAD;
+	data   PT_LOAD;
+	note   PT_NOTE;
+	unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
 }
-SECTIONS
-{
-  /* Sections to be discarded */
-  /DISCARD/ : {
-	*(.exit.text)
-	*(.exit.data)
-	*(.exitcall.exit)
-	*(.IA_64.unwind.exit.text)
-	*(.IA_64.unwind_info.exit.text)
-	}
-
-  v = PAGE_OFFSET;	/* this symbol is here to make debugging easier... */
-  phys_start = _start - LOAD_OFFSET;
-
-  code : { } :code
-  . = KERNEL_START;
-
-  _text = .;
-  _stext = .;
-
-  .text : AT(ADDR(.text) - LOAD_OFFSET)
-    {
-	IVT_TEXT
-	*(.text)
-	SCHED_TEXT
-	LOCK_TEXT
-	KPROBES_TEXT
-	*(.gnu.linkonce.t*)
-    }
-  .text2 : AT(ADDR(.text2) - LOAD_OFFSET)
-	{ *(.text2) }
-#ifdef CONFIG_SMP
-  .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET)
-	{ *(.text.lock) }
-#endif
-  _etext = .;
 
-  /* Read-only data */
-
-  /* Exception table */
-  . = ALIGN(16);
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET)
-	{
-	  __start___ex_table = .;
-	  *(__ex_table)
-	  __stop___ex_table = .;
+SECTIONS {
+	/*
+	 * unwind exit sections must be discarded before
+	 * the rest of the sections get included.
+	 */
+	/DISCARD/ : {
+		*(.IA_64.unwind.exit.text)
+		*(.IA_64.unwind_info.exit.text)
+		*(.comment)
+		*(.note)
 	}
 
-  /* MCA table */
-  . = ALIGN(16);
-  __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET)
-	{
-	  __start___mca_table = .;
-	  *(__mca_table)
-	  __stop___mca_table = .;
+	v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
+	phys_start = _start - LOAD_OFFSET;
+
+	code : {
+	} :code
+	. = KERNEL_START;
+
+	_text = .;
+	_stext = .;
+
+	.text : AT(ADDR(.text) - LOAD_OFFSET) {
+		__start_ivt_text = .;
+		*(.text..ivt)
+		__end_ivt_text = .;
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		*(.gnu.linkonce.t*)
 	}
 
-  /* Global data */
-  _data = .;
-
-  /* Unwind info & table: */
-  . = ALIGN(8);
-  .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
-	{ *(.IA_64.unwind_info*) }
-  .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET)
-	{
-	  __start_unwind = .;
-	  *(.IA_64.unwind*)
-	  __end_unwind = .;
+	.text2 : AT(ADDR(.text2) - LOAD_OFFSET)	{
+		*(.text2)
 	}
 
-  RODATA
+#ifdef CONFIG_SMP
+	.text..lock : AT(ADDR(.text..lock) - LOAD_OFFSET) {
+		*(.text..lock)
+	}
+#endif
+	_etext = .;
+
+	/*
+	 * Read-only data
+	 */
+	NOTES :code :note       /* put .notes in text and mark in PT_NOTE  */
+	code_continues : {
+	} : code               /* switch back to regular program...  */
+
+	EXCEPTION_TABLE(16)
+
+	/* MCA table */
+	. = ALIGN(16);
+	__mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) {
+		__start___mca_table = .;
+		*(__mca_table)
+		__stop___mca_table = .;
+	}
 
-  .opd : AT(ADDR(.opd) - LOAD_OFFSET)
-	{ *(.opd) }
+	.data..patch.phys_stack_reg : AT(ADDR(.data..patch.phys_stack_reg) - LOAD_OFFSET) {
+		__start___phys_stack_reg_patchlist = .;
+		*(.data..patch.phys_stack_reg)
+		__end___phys_stack_reg_patchlist = .;
+	}
 
-  /* Initialization code and data: */
+	/*
+	 * Global data
+	 */
+	_data = .;
 
-  . = ALIGN(PAGE_SIZE);
-  __init_begin = .;
-  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
-	{
-	  _sinittext = .;
-	  *(.init.text)
-	  _einittext = .;
+	/* Unwind info & table: */
+	. = ALIGN(8);
+	.IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) {
+		*(.IA_64.unwind_info*)
+	}
+	.IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) {
+		__start_unwind = .;
+		*(.IA_64.unwind*)
+		__end_unwind = .;
+	} :code :unwind
+	code_continues2 : {
+	} : code
+
+	RODATA
+
+	.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
+		*(.opd)
 	}
 
-  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET)
-	{ *(.init.data) }
+	/*
+	 * Initialization code and data:
+	 */
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = .;
 
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET)
-	{
-	  __initramfs_start = .;
-	  *(.init.ramfs)
-	  __initramfs_end = .;
-	}
+	INIT_TEXT_SECTION(PAGE_SIZE)
+	INIT_DATA_SECTION(16)
 
-   . = ALIGN(16);
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET)
-        {
-	  __setup_start = .;
-	  *(.init.setup)
-	  __setup_end = .;
+	.data..patch.vtop : AT(ADDR(.data..patch.vtop) - LOAD_OFFSET) {
+		__start___vtop_patchlist = .;
+		*(.data..patch.vtop)
+		__end___vtop_patchlist = .;
 	}
-  .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET)
-	{
-	  __initcall_start = .;
-	  *(.initcall1.init)
-	  *(.initcall2.init)
-	  *(.initcall3.init)
-	  *(.initcall4.init)
-	  *(.initcall5.init)
-	  *(.initcall6.init)
-	  *(.initcall7.init)
-	  __initcall_end = .;
+
+	.data..patch.rse : AT(ADDR(.data..patch.rse) - LOAD_OFFSET) {
+		__start___rse_patchlist = .;
+		*(.data..patch.rse)
+		__end___rse_patchlist = .;
 	}
 
-  .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
-	{
-	  __start___vtop_patchlist = .;
-	  *(.data.patch.vtop)
-	  __end___vtop_patchlist = .;
+	.data..patch.mckinley_e9 : AT(ADDR(.data..patch.mckinley_e9) - LOAD_OFFSET) {
+		__start___mckinley_e9_bundles = .;
+		*(.data..patch.mckinley_e9)
+		__end___mckinley_e9_bundles = .;
 	}
 
-  .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
-	{
-	  __start___mckinley_e9_bundles = .;
-	  *(.data.patch.mckinley_e9)
-	  __end___mckinley_e9_bundles = .;
+#if defined(CONFIG_PARAVIRT)
+	. = ALIGN(16);
+	.paravirt_bundles : AT(ADDR(.paravirt_bundles) - LOAD_OFFSET) {
+		__start_paravirt_bundles = .;
+		*(.paravirt_bundles)
+		__stop_paravirt_bundles = .;
 	}
+	. = ALIGN(16);
+	.paravirt_insts : AT(ADDR(.paravirt_insts) - LOAD_OFFSET) {
+		__start_paravirt_insts = .;
+		*(.paravirt_insts)
+		__stop_paravirt_insts = .;
+	}
+	. = ALIGN(16);
+	.paravirt_branches : AT(ADDR(.paravirt_branches) - LOAD_OFFSET) {
+		__start_paravirt_branches = .;
+		*(.paravirt_branches)
+		__stop_paravirt_branches = .;
+	}
+#endif
 
 #if defined(CONFIG_IA64_GENERIC)
-  /* Machine Vector */
-  . = ALIGN(16);
-  .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
-	{
-	  machvec_start = .;
-	  *(.machvec)
-	  machvec_end = .;
+	/* Machine Vector */
+	. = ALIGN(16);
+	.machvec : AT(ADDR(.machvec) - LOAD_OFFSET) {
+		machvec_start = .;
+		*(.machvec)
+		machvec_end = .;
 	}
 #endif
 
-   __con_initcall_start = .;
-  .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
-	{ *(.con_initcall.init) }
-  __con_initcall_end = .;
-  __security_initcall_start = .;
-  .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET)
-	{ *(.security_initcall.init) }
-  __security_initcall_end = .;
-  . = ALIGN(PAGE_SIZE);
-  __init_end = .;
-
-  /* The initial task and kernel stack */
-  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET)
-	{ *(.data.init_task) }
-
-  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET)
-        { *(__special_page_section)
-	  __start_gate_section = .;
-	  *(.data.gate)
-	  __stop_gate_section = .;
+#ifdef	CONFIG_SMP
+	. = ALIGN(PERCPU_PAGE_SIZE);
+	__cpu0_per_cpu = .;
+	. = . + PERCPU_PAGE_SIZE;   /* cpu0 per-cpu space */
+#endif
+
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+
+	.data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
+		PAGE_ALIGNED_DATA(PAGE_SIZE)
+		. = ALIGN(PAGE_SIZE);
+		__start_gate_section = .;
+		*(.data..gate)
+		__stop_gate_section = .;
+	}
+	/*
+	 * make sure the gate page doesn't expose
+	 * kernel data
+	 */
+	. = ALIGN(PAGE_SIZE);
+
+	/* Per-cpu data: */
+	. = ALIGN(PERCPU_PAGE_SIZE);
+	PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
+	__phys_per_cpu_start = __per_cpu_load;
+	/*
+	 * ensure percpu data fits
+	 * into percpu page size
+	 */
+	. = __phys_per_cpu_start + PERCPU_PAGE_SIZE;
+
+	data : {
+	} :data
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		_sdata  =  .;
+		INIT_TASK_DATA(PAGE_SIZE)
+		CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
+		READ_MOSTLY_DATA(SMP_CACHE_BYTES)
+		DATA_DATA
+		*(.data1)
+		*(.gnu.linkonce.d*)
+		CONSTRUCTORS
+	}
+
+	. = ALIGN(16);	/* gp must be 16-byte aligned for exc. table */
+	.got : AT(ADDR(.got) - LOAD_OFFSET) {
+		*(.got.plt)
+		*(.got)
 	}
-  . = ALIGN(PAGE_SIZE);		/* make sure the gate page doesn't expose kernel data */
-
-  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET)
-        { *(.data.read_mostly) }
-
-  .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET)
-        { *(.data.cacheline_aligned) }
-
-  /* Per-cpu data: */
-  percpu : { } :percpu
-  . = ALIGN(PERCPU_PAGE_SIZE);
-  __phys_per_cpu_start = .;
-  .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
-	{
-		__per_cpu_start = .;
-		*(.data.percpu)
-		__per_cpu_end = .;
+	__gp = ADDR(.got) + 0x200000;
+
+	/*
+	 * We want the small data sections together,
+	 * so single-instruction offsets can access
+	 * them all, and initialized data all before
+	 * uninitialized, so we can shorten the
+	 * on-disk segment size.
+	 */
+	.sdata : AT(ADDR(.sdata) - LOAD_OFFSET) {
+		*(.sdata)
+		*(.sdata1)
+		*(.srdata)
 	}
-  . = __phys_per_cpu_start + PERCPU_PAGE_SIZE;	/* ensure percpu data fits into percpu page size */
-
-  data : { } :data
-  .data : AT(ADDR(.data) - LOAD_OFFSET)
-	{ *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
-
-  . = ALIGN(16);	/* gp must be 16-byte aligned for exc. table */
-  .got : AT(ADDR(.got) - LOAD_OFFSET)
-	{ *(.got.plt) *(.got) }
-  __gp = ADDR(.got) + 0x200000;
-  /* We want the small data sections together, so single-instruction offsets
-     can access them all, and initialized data all before uninitialized, so
-     we can shorten the on-disk segment size.  */
-  .sdata : AT(ADDR(.sdata) - LOAD_OFFSET)
-	{ *(.sdata) *(.sdata1) *(.srdata) }
-  _edata  =  .;
-  _bss = .;
-  .sbss : AT(ADDR(.sbss) - LOAD_OFFSET)
-	{ *(.sbss) *(.scommon) }
-  .bss : AT(ADDR(.bss) - LOAD_OFFSET)
-	{ *(.bss) *(COMMON) }
-
-  _end = .;
-
-  code : { } :code
-  /* Stabs debugging sections.  */
-  .stab 0 : { *(.stab) }
-  .stabstr 0 : { *(.stabstr) }
-  .stab.excl 0 : { *(.stab.excl) }
-  .stab.exclstr 0 : { *(.stab.exclstr) }
-  .stab.index 0 : { *(.stab.index) }
-  .stab.indexstr 0 : { *(.stab.indexstr) }
-  /* DWARF debug sections.
-     Symbols in the DWARF debugging sections are relative to the beginning
-     of the section so we begin them at 0.  */
-  /* DWARF 1 */
-  .debug          0 : { *(.debug) }
-  .line           0 : { *(.line) }
-  /* GNU DWARF 1 extensions */
-  .debug_srcinfo  0 : { *(.debug_srcinfo) }
-  .debug_sfnames  0 : { *(.debug_sfnames) }
-  /* DWARF 1.1 and DWARF 2 */
-  .debug_aranges  0 : { *(.debug_aranges) }
-  .debug_pubnames 0 : { *(.debug_pubnames) }
-  /* DWARF 2 */
-  .debug_info     0 : { *(.debug_info) }
-  .debug_abbrev   0 : { *(.debug_abbrev) }
-  .debug_line     0 : { *(.debug_line) }
-  .debug_frame    0 : { *(.debug_frame) }
-  .debug_str      0 : { *(.debug_str) }
-  .debug_loc      0 : { *(.debug_loc) }
-  .debug_macinfo  0 : { *(.debug_macinfo) }
-  /* SGI/MIPS DWARF 2 extensions */
-  .debug_weaknames 0 : { *(.debug_weaknames) }
-  .debug_funcnames 0 : { *(.debug_funcnames) }
-  .debug_typenames 0 : { *(.debug_typenames) }
-  .debug_varnames  0 : { *(.debug_varnames) }
-  /* These must appear regardless of  .  */
-  /* Discard them for now since Intel SoftSDV cannot handle them.
-  .comment 0 : { *(.comment) }
-  .note 0 : { *(.note) }
-  */
-  /DISCARD/ : { *(.comment) }
-  /DISCARD/ : { *(.note) }
+	_edata  =  .;
+
+	BSS_SECTION(0, 0, 0)
+
+	_end = .;
+
+	code : {
+	} :code
+
+	STABS_DEBUG
+	DWARF_DEBUG
+
+	/* Default discards */
+	DISCARDS
 }
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
new file mode 100644
index 00000000000..990b86420cc
--- /dev/null
+++ b/arch/ia64/kvm/Kconfig
@@ -0,0 +1,65 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	depends on HAVE_KVM || IA64
+	default y
+	---help---
+	  Say Y here to get to see options for using your Linux host to run other
+	  operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	tristate "Kernel-based Virtual Machine (KVM) support"
+	depends on BROKEN
+	depends on HAVE_KVM && MODULES
+	depends on BROKEN
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
+	select KVM_APIC_ARCHITECTURE
+	select KVM_MMIO
+	---help---
+	  Support hosting fully virtualized guest machines using hardware
+	  virtualization extensions.  You will need a fairly recent
+	  processor equipped with virtualization extensions. You will also
+	  need to select one or more of the processor modules below.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  To compile this as a module, choose M here: the module
+	  will be called kvm.
+
+	  If unsure, say N.
+
+config KVM_INTEL
+	tristate "KVM for Intel Itanium 2 processors support"
+	depends on KVM && m
+	---help---
+	  Provides support for KVM on Itanium 2 processors equipped with the VT
+	  extensions.
+
+config KVM_DEVICE_ASSIGNMENT
+	bool "KVM legacy PCI device assignment support"
+	depends on KVM && PCI && IOMMU_API
+	default y
+	---help---
+	  Provide support for legacy PCI device assignment through KVM.  The
+	  kernel now also supports a full featured userspace device driver
+	  framework through VFIO, which supersedes much of this support.
+
+	  If unsure, say Y.
+
+source drivers/vhost/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
new file mode 100644
index 00000000000..18e45ec49bb
--- /dev/null
+++ b/arch/ia64/kvm/Makefile
@@ -0,0 +1,67 @@
+#This Make file is to generate asm-offsets.h and build source.
+#
+
+#Generate asm-offsets.h for vmm module build
+offsets-file := asm-offsets.h
+
+always  := $(offsets-file)
+targets := $(offsets-file)
+targets += arch/ia64/kvm/asm-offsets.s
+
+# Default sed regexp - multiline due to syntax constraints
+define sed-y
+	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
+endef
+
+quiet_cmd_offsets = GEN     $@
+define cmd_offsets
+	(set -e; \
+	 echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
+	 echo "#define __ASM_KVM_OFFSETS_H__"; \
+	 echo "/*"; \
+	 echo " * DO NOT MODIFY."; \
+	 echo " *"; \
+	 echo " * This file was generated by Makefile"; \
+	 echo " *"; \
+	 echo " */"; \
+	 echo ""; \
+	 sed -ne $(sed-y) $<; \
+	 echo ""; \
+	 echo "#endif" ) > $@
+endef
+
+# We use internal rules to avoid the "is up to date" message from make
+arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \
+			$(wildcard $(srctree)/arch/ia64/include/asm/*.h)\
+			$(wildcard $(srctree)/include/linux/*.h)
+	$(call if_changed_dep,cc_s_c)
+
+$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
+	$(call cmd,offsets)
+
+FORCE : $(obj)/$(offsets-file)
+
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
+asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
+KVM := ../../../virt/kvm
+
+common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
+		$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
+
+ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
+common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
+endif
+
+kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
+obj-$(CONFIG_KVM) += kvm.o
+
+CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
+kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
+	vtlb.o process.o kvm_lib.o
+#Add link memcpy and memset to avoid possible structure assignment error
+kvm-intel-objs += memcpy.o memset.o
+obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
new file mode 100644
index 00000000000..9324c875caf
--- /dev/null
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -0,0 +1,241 @@
+/*
+ * asm-offsets.c Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ *
+ * Anthony Xu    <anthony.xu@intel.com>
+ * Xiantao Zhang <xiantao.zhang@intel.com>
+ * Copyright (c) 2007 Intel Corporation  KVM support.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kbuild.h>
+
+#include "vcpu.h"
+
+void foo(void)
+{
+	DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
+	DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
+
+	BLANK();
+
+	DEFINE(VMM_VCPU_META_RR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
+	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
+			offsetof(struct kvm_vcpu,
+				arch.metaphysical_saved_rr0));
+	DEFINE(VMM_VCPU_VRR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vrr[0]));
+	DEFINE(VMM_VPD_IRR0_OFFSET,
+			offsetof(struct vpd, irr[0]));
+	DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
+			offsetof(struct kvm_vcpu, arch.itc_check));
+	DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
+			offsetof(struct kvm_vcpu, arch.irq_check));
+	DEFINE(VMM_VPD_VHPI_OFFSET,
+			offsetof(struct vpd, vhpi));
+	DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vsa_base));
+	DEFINE(VMM_VCPU_VPD_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vpd));
+	DEFINE(VMM_VCPU_IRQ_CHECK,
+			offsetof(struct kvm_vcpu, arch.irq_check));
+	DEFINE(VMM_VCPU_TIMER_PENDING,
+			offsetof(struct kvm_vcpu, arch.timer_pending));
+	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
+	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
+			offsetof(struct kvm_vcpu, arch.mode_flags));
+	DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
+			offsetof(struct kvm_vcpu, arch.itc_offset));
+	DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
+			offsetof(struct kvm_vcpu, arch.last_itc));
+	DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
+			offsetof(struct kvm_vcpu, arch.saved_gp));
+
+	BLANK();
+
+	DEFINE(VMM_PT_REGS_B6_OFFSET,
+				offsetof(struct kvm_pt_regs, b6));
+	DEFINE(VMM_PT_REGS_B7_OFFSET,
+				offsetof(struct kvm_pt_regs, b7));
+	DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_csd));
+	DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_ssd));
+	DEFINE(VMM_PT_REGS_R8_OFFSET,
+				offsetof(struct kvm_pt_regs, r8));
+	DEFINE(VMM_PT_REGS_R9_OFFSET,
+				offsetof(struct kvm_pt_regs, r9));
+	DEFINE(VMM_PT_REGS_R10_OFFSET,
+				offsetof(struct kvm_pt_regs, r10));
+	DEFINE(VMM_PT_REGS_R11_OFFSET,
+				offsetof(struct kvm_pt_regs, r11));
+	DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_ipsr));
+	DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_iip));
+	DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_ifs));
+	DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_unat));
+	DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_pfs));
+	DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_rsc));
+	DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_rnat));
+
+	DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_bspstore));
+	DEFINE(VMM_PT_REGS_PR_OFFSET,
+				offsetof(struct kvm_pt_regs, pr));
+	DEFINE(VMM_PT_REGS_B0_OFFSET,
+				offsetof(struct kvm_pt_regs, b0));
+	DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
+				offsetof(struct kvm_pt_regs, loadrs));
+	DEFINE(VMM_PT_REGS_R1_OFFSET,
+				offsetof(struct kvm_pt_regs, r1));
+	DEFINE(VMM_PT_REGS_R12_OFFSET,
+				offsetof(struct kvm_pt_regs, r12));
+	DEFINE(VMM_PT_REGS_R13_OFFSET,
+				offsetof(struct kvm_pt_regs, r13));
+	DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_fpsr));
+	DEFINE(VMM_PT_REGS_R15_OFFSET,
+				offsetof(struct kvm_pt_regs, r15));
+	DEFINE(VMM_PT_REGS_R14_OFFSET,
+				offsetof(struct kvm_pt_regs, r14));
+	DEFINE(VMM_PT_REGS_R2_OFFSET,
+				offsetof(struct kvm_pt_regs, r2));
+	DEFINE(VMM_PT_REGS_R3_OFFSET,
+				offsetof(struct kvm_pt_regs, r3));
+	DEFINE(VMM_PT_REGS_R16_OFFSET,
+				offsetof(struct kvm_pt_regs, r16));
+	DEFINE(VMM_PT_REGS_R17_OFFSET,
+				offsetof(struct kvm_pt_regs, r17));
+	DEFINE(VMM_PT_REGS_R18_OFFSET,
+				offsetof(struct kvm_pt_regs, r18));
+	DEFINE(VMM_PT_REGS_R19_OFFSET,
+				offsetof(struct kvm_pt_regs, r19));
+	DEFINE(VMM_PT_REGS_R20_OFFSET,
+				offsetof(struct kvm_pt_regs, r20));
+	DEFINE(VMM_PT_REGS_R21_OFFSET,
+				offsetof(struct kvm_pt_regs, r21));
+	DEFINE(VMM_PT_REGS_R22_OFFSET,
+				offsetof(struct kvm_pt_regs, r22));
+	DEFINE(VMM_PT_REGS_R23_OFFSET,
+				offsetof(struct kvm_pt_regs, r23));
+	DEFINE(VMM_PT_REGS_R24_OFFSET,
+				offsetof(struct kvm_pt_regs, r24));
+	DEFINE(VMM_PT_REGS_R25_OFFSET,
+				offsetof(struct kvm_pt_regs, r25));
+	DEFINE(VMM_PT_REGS_R26_OFFSET,
+				offsetof(struct kvm_pt_regs, r26));
+	DEFINE(VMM_PT_REGS_R27_OFFSET,
+				offsetof(struct kvm_pt_regs, r27));
+	DEFINE(VMM_PT_REGS_R28_OFFSET,
+				offsetof(struct kvm_pt_regs, r28));
+	DEFINE(VMM_PT_REGS_R29_OFFSET,
+				offsetof(struct kvm_pt_regs, r29));
+	DEFINE(VMM_PT_REGS_R30_OFFSET,
+				offsetof(struct kvm_pt_regs, r30));
+	DEFINE(VMM_PT_REGS_R31_OFFSET,
+				offsetof(struct kvm_pt_regs, r31));
+	DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_ccv));
+	DEFINE(VMM_PT_REGS_F6_OFFSET,
+				offsetof(struct kvm_pt_regs, f6));
+	DEFINE(VMM_PT_REGS_F7_OFFSET,
+				offsetof(struct kvm_pt_regs, f7));
+	DEFINE(VMM_PT_REGS_F8_OFFSET,
+				offsetof(struct kvm_pt_regs, f8));
+	DEFINE(VMM_PT_REGS_F9_OFFSET,
+				offsetof(struct kvm_pt_regs, f9));
+	DEFINE(VMM_PT_REGS_F10_OFFSET,
+				offsetof(struct kvm_pt_regs, f10));
+	DEFINE(VMM_PT_REGS_F11_OFFSET,
+				offsetof(struct kvm_pt_regs, f11));
+	DEFINE(VMM_PT_REGS_R4_OFFSET,
+				offsetof(struct kvm_pt_regs, r4));
+	DEFINE(VMM_PT_REGS_R5_OFFSET,
+				offsetof(struct kvm_pt_regs, r5));
+	DEFINE(VMM_PT_REGS_R6_OFFSET,
+				offsetof(struct kvm_pt_regs, r6));
+	DEFINE(VMM_PT_REGS_R7_OFFSET,
+				offsetof(struct kvm_pt_regs, r7));
+	DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, eml_unat));
+	DEFINE(VMM_VCPU_IIPA_OFFSET,
+				offsetof(struct kvm_vcpu, arch.cr_iipa));
+	DEFINE(VMM_VCPU_OPCODE_OFFSET,
+				offsetof(struct kvm_vcpu, arch.opcode));
+	DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
+	DEFINE(VMM_VCPU_ISR_OFFSET,
+				offsetof(struct kvm_vcpu, arch.cr_isr));
+	DEFINE(VMM_PT_REGS_R16_SLOT,
+				(((offsetof(struct kvm_pt_regs, r16)
+				- sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
+	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
+				offsetof(struct kvm_vcpu, arch.mode_flags));
+	DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
+	BLANK();
+
+	DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
+	DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
+	DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
+			offsetof(struct kvm_vcpu, arch.insvc[0]));
+	DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
+	DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
+
+	DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
+	DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
+	DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
+	DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
+	DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
+	DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
+	DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
+	DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
+	DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
+	DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
+	DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
+	DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
+	DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
+	DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
+	DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
+	DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
+	DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
+	DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
+	DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
+	DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
+	DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
+	DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
+	DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
+	DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
+	DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
+	DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
+	DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
+	DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
+	DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
+	DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
+	DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
+	DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
+	DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
+	DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
+	BLANK();
+}
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
new file mode 100644
index 00000000000..c0785a72827
--- /dev/null
+++ b/arch/ia64/kvm/irq.h
@@ -0,0 +1,33 @@
+/*
+ * irq.h: In-kernel interrupt controller related definitions
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Authors:
+ *   Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include "lapic.h"
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return 1;
+}
+
+#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
new file mode 100644
index 00000000000..6a4309bb821
--- /dev/null
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -0,0 +1,1972 @@
+/*
+ * kvm_ia64.c: Basic KVM support On Itanium series processors
+ *
+ *
+ * 	Copyright (C) 2007, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/percpu.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/bitops.h>
+#include <linux/hrtimer.h>
+#include <linux/uaccess.h>
+#include <linux/iommu.h>
+#include <linux/intel-iommu.h>
+#include <linux/pci.h>
+
+#include <asm/pgtable.h>
+#include <asm/gcc_intrin.h>
+#include <asm/pal.h>
+#include <asm/cacheflush.h>
+#include <asm/div64.h>
+#include <asm/tlb.h>
+#include <asm/elf.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>
+
+#include "misc.h"
+#include "vti.h"
+#include "iodev.h"
+#include "ioapic.h"
+#include "lapic.h"
+#include "irq.h"
+
+static unsigned long kvm_vmm_base;
+static unsigned long kvm_vsa_base;
+static unsigned long kvm_vm_buffer;
+static unsigned long kvm_vm_buffer_size;
+unsigned long kvm_vmm_gp;
+
+static long vp_env_info;
+
+static struct kvm_vmm_info *kvm_vmm_info;
+
+static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ NULL }
+};
+
+static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (vcpu->kvm->arch.is_sn2)
+		return rtc_time();
+	else
+#endif
+		return ia64_getreg(_IA64_REG_AR_ITC);
+}
+
+static void kvm_flush_icache(unsigned long start, unsigned long len)
+{
+	int l;
+
+	for (l = 0; l < (len + 32); l += 32)
+		ia64_fc((void *)(start + l));
+
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+static void kvm_flush_tlb_all(void)
+{
+	unsigned long i, j, count0, count1, stride0, stride1, addr;
+	long flags;
+
+	addr    = local_cpu_data->ptce_base;
+	count0  = local_cpu_data->ptce_count[0];
+	count1  = local_cpu_data->ptce_count[1];
+	stride0 = local_cpu_data->ptce_stride[0];
+	stride1 = local_cpu_data->ptce_stride[1];
+
+	local_irq_save(flags);
+	for (i = 0; i < count0; ++i) {
+		for (j = 0; j < count1; ++j) {
+			ia64_ptce(addr);
+			addr += stride1;
+		}
+		addr += stride0;
+	}
+	local_irq_restore(flags);
+	ia64_srlz_i();			/* srlz.i implies srlz.d */
+}
+
+long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
+			(u64)opt_handler);
+
+	return iprv.status;
+}
+
+static  DEFINE_SPINLOCK(vp_lock);
+
+int kvm_arch_hardware_enable(void *garbage)
+{
+	long  status;
+	long  tmp_base;
+	unsigned long pte;
+	unsigned long saved_psr;
+	int slot;
+
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
+	local_irq_save(saved_psr);
+	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	local_irq_restore(saved_psr);
+	if (slot < 0)
+		return -EINVAL;
+
+	spin_lock(&vp_lock);
+	status = ia64_pal_vp_init_env(kvm_vsa_base ?
+				VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
+			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
+	if (status != 0) {
+		spin_unlock(&vp_lock);
+		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
+		return -EINVAL;
+	}
+
+	if (!kvm_vsa_base) {
+		kvm_vsa_base = tmp_base;
+		printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
+	}
+	spin_unlock(&vp_lock);
+	ia64_ptr_entry(0x3, slot);
+
+	return 0;
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+
+	long status;
+	int slot;
+	unsigned long pte;
+	unsigned long saved_psr;
+	unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
+
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
+				PAGE_KERNEL));
+
+	local_irq_save(saved_psr);
+	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	local_irq_restore(saved_psr);
+	if (slot < 0)
+		return;
+
+	status = ia64_pal_vp_exit_env(host_iva);
+	if (status)
+		printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
+				status);
+	ia64_ptr_entry(0x3, slot);
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+	*(int *)rtn = 0;
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+
+	int r;
+
+	switch (ext) {
+	case KVM_CAP_IRQCHIP:
+	case KVM_CAP_MP_STATE:
+	case KVM_CAP_IRQ_INJECT_STATUS:
+	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
+		r = 1;
+		break;
+	case KVM_CAP_COALESCED_MMIO:
+		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
+		break;
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+	case KVM_CAP_IOMMU:
+		r = iommu_present(&pci_bus_type);
+		break;
+#endif
+	default:
+		r = 0;
+	}
+	return r;
+
+}
+
+static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+	kvm_run->hw.hardware_exit_reason = 1;
+	return 0;
+}
+
+static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct kvm_mmio_req *p;
+	struct kvm_io_device *mmio_dev;
+	int r;
+
+	p = kvm_get_vcpu_ioreq(vcpu);
+
+	if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
+		goto mmio;
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
+	vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
+	vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
+
+	if (vcpu->mmio_is_write)
+		memcpy(vcpu->arch.mmio_data, &p->data, p->size);
+	memcpy(kvm_run->mmio.data, &p->data, p->size);
+	kvm_run->exit_reason = KVM_EXIT_MMIO;
+	return 0;
+mmio:
+	if (p->dir)
+		r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
+				    p->size, &p->data);
+	else
+		r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
+				     p->size, &p->data);
+	if (r)
+		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
+	p->state = STATE_IORESP_READY;
+
+	return 1;
+}
+
+static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p->exit_reason == EXIT_REASON_PAL_CALL)
+		return kvm_pal_emul(vcpu, kvm_run);
+	else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = 2;
+		return 0;
+	}
+}
+
+static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		kvm_sal_emul(vcpu);
+		return 1;
+	} else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = 3;
+		return 0;
+	}
+
+}
+
+static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
+{
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (!test_and_set_bit(vector, &vpd->irr[0])) {
+		vcpu->arch.irq_new_pending = 1;
+		kvm_vcpu_kick(vcpu);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ *  offset: address offset to IPI space.
+ *  value:  deliver value.
+ */
+static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
+				uint64_t vector)
+{
+	switch (dm) {
+	case SAPIC_FIXED:
+		break;
+	case SAPIC_NMI:
+		vector = 2;
+		break;
+	case SAPIC_EXTINT:
+		vector = 0;
+		break;
+	case SAPIC_INIT:
+	case SAPIC_PMI:
+	default:
+		printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
+		return;
+	}
+	__apic_accept_irq(vcpu, vector);
+}
+
+static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
+			unsigned long eid)
+{
+	union ia64_lid lid;
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		lid.val = VCPU_LID(vcpu);
+		if (lid.id == id && lid.eid == eid)
+			return vcpu;
+	}
+
+	return NULL;
+}
+
+static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
+	struct kvm_vcpu *target_vcpu;
+	struct kvm_pt_regs *regs;
+	union ia64_ipi_a addr = p->u.ipi_data.addr;
+	union ia64_ipi_d data = p->u.ipi_data.data;
+
+	target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
+	if (!target_vcpu)
+		return handle_vm_error(vcpu, kvm_run);
+
+	if (!target_vcpu->arch.launched) {
+		regs = vcpu_regs(target_vcpu);
+
+		regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
+		regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
+
+		target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		if (waitqueue_active(&target_vcpu->wq))
+			wake_up_interruptible(&target_vcpu->wq);
+	} else {
+		vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
+		if (target_vcpu != vcpu)
+			kvm_vcpu_kick(target_vcpu);
+	}
+
+	return 1;
+}
+
+struct call_data {
+	struct kvm_ptc_g ptc_g_data;
+	struct kvm_vcpu *vcpu;
+};
+
+static void vcpu_global_purge(void *info)
+{
+	struct call_data *p = (struct call_data *)info;
+	struct kvm_vcpu *vcpu = p->vcpu;
+
+	if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+		return;
+
+	set_bit(KVM_REQ_PTC_G, &vcpu->requests);
+	if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
+		vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
+							p->ptc_g_data;
+	} else {
+		clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
+		vcpu->arch.ptc_g_count = 0;
+		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
+	}
+}
+
+static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
+	struct kvm *kvm = vcpu->kvm;
+	struct call_data call_data;
+	int i;
+	struct kvm_vcpu *vcpui;
+
+	call_data.ptc_g_data = p->u.ptc_g_data;
+
+	kvm_for_each_vcpu(i, vcpui, kvm) {
+		if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
+				vcpu == vcpui)
+			continue;
+
+		if (waitqueue_active(&vcpui->wq))
+			wake_up_interruptible(&vcpui->wq);
+
+		if (vcpui->cpu != -1) {
+			call_data.vcpu = vcpui;
+			smp_call_function_single(vcpui->cpu,
+					vcpu_global_purge, &call_data, 1);
+		} else
+			printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
+
+	}
+	return 1;
+}
+
+static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	return 1;
+}
+
+static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
+{
+	unsigned long pte, rtc_phys_addr, map_addr;
+	int slot;
+
+	map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
+	rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
+	pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
+	slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
+	vcpu->arch.sn_rtc_tr_slot = slot;
+	if (slot < 0) {
+		printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
+		slot = 0;
+	}
+	return slot;
+}
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+
+	ktime_t kt;
+	long itc_diff;
+	unsigned long vcpu_now_itc;
+	unsigned long expires;
+	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
+	unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (irqchip_in_kernel(vcpu->kvm)) {
+
+		vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
+
+		if (time_after(vcpu_now_itc, vpd->itm)) {
+			vcpu->arch.timer_check = 1;
+			return 1;
+		}
+		itc_diff = vpd->itm - vcpu_now_itc;
+		if (itc_diff < 0)
+			itc_diff = -itc_diff;
+
+		expires = div64_u64(itc_diff, cyc_per_usec);
+		kt = ktime_set(0, 1000 * expires);
+
+		vcpu->arch.ht_active = 1;
+		hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
+
+		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+		kvm_vcpu_block(vcpu);
+		hrtimer_cancel(p_ht);
+		vcpu->arch.ht_active = 0;
+
+		if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
+				kvm_cpu_has_pending_timer(vcpu))
+			if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+				vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
+			return -EINTR;
+		return 1;
+	} else {
+		printk(KERN_ERR"kvm: Unsupported userspace halt!");
+		return 0;
+	}
+}
+
+static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run)
+{
+	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+	return 0;
+}
+
+static int handle_external_interrupt(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run)
+{
+	return 1;
+}
+
+static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
+				struct kvm_run *kvm_run)
+{
+	printk("VMM: %s", vcpu->arch.log_buf);
+	return 1;
+}
+
+static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run) = {
+	[EXIT_REASON_VM_PANIC]              = handle_vm_error,
+	[EXIT_REASON_MMIO_INSTRUCTION]      = handle_mmio,
+	[EXIT_REASON_PAL_CALL]              = handle_pal_call,
+	[EXIT_REASON_SAL_CALL]              = handle_sal_call,
+	[EXIT_REASON_SWITCH_RR6]            = handle_switch_rr6,
+	[EXIT_REASON_VM_DESTROY]            = handle_vm_shutdown,
+	[EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
+	[EXIT_REASON_IPI]		    = handle_ipi,
+	[EXIT_REASON_PTC_G]		    = handle_global_purge,
+	[EXIT_REASON_DEBUG]		    = handle_vcpu_debug,
+
+};
+
+static const int kvm_vti_max_exit_handlers =
+		sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
+
+static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p_exit_data;
+
+	p_exit_data = kvm_get_exit_data(vcpu);
+	return p_exit_data->exit_reason;
+}
+
+/*
+ * The guest has exited.  See if we can fix it or if we need userspace
+ * assistance.
+ */
+static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	u32 exit_reason = kvm_get_exit_reason(vcpu);
+	vcpu->arch.last_exit = exit_reason;
+
+	if (exit_reason < kvm_vti_max_exit_handlers
+			&& kvm_vti_exit_handlers[exit_reason])
+		return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
+	else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = exit_reason;
+	}
+	return 0;
+}
+
+static inline void vti_set_rr6(unsigned long rr6)
+{
+	ia64_set_rr(RR6, rr6);
+	ia64_srlz_i();
+}
+
+static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
+{
+	unsigned long pte;
+	struct kvm *kvm = vcpu->kvm;
+	int r;
+
+	/*Insert a pair of tr to map vmm*/
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
+	r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	if (r < 0)
+		goto out;
+	vcpu->arch.vmm_tr_slot = r;
+	/*Insert a pairt of tr to map data of vm*/
+	pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
+	r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
+					pte, KVM_VM_DATA_SHIFT);
+	if (r < 0)
+		goto out;
+	vcpu->arch.vm_tr_slot = r;
+
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (kvm->arch.is_sn2) {
+		r = kvm_sn2_setup_mappings(vcpu);
+		if (r < 0)
+			goto out;
+	}
+#endif
+
+	r = 0;
+out:
+	return r;
+}
+
+static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
+	ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (kvm->arch.is_sn2)
+		ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
+#endif
+}
+
+static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+	int r;
+	int cpu = smp_processor_id();
+
+	if (vcpu->arch.last_run_cpu != cpu ||
+			per_cpu(last_vcpu, cpu) != vcpu) {
+		per_cpu(last_vcpu, cpu) = vcpu;
+		vcpu->arch.last_run_cpu = cpu;
+		kvm_flush_tlb_all();
+	}
+
+	vcpu->arch.host_rr6 = ia64_get_rr(RR6);
+	vti_set_rr6(vcpu->arch.vmm_rr);
+	local_irq_save(psr);
+	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
+	return r;
+}
+
+static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
+{
+	kvm_purge_vmm_mapping(vcpu);
+	vti_set_rr6(vcpu->arch.host_rr6);
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	union context *host_ctx, *guest_ctx;
+	int r, idx;
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+again:
+	if (signal_pending(current)) {
+		r = -EINTR;
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		goto out;
+	}
+
+	preempt_disable();
+	local_irq_disable();
+
+	/*Get host and guest context with guest address space.*/
+	host_ctx = kvm_get_host_context(vcpu);
+	guest_ctx = kvm_get_guest_context(vcpu);
+
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
+
+	r = kvm_vcpu_pre_transition(vcpu);
+	if (r < 0)
+		goto vcpu_run_fail;
+
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	vcpu->mode = IN_GUEST_MODE;
+	kvm_guest_enter();
+
+	/*
+	 * Transition to the guest
+	 */
+	kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
+
+	kvm_vcpu_post_transition(vcpu);
+
+	vcpu->arch.launched = 1;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
+	local_irq_enable();
+
+	/*
+	 * We must have an instruction between local_irq_enable() and
+	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
+	 * the interrupt shadow.  The stat.exits increment will do nicely.
+	 * But we need to prevent reordering, hence this barrier():
+	 */
+	barrier();
+	kvm_guest_exit();
+	vcpu->mode = OUTSIDE_GUEST_MODE;
+	preempt_enable();
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	r = kvm_handle_exit(kvm_run, vcpu);
+
+	if (r > 0) {
+		if (!need_resched())
+			goto again;
+	}
+
+out:
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	if (r > 0) {
+		cond_resched();
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		goto again;
+	}
+
+	return r;
+
+vcpu_run_fail:
+	local_irq_enable();
+	preempt_enable();
+	kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+	goto out;
+}
+
+static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
+
+	if (!vcpu->mmio_is_write)
+		memcpy(&p->data, vcpu->arch.mmio_data, 8);
+	p->state = STATE_IORESP_READY;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int r;
+	sigset_t sigsaved;
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
+		kvm_vcpu_block(vcpu);
+		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+		r = -EAGAIN;
+		goto out;
+	}
+
+	if (vcpu->mmio_needed) {
+		memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
+		kvm_set_mmio_data(vcpu);
+		vcpu->mmio_read_completed = 1;
+		vcpu->mmio_needed = 0;
+	}
+	r = __vcpu_run(vcpu, kvm_run);
+out:
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return r;
+}
+
+struct kvm *kvm_arch_alloc_vm(void)
+{
+
+	struct kvm *kvm;
+	uint64_t  vm_base;
+
+	BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
+
+	vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
+
+	if (!vm_base)
+		return NULL;
+
+	memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
+	kvm = (struct kvm *)(vm_base +
+			offsetof(struct kvm_vm_data, kvm_vm_struct));
+	kvm->arch.vm_base = vm_base;
+	printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
+
+	return kvm;
+}
+
+struct kvm_ia64_io_range {
+	unsigned long start;
+	unsigned long size;
+	unsigned long type;
+};
+
+static const struct kvm_ia64_io_range io_ranges[] = {
+	{VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
+	{MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
+	{LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
+	{IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
+	{PIB_START, PIB_SIZE, GPFN_PIB},
+};
+
+static void kvm_build_io_pmt(struct kvm *kvm)
+{
+	unsigned long i, j;
+
+	/* Mark I/O ranges */
+	for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
+							i++) {
+		for (j = io_ranges[i].start;
+				j < io_ranges[i].start + io_ranges[i].size;
+				j += PAGE_SIZE)
+			kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
+					io_ranges[i].type, 0);
+	}
+
+}
+
+/*Use unused rids to virtualize guest rid.*/
+#define GUEST_PHYSICAL_RR0	0x1739
+#define GUEST_PHYSICAL_RR4	0x2739
+#define VMM_INIT_RR		0x1660
+
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+	BUG_ON(!kvm);
+
+	if (type)
+		return -EINVAL;
+
+	kvm->arch.is_sn2 = ia64_platform_is("sn2");
+
+	kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
+	kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
+	kvm->arch.vmm_init_rr = VMM_INIT_RR;
+
+	/*
+	 *Fill P2M entries for MMIO/IO ranges
+	 */
+	kvm_build_io_pmt(kvm);
+
+	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+
+	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+
+	return 0;
+}
+
+static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
+					struct kvm_irqchip *chip)
+{
+	int r;
+
+	r = 0;
+	switch (chip->chip_id) {
+	case KVM_IRQCHIP_IOAPIC:
+		r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
+{
+	int r;
+
+	r = 0;
+	switch (chip->chip_id) {
+	case KVM_IRQCHIP_IOAPIC:
+		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		vpd->vgr[i] = regs->vpd.vgr[i];
+		vpd->vbgr[i] = regs->vpd.vbgr[i];
+	}
+	for (i = 0; i < 128; i++)
+		vpd->vcr[i] = regs->vpd.vcr[i];
+	vpd->vhpi = regs->vpd.vhpi;
+	vpd->vnat = regs->vpd.vnat;
+	vpd->vbnat = regs->vpd.vbnat;
+	vpd->vpsr = regs->vpd.vpsr;
+
+	vpd->vpr = regs->vpd.vpr;
+
+	memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
+
+	RESTORE_REGS(mp_state);
+	RESTORE_REGS(vmm_rr);
+	memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
+	memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
+	RESTORE_REGS(itr_regions);
+	RESTORE_REGS(dtr_regions);
+	RESTORE_REGS(tc_regions);
+	RESTORE_REGS(irq_check);
+	RESTORE_REGS(itc_check);
+	RESTORE_REGS(timer_check);
+	RESTORE_REGS(timer_pending);
+	RESTORE_REGS(last_itc);
+	for (i = 0; i < 8; i++) {
+		vcpu->arch.vrr[i] = regs->vrr[i];
+		vcpu->arch.ibr[i] = regs->ibr[i];
+		vcpu->arch.dbr[i] = regs->dbr[i];
+	}
+	for (i = 0; i < 4; i++)
+		vcpu->arch.insvc[i] = regs->insvc[i];
+	RESTORE_REGS(xtp);
+	RESTORE_REGS(metaphysical_rr0);
+	RESTORE_REGS(metaphysical_rr4);
+	RESTORE_REGS(metaphysical_saved_rr0);
+	RESTORE_REGS(metaphysical_saved_rr4);
+	RESTORE_REGS(fp_psr);
+	RESTORE_REGS(saved_gp);
+
+	vcpu->arch.irq_new_pending = 1;
+	vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
+	set_bit(KVM_REQ_RESUME, &vcpu->requests);
+
+	return 0;
+}
+
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+		bool line_status)
+{
+	if (!irqchip_in_kernel(kvm))
+		return -ENXIO;
+
+	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event->irq, irq_event->level,
+					line_status);
+	return 0;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+		unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int r = -ENOTTY;
+
+	switch (ioctl) {
+	case KVM_CREATE_IRQCHIP:
+		r = -EFAULT;
+		r = kvm_ioapic_init(kvm);
+		if (r)
+			goto out;
+		r = kvm_setup_default_irq_routing(kvm);
+		if (r) {
+			mutex_lock(&kvm->slots_lock);
+			kvm_ioapic_destroy(kvm);
+			mutex_unlock(&kvm->slots_lock);
+			goto out;
+		}
+		break;
+	case KVM_GET_IRQCHIP: {
+		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+		struct kvm_irqchip chip;
+
+		r = -EFAULT;
+		if (copy_from_user(&chip, argp, sizeof chip))
+				goto out;
+		r = -ENXIO;
+		if (!irqchip_in_kernel(kvm))
+			goto out;
+		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &chip, sizeof chip))
+				goto out;
+		r = 0;
+		break;
+		}
+	case KVM_SET_IRQCHIP: {
+		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+		struct kvm_irqchip chip;
+
+		r = -EFAULT;
+		if (copy_from_user(&chip, argp, sizeof chip))
+				goto out;
+		r = -ENXIO;
+		if (!irqchip_in_kernel(kvm))
+			goto out;
+		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+		}
+	default:
+		;
+	}
+out:
+	return r;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+		struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+		struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+
+}
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+		struct kvm_translation *tr)
+{
+
+	return -EINVAL;
+}
+
+static int kvm_alloc_vmm_area(void)
+{
+	if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
+		kvm_vmm_base = __get_free_pages(GFP_KERNEL,
+				get_order(KVM_VMM_SIZE));
+		if (!kvm_vmm_base)
+			return -ENOMEM;
+
+		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
+		kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
+
+		printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
+				kvm_vmm_base, kvm_vm_buffer);
+	}
+
+	return 0;
+}
+
+static void kvm_free_vmm_area(void)
+{
+	if (kvm_vmm_base) {
+		/*Zero this area before free to avoid bits leak!!*/
+		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
+		free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
+		kvm_vmm_base  = 0;
+		kvm_vm_buffer = 0;
+		kvm_vsa_base = 0;
+	}
+}
+
+static int vti_init_vpd(struct kvm_vcpu *vcpu)
+{
+	int i;
+	union cpuid3_t cpuid3;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (IS_ERR(vpd))
+		return PTR_ERR(vpd);
+
+	/* CPUID init */
+	for (i = 0; i < 5; i++)
+		vpd->vcpuid[i] = ia64_get_cpuid(i);
+
+	/* Limit the CPUID number to 5 */
+	cpuid3.value = vpd->vcpuid[3];
+	cpuid3.number = 4;	/* 5 - 1 */
+	vpd->vcpuid[3] = cpuid3.value;
+
+	/*Set vac and vdc fields*/
+	vpd->vac.a_from_int_cr = 1;
+	vpd->vac.a_to_int_cr = 1;
+	vpd->vac.a_from_psr = 1;
+	vpd->vac.a_from_cpuid = 1;
+	vpd->vac.a_cover = 1;
+	vpd->vac.a_bsw = 1;
+	vpd->vac.a_int = 1;
+	vpd->vdc.d_vmsw = 1;
+
+	/*Set virtual buffer*/
+	vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
+
+	return 0;
+}
+
+static int vti_create_vp(struct kvm_vcpu *vcpu)
+{
+	long ret;
+	struct vpd *vpd = vcpu->arch.vpd;
+	unsigned long  vmm_ivt;
+
+	vmm_ivt = kvm_vmm_info->vmm_ivt;
+
+	printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
+
+	ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
+
+	if (ret) {
+		printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void init_ptce_info(struct kvm_vcpu *vcpu)
+{
+	ia64_ptce_info_t ptce = {0};
+
+	ia64_get_ptce(&ptce);
+	vcpu->arch.ptce_base = ptce.base;
+	vcpu->arch.ptce_count[0] = ptce.count[0];
+	vcpu->arch.ptce_count[1] = ptce.count[1];
+	vcpu->arch.ptce_stride[0] = ptce.stride[0];
+	vcpu->arch.ptce_stride[1] = ptce.stride[1];
+}
+
+static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
+{
+	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
+
+	if (hrtimer_cancel(p_ht))
+		hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
+}
+
+static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
+{
+	struct kvm_vcpu *vcpu;
+	wait_queue_head_t *q;
+
+	vcpu  = container_of(data, struct kvm_vcpu, arch.hlt_timer);
+	q = &vcpu->wq;
+
+	if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
+		goto out;
+
+	if (waitqueue_active(q))
+		wake_up_interruptible(q);
+
+out:
+	vcpu->arch.timer_fired = 1;
+	vcpu->arch.timer_check = 1;
+	return HRTIMER_NORESTART;
+}
+
+#define PALE_RESET_ENTRY    0x80000000ffffffb0UL
+
+bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
+{
+	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
+}
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *v;
+	int r;
+	int i;
+	long itc_offset;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	union context *p_ctx = &vcpu->arch.guest;
+	struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
+
+	/*Init vcpu context for first run.*/
+	if (IS_ERR(vmm_vcpu))
+		return PTR_ERR(vmm_vcpu);
+
+	if (kvm_vcpu_is_bsp(vcpu)) {
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+		/*Set entry address for first run.*/
+		regs->cr_iip = PALE_RESET_ENTRY;
+
+		/*Initialize itc offset for vcpus*/
+		itc_offset = 0UL - kvm_get_itc(vcpu);
+		for (i = 0; i < KVM_MAX_VCPUS; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu +
+					sizeof(struct kvm_vcpu_data) * i);
+			v->arch.itc_offset = itc_offset;
+			v->arch.last_itc = 0;
+		}
+	} else
+		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
+
+	r = -ENOMEM;
+	vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
+	if (!vcpu->arch.apic)
+		goto out;
+	vcpu->arch.apic->vcpu = vcpu;
+
+	p_ctx->gr[1] = 0;
+	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
+	p_ctx->gr[13] = (unsigned long)vmm_vcpu;
+	p_ctx->psr = 0x1008522000UL;
+	p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
+	p_ctx->caller_unat = 0;
+	p_ctx->pr = 0x0;
+	p_ctx->ar[36] = 0x0; /*unat*/
+	p_ctx->ar[19] = 0x0; /*rnat*/
+	p_ctx->ar[18] = (unsigned long)vmm_vcpu +
+				((sizeof(struct kvm_vcpu)+15) & ~15);
+	p_ctx->ar[64] = 0x0; /*pfs*/
+	p_ctx->cr[0] = 0x7e04UL;
+	p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
+	p_ctx->cr[8] = 0x3c;
+
+	/*Initialize region register*/
+	p_ctx->rr[0] = 0x30;
+	p_ctx->rr[1] = 0x30;
+	p_ctx->rr[2] = 0x30;
+	p_ctx->rr[3] = 0x30;
+	p_ctx->rr[4] = 0x30;
+	p_ctx->rr[5] = 0x30;
+	p_ctx->rr[7] = 0x30;
+
+	/*Initialize branch register 0*/
+	p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
+
+	vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
+	vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
+	vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
+
+	hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	vcpu->arch.hlt_timer.function = hlt_timer_fn;
+
+	vcpu->arch.last_run_cpu = -1;
+	vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
+	vcpu->arch.vsa_base = kvm_vsa_base;
+	vcpu->arch.__gp = kvm_vmm_gp;
+	vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
+	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
+	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
+	init_ptce_info(vcpu);
+
+	r = 0;
+out:
+	return r;
+}
+
+static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
+{
+	unsigned long psr;
+	int r;
+
+	local_irq_save(psr);
+	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
+	if (r)
+		goto fail;
+	r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
+	if (r)
+		goto fail;
+
+	r = vti_init_vpd(vcpu);
+	if (r) {
+		printk(KERN_DEBUG"kvm: vpd init error!!\n");
+		goto uninit;
+	}
+
+	r = vti_create_vp(vcpu);
+	if (r)
+		goto uninit;
+
+	kvm_purge_vmm_mapping(vcpu);
+
+	return 0;
+uninit:
+	kvm_vcpu_uninit(vcpu);
+fail:
+	return r;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+		unsigned int id)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long vm_base = kvm->arch.vm_base;
+	int r;
+	int cpu;
+
+	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
+
+	r = -EINVAL;
+	if (id >= KVM_MAX_VCPUS) {
+		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
+				KVM_MAX_VCPUS);
+		goto fail;
+	}
+
+	r = -ENOMEM;
+	if (!vm_base) {
+		printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
+		goto fail;
+	}
+	vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
+					vcpu_data[id].vcpu_struct));
+	vcpu->kvm = kvm;
+
+	cpu = get_cpu();
+	r = vti_vcpu_setup(vcpu, id);
+	put_cpu();
+
+	if (r) {
+		printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
+		goto fail;
+	}
+
+	return vcpu;
+fail:
+	return ERR_PTR(r);
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
+{
+	return -EINVAL;
+}
+
+void kvm_arch_free_vm(struct kvm *kvm)
+{
+	unsigned long vm_base = kvm->arch.vm_base;
+
+	if (vm_base) {
+		memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
+		free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
+	}
+
+}
+
+static void kvm_release_vm_pages(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int j;
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots) {
+		for (j = 0; j < memslot->npages; j++) {
+			if (memslot->rmap[j])
+				put_page((struct page *)memslot->rmap[j]);
+		}
+	}
+}
+
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	kvm_iommu_unmap_guest(kvm);
+	kvm_free_all_assigned_devices(kvm);
+	kfree(kvm->arch.vioapic);
+	kvm_release_vm_pages(kvm);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	if (cpu != vcpu->cpu) {
+		vcpu->cpu = cpu;
+		if (vcpu->arch.ht_active)
+			kvm_migrate_hlt_timer(vcpu);
+	}
+}
+
+#define SAVE_REGS(_x) 	regs->_x = vcpu->arch._x
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	int i;
+
+	vcpu_load(vcpu);
+
+	for (i = 0; i < 16; i++) {
+		regs->vpd.vgr[i] = vpd->vgr[i];
+		regs->vpd.vbgr[i] = vpd->vbgr[i];
+	}
+	for (i = 0; i < 128; i++)
+		regs->vpd.vcr[i] = vpd->vcr[i];
+	regs->vpd.vhpi = vpd->vhpi;
+	regs->vpd.vnat = vpd->vnat;
+	regs->vpd.vbnat = vpd->vbnat;
+	regs->vpd.vpsr = vpd->vpsr;
+	regs->vpd.vpr = vpd->vpr;
+
+	memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
+
+	SAVE_REGS(mp_state);
+	SAVE_REGS(vmm_rr);
+	memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
+	memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
+	SAVE_REGS(itr_regions);
+	SAVE_REGS(dtr_regions);
+	SAVE_REGS(tc_regions);
+	SAVE_REGS(irq_check);
+	SAVE_REGS(itc_check);
+	SAVE_REGS(timer_check);
+	SAVE_REGS(timer_pending);
+	SAVE_REGS(last_itc);
+	for (i = 0; i < 8; i++) {
+		regs->vrr[i] = vcpu->arch.vrr[i];
+		regs->ibr[i] = vcpu->arch.ibr[i];
+		regs->dbr[i] = vcpu->arch.dbr[i];
+	}
+	for (i = 0; i < 4; i++)
+		regs->insvc[i] = vcpu->arch.insvc[i];
+	regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
+	SAVE_REGS(xtp);
+	SAVE_REGS(metaphysical_rr0);
+	SAVE_REGS(metaphysical_rr4);
+	SAVE_REGS(metaphysical_saved_rr0);
+	SAVE_REGS(metaphysical_saved_rr4);
+	SAVE_REGS(fp_psr);
+	SAVE_REGS(saved_gp);
+
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
+				  struct kvm_ia64_vcpu_stack *stack)
+{
+	memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
+				  struct kvm_ia64_vcpu_stack *stack)
+{
+	memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
+	       sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
+
+	vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
+	return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+
+	hrtimer_cancel(&vcpu->arch.hlt_timer);
+	kfree(vcpu->arch.apic);
+}
+
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	struct kvm_ia64_vcpu_stack *stack = NULL;
+	long r;
+
+	switch (ioctl) {
+	case KVM_IA64_VCPU_GET_STACK: {
+		struct kvm_ia64_vcpu_stack __user *user_stack;
+	        void __user *first_p = argp;
+
+		r = -EFAULT;
+		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+			goto out;
+
+		if (!access_ok(VERIFY_WRITE, user_stack,
+			       sizeof(struct kvm_ia64_vcpu_stack))) {
+			printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
+			       "Illegal user destination address for stack\n");
+			goto out;
+		}
+		stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+		if (!stack) {
+			r = -ENOMEM;
+			goto out;
+		}
+
+		r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
+		if (r)
+			goto out;
+
+		if (copy_to_user(user_stack, stack,
+				 sizeof(struct kvm_ia64_vcpu_stack))) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		break;
+	}
+	case KVM_IA64_VCPU_SET_STACK: {
+		struct kvm_ia64_vcpu_stack __user *user_stack;
+	        void __user *first_p = argp;
+
+		r = -EFAULT;
+		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+			goto out;
+
+		if (!access_ok(VERIFY_READ, user_stack,
+			    sizeof(struct kvm_ia64_vcpu_stack))) {
+			printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
+			       "Illegal user address for stack\n");
+			goto out;
+		}
+		stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+		if (!stack) {
+			r = -ENOMEM;
+			goto out;
+		}
+		if (copy_from_user(stack, user_stack,
+				   sizeof(struct kvm_ia64_vcpu_stack)))
+			goto out;
+
+		r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
+		break;
+	}
+
+	default:
+		r = -EINVAL;
+	}
+
+out:
+	kfree(stack);
+	return r;
+}
+
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+	return VM_FAULT_SIGBUS;
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+			   struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
+{
+	return 0;
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+		struct kvm_memory_slot *memslot,
+		struct kvm_userspace_memory_region *mem,
+		enum kvm_mr_change change)
+{
+	unsigned long i;
+	unsigned long pfn;
+	int npages = memslot->npages;
+	unsigned long base_gfn = memslot->base_gfn;
+
+	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
+		return -ENOMEM;
+
+	for (i = 0; i < npages; i++) {
+		pfn = gfn_to_pfn(kvm, base_gfn + i);
+		if (!kvm_is_mmio_pfn(pfn)) {
+			kvm_set_pmt_entry(kvm, base_gfn + i,
+					pfn << PAGE_SHIFT,
+				_PAGE_AR_RWX | _PAGE_MA_WB);
+			memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
+		} else {
+			kvm_set_pmt_entry(kvm, base_gfn + i,
+					GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
+					_PAGE_MA_UC);
+			memslot->rmap[i] = 0;
+			}
+	}
+
+	return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+		struct kvm_userspace_memory_region *mem,
+		const struct kvm_memory_slot *old,
+		enum kvm_mr_change change)
+{
+	return;
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+	kvm_flush_remote_tlbs(kvm);
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+	kvm_arch_flush_shadow_all();
+}
+
+long kvm_arch_dev_ioctl(struct file *filp,
+			unsigned int ioctl, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvm_vcpu_uninit(vcpu);
+}
+
+static int vti_cpu_has_kvm_support(void)
+{
+	long  avail = 1, status = 1, control = 1;
+	long ret;
+
+	ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
+	if (ret)
+		goto out;
+
+	if (!(avail & PAL_PROC_VM_BIT))
+		goto out;
+
+	printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
+
+	ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
+	if (ret)
+		goto out;
+	printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
+
+	if (!(vp_env_info & VP_OPCODE)) {
+		printk(KERN_WARNING"kvm: No opcode ability on hardware, "
+				"vm_env_info:0x%lx\n", vp_env_info);
+	}
+
+	return 1;
+out:
+	return 0;
+}
+
+
+/*
+ * On SN2, the ITC isn't stable, so copy in fast path code to use the
+ * SN2 RTC, replacing the ITC based default verion.
+ */
+static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
+			  struct module *module)
+{
+	unsigned long new_ar, new_ar_sn2;
+	unsigned long module_base;
+
+	if (!ia64_platform_is("sn2"))
+		return;
+
+	module_base = (unsigned long)module->module_core;
+
+	new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
+	new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
+
+	printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
+	       "as source\n");
+
+	/*
+	 * Copy the SN2 version of mov_ar into place. They are both
+	 * the same size, so 6 bundles is sufficient (6 * 0x10).
+	 */
+	memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
+}
+
+static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
+			    struct module *module)
+{
+	unsigned long module_base;
+	unsigned long vmm_size;
+
+	unsigned long vmm_offset, func_offset, fdesc_offset;
+	struct fdesc *p_fdesc;
+
+	BUG_ON(!module);
+
+	if (!kvm_vmm_base) {
+		printk("kvm: kvm area hasn't been initialized yet!!\n");
+		return -EFAULT;
+	}
+
+	/*Calculate new position of relocated vmm module.*/
+	module_base = (unsigned long)module->module_core;
+	vmm_size = module->core_size;
+	if (unlikely(vmm_size > KVM_VMM_SIZE))
+		return -EFAULT;
+
+	memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
+	kvm_patch_vmm(vmm_info, module);
+	kvm_flush_icache(kvm_vmm_base, vmm_size);
+
+	/*Recalculate kvm_vmm_info based on new VMM*/
+	vmm_offset = vmm_info->vmm_ivt - module_base;
+	kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
+	printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
+			kvm_vmm_info->vmm_ivt);
+
+	fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
+	kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
+							fdesc_offset);
+	func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
+	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
+	p_fdesc->ip = KVM_VMM_BASE + func_offset;
+	p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
+
+	printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
+			KVM_VMM_BASE+func_offset);
+
+	fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
+	kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
+			fdesc_offset);
+	func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
+	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
+	p_fdesc->ip = KVM_VMM_BASE + func_offset;
+	p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
+
+	kvm_vmm_gp = p_fdesc->gp;
+
+	printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
+						kvm_vmm_info->vmm_entry);
+	printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
+						KVM_VMM_BASE + func_offset);
+
+	return 0;
+}
+
+int kvm_arch_init(void *opaque)
+{
+	int r;
+	struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
+
+	if (!vti_cpu_has_kvm_support()) {
+		printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (kvm_vmm_info) {
+		printk(KERN_ERR "kvm: Already loaded VMM module!\n");
+		r = -EEXIST;
+		goto out;
+	}
+
+	r = -ENOMEM;
+	kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
+	if (!kvm_vmm_info)
+		goto out;
+
+	if (kvm_alloc_vmm_area())
+		goto out_free0;
+
+	r = kvm_relocate_vmm(vmm_info, vmm_info->module);
+	if (r)
+		goto out_free1;
+
+	return 0;
+
+out_free1:
+	kvm_free_vmm_area();
+out_free0:
+	kfree(kvm_vmm_info);
+out:
+	return r;
+}
+
+void kvm_arch_exit(void)
+{
+	kvm_free_vmm_area();
+	kfree(kvm_vmm_info);
+	kvm_vmm_info = NULL;
+}
+
+static void kvm_ia64_sync_dirty_log(struct kvm *kvm,
+				    struct kvm_memory_slot *memslot)
+{
+	int i;
+	long base;
+	unsigned long n;
+	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
+			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
+
+	n = kvm_dirty_bitmap_bytes(memslot);
+	base = memslot->base_gfn / BITS_PER_LONG;
+
+	spin_lock(&kvm->arch.dirty_log_lock);
+	for (i = 0; i < n/sizeof(long); ++i) {
+		memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
+		dirty_bitmap[base + i] = 0;
+	}
+	spin_unlock(&kvm->arch.dirty_log_lock);
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+		struct kvm_dirty_log *log)
+{
+	int r;
+	unsigned long n;
+	struct kvm_memory_slot *memslot;
+	int is_dirty = 0;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	memslot = id_to_memslot(kvm->memslots, log->slot);
+	r = -ENOENT;
+	if (!memslot->dirty_bitmap)
+		goto out;
+
+	kvm_ia64_sync_dirty_log(kvm, memslot);
+	r = kvm_get_dirty_log(kvm, log, &is_dirty);
+	if (r)
+		goto out;
+
+	/* If nothing is dirty, don't bother messing with page tables. */
+	if (is_dirty) {
+		kvm_flush_remote_tlbs(kvm);
+		n = kvm_dirty_bitmap_bytes(memslot);
+		memset(memslot->dirty_bitmap, 0, n);
+	}
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_arch_hardware_setup(void)
+{
+	return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
+{
+	return __apic_accept_irq(vcpu, irq->vector);
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+{
+	return apic->vcpu->vcpu_id == dest;
+}
+
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+{
+	return 0;
+}
+
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
+{
+	return vcpu1->arch.xtp - vcpu2->arch.xtp;
+}
+
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode)
+{
+	struct kvm_lapic *target = vcpu->arch.apic;
+	return (dest_mode == 0) ?
+		kvm_apic_match_physical_addr(target, dest) :
+		kvm_apic_match_logical_addr(target, dest);
+}
+
+static int find_highest_bits(int *dat)
+{
+	u32  bits, bitnum;
+	int i;
+
+	/* loop for all 256 bits */
+	for (i = 7; i >= 0 ; i--) {
+		bits = dat[i];
+		if (bits) {
+			bitnum = fls(bits);
+			return i * 32 + bitnum - 1;
+		}
+	}
+
+	return -1;
+}
+
+int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
+{
+    struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+    if (vpd->irr[0] & (1UL << NMI_VECTOR))
+		return NMI_VECTOR;
+    if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+    return find_highest_bits((int *)&vpd->irr[0]);
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.timer_fired;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
+		(kvm_highest_pending_irq(vcpu) != -1);
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+	return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests));
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	mp_state->mp_state = vcpu->arch.mp_state;
+	return 0;
+}
+
+static int vcpu_reset(struct kvm_vcpu *vcpu)
+{
+	int r;
+	long psr;
+	local_irq_save(psr);
+	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
+	if (r)
+		goto fail;
+
+	vcpu->arch.launched = 0;
+	kvm_arch_vcpu_uninit(vcpu);
+	r = kvm_arch_vcpu_init(vcpu);
+	if (r)
+		goto fail;
+
+	kvm_purge_vmm_mapping(vcpu);
+	r = 0;
+fail:
+	return r;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	int r = 0;
+
+	vcpu->arch.mp_state = mp_state->mp_state;
+	if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
+		r = vcpu_reset(vcpu);
+	return r;
+}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
new file mode 100644
index 00000000000..cb548ee9fca
--- /dev/null
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -0,0 +1,674 @@
+/*
+ * PAL/SAL call delegation
+ *
+ * Copyright (c) 2004 Li Susie <susie.li@intel.com>
+ * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
+ * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/smp.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>
+
+#include "vti.h"
+#include "misc.h"
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/tlb.h>
+
+/*
+ * Handy macros to make sure that the PAL return values start out
+ * as something meaningful.
+ */
+#define INIT_PAL_STATUS_UNIMPLEMENTED(x)		\
+	{						\
+		x.status = PAL_STATUS_UNIMPLEMENTED;	\
+		x.v0 = 0;				\
+		x.v1 = 0;				\
+		x.v2 = 0;				\
+	}
+
+#define INIT_PAL_STATUS_SUCCESS(x)			\
+	{						\
+		x.status = PAL_STATUS_SUCCESS;		\
+		x.v0 = 0;				\
+		x.v1 = 0;				\
+		x.v2 = 0;				\
+    }
+
+static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
+		u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
+	struct exit_ctl_data *p;
+
+	if (vcpu) {
+		p = &vcpu->arch.exit_data;
+		if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+			*gr28 = p->u.pal_data.gr28;
+			*gr29 = p->u.pal_data.gr29;
+			*gr30 = p->u.pal_data.gr30;
+			*gr31 = p->u.pal_data.gr31;
+			return ;
+		}
+	}
+	printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
+}
+
+static void set_pal_result(struct kvm_vcpu *vcpu,
+		struct ia64_pal_retval result) {
+
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+		p->u.pal_data.ret = result;
+		return ;
+	}
+	INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
+}
+
+static void set_sal_result(struct kvm_vcpu *vcpu,
+		struct sal_ret_values result) {
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		p->u.sal_data.ret = result;
+		return ;
+	}
+	printk(KERN_WARNING"Failed to set sal result!!\n");
+}
+
+struct cache_flush_args {
+	u64 cache_type;
+	u64 operation;
+	u64 progress;
+	long status;
+};
+
+cpumask_t cpu_cache_coherent_map;
+
+static void remote_pal_cache_flush(void *data)
+{
+	struct cache_flush_args *args = data;
+	long status;
+	u64 progress = args->progress;
+
+	status = ia64_pal_cache_flush(args->cache_type, args->operation,
+					&progress, NULL);
+	if (status != 0)
+	args->status = status;
+}
+
+static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
+{
+	u64 gr28, gr29, gr30, gr31;
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	struct cache_flush_args args = {0, 0, 0, 0};
+	long psr;
+
+	gr28 = gr29 = gr30 = gr31 = 0;
+	kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
+
+	if (gr31 != 0)
+		printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
+
+	/* Always call Host Pal in int=1 */
+	gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
+	args.cache_type = gr29;
+	args.operation = gr30;
+	smp_call_function(remote_pal_cache_flush,
+				(void *)&args, 1);
+	if (args.status != 0)
+		printk(KERN_ERR"pal_cache_flush error!,"
+				"status:0x%lx\n", args.status);
+	/*
+	 * Call Host PAL cache flush
+	 * Clear psr.ic when call PAL_CACHE_FLUSH
+	 */
+	local_irq_save(psr);
+	result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
+						&result.v0);
+	local_irq_restore(psr);
+	if (result.status != 0)
+		printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
+				"in1:%lx,in2:%lx\n",
+				vcpu, result.status, gr29, gr30);
+
+#if 0
+	if (gr29 == PAL_CACHE_TYPE_COHERENT) {
+		cpus_setall(vcpu->arch.cache_coherent_map);
+		cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
+		cpus_setall(cpu_cache_coherent_map);
+		cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
+	}
+#endif
+	return result;
+}
+
+struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
+	return result;
+}
+
+static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
+
+	/*
+	 * PAL_FREQ_BASE may not be implemented in some platforms,
+	 * call SAL instead.
+	 */
+	if (result.v0 == 0) {
+		result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+							&result.v0,
+							&result.v1);
+		result.v2 = 0;
+	}
+
+	return result;
+}
+
+/*
+ * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
+ * RTC is used instead. This function patches the ratios from SAL
+ * to match the RTC before providing them to the guest.
+ */
+static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
+{
+	struct pal_freq_ratio *ratio;
+	unsigned long sal_freq, sal_drift, factor;
+
+	result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+					    &sal_freq, &sal_drift);
+	ratio = (struct pal_freq_ratio *)&result->v2;
+	factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
+		sn_rtc_cycles_per_second;
+
+	ratio->num = 3;
+	ratio->den = factor;
+}
+
+static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
+
+	if (vcpu->kvm->arch.is_sn2)
+		sn2_patch_itc_freq_ratios(&result);
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result;
+
+	INIT_PAL_STATUS_UNIMPLEMENTED(result);
+	return result;
+}
+
+static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	INIT_PAL_STATUS_SUCCESS(result);
+	return result;
+}
+
+static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
+			&result.v2, in2);
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
+{
+
+	pal_cache_config_info_t ci;
+	long status;
+	unsigned long in0, in1, in2, in3, r9, r10;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	status = ia64_pal_cache_config_info(in1, in2, &ci);
+	r9 = ci.pcci_info_1.pcci1_data;
+	r10 = ci.pcci_info_2.pcci2_data;
+	return ((struct ia64_pal_retval){status, r9, r10, 0});
+}
+
+#define GUEST_IMPL_VA_MSB	59
+#define GUEST_RID_BITS		18
+
+static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
+{
+
+	pal_vm_info_1_u_t vminfo1;
+	pal_vm_info_2_u_t vminfo2;
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
+	if (!result.status) {
+		vminfo1.pvi1_val = result.v0;
+		vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
+		vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
+		result.v0 = vminfo1.pvi1_val;
+		vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
+		vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
+		result.v1 = vminfo2.pvi2_val;
+	}
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result;
+	unsigned long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+	result.status = ia64_pal_vm_info(in1, in2,
+			(pal_tc_info_u_t *)&result.v1, &result.v2);
+
+	return result;
+}
+
+static  u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
+{
+	u64 index = 0;
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p->exit_reason == EXIT_REASON_PAL_CALL)
+		index = p->u.pal_data.gr28;
+
+	return index;
+}
+
+static void prepare_for_halt(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.timer_pending = 1;
+	vcpu->arch.timer_fired = 0;
+}
+
+static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
+{
+	long status;
+	unsigned long in0, in1, in2, in3, r9;
+	unsigned long pm_buffer[16];
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	status = ia64_pal_perf_mon_info(pm_buffer,
+				(pal_perf_mon_info_u_t *) &r9);
+	if (status != 0) {
+		printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
+	} else {
+		if (in1)
+			memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
+		else {
+			status = PAL_STATUS_EINVAL;
+			printk(KERN_WARNING"Invalid parameters "
+						"for PAL call:0x%lx!\n", in0);
+		}
+	}
+	return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
+{
+	unsigned long in0, in1, in2, in3;
+	long status;
+	unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
+					| (1UL << 61) | (1UL << 60);
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	if (in1) {
+		memcpy((void *)in1, &res, sizeof(res));
+		status = 0;
+	} else{
+		status = PAL_STATUS_EINVAL;
+		printk(KERN_WARNING"Invalid parameters "
+					"for PAL call:0x%lx!\n", in0);
+	}
+
+	return (struct ia64_pal_retval){status, 0, 0, 0};
+}
+
+static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
+{
+	unsigned long r9;
+	long status;
+
+	status = ia64_pal_mem_attrib(&r9);
+
+	return (struct ia64_pal_retval){status, r9, 0, 0};
+}
+
+static void remote_pal_prefetch_visibility(void *v)
+{
+	s64 trans_type = (s64)v;
+	ia64_pal_prefetch_visibility(trans_type);
+}
+
+static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	unsigned long in0, in1, in2, in3;
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_prefetch_visibility(in1);
+	if (result.status == 0) {
+		/* Must be performed on all remote processors
+		in the coherence domain. */
+		smp_call_function(remote_pal_prefetch_visibility,
+					(void *)in1, 1);
+		/* Unnecessary on remote processor for other vcpus!*/
+		result.status = 1;
+	}
+	return result;
+}
+
+static void remote_pal_mc_drain(void *v)
+{
+	ia64_pal_mc_drain();
+}
+
+static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	unsigned long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+
+	if (in1 == 0 && in2) {
+		char brand_info[128];
+		result.status = ia64_pal_get_brand_info(brand_info);
+		if (result.status == PAL_STATUS_SUCCESS)
+			memcpy((void *)in2, brand_info, 128);
+	} else {
+		result.status = PAL_STATUS_REQUIRES_MEMORY;
+		printk(KERN_WARNING"Invalid parameters for "
+					"PAL call:0x%lx!\n", in0);
+	}
+
+	return result;
+}
+
+int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+
+	u64 gr28;
+	struct ia64_pal_retval result;
+	int ret = 1;
+
+	gr28 = kvm_get_pal_call_index(vcpu);
+	switch (gr28) {
+	case PAL_CACHE_FLUSH:
+		result = pal_cache_flush(vcpu);
+		break;
+	case PAL_MEM_ATTRIB:
+		result = pal_mem_attrib(vcpu);
+		break;
+	case PAL_CACHE_SUMMARY:
+		result = pal_cache_summary(vcpu);
+		break;
+	case PAL_PERF_MON_INFO:
+		result = pal_perf_mon_info(vcpu);
+		break;
+	case PAL_HALT_INFO:
+		result = pal_halt_info(vcpu);
+		break;
+	case PAL_HALT_LIGHT:
+	{
+		INIT_PAL_STATUS_SUCCESS(result);
+		prepare_for_halt(vcpu);
+		if (kvm_highest_pending_irq(vcpu) == -1)
+			ret = kvm_emulate_halt(vcpu);
+	}
+		break;
+
+	case PAL_PREFETCH_VISIBILITY:
+		result = pal_prefetch_visibility(vcpu);
+		break;
+	case PAL_MC_DRAIN:
+		result.status = ia64_pal_mc_drain();
+		/* FIXME: All vcpus likely call PAL_MC_DRAIN.
+		   That causes the congestion. */
+		smp_call_function(remote_pal_mc_drain, NULL, 1);
+		break;
+
+	case PAL_FREQ_RATIOS:
+		result = pal_freq_ratios(vcpu);
+		break;
+
+	case PAL_FREQ_BASE:
+		result = pal_freq_base(vcpu);
+		break;
+
+	case PAL_LOGICAL_TO_PHYSICAL :
+		result = pal_logical_to_physica(vcpu);
+		break;
+
+	case PAL_VM_SUMMARY :
+		result = pal_vm_summary(vcpu);
+		break;
+
+	case PAL_VM_INFO :
+		result = pal_vm_info(vcpu);
+		break;
+	case PAL_PLATFORM_ADDR :
+		result = pal_platform_addr(vcpu);
+		break;
+	case PAL_CACHE_INFO:
+		result = pal_cache_info(vcpu);
+		break;
+	case PAL_PTCE_INFO:
+		INIT_PAL_STATUS_SUCCESS(result);
+		result.v1 = (1L << 32) | 1L;
+		break;
+	case PAL_REGISTER_INFO:
+		result = pal_register_info(vcpu);
+		break;
+	case PAL_VM_PAGE_SIZE:
+		result.status = ia64_pal_vm_page_size(&result.v0,
+							&result.v1);
+		break;
+	case PAL_RSE_INFO:
+		result.status = ia64_pal_rse_info(&result.v0,
+					(pal_hints_u_t *)&result.v1);
+		break;
+	case PAL_PROC_GET_FEATURES:
+		result = pal_proc_get_features(vcpu);
+		break;
+	case PAL_DEBUG_INFO:
+		result.status = ia64_pal_debug_info(&result.v0,
+							&result.v1);
+		break;
+	case PAL_VERSION:
+		result.status = ia64_pal_version(
+				(pal_version_u_t *)&result.v0,
+				(pal_version_u_t *)&result.v1);
+		break;
+	case PAL_FIXED_ADDR:
+		result.status = PAL_STATUS_SUCCESS;
+		result.v0 = vcpu->vcpu_id;
+		break;
+	case PAL_BRAND_INFO:
+		result = pal_get_brand_info(vcpu);
+		break;
+	case PAL_GET_PSTATE:
+	case PAL_CACHE_SHARED_INFO:
+		INIT_PAL_STATUS_UNIMPLEMENTED(result);
+		break;
+	default:
+		INIT_PAL_STATUS_UNIMPLEMENTED(result);
+		printk(KERN_WARNING"kvm: Unsupported pal call,"
+					" index:0x%lx\n", gr28);
+	}
+	set_pal_result(vcpu, result);
+	return ret;
+}
+
+static struct sal_ret_values sal_emulator(struct kvm *kvm,
+				long index, unsigned long in1,
+				unsigned long in2, unsigned long in3,
+				unsigned long in4, unsigned long in5,
+				unsigned long in6, unsigned long in7)
+{
+	unsigned long r9  = 0;
+	unsigned long r10 = 0;
+	long r11 = 0;
+	long status;
+
+	status = 0;
+	switch (index) {
+	case SAL_FREQ_BASE:
+		status = ia64_sal_freq_base(in1, &r9, &r10);
+		break;
+	case SAL_PCI_CONFIG_READ:
+		printk(KERN_WARNING"kvm: Not allowed to call here!"
+			" SAL_PCI_CONFIG_READ\n");
+		break;
+	case SAL_PCI_CONFIG_WRITE:
+		printk(KERN_WARNING"kvm: Not allowed to call here!"
+			" SAL_PCI_CONFIG_WRITE\n");
+		break;
+	case SAL_SET_VECTORS:
+		if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
+			if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
+				status = -2;
+			} else {
+				kvm->arch.rdv_sal_data.boot_ip = in2;
+				kvm->arch.rdv_sal_data.boot_gp = in3;
+			}
+			printk("Rendvous called! iip:%lx\n\n", in2);
+		} else
+			printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
+							"ignored...\n", in1);
+		break;
+	case SAL_GET_STATE_INFO:
+		/* No more info.  */
+		status = -5;
+		r9 = 0;
+		break;
+	case SAL_GET_STATE_INFO_SIZE:
+		/* Return a dummy size.  */
+		status = 0;
+		r9 = 128;
+		break;
+	case SAL_CLEAR_STATE_INFO:
+		/* Noop.  */
+		break;
+	case SAL_MC_RENDEZ:
+		printk(KERN_WARNING
+			"kvm: called SAL_MC_RENDEZ. ignored...\n");
+		break;
+	case SAL_MC_SET_PARAMS:
+		printk(KERN_WARNING
+			"kvm: called  SAL_MC_SET_PARAMS.ignored!\n");
+		break;
+	case SAL_CACHE_FLUSH:
+		if (1) {
+			/*Flush using SAL.
+			This method is faster but has a side
+			effect on other vcpu running on
+			this cpu.  */
+			status = ia64_sal_cache_flush(in1);
+		} else {
+			/*Maybe need to implement the method
+			without side effect!*/
+			status = 0;
+		}
+		break;
+	case SAL_CACHE_INIT:
+		printk(KERN_WARNING
+			"kvm: called SAL_CACHE_INIT.  ignored...\n");
+		break;
+	case SAL_UPDATE_PAL:
+		printk(KERN_WARNING
+			"kvm: CALLED SAL_UPDATE_PAL.  ignored...\n");
+		break;
+	default:
+		printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
+						" index:%ld\n", index);
+		status = -1;
+		break;
+	}
+	return ((struct sal_ret_values) {status, r9, r10, r11});
+}
+
+static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
+		u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
+
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		*in0 = p->u.sal_data.in0;
+		*in1 = p->u.sal_data.in1;
+		*in2 = p->u.sal_data.in2;
+		*in3 = p->u.sal_data.in3;
+		*in4 = p->u.sal_data.in4;
+		*in5 = p->u.sal_data.in5;
+		*in6 = p->u.sal_data.in6;
+		*in7 = p->u.sal_data.in7;
+		return ;
+	}
+	*in0 = 0;
+}
+
+void kvm_sal_emul(struct kvm_vcpu *vcpu)
+{
+
+	struct sal_ret_values result;
+	u64 index, in1, in2, in3, in4, in5, in6, in7;
+
+	kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
+			&in3, &in4, &in5, &in6, &in7);
+	result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
+					in4, in5, in6, in7);
+	set_sal_result(vcpu, result);
+}
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
new file mode 100644
index 00000000000..f1268b8e6f9
--- /dev/null
+++ b/arch/ia64/kvm/kvm_lib.c
@@ -0,0 +1,21 @@
+/*
+ * kvm_lib.c: Compile some libraries for kvm-intel module.
+ *
+ *	Just include kernel's library, and disable symbols export.
+ * 	Copyright (C) 2008, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#undef CONFIG_MODULES
+#include <linux/module.h>
+#undef CONFIG_KALLSYMS
+#undef EXPORT_SYMBOL
+#undef EXPORT_SYMBOL_GPL
+#define EXPORT_SYMBOL(sym)
+#define EXPORT_SYMBOL_GPL(sym)
+#include "../../../lib/vsprintf.c"
+#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
new file mode 100644
index 00000000000..b2bcaa2787a
--- /dev/null
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -0,0 +1,266 @@
+/*
+ *  kvm_minstate.h: min save macros
+ *  Copyright (c) 2007, Intel Corporation.
+ *
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/types.h>
+#include <asm/kregs.h>
+#include <asm/kvm_host.h>
+
+#include "asm-offsets.h"
+
+#define KVM_MINSTATE_START_SAVE_MIN	     					\
+	mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
+	;;									\
+	mov.m r28 = ar.rnat;                                  			\
+	addl r22 = VMM_RBS_OFFSET,r1;            /* compute base of RBS */	\
+	;;									\
+	lfetch.fault.excl.nt1 [r22];						\
+	addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;  \
+	mov r23 = ar.bspstore;			/* save ar.bspstore */          \
+	;;									\
+	mov ar.bspstore = r22;				/* switch to kernel RBS */\
+	;;									\
+	mov r18 = ar.bsp;							\
+	mov ar.rsc = 0x3;     /* set eager mode, pl 0, little-endian, loadrs=0 */
+
+
+
+#define KVM_MINSTATE_END_SAVE_MIN						\
+	bsw.1;          /* switch back to bank 1 (must be last in insn group) */\
+	;;
+
+
+#define PAL_VSA_SYNC_READ						\
+	/* begin to call pal vps sync_read */				\
+{.mii;									\
+	add r25 = VMM_VPD_BASE_OFFSET, r21;				\
+	nop 0x0;							\
+	mov r24=ip;							\
+	;;								\
+}									\
+{.mmb									\
+	add r24=0x20, r24;						\
+	ld8 r25 = [r25];      /* read vpd base */			\
+	br.cond.sptk kvm_vps_sync_read;		/*call the service*/	\
+	;;								\
+};									\
+
+
+#define KVM_MINSTATE_GET_CURRENT(reg)   mov reg=r21
+
+/*
+ * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ *  psr.ic: off
+ *  r31:	contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ *  psr.ic: off
+ *   r2 = points to &pt_regs.r16
+ *   r8 = contents of ar.ccv
+ *   r9 = contents of ar.csd
+ *  r10 = contents of ar.ssd
+ *  r11 = FPSR_DEFAULT
+ *  r12 = kernel sp (kernel virtual address)
+ *  r13 = points to current task_struct (kernel virtual address)
+ *  p15 = TRUE if psr.i is set in cr.ipsr
+ *  predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ *	  preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro.  This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+
+
+#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
+
+#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)			\
+	KVM_MINSTATE_GET_CURRENT(r16);  /* M (or M;;I) */	\
+	mov r27 = ar.rsc;         /* M */			\
+	mov r20 = r1;         /* A */				\
+	mov r25 = ar.unat;        /* M */			\
+	mov r29 = cr.ipsr;        /* M */			\
+	mov r26 = ar.pfs;         /* I */			\
+	mov r18 = cr.isr;         				\
+	COVER;              /* B;; (or nothing) */		\
+	;;							\
+	tbit.z p0,p15 = r29,IA64_PSR_I_BIT;			\
+	mov r1 = r16;						\
+/*	mov r21=r16;	*/					\
+	/* switch from user to kernel RBS: */			\
+	;;							\
+	invala;             /* M */				\
+	SAVE_IFS;						\
+	;;							\
+	KVM_MINSTATE_START_SAVE_MIN				\
+	adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */	\
+	adds r16 = PT(CR_IPSR),r1;				\
+	;;							\
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;		\
+	st8 [r16] = r29;      /* save cr.ipsr */		\
+	;;							\
+	lfetch.fault.excl.nt1 [r17];				\
+	tbit.nz p15,p0 = r29,IA64_PSR_I_BIT;			\
+	mov r29 = b0						\
+	;;							\
+	adds r16 = PT(R8),r1; /* initialize first base pointer */\
+	adds r17 = PT(R9),r1; /* initialize second base pointer */\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r8,16;			\
+.mem.offset 8,0; st8.spill [r17] = r9,16;			\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r10,24;			\
+.mem.offset 8,0; st8.spill [r17] = r11,24;			\
+	;;							\
+	mov r9 = cr.iip;         /* M */			\
+	mov r10 = ar.fpsr;        /* M */			\
+	;;							\
+	st8 [r16] = r9,16;    /* save cr.iip */			\
+	st8 [r17] = r30,16;   /* save cr.ifs */			\
+	sub r18 = r18,r22;    /* r18=RSE.ndirty*8 */		\
+	;;							\
+	st8 [r16] = r25,16;   /* save ar.unat */		\
+	st8 [r17] = r26,16;    /* save ar.pfs */		\
+	shl r18 = r18,16;     /* calu ar.rsc used for "loadrs" */\
+	;;							\
+	st8 [r16] = r27,16;   /* save ar.rsc */			\
+	st8 [r17] = r28,16;   /* save ar.rnat */		\
+	;;          /* avoid RAW on r16 & r17 */		\
+	st8 [r16] = r23,16;   /* save ar.bspstore */		\
+	st8 [r17] = r31,16;   /* save predicates */		\
+	;;							\
+	st8 [r16] = r29,16;   /* save b0 */			\
+	st8 [r17] = r18,16;   /* save ar.rsc value for "loadrs" */\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */  \
+.mem.offset 8,0; st8.spill [r17] = r12,16;			\
+	adds r12 = -16,r1;    /* switch to kernel memory stack */  \
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r13,16;			\
+.mem.offset 8,0; st8.spill [r17] = r10,16;	/* save ar.fpsr */\
+	mov r13 = r21;   /* establish `current' */		\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r15,16;			\
+.mem.offset 8,0; st8.spill [r17] = r14,16;			\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r2,16;			\
+.mem.offset 8,0; st8.spill [r17] = r3,16;			\
+	adds r2 = VMM_PT_REGS_R16_OFFSET,r1;			\
+	 ;;							\
+	adds r16 = VMM_VCPU_IIPA_OFFSET,r13;			\
+	adds r17 = VMM_VCPU_ISR_OFFSET,r13;			\
+	mov r26 = cr.iipa;					\
+	mov r27 = cr.isr;					\
+	;;							\
+	st8 [r16] = r26;					\
+	st8 [r17] = r27;					\
+	;;							\
+	EXTRA;							\
+	mov r8 = ar.ccv;					\
+	mov r9 = ar.csd;					\
+	mov r10 = ar.ssd;					\
+	movl r11 = FPSR_DEFAULT;   /* L-unit */			\
+	adds r17 = VMM_VCPU_GP_OFFSET,r13;			\
+	;;							\
+	ld8 r1 = [r17];/* establish kernel global pointer */	\
+	;;							\
+	PAL_VSA_SYNC_READ					\
+	KVM_MINSTATE_END_SAVE_MIN
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ *  psr.ic: on
+ *  r2: points to &pt_regs.f6
+ *  r3: points to &pt_regs.f7
+ *  r8: contents of ar.ccv
+ *  r9: contents of ar.csd
+ *  r10:	contents of ar.ssd
+ *  r11:	FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+#define KVM_SAVE_REST				\
+.mem.offset 0,0; st8.spill [r2] = r16,16;	\
+.mem.offset 8,0; st8.spill [r3] = r17,16;	\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r18,16;	\
+.mem.offset 8,0; st8.spill [r3] = r19,16;	\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r20,16;	\
+.mem.offset 8,0; st8.spill [r3] = r21,16;	\
+	mov r18=b6;			\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r22,16;	\
+.mem.offset 8,0; st8.spill [r3] = r23,16;	\
+	mov r19 = b7;				\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r24,16;	\
+.mem.offset 8,0; st8.spill [r3] = r25,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r26,16;	\
+.mem.offset 8,0; st8.spill [r3] = r27,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r28,16;	\
+.mem.offset 8,0; st8.spill [r3] = r29,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r30,16;	\
+.mem.offset 8,0; st8.spill [r3] = r31,32;	\
+	;;					\
+	mov ar.fpsr = r11;			\
+	st8 [r2] = r8,8;			\
+	adds r24 = PT(B6)-PT(F7),r3;		\
+	adds r25 = PT(B7)-PT(F7),r3;		\
+	;;					\
+	st8 [r24] = r18,16;       /* b6 */	\
+	st8 [r25] = r19,16;       /* b7 */	\
+	adds r2 = PT(R4)-PT(F6),r2;		\
+	adds r3 = PT(R5)-PT(F7),r3;		\
+	;;					\
+	st8 [r24] = r9;	/* ar.csd */		\
+	st8 [r25] = r10;	/* ar.ssd */	\
+	;;					\
+	mov r18 = ar.unat;			\
+	adds r19 = PT(EML_UNAT)-PT(R4),r2;	\
+	;;					\
+	st8 [r19] = r18; /* eml_unat */ 	\
+
+
+#define KVM_SAVE_EXTRA				\
+.mem.offset 0,0; st8.spill [r2] = r4,16;	\
+.mem.offset 8,0; st8.spill [r3] = r5,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r6,16;	\
+.mem.offset 8,0; st8.spill [r3] = r7;		\
+	;;					\
+	mov r26 = ar.unat;			\
+	;;					\
+	st8 [r2] = r26;/* eml_unat */ 		\
+
+#define KVM_SAVE_MIN_WITH_COVER		KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
+#define KVM_SAVE_MIN_WITH_COVER_R19	KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
+#define KVM_SAVE_MIN			KVM_DO_SAVE_MIN(     , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
new file mode 100644
index 00000000000..c5f92a926a9
--- /dev/null
+++ b/arch/ia64/kvm/lapic.h
@@ -0,0 +1,30 @@
+#ifndef __KVM_IA64_LAPIC_H
+#define __KVM_IA64_LAPIC_H
+
+#include <linux/kvm_host.h>
+
+/*
+ * vlsapic
+ */
+struct kvm_lapic{
+	struct kvm_vcpu *vcpu;
+	uint64_t insvc[4];
+	uint64_t vhpi;
+	uint8_t xtp;
+	uint8_t pal_init_pending;
+	uint8_t pad[2];
+};
+
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_free_lapic(struct kvm_vcpu *vcpu);
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode);
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+#define kvm_apic_present(x) (true)
+#define kvm_lapic_enabled(x) (true)
+
+#endif
diff --git a/arch/ia64/kvm/memcpy.S b/arch/ia64/kvm/memcpy.S
new file mode 100644
index 00000000000..c04cdbe9f80
--- /dev/null
+++ b/arch/ia64/kvm/memcpy.S
@@ -0,0 +1 @@
+#include "../lib/memcpy.S"
diff --git a/arch/ia64/kvm/memset.S b/arch/ia64/kvm/memset.S
new file mode 100644
index 00000000000..83c3066d844
--- /dev/null
+++ b/arch/ia64/kvm/memset.S
@@ -0,0 +1 @@
+#include "../lib/memset.S"
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
new file mode 100644
index 00000000000..dd979e00b57
--- /dev/null
+++ b/arch/ia64/kvm/misc.h
@@ -0,0 +1,94 @@
+#ifndef __KVM_IA64_MISC_H
+#define __KVM_IA64_MISC_H
+
+#include <linux/kvm_host.h>
+/*
+ * misc.h
+ * 	Copyright (C) 2007, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/*
+ *Return p2m base address at host side!
+ */
+static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
+{
+	return (uint64_t *)(kvm->arch.vm_base +
+				offsetof(struct kvm_vm_data, kvm_p2m));
+}
+
+static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
+		u64 paddr, u64 mem_flags)
+{
+	uint64_t *pmt_base = kvm_host_get_pmt(kvm);
+	unsigned long pte;
+
+	pte = PAGE_ALIGN(paddr) | mem_flags;
+	pmt_base[gfn] = pte;
+}
+
+/*Function for translating host address to guest address*/
+
+static inline void *to_guest(struct kvm *kvm, void *addr)
+{
+	return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
+			KVM_VM_DATA_BASE);
+}
+
+/*Function for translating guest address to host address*/
+
+static inline void *to_host(struct kvm *kvm, void *addr)
+{
+	return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
+			+ kvm->arch.vm_base);
+}
+
+/* Get host context of the vcpu */
+static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
+{
+	union context *ctx = &vcpu->arch.host;
+	return to_guest(vcpu->kvm, ctx);
+}
+
+/* Get guest context of the vcpu */
+static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
+{
+	union context *ctx = &vcpu->arch.guest;
+	return  to_guest(vcpu->kvm, ctx);
+}
+
+/* kvm get exit data from gvmm! */
+static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
+{
+	return &vcpu->arch.exit_data;
+}
+
+/*kvm get vcpu ioreq for kvm module!*/
+static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p_ctl_data;
+
+	if (vcpu) {
+		p_ctl_data = kvm_get_exit_data(vcpu);
+		if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
+			return &p_ctl_data->u.ioreq;
+	}
+
+	return NULL;
+}
+
+#endif
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
new file mode 100644
index 00000000000..f1e17d3d6cd
--- /dev/null
+++ b/arch/ia64/kvm/mmio.c
@@ -0,0 +1,336 @@
+/*
+ * mmio.c: MMIO emulation components.
+ * Copyright (c) 2004, Intel Corporation.
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *  Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
+ *
+ * Copyright (c) 2007 Intel Corporation  KVM support.
+ * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ * Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/kvm_host.h>
+
+#include "vcpu.h"
+
+static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
+{
+	VLSAPIC_XTP(v) = val;
+}
+
+/*
+ * LSAPIC OFFSET
+ */
+#define PIB_LOW_HALF(ofst)     !(ofst & (1 << 20))
+#define PIB_OFST_INTA          0x1E0000
+#define PIB_OFST_XTP           0x1E0008
+
+/*
+ * execute write IPI op.
+ */
+static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
+					uint64_t addr, uint64_t data)
+{
+	struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
+	unsigned long psr;
+
+	local_irq_save(psr);
+
+	p->exit_reason = EXIT_REASON_IPI;
+	p->u.ipi_data.addr.val = addr;
+	p->u.ipi_data.data.val = data;
+	vmm_transition(current_vcpu);
+
+	local_irq_restore(psr);
+
+}
+
+void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
+			unsigned long length, unsigned long val)
+{
+	addr &= (PIB_SIZE - 1);
+
+	switch (addr) {
+	case PIB_OFST_INTA:
+		panic_vm(v, "Undefined write on PIB INTA\n");
+		break;
+	case PIB_OFST_XTP:
+		if (length == 1) {
+			vlsapic_write_xtp(v, val);
+		} else {
+			panic_vm(v, "Undefined write on PIB XTP\n");
+		}
+		break;
+	default:
+		if (PIB_LOW_HALF(addr)) {
+			/*Lower half */
+			if (length != 8)
+				panic_vm(v, "Can't LHF write with size %ld!\n",
+						length);
+			else
+				vlsapic_write_ipi(v, addr, val);
+		} else {   /*Upper half */
+			panic_vm(v, "IPI-UHF write %lx\n", addr);
+		}
+		break;
+	}
+}
+
+unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
+		unsigned long length)
+{
+	uint64_t result = 0;
+
+	addr &= (PIB_SIZE - 1);
+
+	switch (addr) {
+	case PIB_OFST_INTA:
+		if (length == 1) /* 1 byte load */
+			; /* There is no i8259, there is no INTA access*/
+		else
+			panic_vm(v, "Undefined read on PIB INTA\n");
+
+		break;
+	case PIB_OFST_XTP:
+		if (length == 1) {
+			result = VLSAPIC_XTP(v);
+		} else {
+			panic_vm(v, "Undefined read on PIB XTP\n");
+		}
+		break;
+	default:
+		panic_vm(v, "Undefined addr access for lsapic!\n");
+		break;
+	}
+	return result;
+}
+
+static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
+					u16 s, int ma, int dir)
+{
+	unsigned long iot;
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long psr;
+
+	iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
+
+	local_irq_save(psr);
+
+	/*Intercept the access for PIB range*/
+	if (iot == GPFN_PIB) {
+		if (!dir)
+			lsapic_write(vcpu, src_pa, s, *dest);
+		else
+			*dest = lsapic_read(vcpu, src_pa, s);
+		goto out;
+	}
+	p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
+	p->u.ioreq.addr = src_pa;
+	p->u.ioreq.size = s;
+	p->u.ioreq.dir = dir;
+	if (dir == IOREQ_WRITE)
+		p->u.ioreq.data = *dest;
+	p->u.ioreq.state = STATE_IOREQ_READY;
+	vmm_transition(vcpu);
+
+	if (p->u.ioreq.state == STATE_IORESP_READY) {
+		if (dir == IOREQ_READ)
+			/* it's necessary to ensure zero extending */
+			*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
+	} else
+		panic_vm(vcpu, "Unhandled mmio access returned!\n");
+out:
+	local_irq_restore(psr);
+	return ;
+}
+
+/*
+   dir 1: read 0:write
+   inst_type 0:integer 1:floating point
+ */
+#define SL_INTEGER	0	/* store/load interger*/
+#define SL_FLOATING	1     	/* store/load floating*/
+
+void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
+{
+	struct kvm_pt_regs *regs;
+	IA64_BUNDLE bundle;
+	int slot, dir = 0;
+	int inst_type = -1;
+	u16 size = 0;
+	u64 data, slot1a, slot1b, temp, update_reg;
+	s32 imm;
+	INST64 inst;
+
+	regs = vcpu_regs(vcpu);
+
+	if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
+		/* if fetch code fail, return and try again */
+		return;
+	}
+	slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
+	if (!slot)
+		inst.inst = bundle.slot0;
+	else if (slot == 1) {
+		slot1a = bundle.slot1a;
+		slot1b = bundle.slot1b;
+		inst.inst = slot1a + (slot1b << 18);
+	} else if (slot == 2)
+		inst.inst = bundle.slot2;
+
+	/* Integer Load/Store */
+	if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
+		inst_type = SL_INTEGER;
+		size = (inst.M1.x6 & 0x3);
+		if ((inst.M1.x6 >> 2) > 0xb) {
+			/*write*/
+			dir = IOREQ_WRITE;
+			data = vcpu_get_gr(vcpu, inst.M4.r2);
+		} else if ((inst.M1.x6 >> 2) < 0xb) {
+			/*read*/
+			dir = IOREQ_READ;
+		}
+	} else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
+		/* Integer Load + Reg update */
+		inst_type = SL_INTEGER;
+		dir = IOREQ_READ;
+		size = (inst.M2.x6 & 0x3);
+		temp = vcpu_get_gr(vcpu, inst.M2.r3);
+		update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
+		temp += update_reg;
+		vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
+	} else if (inst.M3.major == 5) {
+		/*Integer Load/Store + Imm update*/
+		inst_type = SL_INTEGER;
+		size = (inst.M3.x6&0x3);
+		if ((inst.M5.x6 >> 2) > 0xb) {
+			/*write*/
+			dir = IOREQ_WRITE;
+			data = vcpu_get_gr(vcpu, inst.M5.r2);
+			temp = vcpu_get_gr(vcpu, inst.M5.r3);
+			imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
+				(inst.M5.imm7 << 23);
+			temp += imm >> 23;
+			vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
+
+		} else if ((inst.M3.x6 >> 2) < 0xb) {
+			/*read*/
+			dir = IOREQ_READ;
+			temp = vcpu_get_gr(vcpu, inst.M3.r3);
+			imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
+				(inst.M3.imm7 << 23);
+			temp += imm >> 23;
+			vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
+
+		}
+	} else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
+				&& inst.M9.m == 0 && inst.M9.x == 0) {
+		/* Floating-point spill*/
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
+		/* Write high word. FIXME: this is a kludge!  */
+		v.u.bits[1] &= 0x3ffff;
+		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
+			    ma, IOREQ_WRITE);
+		data = v.u.bits[0];
+		size = 3;
+	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
+		/* Floating-point spill + Imm update */
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
+		temp = vcpu_get_gr(vcpu, inst.M10.r3);
+		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
+			(inst.M10.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
+
+		/* Write high word.FIXME: this is a kludge!  */
+		v.u.bits[1] &= 0x3ffff;
+		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
+			    8, ma, IOREQ_WRITE);
+		data = v.u.bits[0];
+		size = 3;
+	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
+		/* Floating-point stf8 + Imm update */
+		struct ia64_fpreg v;
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		size = 3;
+		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
+		data = v.u.bits[0]; /* Significand.  */
+		temp = vcpu_get_gr(vcpu, inst.M10.r3);
+		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
+			(inst.M10.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
+	} else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
+			&& inst.M15.x6 <= 0x2f) {
+		temp = vcpu_get_gr(vcpu, inst.M15.r3);
+		imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
+			(inst.M15.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
+
+		vcpu_increment_iip(vcpu);
+		return;
+	} else if (inst.M12.major == 6 && inst.M12.m == 1
+			&& inst.M12.x == 1 && inst.M12.x6 == 1) {
+		/* Floating-point Load Pair + Imm ldfp8 M12*/
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_READ;
+		size = 8;     /*ldfd*/
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		v.u.bits[0] = data;
+		v.u.bits[1] = 0x1003E;
+		vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
+		padr += 8;
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		v.u.bits[0] = data;
+		v.u.bits[1] = 0x1003E;
+		vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
+		padr += 8;
+		vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
+		vcpu_increment_iip(vcpu);
+		return;
+	} else {
+		inst_type = -1;
+		panic_vm(vcpu, "Unsupported MMIO access instruction! "
+				"Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
+				bundle.i64[0], bundle.i64[1]);
+	}
+
+	size = 1 << size;
+	if (dir == IOREQ_WRITE) {
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+	} else {
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		if (inst_type == SL_INTEGER)
+			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
+		else
+			panic_vm(vcpu, "Unsupported instruction type!\n");
+
+	}
+	vcpu_increment_iip(vcpu);
+}
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
new file mode 100644
index 00000000000..f793be3efff
--- /dev/null
+++ b/arch/ia64/kvm/optvfault.S
@@ -0,0 +1,1090 @@
+/*
+ * arch/ia64/kvm/optvfault.S
+ * optimize virtualization fault handler
+ *
+ * Copyright (C) 2006 Intel Co
+ *	Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ * Copyright (C) 2008 Intel Co
+ *      Add the support for Tukwila processors.
+ *	Xiantao Zhang <xiantao.zhang@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+#include <asm/kvm_host.h>
+
+#include "vti.h"
+#include "asm-offsets.h"
+
+#define ACCE_MOV_FROM_AR
+#define ACCE_MOV_FROM_RR
+#define ACCE_MOV_TO_RR
+#define ACCE_RSM
+#define ACCE_SSM
+#define ACCE_MOV_TO_PSR
+#define ACCE_THASH
+
+#define VMX_VPS_SYNC_READ			\
+	add r16=VMM_VPD_BASE_OFFSET,r21;	\
+	mov r17 = b0;				\
+	mov r18 = r24;				\
+	mov r19 = r25;				\
+	mov r20 = r31;				\
+	;;					\
+{.mii;						\
+	ld8 r16 = [r16];			\
+	nop 0x0;				\
+	mov r24 = ip;				\
+	;;					\
+};						\
+{.mmb;						\
+	add r24=0x20, r24;			\
+	mov r25 =r16;				\
+	br.sptk.many kvm_vps_sync_read;		\
+};						\
+	mov b0 = r17;				\
+	mov r24 = r18;				\
+	mov r25 = r19;				\
+	mov r31 = r20
+
+ENTRY(kvm_vps_entry)
+	adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21
+	;;
+	ld8 r29 = [r29]
+	;;
+	add r29 = r29, r30
+	;;
+	mov b0 = r29
+	br.sptk.many b0
+END(kvm_vps_entry)
+
+/*
+ *	Inputs:
+ *	r24 : return address
+ *  	r25 : vpd
+ *	r29 : scratch
+ *
+ */
+GLOBAL_ENTRY(kvm_vps_sync_read)
+	movl r30 = PAL_VPS_SYNC_READ
+	;;
+	br.sptk.many kvm_vps_entry
+END(kvm_vps_sync_read)
+
+/*
+ *	Inputs:
+ *	r24 : return address
+ *  	r25 : vpd
+ *	r29 : scratch
+ *
+ */
+GLOBAL_ENTRY(kvm_vps_sync_write)
+	movl r30 = PAL_VPS_SYNC_WRITE
+	;;
+	br.sptk.many kvm_vps_entry
+END(kvm_vps_sync_write)
+
+/*
+ *	Inputs:
+ *	r23 : pr
+ *	r24 : guest b0
+ *  	r25 : vpd
+ *
+ */
+GLOBAL_ENTRY(kvm_vps_resume_normal)
+	movl r30 = PAL_VPS_RESUME_NORMAL
+	;;
+	mov pr=r23,-2
+	br.sptk.many kvm_vps_entry
+END(kvm_vps_resume_normal)
+
+/*
+ *	Inputs:
+ *	r23 : pr
+ *	r24 : guest b0
+ *  	r25 : vpd
+ *	r17 : isr
+ */
+GLOBAL_ENTRY(kvm_vps_resume_handler)
+	movl r30 = PAL_VPS_RESUME_HANDLER
+	;;
+	ld8 r26=[r25]
+	shr r17=r17,IA64_ISR_IR_BIT
+	;;
+	dep r26=r17,r26,63,1   // bit 63 of r26 indicate whether enable CFLE
+	mov pr=r23,-2
+	br.sptk.many kvm_vps_entry
+END(kvm_vps_resume_handler)
+
+//mov r1=ar3
+GLOBAL_ENTRY(kvm_asm_mov_from_ar)
+#ifndef ACCE_MOV_FROM_AR
+	br.many kvm_virtualization_fault_back
+#endif
+	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
+	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
+	extr.u r17=r25,6,7
+	;;
+	ld8 r18=[r18]
+	mov r19=ar.itc
+	mov r24=b0
+	;;
+	add r19=r19,r18
+	addl r20=@gprel(asm_mov_to_reg),gp
+	;;
+	st8 [r16] = r19
+	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
+	shladd r17=r17,4,r20
+	;;
+	mov b0=r17
+	br.sptk.few b0
+	;;
+END(kvm_asm_mov_from_ar)
+
+/*
+ * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
+ * clock as it's source for emulating the ITC. This version will be
+ * copied on top of the original version if the host is determined to
+ * be an SN2.
+ */
+GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
+	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
+	movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
+
+	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
+	extr.u r17=r25,6,7
+	mov r24=b0
+	;;
+	ld8 r18=[r18]
+	ld8 r19=[r19]
+	addl r20=@gprel(asm_mov_to_reg),gp
+	;;
+	add r19=r19,r18
+	shladd r17=r17,4,r20
+	;;
+	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
+	st8 [r16] = r19
+	mov b0=r17
+	br.sptk.few b0
+	;;
+END(kvm_asm_mov_from_ar_sn2)
+
+
+
+// mov r1=rr[r3]
+GLOBAL_ENTRY(kvm_asm_mov_from_rr)
+#ifndef ACCE_MOV_FROM_RR
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r16=r25,20,7
+	extr.u r17=r25,6,7
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
+	shladd r16=r16,4,r20
+	mov r24=b0
+	;;
+	add r27=VMM_VCPU_VRR0_OFFSET,r21
+	mov b0=r16
+	br.many b0
+	;;
+kvm_asm_mov_from_rr_back_1:
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
+	shr.u r26=r19,61
+	;;
+	shladd r17=r17,4,r22
+	shladd r27=r26,3,r27
+	;;
+	ld8 r19=[r27]
+	mov b0=r17
+	br.many b0
+END(kvm_asm_mov_from_rr)
+
+
+// mov rr[r3]=r2
+GLOBAL_ENTRY(kvm_asm_mov_to_rr)
+#ifndef ACCE_MOV_TO_RR
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r16=r25,20,7
+	extr.u r17=r25,13,7
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
+	shladd r16=r16,4,r20
+	mov r22=b0
+	;;
+	add r27=VMM_VCPU_VRR0_OFFSET,r21
+	mov b0=r16
+	br.many b0
+	;;
+kvm_asm_mov_to_rr_back_1:
+	adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
+	shr.u r23=r19,61
+	shladd r17=r17,4,r20
+	;;
+	//if rr6, go back
+	cmp.eq p6,p0=6,r23
+	mov b0=r22
+	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
+	;;
+	mov r28=r19
+	mov b0=r17
+	br.many b0
+kvm_asm_mov_to_rr_back_2:
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	shladd r27=r23,3,r27
+	;; // vrr.rid<<4 |0xe
+	st8 [r27]=r19
+	mov b0=r30
+	;;
+	extr.u r16=r19,8,26
+	extr.u r18 =r19,2,6
+	mov r17 =0xe
+	;;
+	shladd r16 = r16, 4, r17
+	extr.u r19 =r19,0,8
+	;;
+	shl r16 = r16,8
+	;;
+	add r19 = r19, r16
+	;; //set ve 1
+	dep r19=-1,r19,0,1
+	cmp.lt p6,p0=14,r18
+	;;
+	(p6) mov r18=14
+	;;
+	(p6) dep r19=r18,r19,2,6
+	;;
+	cmp.eq p6,p0=0,r23
+	;;
+	cmp.eq.or p6,p0=4,r23
+	;;
+	adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	(p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	;;
+	ld4 r16=[r16]
+	cmp.eq p7,p0=r0,r0
+	(p6) shladd r17=r23,1,r17
+	;;
+	(p6) st8 [r17]=r19
+	(p6) tbit.nz p6,p7=r16,0
+	;;
+	(p7) mov rr[r28]=r19
+	mov r24=r22
+	br.many b0
+END(kvm_asm_mov_to_rr)
+
+
+//rsm
+GLOBAL_ENTRY(kvm_asm_rsm)
+#ifndef ACCE_RSM
+	br.many kvm_virtualization_fault_back
+#endif
+	VMX_VPS_SYNC_READ
+	;;
+	extr.u r26=r25,6,21
+	extr.u r27=r25,31,2
+	;;
+	extr.u r28=r25,36,1
+	dep r26=r27,r26,21,2
+	;;
+	add r17=VPD_VPSR_START_OFFSET,r16
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	//r26 is imm24
+	dep r26=r28,r26,23,1
+	;;
+	ld8 r18=[r17]
+	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
+	ld4 r23=[r22]
+	sub r27=-1,r26
+	mov r24=b0
+	;;
+	mov r20=cr.ipsr
+	or r28=r27,r28
+	and r19=r18,r27
+	;;
+	st8 [r17]=r19
+	and r20=r20,r28
+	/* Comment it out due to short of fp lazy alorgithm support
+	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
+	;;
+	ld8 r27=[r27]
+	;;
+	tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
+	;;
+	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
+	*/
+	;;
+	mov cr.ipsr=r20
+	tbit.nz p6,p0=r23,0
+	;;
+	tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
+	(p6) br.dptk kvm_resume_to_guest_with_sync
+	;;
+	add r26=VMM_VCPU_META_RR0_OFFSET,r21
+	add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
+	dep r23=-1,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	br.many kvm_resume_to_guest_with_sync
+END(kvm_asm_rsm)
+
+
+//ssm
+GLOBAL_ENTRY(kvm_asm_ssm)
+#ifndef ACCE_SSM
+	br.many kvm_virtualization_fault_back
+#endif
+	VMX_VPS_SYNC_READ
+	;;
+	extr.u r26=r25,6,21
+	extr.u r27=r25,31,2
+	;;
+	extr.u r28=r25,36,1
+	dep r26=r27,r26,21,2
+	;;  //r26 is imm24
+	add r27=VPD_VPSR_START_OFFSET,r16
+	dep r26=r28,r26,23,1
+	;;  //r19 vpsr
+	ld8 r29=[r27]
+	mov r24=b0
+	;;
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	mov r20=cr.ipsr
+	or r19=r29,r26
+	;;
+	ld4 r23=[r22]
+	st8 [r27]=r19
+	or r20=r20,r26
+	;;
+	mov cr.ipsr=r20
+	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
+	;;
+	and r19=r28,r19
+	tbit.z p6,p0=r23,0
+	;;
+	cmp.ne.or p6,p0=r28,r19
+	(p6) br.dptk kvm_asm_ssm_1
+	;;
+	add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
+	dep r23=0,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	;;
+kvm_asm_ssm_1:
+	tbit.nz p6,p0=r29,IA64_PSR_I_BIT
+	;;
+	tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
+	(p6) br.dptk kvm_resume_to_guest_with_sync
+	;;
+	add r29=VPD_VTPR_START_OFFSET,r16
+	add r30=VPD_VHPI_START_OFFSET,r16
+	;;
+	ld8 r29=[r29]
+	ld8 r30=[r30]
+	;;
+	extr.u r17=r29,4,4
+	extr.u r18=r29,16,1
+	;;
+	dep r17=r18,r17,4,1
+	;;
+	cmp.gt p6,p0=r30,r17
+	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
+	br.many kvm_resume_to_guest_with_sync
+END(kvm_asm_ssm)
+
+
+//mov psr.l=r2
+GLOBAL_ENTRY(kvm_asm_mov_to_psr)
+#ifndef ACCE_MOV_TO_PSR
+	br.many kvm_virtualization_fault_back
+#endif
+	VMX_VPS_SYNC_READ
+	;;
+	extr.u r26=r25,13,7 //r2
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
+	shladd r26=r26,4,r20
+	mov r24=b0
+	;;
+	add r27=VPD_VPSR_START_OFFSET,r16
+	mov b0=r26
+	br.many b0
+	;;
+kvm_asm_mov_to_psr_back:
+	ld8 r17=[r27]
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	dep r19=0,r19,32,32
+	;;
+	ld4 r23=[r22]
+	dep r18=0,r17,0,32
+	;;
+	add r30=r18,r19
+	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
+	;;
+	st8 [r27]=r30
+	and r27=r28,r30
+	and r29=r28,r17
+	;;
+	cmp.eq p5,p0=r29,r27
+	cmp.eq p6,p7=r28,r27
+	(p5) br.many kvm_asm_mov_to_psr_1
+	;;
+	//virtual to physical
+	(p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
+	(p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
+	(p7) dep r23=-1,r23,0,1
+	;;
+	//physical to virtual
+	(p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	(p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
+	(p6) dep r23=0,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	;;
+kvm_asm_mov_to_psr_1:
+	mov r20=cr.ipsr
+	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
+	;;
+	or r19=r19,r28
+	dep r20=0,r20,0,32
+	;;
+	add r20=r19,r20
+	mov b0=r24
+	;;
+	/* Comment it out due to short of fp lazy algorithm support
+	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
+	;;
+	ld8 r27=[r27]
+	;;
+	tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
+	;;
+	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
+	;;
+	*/
+	mov cr.ipsr=r20
+	cmp.ne p6,p0=r0,r0
+	;;
+	tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
+	tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
+	(p6) br.dpnt.few kvm_resume_to_guest_with_sync
+	;;
+	add r29=VPD_VTPR_START_OFFSET,r16
+	add r30=VPD_VHPI_START_OFFSET,r16
+	;;
+	ld8 r29=[r29]
+	ld8 r30=[r30]
+	;;
+	extr.u r17=r29,4,4
+	extr.u r18=r29,16,1
+	;;
+	dep r17=r18,r17,4,1
+	;;
+	cmp.gt p6,p0=r30,r17
+	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
+	br.many kvm_resume_to_guest_with_sync
+END(kvm_asm_mov_to_psr)
+
+
+ENTRY(kvm_asm_dispatch_vexirq)
+//increment iip
+	mov r17 = b0
+	mov r18 = r31
+{.mii
+	add r25=VMM_VPD_BASE_OFFSET,r21
+	nop 0x0
+	mov r24 = ip
+	;;
+}
+{.mmb
+	add r24 = 0x20, r24
+	ld8 r25 = [r25]
+	br.sptk.many kvm_vps_sync_write
+}
+	mov b0 =r17
+	mov r16=cr.ipsr
+	mov r31 = r18
+	mov r19 = 37
+	;;
+	extr.u r17=r16,IA64_PSR_RI_BIT,2
+	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
+	;;
+	(p6) mov r18=cr.iip
+	(p6) mov r17=r0
+	(p7) add r17=1,r17
+	;;
+	(p6) add r18=0x10,r18
+	dep r16=r17,r16,IA64_PSR_RI_BIT,2
+	;;
+	(p6) mov cr.iip=r18
+	mov cr.ipsr=r16
+	mov r30 =1
+	br.many kvm_dispatch_vexirq
+END(kvm_asm_dispatch_vexirq)
+
+// thash
+// TODO: add support when pta.vf = 1
+GLOBAL_ENTRY(kvm_asm_thash)
+#ifndef ACCE_THASH
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r17=r25,20,7		// get r3 from opcode in r25
+	extr.u r18=r25,6,7		// get r1 from opcode in r25
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
+	shladd r17=r17,4,r20	// get addr of MOVE_FROM_REG(r17)
+	adds r16=VMM_VPD_BASE_OFFSET,r21	// get vcpu.arch.priveregs
+	;;
+	mov r24=b0
+	;;
+	ld8 r16=[r16]		// get VPD addr
+	mov b0=r17
+	br.many b0			// r19 return value
+	;;
+kvm_asm_thash_back1:
+	shr.u r23=r19,61		// get RR number
+	adds r28=VMM_VCPU_VRR0_OFFSET,r21	// get vcpu->arch.vrr[0]'s addr
+	adds r16=VMM_VPD_VPTA_OFFSET,r16	// get vpta
+	;;
+	shladd r27=r23,3,r28	// get vcpu->arch.vrr[r23]'s addr
+	ld8 r17=[r16]		// get PTA
+	mov r26=1
+	;;
+	extr.u r29=r17,2,6	// get pta.size
+	ld8 r28=[r27]		// get vcpu->arch.vrr[r23]'s value
+	;;
+	mov b0=r24
+	//Fallback to C if pta.vf is set
+	tbit.nz p6,p0=r17, 8
+	;;
+	(p6) mov r24=EVENT_THASH
+	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
+	extr.u r28=r28,2,6	// get rr.ps
+	shl r22=r26,r29		// 1UL << pta.size
+	;;
+	shr.u r23=r19,r28	// vaddr >> rr.ps
+	adds r26=3,r29		// pta.size + 3
+	shl r27=r17,3		// pta << 3
+	;;
+	shl r23=r23,3		// (vaddr >> rr.ps) << 3
+	shr.u r27=r27,r26	// (pta << 3) >> (pta.size+3)
+	movl r16=7<<61
+	;;
+	adds r22=-1,r22		// (1UL << pta.size) - 1
+	shl r27=r27,r29		// ((pta<<3)>>(pta.size+3))<<pta.size
+	and r19=r19,r16		// vaddr & VRN_MASK
+	;;
+	and r22=r22,r23		// vhpt_offset
+	or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
+	adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
+	;;
+	or r19=r19,r22		// calc pval
+	shladd r17=r18,4,r26
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	;;
+	mov b0=r17
+	br.many b0
+END(kvm_asm_thash)
+
+#define MOV_TO_REG0	\
+{;			\
+	nop.b 0x0;		\
+	nop.b 0x0;		\
+	nop.b 0x0;		\
+	;;			\
+};
+
+
+#define MOV_TO_REG(n)	\
+{;			\
+	mov r##n##=r19;	\
+	mov b0=r30;	\
+	br.sptk.many b0;	\
+	;;			\
+};
+
+
+#define MOV_FROM_REG(n)	\
+{;				\
+	mov r19=r##n##;		\
+	mov b0=r30;		\
+	br.sptk.many b0;		\
+	;;				\
+};
+
+
+#define MOV_TO_BANK0_REG(n)			\
+ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##);	\
+{;						\
+	mov r26=r2;				\
+	mov r2=r19;				\
+	bsw.1;					\
+	;;						\
+};						\
+{;						\
+	mov r##n##=r2;				\
+	nop.b 0x0;					\
+	bsw.0;					\
+	;;						\
+};						\
+{;						\
+	mov r2=r26;				\
+	mov b0=r30;				\
+	br.sptk.many b0;				\
+	;;						\
+};						\
+END(asm_mov_to_bank0_reg##n##)
+
+
+#define MOV_FROM_BANK0_REG(n)			\
+ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);	\
+{;						\
+	mov r26=r2;				\
+	nop.b 0x0;					\
+	bsw.1;					\
+	;;						\
+};						\
+{;						\
+	mov r2=r##n##;				\
+	nop.b 0x0;					\
+	bsw.0;					\
+	;;						\
+};						\
+{;						\
+	mov r19=r2;				\
+	mov r2=r26;				\
+	mov b0=r30;				\
+};						\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many b0;				\
+	;;						\
+};						\
+END(asm_mov_from_bank0_reg##n##)
+
+
+#define JMP_TO_MOV_TO_BANK0_REG(n)		\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many asm_mov_to_bank0_reg##n##;	\
+	;;						\
+}
+
+
+#define JMP_TO_MOV_FROM_BANK0_REG(n)		\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many asm_mov_from_bank0_reg##n##;	\
+	;;						\
+}
+
+
+MOV_FROM_BANK0_REG(16)
+MOV_FROM_BANK0_REG(17)
+MOV_FROM_BANK0_REG(18)
+MOV_FROM_BANK0_REG(19)
+MOV_FROM_BANK0_REG(20)
+MOV_FROM_BANK0_REG(21)
+MOV_FROM_BANK0_REG(22)
+MOV_FROM_BANK0_REG(23)
+MOV_FROM_BANK0_REG(24)
+MOV_FROM_BANK0_REG(25)
+MOV_FROM_BANK0_REG(26)
+MOV_FROM_BANK0_REG(27)
+MOV_FROM_BANK0_REG(28)
+MOV_FROM_BANK0_REG(29)
+MOV_FROM_BANK0_REG(30)
+MOV_FROM_BANK0_REG(31)
+
+
+// mov from reg table
+ENTRY(asm_mov_from_reg)
+	MOV_FROM_REG(0)
+	MOV_FROM_REG(1)
+	MOV_FROM_REG(2)
+	MOV_FROM_REG(3)
+	MOV_FROM_REG(4)
+	MOV_FROM_REG(5)
+	MOV_FROM_REG(6)
+	MOV_FROM_REG(7)
+	MOV_FROM_REG(8)
+	MOV_FROM_REG(9)
+	MOV_FROM_REG(10)
+	MOV_FROM_REG(11)
+	MOV_FROM_REG(12)
+	MOV_FROM_REG(13)
+	MOV_FROM_REG(14)
+	MOV_FROM_REG(15)
+	JMP_TO_MOV_FROM_BANK0_REG(16)
+	JMP_TO_MOV_FROM_BANK0_REG(17)
+	JMP_TO_MOV_FROM_BANK0_REG(18)
+	JMP_TO_MOV_FROM_BANK0_REG(19)
+	JMP_TO_MOV_FROM_BANK0_REG(20)
+	JMP_TO_MOV_FROM_BANK0_REG(21)
+	JMP_TO_MOV_FROM_BANK0_REG(22)
+	JMP_TO_MOV_FROM_BANK0_REG(23)
+	JMP_TO_MOV_FROM_BANK0_REG(24)
+	JMP_TO_MOV_FROM_BANK0_REG(25)
+	JMP_TO_MOV_FROM_BANK0_REG(26)
+	JMP_TO_MOV_FROM_BANK0_REG(27)
+	JMP_TO_MOV_FROM_BANK0_REG(28)
+	JMP_TO_MOV_FROM_BANK0_REG(29)
+	JMP_TO_MOV_FROM_BANK0_REG(30)
+	JMP_TO_MOV_FROM_BANK0_REG(31)
+	MOV_FROM_REG(32)
+	MOV_FROM_REG(33)
+	MOV_FROM_REG(34)
+	MOV_FROM_REG(35)
+	MOV_FROM_REG(36)
+	MOV_FROM_REG(37)
+	MOV_FROM_REG(38)
+	MOV_FROM_REG(39)
+	MOV_FROM_REG(40)
+	MOV_FROM_REG(41)
+	MOV_FROM_REG(42)
+	MOV_FROM_REG(43)
+	MOV_FROM_REG(44)
+	MOV_FROM_REG(45)
+	MOV_FROM_REG(46)
+	MOV_FROM_REG(47)
+	MOV_FROM_REG(48)
+	MOV_FROM_REG(49)
+	MOV_FROM_REG(50)
+	MOV_FROM_REG(51)
+	MOV_FROM_REG(52)
+	MOV_FROM_REG(53)
+	MOV_FROM_REG(54)
+	MOV_FROM_REG(55)
+	MOV_FROM_REG(56)
+	MOV_FROM_REG(57)
+	MOV_FROM_REG(58)
+	MOV_FROM_REG(59)
+	MOV_FROM_REG(60)
+	MOV_FROM_REG(61)
+	MOV_FROM_REG(62)
+	MOV_FROM_REG(63)
+	MOV_FROM_REG(64)
+	MOV_FROM_REG(65)
+	MOV_FROM_REG(66)
+	MOV_FROM_REG(67)
+	MOV_FROM_REG(68)
+	MOV_FROM_REG(69)
+	MOV_FROM_REG(70)
+	MOV_FROM_REG(71)
+	MOV_FROM_REG(72)
+	MOV_FROM_REG(73)
+	MOV_FROM_REG(74)
+	MOV_FROM_REG(75)
+	MOV_FROM_REG(76)
+	MOV_FROM_REG(77)
+	MOV_FROM_REG(78)
+	MOV_FROM_REG(79)
+	MOV_FROM_REG(80)
+	MOV_FROM_REG(81)
+	MOV_FROM_REG(82)
+	MOV_FROM_REG(83)
+	MOV_FROM_REG(84)
+	MOV_FROM_REG(85)
+	MOV_FROM_REG(86)
+	MOV_FROM_REG(87)
+	MOV_FROM_REG(88)
+	MOV_FROM_REG(89)
+	MOV_FROM_REG(90)
+	MOV_FROM_REG(91)
+	MOV_FROM_REG(92)
+	MOV_FROM_REG(93)
+	MOV_FROM_REG(94)
+	MOV_FROM_REG(95)
+	MOV_FROM_REG(96)
+	MOV_FROM_REG(97)
+	MOV_FROM_REG(98)
+	MOV_FROM_REG(99)
+	MOV_FROM_REG(100)
+	MOV_FROM_REG(101)
+	MOV_FROM_REG(102)
+	MOV_FROM_REG(103)
+	MOV_FROM_REG(104)
+	MOV_FROM_REG(105)
+	MOV_FROM_REG(106)
+	MOV_FROM_REG(107)
+	MOV_FROM_REG(108)
+	MOV_FROM_REG(109)
+	MOV_FROM_REG(110)
+	MOV_FROM_REG(111)
+	MOV_FROM_REG(112)
+	MOV_FROM_REG(113)
+	MOV_FROM_REG(114)
+	MOV_FROM_REG(115)
+	MOV_FROM_REG(116)
+	MOV_FROM_REG(117)
+	MOV_FROM_REG(118)
+	MOV_FROM_REG(119)
+	MOV_FROM_REG(120)
+	MOV_FROM_REG(121)
+	MOV_FROM_REG(122)
+	MOV_FROM_REG(123)
+	MOV_FROM_REG(124)
+	MOV_FROM_REG(125)
+	MOV_FROM_REG(126)
+	MOV_FROM_REG(127)
+END(asm_mov_from_reg)
+
+
+/* must be in bank 0
+ * parameter:
+ * r31: pr
+ * r24: b0
+ */
+ENTRY(kvm_resume_to_guest_with_sync)
+	adds r19=VMM_VPD_BASE_OFFSET,r21
+	mov r16 = r31
+	mov r17 = r24
+	;;
+{.mii
+	ld8 r25 =[r19]
+	nop 0x0
+	mov r24 = ip
+	;;
+}
+{.mmb
+	add r24 =0x20, r24
+	nop 0x0
+	br.sptk.many kvm_vps_sync_write
+}
+
+	mov r31 = r16
+	mov r24 =r17
+	;;
+	br.sptk.many kvm_resume_to_guest
+END(kvm_resume_to_guest_with_sync)
+
+ENTRY(kvm_resume_to_guest)
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	ld8 r1 =[r16]
+	adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
+	;;
+	mov r16=cr.ipsr
+	;;
+	ld8 r20 = [r20]
+	adds r19=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r25=[r19]
+	extr.u r17=r16,IA64_PSR_RI_BIT,2
+	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
+	;;
+	(p6) mov r18=cr.iip
+	(p6) mov r17=r0
+	;;
+	(p6) add r18=0x10,r18
+	(p7) add r17=1,r17
+	;;
+	(p6) mov cr.iip=r18
+	dep r16=r17,r16,IA64_PSR_RI_BIT,2
+	;;
+	mov cr.ipsr=r16
+	adds r19= VPD_VPSR_START_OFFSET,r25
+	add r28=PAL_VPS_RESUME_NORMAL,r20
+	add r29=PAL_VPS_RESUME_HANDLER,r20
+	;;
+	ld8 r19=[r19]
+	mov b0=r29
+	mov r27=cr.isr
+	;;
+	tbit.z p6,p7 = r19,IA64_PSR_IC_BIT		// p7=vpsr.ic
+	shr r27=r27,IA64_ISR_IR_BIT
+	;;
+	(p6) ld8 r26=[r25]
+	(p7) mov b0=r28
+	;;
+	(p6) dep r26=r27,r26,63,1
+	mov pr=r31,-2
+	br.sptk.many b0             // call pal service
+	;;
+END(kvm_resume_to_guest)
+
+
+MOV_TO_BANK0_REG(16)
+MOV_TO_BANK0_REG(17)
+MOV_TO_BANK0_REG(18)
+MOV_TO_BANK0_REG(19)
+MOV_TO_BANK0_REG(20)
+MOV_TO_BANK0_REG(21)
+MOV_TO_BANK0_REG(22)
+MOV_TO_BANK0_REG(23)
+MOV_TO_BANK0_REG(24)
+MOV_TO_BANK0_REG(25)
+MOV_TO_BANK0_REG(26)
+MOV_TO_BANK0_REG(27)
+MOV_TO_BANK0_REG(28)
+MOV_TO_BANK0_REG(29)
+MOV_TO_BANK0_REG(30)
+MOV_TO_BANK0_REG(31)
+
+
+// mov to reg table
+ENTRY(asm_mov_to_reg)
+	MOV_TO_REG0
+	MOV_TO_REG(1)
+	MOV_TO_REG(2)
+	MOV_TO_REG(3)
+	MOV_TO_REG(4)
+	MOV_TO_REG(5)
+	MOV_TO_REG(6)
+	MOV_TO_REG(7)
+	MOV_TO_REG(8)
+	MOV_TO_REG(9)
+	MOV_TO_REG(10)
+	MOV_TO_REG(11)
+	MOV_TO_REG(12)
+	MOV_TO_REG(13)
+	MOV_TO_REG(14)
+	MOV_TO_REG(15)
+	JMP_TO_MOV_TO_BANK0_REG(16)
+	JMP_TO_MOV_TO_BANK0_REG(17)
+	JMP_TO_MOV_TO_BANK0_REG(18)
+	JMP_TO_MOV_TO_BANK0_REG(19)
+	JMP_TO_MOV_TO_BANK0_REG(20)
+	JMP_TO_MOV_TO_BANK0_REG(21)
+	JMP_TO_MOV_TO_BANK0_REG(22)
+	JMP_TO_MOV_TO_BANK0_REG(23)
+	JMP_TO_MOV_TO_BANK0_REG(24)
+	JMP_TO_MOV_TO_BANK0_REG(25)
+	JMP_TO_MOV_TO_BANK0_REG(26)
+	JMP_TO_MOV_TO_BANK0_REG(27)
+	JMP_TO_MOV_TO_BANK0_REG(28)
+	JMP_TO_MOV_TO_BANK0_REG(29)
+	JMP_TO_MOV_TO_BANK0_REG(30)
+	JMP_TO_MOV_TO_BANK0_REG(31)
+	MOV_TO_REG(32)
+	MOV_TO_REG(33)
+	MOV_TO_REG(34)
+	MOV_TO_REG(35)
+	MOV_TO_REG(36)
+	MOV_TO_REG(37)
+	MOV_TO_REG(38)
+	MOV_TO_REG(39)
+	MOV_TO_REG(40)
+	MOV_TO_REG(41)
+	MOV_TO_REG(42)
+	MOV_TO_REG(43)
+	MOV_TO_REG(44)
+	MOV_TO_REG(45)
+	MOV_TO_REG(46)
+	MOV_TO_REG(47)
+	MOV_TO_REG(48)
+	MOV_TO_REG(49)
+	MOV_TO_REG(50)
+	MOV_TO_REG(51)
+	MOV_TO_REG(52)
+	MOV_TO_REG(53)
+	MOV_TO_REG(54)
+	MOV_TO_REG(55)
+	MOV_TO_REG(56)
+	MOV_TO_REG(57)
+	MOV_TO_REG(58)
+	MOV_TO_REG(59)
+	MOV_TO_REG(60)
+	MOV_TO_REG(61)
+	MOV_TO_REG(62)
+	MOV_TO_REG(63)
+	MOV_TO_REG(64)
+	MOV_TO_REG(65)
+	MOV_TO_REG(66)
+	MOV_TO_REG(67)
+	MOV_TO_REG(68)
+	MOV_TO_REG(69)
+	MOV_TO_REG(70)
+	MOV_TO_REG(71)
+	MOV_TO_REG(72)
+	MOV_TO_REG(73)
+	MOV_TO_REG(74)
+	MOV_TO_REG(75)
+	MOV_TO_REG(76)
+	MOV_TO_REG(77)
+	MOV_TO_REG(78)
+	MOV_TO_REG(79)
+	MOV_TO_REG(80)
+	MOV_TO_REG(81)
+	MOV_TO_REG(82)
+	MOV_TO_REG(83)
+	MOV_TO_REG(84)
+	MOV_TO_REG(85)
+	MOV_TO_REG(86)
+	MOV_TO_REG(87)
+	MOV_TO_REG(88)
+	MOV_TO_REG(89)
+	MOV_TO_REG(90)
+	MOV_TO_REG(91)
+	MOV_TO_REG(92)
+	MOV_TO_REG(93)
+	MOV_TO_REG(94)
+	MOV_TO_REG(95)
+	MOV_TO_REG(96)
+	MOV_TO_REG(97)
+	MOV_TO_REG(98)
+	MOV_TO_REG(99)
+	MOV_TO_REG(100)
+	MOV_TO_REG(101)
+	MOV_TO_REG(102)
+	MOV_TO_REG(103)
+	MOV_TO_REG(104)
+	MOV_TO_REG(105)
+	MOV_TO_REG(106)
+	MOV_TO_REG(107)
+	MOV_TO_REG(108)
+	MOV_TO_REG(109)
+	MOV_TO_REG(110)
+	MOV_TO_REG(111)
+	MOV_TO_REG(112)
+	MOV_TO_REG(113)
+	MOV_TO_REG(114)
+	MOV_TO_REG(115)
+	MOV_TO_REG(116)
+	MOV_TO_REG(117)
+	MOV_TO_REG(118)
+	MOV_TO_REG(119)
+	MOV_TO_REG(120)
+	MOV_TO_REG(121)
+	MOV_TO_REG(122)
+	MOV_TO_REG(123)
+	MOV_TO_REG(124)
+	MOV_TO_REG(125)
+	MOV_TO_REG(126)
+	MOV_TO_REG(127)
+END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
new file mode 100644
index 00000000000..b0398740b48
--- /dev/null
+++ b/arch/ia64/kvm/process.c
@@ -0,0 +1,1024 @@
+/*
+ * process.c: handle interruption inject for guests.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  	Shaofan Li (Susue Li) <susie.li@intel.com>
+ *  	Xiaoyan Feng (Fleming Feng)  <fleming.feng@intel.com>
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  	Xiantao Zhang (xiantao.zhang@intel.com)
+ */
+#include "vcpu.h"
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/fpswa.h>
+#include <asm/kregs.h>
+#include <asm/tlb.h>
+
+fpswa_interface_t *vmm_fpswa_interface;
+
+#define IA64_VHPT_TRANS_VECTOR			0x0000
+#define IA64_INST_TLB_VECTOR			0x0400
+#define IA64_DATA_TLB_VECTOR			0x0800
+#define IA64_ALT_INST_TLB_VECTOR		0x0c00
+#define IA64_ALT_DATA_TLB_VECTOR		0x1000
+#define IA64_DATA_NESTED_TLB_VECTOR		0x1400
+#define IA64_INST_KEY_MISS_VECTOR		0x1800
+#define IA64_DATA_KEY_MISS_VECTOR		0x1c00
+#define IA64_DIRTY_BIT_VECTOR			0x2000
+#define IA64_INST_ACCESS_BIT_VECTOR		0x2400
+#define IA64_DATA_ACCESS_BIT_VECTOR		0x2800
+#define IA64_BREAK_VECTOR			0x2c00
+#define IA64_EXTINT_VECTOR			0x3000
+#define IA64_PAGE_NOT_PRESENT_VECTOR		0x5000
+#define IA64_KEY_PERMISSION_VECTOR		0x5100
+#define IA64_INST_ACCESS_RIGHTS_VECTOR		0x5200
+#define IA64_DATA_ACCESS_RIGHTS_VECTOR		0x5300
+#define IA64_GENEX_VECTOR			0x5400
+#define IA64_DISABLED_FPREG_VECTOR		0x5500
+#define IA64_NAT_CONSUMPTION_VECTOR		0x5600
+#define IA64_SPECULATION_VECTOR		0x5700 /* UNUSED */
+#define IA64_DEBUG_VECTOR			0x5900
+#define IA64_UNALIGNED_REF_VECTOR		0x5a00
+#define IA64_UNSUPPORTED_DATA_REF_VECTOR	0x5b00
+#define IA64_FP_FAULT_VECTOR			0x5c00
+#define IA64_FP_TRAP_VECTOR			0x5d00
+#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 	0x5e00
+#define IA64_TAKEN_BRANCH_TRAP_VECTOR		0x5f00
+#define IA64_SINGLE_STEP_TRAP_VECTOR		0x6000
+
+/* SDM vol2 5.5 - IVA based interruption handling */
+#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
+			IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT |    	\
+			IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
+
+#define DOMN_PAL_REQUEST    0x110000
+#define DOMN_SAL_REQUEST    0x110001
+
+static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
+	0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
+	0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
+	0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
+	0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
+	0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
+	0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
+	0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
+};
+
+static void collect_interruption(struct kvm_vcpu *vcpu)
+{
+	u64 ipsr;
+	u64 vdcr;
+	u64 vifs;
+	unsigned long vpsr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	vpsr = vcpu_get_psr(vcpu);
+	vcpu_bsw0(vcpu);
+	if (vpsr & IA64_PSR_IC) {
+
+		/* Sync mpsr id/da/dd/ss/ed bits to vipsr
+		 * since after guest do rfi, we still want these bits on in
+		 * mpsr
+		 */
+
+		ipsr = regs->cr_ipsr;
+		vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
+					| IA64_PSR_DD | IA64_PSR_SS
+					| IA64_PSR_ED));
+		vcpu_set_ipsr(vcpu, vpsr);
+
+		/* Currently, for trap, we do not advance IIP to next
+		 * instruction. That's because we assume caller already
+		 * set up IIP correctly
+		 */
+
+		vcpu_set_iip(vcpu , regs->cr_iip);
+
+		/* set vifs.v to zero */
+		vifs = VCPU(vcpu, ifs);
+		vifs &= ~IA64_IFS_V;
+		vcpu_set_ifs(vcpu, vifs);
+
+		vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
+	}
+
+	vdcr = VCPU(vcpu, dcr);
+
+	/* Set guest psr
+	 * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
+	 * be: set to the value of dcr.be
+	 * pp: set to the value of dcr.pp
+	 */
+	vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
+	vpsr |= (vdcr & IA64_DCR_BE);
+
+	/* VDCR pp bit position is different from VPSR pp bit */
+	if (vdcr & IA64_DCR_PP) {
+		vpsr |= IA64_PSR_PP;
+	} else {
+		vpsr &= ~IA64_PSR_PP;
+	}
+
+	vcpu_set_psr(vcpu, vpsr);
+
+}
+
+void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
+{
+	u64 viva;
+	struct kvm_pt_regs *regs;
+	union ia64_isr pt_isr;
+
+	regs = vcpu_regs(vcpu);
+
+	/* clear cr.isr.ir (incomplete register frame)*/
+	pt_isr.val = VMX(vcpu, cr_isr);
+	pt_isr.ir = 0;
+	VMX(vcpu, cr_isr) = pt_isr.val;
+
+	collect_interruption(vcpu);
+
+	viva = vcpu_get_iva(vcpu);
+	regs->cr_iip = viva + vec;
+}
+
+static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
+{
+	union ia64_rr rr, rr1;
+
+	rr.val = vcpu_get_rr(vcpu, ifa);
+	rr1.val = 0;
+	rr1.ps = rr.ps;
+	rr1.rid = rr.rid;
+	return (rr1.val);
+}
+
+/*
+ * Set vIFA & vITIR & vIHA, when vPSR.ic =1
+ * Parameter:
+ *  set_ifa: if true, set vIFA
+ *  set_itir: if true, set vITIR
+ *  set_iha: if true, set vIHA
+ */
+void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
+		int set_ifa, int set_itir, int set_iha)
+{
+	long vpsr;
+	u64 value;
+
+	vpsr = VCPU(vcpu, vpsr);
+	/* Vol2, Table 8-1 */
+	if (vpsr & IA64_PSR_IC) {
+		if (set_ifa)
+			vcpu_set_ifa(vcpu, vadr);
+		if (set_itir) {
+			value = vcpu_get_itir_on_fault(vcpu, vadr);
+			vcpu_set_itir(vcpu, value);
+		}
+
+		if (set_iha) {
+			value = vcpu_thash(vcpu, vadr);
+			vcpu_set_iha(vcpu, value);
+		}
+	}
+}
+
+/*
+ * Data TLB Fault
+ *  @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
+}
+
+/*
+ * Instruction TLB Fault
+ *  @ Instruction TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
+}
+
+/*
+ * Data Nested TLB Fault
+ *  @ Data Nested TLB Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void nested_dtlb(struct kvm_vcpu *vcpu)
+{
+	inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
+}
+
+/*
+ * Alternate Data TLB Fault
+ *  @ Alternate Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
+}
+
+/*
+ * Data TLB Fault
+ *  @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
+}
+
+/* Deal with:
+ *  VHPT Translation Vector
+ */
+static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA*/
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
+}
+
+/*
+ * VHPT Instruction Fault
+ *  @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_vhpt_fault(vcpu, vadr);
+}
+
+/*
+ * VHPT Data Fault
+ *  @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_vhpt_fault(vcpu, vadr);
+}
+
+/*
+ * Deal with:
+ *  General Exception vector
+ */
+void _general_exception(struct kvm_vcpu *vcpu)
+{
+	inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
+}
+
+/*
+ * Illegal Operation Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void illegal_op(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Illegal Dependency Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void illegal_dep(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Reserved Register/Field Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void rsv_reg_field(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+/*
+ * Privileged Operation Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+
+void privilege_op(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Unimplement Data Address Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void unimpl_daddr(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Privileged Register Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void privilege_reg(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/* Deal with
+ *  Nat consumption vector
+ * Parameter:
+ *  vaddr: Optional, if t == REGISTER
+ */
+static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
+						enum tlb_miss_type t)
+{
+	/* If vPSR.ic && t == DATA/INST, IFA */
+	if (t == DATA || t == INSTRUCTION) {
+		/* IFA */
+		set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
+	}
+
+	inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
+}
+
+/*
+ * Instruction Nat Page Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_nat_consumption_fault(vcpu, vadr, INSTRUCTION);
+}
+
+/*
+ * Register Nat Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void rnat_consumption(struct kvm_vcpu *vcpu)
+{
+	_nat_consumption_fault(vcpu, 0, REGISTER);
+}
+
+/*
+ * Data Nat Page Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/* Deal with
+ *  Page not present vector
+ */
+static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
+}
+
+void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	__page_not_present(vcpu, vadr);
+}
+
+void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	__page_not_present(vcpu, vadr);
+}
+
+/* Deal with
+ *  Data access rights vector
+ */
+void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
+}
+
+fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
+		unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
+		unsigned long *ifs, struct kvm_pt_regs *regs)
+{
+	fp_state_t fp_state;
+	fpswa_ret_t ret;
+	struct kvm_vcpu *vcpu = current_vcpu;
+
+	uint64_t old_rr7 = ia64_get_rr(7UL<<61);
+
+	if (!vmm_fpswa_interface)
+		return (fpswa_ret_t) {-1, 0, 0, 0};
+
+	memset(&fp_state, 0, sizeof(fp_state_t));
+
+	/*
+	 * compute fp_state.  only FP registers f6 - f11 are used by the
+	 * vmm, so set those bits in the mask and set the low volatile
+	 * pointer to point to these registers.
+	 */
+	fp_state.bitmask_low64 = 0xfc0;  /* bit6..bit11 */
+
+	fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
+
+   /*
+	 * unsigned long (*EFI_FPSWA) (
+	 *      unsigned long    trap_type,
+	 *      void             *Bundle,
+	 *      unsigned long    *pipsr,
+	 *      unsigned long    *pfsr,
+	 *      unsigned long    *pisr,
+	 *      unsigned long    *ppreds,
+	 *      unsigned long    *pifs,
+	 *      void             *fp_state);
+	 */
+	/*Call host fpswa interface directly to virtualize
+	 *guest fpswa request!
+	 */
+	ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
+	ia64_srlz_d();
+
+	ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
+			ipsr, fpsr, isr, pr, ifs, &fp_state);
+	ia64_set_rr(7UL << 61, old_rr7);
+	ia64_srlz_d();
+	return ret;
+}
+
+/*
+ * Handle floating-point assist faults and traps for domain.
+ */
+unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
+					unsigned long isr)
+{
+	struct kvm_vcpu *v = current_vcpu;
+	IA64_BUNDLE bundle;
+	unsigned long fault_ip;
+	fpswa_ret_t ret;
+
+	fault_ip = regs->cr_iip;
+	/*
+	 * When the FP trap occurs, the trapping instruction is completed.
+	 * If ipsr.ri == 0, there is the trapping instruction in previous
+	 * bundle.
+	 */
+	if (!fp_fault && (ia64_psr(regs)->ri == 0))
+		fault_ip -= 16;
+
+	if (fetch_code(v, fault_ip, &bundle))
+		return -EAGAIN;
+
+	if (!bundle.i64[0] && !bundle.i64[1])
+		return -EACCES;
+
+	ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
+			&isr, &regs->pr, &regs->cr_ifs, regs);
+	return ret.status;
+}
+
+void reflect_interruption(u64 ifa, u64 isr, u64 iim,
+		u64 vec, struct kvm_pt_regs *regs)
+{
+	u64 vector;
+	int status ;
+	struct kvm_vcpu *vcpu = current_vcpu;
+	u64 vpsr = VCPU(vcpu, vpsr);
+
+	vector = vec2off[vec];
+
+	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
+		panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
+						"with psr.ic = 0\n", vector);
+		return;
+	}
+
+	switch (vec) {
+	case 32: 	/*IA64_FP_FAULT_VECTOR*/
+		status = vmm_handle_fpu_swa(1, regs, isr);
+		if (!status) {
+			vcpu_increment_iip(vcpu);
+			return;
+		} else if (-EAGAIN == status)
+			return;
+		break;
+	case 33:	/*IA64_FP_TRAP_VECTOR*/
+		status = vmm_handle_fpu_swa(0, regs, isr);
+		if (!status)
+			return ;
+		break;
+	}
+
+	VCPU(vcpu, isr) = isr;
+	VCPU(vcpu, iipa) = regs->cr_iip;
+	if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
+		VCPU(vcpu, iim) = iim;
+	else
+		set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
+
+	inject_guest_interruption(vcpu, vector);
+}
+
+static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
+						unsigned long arg)
+{
+	struct thash_data *data;
+	unsigned long gpa, poff;
+
+	if (!is_physical_mode(vcpu)) {
+		/* Depends on caller to provide the DTR or DTC mapping.*/
+		data = vtlb_lookup(vcpu, arg, D_TLB);
+		if (data)
+			gpa = data->page_flags & _PAGE_PPN_MASK;
+		else {
+			data = vhpt_lookup(arg);
+			if (!data)
+				return 0;
+			gpa = data->gpaddr & _PAGE_PPN_MASK;
+		}
+
+		poff = arg & (PSIZE(data->ps) - 1);
+		arg = PAGEALIGN(gpa, data->ps) | poff;
+	}
+	arg = kvm_gpa_to_mpa(arg << 1 >> 1);
+
+	return (unsigned long)__va(arg);
+}
+
+static void set_pal_call_data(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long gr28 = vcpu_get_gr(vcpu, 28);
+	unsigned long gr29 = vcpu_get_gr(vcpu, 29);
+	unsigned long gr30 = vcpu_get_gr(vcpu, 30);
+
+	/*FIXME:For static and stacked convention, firmware
+	 * has put the parameters in gr28-gr31 before
+	 * break to vmm  !!*/
+
+	switch (gr28) {
+	case PAL_PERF_MON_INFO:
+	case PAL_HALT_INFO:
+		p->u.pal_data.gr29 =  kvm_trans_pal_call_args(vcpu, gr29);
+		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+		break;
+	case PAL_BRAND_INFO:
+		p->u.pal_data.gr29 = gr29;
+		p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
+		break;
+	default:
+		p->u.pal_data.gr29 = gr29;
+		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+	}
+	p->u.pal_data.gr28 = gr28;
+	p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
+
+	p->exit_reason = EXIT_REASON_PAL_CALL;
+}
+
+static void get_pal_call_result(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+		vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
+		vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
+		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
+		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
+	} else
+		panic_vm(vcpu, "Mis-set for exit reason!\n");
+}
+
+static void set_sal_call_data(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
+	p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
+	p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
+	p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
+	p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
+	p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
+	p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
+	p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
+	p->exit_reason = EXIT_REASON_SAL_CALL;
+}
+
+static void get_sal_call_result(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
+		vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
+		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
+		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
+	} else
+		panic_vm(vcpu, "Mis-set for exit reason!\n");
+}
+
+void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
+		unsigned long isr, unsigned long iim)
+{
+	struct kvm_vcpu *v = current_vcpu;
+	long psr;
+
+	if (ia64_psr(regs)->cpl == 0) {
+		/* Allow hypercalls only when cpl = 0.  */
+		if (iim == DOMN_PAL_REQUEST) {
+			local_irq_save(psr);
+			set_pal_call_data(v);
+			vmm_transition(v);
+			get_pal_call_result(v);
+			vcpu_increment_iip(v);
+			local_irq_restore(psr);
+			return;
+		} else if (iim == DOMN_SAL_REQUEST) {
+			local_irq_save(psr);
+			set_sal_call_data(v);
+			vmm_transition(v);
+			get_sal_call_result(v);
+			vcpu_increment_iip(v);
+			local_irq_restore(psr);
+			return;
+		}
+	}
+	reflect_interruption(ifa, isr, iim, 11, regs);
+}
+
+void check_pending_irq(struct kvm_vcpu *vcpu)
+{
+	int  mask, h_pending, h_inservice;
+	u64 isr;
+	unsigned long  vpsr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	h_pending = highest_pending_irq(vcpu);
+	if (h_pending == NULL_VECTOR) {
+		update_vhpi(vcpu, NULL_VECTOR);
+		return;
+	}
+	h_inservice = highest_inservice_irq(vcpu);
+
+	vpsr = VCPU(vcpu, vpsr);
+	mask = irq_masked(vcpu, h_pending, h_inservice);
+	if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
+		isr = vpsr & IA64_PSR_RI;
+		update_vhpi(vcpu, h_pending);
+		reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
+	} else if (mask == IRQ_MASKED_BY_INSVC) {
+		if (VCPU(vcpu, vhpi))
+			update_vhpi(vcpu, NULL_VECTOR);
+	} else {
+		/* masked by vpsr.i or vtpr.*/
+		update_vhpi(vcpu, h_pending);
+	}
+}
+
+static void generate_exirq(struct kvm_vcpu *vcpu)
+{
+	unsigned  vpsr;
+	uint64_t isr;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	vpsr = VCPU(vcpu, vpsr);
+	isr = vpsr & IA64_PSR_RI;
+	if (!(vpsr & IA64_PSR_IC))
+		panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
+	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
+}
+
+void vhpi_detection(struct kvm_vcpu *vcpu)
+{
+	uint64_t    threshold, vhpi;
+	union ia64_tpr       vtpr;
+	struct ia64_psr vpsr;
+
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	vtpr.val = VCPU(vcpu, tpr);
+
+	threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
+	vhpi = VCPU(vcpu, vhpi);
+	if (vhpi > threshold) {
+		/* interrupt actived*/
+		generate_exirq(vcpu);
+	}
+}
+
+void leave_hypervisor_tail(void)
+{
+	struct kvm_vcpu *v = current_vcpu;
+
+	if (VMX(v, timer_check)) {
+		VMX(v, timer_check) = 0;
+		if (VMX(v, itc_check)) {
+			if (vcpu_get_itc(v) > VCPU(v, itm)) {
+				if (!(VCPU(v, itv) & (1 << 16))) {
+					vcpu_pend_interrupt(v, VCPU(v, itv)
+							& 0xff);
+					VMX(v, itc_check) = 0;
+				} else {
+					v->arch.timer_pending = 1;
+				}
+				VMX(v, last_itc) = VCPU(v, itm) + 1;
+			}
+		}
+	}
+
+	rmb();
+	if (v->arch.irq_new_pending) {
+		v->arch.irq_new_pending = 0;
+		VMX(v, irq_check) = 0;
+		check_pending_irq(v);
+		return;
+	}
+	if (VMX(v, irq_check)) {
+		VMX(v, irq_check) = 0;
+		vhpi_detection(v);
+	}
+}
+
+static inline void handle_lds(struct kvm_pt_regs *regs)
+{
+	regs->cr_ipsr |= IA64_PSR_ED;
+}
+
+void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
+{
+	unsigned long pte;
+	union ia64_rr rr;
+
+	rr.val = ia64_get_rr(vadr);
+	pte =  vadr & _PAGE_PPN_MASK;
+	pte = pte | PHY_PAGE_WB;
+	thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
+	return;
+}
+
+void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
+{
+	unsigned long vpsr;
+	int type;
+
+	u64 vhpt_adr, gppa, pteval, rr, itir;
+	union ia64_isr misr;
+	union ia64_pta vpta;
+	struct thash_data *data;
+	struct kvm_vcpu *v = current_vcpu;
+
+	vpsr = VCPU(v, vpsr);
+	misr.val = VMX(v, cr_isr);
+
+	type = vec;
+
+	if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
+		if (vec == 2) {
+			if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
+				emulate_io_inst(v, ((vadr << 1) >> 1), 4);
+				return;
+			}
+		}
+		physical_tlb_miss(v, vadr, type);
+		return;
+	}
+	data = vtlb_lookup(v, vadr, type);
+	if (data != 0) {
+		if (type == D_TLB) {
+			gppa = (vadr & ((1UL << data->ps) - 1))
+				+ (data->ppn >> (data->ps - 12) << data->ps);
+			if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
+				if (data->pl >= ((regs->cr_ipsr >>
+						IA64_PSR_CPL0_BIT) & 3))
+					emulate_io_inst(v, gppa, data->ma);
+				else {
+					vcpu_set_isr(v, misr.val);
+					data_access_rights(v, vadr);
+				}
+				return ;
+			}
+		}
+		thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
+
+	} else if (type == D_TLB) {
+		if (misr.sp) {
+			handle_lds(regs);
+			return;
+		}
+
+		rr = vcpu_get_rr(v, vadr);
+		itir = rr & (RR_RID_MASK | RR_PS_MASK);
+
+		if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
+			if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				alt_dtlb(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+			return ;
+		}
+
+		vpta.val = vcpu_get_pta(v);
+		/* avoid recursively walking (short format) VHPT */
+
+		vhpt_adr = vcpu_thash(v, vadr);
+		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
+			/* VHPT successfully read.  */
+			if (!(pteval & _PAGE_P)) {
+				if (vpsr & IA64_PSR_IC) {
+					vcpu_set_isr(v, misr.val);
+					dtlb_fault(v, vadr);
+				} else {
+					nested_dtlb(v);
+				}
+			} else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
+				thash_purge_and_insert(v, pteval, itir,
+								vadr, D_TLB);
+			} else if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				dtlb_fault(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+		} else {
+			/* Can't read VHPT.  */
+			if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				dvhpt_fault(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+		}
+	} else if (type == I_TLB) {
+		if (!(vpsr & IA64_PSR_IC))
+			misr.ni = 1;
+		if (!vhpt_enabled(v, vadr, INST_REF)) {
+			vcpu_set_isr(v, misr.val);
+			alt_itlb(v, vadr);
+			return;
+		}
+
+		vpta.val = vcpu_get_pta(v);
+
+		vhpt_adr = vcpu_thash(v, vadr);
+		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
+			/* VHPT successfully read.  */
+			if (pteval & _PAGE_P) {
+				if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
+					vcpu_set_isr(v, misr.val);
+					itlb_fault(v, vadr);
+					return ;
+				}
+				rr = vcpu_get_rr(v, vadr);
+				itir = rr & (RR_RID_MASK | RR_PS_MASK);
+				thash_purge_and_insert(v, pteval, itir,
+							vadr, I_TLB);
+			} else {
+				vcpu_set_isr(v, misr.val);
+				inst_page_not_present(v, vadr);
+			}
+		} else {
+			vcpu_set_isr(v, misr.val);
+			ivhpt_fault(v, vadr);
+		}
+	}
+}
+
+void kvm_vexirq(struct kvm_vcpu *vcpu)
+{
+	u64 vpsr, isr;
+	struct kvm_pt_regs *regs;
+
+	regs = vcpu_regs(vcpu);
+	vpsr = VCPU(vcpu, vpsr);
+	isr = vpsr & IA64_PSR_RI;
+	reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
+}
+
+void kvm_ia64_handle_irq(struct kvm_vcpu *v)
+{
+	struct exit_ctl_data *p = &v->arch.exit_data;
+	long psr;
+
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
+	vmm_transition(v);
+	local_irq_restore(psr);
+
+	VMX(v, timer_check) = 1;
+
+}
+
+static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
+{
+	u64 oldrid, moldrid, oldpsbits, vaddr;
+	struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
+	vaddr = p->vaddr;
+
+	oldrid = VMX(v, vrr[0]);
+	VMX(v, vrr[0]) = p->rr;
+	oldpsbits = VMX(v, psbits[0]);
+	VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
+	moldrid = ia64_get_rr(0x0);
+	ia64_set_rr(0x0, vrrtomrr(p->rr));
+	ia64_srlz_d();
+
+	vaddr = PAGEALIGN(vaddr, p->ps);
+	thash_purge_entries_remote(v, vaddr, p->ps);
+
+	VMX(v, vrr[0]) = oldrid;
+	VMX(v, psbits[0]) = oldpsbits;
+	ia64_set_rr(0x0, moldrid);
+	ia64_dv_serialize_data();
+}
+
+static void vcpu_do_resume(struct kvm_vcpu *vcpu)
+{
+	/*Re-init VHPT and VTLB once from resume*/
+	vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
+	thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
+	vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
+	thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
+
+	ia64_set_pta(vcpu->arch.vhpt.pta.val);
+}
+
+static void vmm_sanity_check(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
+		panic_vm(vcpu, "Failed to do vmm sanity check,"
+			"it maybe caused by crashed vmm!!\n\n");
+	}
+}
+
+static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
+{
+	vmm_sanity_check(vcpu); /*Guarantee vcpu running on healthy vmm!*/
+
+	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
+		vcpu_do_resume(vcpu);
+		return;
+	}
+
+	if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
+		thash_purge_all(vcpu);
+		return;
+	}
+
+	if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
+		while (vcpu->arch.ptc_g_count > 0)
+			ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
+	}
+}
+
+void vmm_transition(struct kvm_vcpu *vcpu)
+{
+	ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
+			1, 0, 0, 0, 0, 0);
+	vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
+	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
+						1, 0, 0, 0, 0, 0);
+	kvm_do_resume_op(vcpu);
+}
+
+void vmm_panic_handler(u64 vec)
+{
+	struct kvm_vcpu *vcpu = current_vcpu;
+	vmm_sanity = 0;
+	panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
+			vec2off[vec]);
+}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
new file mode 100644
index 00000000000..30897d44d61
--- /dev/null
+++ b/arch/ia64/kvm/trampoline.S
@@ -0,0 +1,1038 @@
+/* Save all processor states
+ *
+ * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
+ * Copyright (c) 2007 Anthony Xu   <anthony.xu@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include "asm-offsets.h"
+
+
+#define CTX(name)    VMM_CTX_##name##_OFFSET
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_BRANCH_REGS			\
+	add	r2 = CTX(B0),r32;		\
+	add	r3 = CTX(B1),r32;		\
+	mov	r16 = b0;			\
+	mov	r17 = b1;			\
+	;;					\
+	st8	[r2]=r16,16;			\
+	st8	[r3]=r17,16;			\
+	;;					\
+	mov	r16 = b2;			\
+	mov	r17 = b3;			\
+	;;					\
+	st8	[r2]=r16,16;			\
+	st8	[r3]=r17,16;			\
+	;;					\
+	mov	r16 = b4;			\
+	mov	r17 = b5;			\
+	;;					\
+	st8	[r2]=r16;   			\
+	st8	[r3]=r17;   			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_BRANCH_REGS			\
+	add	r2 = CTX(B0),r33;		\
+	add	r3 = CTX(B1),r33;		\
+	;;					\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	b0 = r16;			\
+	mov	b1 = r17;			\
+	;;					\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	b2 = r16;			\
+	mov	b3 = r17;			\
+	;;					\
+	ld8	r16=[r2];   			\
+	ld8	r17=[r3];   			\
+	;;					\
+	mov	b4=r16;				\
+	mov	b5=r17;				\
+	;;
+
+
+	/*
+	 *	r32: context_t base address
+	 *	bsw == 1
+	 *	Save all bank1 general registers, r4 ~ r7
+	 */
+#define	SAVE_GENERAL_REGS			\
+	add	r2=CTX(R4),r32;			\
+	add	r3=CTX(R5),r32;			\
+	;;					\
+.mem.offset 0,0;        			\
+	st8.spill	[r2]=r4,16;		\
+.mem.offset 8,0;        			\
+	st8.spill	[r3]=r5,16;		\
+	;;					\
+.mem.offset 0,0;        			\
+	st8.spill	[r2]=r6,48;		\
+.mem.offset 8,0;        			\
+	st8.spill	[r3]=r7,48;		\
+	;;                          		\
+.mem.offset 0,0;        			\
+    st8.spill    [r2]=r12;			\
+.mem.offset 8,0;				\
+    st8.spill    [r3]=r13;			\
+    ;;
+
+	/*
+	 *	r33: context_t base address
+	 *	bsw == 1
+	 */
+#define	RESTORE_GENERAL_REGS			\
+	add	r2=CTX(R4),r33;			\
+	add	r3=CTX(R5),r33;			\
+	;;					\
+	ld8.fill	r4=[r2],16;		\
+	ld8.fill	r5=[r3],16;		\
+	;;					\
+	ld8.fill	r6=[r2],48;		\
+	ld8.fill	r7=[r3],48;		\
+	;;					\
+	ld8.fill    r12=[r2];			\
+	ld8.fill    r13 =[r3];			\
+	;;
+
+
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_KERNEL_REGS			\
+	add	r2 = CTX(KR0),r32;		\
+	add	r3 = CTX(KR1),r32;		\
+	mov	r16 = ar.k0;			\
+	mov	r17 = ar.k1;			\
+	;;		        		\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;		        		\
+	mov	r16 = ar.k2;			\
+	mov	r17 = ar.k3;			\
+	;;		        		\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;					\
+	mov	r16 = ar.k4;			\
+	mov	r17 = ar.k5;			\
+	;;				    	\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;					\
+	mov	r16 = ar.k6;			\
+	mov	r17 = ar.k7;			\
+	;;		    			\
+	st8	[r2] = r16;     		\
+	st8	[r3] = r17;			\
+	;;
+
+
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_KERNEL_REGS			\
+	add	r2 = CTX(KR0),r33;		\
+	add	r3 = CTX(KR1),r33;		\
+	;;		    			\
+	ld8	r16=[r2],16;     		\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k0=r16;  			\
+	mov	ar.k1=r17;	    		\
+	;;		        		\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;		        		\
+	mov	ar.k2=r16;   			\
+	mov	ar.k3=r17;	    		\
+	;;		        		\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k4=r16;			\
+	mov	ar.k5=r17;	    		\
+	;;				    	\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k6=r16;  			\
+	mov	ar.k7=r17;	    		\
+	;;
+
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_APP_REGS				\
+	add  r2 = CTX(BSPSTORE),r32;		\
+	mov  r16 = ar.bspstore;			\
+	;;					\
+	st8  [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
+	mov  r16 = ar.rnat;			\
+	;;					\
+	st8  [r2] = r16,CTX(FCR)-CTX(RNAT);	\
+	mov  r16 = ar.fcr;			\
+	;;					\
+	st8  [r2] = r16,CTX(EFLAG)-CTX(FCR);	\
+	mov  r16 = ar.eflag;			\
+	;;					\
+	st8  [r2] = r16,CTX(CFLG)-CTX(EFLAG);	\
+	mov  r16 = ar.cflg;			\
+	;;					\
+	st8  [r2] = r16,CTX(FSR)-CTX(CFLG);	\
+	mov  r16 = ar.fsr;			\
+	;;					\
+	st8  [r2] = r16,CTX(FIR)-CTX(FSR);	\
+	mov  r16 = ar.fir;			\
+	;;					\
+	st8  [r2] = r16,CTX(FDR)-CTX(FIR);	\
+	mov  r16 = ar.fdr;			\
+	;;					\
+	st8  [r2] = r16,CTX(UNAT)-CTX(FDR);	\
+	mov  r16 = ar.unat;			\
+	;;					\
+	st8  [r2] = r16,CTX(FPSR)-CTX(UNAT);	\
+	mov  r16 = ar.fpsr;			\
+	;;					\
+	st8  [r2] = r16,CTX(PFS)-CTX(FPSR);	\
+	mov  r16 = ar.pfs;			\
+	;;					\
+	st8  [r2] = r16,CTX(LC)-CTX(PFS);	\
+	mov  r16 = ar.lc;			\
+	;;					\
+	st8  [r2] = r16;			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_APP_REGS			\
+	add  r2=CTX(BSPSTORE),r33;		\
+	;;					\
+	ld8  r16=[r2],CTX(RNAT)-CTX(BSPSTORE);	\
+	;;					\
+	mov  ar.bspstore=r16;			\
+	ld8  r16=[r2],CTX(FCR)-CTX(RNAT);	\
+	;;					\
+	mov  ar.rnat=r16;			\
+	ld8  r16=[r2],CTX(EFLAG)-CTX(FCR);	\
+	;;					\
+	mov  ar.fcr=r16;			\
+	ld8  r16=[r2],CTX(CFLG)-CTX(EFLAG);	\
+	;;					\
+	mov  ar.eflag=r16;			\
+	ld8  r16=[r2],CTX(FSR)-CTX(CFLG);	\
+	;;					\
+	mov  ar.cflg=r16;			\
+	ld8  r16=[r2],CTX(FIR)-CTX(FSR);	\
+	;;					\
+	mov  ar.fsr=r16;			\
+	ld8  r16=[r2],CTX(FDR)-CTX(FIR);	\
+	;;					\
+	mov  ar.fir=r16;			\
+	ld8  r16=[r2],CTX(UNAT)-CTX(FDR);	\
+	;;					\
+	mov  ar.fdr=r16;			\
+	ld8  r16=[r2],CTX(FPSR)-CTX(UNAT);	\
+	;;					\
+	mov  ar.unat=r16;			\
+	ld8  r16=[r2],CTX(PFS)-CTX(FPSR);	\
+	;;					\
+	mov  ar.fpsr=r16;			\
+	ld8  r16=[r2],CTX(LC)-CTX(PFS);		\
+	;;					\
+	mov  ar.pfs=r16;			\
+	ld8  r16=[r2];				\
+	;;					\
+	mov  ar.lc=r16;				\
+	;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_CTL_REGS				\
+	add	r2 = CTX(DCR),r32;		\
+	mov	r16 = cr.dcr;			\
+	;;					\
+	st8	[r2] = r16,CTX(IVA)-CTX(DCR);	\
+	;;                          		\
+	mov	r16 = cr.iva;			\
+	;;					\
+	st8	[r2] = r16,CTX(PTA)-CTX(IVA);	\
+	;;					\
+	mov r16 = cr.pta;			\
+	;;					\
+	st8 [r2] = r16 ;			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_CTL_REGS				\
+	add	r2 = CTX(DCR),r33;	        	\
+	;;						\
+	ld8	r16 = [r2],CTX(IVA)-CTX(DCR);		\
+	;;                      			\
+	mov	cr.dcr = r16;				\
+	dv_serialize_data;				\
+	;;						\
+	ld8	r16 = [r2],CTX(PTA)-CTX(IVA);		\
+	;;						\
+	mov	cr.iva = r16;				\
+	dv_serialize_data;				\
+	;;						\
+	ld8 r16 = [r2];					\
+	;;						\
+	mov cr.pta = r16;				\
+	dv_serialize_data;				\
+	;;
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_REGION_REGS			\
+	add	r2=CTX(RR0),r32;		\
+	mov	r16=rr[r0];			\
+	dep.z	r18=1,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=2,61,3;			\
+	;;					\
+	st8	[r2]=r17,8;			\
+	mov	r16=rr[r18];			\
+	dep.z	r18=3,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=4,61,3;			\
+	;;					\
+	st8	[r2]=r17,8;			\
+	mov	r16=rr[r18];			\
+	dep.z	r18=5,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=7,61,3;			\
+	;;					\
+	st8	[r2]=r17,16;			\
+	mov	r16=rr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	;;
+
+	/*
+	 *	r33:context_t base address
+	 */
+#define	RESTORE_REGION_REGS	\
+	add	r2=CTX(RR0),r33;\
+	mov r18=r0;		\
+	;;			\
+	ld8	r20=[r2],8;	\
+	;;	/* rr0 */	\
+	ld8	r21=[r2],8;	\
+	;;	/* rr1 */	\
+	ld8	r22=[r2],8;	\
+	;;	/* rr2 */	\
+	ld8	r23=[r2],8;	\
+	;;	/* rr3 */	\
+	ld8	r24=[r2],8;	\
+	;;	/* rr4 */	\
+	ld8	r25=[r2],16;	\
+	;;	/* rr5 */	\
+	ld8	r27=[r2];	\
+	;;	/* rr7 */	\
+	mov rr[r18]=r20;	\
+	dep.z	r18=1,61,3;	\
+	;;  /* rr1 */		\
+	mov rr[r18]=r21;	\
+	dep.z	r18=2,61,3;	\
+	;;  /* rr2 */		\
+	mov rr[r18]=r22;	\
+	dep.z	r18=3,61,3;	\
+	;;  /* rr3 */		\
+	mov rr[r18]=r23;	\
+	dep.z	r18=4,61,3;	\
+	;;  /* rr4 */		\
+	mov rr[r18]=r24;	\
+	dep.z	r18=5,61,3;	\
+	;;  /* rr5 */		\
+	mov rr[r18]=r25;	\
+	dep.z	r18=7,61,3;	\
+	;;  /* rr7 */		\
+	mov rr[r18]=r27;	\
+	;;			\
+	srlz.i;			\
+	;;
+
+
+
+	/*
+	 *	r32:	context_t base address
+	 *	r36~r39:scratch registers
+	 */
+#define	SAVE_DEBUG_REGS				\
+	add	r2=CTX(IBR0),r32;		\
+	add	r3=CTX(DBR0),r32;		\
+	mov	r16=ibr[r0];			\
+	mov	r17=dbr[r0];			\
+	;;					\
+	st8	[r2]=r16,8; 			\
+	st8	[r3]=r17,8;	    		\
+	add	r18=1,r0;		    	\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=2,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=2,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=3,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=4,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=5,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=6,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=7,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	;;
+
+
+/*
+ *      r33:    point to context_t structure
+ *      ar.lc are corrupted.
+ */
+#define RESTORE_DEBUG_REGS			\
+	add	r2=CTX(IBR0),r33;		\
+	add	r3=CTX(DBR0),r33;		\
+	mov r16=7;    				\
+	mov r17=r0;				\
+	;;                    			\
+	mov ar.lc = r16;			\
+	;; 					\
+1:						\
+	ld8 r18=[r2],8;		    		\
+	ld8 r19=[r3],8;				\
+	;;					\
+	mov ibr[r17]=r18;			\
+	mov dbr[r17]=r19;			\
+	;;   					\
+	srlz.i;					\
+	;; 					\
+	add r17=1,r17;				\
+	br.cloop.sptk 1b;			\
+	;;
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_FPU_LOW				\
+	add	r2=CTX(F2),r32;			\
+	add	r3=CTX(F3),r32;			\
+	;;					\
+	stf.spill.nta	[r2]=f2,32;		\
+	stf.spill.nta	[r3]=f3,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f4,32;		\
+	stf.spill.nta	[r3]=f5,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f6,32;		\
+	stf.spill.nta	[r3]=f7,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f8,32;		\
+	stf.spill.nta	[r3]=f9,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f10,32;		\
+	stf.spill.nta	[r3]=f11,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f12,32;		\
+	stf.spill.nta	[r3]=f13,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f14,32;		\
+	stf.spill.nta	[r3]=f15,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f16,32;		\
+	stf.spill.nta	[r3]=f17,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f18,32;		\
+	stf.spill.nta	[r3]=f19,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f20,32;		\
+	stf.spill.nta	[r3]=f21,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f22,32;		\
+	stf.spill.nta	[r3]=f23,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f24,32;		\
+	stf.spill.nta	[r3]=f25,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f26,32;		\
+	stf.spill.nta	[r3]=f27,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f28,32;		\
+	stf.spill.nta	[r3]=f29,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f30;		\
+	stf.spill.nta	[r3]=f31;		\
+	;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_FPU_HIGH				\
+	add	r2=CTX(F32),r32;		\
+	add	r3=CTX(F33),r32;		\
+	;;					\
+	stf.spill.nta	[r2]=f32,32;		\
+	stf.spill.nta	[r3]=f33,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f34,32;		\
+	stf.spill.nta	[r3]=f35,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f36,32;		\
+	stf.spill.nta	[r3]=f37,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f38,32;		\
+	stf.spill.nta	[r3]=f39,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f40,32;		\
+	stf.spill.nta	[r3]=f41,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f42,32;		\
+	stf.spill.nta	[r3]=f43,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f44,32;		\
+	stf.spill.nta	[r3]=f45,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f46,32;		\
+	stf.spill.nta	[r3]=f47,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f48,32;		\
+	stf.spill.nta	[r3]=f49,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f50,32;		\
+	stf.spill.nta	[r3]=f51,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f52,32;		\
+	stf.spill.nta	[r3]=f53,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f54,32;		\
+	stf.spill.nta	[r3]=f55,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f56,32;		\
+	stf.spill.nta	[r3]=f57,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f58,32;		\
+	stf.spill.nta	[r3]=f59,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f60,32;		\
+	stf.spill.nta	[r3]=f61,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f62,32;		\
+	stf.spill.nta	[r3]=f63,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f64,32;		\
+	stf.spill.nta	[r3]=f65,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f66,32;		\
+	stf.spill.nta	[r3]=f67,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f68,32;		\
+	stf.spill.nta	[r3]=f69,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f70,32;		\
+	stf.spill.nta	[r3]=f71,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f72,32;		\
+	stf.spill.nta	[r3]=f73,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f74,32;		\
+	stf.spill.nta	[r3]=f75,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f76,32;		\
+	stf.spill.nta	[r3]=f77,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f78,32;		\
+	stf.spill.nta	[r3]=f79,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f80,32;		\
+	stf.spill.nta	[r3]=f81,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f82,32;		\
+	stf.spill.nta	[r3]=f83,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f84,32;		\
+	stf.spill.nta	[r3]=f85,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f86,32;		\
+	stf.spill.nta	[r3]=f87,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f88,32;		\
+	stf.spill.nta	[r3]=f89,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f90,32;		\
+	stf.spill.nta	[r3]=f91,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f92,32;		\
+	stf.spill.nta	[r3]=f93,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f94,32;		\
+	stf.spill.nta	[r3]=f95,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f96,32;		\
+	stf.spill.nta	[r3]=f97,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f98,32;		\
+	stf.spill.nta	[r3]=f99,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f100,32;		\
+	stf.spill.nta	[r3]=f101,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f102,32;		\
+	stf.spill.nta	[r3]=f103,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f104,32;		\
+	stf.spill.nta	[r3]=f105,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f106,32;		\
+	stf.spill.nta	[r3]=f107,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f108,32;		\
+	stf.spill.nta	[r3]=f109,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f110,32;		\
+	stf.spill.nta	[r3]=f111,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f112,32;		\
+	stf.spill.nta	[r3]=f113,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f114,32;		\
+	stf.spill.nta	[r3]=f115,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f116,32;		\
+	stf.spill.nta	[r3]=f117,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f118,32;		\
+	stf.spill.nta	[r3]=f119,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f120,32;		\
+	stf.spill.nta	[r3]=f121,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f122,32;		\
+	stf.spill.nta	[r3]=f123,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f124,32;		\
+	stf.spill.nta	[r3]=f125,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f126;		\
+	stf.spill.nta	[r3]=f127;		\
+	;;
+
+     /*
+      *      r33:    point to context_t structure
+      */
+#define	RESTORE_FPU_LOW				\
+    add     r2 = CTX(F2), r33;			\
+    add     r3 = CTX(F3), r33;			\
+    ;;						\
+    ldf.fill.nta f2 = [r2], 32;			\
+    ldf.fill.nta f3 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f4 = [r2], 32;			\
+    ldf.fill.nta f5 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f6 = [r2], 32;			\
+    ldf.fill.nta f7 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f8 = [r2], 32;			\
+    ldf.fill.nta f9 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f10 = [r2], 32;		\
+    ldf.fill.nta f11 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f12 = [r2], 32;		\
+    ldf.fill.nta f13 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f14 = [r2], 32;		\
+    ldf.fill.nta f15 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f16 = [r2], 32;		\
+    ldf.fill.nta f17 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f18 = [r2], 32;		\
+    ldf.fill.nta f19 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f20 = [r2], 32;		\
+    ldf.fill.nta f21 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f22 = [r2], 32;		\
+    ldf.fill.nta f23 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f24 = [r2], 32;		\
+    ldf.fill.nta f25 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f26 = [r2], 32;		\
+    ldf.fill.nta f27 = [r3], 32;		\
+	;;					\
+    ldf.fill.nta f28 = [r2], 32;		\
+    ldf.fill.nta f29 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f30 = [r2], 32;		\
+    ldf.fill.nta f31 = [r3], 32;		\
+    ;;
+
+
+
+    /*
+     *      r33:    point to context_t structure
+     */
+#define	RESTORE_FPU_HIGH			\
+    add     r2 = CTX(F32), r33;			\
+    add     r3 = CTX(F33), r33;			\
+    ;;						\
+    ldf.fill.nta f32 = [r2], 32;		\
+    ldf.fill.nta f33 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f34 = [r2], 32;		\
+    ldf.fill.nta f35 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f36 = [r2], 32;		\
+    ldf.fill.nta f37 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f38 = [r2], 32;		\
+    ldf.fill.nta f39 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f40 = [r2], 32;		\
+    ldf.fill.nta f41 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f42 = [r2], 32;		\
+    ldf.fill.nta f43 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f44 = [r2], 32;		\
+    ldf.fill.nta f45 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f46 = [r2], 32;		\
+    ldf.fill.nta f47 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f48 = [r2], 32;		\
+    ldf.fill.nta f49 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f50 = [r2], 32;		\
+    ldf.fill.nta f51 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f52 = [r2], 32;		\
+    ldf.fill.nta f53 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f54 = [r2], 32;		\
+    ldf.fill.nta f55 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f56 = [r2], 32;		\
+    ldf.fill.nta f57 = [r3], 32;   		\
+    ;;						\
+    ldf.fill.nta f58 = [r2], 32;		\
+    ldf.fill.nta f59 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f60 = [r2], 32;		\
+    ldf.fill.nta f61 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f62 = [r2], 32;		\
+    ldf.fill.nta f63 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f64 = [r2], 32;		\
+    ldf.fill.nta f65 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f66 = [r2], 32;		\
+    ldf.fill.nta f67 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f68 = [r2], 32;		\
+    ldf.fill.nta f69 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f70 = [r2], 32;		\
+    ldf.fill.nta f71 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f72 = [r2], 32;		\
+    ldf.fill.nta f73 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f74 = [r2], 32;		\
+    ldf.fill.nta f75 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f76 = [r2], 32;		\
+    ldf.fill.nta f77 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f78 = [r2], 32;		\
+    ldf.fill.nta f79 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f80 = [r2], 32;		\
+    ldf.fill.nta f81 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f82 = [r2], 32;		\
+    ldf.fill.nta f83 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f84 = [r2], 32;		\
+    ldf.fill.nta f85 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f86 = [r2], 32;		\
+    ldf.fill.nta f87 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f88 = [r2], 32;		\
+    ldf.fill.nta f89 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f90 = [r2], 32;		\
+    ldf.fill.nta f91 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f92 = [r2], 32;		\
+    ldf.fill.nta f93 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f94 = [r2], 32;		\
+    ldf.fill.nta f95 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f96 = [r2], 32;		\
+    ldf.fill.nta f97 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f98 = [r2], 32;		\
+    ldf.fill.nta f99 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f100 = [r2], 32;		\
+    ldf.fill.nta f101 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f102 = [r2], 32;		\
+    ldf.fill.nta f103 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f104 = [r2], 32;		\
+    ldf.fill.nta f105 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f106 = [r2], 32;		\
+    ldf.fill.nta f107 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f108 = [r2], 32;		\
+    ldf.fill.nta f109 = [r3], 32;   		\
+    ;;						\
+    ldf.fill.nta f110 = [r2], 32;		\
+    ldf.fill.nta f111 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f112 = [r2], 32;		\
+    ldf.fill.nta f113 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f114 = [r2], 32;		\
+    ldf.fill.nta f115 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f116 = [r2], 32;		\
+    ldf.fill.nta f117 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f118 = [r2], 32;		\
+    ldf.fill.nta f119 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f120 = [r2], 32;		\
+    ldf.fill.nta f121 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f122 = [r2], 32;		\
+    ldf.fill.nta f123 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f124 = [r2], 32;		\
+    ldf.fill.nta f125 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f126 = [r2], 32;		\
+    ldf.fill.nta f127 = [r3], 32;		\
+    ;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_PTK_REGS				\
+    add r2=CTX(PKR0), r32;			\
+    mov r16=7;    				\
+    ;;                         			\
+    mov ar.lc=r16;  				\
+    mov r17=r0;					\
+    ;;						\
+1:						\
+    mov r18=pkr[r17];				\
+    ;;                     			\
+    srlz.i;					\
+    ;; 						\
+    st8 [r2]=r18, 8;				\
+    ;;    					\
+    add r17 =1,r17;				\
+    ;;                     			\
+    br.cloop.sptk 1b;				\
+    ;;
+
+/*
+ *      r33:    point to context_t structure
+ *      ar.lc are corrupted.
+ */
+#define RESTORE_PTK_REGS	    		\
+    add r2=CTX(PKR0), r33;			\
+    mov r16=7;    				\
+    ;;                         			\
+    mov ar.lc=r16;  				\
+    mov r17=r0;					\
+    ;;						\
+1: 						\
+    ld8 r18=[r2], 8;				\
+    ;;						\
+    mov pkr[r17]=r18;				\
+    ;;    					\
+    srlz.i;					\
+    ;; 						\
+    add r17 =1,r17;				\
+    ;;                     			\
+    br.cloop.sptk 1b;				\
+    ;;
+
+
+/*
+ * void vmm_trampoline( context_t * from,
+ *			context_t * to)
+ *
+ * 	from:	r32
+ *	to:	r33
+ *  note: interrupt disabled before call this function.
+ */
+GLOBAL_ENTRY(vmm_trampoline)
+    mov r16 = psr
+    adds r2 = CTX(PSR), r32
+    ;;
+    st8 [r2] = r16, 8       // psr
+    mov r17 = pr
+    ;;
+    st8 [r2] = r17, 8       // pr
+    mov r18 = ar.unat
+    ;;
+    st8 [r2] = r18
+    mov r17 = ar.rsc
+    ;;
+    adds r2 = CTX(RSC),r32
+    ;;
+    st8 [r2]= r17
+    mov ar.rsc =0
+    flushrs
+    ;;
+    SAVE_GENERAL_REGS
+    ;;
+    SAVE_KERNEL_REGS
+    ;;
+    SAVE_APP_REGS
+    ;;
+    SAVE_BRANCH_REGS
+    ;;
+    SAVE_CTL_REGS
+    ;;
+    SAVE_REGION_REGS
+    ;;
+    //SAVE_DEBUG_REGS
+    ;;
+    rsm  psr.dfl
+    ;;
+    srlz.d
+    ;;
+    SAVE_FPU_LOW
+    ;;
+    rsm  psr.dfh
+    ;;
+    srlz.d
+    ;;
+    SAVE_FPU_HIGH
+    ;;
+    SAVE_PTK_REGS
+    ;;
+    RESTORE_PTK_REGS
+    ;;
+    RESTORE_FPU_HIGH
+    ;;
+    RESTORE_FPU_LOW
+    ;;
+    //RESTORE_DEBUG_REGS
+    ;;
+    RESTORE_REGION_REGS
+    ;;
+    RESTORE_CTL_REGS
+    ;;
+    RESTORE_BRANCH_REGS
+    ;;
+    RESTORE_APP_REGS
+    ;;
+    RESTORE_KERNEL_REGS
+    ;;
+    RESTORE_GENERAL_REGS
+    ;;
+    adds r2=CTX(PSR), r33
+    ;;
+    ld8 r16=[r2], 8       // psr
+    ;;
+    mov psr.l=r16
+    ;;
+    srlz.d
+    ;;
+    ld8 r16=[r2], 8       // pr
+    ;;
+    mov pr =r16,-1
+    ld8 r16=[r2]       // unat
+    ;;
+    mov ar.unat=r16
+    ;;
+    adds r2=CTX(RSC),r33
+    ;;
+    ld8 r16 =[r2]
+    ;;
+    mov ar.rsc = r16
+    ;;
+    br.ret.sptk.few b0
+END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
new file mode 100644
index 00000000000..958815c9787
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.c
@@ -0,0 +1,2209 @@
+/*
+ * kvm_vcpu.c: handling all virtual cpu related thing.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Shaofan Li (Susue Li) <susie.li@intel.com>
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  Xiantao Zhang <xiantao.zhang@intel.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+
+#include <asm/processor.h>
+#include <asm/ia64regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/kregs.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+#include "asm-offsets.h"
+#include "vcpu.h"
+
+/*
+ * Special notes:
+ * - Index by it/dt/rt sequence
+ * - Only existing mode transitions are allowed in this table
+ * - RSE is placed at lazy mode when emulating guest partial mode
+ * - If gva happens to be rr0 and rr4, only allowed case is identity
+ *   mapping (gva=gpa), or panic! (How?)
+ */
+int mm_switch_table[8][8] = {
+	/*  2004/09/12(Kevin): Allow switch to self */
+	/*
+	 *  (it,dt,rt): (0,0,0) -> (1,1,1)
+	 *  This kind of transition usually occurs in the very early
+	 *  stage of Linux boot up procedure. Another case is in efi
+	 *  and pal calls. (see "arch/ia64/kernel/head.S")
+	 *
+	 *  (it,dt,rt): (0,0,0) -> (0,1,1)
+	 *  This kind of transition is found when OSYa exits efi boot
+	 *  service. Due to gva = gpa in this case (Same region),
+	 *  data access can be satisfied though itlb entry for physical
+	 *  emulation is hit.
+	 */
+	{SW_SELF, 0,  0,  SW_NOP, 0,  0,  0,  SW_P2V},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	/*
+	 *  (it,dt,rt): (0,1,1) -> (1,1,1)
+	 *  This kind of transition is found in OSYa.
+	 *
+	 *  (it,dt,rt): (0,1,1) -> (0,0,0)
+	 *  This kind of transition is found in OSYa
+	 */
+	{SW_NOP, 0,  0,  SW_SELF, 0,  0,  0,  SW_P2V},
+	/* (1,0,0)->(1,1,1) */
+	{0,  0,  0,  0,  0,  0,  0,  SW_P2V},
+	/*
+	 *  (it,dt,rt): (1,0,1) -> (1,1,1)
+	 *  This kind of transition usually occurs when Linux returns
+	 *  from the low level TLB miss handlers.
+	 *  (see "arch/ia64/kernel/ivt.S")
+	 */
+	{0,  0,  0,  0,  0,  SW_SELF, 0,  SW_P2V},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	/*
+	 *  (it,dt,rt): (1,1,1) -> (1,0,1)
+	 *  This kind of transition usually occurs in Linux low level
+	 *  TLB miss handler. (see "arch/ia64/kernel/ivt.S")
+	 *
+	 *  (it,dt,rt): (1,1,1) -> (0,0,0)
+	 *  This kind of transition usually occurs in pal and efi calls,
+	 *  which requires running in physical mode.
+	 *  (see "arch/ia64/kernel/head.S")
+	 *  (1,1,1)->(1,0,0)
+	 */
+
+	{SW_V2P, 0,  0,  0,  SW_V2P, SW_V2P, 0,  SW_SELF},
+};
+
+void physical_mode_init(struct kvm_vcpu  *vcpu)
+{
+	vcpu->arch.mode_flags = GUEST_IN_PHY;
+}
+
+void switch_to_physical_rid(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	/* Save original virtual mode rr[0] and rr[4] */
+	psr = ia64_clear_ic();
+	ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
+	ia64_srlz_d();
+	ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
+	ia64_srlz_d();
+
+	ia64_set_psr(psr);
+	return;
+}
+
+void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	psr = ia64_clear_ic();
+	ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
+	ia64_srlz_d();
+	ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
+	ia64_srlz_d();
+	ia64_set_psr(psr);
+	return;
+}
+
+static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
+{
+	return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
+}
+
+void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
+					struct ia64_psr new_psr)
+{
+	int act;
+	act = mm_switch_action(old_psr, new_psr);
+	switch (act) {
+	case SW_V2P:
+		/*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
+		old_psr.val, new_psr.val);*/
+		switch_to_physical_rid(vcpu);
+		/*
+		 * Set rse to enforced lazy, to prevent active rse
+		 *save/restor when guest physical mode.
+		 */
+		vcpu->arch.mode_flags |= GUEST_IN_PHY;
+		break;
+	case SW_P2V:
+		switch_to_virtual_rid(vcpu);
+		/*
+		 * recover old mode which is saved when entering
+		 * guest physical mode
+		 */
+		vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
+		break;
+	case SW_SELF:
+		break;
+	case SW_NOP:
+		break;
+	default:
+		/* Sanity check */
+		break;
+	}
+	return;
+}
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ *  - insertions (itc.*, itr.*)
+ *  - purges (ptc.* and ptr.*)
+ *  - tpa
+ *  - tak
+ *  - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void check_mm_mode_switch(struct kvm_vcpu *vcpu,  struct ia64_psr old_psr,
+					struct ia64_psr new_psr)
+{
+
+	if ((old_psr.dt != new_psr.dt)
+			|| (old_psr.it != new_psr.it)
+			|| (old_psr.rt != new_psr.rt))
+		switch_mm_mode(vcpu, old_psr, new_psr);
+
+	return;
+}
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ *  - insertions (itc.*, itr.*)
+ *  - purges (ptc.* and ptr.*)
+ *  - tpa
+ *  - tak
+ *  - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
+{
+	if (is_physical_mode(vcpu)) {
+		vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
+		switch_to_virtual_rid(vcpu);
+	}
+	return;
+}
+
+/* Recover always follows prepare */
+void recover_if_physical_mode(struct kvm_vcpu *vcpu)
+{
+	if (is_physical_mode(vcpu))
+		switch_to_physical_rid(vcpu);
+	vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
+	return;
+}
+
+#define RPT(x)	((u16) &((struct kvm_pt_regs *)0)->x)
+
+static u16 gr_info[32] = {
+	0, 	/* r0 is read-only : WE SHOULD NEVER GET THIS */
+	RPT(r1), RPT(r2), RPT(r3),
+	RPT(r4), RPT(r5), RPT(r6), RPT(r7),
+	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
+	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
+	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
+	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
+	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
+	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
+};
+
+#define IA64_FIRST_STACKED_GR   32
+#define IA64_FIRST_ROTATING_FR  32
+
+static inline unsigned long
+rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
+{
+	reg += rrb;
+	if (reg >= sor)
+		reg -= sor;
+	return reg;
+}
+
+/*
+ * Return the (rotated) index for floating point register
+ * be in the REGNUM (REGNUM must range from 32-127,
+ * result is in the range from 0-95.
+ */
+static inline unsigned long fph_index(struct kvm_pt_regs *regs,
+						long regnum)
+{
+	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
+	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
+}
+
+/*
+ * The inverse of the above: given bspstore and the number of
+ * registers, calculate ar.bsp.
+ */
+static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
+							long num_regs)
+{
+	long delta = ia64_rse_slot_num(addr) + num_regs;
+	int i = 0;
+
+	if (num_regs < 0)
+		delta -= 0x3e;
+	if (delta < 0) {
+		while (delta <= -0x3f) {
+			i--;
+			delta += 0x3f;
+		}
+	} else {
+		while (delta >= 0x3f) {
+			i++;
+			delta -= 0x3f;
+		}
+	}
+
+	return addr + num_regs + i;
+}
+
+static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
+					unsigned long *val, int *nat)
+{
+	unsigned long *bsp, *addr, *rnat_addr, *bspstore;
+	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
+	unsigned long nat_mask;
+	unsigned long old_rsc, new_rsc;
+	long sof = (regs->cr_ifs) & 0x7f;
+	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
+	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+	long ridx = r1 - 32;
+
+	if (ridx < sor)
+		ridx = rotate_reg(sor, rrb_gr, ridx);
+
+	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
+	new_rsc = old_rsc&(~(0x3));
+	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
+
+	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	bsp = kbs + (regs->loadrs >> 19);
+
+	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
+	nat_mask = 1UL << ia64_rse_slot_num(addr);
+	rnat_addr = ia64_rse_rnat_addr(addr);
+
+	if (addr >= bspstore) {
+		ia64_flushrs();
+		ia64_mf();
+		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	}
+	*val = *addr;
+	if (nat) {
+		if (bspstore < rnat_addr)
+			*nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
+							& nat_mask);
+		else
+			*nat = (int)!!((*rnat_addr) & nat_mask);
+		ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
+	}
+}
+
+void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
+				unsigned long val, unsigned long nat)
+{
+	unsigned long *bsp, *bspstore, *addr, *rnat_addr;
+	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
+	unsigned long nat_mask;
+	unsigned long old_rsc, new_rsc, psr;
+	unsigned long rnat;
+	long sof = (regs->cr_ifs) & 0x7f;
+	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
+	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+	long ridx = r1 - 32;
+
+	if (ridx < sor)
+		ridx = rotate_reg(sor, rrb_gr, ridx);
+
+	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
+	/* put RSC to lazy mode, and set loadrs 0 */
+	new_rsc = old_rsc & (~0x3fff0003);
+	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
+	bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
+
+	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
+	nat_mask = 1UL << ia64_rse_slot_num(addr);
+	rnat_addr = ia64_rse_rnat_addr(addr);
+
+	local_irq_save(psr);
+	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	if (addr >= bspstore) {
+
+		ia64_flushrs();
+		ia64_mf();
+		*addr = val;
+		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
+		if (bspstore < rnat_addr)
+			rnat = rnat & (~nat_mask);
+		else
+			*rnat_addr = (*rnat_addr)&(~nat_mask);
+
+		ia64_mf();
+		ia64_loadrs();
+		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
+	} else {
+		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
+		*addr = val;
+		if (bspstore < rnat_addr)
+			rnat = rnat&(~nat_mask);
+		else
+			*rnat_addr = (*rnat_addr) & (~nat_mask);
+
+		ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore);
+		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
+	}
+	local_irq_restore(psr);
+	ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
+}
+
+void getreg(unsigned long regnum, unsigned long *val,
+				int *nat, struct kvm_pt_regs *regs)
+{
+	unsigned long addr, *unat;
+	if (regnum >= IA64_FIRST_STACKED_GR) {
+		get_rse_reg(regs, regnum, val, nat);
+		return;
+	}
+
+	/*
+	 * Now look at registers in [0-31] range and init correct UNAT
+	 */
+	addr = (unsigned long)regs;
+	unat = &regs->eml_unat;
+
+	addr += gr_info[regnum];
+
+	*val  = *(unsigned long *)addr;
+	/*
+	 * do it only when requested
+	 */
+	if (nat)
+		*nat  = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
+}
+
+void setreg(unsigned long regnum, unsigned long val,
+			int nat, struct kvm_pt_regs *regs)
+{
+	unsigned long addr;
+	unsigned long bitmask;
+	unsigned long *unat;
+
+	/*
+	 * First takes care of stacked registers
+	 */
+	if (regnum >= IA64_FIRST_STACKED_GR) {
+		set_rse_reg(regs, regnum, val, nat);
+		return;
+	}
+
+	/*
+	 * Now look at registers in [0-31] range and init correct UNAT
+	 */
+	addr = (unsigned long)regs;
+	unat = &regs->eml_unat;
+	/*
+	 * add offset from base of struct
+	 * and do it !
+	 */
+	addr += gr_info[regnum];
+
+	*(unsigned long *)addr = val;
+
+	/*
+	 * We need to clear the corresponding UNAT bit to fully emulate the load
+	 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
+	 */
+	bitmask   = 1UL << ((addr >> 3) & 0x3f);
+	if (nat)
+		*unat |= bitmask;
+	 else
+		*unat &= ~bitmask;
+
+}
+
+u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	unsigned long val;
+
+	if (!reg)
+		return 0;
+	getreg(reg, &val, 0, regs);
+	return val;
+}
+
+void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	long sof = (regs->cr_ifs) & 0x7f;
+
+	if (!reg)
+		return;
+	if (reg >= sof + 32)
+		return;
+	setreg(reg, value, nat, regs);	/* FIXME: handle NATs later*/
+}
+
+void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
+				struct kvm_pt_regs *regs)
+{
+	/* Take floating register rotation into consideration*/
+	if (regnum >= IA64_FIRST_ROTATING_FR)
+		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
+#define CASE_FIXED_FP(reg)			\
+	case  (reg) :				\
+		ia64_stf_spill(fpval, reg);	\
+	break
+
+	switch (regnum) {
+		CASE_FIXED_FP(0);
+		CASE_FIXED_FP(1);
+		CASE_FIXED_FP(2);
+		CASE_FIXED_FP(3);
+		CASE_FIXED_FP(4);
+		CASE_FIXED_FP(5);
+
+		CASE_FIXED_FP(6);
+		CASE_FIXED_FP(7);
+		CASE_FIXED_FP(8);
+		CASE_FIXED_FP(9);
+		CASE_FIXED_FP(10);
+		CASE_FIXED_FP(11);
+
+		CASE_FIXED_FP(12);
+		CASE_FIXED_FP(13);
+		CASE_FIXED_FP(14);
+		CASE_FIXED_FP(15);
+		CASE_FIXED_FP(16);
+		CASE_FIXED_FP(17);
+		CASE_FIXED_FP(18);
+		CASE_FIXED_FP(19);
+		CASE_FIXED_FP(20);
+		CASE_FIXED_FP(21);
+		CASE_FIXED_FP(22);
+		CASE_FIXED_FP(23);
+		CASE_FIXED_FP(24);
+		CASE_FIXED_FP(25);
+		CASE_FIXED_FP(26);
+		CASE_FIXED_FP(27);
+		CASE_FIXED_FP(28);
+		CASE_FIXED_FP(29);
+		CASE_FIXED_FP(30);
+		CASE_FIXED_FP(31);
+		CASE_FIXED_FP(32);
+		CASE_FIXED_FP(33);
+		CASE_FIXED_FP(34);
+		CASE_FIXED_FP(35);
+		CASE_FIXED_FP(36);
+		CASE_FIXED_FP(37);
+		CASE_FIXED_FP(38);
+		CASE_FIXED_FP(39);
+		CASE_FIXED_FP(40);
+		CASE_FIXED_FP(41);
+		CASE_FIXED_FP(42);
+		CASE_FIXED_FP(43);
+		CASE_FIXED_FP(44);
+		CASE_FIXED_FP(45);
+		CASE_FIXED_FP(46);
+		CASE_FIXED_FP(47);
+		CASE_FIXED_FP(48);
+		CASE_FIXED_FP(49);
+		CASE_FIXED_FP(50);
+		CASE_FIXED_FP(51);
+		CASE_FIXED_FP(52);
+		CASE_FIXED_FP(53);
+		CASE_FIXED_FP(54);
+		CASE_FIXED_FP(55);
+		CASE_FIXED_FP(56);
+		CASE_FIXED_FP(57);
+		CASE_FIXED_FP(58);
+		CASE_FIXED_FP(59);
+		CASE_FIXED_FP(60);
+		CASE_FIXED_FP(61);
+		CASE_FIXED_FP(62);
+		CASE_FIXED_FP(63);
+		CASE_FIXED_FP(64);
+		CASE_FIXED_FP(65);
+		CASE_FIXED_FP(66);
+		CASE_FIXED_FP(67);
+		CASE_FIXED_FP(68);
+		CASE_FIXED_FP(69);
+		CASE_FIXED_FP(70);
+		CASE_FIXED_FP(71);
+		CASE_FIXED_FP(72);
+		CASE_FIXED_FP(73);
+		CASE_FIXED_FP(74);
+		CASE_FIXED_FP(75);
+		CASE_FIXED_FP(76);
+		CASE_FIXED_FP(77);
+		CASE_FIXED_FP(78);
+		CASE_FIXED_FP(79);
+		CASE_FIXED_FP(80);
+		CASE_FIXED_FP(81);
+		CASE_FIXED_FP(82);
+		CASE_FIXED_FP(83);
+		CASE_FIXED_FP(84);
+		CASE_FIXED_FP(85);
+		CASE_FIXED_FP(86);
+		CASE_FIXED_FP(87);
+		CASE_FIXED_FP(88);
+		CASE_FIXED_FP(89);
+		CASE_FIXED_FP(90);
+		CASE_FIXED_FP(91);
+		CASE_FIXED_FP(92);
+		CASE_FIXED_FP(93);
+		CASE_FIXED_FP(94);
+		CASE_FIXED_FP(95);
+		CASE_FIXED_FP(96);
+		CASE_FIXED_FP(97);
+		CASE_FIXED_FP(98);
+		CASE_FIXED_FP(99);
+		CASE_FIXED_FP(100);
+		CASE_FIXED_FP(101);
+		CASE_FIXED_FP(102);
+		CASE_FIXED_FP(103);
+		CASE_FIXED_FP(104);
+		CASE_FIXED_FP(105);
+		CASE_FIXED_FP(106);
+		CASE_FIXED_FP(107);
+		CASE_FIXED_FP(108);
+		CASE_FIXED_FP(109);
+		CASE_FIXED_FP(110);
+		CASE_FIXED_FP(111);
+		CASE_FIXED_FP(112);
+		CASE_FIXED_FP(113);
+		CASE_FIXED_FP(114);
+		CASE_FIXED_FP(115);
+		CASE_FIXED_FP(116);
+		CASE_FIXED_FP(117);
+		CASE_FIXED_FP(118);
+		CASE_FIXED_FP(119);
+		CASE_FIXED_FP(120);
+		CASE_FIXED_FP(121);
+		CASE_FIXED_FP(122);
+		CASE_FIXED_FP(123);
+		CASE_FIXED_FP(124);
+		CASE_FIXED_FP(125);
+		CASE_FIXED_FP(126);
+		CASE_FIXED_FP(127);
+	}
+#undef CASE_FIXED_FP
+}
+
+void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
+					struct kvm_pt_regs *regs)
+{
+	/* Take floating register rotation into consideration*/
+	if (regnum >= IA64_FIRST_ROTATING_FR)
+		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
+
+#define CASE_FIXED_FP(reg)			\
+	case (reg) :				\
+		ia64_ldf_fill(reg, fpval);	\
+	break
+
+	switch (regnum) {
+		CASE_FIXED_FP(2);
+		CASE_FIXED_FP(3);
+		CASE_FIXED_FP(4);
+		CASE_FIXED_FP(5);
+
+		CASE_FIXED_FP(6);
+		CASE_FIXED_FP(7);
+		CASE_FIXED_FP(8);
+		CASE_FIXED_FP(9);
+		CASE_FIXED_FP(10);
+		CASE_FIXED_FP(11);
+
+		CASE_FIXED_FP(12);
+		CASE_FIXED_FP(13);
+		CASE_FIXED_FP(14);
+		CASE_FIXED_FP(15);
+		CASE_FIXED_FP(16);
+		CASE_FIXED_FP(17);
+		CASE_FIXED_FP(18);
+		CASE_FIXED_FP(19);
+		CASE_FIXED_FP(20);
+		CASE_FIXED_FP(21);
+		CASE_FIXED_FP(22);
+		CASE_FIXED_FP(23);
+		CASE_FIXED_FP(24);
+		CASE_FIXED_FP(25);
+		CASE_FIXED_FP(26);
+		CASE_FIXED_FP(27);
+		CASE_FIXED_FP(28);
+		CASE_FIXED_FP(29);
+		CASE_FIXED_FP(30);
+		CASE_FIXED_FP(31);
+		CASE_FIXED_FP(32);
+		CASE_FIXED_FP(33);
+		CASE_FIXED_FP(34);
+		CASE_FIXED_FP(35);
+		CASE_FIXED_FP(36);
+		CASE_FIXED_FP(37);
+		CASE_FIXED_FP(38);
+		CASE_FIXED_FP(39);
+		CASE_FIXED_FP(40);
+		CASE_FIXED_FP(41);
+		CASE_FIXED_FP(42);
+		CASE_FIXED_FP(43);
+		CASE_FIXED_FP(44);
+		CASE_FIXED_FP(45);
+		CASE_FIXED_FP(46);
+		CASE_FIXED_FP(47);
+		CASE_FIXED_FP(48);
+		CASE_FIXED_FP(49);
+		CASE_FIXED_FP(50);
+		CASE_FIXED_FP(51);
+		CASE_FIXED_FP(52);
+		CASE_FIXED_FP(53);
+		CASE_FIXED_FP(54);
+		CASE_FIXED_FP(55);
+		CASE_FIXED_FP(56);
+		CASE_FIXED_FP(57);
+		CASE_FIXED_FP(58);
+		CASE_FIXED_FP(59);
+		CASE_FIXED_FP(60);
+		CASE_FIXED_FP(61);
+		CASE_FIXED_FP(62);
+		CASE_FIXED_FP(63);
+		CASE_FIXED_FP(64);
+		CASE_FIXED_FP(65);
+		CASE_FIXED_FP(66);
+		CASE_FIXED_FP(67);
+		CASE_FIXED_FP(68);
+		CASE_FIXED_FP(69);
+		CASE_FIXED_FP(70);
+		CASE_FIXED_FP(71);
+		CASE_FIXED_FP(72);
+		CASE_FIXED_FP(73);
+		CASE_FIXED_FP(74);
+		CASE_FIXED_FP(75);
+		CASE_FIXED_FP(76);
+		CASE_FIXED_FP(77);
+		CASE_FIXED_FP(78);
+		CASE_FIXED_FP(79);
+		CASE_FIXED_FP(80);
+		CASE_FIXED_FP(81);
+		CASE_FIXED_FP(82);
+		CASE_FIXED_FP(83);
+		CASE_FIXED_FP(84);
+		CASE_FIXED_FP(85);
+		CASE_FIXED_FP(86);
+		CASE_FIXED_FP(87);
+		CASE_FIXED_FP(88);
+		CASE_FIXED_FP(89);
+		CASE_FIXED_FP(90);
+		CASE_FIXED_FP(91);
+		CASE_FIXED_FP(92);
+		CASE_FIXED_FP(93);
+		CASE_FIXED_FP(94);
+		CASE_FIXED_FP(95);
+		CASE_FIXED_FP(96);
+		CASE_FIXED_FP(97);
+		CASE_FIXED_FP(98);
+		CASE_FIXED_FP(99);
+		CASE_FIXED_FP(100);
+		CASE_FIXED_FP(101);
+		CASE_FIXED_FP(102);
+		CASE_FIXED_FP(103);
+		CASE_FIXED_FP(104);
+		CASE_FIXED_FP(105);
+		CASE_FIXED_FP(106);
+		CASE_FIXED_FP(107);
+		CASE_FIXED_FP(108);
+		CASE_FIXED_FP(109);
+		CASE_FIXED_FP(110);
+		CASE_FIXED_FP(111);
+		CASE_FIXED_FP(112);
+		CASE_FIXED_FP(113);
+		CASE_FIXED_FP(114);
+		CASE_FIXED_FP(115);
+		CASE_FIXED_FP(116);
+		CASE_FIXED_FP(117);
+		CASE_FIXED_FP(118);
+		CASE_FIXED_FP(119);
+		CASE_FIXED_FP(120);
+		CASE_FIXED_FP(121);
+		CASE_FIXED_FP(122);
+		CASE_FIXED_FP(123);
+		CASE_FIXED_FP(124);
+		CASE_FIXED_FP(125);
+		CASE_FIXED_FP(126);
+		CASE_FIXED_FP(127);
+	}
+}
+
+void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+						struct ia64_fpreg *val)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	getfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
+}
+
+void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+						struct ia64_fpreg *val)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	if (reg > 1)
+		setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
+}
+
+/*
+ * The Altix RTC is mapped specially here for the vmm module
+ */
+#define SN_RTC_BASE	(u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
+static long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
+
+	if (kvm->arch.is_sn2)
+		return (*SN_RTC_BASE);
+	else
+#endif
+		return ia64_getreg(_IA64_REG_AR_ITC);
+}
+
+/************************************************************************
+ * lsapic timer
+ ***********************************************************************/
+u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
+{
+	unsigned long guest_itc;
+	guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
+
+	if (guest_itc >= VMX(vcpu, last_itc)) {
+		VMX(vcpu, last_itc) = guest_itc;
+		return  guest_itc;
+	} else
+		return VMX(vcpu, last_itc);
+}
+
+static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
+static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
+{
+	struct kvm_vcpu *v;
+	struct kvm *kvm;
+	int i;
+	long itc_offset = val - kvm_get_itc(vcpu);
+	unsigned long vitv = VCPU(vcpu, itv);
+
+	kvm = (struct kvm *)KVM_VM_BASE;
+
+	if (kvm_vcpu_is_bsp(vcpu)) {
+		for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu +
+					sizeof(struct kvm_vcpu_data) * i);
+			VMX(v, itc_offset) = itc_offset;
+			VMX(v, last_itc) = 0;
+		}
+	}
+	VMX(vcpu, last_itc) = 0;
+	if (VCPU(vcpu, itm) <= val) {
+		VMX(vcpu, itc_check) = 0;
+		vcpu_unpend_interrupt(vcpu, vitv);
+	} else {
+		VMX(vcpu, itc_check) = 1;
+		vcpu_set_itm(vcpu, VCPU(vcpu, itm));
+	}
+
+}
+
+static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, itm));
+}
+
+static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
+{
+	unsigned long vitv = VCPU(vcpu, itv);
+	VCPU(vcpu, itm) = val;
+
+	if (val > vcpu_get_itc(vcpu)) {
+		VMX(vcpu, itc_check) = 1;
+		vcpu_unpend_interrupt(vcpu, vitv);
+		VMX(vcpu, timer_pending) = 0;
+	} else
+		VMX(vcpu, itc_check) = 0;
+}
+
+#define  ITV_VECTOR(itv)    (itv&0xff)
+#define  ITV_IRQ_MASK(itv)  (itv&(1<<16))
+
+static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, itv) = val;
+	if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
+		vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
+		vcpu->arch.timer_pending = 0;
+	}
+}
+
+static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
+{
+	int vec;
+
+	vec = highest_inservice_irq(vcpu);
+	if (vec == NULL_VECTOR)
+		return;
+	VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
+	VCPU(vcpu, eoi) = 0;
+	vcpu->arch.irq_new_pending = 1;
+
+}
+
+/* See Table 5-8 in SDM vol2 for the definition */
+int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
+{
+	union ia64_tpr vtpr;
+
+	vtpr.val = VCPU(vcpu, tpr);
+
+	if (h_inservice == NMI_VECTOR)
+		return IRQ_MASKED_BY_INSVC;
+
+	if (h_pending == NMI_VECTOR) {
+		/* Non Maskable Interrupt */
+		return IRQ_NO_MASKED;
+	}
+
+	if (h_inservice == ExtINT_VECTOR)
+		return IRQ_MASKED_BY_INSVC;
+
+	if (h_pending == ExtINT_VECTOR) {
+		if (vtpr.mmi) {
+			/* mask all external IRQ */
+			return IRQ_MASKED_BY_VTPR;
+		} else
+			return IRQ_NO_MASKED;
+	}
+
+	if (is_higher_irq(h_pending, h_inservice)) {
+		if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
+			return IRQ_NO_MASKED;
+		else
+			return IRQ_MASKED_BY_VTPR;
+	} else {
+		return IRQ_MASKED_BY_INSVC;
+	}
+}
+
+void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
+{
+	long spsr;
+	int ret;
+
+	local_irq_save(spsr);
+	ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
+	local_irq_restore(spsr);
+
+	vcpu->arch.irq_new_pending = 1;
+}
+
+void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
+{
+	long spsr;
+	int ret;
+
+	local_irq_save(spsr);
+	ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
+	local_irq_restore(spsr);
+	if (ret) {
+		vcpu->arch.irq_new_pending = 1;
+		wmb();
+	}
+}
+
+void update_vhpi(struct kvm_vcpu *vcpu, int vec)
+{
+	u64 vhpi;
+
+	if (vec == NULL_VECTOR)
+		vhpi = 0;
+	else if (vec == NMI_VECTOR)
+		vhpi = 32;
+	else if (vec == ExtINT_VECTOR)
+		vhpi = 16;
+	else
+		vhpi = vec >> 4;
+
+	VCPU(vcpu, vhpi) = vhpi;
+	if (VCPU(vcpu, vac).a_int)
+		ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
+				(u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
+}
+
+u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
+{
+	int vec, h_inservice, mask;
+
+	vec = highest_pending_irq(vcpu);
+	h_inservice = highest_inservice_irq(vcpu);
+	mask = irq_masked(vcpu, vec, h_inservice);
+	if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
+		if (VCPU(vcpu, vhpi))
+			update_vhpi(vcpu, NULL_VECTOR);
+		return IA64_SPURIOUS_INT_VECTOR;
+	}
+	if (mask == IRQ_MASKED_BY_VTPR) {
+		update_vhpi(vcpu, vec);
+		return IA64_SPURIOUS_INT_VECTOR;
+	}
+	VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
+	vcpu_unpend_interrupt(vcpu, vec);
+	return  (u64)vec;
+}
+
+/**************************************************************************
+  Privileged operation emulation routines
+ **************************************************************************/
+u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	union ia64_pta vpta;
+	union ia64_rr vrr;
+	u64 pval;
+	u64 vhpt_offset;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
+	if (vpta.vf) {
+		pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
+				vpta.val, 0, 0, 0, 0);
+	} else {
+		pval = (vadr & VRN_MASK) | vhpt_offset |
+			(vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
+	}
+	return  pval;
+}
+
+u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	union ia64_rr vrr;
+	union ia64_pta vpta;
+	u64 pval;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	if (vpta.vf) {
+		pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
+						0, 0, 0, 0, 0);
+	} else
+		pval = 1;
+
+	return  pval;
+}
+
+u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	struct thash_data *data;
+	union ia64_pta vpta;
+	u64 key;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	if (vpta.vf == 0) {
+		key = 1;
+		return key;
+	}
+	data = vtlb_lookup(vcpu, vadr, D_TLB);
+	if (!data || !data->p)
+		key = 1;
+	else
+		key = data->key;
+
+	return key;
+}
+
+void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long thash, vadr;
+
+	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
+	thash = vcpu_thash(vcpu, vadr);
+	vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
+}
+
+void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long tag, vadr;
+
+	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
+	tag = vcpu_ttag(vcpu, vadr);
+	vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
+}
+
+int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
+{
+	struct thash_data *data;
+	union ia64_isr visr, pt_isr;
+	struct kvm_pt_regs *regs;
+	struct ia64_psr vpsr;
+
+	regs = vcpu_regs(vcpu);
+	pt_isr.val = VMX(vcpu, cr_isr);
+	visr.val = 0;
+	visr.ei = pt_isr.ei;
+	visr.ir = pt_isr.ir;
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	visr.na = 1;
+
+	data = vhpt_lookup(vadr);
+	if (data) {
+		if (data->p == 0) {
+			vcpu_set_isr(vcpu, visr.val);
+			data_page_not_present(vcpu, vadr);
+			return IA64_FAULT;
+		} else if (data->ma == VA_MATTR_NATPAGE) {
+			vcpu_set_isr(vcpu, visr.val);
+			dnat_page_consumption(vcpu, vadr);
+			return IA64_FAULT;
+		} else {
+			*padr = (data->gpaddr >> data->ps << data->ps) |
+				(vadr & (PSIZE(data->ps) - 1));
+			return IA64_NO_FAULT;
+		}
+	}
+
+	data = vtlb_lookup(vcpu, vadr, D_TLB);
+	if (data) {
+		if (data->p == 0) {
+			vcpu_set_isr(vcpu, visr.val);
+			data_page_not_present(vcpu, vadr);
+			return IA64_FAULT;
+		} else if (data->ma == VA_MATTR_NATPAGE) {
+			vcpu_set_isr(vcpu, visr.val);
+			dnat_page_consumption(vcpu, vadr);
+			return IA64_FAULT;
+		} else{
+			*padr = ((data->ppn >> (data->ps - 12)) << data->ps)
+				| (vadr & (PSIZE(data->ps) - 1));
+			return IA64_NO_FAULT;
+		}
+	}
+	if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
+		if (vpsr.ic) {
+			vcpu_set_isr(vcpu, visr.val);
+			alt_dtlb(vcpu, vadr);
+			return IA64_FAULT;
+		} else {
+			nested_dtlb(vcpu);
+			return IA64_FAULT;
+		}
+	} else {
+		if (vpsr.ic) {
+			vcpu_set_isr(vcpu, visr.val);
+			dvhpt_fault(vcpu, vadr);
+			return IA64_FAULT;
+		} else{
+			nested_dtlb(vcpu);
+			return IA64_FAULT;
+		}
+	}
+
+	return IA64_NO_FAULT;
+}
+
+int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1, r3;
+
+	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
+
+	if (vcpu_tpa(vcpu, r3, &r1))
+		return IA64_FAULT;
+
+	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+	return(IA64_NO_FAULT);
+}
+
+void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1, r3;
+
+	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
+	r1 = vcpu_tak(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+}
+
+/************************************
+ * Insert/Purge translation register/cache
+ ************************************/
+void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
+{
+	thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
+}
+
+void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
+{
+	thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
+}
+
+void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
+{
+	u64 ps, va, rid;
+	struct thash_data *p_itr;
+
+	ps = itir_ps(itir);
+	va = PAGEALIGN(ifa, ps);
+	pte &= ~PAGE_FLAGS_RV_MASK;
+	rid = vcpu_get_rr(vcpu, ifa);
+	rid = rid & RR_RID_MASK;
+	p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
+	vcpu_set_tr(p_itr, pte, itir, va, rid);
+	vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
+}
+
+
+void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
+{
+	u64 gpfn;
+	u64 ps, va, rid;
+	struct thash_data *p_dtr;
+
+	ps = itir_ps(itir);
+	va = PAGEALIGN(ifa, ps);
+	pte &= ~PAGE_FLAGS_RV_MASK;
+
+	if (ps != _PAGE_SIZE_16M)
+		thash_purge_entries(vcpu, va, ps);
+	gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
+	if (__gpfn_is_io(gpfn))
+		pte |= VTLB_PTE_IO;
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;
+	p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
+	vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot],
+							pte, itir, va, rid);
+	vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
+}
+
+void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
+{
+	int index;
+	u64 va;
+
+	va = PAGEALIGN(ifa, ps);
+	while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
+		vcpu->arch.dtrs[index].page_flags = 0;
+
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
+{
+	int index;
+	u64 va;
+
+	va = PAGEALIGN(ifa, ps);
+	while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
+		vcpu->arch.itrs[index].page_flags = 0;
+
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	va = PAGEALIGN(va, ps);
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
+{
+	thash_purge_all(vcpu);
+}
+
+void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	long psr;
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_PTC_G;
+
+	p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
+	p->u.ptc_g_data.vaddr = va;
+	p->u.ptc_g_data.ps = ps;
+	vmm_transition(vcpu);
+	/* Do Local Purge Here*/
+	vcpu_ptc_l(vcpu, va, ps);
+	local_irq_restore(psr);
+}
+
+
+void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	vcpu_ptc_ga(vcpu, va, ps);
+}
+
+void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	vcpu_ptc_e(vcpu, ifa);
+}
+
+void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte, slot;
+
+	slot = vcpu_get_gr(vcpu, inst.M45.r3);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	vcpu_itr_d(vcpu, slot, pte, itir, ifa);
+}
+
+
+
+void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte, slot;
+
+	slot = vcpu_get_gr(vcpu, inst.M45.r3);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	vcpu_itr_i(vcpu, slot, pte, itir, ifa);
+}
+
+void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte;
+
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_itc_d(vcpu, pte, itir, ifa);
+}
+
+void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte;
+
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_itc_i(vcpu, pte, itir, ifa);
+}
+
+/*************************************
+ * Moves to semi-privileged registers
+ *************************************/
+
+void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long imm;
+
+	if (inst.M30.s)
+		imm = -inst.M30.imm;
+	else
+		imm = inst.M30.imm;
+
+	vcpu_set_itc(vcpu, imm);
+}
+
+void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r2;
+
+	r2 = vcpu_get_gr(vcpu, inst.M29.r2);
+	vcpu_set_itc(vcpu, r2);
+}
+
+void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1;
+
+	r1 = vcpu_get_itc(vcpu);
+	vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
+}
+
+/**************************************************************************
+  struct kvm_vcpu protection key register access routines
+ **************************************************************************/
+
+unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	return ((unsigned long)ia64_get_pkr(reg));
+}
+
+void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
+{
+	ia64_set_pkr(reg, val);
+}
+
+/********************************
+ * Moves to privileged registers
+ ********************************/
+unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
+					unsigned long val)
+{
+	union ia64_rr oldrr, newrr;
+	unsigned long rrval;
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long psr;
+
+	oldrr.val = vcpu_get_rr(vcpu, reg);
+	newrr.val = val;
+	vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
+
+	switch ((unsigned long)(reg >> VRN_SHIFT)) {
+	case VRN6:
+		vcpu->arch.vmm_rr = vrrtomrr(val);
+		local_irq_save(psr);
+		p->exit_reason = EXIT_REASON_SWITCH_RR6;
+		vmm_transition(vcpu);
+		local_irq_restore(psr);
+		break;
+	case VRN4:
+		rrval = vrrtomrr(val);
+		vcpu->arch.metaphysical_saved_rr4 = rrval;
+		if (!is_physical_mode(vcpu))
+			ia64_set_rr(reg, rrval);
+		break;
+	case VRN0:
+		rrval = vrrtomrr(val);
+		vcpu->arch.metaphysical_saved_rr0 = rrval;
+		if (!is_physical_mode(vcpu))
+			ia64_set_rr(reg, rrval);
+		break;
+	default:
+		ia64_set_rr(reg, vrrtomrr(val));
+		break;
+	}
+
+	return (IA64_NO_FAULT);
+}
+
+void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_rr(vcpu, r3, r2);
+}
+
+void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+}
+
+void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+}
+
+void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pmc(vcpu, r3, r2);
+}
+
+void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pmd(vcpu, r3, r2);
+}
+
+void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	u64 r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pkr(vcpu, r3, r2);
+}
+
+void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_rr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_pkr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_dbr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_ibr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_pmc(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	/* FIXME: This could get called as a result of a rsvd-reg fault */
+	if (reg > (ia64_get_cpuid(3) & 0xff))
+		return 0;
+	else
+		return ia64_get_cpuid(reg);
+}
+
+void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_cpuid(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+	VCPU(vcpu, tpr) = val;
+	vcpu->arch.irq_check = 1;
+}
+
+unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r2;
+
+	r2 = vcpu_get_gr(vcpu, inst.M32.r2);
+	VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
+
+	switch (inst.M32.cr3) {
+	case 0:
+		vcpu_set_dcr(vcpu, r2);
+		break;
+	case 1:
+		vcpu_set_itm(vcpu, r2);
+		break;
+	case 66:
+		vcpu_set_tpr(vcpu, r2);
+		break;
+	case 67:
+		vcpu_set_eoi(vcpu, r2);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long tgt = inst.M33.r1;
+	unsigned long val;
+
+	switch (inst.M33.cr3) {
+	case 65:
+		val = vcpu_get_ivr(vcpu);
+		vcpu_set_gr(vcpu, tgt, val, 0);
+		break;
+
+	case 67:
+		vcpu_set_gr(vcpu, tgt, 0L, 0);
+		break;
+	default:
+		val = VCPU(vcpu, vcr[inst.M33.cr3]);
+		vcpu_set_gr(vcpu, tgt, val, 0);
+		break;
+	}
+
+	return 0;
+}
+
+void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+
+	unsigned long mask;
+	struct kvm_pt_regs *regs;
+	struct ia64_psr old_psr, new_psr;
+
+	old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	regs = vcpu_regs(vcpu);
+	/* We only support guest as:
+	 *  vpsr.pk = 0
+	 *  vpsr.is = 0
+	 * Otherwise panic
+	 */
+	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
+		panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
+				"& vpsr.is=0\n");
+
+	/*
+	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
+	 * Since these bits will become 0, after success execution of each
+	 * instruction, we will change set them to mIA64_PSR
+	 */
+	VCPU(vcpu, vpsr) = val
+		& (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
+			IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
+
+	if (!old_psr.i && (val & IA64_PSR_I)) {
+		/* vpsr.i 0->1 */
+		vcpu->arch.irq_check = 1;
+	}
+	new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	/*
+	 * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
+	 * , except for the following bits:
+	 *  ic/i/dt/si/rt/mc/it/bn/vm
+	 */
+	mask =  IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
+		IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
+		IA64_PSR_VM;
+
+	regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
+
+	check_mm_mode_switch(vcpu, old_psr, new_psr);
+
+	return ;
+}
+
+unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
+{
+	struct ia64_psr vpsr;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	if (!vpsr.ic)
+		VCPU(vcpu, ifs) = regs->cr_ifs;
+	regs->cr_ifs = IA64_IFS_V;
+	return (IA64_NO_FAULT);
+}
+
+
+
+/**************************************************************************
+  VCPU banked general register access routines
+ **************************************************************************/
+#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
+	do {     							\
+		__asm__ __volatile__ (					\
+				";;extr.u %0 = %3,%6,16;;\n"		\
+				"dep %1 = %0, %1, 0, 16;;\n"		\
+				"st8 [%4] = %1\n"			\
+				"extr.u %0 = %2, 16, 16;;\n"		\
+				"dep %3 = %0, %3, %6, 16;;\n"		\
+				"st8 [%5] = %3\n"			\
+				::"r"(i), "r"(*b1unat), "r"(*b0unat),	\
+				"r"(*runat), "r"(b1unat), "r"(runat),	\
+				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
+	} while (0)
+
+void vcpu_bsw0(struct kvm_vcpu *vcpu)
+{
+	unsigned long i;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	unsigned long *r = &regs->r16;
+	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
+	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
+	unsigned long *runat = &regs->eml_unat;
+	unsigned long *b0unat = &VCPU(vcpu, vbnat);
+	unsigned long *b1unat = &VCPU(vcpu, vnat);
+
+
+	if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
+		for (i = 0; i < 16; i++) {
+			*b1++ = *r;
+			*r++ = *b0++;
+		}
+		vcpu_bsw0_unat(i, b0unat, b1unat, runat,
+				VMM_PT_REGS_R16_SLOT);
+		VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
+	}
+}
+
+#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
+	do {             						\
+		__asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n"	\
+				"dep %1 = %0, %1, 16, 16;;\n"		\
+				"st8 [%4] = %1\n"			\
+				"extr.u %0 = %2, 0, 16;;\n"		\
+				"dep %3 = %0, %3, %6, 16;;\n"		\
+				"st8 [%5] = %3\n"			\
+				::"r"(i), "r"(*b0unat), "r"(*b1unat),	\
+				"r"(*runat), "r"(b0unat), "r"(runat),	\
+				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
+	} while (0)
+
+void vcpu_bsw1(struct kvm_vcpu *vcpu)
+{
+	unsigned long i;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	unsigned long *r = &regs->r16;
+	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
+	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
+	unsigned long *runat = &regs->eml_unat;
+	unsigned long *b0unat = &VCPU(vcpu, vbnat);
+	unsigned long *b1unat = &VCPU(vcpu, vnat);
+
+	if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
+		for (i = 0; i < 16; i++) {
+			*b0++ = *r;
+			*r++ = *b1++;
+		}
+		vcpu_bsw1_unat(i, b0unat, b1unat, runat,
+				VMM_PT_REGS_R16_SLOT);
+		VCPU(vcpu, vpsr) |= IA64_PSR_BN;
+	}
+}
+
+void vcpu_rfi(struct kvm_vcpu *vcpu)
+{
+	unsigned long ifs, psr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	psr = VCPU(vcpu, ipsr);
+	if (psr & IA64_PSR_BN)
+		vcpu_bsw1(vcpu);
+	else
+		vcpu_bsw0(vcpu);
+	vcpu_set_psr(vcpu, psr);
+	ifs = VCPU(vcpu, ifs);
+	if (ifs >> 63)
+		regs->cr_ifs = ifs;
+	regs->cr_iip = VCPU(vcpu, iip);
+}
+
+/*
+   VPSR can't keep track of below bits of guest PSR
+   This function gets guest PSR
+ */
+
+unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
+{
+	unsigned long mask;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
+		IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
+	return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
+}
+
+void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long vpsr;
+	unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
+					| inst.M44.imm;
+
+	vpsr = vcpu_get_psr(vcpu);
+	vpsr &= (~imm24);
+	vcpu_set_psr(vcpu, vpsr);
+}
+
+void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long vpsr;
+	unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
+				| inst.M44.imm;
+
+	vpsr = vcpu_get_psr(vcpu);
+	vpsr |= imm24;
+	vcpu_set_psr(vcpu, vpsr);
+}
+
+/* Generate Mask
+ * Parameter:
+ *  bit -- starting bit
+ *  len -- how many bits
+ */
+#define MASK(bit,len)				   	\
+({							\
+		__u64	ret;				\
+							\
+		__asm __volatile("dep %0=-1, r0, %1, %2"\
+				: "=r" (ret):		\
+		  "M" (bit),				\
+		  "M" (len));				\
+		ret;					\
+})
+
+void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
+{
+	val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
+	vcpu_set_psr(vcpu, val);
+}
+
+void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long val;
+
+	val = vcpu_get_gr(vcpu, inst.M35.r2);
+	vcpu_set_psr_l(vcpu, val);
+}
+
+void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long val;
+
+	val = vcpu_get_psr(vcpu);
+	val = (val & MASK(0, 32)) | (val & MASK(35, 2));
+	vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
+}
+
+void vcpu_increment_iip(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+	if (ipsr->ri == 2) {
+		ipsr->ri = 0;
+		regs->cr_iip += 16;
+	} else
+		ipsr->ri++;
+}
+
+void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+
+	if (ipsr->ri == 0) {
+		ipsr->ri = 2;
+		regs->cr_iip -= 16;
+	} else
+		ipsr->ri--;
+}
+
+/** Emulate a privileged operation.
+ *
+ *
+ * @param vcpu virtual cpu
+ * @cause the reason cause virtualization fault
+ * @opcode the instruction code which cause virtualization fault
+ */
+
+void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
+{
+	unsigned long status, cause, opcode ;
+	INST64 inst;
+
+	status = IA64_NO_FAULT;
+	cause = VMX(vcpu, cause);
+	opcode = VMX(vcpu, opcode);
+	inst.inst = opcode;
+	/*
+	 * Switch to actual virtual rid in rr0 and rr4,
+	 * which is required by some tlb related instructions.
+	 */
+	prepare_if_physical_mode(vcpu);
+
+	switch (cause) {
+	case EVENT_RSM:
+		kvm_rsm(vcpu, inst);
+		break;
+	case EVENT_SSM:
+		kvm_ssm(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PSR:
+		kvm_mov_to_psr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PSR:
+		kvm_mov_from_psr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_CR:
+		kvm_mov_from_cr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_CR:
+		kvm_mov_to_cr(vcpu, inst);
+		break;
+	case EVENT_BSW_0:
+		vcpu_bsw0(vcpu);
+		break;
+	case EVENT_BSW_1:
+		vcpu_bsw1(vcpu);
+		break;
+	case EVENT_COVER:
+		vcpu_cover(vcpu);
+		break;
+	case EVENT_RFI:
+		vcpu_rfi(vcpu);
+		break;
+	case EVENT_ITR_D:
+		kvm_itr_d(vcpu, inst);
+		break;
+	case EVENT_ITR_I:
+		kvm_itr_i(vcpu, inst);
+		break;
+	case EVENT_PTR_D:
+		kvm_ptr_d(vcpu, inst);
+		break;
+	case EVENT_PTR_I:
+		kvm_ptr_i(vcpu, inst);
+		break;
+	case EVENT_ITC_D:
+		kvm_itc_d(vcpu, inst);
+		break;
+	case EVENT_ITC_I:
+		kvm_itc_i(vcpu, inst);
+		break;
+	case EVENT_PTC_L:
+		kvm_ptc_l(vcpu, inst);
+		break;
+	case EVENT_PTC_G:
+		kvm_ptc_g(vcpu, inst);
+		break;
+	case EVENT_PTC_GA:
+		kvm_ptc_ga(vcpu, inst);
+		break;
+	case EVENT_PTC_E:
+		kvm_ptc_e(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_RR:
+		kvm_mov_to_rr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_RR:
+		kvm_mov_from_rr(vcpu, inst);
+		break;
+	case EVENT_THASH:
+		kvm_thash(vcpu, inst);
+		break;
+	case EVENT_TTAG:
+		kvm_ttag(vcpu, inst);
+		break;
+	case EVENT_TPA:
+		status = kvm_tpa(vcpu, inst);
+		break;
+	case EVENT_TAK:
+		kvm_tak(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_AR_IMM:
+		kvm_mov_to_ar_imm(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_AR:
+		kvm_mov_to_ar_reg(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_AR:
+		kvm_mov_from_ar_reg(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_DBR:
+		kvm_mov_to_dbr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_IBR:
+		kvm_mov_to_ibr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PMC:
+		kvm_mov_to_pmc(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PMD:
+		kvm_mov_to_pmd(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PKR:
+		kvm_mov_to_pkr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_DBR:
+		kvm_mov_from_dbr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_IBR:
+		kvm_mov_from_ibr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PMC:
+		kvm_mov_from_pmc(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PKR:
+		kvm_mov_from_pkr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_CPUID:
+		kvm_mov_from_cpuid(vcpu, inst);
+		break;
+	case EVENT_VMSW:
+		status = IA64_FAULT;
+		break;
+	default:
+		break;
+	};
+	/*Assume all status is NO_FAULT ?*/
+	if (status == IA64_NO_FAULT && cause != EVENT_RFI)
+		vcpu_increment_iip(vcpu);
+
+	recover_if_physical_mode(vcpu);
+}
+
+void init_vcpu(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	vcpu->arch.mode_flags = GUEST_IN_PHY;
+	VMX(vcpu, vrr[0]) = 0x38;
+	VMX(vcpu, vrr[1]) = 0x38;
+	VMX(vcpu, vrr[2]) = 0x38;
+	VMX(vcpu, vrr[3]) = 0x38;
+	VMX(vcpu, vrr[4]) = 0x38;
+	VMX(vcpu, vrr[5]) = 0x38;
+	VMX(vcpu, vrr[6]) = 0x38;
+	VMX(vcpu, vrr[7]) = 0x38;
+	VCPU(vcpu, vpsr) = IA64_PSR_BN;
+	VCPU(vcpu, dcr) = 0;
+	/* pta.size must not be 0.  The minimum is 15 (32k) */
+	VCPU(vcpu, pta) = 15 << 2;
+	VCPU(vcpu, itv) = 0x10000;
+	VCPU(vcpu, itm) = 0;
+	VMX(vcpu, last_itc) = 0;
+
+	VCPU(vcpu, lid) = VCPU_LID(vcpu);
+	VCPU(vcpu, ivr) = 0;
+	VCPU(vcpu, tpr) = 0x10000;
+	VCPU(vcpu, eoi) = 0;
+	VCPU(vcpu, irr[0]) = 0;
+	VCPU(vcpu, irr[1]) = 0;
+	VCPU(vcpu, irr[2]) = 0;
+	VCPU(vcpu, irr[3]) = 0;
+	VCPU(vcpu, pmv) = 0x10000;
+	VCPU(vcpu, cmcv) = 0x10000;
+	VCPU(vcpu, lrr0) = 0x10000;   /* default reset value? */
+	VCPU(vcpu, lrr1) = 0x10000;   /* default reset value? */
+	update_vhpi(vcpu, NULL_VECTOR);
+	VLSAPIC_XTP(vcpu) = 0x80;	/* disabled */
+
+	for (i = 0; i < 4; i++)
+		VLSAPIC_INSVC(vcpu, i) = 0;
+}
+
+void kvm_init_all_rr(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	local_irq_save(psr);
+
+	/* WARNING: not allow co-exist of both virtual mode and physical
+	 * mode in same region
+	 */
+
+	vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
+	vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
+
+	if (is_physical_mode(vcpu)) {
+		if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
+			panic_vm(vcpu, "Machine Status conflicts!\n");
+
+		ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
+		ia64_dv_serialize_data();
+		ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
+		ia64_dv_serialize_data();
+	} else {
+		ia64_set_rr((VRN0 << VRN_SHIFT),
+				vcpu->arch.metaphysical_saved_rr0);
+		ia64_dv_serialize_data();
+		ia64_set_rr((VRN4 << VRN_SHIFT),
+				vcpu->arch.metaphysical_saved_rr4);
+		ia64_dv_serialize_data();
+	}
+	ia64_set_rr((VRN1 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN1])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN2 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN2])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN3 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN3])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN5 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN5])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN7 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN7])));
+	ia64_dv_serialize_data();
+	ia64_srlz_d();
+	ia64_set_psr(psr);
+}
+
+int vmm_entry(void)
+{
+	struct kvm_vcpu *v;
+	v = current_vcpu;
+
+	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
+						0, 0, 0, 0, 0, 0);
+	kvm_init_vtlb(v);
+	kvm_init_vhpt(v);
+	init_vcpu(v);
+	kvm_init_all_rr(v);
+	vmm_reset_entry();
+
+	return 0;
+}
+
+static void kvm_show_registers(struct kvm_pt_regs *regs)
+{
+	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+	struct kvm_vcpu *vcpu = current_vcpu;
+	if (vcpu != NULL)
+		printk("vcpu 0x%p vcpu %d\n",
+		       vcpu, vcpu->vcpu_id);
+
+	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]\n",
+	       regs->cr_ipsr, regs->cr_ifs, ip);
+
+	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+	printk("rnat: %016lx bspstore: %016lx pr  : %016lx\n",
+	       regs->ar_rnat, regs->ar_bspstore, regs->pr);
+	printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+	       regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+	printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
+	printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0,
+							regs->b6, regs->b7);
+	printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
+	       regs->f6.u.bits[1], regs->f6.u.bits[0],
+	       regs->f7.u.bits[1], regs->f7.u.bits[0]);
+	printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
+	       regs->f8.u.bits[1], regs->f8.u.bits[0],
+	       regs->f9.u.bits[1], regs->f9.u.bits[0]);
+	printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
+	       regs->f10.u.bits[1], regs->f10.u.bits[0],
+	       regs->f11.u.bits[1], regs->f11.u.bits[0]);
+
+	printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1,
+							regs->r2, regs->r3);
+	printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8,
+							regs->r9, regs->r10);
+	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
+							regs->r12, regs->r13);
+	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
+							regs->r15, regs->r16);
+	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
+							regs->r18, regs->r19);
+	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
+							regs->r21, regs->r22);
+	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
+							regs->r24, regs->r25);
+	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
+							regs->r27, regs->r28);
+	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
+							regs->r30, regs->r31);
+
+}
+
+void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	struct kvm_pt_regs *regs = vcpu_regs(v);
+	struct exit_ctl_data *p = &v->arch.exit_data;
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	printk(buf);
+	kvm_show_registers(regs);
+	p->exit_reason = EXIT_REASON_VM_PANIC;
+	vmm_transition(v);
+	/*Never to return*/
+	while (1);
+}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
new file mode 100644
index 00000000000..988911b4cc7
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.h
@@ -0,0 +1,752 @@
+/*
+ *  vcpu.h: vcpu routines
+ *  	Copyright (c) 2005, Intel Corporation.
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  	Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *
+ * 	Copyright (c) 2007, Intel Corporation.
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *	Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+
+#ifndef __KVM_VCPU_H__
+#define __KVM_VCPU_H__
+
+#include <asm/types.h>
+#include <asm/fpu.h>
+#include <asm/processor.h>
+
+#ifndef __ASSEMBLY__
+#include "vti.h"
+
+#include <linux/kvm_host.h>
+#include <linux/spinlock.h>
+
+typedef unsigned long IA64_INST;
+
+typedef union U_IA64_BUNDLE {
+	unsigned long i64[2];
+	struct { unsigned long template:5, slot0:41, slot1a:18,
+		slot1b:23, slot2:41; };
+	/* NOTE: following doesn't work because bitfields can't cross natural
+	   size boundaries
+	   struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
+} IA64_BUNDLE;
+
+typedef union U_INST64_A5 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
+		imm9d:9, s:1, major:4; };
+} INST64_A5;
+
+typedef union U_INST64_B4 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
+		wh:2, d:1, un1:1, major:4; };
+} INST64_B4;
+
+typedef union U_INST64_B8 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
+} INST64_B8;
+
+typedef union U_INST64_B9 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
+} INST64_B9;
+
+typedef union U_INST64_I19 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
+} INST64_I19;
+
+typedef union U_INST64_I26 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_I26;
+
+typedef union U_INST64_I27 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
+} INST64_I27;
+
+typedef union U_INST64_I28 { /* not privileged (mov from AR) */
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_I28;
+
+typedef union U_INST64_M28 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M28;
+
+typedef union U_INST64_M29 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M29;
+
+typedef union U_INST64_M30 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
+		x3:3, s:1, major:4; };
+} INST64_M30;
+
+typedef union U_INST64_M31 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M31;
+
+typedef union U_INST64_M32 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M32;
+
+typedef union U_INST64_M33 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M33;
+
+typedef union U_INST64_M35 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+
+} INST64_M35;
+
+typedef union U_INST64_M36 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
+} INST64_M36;
+
+typedef union U_INST64_M37 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
+		i:1, major:4; };
+} INST64_M37;
+
+typedef union U_INST64_M41 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+} INST64_M41;
+
+typedef union U_INST64_M42 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M42;
+
+typedef union U_INST64_M43 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M43;
+
+typedef union U_INST64_M44 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
+} INST64_M44;
+
+typedef union U_INST64_M45 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M45;
+
+typedef union U_INST64_M46 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
+		x3:3, un1:1, major:4; };
+} INST64_M46;
+
+typedef union U_INST64_M47 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
+} INST64_M47;
+
+typedef union U_INST64_M1{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M1;
+
+typedef union U_INST64_M2{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M2;
+
+typedef union U_INST64_M3{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M3;
+
+typedef union U_INST64_M4 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M4;
+
+typedef union U_INST64_M5 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M5;
+
+typedef union U_INST64_M6 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M6;
+
+typedef union U_INST64_M9 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M9;
+
+typedef union U_INST64_M10 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M10;
+
+typedef union U_INST64_M12 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M12;
+
+typedef union U_INST64_M15 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M15;
+
+typedef union U_INST64 {
+	IA64_INST inst;
+	struct { unsigned long :37, major:4; } generic;
+	INST64_A5 A5;	/* used in build_hypercall_bundle only */
+	INST64_B4 B4;	/* used in build_hypercall_bundle only */
+	INST64_B8 B8;	/* rfi, bsw.[01] */
+	INST64_B9 B9;	/* break.b */
+	INST64_I19 I19;	/* used in build_hypercall_bundle only */
+	INST64_I26 I26;	/* mov register to ar (I unit) */
+	INST64_I27 I27;	/* mov immediate to ar (I unit) */
+	INST64_I28 I28;	/* mov from ar (I unit) */
+	INST64_M1  M1;	/* ld integer */
+	INST64_M2  M2;
+	INST64_M3  M3;
+	INST64_M4  M4;	/* st integer */
+	INST64_M5  M5;
+	INST64_M6  M6;	/* ldfd floating pointer 		*/
+	INST64_M9  M9;	/* stfd floating pointer		*/
+	INST64_M10 M10;	/* stfd floating pointer		*/
+	INST64_M12 M12;     /* ldfd pair floating pointer		*/
+	INST64_M15 M15;	/* lfetch + imm update			*/
+	INST64_M28 M28;	/* purge translation cache entry	*/
+	INST64_M29 M29;	/* mov register to ar (M unit)		*/
+	INST64_M30 M30;	/* mov immediate to ar (M unit)		*/
+	INST64_M31 M31;	/* mov from ar (M unit)			*/
+	INST64_M32 M32;	/* mov reg to cr			*/
+	INST64_M33 M33;	/* mov from cr				*/
+	INST64_M35 M35;	/* mov to psr				*/
+	INST64_M36 M36;	/* mov from psr				*/
+	INST64_M37 M37;	/* break.m				*/
+	INST64_M41 M41;	/* translation cache insert		*/
+	INST64_M42 M42;	/* mov to indirect reg/translation reg insert*/
+	INST64_M43 M43;	/* mov from indirect reg		*/
+	INST64_M44 M44;	/* set/reset system mask		*/
+	INST64_M45 M45;	/* translation purge			*/
+	INST64_M46 M46;	/* translation access (tpa,tak)		*/
+	INST64_M47 M47;	/* purge translation entry		*/
+} INST64;
+
+#define MASK_41 ((unsigned long)0x1ffffffffff)
+
+/* Virtual address memory attributes encoding */
+#define VA_MATTR_WB         0x0
+#define VA_MATTR_UC         0x4
+#define VA_MATTR_UCE        0x5
+#define VA_MATTR_WC         0x6
+#define VA_MATTR_NATPAGE    0x7
+
+#define PMASK(size)         (~((size) - 1))
+#define PSIZE(size)         (1UL<<(size))
+#define CLEARLSB(ppn, nbits)    (((ppn) >> (nbits)) << (nbits))
+#define PAGEALIGN(va, ps)	CLEARLSB(va, ps)
+#define PAGE_FLAGS_RV_MASK   (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
+#define _PAGE_MA_ST     (0x1 <<  2) /* is reserved for software use */
+
+#define ARCH_PAGE_SHIFT   12
+
+#define INVALID_TI_TAG (1UL << 63)
+
+#define VTLB_PTE_P_BIT      0
+#define VTLB_PTE_IO_BIT     60
+#define VTLB_PTE_IO         (1UL<<VTLB_PTE_IO_BIT)
+#define VTLB_PTE_P          (1UL<<VTLB_PTE_P_BIT)
+
+#define vcpu_quick_region_check(_tr_regions,_ifa)		\
+	(_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
+
+#define vcpu_quick_region_set(_tr_regions,_ifa)             \
+	do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
+
+static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
+		u64 va, u64 rid)
+{
+	trp->page_flags = pte;
+	trp->itir = itir;
+	trp->vadr = va;
+	trp->rid = rid;
+}
+
+extern u64 kvm_get_mpt_entry(u64 gpfn);
+
+/* Return I/ */
+static inline u64 __gpfn_is_io(u64 gpfn)
+{
+	u64  pte;
+	pte = kvm_get_mpt_entry(gpfn);
+	if (!(pte & GPFN_INV_MASK)) {
+		pte = pte & GPFN_IO_MASK;
+		if (pte != GPFN_PHYS_MMIO)
+			return pte;
+	}
+	return 0;
+}
+#endif
+#define IA64_NO_FAULT	0
+#define IA64_FAULT	1
+
+#define VMM_RBS_OFFSET  ((VMM_TASK_SIZE + 15) & ~15)
+
+#define SW_BAD  0   /* Bad mode transitition */
+#define SW_V2P  1   /* Physical emulatino is activated */
+#define SW_P2V  2   /* Exit physical mode emulation */
+#define SW_SELF 3   /* No mode transition */
+#define SW_NOP  4   /* Mode transition, but without action required */
+
+#define GUEST_IN_PHY    0x1
+#define GUEST_PHY_EMUL  0x2
+
+#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
+
+#define VRN_SHIFT	61
+#define VRN_MASK	0xe000000000000000
+#define VRN0		0x0UL
+#define VRN1		0x1UL
+#define VRN2		0x2UL
+#define VRN3		0x3UL
+#define VRN4		0x4UL
+#define VRN5		0x5UL
+#define VRN6		0x6UL
+#define VRN7		0x7UL
+
+#define IRQ_NO_MASKED         0
+#define IRQ_MASKED_BY_VTPR    1
+#define IRQ_MASKED_BY_INSVC   2   /* masked by inservice IRQ */
+
+#define PTA_BASE_SHIFT      15
+
+#define IA64_PSR_VM_BIT     46
+#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
+
+/* Interruption Function State */
+#define IA64_IFS_V_BIT      63
+#define IA64_IFS_V  (__IA64_UL(1) << IA64_IFS_V_BIT)
+
+#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
+#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/gcc_intrin.h>
+
+#define is_physical_mode(v)		\
+	((v->arch.mode_flags) & GUEST_IN_PHY)
+
+#define is_virtual_mode(v)	\
+	(!is_physical_mode(v))
+
+#define MODE_IND(psr)	\
+	(((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
+
+#ifndef CONFIG_SMP
+#define _vmm_raw_spin_lock(x)	 do {}while(0)
+#define _vmm_raw_spin_unlock(x) do {}while(0)
+#else
+typedef struct {
+	volatile unsigned int lock;
+} vmm_spinlock_t;
+#define _vmm_raw_spin_lock(x)						\
+	do {								\
+		__u32 *ia64_spinlock_ptr = (__u32 *) (x);		\
+		__u64 ia64_spinlock_val;				\
+		ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
+		if (unlikely(ia64_spinlock_val)) {			\
+			do {						\
+				while (*ia64_spinlock_ptr)		\
+				ia64_barrier();				\
+				ia64_spinlock_val =			\
+				ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
+			} while (ia64_spinlock_val);			\
+		}							\
+	} while (0)
+
+#define _vmm_raw_spin_unlock(x)				\
+	do { barrier();				\
+		((vmm_spinlock_t *)x)->lock = 0; } \
+while (0)
+#endif
+
+void vmm_spin_lock(vmm_spinlock_t *lock);
+void vmm_spin_unlock(vmm_spinlock_t *lock);
+enum {
+	I_TLB = 1,
+	D_TLB = 2
+};
+
+union kvm_va {
+	struct {
+		unsigned long off : 60;		/* intra-region offset */
+		unsigned long reg :  4;		/* region number */
+	} f;
+	unsigned long l;
+	void *p;
+};
+
+#define __kvm_pa(x)     ({union kvm_va _v; _v.l = (long) (x);		\
+						_v.f.reg = 0; _v.l; })
+#define __kvm_va(x)     ({union kvm_va _v; _v.l = (long) (x);		\
+				_v.f.reg = -1; _v.p; })
+
+#define _REGION_ID(x)           ({union ia64_rr _v; _v.val = (long)(x); \
+						_v.rid; })
+#define _REGION_PAGE_SIZE(x)    ({union ia64_rr _v; _v.val = (long)(x); \
+						_v.ps; })
+#define _REGION_HW_WALKER(x)    ({union ia64_rr _v; _v.val = (long)(x);	\
+						_v.ve; })
+
+enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
+enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
+
+#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
+#define VMX(_v, _x)  ((_v)->arch._x)
+
+#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
+#define VLSAPIC_XTP(_v)        VMX(_v, xtp)
+
+static inline unsigned long itir_ps(unsigned long itir)
+{
+	return ((itir >> 2) & 0x3f);
+}
+
+
+/**************************************************************************
+  VCPU control register access routines
+ **************************************************************************/
+
+static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, itir));
+}
+
+static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, itir) = val;
+}
+
+static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, ifa));
+}
+
+static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ifa) = val;
+}
+
+static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, iva));
+}
+
+static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, pta));
+}
+
+static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, lid));
+}
+
+static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, tpr));
+}
+
+static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
+{
+	return (0UL);		/*reads of eoi always return 0 */
+}
+
+static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[0]));
+}
+
+static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[1]));
+}
+
+static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[2]));
+}
+
+static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[3]));
+}
+
+static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
+{
+	ia64_setreg(_IA64_REG_CR_DCR, val);
+}
+
+static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, isr) = val;
+}
+
+static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, lid) = val;
+}
+
+static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ipsr) = val;
+}
+
+static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iip) = val;
+}
+
+static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ifs) = val;
+}
+
+static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iipa) = val;
+}
+
+static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iha) = val;
+}
+
+
+static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return vcpu->arch.vrr[reg>>61];
+}
+
+/**************************************************************************
+  VCPU debug breakpoint register access routines
+ **************************************************************************/
+
+static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	__ia64_set_dbr(reg, val);
+}
+
+static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	ia64_set_ibr(reg, val);
+}
+
+static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return ((u64)__ia64_get_dbr(reg));
+}
+
+static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return ((u64)ia64_get_ibr(reg));
+}
+
+/**************************************************************************
+  VCPU performance monitor register access routines
+ **************************************************************************/
+static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	/* NOTE: Writes to unimplemented PMC registers are discarded */
+	ia64_set_pmc(reg, val);
+}
+
+static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	/* NOTE: Writes to unimplemented PMD registers are discarded */
+	ia64_set_pmd(reg, val);
+}
+
+static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
+{
+	/* NOTE: Reads from unimplemented PMC registers return zero */
+	return ((u64)ia64_get_pmc(reg));
+}
+
+static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
+{
+	/* NOTE: Reads from unimplemented PMD registers return zero */
+	return ((u64)ia64_get_pmd(reg));
+}
+
+static inline unsigned long vrrtomrr(unsigned long val)
+{
+	union ia64_rr rr;
+	rr.val = val;
+	rr.rid = (rr.rid << 4) | 0xe;
+	if (rr.ps > PAGE_SHIFT)
+		rr.ps = PAGE_SHIFT;
+	rr.ve = 1;
+	return rr.val;
+}
+
+
+static inline int highest_bits(int *dat)
+{
+	u32  bits, bitnum;
+	int i;
+
+	/* loop for all 256 bits */
+	for (i = 7; i >= 0 ; i--) {
+		bits = dat[i];
+		if (bits) {
+			bitnum = fls(bits);
+			return i * 32 + bitnum - 1;
+		}
+	}
+	return NULL_VECTOR;
+}
+
+/*
+ * The pending irq is higher than the inservice one.
+ *
+ */
+static inline int is_higher_irq(int pending, int inservice)
+{
+	return ((pending > inservice)
+			|| ((pending != NULL_VECTOR)
+				&& (inservice == NULL_VECTOR)));
+}
+
+static inline int is_higher_class(int pending, int mic)
+{
+	return ((pending >> 4) > mic);
+}
+
+/*
+ * Return 0-255 for pending irq.
+ *        NULL_VECTOR: when no pending.
+ */
+static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
+{
+	if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
+		return NMI_VECTOR;
+	if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+	return highest_bits((int *)&VCPU(vcpu, irr[0]));
+}
+
+static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
+{
+	if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
+		return NMI_VECTOR;
+	if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+	return highest_bits((int *)&(VMX(vcpu, insvc[0])));
+}
+
+extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+					struct ia64_fpreg *val);
+extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+					struct ia64_fpreg *val);
+extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
+extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
+			u64 val, int nat);
+extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
+extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
+extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
+extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
+extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
+					u64 itir, u64 va, int type);
+extern struct thash_data *vhpt_lookup(u64 va);
+extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
+extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
+extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
+extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
+extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
+		u64 itir, u64 ifa, int type);
+extern void thash_purge_all(struct kvm_vcpu *v);
+extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
+						u64 va, int is_data);
+extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
+						u64 ps, int is_data);
+
+extern void vcpu_increment_iip(struct kvm_vcpu *v);
+extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
+extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
+extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
+extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
+extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
+extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
+extern void nested_dtlb(struct kvm_vcpu *vcpu);
+extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
+extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
+
+extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
+extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
+
+extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
+extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
+extern void vmm_transition(struct kvm_vcpu *vcpu);
+extern void vmm_trampoline(union context *from, union context *to);
+extern int vmm_entry(void);
+extern  u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
+
+extern void vmm_reset_entry(void);
+void kvm_init_vtlb(struct kvm_vcpu *v);
+void kvm_init_vhpt(struct kvm_vcpu *v);
+void thash_init(struct thash_cb *hcb, u64 sz);
+
+void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
+u64 kvm_gpa_to_mpa(u64 gpa);
+extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
+		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+
+extern long vmm_sanity;
+
+#endif
+#endif	/* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
new file mode 100644
index 00000000000..176a12cd56d
--- /dev/null
+++ b/arch/ia64/kvm/vmm.c
@@ -0,0 +1,99 @@
+/*
+ * vmm.c: vmm module interface with kvm module
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ *  Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/fpswa.h>
+
+#include "vcpu.h"
+
+MODULE_AUTHOR("Intel");
+MODULE_LICENSE("GPL");
+
+extern char kvm_ia64_ivt;
+extern char kvm_asm_mov_from_ar;
+extern char kvm_asm_mov_from_ar_sn2;
+extern fpswa_interface_t *vmm_fpswa_interface;
+
+long vmm_sanity = 1;
+
+struct kvm_vmm_info vmm_info = {
+	.module			= THIS_MODULE,
+	.vmm_entry		= vmm_entry,
+	.tramp_entry		= vmm_trampoline,
+	.vmm_ivt		= (unsigned long)&kvm_ia64_ivt,
+	.patch_mov_ar		= (unsigned long)&kvm_asm_mov_from_ar,
+	.patch_mov_ar_sn2	= (unsigned long)&kvm_asm_mov_from_ar_sn2,
+};
+
+static int __init  kvm_vmm_init(void)
+{
+
+	vmm_fpswa_interface = fpswa_interface;
+
+	/*Register vmm data to kvm side*/
+	return kvm_init(&vmm_info, 1024, 0, THIS_MODULE);
+}
+
+static void __exit kvm_vmm_exit(void)
+{
+	kvm_exit();
+	return ;
+}
+
+void vmm_spin_lock(vmm_spinlock_t *lock)
+{
+	_vmm_raw_spin_lock(lock);
+}
+
+void vmm_spin_unlock(vmm_spinlock_t *lock)
+{
+	_vmm_raw_spin_unlock(lock);
+}
+
+static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	long psr;
+
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_DEBUG;
+	vmm_transition(vcpu);
+	local_irq_restore(psr);
+}
+
+asmlinkage int printk(const char *fmt, ...)
+{
+	struct kvm_vcpu *vcpu = current_vcpu;
+	va_list args;
+	int r;
+
+	memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
+	va_start(args, fmt);
+	r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
+	va_end(args);
+	vcpu_debug_exit(vcpu);
+	return r;
+}
+
+module_init(kvm_vmm_init)
+module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
new file mode 100644
index 00000000000..397e34a63e1
--- /dev/null
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -0,0 +1,1392 @@
+/*
+ * arch/ia64/kvm/vmm_ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ *      Stephane Eranian <eranian@hpl.hp.com>
+ *      David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ *      Asit Mallick <asit.k.mallick@intel.com>
+ *      Suresh Siddha <suresh.b.siddha@intel.com>
+ *      Kenneth Chen <kenneth.w.chen@intel.com>
+ *      Fenghua Yu <fenghua.yu@intel.com>
+ *
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
+ * for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
+ * handler now uses virtual PT.
+ *
+ * 07/6/20 Xuefei Xu  (Anthony Xu) (anthony.xu@intel.com)
+ *              Supporting Intel virtualization architecture
+ *
+ */
+
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for
+ * critical
+ * interruptions like TLB misses.
+ *
+ *  For each entry, the comment is as follows:
+ *
+ *              // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss
+ *              (12,51)
+ *  entry offset ----/     /         /                  /
+ *  /
+ *  entry number ---------/         /                  /
+ *  /
+ *  size of the entry -------------/                  /
+ *  /
+ *  vector name -------------------------------------/
+ *  /
+ *  interruptions triggering this vector
+ *  ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB
+ * boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/pgtable.h>
+
+#include "asm-offsets.h"
+#include "vcpu.h"
+#include "kvm_minstate.h"
+#include "vti.h"
+
+#if 0
+# define PSR_DEFAULT_BITS   psr.ac
+#else
+# define PSR_DEFAULT_BITS   0
+#endif
+
+#define KVM_FAULT(n)    \
+	kvm_fault_##n:;          \
+	mov r19=n;;          \
+	br.sptk.many kvm_vmm_panic;         \
+	;;                  \
+
+#define KVM_REFLECT(n)    \
+	mov r31=pr;           \
+	mov r19=n;       /* prepare to save predicates */ \
+	mov r29=cr.ipsr;      \
+	;;      \
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
+(p7)	br.sptk.many kvm_dispatch_reflection;        \
+	br.sptk.many kvm_vmm_panic;      \
+
+GLOBAL_ENTRY(kvm_vmm_panic)
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,1,0
+	mov out0=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i    // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	br.call.sptk.many b6=vmm_panic_handler;
+END(kvm_vmm_panic)
+
+    .section .text..ivt,"ax"
+
+    .align 32768    // align on 32KB boundary
+    .global kvm_ia64_ivt
+kvm_ia64_ivt:
+///////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(kvm_vhpt_miss)
+	KVM_FAULT(0)
+END(kvm_vhpt_miss)
+
+    .org kvm_ia64_ivt+0x400
+////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(kvm_itlb_miss)
+	mov r31 = pr
+	mov r29=cr.ipsr;
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)	br.sptk kvm_alt_itlb_miss
+	mov r19 = 1
+	br.sptk kvm_itlb_miss_dispatch
+	KVM_FAULT(1);
+END(kvm_itlb_miss)
+
+    .org kvm_ia64_ivt+0x0800
+//////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(kvm_dtlb_miss)
+	mov r31 = pr
+	mov r29=cr.ipsr;
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)	br.sptk kvm_alt_dtlb_miss
+	br.sptk kvm_dtlb_miss_dispatch
+END(kvm_dtlb_miss)
+
+     .org kvm_ia64_ivt+0x0c00
+////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(kvm_alt_itlb_miss)
+	mov r16=cr.ifa    // get address that caused the TLB miss
+	;;
+	movl r17=PAGE_KERNEL
+	mov r24=cr.ipsr
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	;;
+	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+	;;
+	or r19=r17,r19      // insert PTE control bits into r19
+	;;
+	movl r20=IA64_GRANULE_SHIFT<<2
+	;;
+	mov cr.itir=r20
+	;;
+	itc.i r19		// insert the TLB entry
+	mov pr=r31,-1
+	rfi
+END(kvm_alt_itlb_miss)
+
+    .org kvm_ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(kvm_alt_dtlb_miss)
+	mov r16=cr.ifa		// get address that caused the TLB miss
+	;;
+	movl r17=PAGE_KERNEL
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	mov r24=cr.ipsr
+	;;
+	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+	;;
+	or r19=r19,r17	// insert PTE control bits into r19
+	;;
+	movl r20=IA64_GRANULE_SHIFT<<2
+	;;
+	mov cr.itir=r20
+	;;
+	itc.d r19		// insert the TLB entry
+	mov pr=r31,-1
+	rfi
+END(kvm_alt_dtlb_miss)
+
+    .org kvm_ia64_ivt+0x1400
+//////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(kvm_nested_dtlb_miss)
+	KVM_FAULT(5)
+END(kvm_nested_dtlb_miss)
+
+    .org kvm_ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(kvm_ikey_miss)
+	KVM_REFLECT(6)
+END(kvm_ikey_miss)
+
+    .org kvm_ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(kvm_dkey_miss)
+	KVM_REFLECT(7)
+END(kvm_dkey_miss)
+
+    .org kvm_ia64_ivt+0x2000
+////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(kvm_dirty_bit)
+	KVM_REFLECT(8)
+END(kvm_dirty_bit)
+
+    .org kvm_ia64_ivt+0x2400
+////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(kvm_iaccess_bit)
+	KVM_REFLECT(9)
+END(kvm_iaccess_bit)
+
+    .org kvm_ia64_ivt+0x2800
+///////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(kvm_daccess_bit)
+	KVM_REFLECT(10)
+END(kvm_daccess_bit)
+
+    .org kvm_ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(kvm_break_fault)
+	mov r31=pr
+	mov r19=11
+	mov r29=cr.ipsr
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	;;
+	alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
+	mov out0=cr.ifa
+	mov out2=cr.isr     // FIXME: pity to make this slow access twice
+	mov out3=cr.iim     // FIXME: pity to make this slow access twice
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i         // guarantee that interruption collection is on
+	;;
+	(p15)ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out1=16,sp
+	br.call.sptk.many b6=kvm_ia64_handle_break
+	;;
+END(kvm_break_fault)
+
+    .org kvm_ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(kvm_interrupt)
+	mov r31=pr		// prepare to save predicates
+	mov r19=12
+	mov r29=cr.ipsr
+	;;
+	tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+	tbit.z p0,p15=r29,IA64_PSR_I_BIT
+	;;
+(p7)	br.sptk kvm_dispatch_interrupt
+	;;
+	mov r27=ar.rsc		/* M */
+	mov r20=r1			/* A */
+	mov r25=ar.unat		/* M */
+	mov r26=ar.pfs		/* I */
+	mov r28=cr.iip		/* M */
+	cover			/* B (or nothing) */
+	;;
+	mov r1=sp
+	;;
+	invala			/* M */
+	mov r30=cr.ifs
+	;;
+	addl r1=-VMM_PT_REGS_SIZE,r1
+	;;
+	adds r17=2*L1_CACHE_BYTES,r1	/* really: biggest cache-line size */
+	adds r16=PT(CR_IPSR),r1
+	;;
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+	st8 [r16]=r29			/* save cr.ipsr */
+	;;
+	lfetch.fault.excl.nt1 [r17]
+	mov r29=b0
+	;;
+	adds r16=PT(R8),r1  	/* initialize first base pointer */
+	adds r17=PT(R9),r1  	/* initialize second base pointer */
+	mov r18=r0      		/* make sure r18 isn't NaT */
+	;;
+.mem.offset 0,0; st8.spill [r16]=r8,16
+.mem.offset 8,0; st8.spill [r17]=r9,16
+        ;;
+.mem.offset 0,0; st8.spill [r16]=r10,24
+.mem.offset 8,0; st8.spill [r17]=r11,24
+        ;;
+	st8 [r16]=r28,16		/* save cr.iip */
+	st8 [r17]=r30,16		/* save cr.ifs */
+	mov r8=ar.fpsr		/* M */
+	mov r9=ar.csd
+	mov r10=ar.ssd
+	movl r11=FPSR_DEFAULT	/* L-unit */
+	;;
+	st8 [r16]=r25,16		/* save ar.unat */
+	st8 [r17]=r26,16		/* save ar.pfs */
+	shl r18=r18,16		/* compute ar.rsc to be used for "loadrs" */
+	;;
+	st8 [r16]=r27,16		/* save ar.rsc */
+	adds r17=16,r17		/* skip over ar_rnat field */
+	;;
+	st8 [r17]=r31,16		/* save predicates */
+	adds r16=16,r16		/* skip over ar_bspstore field */
+	;;
+	st8 [r16]=r29,16		/* save b0 */
+	st8 [r17]=r18,16		/* save ar.rsc value for "loadrs" */
+	;;
+.mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
+.mem.offset 8,0; st8.spill [r17]=r12,16
+	adds r12=-16,r1
+	/* switch to kernel memory stack (with 16 bytes of scratch) */
+	;;
+.mem.offset 0,0; st8.spill [r16]=r13,16
+.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
+	;;
+.mem.offset 0,0; st8.spill [r16]=r15,16
+.mem.offset 8,0; st8.spill [r17]=r14,16
+	dep r14=-1,r0,60,4
+	;;
+.mem.offset 0,0; st8.spill [r16]=r2,16
+.mem.offset 8,0; st8.spill [r17]=r3,16
+	adds r2=VMM_PT_REGS_R16_OFFSET,r1
+	adds r14 = VMM_VCPU_GP_OFFSET,r13
+	;;
+	mov r8=ar.ccv
+	ld8 r14 = [r14]
+	;;
+	mov r1=r14       /* establish kernel global pointer */
+	;;                                          \
+	bsw.1
+	;;
+	alloc r14=ar.pfs,0,0,1,0	// must be first in an insn group
+	mov out0=r13
+	;;
+	ssm psr.ic
+	;;
+	srlz.i
+	;;
+	//(p15) ssm psr.i
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	srlz.i			// ensure everybody knows psr.ic is back on
+	;;
+.mem.offset 0,0; st8.spill [r2]=r16,16
+.mem.offset 8,0; st8.spill [r3]=r17,16
+	;;
+.mem.offset 0,0; st8.spill [r2]=r18,16
+.mem.offset 8,0; st8.spill [r3]=r19,16
+	;;
+.mem.offset 0,0; st8.spill [r2]=r20,16
+.mem.offset 8,0; st8.spill [r3]=r21,16
+	mov r18=b6
+	;;
+.mem.offset 0,0; st8.spill [r2]=r22,16
+.mem.offset 8,0; st8.spill [r3]=r23,16
+	mov r19=b7
+	;;
+.mem.offset 0,0; st8.spill [r2]=r24,16
+.mem.offset 8,0; st8.spill [r3]=r25,16
+	;;
+.mem.offset 0,0; st8.spill [r2]=r26,16
+.mem.offset 8,0; st8.spill [r3]=r27,16
+	;;
+.mem.offset 0,0; st8.spill [r2]=r28,16
+.mem.offset 8,0; st8.spill [r3]=r29,16
+	;;
+.mem.offset 0,0; st8.spill [r2]=r30,16
+.mem.offset 8,0; st8.spill [r3]=r31,32
+	;;
+	mov ar.fpsr=r11       /* M-unit */
+	st8 [r2]=r8,8         /* ar.ccv */
+	adds r24=PT(B6)-PT(F7),r3
+	;;
+	stf.spill [r2]=f6,32
+	stf.spill [r3]=f7,32
+	;;
+	stf.spill [r2]=f8,32
+	stf.spill [r3]=f9,32
+	;;
+	stf.spill [r2]=f10
+	stf.spill [r3]=f11
+	adds r25=PT(B7)-PT(F11),r3
+	;;
+	st8 [r24]=r18,16       /* b6 */
+	st8 [r25]=r19,16       /* b7 */
+	;;
+	st8 [r24]=r9           /* ar.csd */
+	st8 [r25]=r10          /* ar.ssd */
+	;;
+	srlz.d		// make sure we see the effect of cr.ivr
+	addl r14=@gprel(ia64_leave_nested),gp
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=kvm_ia64_handle_irq
+	;;
+END(kvm_interrupt)
+
+    .global kvm_dispatch_vexirq
+    .org kvm_ia64_ivt+0x3400
+//////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ENTRY(kvm_virtual_exirq)
+	mov r31=pr
+	mov r19=13
+	mov r30 =r0
+	;;
+kvm_dispatch_vexirq:
+	cmp.eq p6,p0 = 1,r30
+	;;
+(p6)	add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+(p6)	ld8 r1 = [r29]
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,1,0
+	mov out0=r13
+
+	ssm psr.ic
+	;;
+	srlz.i // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.i
+	adds r3=8,r2                // set up second base pointer
+	;;
+	KVM_SAVE_REST
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=kvm_vexirq
+END(kvm_virtual_exirq)
+
+    .org kvm_ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+	KVM_FAULT(14)
+	// this code segment is from 2.6.16.13
+
+    .org kvm_ia64_ivt+0x3c00
+///////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+	KVM_FAULT(15)
+
+    .org kvm_ia64_ivt+0x4000
+///////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+	KVM_FAULT(16)
+
+    .org kvm_ia64_ivt+0x4400
+//////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+	KVM_FAULT(17)
+
+    .org kvm_ia64_ivt+0x4800
+//////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+	KVM_FAULT(18)
+
+    .org kvm_ia64_ivt+0x4c00
+//////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+	KVM_FAULT(19)
+
+    .org kvm_ia64_ivt+0x5000
+//////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present
+ENTRY(kvm_page_not_present)
+	KVM_REFLECT(20)
+END(kvm_page_not_present)
+
+    .org kvm_ia64_ivt+0x5100
+///////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
+ENTRY(kvm_key_permission)
+	KVM_REFLECT(21)
+END(kvm_key_permission)
+
+    .org kvm_ia64_ivt+0x5200
+//////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(kvm_iaccess_rights)
+	KVM_REFLECT(22)
+END(kvm_iaccess_rights)
+
+    .org kvm_ia64_ivt+0x5300
+//////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(kvm_daccess_rights)
+	KVM_REFLECT(23)
+END(kvm_daccess_rights)
+
+    .org kvm_ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(kvm_general_exception)
+	KVM_REFLECT(24)
+	KVM_FAULT(24)
+END(kvm_general_exception)
+
+    .org kvm_ia64_ivt+0x5500
+//////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(kvm_disabled_fp_reg)
+	KVM_REFLECT(25)
+END(kvm_disabled_fp_reg)
+
+    .org kvm_ia64_ivt+0x5600
+////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(kvm_nat_consumption)
+	KVM_REFLECT(26)
+END(kvm_nat_consumption)
+
+    .org kvm_ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(kvm_speculation_vector)
+	KVM_REFLECT(27)
+END(kvm_speculation_vector)
+
+    .org kvm_ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+	KVM_FAULT(28)
+
+    .org kvm_ia64_ivt+0x5900
+///////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(kvm_debug_vector)
+	KVM_FAULT(29)
+END(kvm_debug_vector)
+
+    .org kvm_ia64_ivt+0x5a00
+///////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(kvm_unaligned_access)
+	KVM_REFLECT(30)
+END(kvm_unaligned_access)
+
+    .org kvm_ia64_ivt+0x5b00
+//////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(kvm_unsupported_data_reference)
+	KVM_REFLECT(31)
+END(kvm_unsupported_data_reference)
+
+    .org kvm_ia64_ivt+0x5c00
+////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
+ENTRY(kvm_floating_point_fault)
+	KVM_REFLECT(32)
+END(kvm_floating_point_fault)
+
+    .org kvm_ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(kvm_floating_point_trap)
+	KVM_REFLECT(33)
+END(kvm_floating_point_trap)
+
+    .org kvm_ia64_ivt+0x5e00
+//////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(kvm_lower_privilege_trap)
+	KVM_REFLECT(34)
+END(kvm_lower_privilege_trap)
+
+    .org kvm_ia64_ivt+0x5f00
+//////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(kvm_taken_branch_trap)
+	KVM_REFLECT(35)
+END(kvm_taken_branch_trap)
+
+    .org kvm_ia64_ivt+0x6000
+////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(kvm_single_step_trap)
+	KVM_REFLECT(36)
+END(kvm_single_step_trap)
+    .global kvm_virtualization_fault_back
+    .org kvm_ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
+ENTRY(kvm_virtualization_fault)
+	mov r31=pr
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	st8 [r16] = r1
+	adds r17 = VMM_VCPU_GP_OFFSET, r21
+	;;
+	ld8 r1 = [r17]
+	cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
+	cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
+	cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
+	cmp.eq p9,p0=EVENT_RSM,r24
+	cmp.eq p10,p0=EVENT_SSM,r24
+	cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
+	cmp.eq p12,p0=EVENT_THASH,r24
+(p6)	br.dptk.many kvm_asm_mov_from_ar
+(p7)	br.dptk.many kvm_asm_mov_from_rr
+(p8)	br.dptk.many kvm_asm_mov_to_rr
+(p9)	br.dptk.many kvm_asm_rsm
+(p10)	br.dptk.many kvm_asm_ssm
+(p11)	br.dptk.many kvm_asm_mov_to_psr
+(p12)	br.dptk.many kvm_asm_thash
+	;;
+kvm_virtualization_fault_back:
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	ld8 r1 = [r16]
+	;;
+	mov r19=37
+	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+	;;
+	st8 [r16] = r24
+	st8 [r17] = r25
+	;;
+	cmp.ne p6,p0=EVENT_RFI, r24
+(p6)	br.sptk kvm_dispatch_virtualization_fault
+	;;
+	adds r18=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r18=[r18]
+	;;
+	adds r18=VMM_VPD_VIFS_OFFSET,r18
+	;;
+	ld8 r18=[r18]
+	;;
+	tbit.z p6,p0=r18,63
+(p6)	br.sptk kvm_dispatch_virtualization_fault
+	;;
+//if vifs.v=1 desert current register frame
+	alloc r18=ar.pfs,0,0,0,0
+	br.sptk kvm_dispatch_virtualization_fault
+END(kvm_virtualization_fault)
+
+    .org kvm_ia64_ivt+0x6200
+//////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+	KVM_FAULT(38)
+
+    .org kvm_ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+	KVM_FAULT(39)
+
+    .org kvm_ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+	KVM_FAULT(40)
+
+    .org kvm_ia64_ivt+0x6500
+//////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+	KVM_FAULT(41)
+
+    .org kvm_ia64_ivt+0x6600
+//////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+	KVM_FAULT(42)
+
+    .org kvm_ia64_ivt+0x6700
+//////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+	KVM_FAULT(43)
+
+    .org kvm_ia64_ivt+0x6800
+//////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+	KVM_FAULT(44)
+
+    .org kvm_ia64_ivt+0x6900
+///////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
+//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(kvm_ia32_exception)
+	KVM_FAULT(45)
+END(kvm_ia32_exception)
+
+    .org kvm_ia64_ivt+0x6a00
+////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
+ENTRY(kvm_ia32_intercept)
+	KVM_FAULT(47)
+END(kvm_ia32_intercept)
+
+    .org kvm_ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+	KVM_FAULT(48)
+
+    .org kvm_ia64_ivt+0x6d00
+//////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+	KVM_FAULT(49)
+
+    .org kvm_ia64_ivt+0x6e00
+//////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+	KVM_FAULT(50)
+
+    .org kvm_ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+	KVM_FAULT(52)
+
+    .org kvm_ia64_ivt+0x7100
+////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+	KVM_FAULT(53)
+
+    .org kvm_ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+	KVM_FAULT(54)
+
+    .org kvm_ia64_ivt+0x7300
+////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+	KVM_FAULT(55)
+
+    .org kvm_ia64_ivt+0x7400
+////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+	KVM_FAULT(56)
+
+    .org kvm_ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+	KVM_FAULT(57)
+
+    .org kvm_ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+	KVM_FAULT(58)
+
+    .org kvm_ia64_ivt+0x7700
+////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+	KVM_FAULT(59)
+
+    .org kvm_ia64_ivt+0x7800
+////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+	KVM_FAULT(60)
+
+    .org kvm_ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+	KVM_FAULT(61)
+
+    .org kvm_ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+	KVM_FAULT(62)
+
+    .org kvm_ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+	KVM_FAULT(63)
+
+    .org kvm_ia64_ivt+0x7c00
+////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+	KVM_FAULT(64)
+
+    .org kvm_ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+	KVM_FAULT(65)
+
+    .org kvm_ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+	KVM_FAULT(66)
+
+    .org kvm_ia64_ivt+0x7f00
+////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+	KVM_FAULT(67)
+
+    .org kvm_ia64_ivt+0x8000
+// There is no particular reason for this code to be here, other than that
+// there happens to be space here that would go unused otherwise.  If this
+// fault ever gets "unreserved", simply moved the following code to a more
+// suitable spot...
+
+
+ENTRY(kvm_dtlb_miss_dispatch)
+	mov r19 = 2
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,3,0
+	mov out0=cr.ifa
+	mov out1=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i     // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+	;;
+	KVM_SAVE_REST
+	KVM_SAVE_EXTRA
+	mov rp=r14
+	;;
+	adds out2=16,r12
+	br.call.sptk.many b6=kvm_page_fault
+END(kvm_dtlb_miss_dispatch)
+
+ENTRY(kvm_itlb_miss_dispatch)
+
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,3,0
+	mov out0=cr.ifa
+	mov out1=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i   // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out2=16,r12
+	br.call.sptk.many b6=kvm_page_fault
+END(kvm_itlb_miss_dispatch)
+
+ENTRY(kvm_dispatch_reflection)
+/*
+ * Input:
+ *  psr.ic: off
+ *  r19:    intr type (offset into ivt, see ia64_int.h)
+ *  r31:    contains saved predicates (pr)
+ */
+	KVM_SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,5,0
+	mov out0=cr.ifa
+	mov out1=cr.isr
+	mov out2=cr.iim
+	mov out3=r15
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i   // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	adds out4=16,r12
+	br.call.sptk.many b6=reflect_interruption
+END(kvm_dispatch_reflection)
+
+ENTRY(kvm_dispatch_virtualization_fault)
+	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+	;;
+	st8 [r16] = r24
+	st8 [r17] = r25
+	;;
+	KVM_SAVE_MIN_WITH_COVER_R19
+	;;
+	alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
+	mov out0=r13        //vcpu
+	adds r3=8,r2                // set up second base pointer
+	;;
+	ssm psr.ic
+	;;
+	srlz.i    // guarantee that interruption collection is on
+	;;
+	(p15) ssm psr.i               // restore psr.i
+	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+	;;
+	KVM_SAVE_REST
+	KVM_SAVE_EXTRA
+	mov rp=r14
+	;;
+	adds out1=16,sp         //regs
+	br.call.sptk.many b6=kvm_emulate
+END(kvm_dispatch_virtualization_fault)
+
+
+ENTRY(kvm_dispatch_interrupt)
+	KVM_SAVE_MIN_WITH_COVER_R19	// uses r31; defines r2 and r3
+	;;
+	alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	;;
+	ssm psr.ic
+	;;
+	srlz.i
+	;;
+	(p15) ssm psr.i
+	addl r14=@gprel(ia64_leave_hypervisor),gp
+	;;
+	KVM_SAVE_REST
+	mov rp=r14
+	;;
+	mov out0=r13		// pass pointer to pt_regs as second arg
+	br.call.sptk.many b6=kvm_ia64_handle_irq
+END(kvm_dispatch_interrupt)
+
+GLOBAL_ENTRY(ia64_leave_nested)
+	rsm psr.i
+	;;
+	adds r21=PT(PR)+16,r12
+	;;
+	lfetch [r21],PT(CR_IPSR)-PT(PR)
+	adds r2=PT(B6)+16,r12
+	adds r3=PT(R16)+16,r12
+	;;
+	lfetch [r21]
+	ld8 r28=[r2],8		// load b6
+	adds r29=PT(R24)+16,r12
+
+	ld8.fill r16=[r3]
+	adds r3=PT(AR_CSD)-PT(R16),r3
+	adds r30=PT(AR_CCV)+16,r12
+	;;
+	ld8.fill r24=[r29]
+	ld8 r15=[r30]		// load ar.ccv
+	;;
+	ld8 r29=[r2],16		// load b7
+	ld8 r30=[r3],16		// load ar.csd
+	;;
+	ld8 r31=[r2],16		// load ar.ssd
+	ld8.fill r8=[r3],16
+	;;
+	ld8.fill r9=[r2],16
+	ld8.fill r10=[r3],PT(R17)-PT(R10)
+	;;
+	ld8.fill r11=[r2],PT(R18)-PT(R11)
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	mov ar.csd=r30
+	mov ar.ssd=r31
+	;;
+	rsm psr.i | psr.ic
+	// initiate turning off of interrupt and interruption collection
+	invala			// invalidate ALAT
+	;;
+	srlz.i
+	;;
+	ld8.fill r22=[r2],24
+	ld8.fill r23=[r3],24
+	mov b6=r28
+	;;
+	ld8.fill r25=[r2],16
+	ld8.fill r26=[r3],16
+	mov b7=r29
+	;;
+	ld8.fill r27=[r2],16
+	ld8.fill r28=[r3],16
+	;;
+	ld8.fill r29=[r2],16
+	ld8.fill r30=[r3],24
+	;;
+	ld8.fill r31=[r2],PT(F9)-PT(R31)
+	adds r3=PT(F10)-PT(F6),r3
+	;;
+	ldf.fill f9=[r2],PT(F6)-PT(F9)
+	ldf.fill f10=[r3],PT(F8)-PT(F10)
+	;;
+	ldf.fill f6=[r2],PT(F7)-PT(F6)
+	;;
+	ldf.fill f7=[r2],PT(F11)-PT(F7)
+	ldf.fill f8=[r3],32
+	;;
+	srlz.i			// ensure interruption collection is off
+	mov ar.ccv=r15
+	;;
+	bsw.0	// switch back to bank 0 (no stop bit required beforehand...)
+	;;
+	ldf.fill f11=[r2]
+//	mov r18=r13
+//	mov r21=r13
+	adds r16=PT(CR_IPSR)+16,r12
+	adds r17=PT(CR_IIP)+16,r12
+	;;
+	ld8 r29=[r16],16	// load cr.ipsr
+	ld8 r28=[r17],16	// load cr.iip
+	;;
+	ld8 r30=[r16],16	// load cr.ifs
+	ld8 r25=[r17],16	// load ar.unat
+	;;
+	ld8 r26=[r16],16	// load ar.pfs
+	ld8 r27=[r17],16	// load ar.rsc
+	cmp.eq p9,p0=r0,r0
+	// set p9 to indicate that we should restore cr.ifs
+	;;
+	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
+	ld8 r23=[r17],16// load ar.bspstore (may be garbage)
+	;;
+	ld8 r31=[r16],16	// load predicates
+	ld8 r22=[r17],16	// load b0
+	;;
+	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16	// load r1
+	;;
+	ld8.fill r12=[r16],16
+	ld8.fill r13=[r17],16
+	;;
+	ld8 r20=[r16],16	// ar.fpsr
+	ld8.fill r15=[r17],16
+	;;
+	ld8.fill r14=[r16],16
+	ld8.fill r2=[r17]
+	;;
+	ld8.fill r3=[r16]
+	;;
+	mov r16=ar.bsp		// get existing backing store pointer
+	;;
+	mov b0=r22
+	mov ar.pfs=r26
+	mov cr.ifs=r30
+	mov cr.ipsr=r29
+	mov ar.fpsr=r20
+	mov cr.iip=r28
+	;;
+	mov ar.rsc=r27
+	mov ar.unat=r25
+	mov pr=r31,-1
+	rfi
+END(ia64_leave_nested)
+
+GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
+/*
+ * work.need_resched etc. mustn't get changed
+ *by this CPU before it returns to
+ * user- or fsys-mode, hence we disable interrupts early on:
+ */
+	adds r2 = PT(R4)+16,r12
+	adds r3 = PT(R5)+16,r12
+	adds r8 = PT(EML_UNAT)+16,r12
+	;;
+	ld8 r8 = [r8]
+	;;
+	mov ar.unat=r8
+	;;
+	ld8.fill r4=[r2],16    //load r4
+	ld8.fill r5=[r3],16    //load r5
+	;;
+	ld8.fill r6=[r2]    //load r6
+	ld8.fill r7=[r3]    //load r7
+	;;
+END(ia64_leave_hypervisor_prepare)
+//fall through
+GLOBAL_ENTRY(ia64_leave_hypervisor)
+	rsm psr.i
+	;;
+	br.call.sptk.many b0=leave_hypervisor_tail
+	;;
+	adds r20=PT(PR)+16,r12
+	adds r8=PT(EML_UNAT)+16,r12
+	;;
+	ld8 r8=[r8]
+	;;
+	mov ar.unat=r8
+	;;
+	lfetch [r20],PT(CR_IPSR)-PT(PR)
+	adds r2 = PT(B6)+16,r12
+	adds r3 = PT(B7)+16,r12
+	;;
+	lfetch [r20]
+	;;
+	ld8 r24=[r2],16        /* B6 */
+	ld8 r25=[r3],16        /* B7 */
+	;;
+	ld8 r26=[r2],16        /* ar_csd */
+	ld8 r27=[r3],16        /* ar_ssd */
+	mov b6 = r24
+	;;
+	ld8.fill r8=[r2],16
+	ld8.fill r9=[r3],16
+	mov b7 = r25
+	;;
+	mov ar.csd = r26
+	mov ar.ssd = r27
+	;;
+	ld8.fill r10=[r2],PT(R15)-PT(R10)
+	ld8.fill r11=[r3],PT(R14)-PT(R11)
+	;;
+	ld8.fill r15=[r2],PT(R16)-PT(R15)
+	ld8.fill r14=[r3],PT(R17)-PT(R14)
+	;;
+	ld8.fill r16=[r2],16
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	;;
+	ld8.fill r22=[r2],16
+	ld8.fill r23=[r3],16
+	;;
+	ld8.fill r24=[r2],16
+	ld8.fill r25=[r3],16
+	;;
+	ld8.fill r26=[r2],16
+	ld8.fill r27=[r3],16
+	;;
+	ld8.fill r28=[r2],16
+	ld8.fill r29=[r3],16
+	;;
+	ld8.fill r30=[r2],PT(F6)-PT(R30)
+	ld8.fill r31=[r3],PT(F7)-PT(R31)
+	;;
+	rsm psr.i | psr.ic
+	// initiate turning off of interrupt and interruption collection
+	invala          // invalidate ALAT
+	;;
+	srlz.i          // ensure interruption collection is off
+	;;
+	bsw.0
+	;;
+	adds r16 = PT(CR_IPSR)+16,r12
+	adds r17 = PT(CR_IIP)+16,r12
+	mov r21=r13		// get current
+	;;
+	ld8 r31=[r16],16    // load cr.ipsr
+	ld8 r30=[r17],16    // load cr.iip
+	;;
+	ld8 r29=[r16],16    // load cr.ifs
+	ld8 r28=[r17],16    // load ar.unat
+	;;
+	ld8 r27=[r16],16    // load ar.pfs
+	ld8 r26=[r17],16    // load ar.rsc
+	;;
+	ld8 r25=[r16],16    // load ar.rnat
+	ld8 r24=[r17],16    // load ar.bspstore
+	;;
+	ld8 r23=[r16],16    // load predicates
+	ld8 r22=[r17],16    // load b0
+	;;
+	ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16    //load r1
+	;;
+	ld8.fill r12=[r16],16    //load r12
+	ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
+	;;
+	ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
+	ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
+	;;
+	ld8.fill r3=[r16]	//load r3
+	ld8 r18=[r17]	//load ar_ccv
+	;;
+	mov ar.fpsr=r19
+	mov ar.ccv=r18
+	shr.u r18=r20,16
+	;;
+kvm_rbs_switch:
+	mov r19=96
+
+kvm_dont_preserve_current_frame:
+/*
+    * To prevent leaking bits between the hypervisor and guest domain,
+    * we must clear the stacked registers in the "invalid" partition here.
+    * 5 registers/cycle on McKinley).
+    */
+#   define pRecurse	p6
+#   define pReturn	p7
+#   define Nregs	14
+
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	shr.u loc1=r18,9	// RNaTslots <= floor(dirtySize / (64*8))
+	sub r19=r19,r18		// r19 = (physStackedSize + 8) - dirtySize
+	;;
+	mov ar.rsc=r20		// load ar.rsc to be used for "loadrs"
+	shladd in0=loc1,3,r19
+	mov in1=0
+	;;
+	TEXT_ALIGN(32)
+kvm_rse_clear_invalid:
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	cmp.lt pRecurse,p0=Nregs*8,in0
+	// if more than Nregs regs left to clear, (re)curse
+	add out0=-Nregs*8,in0
+	add out1=1,in1		// increment recursion count
+	mov loc1=0
+	mov loc2=0
+	;;
+	mov loc3=0
+	mov loc4=0
+	mov loc5=0
+	mov loc6=0
+	mov loc7=0
+(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
+	;;
+	mov loc8=0
+	mov loc9=0
+	cmp.ne pReturn,p0=r0,in1
+	// if recursion count != 0, we need to do a br.ret
+	mov loc10=0
+	mov loc11=0
+(pReturn) br.ret.dptk.many b0
+
+#	undef pRecurse
+#	undef pReturn
+
+// loadrs has already been shifted
+	alloc r16=ar.pfs,0,0,0,0    // drop current register frame
+	;;
+	loadrs
+	;;
+	mov ar.bspstore=r24
+	;;
+	mov ar.unat=r28
+	mov ar.rnat=r25
+	mov ar.rsc=r26
+	;;
+	mov cr.ipsr=r31
+	mov cr.iip=r30
+	mov cr.ifs=r29
+	mov ar.pfs=r27
+	adds r18=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r18=[r18]   //vpd
+	adds r17=VMM_VCPU_ISR_OFFSET,r21
+	;;
+	ld8 r17=[r17]
+	adds r19=VMM_VPD_VPSR_OFFSET,r18
+	;;
+	ld8 r19=[r19]        //vpsr
+	mov r25=r18
+	adds r16= VMM_VCPU_GP_OFFSET,r21
+	;;
+	ld8 r16= [r16] // Put gp in r24
+	movl r24=@gprel(ia64_vmm_entry)  // calculate return address
+	;;
+	add  r24=r24,r16
+	;;
+	br.sptk.many  kvm_vps_sync_write       // call the service
+	;;
+END(ia64_leave_hypervisor)
+// fall through
+GLOBAL_ENTRY(ia64_vmm_entry)
+/*
+ *  must be at bank 0
+ *  parameter:
+ *  r17:cr.isr
+ *  r18:vpd
+ *  r19:vpsr
+ *  r22:b0
+ *  r23:predicate
+ */
+	mov r24=r22
+	mov r25=r18
+	tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
+(p1) 	br.cond.sptk.few kvm_vps_resume_normal
+(p2)	br.cond.sptk.many kvm_vps_resume_handler
+	;;
+END(ia64_vmm_entry)
+
+/*
+ * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
+ *                  u64 arg3, u64 arg4, u64 arg5,
+ *                  u64 arg6, u64 arg7);
+ *
+ * XXX: The currently defined services use only 4 args at the max. The
+ *  rest are not consumed.
+ */
+GLOBAL_ENTRY(ia64_call_vsa)
+    .regstk 4,4,0,0
+
+rpsave  =   loc0
+pfssave =   loc1
+psrsave =   loc2
+entry   =   loc3
+hostret =   r24
+
+	alloc   pfssave=ar.pfs,4,4,0,0
+	mov rpsave=rp
+	adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
+	;;
+	ld8 entry=[entry]
+1:	mov hostret=ip
+	mov r25=in1         // copy arguments
+	mov r26=in2
+	mov r27=in3
+	mov psrsave=psr
+	;;
+	tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
+	tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
+	;;
+	add hostret=2f-1b,hostret   // calculate return address
+	add entry=entry,in0
+	;;
+	rsm psr.i | psr.ic
+	;;
+	srlz.i
+	mov b6=entry
+	br.cond.sptk b6         // call the service
+2:
+// Architectural sequence for enabling interrupts if necessary
+(p7)    ssm psr.ic
+	;;
+(p7)    srlz.i
+	;;
+(p6)    ssm psr.i
+	;;
+	mov rp=rpsave
+	mov ar.pfs=pfssave
+	mov r8=r31
+	;;
+	srlz.d
+	br.ret.sptk rp
+
+END(ia64_call_vsa)
+
+#define  INIT_BSPSTORE  ((4<<30)-(12<<20)-0x100)
+
+GLOBAL_ENTRY(vmm_reset_entry)
+	//set up ipsr, iip, vpd.vpsr, dcr
+	// For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
+	// For DCR: all bits 0
+	bsw.0
+	;;
+	mov r21 =r13
+	adds r14=-VMM_PT_REGS_SIZE, r12
+	;;
+	movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
+	movl r10=0x8000000000000000
+	adds r16=PT(CR_IIP), r14
+	adds r20=PT(R1), r14
+	;;
+	rsm psr.ic | psr.i
+	;;
+	srlz.i
+	;;
+	mov ar.rsc = 0
+	;;
+	flushrs
+	;;
+	mov ar.bspstore = 0
+	// clear BSPSTORE
+	;;
+	mov cr.ipsr=r6
+	mov cr.ifs=r10
+	ld8 r4 = [r16] // Set init iip for first run.
+	ld8 r1 = [r20]
+	;;
+	mov cr.iip=r4
+	adds r16=VMM_VPD_BASE_OFFSET,r13
+	;;
+	ld8 r18=[r16]
+	;;
+	adds r19=VMM_VPD_VPSR_OFFSET,r18
+	;;
+	ld8 r19=[r19]
+	mov r17=r0
+	mov r22=r0
+	mov r23=r0
+	br.cond.sptk ia64_vmm_entry
+	br.ret.sptk  b0
+END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
new file mode 100644
index 00000000000..b214b5b0432
--- /dev/null
+++ b/arch/ia64/kvm/vti.h
@@ -0,0 +1,290 @@
+/*
+ * vti.h: prototype for generial vt related interface
+ *   	Copyright (c) 2004, Intel Corporation.
+ *
+ *	Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ *	Fred Yang (fred.yang@intel.com)
+ * 	Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ *
+ *  	Copyright (c) 2007, Intel Corporation.
+ *  	Zhang xiantao <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#ifndef _KVM_VT_I_H
+#define _KVM_VT_I_H
+
+#ifndef __ASSEMBLY__
+#include <asm/page.h>
+
+#include <linux/kvm_host.h>
+
+/* define itr.i and itr.d  in ia64_itr function */
+#define	ITR	0x01
+#define	DTR	0x02
+#define	IaDTR	0x03
+
+#define IA64_TR_VMM       6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
+#define IA64_TR_VM_DATA   7 /*dtr7       : maps current vm data*/
+
+#define RR6 (6UL<<61)
+#define RR7 (7UL<<61)
+
+
+/* config_options in pal_vp_init_env */
+#define	VP_INITIALIZE	1UL
+#define	VP_FR_PMC	1UL<<1
+#define	VP_OPCODE	1UL<<8
+#define	VP_CAUSE	1UL<<9
+#define VP_FW_ACC   	1UL<<63
+
+/* init vp env with initializing vm_buffer */
+#define	VP_INIT_ENV_INITALIZE  (VP_INITIALIZE | VP_FR_PMC |\
+	VP_OPCODE | VP_CAUSE | VP_FW_ACC)
+/* init vp env without initializing vm_buffer */
+#define	VP_INIT_ENV  VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC
+
+#define		PAL_VP_CREATE   265
+/* Stacked Virt. Initializes a new VPD for the operation of
+ * a new virtual processor in the virtual environment.
+ */
+#define		PAL_VP_ENV_INFO 266
+/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
+#define		PAL_VP_EXIT_ENV 267
+/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
+#define		PAL_VP_INIT_ENV 268
+/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
+#define		PAL_VP_REGISTER 269
+/*Stacked Virt. Register a different host IVT for the virtual processor.*/
+#define		PAL_VP_RESUME   270
+/* Renamed from PAL_VP_RESUME */
+#define		PAL_VP_RESTORE  270
+/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
+#define		PAL_VP_SUSPEND  271
+/* Renamed from PAL_VP_SUSPEND */
+#define		PAL_VP_SAVE	271
+/* Stacked Virt. Suspends operation for the specified virtual processor on
+ * the logical processor.
+ */
+#define		PAL_VP_TERMINATE 272
+/* Stacked Virt. Terminates operation for the specified virtual processor.*/
+
+union vac {
+	unsigned long value;
+	struct {
+		unsigned int a_int:1;
+		unsigned int a_from_int_cr:1;
+		unsigned int a_to_int_cr:1;
+		unsigned int a_from_psr:1;
+		unsigned int a_from_cpuid:1;
+		unsigned int a_cover:1;
+		unsigned int a_bsw:1;
+		long reserved:57;
+	};
+};
+
+union vdc {
+	unsigned long value;
+	struct {
+		unsigned int d_vmsw:1;
+		unsigned int d_extint:1;
+		unsigned int d_ibr_dbr:1;
+		unsigned int d_pmc:1;
+		unsigned int d_to_pmd:1;
+		unsigned int d_itm:1;
+		long reserved:58;
+	};
+};
+
+struct vpd {
+	union vac   vac;
+	union vdc   vdc;
+	unsigned long  virt_env_vaddr;
+	unsigned long  reserved1[29];
+	unsigned long  vhpi;
+	unsigned long  reserved2[95];
+	unsigned long  vgr[16];
+	unsigned long  vbgr[16];
+	unsigned long  vnat;
+	unsigned long  vbnat;
+	unsigned long  vcpuid[5];
+	unsigned long  reserved3[11];
+	unsigned long  vpsr;
+	unsigned long  vpr;
+	unsigned long  reserved4[76];
+	union {
+		unsigned long  vcr[128];
+		struct {
+			unsigned long dcr;
+			unsigned long itm;
+			unsigned long iva;
+			unsigned long rsv1[5];
+			unsigned long pta;
+			unsigned long rsv2[7];
+			unsigned long ipsr;
+			unsigned long isr;
+			unsigned long rsv3;
+			unsigned long iip;
+			unsigned long ifa;
+			unsigned long itir;
+			unsigned long iipa;
+			unsigned long ifs;
+			unsigned long iim;
+			unsigned long iha;
+			unsigned long rsv4[38];
+			unsigned long lid;
+			unsigned long ivr;
+			unsigned long tpr;
+			unsigned long eoi;
+			unsigned long irr[4];
+			unsigned long itv;
+			unsigned long pmv;
+			unsigned long cmcv;
+			unsigned long rsv5[5];
+			unsigned long lrr0;
+			unsigned long lrr1;
+			unsigned long rsv6[46];
+		};
+	};
+	unsigned long  reserved5[128];
+	unsigned long  reserved6[3456];
+	unsigned long  vmm_avail[128];
+	unsigned long  reserved7[4096];
+};
+
+#define PAL_PROC_VM_BIT		(1UL << 40)
+#define PAL_PROC_VMSW_BIT	(1UL << 54)
+
+static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
+		u64 *vp_env_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
+	*buffer_size = iprv.v0;
+	*vp_env_info = iprv.v1;
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_exit_env(u64 iva)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
+			u64 vbase_addr, u64 *vsa_base)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
+			vbase_addr);
+	*vsa_base = iprv.v0;
+
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
+
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
+
+	return iprv.status;
+}
+
+#endif
+
+/*VPD field offset*/
+#define VPD_VAC_START_OFFSET		0
+#define VPD_VDC_START_OFFSET		8
+#define VPD_VHPI_START_OFFSET		256
+#define VPD_VGR_START_OFFSET		1024
+#define VPD_VBGR_START_OFFSET		1152
+#define VPD_VNAT_START_OFFSET		1280
+#define VPD_VBNAT_START_OFFSET		1288
+#define VPD_VCPUID_START_OFFSET		1296
+#define VPD_VPSR_START_OFFSET		1424
+#define VPD_VPR_START_OFFSET		1432
+#define VPD_VRSE_CFLE_START_OFFSET	1440
+#define VPD_VCR_START_OFFSET		2048
+#define VPD_VTPR_START_OFFSET		2576
+#define VPD_VRR_START_OFFSET		3072
+#define VPD_VMM_VAIL_START_OFFSET	31744
+
+/*Virtualization faults*/
+
+#define EVENT_MOV_TO_AR			 1
+#define EVENT_MOV_TO_AR_IMM		 2
+#define EVENT_MOV_FROM_AR		 3
+#define EVENT_MOV_TO_CR			 4
+#define EVENT_MOV_FROM_CR		 5
+#define EVENT_MOV_TO_PSR		 6
+#define EVENT_MOV_FROM_PSR		 7
+#define EVENT_ITC_D			 8
+#define EVENT_ITC_I			 9
+#define EVENT_MOV_TO_RR			 10
+#define EVENT_MOV_TO_DBR		 11
+#define EVENT_MOV_TO_IBR		 12
+#define EVENT_MOV_TO_PKR		 13
+#define EVENT_MOV_TO_PMC		 14
+#define EVENT_MOV_TO_PMD		 15
+#define EVENT_ITR_D			 16
+#define EVENT_ITR_I			 17
+#define EVENT_MOV_FROM_RR		 18
+#define EVENT_MOV_FROM_DBR		 19
+#define EVENT_MOV_FROM_IBR		 20
+#define EVENT_MOV_FROM_PKR		 21
+#define EVENT_MOV_FROM_PMC		 22
+#define EVENT_MOV_FROM_CPUID		 23
+#define EVENT_SSM			 24
+#define EVENT_RSM			 25
+#define EVENT_PTC_L			 26
+#define EVENT_PTC_G			 27
+#define EVENT_PTC_GA			 28
+#define EVENT_PTR_D			 29
+#define EVENT_PTR_I			 30
+#define EVENT_THASH			 31
+#define EVENT_TTAG			 32
+#define EVENT_TPA			 33
+#define EVENT_TAK			 34
+#define EVENT_PTC_E			 35
+#define EVENT_COVER			 36
+#define EVENT_RFI			 37
+#define EVENT_BSW_0			 38
+#define EVENT_BSW_1			 39
+#define EVENT_VMSW			 40
+
+/**PAL virtual services offsets */
+#define PAL_VPS_RESUME_NORMAL           0x0000
+#define PAL_VPS_RESUME_HANDLER          0x0400
+#define PAL_VPS_SYNC_READ               0x0800
+#define PAL_VPS_SYNC_WRITE              0x0c00
+#define PAL_VPS_SET_PENDING_INTERRUPT   0x1000
+#define PAL_VPS_THASH                   0x1400
+#define PAL_VPS_TTAG                    0x1800
+#define PAL_VPS_RESTORE                 0x1c00
+#define PAL_VPS_SAVE                    0x2000
+
+#endif/* _VT_I_H*/
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
new file mode 100644
index 00000000000..a7869f8f49a
--- /dev/null
+++ b/arch/ia64/kvm/vtlb.c
@@ -0,0 +1,640 @@
+/*
+ * vtlb.c: guest virtual tlb handling module.
+ * Copyright (c) 2004, Intel Corporation.
+ *  Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
+ *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ *  Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include "vcpu.h"
+
+#include <linux/rwsem.h>
+
+#include <asm/tlb.h>
+
+/*
+ * Check to see if the address rid:va is translated by the TLB
+ */
+
+static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
+{
+	return ((trp->p) && (trp->rid == rid)
+				&& ((va-trp->vadr) < PSIZE(trp->ps)));
+}
+
+/*
+ * Only for GUEST TR format.
+ */
+static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
+{
+	u64 sa1, ea1;
+
+	if (!trp->p || trp->rid != rid)
+		return 0;
+
+	sa1 = trp->vadr;
+	ea1 = sa1 + PSIZE(trp->ps) - 1;
+	eva -= 1;
+	if ((sva > ea1) || (sa1 > eva))
+		return 0;
+	else
+		return 1;
+
+}
+
+void machine_tlb_purge(u64 va, u64 ps)
+{
+	ia64_ptcl(va, ps << 2);
+}
+
+void local_flush_tlb_all(void)
+{
+	int i, j;
+	unsigned long flags, count0, count1;
+	unsigned long stride0, stride1, addr;
+
+	addr    = current_vcpu->arch.ptce_base;
+	count0  = current_vcpu->arch.ptce_count[0];
+	count1  = current_vcpu->arch.ptce_count[1];
+	stride0 = current_vcpu->arch.ptce_stride[0];
+	stride1 = current_vcpu->arch.ptce_stride[1];
+
+	local_irq_save(flags);
+	for (i = 0; i < count0; ++i) {
+		for (j = 0; j < count1; ++j) {
+			ia64_ptce(addr);
+			addr += stride1;
+		}
+		addr += stride0;
+	}
+	local_irq_restore(flags);
+	ia64_srlz_i();          /* srlz.i implies srlz.d */
+}
+
+int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
+{
+	union ia64_rr    vrr;
+	union ia64_pta   vpta;
+	struct  ia64_psr   vpsr;
+
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	vpta.val = vcpu_get_pta(vcpu);
+
+	if (vrr.ve & vpta.ve) {
+		switch (ref) {
+		case DATA_REF:
+		case NA_REF:
+			return vpsr.dt;
+		case INST_REF:
+			return vpsr.dt && vpsr.it && vpsr.ic;
+		case RSE_REF:
+			return vpsr.dt && vpsr.rt;
+
+		}
+	}
+	return 0;
+}
+
+struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
+{
+	u64 index, pfn, rid, pfn_bits;
+
+	pfn_bits = vpta.size - 5 - 8;
+	pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
+	rid = _REGION_ID(vrr);
+	index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
+	*tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
+
+	return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
+				(index << 5));
+}
+
+struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
+{
+
+	struct thash_data *trp;
+	int  i;
+	u64 rid;
+
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;
+	if (type == D_TLB) {
+		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
+						i < NDTRS; i++, trp++) {
+				if (__is_tr_translated(trp, rid, va))
+					return trp;
+			}
+		}
+	} else {
+		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
+					i < NITRS; i++, trp++) {
+				if (__is_tr_translated(trp, rid, va))
+					return trp;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
+{
+	union ia64_rr rr;
+	struct thash_data *head;
+	unsigned long ps, gpaddr;
+
+	ps = itir_ps(itir);
+	rr.val = ia64_get_rr(ifa);
+
+	 gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
+					(ifa & ((1UL << ps) - 1));
+
+	head = (struct thash_data *)ia64_thash(ifa);
+	head->etag = INVALID_TI_TAG;
+	ia64_mf();
+	head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
+	head->itir = rr.ps << 2;
+	head->etag = ia64_ttag(ifa);
+	head->gpaddr = gpaddr;
+}
+
+void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
+{
+	u64 i, dirty_pages = 1;
+	u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
+	vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
+	void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
+
+	dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
+
+	vmm_spin_lock(lock);
+	for (i = 0; i < dirty_pages; i++) {
+		/* avoid RMW */
+		if (!test_bit(base_gfn + i, dirty_bitmap))
+			set_bit(base_gfn + i , dirty_bitmap);
+	}
+	vmm_spin_unlock(lock);
+}
+
+void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
+{
+	u64 phy_pte, psr;
+	union ia64_rr mrr;
+
+	mrr.val = ia64_get_rr(va);
+	phy_pte = translate_phy_pte(&pte, itir, va);
+
+	if (itir_ps(itir) >= mrr.ps) {
+		vhpt_insert(phy_pte, itir, va, pte);
+	} else {
+		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
+		psr = ia64_clear_ic();
+		ia64_itc(type, va, phy_pte, itir_ps(itir));
+		paravirt_dv_serialize_data();
+		ia64_set_psr(psr);
+	}
+
+	if (!(pte&VTLB_PTE_IO))
+		mark_pages_dirty(v, pte, itir_ps(itir));
+}
+
+/*
+ *   vhpt lookup
+ */
+struct thash_data *vhpt_lookup(u64 va)
+{
+	struct thash_data *head;
+	u64 tag;
+
+	head = (struct thash_data *)ia64_thash(va);
+	tag = ia64_ttag(va);
+	if (head->etag == tag)
+		return head;
+	return NULL;
+}
+
+u64 guest_vhpt_lookup(u64 iha, u64 *pte)
+{
+	u64 ret;
+	struct thash_data *data;
+
+	data = __vtr_lookup(current_vcpu, iha, D_TLB);
+	if (data != NULL)
+		thash_vhpt_insert(current_vcpu, data->page_flags,
+			data->itir, iha, D_TLB);
+
+	asm volatile ("rsm psr.ic|psr.i;;"
+			"srlz.d;;"
+			"ld8.s r9=[%1];;"
+			"tnat.nz p6,p7=r9;;"
+			"(p6) mov %0=1;"
+			"(p6) mov r9=r0;"
+			"(p7) extr.u r9=r9,0,53;;"
+			"(p7) mov %0=r0;"
+			"(p7) st8 [%2]=r9;;"
+			"ssm psr.ic;;"
+			"srlz.d;;"
+			"ssm psr.i;;"
+			"srlz.d;;"
+			: "=&r"(ret) : "r"(iha), "r"(pte) : "memory");
+
+	return ret;
+}
+
+/*
+ *  purge software guest tlb
+ */
+
+static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	struct thash_data *cur;
+	u64 start, curadr, size, psbits, tag, rr_ps, num;
+	union ia64_rr vrr;
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	vrr.val = vcpu_get_rr(v, va);
+	psbits = VMX(v, psbits[(va >> 61)]);
+	start = va & ~((1UL << ps) - 1);
+	while (psbits) {
+		curadr = start;
+		rr_ps = __ffs(psbits);
+		psbits &= ~(1UL << rr_ps);
+		num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
+		size = PSIZE(rr_ps);
+		vrr.ps = rr_ps;
+		while (num) {
+			cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
+			if (cur->etag == tag && cur->ps == rr_ps)
+				cur->etag = INVALID_TI_TAG;
+			curadr += size;
+			num--;
+		}
+	}
+}
+
+
+/*
+ *  purge VHPT and machine TLB
+ */
+static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	struct thash_data *cur;
+	u64 start, size, tag, num;
+	union ia64_rr rr;
+
+	start = va & ~((1UL << ps) - 1);
+	rr.val = ia64_get_rr(va);
+	size = PSIZE(rr.ps);
+	num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
+	while (num) {
+		cur = (struct thash_data *)ia64_thash(start);
+		tag = ia64_ttag(start);
+		if (cur->etag == tag)
+			cur->etag = INVALID_TI_TAG;
+		start += size;
+		num--;
+	}
+	machine_tlb_purge(va, ps);
+}
+
+/*
+ * Insert an entry into hash TLB or VHPT.
+ * NOTES:
+ *  1: When inserting VHPT to thash, "va" is a must covered
+ *  address by the inserted machine VHPT entry.
+ *  2: The format of entry is always in TLB.
+ *  3: The caller need to make sure the new entry will not overlap
+ *     with any existed entry.
+ */
+void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
+{
+	struct thash_data *head;
+	union ia64_rr vrr;
+	u64 tag;
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	vrr.val = vcpu_get_rr(v, va);
+	vrr.ps = itir_ps(itir);
+	VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
+	head = vsa_thash(hcb->pta, va, vrr.val, &tag);
+	head->page_flags = pte;
+	head->itir = itir;
+	head->etag = tag;
+}
+
+int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
+{
+	struct thash_data  *trp;
+	int  i;
+	u64 end, rid;
+
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;
+	end = va + PSIZE(ps);
+	if (type == D_TLB) {
+		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
+					i < NDTRS; i++, trp++) {
+				if (__is_tr_overlap(trp, rid, va, end))
+					return i;
+			}
+		}
+	} else {
+		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
+					i < NITRS; i++, trp++) {
+				if (__is_tr_overlap(trp, rid, va, end))
+					return i;
+			}
+		}
+	}
+	return -1;
+}
+
+/*
+ * Purge entries in VTLB and VHPT
+ */
+void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	if (vcpu_quick_region_check(v->arch.tc_regions, va))
+		vtlb_purge(v, va, ps);
+	vhpt_purge(v, va, ps);
+}
+
+void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	u64 old_va = va;
+	va = REGION_OFFSET(va);
+	if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
+		vtlb_purge(v, va, ps);
+	vhpt_purge(v, va, ps);
+}
+
+u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
+{
+	u64 ps, ps_mask, paddr, maddr, io_mask;
+	union pte_flags phy_pte;
+
+	ps = itir_ps(itir);
+	ps_mask = ~((1UL << ps) - 1);
+	phy_pte.val = *pte;
+	paddr = *pte;
+	paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
+	maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT);
+	io_mask = maddr & GPFN_IO_MASK;
+	if (io_mask && (io_mask != GPFN_PHYS_MMIO)) {
+		*pte |= VTLB_PTE_IO;
+		return -1;
+	}
+	maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
+					(paddr & ~PAGE_MASK);
+	phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
+	return phy_pte.val;
+}
+
+/*
+ * Purge overlap TCs and then insert the new entry to emulate itc ops.
+ * Notes: Only TC entry can purge and insert.
+ */
+void  thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
+						u64 ifa, int type)
+{
+	u64 ps;
+	u64 phy_pte, io_mask, index;
+	union ia64_rr vrr, mrr;
+
+	ps = itir_ps(itir);
+	vrr.val = vcpu_get_rr(v, ifa);
+	mrr.val = ia64_get_rr(ifa);
+
+	index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
+	io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK;
+	phy_pte = translate_phy_pte(&pte, itir, ifa);
+
+	/* Ensure WB attribute if pte is related to a normal mem page,
+	 * which is required by vga acceleration since qemu maps shared
+	 * vram buffer with WB.
+	 */
+	if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) &&
+			io_mask != GPFN_PHYS_MMIO) {
+		pte &= ~_PAGE_MA_MASK;
+		phy_pte &= ~_PAGE_MA_MASK;
+	}
+
+	vtlb_purge(v, ifa, ps);
+	vhpt_purge(v, ifa, ps);
+
+	if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
+		vtlb_insert(v, pte, itir, ifa);
+		vcpu_quick_region_set(VMX(v, tc_regions), ifa);
+	}
+	if (pte & VTLB_PTE_IO)
+		return;
+
+	if (ps >= mrr.ps)
+		vhpt_insert(phy_pte, itir, ifa, pte);
+	else {
+		u64 psr;
+		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
+		psr = ia64_clear_ic();
+		ia64_itc(type, ifa, phy_pte, ps);
+		paravirt_dv_serialize_data();
+		ia64_set_psr(psr);
+	}
+	if (!(pte&VTLB_PTE_IO))
+		mark_pages_dirty(v, pte, ps);
+
+}
+
+/*
+ * Purge all TCs or VHPT entries including those in Hash table.
+ *
+ */
+
+void thash_purge_all(struct kvm_vcpu *v)
+{
+	int i;
+	struct thash_data *head;
+	struct thash_cb  *vtlb, *vhpt;
+	vtlb = &v->arch.vtlb;
+	vhpt = &v->arch.vhpt;
+
+	for (i = 0; i < 8; i++)
+		VMX(v, psbits[i]) = 0;
+
+	head = vtlb->hash;
+	for (i = 0; i < vtlb->num; i++) {
+		head->page_flags = 0;
+		head->etag = INVALID_TI_TAG;
+		head->itir = 0;
+		head->next = 0;
+		head++;
+	};
+
+	head = vhpt->hash;
+	for (i = 0; i < vhpt->num; i++) {
+		head->page_flags = 0;
+		head->etag = INVALID_TI_TAG;
+		head->itir = 0;
+		head->next = 0;
+		head++;
+	};
+
+	local_flush_tlb_all();
+}
+
+/*
+ * Lookup the hash table and its collision chain to find an entry
+ * covering this address rid:va or the entry.
+ *
+ * INPUT:
+ *  in: TLB format for both VHPT & TLB.
+ */
+struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
+{
+	struct thash_data  *cch;
+	u64    psbits, ps, tag;
+	union ia64_rr vrr;
+
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	cch = __vtr_lookup(v, va, is_data);
+	if (cch)
+		return cch;
+
+	if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
+		return NULL;
+
+	psbits = VMX(v, psbits[(va >> 61)]);
+	vrr.val = vcpu_get_rr(v, va);
+	while (psbits) {
+		ps = __ffs(psbits);
+		psbits &= ~(1UL << ps);
+		vrr.ps = ps;
+		cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
+		if (cch->etag == tag && cch->ps == ps)
+			return cch;
+	}
+
+	return NULL;
+}
+
+/*
+ * Initialize internal control data before service.
+ */
+void thash_init(struct thash_cb *hcb, u64 sz)
+{
+	int i;
+	struct thash_data *head;
+
+	hcb->pta.val = (unsigned long)hcb->hash;
+	hcb->pta.vf = 1;
+	hcb->pta.ve = 1;
+	hcb->pta.size = sz;
+	head = hcb->hash;
+	for (i = 0; i < hcb->num; i++) {
+		head->page_flags = 0;
+		head->itir = 0;
+		head->etag = INVALID_TI_TAG;
+		head->next = 0;
+		head++;
+	}
+}
+
+u64 kvm_get_mpt_entry(u64 gpfn)
+{
+	u64 *base = (u64 *) KVM_P2M_BASE;
+
+	if (gpfn >= (KVM_P2M_SIZE >> 3))
+		panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
+
+	return *(base + gpfn);
+}
+
+u64 kvm_lookup_mpa(u64 gpfn)
+{
+	u64 maddr;
+	maddr = kvm_get_mpt_entry(gpfn);
+	return maddr&_PAGE_PPN_MASK;
+}
+
+u64 kvm_gpa_to_mpa(u64 gpa)
+{
+	u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
+	return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
+}
+
+/*
+ * Fetch guest bundle code.
+ * INPUT:
+ *  gip: guest ip
+ *  pbundle: used to return fetched bundle.
+ */
+int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
+{
+	u64     gpip = 0;   /* guest physical IP*/
+	u64     *vpa;
+	struct thash_data    *tlb;
+	u64     maddr;
+
+	if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
+		/* I-side physical mode */
+		gpip = gip;
+	} else {
+		tlb = vtlb_lookup(vcpu, gip, I_TLB);
+		if (tlb)
+			gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
+				(gip & (PSIZE(tlb->ps) - 1));
+	}
+	if (gpip) {
+		maddr = kvm_gpa_to_mpa(gpip);
+	} else {
+		tlb = vhpt_lookup(gip);
+		if (tlb == NULL) {
+			ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
+			return IA64_FAULT;
+		}
+		maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
+					| (gip & (PSIZE(tlb->ps) - 1));
+	}
+	vpa = (u64 *)__kvm_va(maddr);
+
+	pbundle->i64[0] = *vpa++;
+	pbundle->i64[1] = *vpa;
+
+	return IA64_NO_FAULT;
+}
+
+void kvm_init_vhpt(struct kvm_vcpu *v)
+{
+	v->arch.vhpt.num = VHPT_NUM_ENTRIES;
+	thash_init(&v->arch.vhpt, VHPT_SHIFT);
+	ia64_set_pta(v->arch.vhpt.pta.val);
+	/*Enable VHPT here?*/
+}
+
+void kvm_init_vtlb(struct kvm_vcpu *v)
+{
+	v->arch.vtlb.num = VTLB_NUM_ENTRIES;
+	thash_init(&v->arch.vtlb, VTLB_SHIFT);
+}
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
index d8536a2c22a..98771e2a78a 100644
--- a/arch/ia64/lib/Makefile
+++ b/arch/ia64/lib/Makefile
@@ -9,12 +9,11 @@ lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o			\
 	checksum.o clear_page.o csum_partial_copy.o			\
 	clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o	\
 	flush.o ip_fast_csum.o do_csum.o				\
-	memset.o strlen.o
+	memset.o strlen.o xor.o
 
-lib-$(CONFIG_ITANIUM)	+= copy_page.o copy_user.o memcpy.o
-lib-$(CONFIG_MCKINLEY)	+= copy_page_mck.o memcpy_mck.o
+obj-$(CONFIG_ITANIUM)	+= copy_page.o copy_user.o memcpy.o
+obj-$(CONFIG_MCKINLEY)	+= copy_page_mck.o memcpy_mck.o
 lib-$(CONFIG_PERFMON)	+= carta_random.o
-lib-$(CONFIG_MD_RAID5)	+= xor.o
 
 AFLAGS___divdi3.o	=
 AFLAGS___udivdi3.o	= -DUNSIGNED
diff --git a/arch/ia64/lib/checksum.c b/arch/ia64/lib/checksum.c
index beb11721d9f..9fc955026f8 100644
--- a/arch/ia64/lib/checksum.c
+++ b/arch/ia64/lib/checksum.c
@@ -33,33 +33,34 @@ from64to16 (unsigned long x)
  * computes the checksum of the TCP/UDP pseudo-header
  * returns a 16-bit checksum, already complemented.
  */
-unsigned short int
-csum_tcpudp_magic (unsigned long saddr, unsigned long daddr, unsigned short len,
-		   unsigned short proto, unsigned int sum)
+__sum16
+csum_tcpudp_magic (__be32 saddr, __be32 daddr, unsigned short len,
+		   unsigned short proto, __wsum sum)
 {
-	return ~from64to16(saddr + daddr + sum + ((unsigned long) ntohs(len) << 16) +
-			   ((unsigned long) proto << 8));
+	return (__force __sum16)~from64to16(
+		(__force u64)saddr + (__force u64)daddr +
+		(__force u64)sum + ((len + proto) << 8));
 }
 
 EXPORT_SYMBOL(csum_tcpudp_magic);
 
-unsigned int
-csum_tcpudp_nofold (unsigned long saddr, unsigned long daddr, unsigned short len,
-		    unsigned short proto, unsigned int sum)
+__wsum
+csum_tcpudp_nofold (__be32 saddr, __be32 daddr, unsigned short len,
+		    unsigned short proto, __wsum sum)
 {
 	unsigned long result;
 
-	result = (saddr + daddr + sum +
-		  ((unsigned long) ntohs(len) << 16) +
-		  ((unsigned long) proto << 8));
+	result = (__force u64)saddr + (__force u64)daddr +
+		 (__force u64)sum + ((len + proto) << 8);
 
 	/* Fold down to 32-bits so we don't lose in the typedef-less network stack.  */
 	/* 64 to 33 */
 	result = (result & 0xffffffff) + (result >> 32);
 	/* 33 to 32 */
 	result = (result & 0xffffffff) + (result >> 32);
-	return result;
+	return (__force __wsum)result;
 }
+EXPORT_SYMBOL(csum_tcpudp_nofold);
 
 extern unsigned long do_csum (const unsigned char *, long);
 
@@ -75,16 +76,15 @@ extern unsigned long do_csum (const unsigned char *, long);
  *
  * it's best to have buff aligned on a 32-bit boundary
  */
-unsigned int
-csum_partial (const unsigned char * buff, int len, unsigned int sum)
+__wsum csum_partial(const void *buff, int len, __wsum sum)
 {
-	unsigned long result = do_csum(buff, len);
+	u64 result = do_csum(buff, len);
 
 	/* add in old sum, and carry.. */
-	result += sum;
+	result += (__force u32)sum;
 	/* 32+c bits -> 32 bits */
 	result = (result & 0xffffffff) + (result >> 32);
-	return result;
+	return (__force __wsum)result;
 }
 
 EXPORT_SYMBOL(csum_partial);
@@ -93,10 +93,9 @@ EXPORT_SYMBOL(csum_partial);
  * this routine is used for miscellaneous IP-like checksums, mainly
  * in icmp.c
  */
-unsigned short
-ip_compute_csum (unsigned char * buff, int len)
+__sum16 ip_compute_csum (const void *buff, int len)
 {
-	return ~do_csum(buff,len);
+	return (__force __sum16)~do_csum(buff,len);
 }
 
 EXPORT_SYMBOL(ip_compute_csum);
diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S
index d4987061dda..2d814e7ed19 100644
--- a/arch/ia64/lib/clear_page.S
+++ b/arch/ia64/lib/clear_page.S
@@ -8,7 +8,6 @@
  * 2/12/02 kchen	Tuned for both Itanium and McKinley
  * 3/08/02 davidm	Some more tweaking
  */
-#include <linux/config.h>
 
 #include <asm/asmmacro.h>
 #include <asm/page.h>
diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c
index 36866e8a5d2..118daf5a063 100644
--- a/arch/ia64/lib/csum_partial_copy.c
+++ b/arch/ia64/lib/csum_partial_copy.c
@@ -104,9 +104,9 @@ out:
  */
 extern unsigned long do_csum(const unsigned char *, long);
 
-static unsigned int
-do_csum_partial_copy_from_user (const unsigned char __user *src, unsigned char *dst,
-				int len, unsigned int psum, int *errp)
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst,
+				int len, __wsum psum, int *errp)
 {
 	unsigned long result;
 
@@ -122,30 +122,19 @@ do_csum_partial_copy_from_user (const unsigned char __user *src, unsigned char *
 	result = do_csum(dst, len);
 
 	/* add in old sum, and carry.. */
-	result += psum;
+	result += (__force u32)psum;
 	/* 32+c bits -> 32 bits */
 	result = (result & 0xffffffff) + (result >> 32);
-	return result;
+	return (__force __wsum)result;
 }
 
-unsigned int
-csum_partial_copy_from_user (const unsigned char __user *src, unsigned char *dst,
-			     int len, unsigned int sum, int *errp)
-{
-	if (!access_ok(VERIFY_READ, src, len)) {
-		*errp = -EFAULT;
-		memset(dst, 0, len);
-		return sum;
-	}
-
-	return do_csum_partial_copy_from_user(src, dst, len, sum, errp);
-}
+EXPORT_SYMBOL(csum_partial_copy_from_user);
 
-unsigned int
-csum_partial_copy_nocheck(const unsigned char __user *src, unsigned char *dst,
-			  int len, unsigned int sum)
+__wsum
+csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
 {
-	return do_csum_partial_copy_from_user(src, dst, len, sum, NULL);
+	return csum_partial_copy_from_user((__force const void __user *)src,
+					   dst, len, sum, NULL);
 }
 
 EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S
index 6bec2fc9f5b..1a431a5cf86 100644
--- a/arch/ia64/lib/do_csum.S
+++ b/arch/ia64/lib/do_csum.S
@@ -201,7 +201,7 @@ GLOBAL_ENTRY(do_csum)
 	;;
 (p6)	adds result1[0]=1,result1[0]
 (p9)	br.cond.sptk .do_csum_exit	// if (count == 1) exit
-	// Fall through to caluculate the checksum, feeding result1[0] as
+	// Fall through to calculate the checksum, feeding result1[0] as
 	// the initial value in result1[0].
 	//
 	// Calculate the checksum loading two 8-byte words per loop.
diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S
index 2a0d27f2f21..1d8c8886006 100644
--- a/arch/ia64/lib/flush.S
+++ b/arch/ia64/lib/flush.S
@@ -60,3 +60,58 @@ GLOBAL_ENTRY(flush_icache_range)
 	mov	ar.lc=r3		// restore ar.lc
 	br.ret.sptk.many rp
 END(flush_icache_range)
+
+	/*
+	 * clflush_cache_range(start,size)
+	 *
+	 *	Flush cache lines from start to start+size-1.
+	 *
+	 *	Must deal with range from start to start+size-1 but nothing else
+	 *	(need to be careful not to touch addresses that may be
+	 *	unmapped).
+	 *
+	 *	Note: "in0" and "in1" are preserved for debugging purposes.
+	 */
+	.section .kprobes.text,"ax"
+GLOBAL_ENTRY(clflush_cache_range)
+
+	.prologue
+	alloc	r2=ar.pfs,2,0,0,0
+	movl	r3=ia64_cache_stride_shift
+	mov	r21=1
+	add     r22=in1,in0
+	;;
+	ld8	r20=[r3]		// r20: stride shift
+	sub	r22=r22,r0,1		// last byte address
+	;;
+	shr.u	r23=in0,r20		// start / (stride size)
+	shr.u	r22=r22,r20		// (last byte address) / (stride size)
+	shl	r21=r21,r20		// r21: stride size of the i-cache(s)
+	;;
+	sub	r8=r22,r23		// number of strides - 1
+	shl	r24=r23,r20		// r24: addresses for "fc" =
+					//	"start" rounded down to stride
+					//	boundary
+	.save	ar.lc,r3
+	mov	r3=ar.lc		// save ar.lc
+	;;
+
+	.body
+	mov	ar.lc=r8
+	;;
+	/*
+	 * 32 byte aligned loop, even number of (actually 2) bundles
+	 */
+.Loop_fc:
+	fc	r24		// issuable on M0 only
+	add	r24=r21,r24	// we flush "stride size" bytes per iteration
+	nop.i	0
+	br.cloop.sptk.few .Loop_fc
+	;;
+	sync.i
+	;;
+	srlz.i
+	;;
+	mov	ar.lc=r3		// restore ar.lc
+	br.ret.sptk.many rp
+END(clflush_cache_range)
diff --git a/arch/ia64/lib/io.c b/arch/ia64/lib/io.c
index 8949e44091a..bcd16f8ad9d 100644
--- a/arch/ia64/lib/io.c
+++ b/arch/ia64/lib/io.c
@@ -1,4 +1,3 @@
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
 
diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S
index 19674ca2acf..620d9dc5220 100644
--- a/arch/ia64/lib/ip_fast_csum.S
+++ b/arch/ia64/lib/ip_fast_csum.S
@@ -8,8 +8,8 @@
  *      in0: address of buffer to checksum (char *)
  *      in1: length of the buffer (int)
  *
- * Copyright (C) 2002 Intel Corp.
- * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
+ * Copyright (C) 2002, 2006 Intel Corp.
+ * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
  */
 
 #include <asm/asmmacro.h>
@@ -25,6 +25,9 @@
 
 #define in0	r32
 #define in1	r33
+#define in2	r34
+#define in3	r35
+#define in4	r36
 #define ret0	r8
 
 GLOBAL_ENTRY(ip_fast_csum)
@@ -65,8 +68,9 @@ GLOBAL_ENTRY(ip_fast_csum)
 	zxt2	r20=r20
 	;;
 	add	r20=ret0,r20
+	mov	r9=0xffff
 	;;
-	andcm	ret0=-1,r20
+	andcm	ret0=r9,r20
 	.restore sp		// reset frame state
 	br.ret.sptk.many b0
 	;;
@@ -88,3 +92,53 @@ GLOBAL_ENTRY(ip_fast_csum)
 	mov	b0=r34
 	br.ret.sptk.many b0
 END(ip_fast_csum)
+
+GLOBAL_ENTRY(csum_ipv6_magic)
+	ld4	r20=[in0],4
+	ld4	r21=[in1],4
+	zxt4	in2=in2
+	;;
+	ld4	r22=[in0],4
+	ld4	r23=[in1],4
+	dep	r15=in3,in2,32,16
+	;;
+	ld4	r24=[in0],4
+	ld4	r25=[in1],4
+	mux1	r15=r15,@rev
+	add	r16=r20,r21
+	add	r17=r22,r23
+	zxt4	in4=in4
+	;;
+	ld4	r26=[in0],4
+	ld4	r27=[in1],4
+	shr.u	r15=r15,16
+	add	r18=r24,r25
+	add	r8=r16,r17
+	;;
+	add	r19=r26,r27
+	add	r8=r8,r18
+	;;
+	add	r8=r8,r19
+	add	r15=r15,in4
+	;;
+	add	r8=r8,r15
+	;;
+	shr.u	r10=r8,32	// now fold sum into short
+	zxt4	r11=r8
+	;;
+	add	r8=r10,r11
+	;;
+	shr.u	r10=r8,16	// yeah, keep it rolling
+	zxt2	r11=r8
+	;;
+	add	r8=r10,r11
+	;;
+	shr.u	r10=r8,16	// three times lucky
+	zxt2	r11=r8
+	;;
+	add	r8=r10,r11
+	mov	r9=0xffff
+	;;
+	andcm	r8=r9,r8
+	br.ret.sptk.many b0
+END(csum_ipv6_magic)
diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S
index 9e534d52b1d..ab0f8763972 100644
--- a/arch/ia64/lib/memcpy_mck.S
+++ b/arch/ia64/lib/memcpy_mck.S
@@ -13,7 +13,6 @@
  * Copyright (C) 2002 Intel Corp.
  * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
  */
-#include <linux/config.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
 
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 84fd1c14c8a..52715a71aed 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -14,10 +14,11 @@
  * Routines used by ia64 machines with contiguous (or virtually contiguous)
  * memory.
  */
-#include <linux/config.h>
 #include <linux/bootmem.h>
 #include <linux/efi.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
+#include <linux/nmi.h>
 #include <linux/swap.h>
 
 #include <asm/meminit.h>
@@ -27,68 +28,13 @@
 #include <asm/mca.h>
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-static unsigned long num_dma_physpages;
+static unsigned long max_gap;
 #endif
 
-/**
- * show_mem - display a memory statistics summary
- *
- * Just walks the pages in the system and describes where they're allocated.
- */
-void
-show_mem (void)
-{
-	int i, total = 0, reserved = 0;
-	int shared = 0, cached = 0;
-
-	printk("Mem-info:\n");
-	show_free_areas();
-
-	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
-	i = max_mapnr;
-	while (i-- > 0) {
-		if (!pfn_valid(i))
-			continue;
-		total++;
-		if (PageReserved(mem_map+i))
-			reserved++;
-		else if (PageSwapCache(mem_map+i))
-			cached++;
-		else if (page_count(mem_map + i))
-			shared += page_count(mem_map + i) - 1;
-	}
-	printk("%d pages of RAM\n", total);
-	printk("%d reserved pages\n", reserved);
-	printk("%d pages shared\n", shared);
-	printk("%d pages swap cached\n", cached);
-	printk("%ld pages in page table cache\n",
-		pgtable_quicklist_total_size());
-}
-
 /* physical address where the bootmem map is located */
 unsigned long bootmap_start;
 
 /**
- * find_max_pfn - adjust the maximum page number callback
- * @start: start of range
- * @end: end of range
- * @arg: address of pointer to global max_pfn variable
- *
- * Passed as a callback function to efi_memmap_walk() to determine the highest
- * available page frame number in the system.
- */
-int
-find_max_pfn (unsigned long start, unsigned long end, void *arg)
-{
-	unsigned long *max_pfnp = arg, pfn;
-
-	pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
-	if (pfn > *max_pfnp)
-		*max_pfnp = pfn;
-	return 0;
-}
-
-/**
  * find_bootmap_location - callback to find a memory area for the bootmap
  * @start: start of region
  * @end: end of region
@@ -98,10 +44,10 @@ find_max_pfn (unsigned long start, unsigned long end, void *arg)
  * bootmap_start.  This address must be page-aligned.
  */
 static int __init
-find_bootmap_location (unsigned long start, unsigned long end, void *arg)
+find_bootmap_location (u64 start, u64 end, void *arg)
 {
-	unsigned long needed = *(unsigned long *)arg;
-	unsigned long range_start, range_end, free_start;
+	u64 needed = *(unsigned long *)arg;
+	u64 range_start, range_end, free_start;
 	int i;
 
 #if IGNORE_PFN0
@@ -135,6 +81,112 @@ find_bootmap_location (unsigned long start, unsigned long end, void *arg)
 	return 0;
 }
 
+#ifdef CONFIG_SMP
+static void *cpu_data;
+/**
+ * per_cpu_init - setup per-cpu variables
+ *
+ * Allocate and setup per-cpu data areas.
+ */
+void *per_cpu_init(void)
+{
+	static bool first_time = true;
+	void *cpu0_data = __cpu0_per_cpu;
+	unsigned int cpu;
+
+	if (!first_time)
+		goto skip;
+	first_time = false;
+
+	/*
+	 * get_free_pages() cannot be used before cpu_init() done.
+	 * BSP allocates PERCPU_PAGE_SIZE bytes for all possible CPUs
+	 * to avoid that AP calls get_zeroed_page().
+	 */
+	for_each_possible_cpu(cpu) {
+		void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start;
+
+		memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start);
+		__per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start;
+		per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+
+		/*
+		 * percpu area for cpu0 is moved from the __init area
+		 * which is setup by head.S and used till this point.
+		 * Update ar.k3.  This move is ensures that percpu
+		 * area for cpu0 is on the correct node and its
+		 * virtual address isn't insanely far from other
+		 * percpu areas which is important for congruent
+		 * percpu allocator.
+		 */
+		if (cpu == 0)
+			ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) -
+				    (unsigned long)__per_cpu_start);
+
+		cpu_data += PERCPU_PAGE_SIZE;
+	}
+skip:
+	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
+}
+
+static inline void
+alloc_per_cpu_data(void)
+{
+	cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * num_possible_cpus(),
+				   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+}
+
+/**
+ * setup_per_cpu_areas - setup percpu areas
+ *
+ * Arch code has already allocated and initialized percpu areas.  All
+ * this function has to do is to teach the determined layout to the
+ * dynamic percpu allocator, which happens to be more complex than
+ * creating whole new ones using helpers.
+ */
+void __init
+setup_per_cpu_areas(void)
+{
+	struct pcpu_alloc_info *ai;
+	struct pcpu_group_info *gi;
+	unsigned int cpu;
+	ssize_t static_size, reserved_size, dyn_size;
+	int rc;
+
+	ai = pcpu_alloc_alloc_info(1, num_possible_cpus());
+	if (!ai)
+		panic("failed to allocate pcpu_alloc_info");
+	gi = &ai->groups[0];
+
+	/* units are assigned consecutively to possible cpus */
+	for_each_possible_cpu(cpu)
+		gi->cpu_map[gi->nr_units++] = cpu;
+
+	/* set parameters */
+	static_size = __per_cpu_end - __per_cpu_start;
+	reserved_size = PERCPU_MODULE_RESERVE;
+	dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
+	if (dyn_size < 0)
+		panic("percpu area overflow static=%zd reserved=%zd\n",
+		      static_size, reserved_size);
+
+	ai->static_size		= static_size;
+	ai->reserved_size	= reserved_size;
+	ai->dyn_size		= dyn_size;
+	ai->unit_size		= PERCPU_PAGE_SIZE;
+	ai->atom_size		= PAGE_SIZE;
+	ai->alloc_size		= PERCPU_PAGE_SIZE;
+
+	rc = pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]);
+	if (rc)
+		panic("failed to setup percpu area (err=%d)", rc);
+
+	pcpu_free_alloc_info(ai);
+}
+#else
+#define alloc_per_cpu_data() do { } while (0)
+#endif /* CONFIG_SMP */
+
 /**
  * find_memory - setup memory map
  *
@@ -149,9 +201,10 @@ find_memory (void)
 	reserve_memory();
 
 	/* first find highest page frame number */
-	max_pfn = 0;
-	efi_memmap_walk(find_max_pfn, &max_pfn);
-
+	min_low_pfn = ~0UL;
+	max_low_pfn = 0;
+	efi_memmap_walk(find_max_min_low_pfn, NULL);
+	max_pfn = max_low_pfn;
 	/* how many bytes to cover all the pages */
 	bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
 
@@ -161,69 +214,18 @@ find_memory (void)
 	if (bootmap_start == ~0UL)
 		panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
 
-	bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
+	bootmap_size = init_bootmem_node(NODE_DATA(0),
+			(bootmap_start >> PAGE_SHIFT), 0, max_pfn);
 
 	/* Free all available memory, then mark bootmem-map as being in use. */
 	efi_memmap_walk(filter_rsvd_memory, free_bootmem);
-	reserve_bootmem(bootmap_start, bootmap_size);
+	reserve_bootmem(bootmap_start, bootmap_size, BOOTMEM_DEFAULT);
 
 	find_initrd();
-}
 
-#ifdef CONFIG_SMP
-/**
- * per_cpu_init - setup per-cpu variables
- *
- * Allocate and setup per-cpu data areas.
- */
-void * __cpuinit
-per_cpu_init (void)
-{
-	void *cpu_data;
-	int cpu;
-	static int first_time=1;
-
-	/*
-	 * get_free_pages() cannot be used before cpu_init() done.  BSP
-	 * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
-	 * get_zeroed_page().
-	 */
-	if (first_time) {
-		first_time=0;
-		cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
-					   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
-			__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
-			cpu_data += PERCPU_PAGE_SIZE;
-			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
-		}
-	}
-	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
-}
-#endif /* CONFIG_SMP */
-
-static int
-count_pages (u64 start, u64 end, void *arg)
-{
-	unsigned long *count = arg;
-
-	*count += (end - start) >> PAGE_SHIFT;
-	return 0;
+	alloc_per_cpu_data();
 }
 
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-static int
-count_dma_pages (u64 start, u64 end, void *arg)
-{
-	unsigned long *count = arg;
-
-	if (start < MAX_DMA_ADDRESS)
-		*count += (min(end, MAX_DMA_ADDRESS) - start) >> PAGE_SHIFT;
-	return 0;
-}
-#endif
-
 /*
  * Set up the page tables.
  */
@@ -232,71 +234,45 @@ void __init
 paging_init (void)
 {
 	unsigned long max_dma;
-	unsigned long zones_size[MAX_NR_ZONES];
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-	unsigned long zholes_size[MAX_NR_ZONES];
-	unsigned long max_gap;
-#endif
-
-	/* initialize mem_map[] */
-
-	memset(zones_size, 0, sizeof(zones_size));
-
-	num_physpages = 0;
-	efi_memmap_walk(count_pages, &num_physpages);
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
+	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
 	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+	max_zone_pfns[ZONE_DMA] = max_dma;
+#endif
+	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-	memset(zholes_size, 0, sizeof(zholes_size));
-
-	num_dma_physpages = 0;
-	efi_memmap_walk(count_dma_pages, &num_dma_physpages);
-
-	if (max_low_pfn < max_dma) {
-		zones_size[ZONE_DMA] = max_low_pfn;
-		zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages;
-	} else {
-		zones_size[ZONE_DMA] = max_dma;
-		zholes_size[ZONE_DMA] = max_dma - num_dma_physpages;
-		if (num_physpages > num_dma_physpages) {
-			zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
-			zholes_size[ZONE_NORMAL] =
-				((max_low_pfn - max_dma) -
-				 (num_physpages - num_dma_physpages));
-		}
-	}
-
-	max_gap = 0;
+	efi_memmap_walk(filter_memory, register_active_ranges);
 	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
 	if (max_gap < LARGE_GAP) {
 		vmem_map = (struct page *) 0;
-		free_area_init_node(0, NODE_DATA(0), zones_size, 0,
-				    zholes_size);
+		free_area_init_nodes(max_zone_pfns);
 	} else {
 		unsigned long map_size;
 
 		/* allocate virtual_mem_map */
 
-		map_size = PAGE_ALIGN(max_low_pfn * sizeof(struct page));
-		vmalloc_end -= map_size;
-		vmem_map = (struct page *) vmalloc_end;
+		map_size = PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
+			sizeof(struct page));
+		VMALLOC_END -= map_size;
+		vmem_map = (struct page *) VMALLOC_END;
 		efi_memmap_walk(create_mem_map_page_table, NULL);
 
-		NODE_DATA(0)->node_mem_map = vmem_map;
-		free_area_init_node(0, NODE_DATA(0), zones_size,
-				    0, zholes_size);
+		/*
+		 * alloc_node_mem_map makes an adjustment for mem_map
+		 * which isn't compatible with vmem_map.
+		 */
+		NODE_DATA(0)->node_mem_map = vmem_map +
+			find_min_pfn_with_active_regions();
+		free_area_init_nodes(max_zone_pfns);
 
 		printk("Virtual mem_map starts at 0x%p\n", mem_map);
 	}
 #else /* !CONFIG_VIRTUAL_MEM_MAP */
-	if (max_low_pfn < max_dma)
-		zones_size[ZONE_DMA] = max_low_pfn;
-	else {
-		zones_size[ZONE_DMA] = max_dma;
-		zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
-	}
-	free_area_init(zones_size);
+	memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
+	free_area_init_nodes(max_zone_pfns);
 #endif /* !CONFIG_VIRTUAL_MEM_MAP */
 	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
 }
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index b6bcc9fa360..87862680536 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -16,11 +16,13 @@
 
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/nmi.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
 #include <linux/efi.h>
 #include <linux/nodemask.h>
+#include <linux/slab.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/meminit.h>
@@ -33,12 +35,11 @@
  */
 struct early_node_data {
 	struct ia64_node_data *node_data;
-	pg_data_t *pgdat;
 	unsigned long pernode_addr;
 	unsigned long pernode_size;
-	struct bootmem_data bootmem_data;
-	unsigned long num_physpages;
+#ifdef CONFIG_ZONE_DMA
 	unsigned long num_dma_physpages;
+#endif
 	unsigned long min_pfn;
 	unsigned long max_pfn;
 };
@@ -46,6 +47,8 @@ struct early_node_data {
 static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
 static nodemask_t memory_less_mask __initdata;
 
+pg_data_t *pgdat_list[MAX_NUMNODES];
+
 /*
  * To prevent cache aliasing effects, align per-node structures so that they
  * start at addresses that are strided by node number.
@@ -71,23 +74,20 @@ static nodemask_t memory_less_mask __initdata;
 static int __init build_node_maps(unsigned long start, unsigned long len,
 				  int node)
 {
-	unsigned long cstart, epfn, end = start + len;
-	struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+	unsigned long spfn, epfn, end = start + len;
+	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
-	cstart = GRANULEROUNDDOWN(start);
+	spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;
 
 	if (!bdp->node_low_pfn) {
-		bdp->node_boot_start = cstart;
+		bdp->node_min_pfn = spfn;
 		bdp->node_low_pfn = epfn;
 	} else {
-		bdp->node_boot_start = min(cstart, bdp->node_boot_start);
+		bdp->node_min_pfn = min(spfn, bdp->node_min_pfn);
 		bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
 	}
 
-	min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);
-	max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);
-
 	return 0;
 }
 
@@ -99,11 +99,11 @@ static int __init build_node_maps(unsigned long start, unsigned long len,
  * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
  * called yet.  Note that node 0 will also count all non-existent cpus.
  */
-static int __init early_nr_cpus_node(int node)
+static int __meminit early_nr_cpus_node(int node)
 {
 	int cpu, n = 0;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
+	for_each_possible_early_cpu(cpu)
 		if (node == node_cpuid[cpu].nid)
 			n++;
 
@@ -114,7 +114,7 @@ static int __init early_nr_cpus_node(int node)
  * compute_pernodesize - compute size of pernode data
  * @node: the node id.
  */
-static unsigned long __init compute_pernodesize(int node)
+static unsigned long __meminit compute_pernodesize(int node)
 {
 	unsigned long pernodesize = 0, cpus;
 
@@ -123,6 +123,7 @@ static unsigned long __init compute_pernodesize(int node)
 	pernodesize += node * L1_CACHE_BYTES;
 	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
 	pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
 	pernodesize = PAGE_ALIGN(pernodesize);
 	return pernodesize;
 }
@@ -141,19 +142,121 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
 #ifdef CONFIG_SMP
 	int cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-		if (node == node_cpuid[cpu].nid) {
-			memcpy(__va(cpu_data), __phys_per_cpu_start,
-			       __per_cpu_end - __per_cpu_start);
-			__per_cpu_offset[cpu] = (char*)__va(cpu_data) -
-				__per_cpu_start;
-			cpu_data += PERCPU_PAGE_SIZE;
-		}
+	for_each_possible_early_cpu(cpu) {
+		void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;
+
+		if (node != node_cpuid[cpu].nid)
+			continue;
+
+		memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
+		__per_cpu_offset[cpu] = (char *)__va(cpu_data) -
+			__per_cpu_start;
+
+		/*
+		 * percpu area for cpu0 is moved from the __init area
+		 * which is setup by head.S and used till this point.
+		 * Update ar.k3.  This move is ensures that percpu
+		 * area for cpu0 is on the correct node and its
+		 * virtual address isn't insanely far from other
+		 * percpu areas which is important for congruent
+		 * percpu allocator.
+		 */
+		if (cpu == 0)
+			ia64_set_kr(IA64_KR_PER_CPU_DATA,
+				    (unsigned long)cpu_data -
+				    (unsigned long)__per_cpu_start);
+
+		cpu_data += PERCPU_PAGE_SIZE;
 	}
 #endif
 	return cpu_data;
 }
 
+#ifdef CONFIG_SMP
+/**
+ * setup_per_cpu_areas - setup percpu areas
+ *
+ * Arch code has already allocated and initialized percpu areas.  All
+ * this function has to do is to teach the determined layout to the
+ * dynamic percpu allocator, which happens to be more complex than
+ * creating whole new ones using helpers.
+ */
+void __init setup_per_cpu_areas(void)
+{
+	struct pcpu_alloc_info *ai;
+	struct pcpu_group_info *uninitialized_var(gi);
+	unsigned int *cpu_map;
+	void *base;
+	unsigned long base_offset;
+	unsigned int cpu;
+	ssize_t static_size, reserved_size, dyn_size;
+	int node, prev_node, unit, nr_units, rc;
+
+	ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
+	if (!ai)
+		panic("failed to allocate pcpu_alloc_info");
+	cpu_map = ai->groups[0].cpu_map;
+
+	/* determine base */
+	base = (void *)ULONG_MAX;
+	for_each_possible_cpu(cpu)
+		base = min(base,
+			   (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
+	base_offset = (void *)__per_cpu_start - base;
+
+	/* build cpu_map, units are grouped by node */
+	unit = 0;
+	for_each_node(node)
+		for_each_possible_cpu(cpu)
+			if (node == node_cpuid[cpu].nid)
+				cpu_map[unit++] = cpu;
+	nr_units = unit;
+
+	/* set basic parameters */
+	static_size = __per_cpu_end - __per_cpu_start;
+	reserved_size = PERCPU_MODULE_RESERVE;
+	dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
+	if (dyn_size < 0)
+		panic("percpu area overflow static=%zd reserved=%zd\n",
+		      static_size, reserved_size);
+
+	ai->static_size		= static_size;
+	ai->reserved_size	= reserved_size;
+	ai->dyn_size		= dyn_size;
+	ai->unit_size		= PERCPU_PAGE_SIZE;
+	ai->atom_size		= PAGE_SIZE;
+	ai->alloc_size		= PERCPU_PAGE_SIZE;
+
+	/*
+	 * CPUs are put into groups according to node.  Walk cpu_map
+	 * and create new groups at node boundaries.
+	 */
+	prev_node = -1;
+	ai->nr_groups = 0;
+	for (unit = 0; unit < nr_units; unit++) {
+		cpu = cpu_map[unit];
+		node = node_cpuid[cpu].nid;
+
+		if (node == prev_node) {
+			gi->nr_units++;
+			continue;
+		}
+		prev_node = node;
+
+		gi = &ai->groups[ai->nr_groups++];
+		gi->nr_units		= 1;
+		gi->base_offset		= __per_cpu_offset[cpu] + base_offset;
+		gi->cpu_map		= &cpu_map[unit];
+	}
+
+	rc = pcpu_setup_first_chunk(ai, base);
+	if (rc)
+		panic("failed to setup percpu area (err=%d)", rc);
+
+	pcpu_free_alloc_info(ai);
+}
+#endif
+
 /**
  * fill_pernode - initialize pernode data.
  * @node: the node id.
@@ -165,7 +268,7 @@ static void __init fill_pernode(int node, unsigned long pernode,
 {
 	void *cpu_data;
 	int cpus = early_nr_cpus_node(node);
-	struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	mem_data[node].pernode_addr = pernode;
 	mem_data[node].pernode_size = pernodesize;
@@ -175,13 +278,13 @@ static void __init fill_pernode(int node, unsigned long pernode,
 	pernode += PERCPU_PAGE_SIZE * cpus;
 	pernode += node * L1_CACHE_BYTES;
 
-	mem_data[node].pgdat = __va(pernode);
+	pgdat_list[node] = __va(pernode);
 	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
 
 	mem_data[node].node_data = __va(pernode);
 	pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
 
-	mem_data[node].pgdat->bdata = bdp;
+	pgdat_list[node]->bdata = bdp;
 	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
 
 	cpu_data = per_cpu_node_setup(cpu_data, node);
@@ -220,20 +323,21 @@ static void __init fill_pernode(int node, unsigned long pernode,
 static int __init find_pernode_space(unsigned long start, unsigned long len,
 				     int node)
 {
-	unsigned long epfn;
+	unsigned long spfn, epfn;
 	unsigned long pernodesize = 0, pernode, pages, mapsize;
-	struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+	struct bootmem_data *bdp = &bootmem_node_data[node];
 
+	spfn = start >> PAGE_SHIFT;
 	epfn = (start + len) >> PAGE_SHIFT;
 
-	pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT);
+	pages = bdp->node_low_pfn - bdp->node_min_pfn;
 	mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
 
 	/*
 	 * Make sure this memory falls within this node's usable memory
 	 * since we may have thrown some away in build_maps().
 	 */
-	if (start < bdp->node_boot_start || epfn > bdp->node_low_pfn)
+	if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn)
 		return 0;
 
 	/* Don't setup this node's local space twice... */
@@ -268,7 +372,7 @@ static int __init find_pernode_space(unsigned long start, unsigned long len,
 static int __init free_node_bootmem(unsigned long start, unsigned long len,
 				    int node)
 {
-	free_bootmem_node(mem_data[node].pgdat, start, len);
+	free_bootmem_node(pgdat_list[node], start, len);
 
 	return 0;
 }
@@ -287,7 +391,7 @@ static void __init reserve_pernode_space(void)
 	int node;
 
 	for_each_online_node(node) {
-		pg_data_t *pdp = mem_data[node].pgdat;
+		pg_data_t *pdp = pgdat_list[node];
 
 		if (node_isset(node, memory_less_mask))
 			continue;
@@ -295,15 +399,36 @@ static void __init reserve_pernode_space(void)
 		bdp = pdp->bdata;
 
 		/* First the bootmem_map itself */
-		pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
+		pages = bdp->node_low_pfn - bdp->node_min_pfn;
 		size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
 		base = __pa(bdp->node_bootmem_map);
-		reserve_bootmem_node(pdp, base, size);
+		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
 
 		/* Now the per-node space */
 		size = mem_data[node].pernode_size;
 		base = __pa(mem_data[node].pernode_addr);
-		reserve_bootmem_node(pdp, base, size);
+		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
+	}
+}
+
+static void __meminit scatter_node_data(void)
+{
+	pg_data_t **dst;
+	int node;
+
+	/*
+	 * for_each_online_node() can't be used at here.
+	 * node_online_map is not set for hot-added nodes at this time,
+	 * because we are halfway through initialization of the new node's
+	 * structures.  If for_each_online_node() is used, a new node's
+	 * pg_data_ptrs will be not initialized. Instead of using it,
+	 * pgdat_list[] is checked.
+	 */
+	for_each_node(node) {
+		if (pgdat_list[node]) {
+			dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
+			memcpy(dst, pgdat_list, sizeof(pgdat_list));
+		}
 	}
 }
 
@@ -317,22 +442,16 @@ static void __init reserve_pernode_space(void)
  */
 static void __init initialize_pernode_data(void)
 {
-	pg_data_t *pgdat_list[MAX_NUMNODES];
 	int cpu, node;
 
-	for_each_online_node(node)
-		pgdat_list[node] = mem_data[node].pgdat;
+	scatter_node_data();
 
-	/* Copy the pg_data_t list to each node and init the node field */
-	for_each_online_node(node) {
-		memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
-		       sizeof(pgdat_list));
-	}
 #ifdef CONFIG_SMP
 	/* Set the node_data pointer for each per-cpu struct */
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_early_cpu(cpu) {
 		node = node_cpuid[cpu].nid;
-		per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
+		per_cpu(ia64_cpu_info, cpu).node_data =
+			mem_data[node].node_data;
 	}
 #else
 	{
@@ -340,7 +459,7 @@ static void __init initialize_pernode_data(void)
 		cpu = 0;
 		node = node_cpuid[cpu].nid;
 		cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
-			((char *)&per_cpu__cpu_info - __per_cpu_start));
+			((char *)&ia64_cpu_info - __per_cpu_start));
 		cpu0_cpu_info->node_data = mem_data[node].node_data;
 	}
 #endif /* CONFIG_SMP */
@@ -372,7 +491,7 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
 	if (bestnode == -1)
 		bestnode = anynode;
 
-	ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat, pernodesize,
+	ptr = __alloc_bootmem_node(pgdat_list[bestnode], pernodesize,
 		PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 
 	return ptr;
@@ -397,37 +516,6 @@ static void __init memory_less_nodes(void)
 	return;
 }
 
-#ifdef CONFIG_SPARSEMEM
-/**
- * register_sparse_mem - notify SPARSEMEM that this memory range exists.
- * @start: physical start of range
- * @end: physical end of range
- * @arg: unused
- *
- * Simply calls SPARSEMEM to register memory section(s).
- */
-static int __init register_sparse_mem(unsigned long start, unsigned long end,
-	void *arg)
-{
-	int nid;
-
-	start = __pa(start) >> PAGE_SHIFT;
-	end = __pa(end) >> PAGE_SHIFT;
-	nid = early_pfn_to_nid(start);
-	memory_present(nid, start, end);
-
-	return 0;
-}
-
-static void __init arch_sparse_init(void)
-{
-	efi_memmap_walk(register_sparse_mem, NULL);
-	sparse_init();
-}
-#else
-#define arch_sparse_init() do {} while (0)
-#endif
-
 /**
  * find_memory - walk the EFI memory map and setup the bootmem allocator
  *
@@ -452,12 +540,16 @@ void __init find_memory(void)
 	/* These actually end up getting called by call_pernode_memory() */
 	efi_memmap_walk(filter_rsvd_memory, build_node_maps);
 	efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
+	efi_memmap_walk(find_max_min_low_pfn, NULL);
 
 	for_each_online_node(node)
-		if (mem_data[node].bootmem_data.node_low_pfn) {
+		if (bootmem_node_data[node].node_low_pfn) {
 			node_clear(node, memory_less_mask);
 			mem_data[node].min_pfn = ~0UL;
 		}
+
+	efi_memmap_walk(filter_memory, register_active_ranges);
+
 	/*
 	 * Initialize the boot memory maps in reverse order since that's
 	 * what the bootmem allocator expects
@@ -471,14 +563,14 @@ void __init find_memory(void)
 		else if (node_isset(node, memory_less_mask))
 			continue;
 
-		bdp = &mem_data[node].bootmem_data;
+		bdp = &bootmem_node_data[node];
 		pernode = mem_data[node].pernode_addr;
 		pernodesize = mem_data[node].pernode_size;
 		map = pernode + pernodesize;
 
-		init_bootmem_node(mem_data[node].pgdat,
+		init_bootmem_node(pgdat_list[node],
 				  map>>PAGE_SHIFT,
-				  bdp->node_boot_start>>PAGE_SHIFT,
+				  bdp->node_min_pfn,
 				  bdp->node_low_pfn);
 	}
 
@@ -500,18 +592,14 @@ void __init find_memory(void)
  * find_pernode_space() does most of this already, we just need to set
  * local_per_cpu_offset
  */
-void __cpuinit *per_cpu_init(void)
+void *per_cpu_init(void)
 {
 	int cpu;
 	static int first_time = 1;
 
-
-	if (smp_processor_id() != 0)
-		return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
-
 	if (first_time) {
 		first_time = 0;
-		for (cpu = 0; cpu < NR_CPUS; cpu++)
+		for_each_possible_early_cpu(cpu)
 			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
 	}
 
@@ -519,126 +607,6 @@ void __cpuinit *per_cpu_init(void)
 }
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-static inline int find_next_valid_pfn_for_pgdat(pg_data_t *pgdat, int i)
-{
-	unsigned long end_address, hole_next_pfn;
-	unsigned long stop_address;
-
-	end_address = (unsigned long) &vmem_map[pgdat->node_start_pfn + i];
-	end_address = PAGE_ALIGN(end_address);
-
-	stop_address = (unsigned long) &vmem_map[
-		pgdat->node_start_pfn + pgdat->node_spanned_pages];
-
-	do {
-		pgd_t *pgd;
-		pud_t *pud;
-		pmd_t *pmd;
-		pte_t *pte;
-
-		pgd = pgd_offset_k(end_address);
-		if (pgd_none(*pgd)) {
-			end_address += PGDIR_SIZE;
-			continue;
-		}
-
-		pud = pud_offset(pgd, end_address);
-		if (pud_none(*pud)) {
-			end_address += PUD_SIZE;
-			continue;
-		}
-
-		pmd = pmd_offset(pud, end_address);
-		if (pmd_none(*pmd)) {
-			end_address += PMD_SIZE;
-			continue;
-		}
-
-		pte = pte_offset_kernel(pmd, end_address);
-retry_pte:
-		if (pte_none(*pte)) {
-			end_address += PAGE_SIZE;
-			pte++;
-			if ((end_address < stop_address) &&
-			    (end_address != ALIGN(end_address, 1UL << PMD_SHIFT)))
-				goto retry_pte;
-			continue;
-		}
-		/* Found next valid vmem_map page */
-		break;
-	} while (end_address < stop_address);
-
-	end_address = min(end_address, stop_address);
-	end_address = end_address - (unsigned long) vmem_map + sizeof(struct page) - 1;
-	hole_next_pfn = end_address / sizeof(struct page);
-	return hole_next_pfn - pgdat->node_start_pfn;
-}
-#else
-static inline int find_next_valid_pfn_for_pgdat(pg_data_t *pgdat, int i)
-{
-	return i + 1;
-}
-#endif
-
-/**
- * show_mem - give short summary of memory stats
- *
- * Shows a simple page count of reserved and used pages in the system.
- * For discontig machines, it does this on a per-pgdat basis.
- */
-void show_mem(void)
-{
-	int i, total_reserved = 0;
-	int total_shared = 0, total_cached = 0;
-	unsigned long total_present = 0;
-	pg_data_t *pgdat;
-
-	printk("Mem-info:\n");
-	show_free_areas();
-	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
-	for_each_online_pgdat(pgdat) {
-		unsigned long present;
-		unsigned long flags;
-		int shared = 0, cached = 0, reserved = 0;
-
-		printk("Node ID: %d\n", pgdat->node_id);
-		pgdat_resize_lock(pgdat, &flags);
-		present = pgdat->node_present_pages;
-		for(i = 0; i < pgdat->node_spanned_pages; i++) {
-			struct page *page;
-			if (pfn_valid(pgdat->node_start_pfn + i))
-				page = pfn_to_page(pgdat->node_start_pfn + i);
-			else {
-				i = find_next_valid_pfn_for_pgdat(pgdat, i) - 1;
-				continue;
-			}
-			if (PageReserved(page))
-				reserved++;
-			else if (PageSwapCache(page))
-				cached++;
-			else if (page_count(page))
-				shared += page_count(page)-1;
-		}
-		pgdat_resize_unlock(pgdat, &flags);
-		total_present += present;
-		total_reserved += reserved;
-		total_cached += cached;
-		total_shared += shared;
-		printk("\t%ld pages of RAM\n", present);
-		printk("\t%d reserved pages\n", reserved);
-		printk("\t%d pages shared\n", shared);
-		printk("\t%d pages swap cached\n", cached);
-	}
-	printk("%ld pages of RAM\n", total_present);
-	printk("%d reserved pages\n", total_reserved);
-	printk("%d pages shared\n", total_shared);
-	printk("%d pages swap cached\n", total_cached);
-	printk("Total of %ld pages in page table cache\n",
-		pgtable_quicklist_total_size());
-	printk("%d free buffer pages\n", nr_free_buffer_pages());
-}
-
 /**
  * call_pernode_memory - use SRAT to call callback functions with node info
  * @start: physical start of range
@@ -700,12 +668,12 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n
 {
 	unsigned long end = start + len;
 
-	mem_data[node].num_physpages += len >> PAGE_SHIFT;
+#ifdef CONFIG_ZONE_DMA
 	if (start <= __pa(MAX_DMA_ADDRESS))
 		mem_data[node].num_dma_physpages +=
 			(min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
+#endif
 	start = GRANULEROUNDDOWN(start);
-	start = ORDERROUNDDOWN(start);
 	end = GRANULEROUNDUP(end);
 	mem_data[node].max_pfn = max(mem_data[node].max_pfn,
 				     end >> PAGE_SHIFT);
@@ -724,65 +692,73 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n
 void __init paging_init(void)
 {
 	unsigned long max_dma;
-	unsigned long zones_size[MAX_NR_ZONES];
-	unsigned long zholes_size[MAX_NR_ZONES];
 	unsigned long pfn_offset = 0;
+	unsigned long max_pfn = 0;
 	int node;
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
 	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 
-	arch_sparse_init();
-
 	efi_memmap_walk(filter_rsvd_memory, count_node_pages);
 
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
+	sparse_init();
+
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-	vmalloc_end -= PAGE_ALIGN(max_low_pfn * sizeof(struct page));
-	vmem_map = (struct page *) vmalloc_end;
+	VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
+		sizeof(struct page));
+	vmem_map = (struct page *) VMALLOC_END;
 	efi_memmap_walk(create_mem_map_page_table, NULL);
 	printk("Virtual mem_map starts at 0x%p\n", vmem_map);
 #endif
 
 	for_each_online_node(node) {
-		memset(zones_size, 0, sizeof(zones_size));
-		memset(zholes_size, 0, sizeof(zholes_size));
-
-		num_physpages += mem_data[node].num_physpages;
-
-		if (mem_data[node].min_pfn >= max_dma) {
-			/* All of this node's memory is above ZONE_DMA */
-			zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
-				mem_data[node].min_pfn;
-			zholes_size[ZONE_NORMAL] = mem_data[node].max_pfn -
-				mem_data[node].min_pfn -
-				mem_data[node].num_physpages;
-		} else if (mem_data[node].max_pfn < max_dma) {
-			/* All of this node's memory is in ZONE_DMA */
-			zones_size[ZONE_DMA] = mem_data[node].max_pfn -
-				mem_data[node].min_pfn;
-			zholes_size[ZONE_DMA] = mem_data[node].max_pfn -
-				mem_data[node].min_pfn -
-				mem_data[node].num_dma_physpages;
-		} else {
-			/* This node has memory in both zones */
-			zones_size[ZONE_DMA] = max_dma -
-				mem_data[node].min_pfn;
-			zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
-				mem_data[node].num_dma_physpages;
-			zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
-				max_dma;
-			zholes_size[ZONE_NORMAL] = zones_size[ZONE_NORMAL] -
-				(mem_data[node].num_physpages -
-				 mem_data[node].num_dma_physpages);
-		}
-
 		pfn_offset = mem_data[node].min_pfn;
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
 		NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
 #endif
-		free_area_init_node(node, NODE_DATA(node), zones_size,
-				    pfn_offset, zholes_size);
+		if (mem_data[node].max_pfn > max_pfn)
+			max_pfn = mem_data[node].max_pfn;
 	}
 
+	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
+	max_zone_pfns[ZONE_DMA] = max_dma;
+#endif
+	max_zone_pfns[ZONE_NORMAL] = max_pfn;
+	free_area_init_nodes(max_zone_pfns);
+
 	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
 }
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+pg_data_t *arch_alloc_nodedata(int nid)
+{
+	unsigned long size = compute_pernodesize(nid);
+
+	return kzalloc(size, GFP_KERNEL);
+}
+
+void arch_free_nodedata(pg_data_t *pgdat)
+{
+	kfree(pgdat);
+}
+
+void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
+{
+	pgdat_list[update_node] = update_pgdat;
+	scatter_node_data();
+}
+#endif
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+{
+	return vmemmap_populate_basepages(start, end, node);
+}
+
+void vmemmap_free(unsigned long start, unsigned long end)
+{
+}
+#endif
diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c
index 6d259e34f35..c99a41e29fe 100644
--- a/arch/ia64/mm/extable.c
+++ b/arch/ia64/mm/extable.c
@@ -5,11 +5,10 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
 
-#include <linux/config.h>
 #include <linux/sort.h>
 
 #include <asm/uaccess.h>
-#include <asm/module.h>
+#include <linux/module.h>
 
 static int cmp_ex(const void *a, const void *b)
 {
@@ -54,6 +53,32 @@ void sort_extable (struct exception_table_entry *start,
 	     cmp_ex, swap_ex);
 }
 
+static inline unsigned long ex_to_addr(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->addr + x->addr;
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Any entry referring to the module init will be at the beginning or
+ * the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/*trim the beginning*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
+		m->extable++;
+		m->num_exentries--;
+	}
+	/*trim the end*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
+				  m))
+		m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+
 const struct exception_table_entry *
 search_extable (const struct exception_table_entry *first,
 		const struct exception_table_entry *last,
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index d98ec49570b..7225dad8709 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -7,17 +7,38 @@
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/prefetch.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
-#include <asm/system.h>
 #include <asm/uaccess.h>
-#include <asm/kdebug.h>
 
-extern void die (char *, struct pt_regs *, long);
+extern int die(char *, struct pt_regs *, long);
+
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
+{
+	int ret = 0;
+
+	if (!user_mode(regs)) {
+		/* kprobe_running() needs smp_processor_id() */
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, trap))
+			ret = 1;
+		preempt_enable();
+	}
+
+	return ret;
+}
+#else
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
+{
+	return 0;
+}
+#endif
 
 /*
  * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
@@ -51,6 +72,10 @@ mapped_kernel_page_is_present (unsigned long address)
 	return pte_present(pte);
 }
 
+#	define VM_READ_BIT	0
+#	define VM_WRITE_BIT	1
+#	define VM_EXEC_BIT	2
+
 void __kprobes
 ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs)
 {
@@ -59,6 +84,11 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 	struct mm_struct *mm = current->mm;
 	struct siginfo si;
 	unsigned long mask;
+	int fault;
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+
+	mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
+		| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
 
 	/* mmap_sem is performance critical.... */
 	prefetchw(&mm->mmap_sem);
@@ -84,18 +114,28 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 	/*
 	 * This is to handle the kprobes on user space access instructions
 	 */
-	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT,
-					SIGSEGV) == NOTIFY_STOP)
+	if (notify_page_fault(regs, TRAP_BRKPT))
 		return;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (mask & VM_WRITE)
+		flags |= FAULT_FLAG_WRITE;
+retry:
 	down_read(&mm->mmap_sem);
 
 	vma = find_vma_prev(mm, address, &prev_vma);
-	if (!vma)
+	if (!vma && !prev_vma )
 		goto bad_area;
 
-	/* find_vma_prev() returns vma such that address < vma->vm_end or NULL */
-	if (address < vma->vm_start)
+        /*
+         * find_vma_prev() returns vma such that address < vma->vm_end or NULL
+         *
+         * May find no vma, but could be that the last vm area is the
+         * register backing store that needs to expand upwards, in
+         * this case vma will be null, but prev_vma will ne non-null
+         */
+        if (( !vma && prev_vma ) || (address < vma->vm_start) )
 		goto check_expansion;
 
   good_area:
@@ -103,53 +143,67 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 
 	/* OK, we've got a good vm_area for this memory area.  Check the access permissions: */
 
-#	define VM_READ_BIT	0
-#	define VM_WRITE_BIT	1
-#	define VM_EXEC_BIT	2
-
 #	if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \
 	    || (1 << VM_EXEC_BIT) != VM_EXEC)
 #		error File is out of sync with <linux/mm.h>.  Please update.
 #	endif
 
-	mask = (  (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
-		| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)
-		| (((isr >> IA64_ISR_R_BIT) & 1UL) << VM_READ_BIT));
+	if (((isr >> IA64_ISR_R_BIT) & 1UL) && (!(vma->vm_flags & (VM_READ | VM_WRITE))))
+		goto bad_area;
 
 	if ((vma->vm_flags & mask) != mask)
 		goto bad_area;
 
-  survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault, make
 	 * sure we exit gracefully rather than endlessly redo the
 	 * fault.
 	 */
-	switch (handle_mm_fault(mm, vma, address, (mask & VM_WRITE) != 0)) {
-	      case VM_FAULT_MINOR:
-		++current->min_flt;
-		break;
-	      case VM_FAULT_MAJOR:
-		++current->maj_flt;
-		break;
-	      case VM_FAULT_SIGBUS:
+	fault = handle_mm_fault(mm, vma, address, flags);
+
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+		return;
+
+	if (unlikely(fault & VM_FAULT_ERROR)) {
 		/*
 		 * We ran out of memory, or some other thing happened
 		 * to us that made us unable to handle the page fault
 		 * gracefully.
 		 */
-		signal = SIGBUS;
-		goto bad_area;
-	      case VM_FAULT_OOM:
-		goto out_of_memory;
-	      default:
+		if (fault & VM_FAULT_OOM) {
+			goto out_of_memory;
+		} else if (fault & VM_FAULT_SIGBUS) {
+			signal = SIGBUS;
+			goto bad_area;
+		}
 		BUG();
 	}
+
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR)
+			current->maj_flt++;
+		else
+			current->min_flt++;
+		if (fault & VM_FAULT_RETRY) {
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
+
+			 /* No need to up_read(&mm->mmap_sem) as we would
+			 * have already released it in __lock_page_or_retry
+			 * in mm/filemap.c.
+			 */
+
+			goto retry;
+		}
+	}
+
 	up_read(&mm->mmap_sem);
 	return;
 
   check_expansion:
 	if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) {
+		if (!vma)
+			goto bad_area;
 		if (!(vma->vm_flags & VM_GROWSDOWN))
 			goto bad_area;
 		if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
@@ -237,20 +291,16 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request at "
 		       "virtual address %016lx\n", address);
-	die("Oops", regs, isr);
+	if (die("Oops", regs, isr))
+		regs = NULL;
 	bust_spinlocks(0);
-	do_exit(SIGKILL);
+	if (regs)
+		do_exit(SIGKILL);
 	return;
 
   out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk(KERN_CRIT "VM: killing process %s\n", current->comm);
-	if (user_mode(regs))
-		do_exit(SIGKILL);
-	goto no_context;
+	if (!user_mode(regs))
+		goto no_context;
+	pagefault_out_of_memory();
 }
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 8d506710fdb..76069c18ee4 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -8,24 +8,24 @@
  * Feb, 2004: dynamic hugetlb page size via boot parameter
  */
 
-#include <linux/config.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
-#include <linux/slab.h>
+#include <linux/module.h>
 #include <linux/sysctl.h>
+#include <linux/log2.h>
 #include <asm/mman.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
-unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
+unsigned int hpage_shift = HPAGE_SHIFT_DEFAULT;
+EXPORT_SYMBOL(hpage_shift);
 
 pte_t *
-huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
+huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
 	unsigned long taddr = htlbpage_to_page(addr);
 	pgd_t *pgd;
@@ -38,7 +38,7 @@ huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
 	if (pud) {
 		pmd = pmd_alloc(mm, pud, taddr);
 		if (pmd)
-			pte = pte_alloc_map(mm, pmd, taddr);
+			pte = pte_alloc_map(mm, NULL, pmd, taddr);
 	}
 	return pte;
 }
@@ -65,13 +65,19 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
 #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
 
 /*
  * Don't actually need to do any preparation, but need to make sure
  * the address is in the right region.
  */
-int prepare_hugepage_range(unsigned long addr, unsigned long len)
+int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
@@ -102,13 +108,19 @@ int pmd_huge(pmd_t pmd)
 {
 	return 0;
 }
+
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
 {
 	return NULL;
 }
 
-void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 			unsigned long addr, unsigned long end,
 			unsigned long floor, unsigned long ceiling)
 {
@@ -136,25 +148,31 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 		unsigned long pgoff, unsigned long flags)
 {
-	struct vm_area_struct *vmm;
+	struct vm_unmapped_area_info info;
 
 	if (len > RGN_MAP_LIMIT)
 		return -ENOMEM;
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
+
+	/* Handle MAP_FIXED */
+	if (flags & MAP_FIXED) {
+		if (prepare_hugepage_range(file, addr, len))
+			return -EINVAL;
+		return addr;
+	}
+
 	/* This code assumes that RGN_HPAGE != 0. */
 	if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1)))
 		addr = HPAGE_REGION_BASE;
-	else
-		addr = ALIGN(addr, HPAGE_SIZE);
-	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
-		/* At this point:  (!vmm || addr < vmm->vm_end). */
-		if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT)
-			return -ENOMEM;
-		if (!vmm || (addr + len) <= vmm->vm_start)
-			return addr;
-		addr = ALIGN(vmm->vm_end, HPAGE_SIZE);
-	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = addr;
+	info.high_limit = HPAGE_REGION_BASE + RGN_MAP_LIMIT;
+	info.align_mask = PAGE_MASK & (HPAGE_SIZE - 1);
+	info.align_offset = 0;
+	return vm_unmapped_area(&info);
 }
 
 static int __init hugetlb_setup_sz(char *str)
@@ -169,7 +187,7 @@ static int __init hugetlb_setup_sz(char *str)
 		tr_pages = 0x15557000UL;
 
 	size = memparse(str, &str);
-	if (*str || (size & (size-1)) || !(tr_pages & size) ||
+	if (*str || !is_power_of_2(size) || !(tr_pages & size) ||
 		size <= PAGE_SIZE ||
 		size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
 		printk(KERN_WARNING "Invalid huge page size specified\n");
@@ -182,6 +200,6 @@ static int __init hugetlb_setup_sz(char *str)
 	 * override here with new page shift.
 	 */
 	ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2);
-	return 1;
+	return 0;
 }
-__setup("hugepagesz=", hugetlb_setup_sz);
+early_param("hugepagesz", hugetlb_setup_sz);
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index cafa8776a53..25c350264a4 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -4,13 +4,13 @@
  * Copyright (C) 1998-2003 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 
 #include <linux/bootmem.h>
 #include <linux/efi.h>
 #include <linux/elf.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/module.h>
@@ -20,10 +20,9 @@
 #include <linux/swap.h>
 #include <linux/proc_fs.h>
 #include <linux/bitops.h>
+#include <linux/kexec.h>
 
-#include <asm/a.out.h>
 #include <asm/dma.h>
-#include <asm/ia32.h>
 #include <asm/io.h>
 #include <asm/machvec.h>
 #include <asm/numa.h>
@@ -31,24 +30,19 @@
 #include <asm/pgalloc.h>
 #include <asm/sal.h>
 #include <asm/sections.h>
-#include <asm/system.h>
 #include <asm/tlb.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/mca.h>
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
-DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
-DEFINE_PER_CPU(long, __pgtable_quicklist_size);
+#include <asm/paravirt.h>
 
 extern void ia64_tlb_init (void);
 
 unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-unsigned long vmalloc_end = VMALLOC_END_INIT;
-EXPORT_SYMBOL(vmalloc_end);
+unsigned long VMALLOC_END = VMALLOC_END_INIT;
+EXPORT_SYMBOL(VMALLOC_END);
 struct page *vmem_map;
 EXPORT_SYMBOL(vmem_map);
 #endif
@@ -56,63 +50,11 @@ EXPORT_SYMBOL(vmem_map);
 struct page *zero_page_memmap_ptr;	/* map entry for zero page */
 EXPORT_SYMBOL(zero_page_memmap_ptr);
 
-#define MIN_PGT_PAGES			25UL
-#define MAX_PGT_FREES_PER_PASS		16L
-#define PGT_FRACTION_OF_NODE_MEM	16
-
-static inline long
-max_pgt_pages(void)
-{
-	u64 node_free_pages, max_pgt_pages;
-
-#ifndef	CONFIG_NUMA
-	node_free_pages = nr_free_pages();
-#else
-	node_free_pages = nr_free_pages_pgdat(NODE_DATA(numa_node_id()));
-#endif
-	max_pgt_pages = node_free_pages / PGT_FRACTION_OF_NODE_MEM;
-	max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES);
-	return max_pgt_pages;
-}
-
-static inline long
-min_pages_to_free(void)
-{
-	long pages_to_free;
-
-	pages_to_free = pgtable_quicklist_size - max_pgt_pages();
-	pages_to_free = min(pages_to_free, MAX_PGT_FREES_PER_PASS);
-	return pages_to_free;
-}
-
 void
-check_pgt_cache(void)
-{
-	long pages_to_free;
-
-	if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
-		return;
-
-	preempt_disable();
-	while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
-		while (pages_to_free--) {
-			free_page((unsigned long)pgtable_quicklist_alloc());
-		}
-		preempt_enable();
-		preempt_disable();
-	}
-	preempt_enable();
-}
-
-void
-lazy_mmu_prot_update (pte_t pte)
+__ia64_sync_icache_dcache (pte_t pte)
 {
 	unsigned long addr;
 	struct page *page;
-	unsigned long order;
-
-	if (!pte_exec(pte))
-		return;				/* not an executable page... */
 
 	page = pte_page(pte);
 	addr = (unsigned long) page_address(page);
@@ -120,23 +62,37 @@ lazy_mmu_prot_update (pte_t pte)
 	if (test_bit(PG_arch_1, &page->flags))
 		return;				/* i-cache is already coherent with d-cache */
 
-	if (PageCompound(page)) {
-		order = (unsigned long) (page[1].lru.prev);
-		flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT));
-	}
-	else
-		flush_icache_range(addr, addr + PAGE_SIZE);
+	flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
 	set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
 }
 
+/*
+ * Since DMA is i-cache coherent, any (complete) pages that were written via
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
+ * flush them when they get mapped into an executable vm-area.
+ */
+void
+dma_mark_clean(void *addr, size_t size)
+{
+	unsigned long pg_addr, end;
+
+	pg_addr = PAGE_ALIGN((unsigned long) addr);
+	end = (unsigned long) addr + size;
+	while (pg_addr + PAGE_SIZE <= end) {
+		struct page *page = virt_to_page(pg_addr);
+		set_bit(PG_arch_1, &page->flags);
+		pg_addr += PAGE_SIZE;
+	}
+}
+
 inline void
 ia64_set_rbs_bot (void)
 {
-	unsigned long stack_size = current->signal->rlim[RLIMIT_STACK].rlim_max & -16;
+	unsigned long stack_size = rlimit_max(RLIMIT_STACK) & -16;
 
 	if (stack_size > MAX_USER_STACK_SIZE)
 		stack_size = MAX_USER_STACK_SIZE;
-	current->thread.rbs_bot = STACK_TOP - stack_size;
+	current->thread.rbs_bot = PAGE_ALIGN(current->mm->start_stack - stack_size);
 }
 
 /*
@@ -157,14 +113,14 @@ ia64_init_addr_space (void)
 	 * the problem.  When the process attempts to write to the register backing store
 	 * for the first time, it will get a SEGFAULT in this case.
 	 */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
-		memset(vma, 0, sizeof(*vma));
+		INIT_LIST_HEAD(&vma->anon_vma_chain);
 		vma->vm_mm = current->mm;
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
-		vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
 		vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 		down_write(&current->mm->mmap_sem);
 		if (insert_vm_struct(current->mm, vma)) {
 			up_write(&current->mm->mmap_sem);
@@ -176,13 +132,14 @@ ia64_init_addr_space (void)
 
 	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
-		vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+		vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 		if (vma) {
-			memset(vma, 0, sizeof(*vma));
+			INIT_LIST_HEAD(&vma->anon_vma_chain);
 			vma->vm_mm = current->mm;
 			vma->vm_end = PAGE_SIZE;
 			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
-			vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
+			vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO |
+					VM_DONTEXPAND | VM_DONTDUMP;
 			down_write(&current->mm->mmap_sem);
 			if (insert_vm_struct(current->mm, vma)) {
 				up_write(&current->mm->mmap_sem);
@@ -197,25 +154,13 @@ ia64_init_addr_space (void)
 void
 free_initmem (void)
 {
-	unsigned long addr, eaddr;
-
-	addr = (unsigned long) ia64_imva(__init_begin);
-	eaddr = (unsigned long) ia64_imva(__init_end);
-	while (addr < eaddr) {
-		ClearPageReserved(virt_to_page(addr));
-		init_page_count(virt_to_page(addr));
-		free_page(addr);
-		++totalram_pages;
-		addr += PAGE_SIZE;
-	}
-	printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n",
-	       (__init_end - __init_begin) >> 10);
+	free_reserved_area(ia64_imva(__init_begin), ia64_imva(__init_end),
+			   -1, "unused kernel");
 }
 
 void __init
 free_initrd_mem (unsigned long start, unsigned long end)
 {
-	struct page *page;
 	/*
 	 * EFI uses 4KB pages while the kernel can use 4KB or bigger.
 	 * Thus EFI and the kernel may have different page sizes. It is
@@ -256,11 +201,7 @@ free_initrd_mem (unsigned long start, unsigned long end)
 	for (; start < end; start += PAGE_SIZE) {
 		if (!virt_addr_valid(start))
 			continue;
-		page = virt_to_page(start);
-		ClearPageReserved(page);
-		init_page_count(page);
-		free_page(start);
-		++totalram_pages;
+		free_reserved_page(virt_to_page(start));
 	}
 }
 
@@ -303,6 +244,7 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
 static void __init
 setup_gate (void)
 {
+	void *gate_section;
 	struct page *page;
 
 	/*
@@ -310,10 +252,11 @@ setup_gate (void)
 	 * headers etc. and once execute-only page to enable
 	 * privilege-promotion via "epc":
 	 */
-	page = virt_to_page(ia64_imva(__start_gate_section));
+	gate_section = paravirt_get_gate_section();
+	page = virt_to_page(ia64_imva(gate_section));
 	put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
 #ifdef HAVE_BUGGY_SEGREL
-	page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE));
+	page = virt_to_page(ia64_imva(gate_section + PAGE_SIZE));
 	put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
 #else
 	put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
@@ -335,11 +278,10 @@ setup_gate (void)
 	ia64_patch_gate();
 }
 
-void __devinit
-ia64_mmu_init (void *my_cpu_data)
+void ia64_mmu_init(void *my_cpu_data)
 {
-	unsigned long psr, pta, impl_va_bits;
-	extern void __devinit tlb_init (void);
+	unsigned long pta, impl_va_bits;
+	extern void tlb_init(void);
 
 #ifdef CONFIG_DISABLE_VHPT
 #	define VHPT_ENABLE_BIT	0
@@ -347,15 +289,6 @@ ia64_mmu_init (void *my_cpu_data)
 #	define VHPT_ENABLE_BIT	1
 #endif
 
-	/* Pin mapping for percpu area into TLB */
-	psr = ia64_clear_ic();
-	ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
-		 pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
-		 PERCPU_PAGE_SHIFT);
-
-	ia64_set_psr(psr);
-	ia64_srlz_i();
-
 	/*
 	 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
 	 * address space.  The IA-64 architecture guarantees that at least 50 bits of
@@ -416,9 +349,61 @@ ia64_mmu_init (void *my_cpu_data)
 }
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
+int vmemmap_find_next_valid_pfn(int node, int i)
+{
+	unsigned long end_address, hole_next_pfn;
+	unsigned long stop_address;
+	pg_data_t *pgdat = NODE_DATA(node);
+
+	end_address = (unsigned long) &vmem_map[pgdat->node_start_pfn + i];
+	end_address = PAGE_ALIGN(end_address);
+	stop_address = (unsigned long) &vmem_map[pgdat_end_pfn(pgdat)];
+
+	do {
+		pgd_t *pgd;
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		pgd = pgd_offset_k(end_address);
+		if (pgd_none(*pgd)) {
+			end_address += PGDIR_SIZE;
+			continue;
+		}
+
+		pud = pud_offset(pgd, end_address);
+		if (pud_none(*pud)) {
+			end_address += PUD_SIZE;
+			continue;
+		}
 
-int __init
-create_mem_map_page_table (u64 start, u64 end, void *arg)
+		pmd = pmd_offset(pud, end_address);
+		if (pmd_none(*pmd)) {
+			end_address += PMD_SIZE;
+			continue;
+		}
+
+		pte = pte_offset_kernel(pmd, end_address);
+retry_pte:
+		if (pte_none(*pte)) {
+			end_address += PAGE_SIZE;
+			pte++;
+			if ((end_address < stop_address) &&
+			    (end_address != ALIGN(end_address, 1UL << PMD_SHIFT)))
+				goto retry_pte;
+			continue;
+		}
+		/* Found next valid vmem_map page */
+		break;
+	} while (end_address < stop_address);
+
+	end_address = min(end_address, stop_address);
+	end_address = end_address - (unsigned long) vmem_map + sizeof(struct page) - 1;
+	hole_next_pfn = end_address / sizeof(struct page);
+	return hole_next_pfn - pgdat->node_start_pfn;
+}
+
+int __init create_mem_map_page_table(u64 start, u64 end, void *arg)
 {
 	unsigned long address, start_page, end_page;
 	struct page *map_start, *map_end;
@@ -463,8 +448,8 @@ struct memmap_init_callback_data {
 	unsigned long zone;
 };
 
-static int
-virtual_memmap_init (u64 start, u64 end, void *arg)
+static int __meminit
+virtual_memmap_init(u64 start, u64 end, void *arg)
 {
 	struct memmap_init_callback_data *args;
 	struct page *map_start, *map_end;
@@ -489,16 +474,17 @@ virtual_memmap_init (u64 start, u64 end, void *arg)
 
 	if (map_start < map_end)
 		memmap_init_zone((unsigned long)(map_end - map_start),
-				 args->nid, args->zone, page_to_pfn(map_start));
+				 args->nid, args->zone, page_to_pfn(map_start),
+				 MEMMAP_EARLY);
 	return 0;
 }
 
-void
+void __meminit
 memmap_init (unsigned long size, int nid, unsigned long zone,
 	     unsigned long start_pfn)
 {
 	if (!vmem_map)
-		memmap_init_zone(size, nid, zone, start_pfn);
+		memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY);
 	else {
 		struct page *start;
 		struct memmap_init_callback_data args;
@@ -525,8 +511,7 @@ ia64_pfn_valid (unsigned long pfn)
 }
 EXPORT_SYMBOL(ia64_pfn_valid);
 
-int __init
-find_largest_hole (u64 start, u64 end, void *arg)
+int __init find_largest_hole(u64 start, u64 end, void *arg)
 {
 	u64 *max_gap = arg;
 
@@ -539,18 +524,38 @@ find_largest_hole (u64 start, u64 end, void *arg)
 	last_end = end;
 	return 0;
 }
+
 #endif /* CONFIG_VIRTUAL_MEM_MAP */
 
-static int __init
-count_reserved_pages (u64 start, u64 end, void *arg)
+int __init register_active_ranges(u64 start, u64 len, int nid)
 {
-	unsigned long num_reserved = 0;
-	unsigned long *count = arg;
+	u64 end = start + len;
 
-	for (; start < end; start += PAGE_SIZE)
-		if (PageReserved(virt_to_page(start)))
-			++num_reserved;
-	*count += num_reserved;
+#ifdef CONFIG_KEXEC
+	if (start > crashk_res.start && start < crashk_res.end)
+		start = crashk_res.end;
+	if (end > crashk_res.start && end < crashk_res.end)
+		end = crashk_res.start;
+#endif
+
+	if (start < end)
+		memblock_add_node(__pa(start), end - start, nid);
+	return 0;
+}
+
+int
+find_max_min_low_pfn (u64 start, u64 end, void *arg)
+{
+	unsigned long pfn_start, pfn_end;
+#ifdef CONFIG_FLATMEM
+	pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT;
+	pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT;
+#else
+	pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT;
+	pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT;
+#endif
+	min_low_pfn = min(min_low_pfn, pfn_start);
+	max_low_pfn = max(max_low_pfn, pfn_end);
 	return 0;
 }
 
@@ -576,10 +581,7 @@ __setup("nolwsys", nolwsys_setup);
 void __init
 mem_init (void)
 {
-	long reserved_pages, codesize, datasize, initsize;
-	pg_data_t *pgdat;
 	int i;
-	static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
 
 	BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
 	BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
@@ -595,33 +597,13 @@ mem_init (void)
 #endif
 
 #ifdef CONFIG_FLATMEM
-	if (!mem_map)
-		BUG();
-	max_mapnr = max_low_pfn;
+	BUG_ON(!mem_map);
 #endif
 
+	set_max_mapnr(max_low_pfn);
 	high_memory = __va(max_low_pfn * PAGE_SIZE);
-
-	kclist_add(&kcore_mem, __va(0), max_low_pfn * PAGE_SIZE);
-	kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
-	kclist_add(&kcore_kernel, _stext, _end - _stext);
-
-	for_each_online_pgdat(pgdat)
-		if (pgdat->bdata->node_bootmem_map)
-			totalram_pages += free_all_bootmem_node(pgdat);
-
-	reserved_pages = 0;
-	efi_memmap_walk(count_reserved_pages, &reserved_pages);
-
-	codesize =  (unsigned long) _etext - (unsigned long) _stext;
-	datasize =  (unsigned long) _edata - (unsigned long) _etext;
-	initsize =  (unsigned long) __init_end - (unsigned long) __init_begin;
-
-	printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, "
-	       "%luk data, %luk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
-	       num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
-	       reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
-
+	free_all_bootmem();
+	mem_init_print_info(NULL);
 
 	/*
 	 * For fsyscall entrpoints with no light-weight handler, use the ordinary
@@ -629,30 +611,17 @@ mem_init (void)
 	 * code can tell them apart.
 	 */
 	for (i = 0; i < NR_syscalls; ++i) {
-		extern unsigned long fsyscall_table[NR_syscalls];
 		extern unsigned long sys_call_table[NR_syscalls];
+		unsigned long *fsyscall_table = paravirt_get_fsyscall_table();
 
 		if (!fsyscall_table[i] || nolwsys)
 			fsyscall_table[i] = sys_call_table[i] | 1;
 	}
 	setup_gate();
-
-#ifdef CONFIG_IA32_SUPPORT
-	ia32_mem_init();
-#endif
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-void online_page(struct page *page)
-{
-	ClearPageReserved(page);
-	init_page_count(page);
-	__free_page(page);
-	totalram_pages++;
-	num_physpages++;
-}
-
-int add_memory(u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size)
 {
 	pg_data_t *pgdat;
 	struct zone *zone;
@@ -660,20 +629,106 @@ int add_memory(u64 start, u64 size)
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
-	pgdat = NODE_DATA(0);
+	pgdat = NODE_DATA(nid);
 
 	zone = pgdat->node_zones + ZONE_NORMAL;
-	ret = __add_pages(zone, start_pfn, nr_pages);
+	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 
 	if (ret)
 		printk("%s: Problem encountered in __add_pages() as ret=%d\n",
-		       __FUNCTION__,  ret);
+		       __func__,  ret);
 
 	return ret;
 }
 
-int remove_memory(u64 start, u64 size)
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
 {
-	return -EINVAL;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct zone *zone;
+	int ret;
+
+	zone = page_zone(pfn_to_page(start_pfn));
+	ret = __remove_pages(zone, start_pfn, nr_pages);
+	if (ret)
+		pr_warn("%s: Problem encountered in __remove_pages() as"
+			" ret=%d\n", __func__,  ret);
+
+	return ret;
 }
 #endif
+#endif
+
+/*
+ * Even when CONFIG_IA32_SUPPORT is not enabled it is
+ * useful to have the Linux/x86 domain registered to
+ * avoid an attempted module load when emulators call
+ * personality(PER_LINUX32). This saves several milliseconds
+ * on each such call.
+ */
+static struct exec_domain ia32_exec_domain;
+
+static int __init
+per_linux32_init(void)
+{
+	ia32_exec_domain.name = "Linux/x86";
+	ia32_exec_domain.handler = NULL;
+	ia32_exec_domain.pers_low = PER_LINUX32;
+	ia32_exec_domain.pers_high = PER_LINUX32;
+	ia32_exec_domain.signal_map = default_exec_domain.signal_map;
+	ia32_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
+	register_exec_domain(&ia32_exec_domain);
+
+	return 0;
+}
+
+__initcall(per_linux32_init);
+
+/**
+ * show_mem - give short summary of memory stats
+ *
+ * Shows a simple page count of reserved and used pages in the system.
+ * For discontig machines, it does this on a per-pgdat basis.
+ */
+void show_mem(unsigned int filter)
+{
+	int total_reserved = 0;
+	unsigned long total_present = 0;
+	pg_data_t *pgdat;
+
+	printk(KERN_INFO "Mem-info:\n");
+	show_free_areas(filter);
+	printk(KERN_INFO "Node memory in pages:\n");
+	for_each_online_pgdat(pgdat) {
+		unsigned long present;
+		unsigned long flags;
+		int reserved = 0;
+		int nid = pgdat->node_id;
+		int zoneid;
+
+		if (skip_free_areas_node(filter, nid))
+			continue;
+		pgdat_resize_lock(pgdat, &flags);
+
+		for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
+			struct zone *zone = &pgdat->node_zones[zoneid];
+			if (!populated_zone(zone))
+				continue;
+
+			reserved += zone->present_pages - zone->managed_pages;
+		}
+		present = pgdat->node_present_pages;
+
+		pgdat_resize_unlock(pgdat, &flags);
+		total_present += present;
+		total_reserved += reserved;
+		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, ",
+		       nid, present, reserved);
+	}
+	printk(KERN_INFO "%ld pages of RAM\n", total_present);
+	printk(KERN_INFO "%d reserved pages\n", total_reserved);
+	printk(KERN_INFO "Total of %ld pages in page table cache\n",
+	       quicklist_total_size());
+	printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
+}
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
index 643ccc6960c..43964cde621 100644
--- a/arch/ia64/mm/ioremap.c
+++ b/arch/ia64/mm/ioremap.c
@@ -1,5 +1,5 @@
 /*
- * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
+ * (c) Copyright 2006, 2007 Hewlett-Packard Development Company, L.P.
  *	Bjorn Helgaas <bjorn.helgaas@hp.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -10,34 +10,116 @@
 #include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
 #include <asm/io.h>
+#include <asm/meminit.h>
 
 static inline void __iomem *
-__ioremap (unsigned long offset, unsigned long size)
+__ioremap_uc(unsigned long phys_addr)
 {
-	return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset);
+	return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr);
 }
 
 void __iomem *
-ioremap (unsigned long offset, unsigned long size)
+early_ioremap (unsigned long phys_addr, unsigned long size)
 {
-	if (efi_mem_attribute_range(offset, size, EFI_MEMORY_WB))
-		return phys_to_virt(offset);
+	u64 attr;
+	attr = kern_mem_attribute(phys_addr, size);
+	if (attr & EFI_MEMORY_WB)
+		return (void __iomem *) phys_to_virt(phys_addr);
+	return __ioremap_uc(phys_addr);
+}
+
+void __iomem *
+ioremap (unsigned long phys_addr, unsigned long size)
+{
+	void __iomem *addr;
+	struct vm_struct *area;
+	unsigned long offset;
+	pgprot_t prot;
+	u64 attr;
+	unsigned long gran_base, gran_size;
+	unsigned long page_base;
 
-	if (efi_mem_attribute_range(offset, size, EFI_MEMORY_UC))
-		return __ioremap(offset, size);
+	/*
+	 * For things in kern_memmap, we must use the same attribute
+	 * as the rest of the kernel.  For more details, see
+	 * Documentation/ia64/aliasing.txt.
+	 */
+	attr = kern_mem_attribute(phys_addr, size);
+	if (attr & EFI_MEMORY_WB)
+		return (void __iomem *) phys_to_virt(phys_addr);
+	else if (attr & EFI_MEMORY_UC)
+		return __ioremap_uc(phys_addr);
 
 	/*
-	 * Someday this should check ACPI resources so we
-	 * can do the right thing for hot-plugged regions.
+	 * Some chipsets don't support UC access to memory.  If
+	 * WB is supported for the whole granule, we prefer that.
 	 */
-	return __ioremap(offset, size);
+	gran_base = GRANULEROUNDDOWN(phys_addr);
+	gran_size = GRANULEROUNDUP(phys_addr + size) - gran_base;
+	if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB)
+		return (void __iomem *) phys_to_virt(phys_addr);
+
+	/*
+	 * WB is not supported for the whole granule, so we can't use
+	 * the region 7 identity mapping.  If we can safely cover the
+	 * area with kernel page table mappings, we can use those
+	 * instead.
+	 */
+	page_base = phys_addr & PAGE_MASK;
+	size = PAGE_ALIGN(phys_addr + size) - page_base;
+	if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB) {
+		prot = PAGE_KERNEL;
+
+		/*
+		 * Mappings have to be page-aligned
+		 */
+		offset = phys_addr & ~PAGE_MASK;
+		phys_addr &= PAGE_MASK;
+
+		/*
+		 * Ok, go for it..
+		 */
+		area = get_vm_area(size, VM_IOREMAP);
+		if (!area)
+			return NULL;
+
+		area->phys_addr = phys_addr;
+		addr = (void __iomem *) area->addr;
+		if (ioremap_page_range((unsigned long) addr,
+				(unsigned long) addr + size, phys_addr, prot)) {
+			vunmap((void __force *) addr);
+			return NULL;
+		}
+
+		return (void __iomem *) (offset + (char __iomem *)addr);
+	}
+
+	return __ioremap_uc(phys_addr);
 }
 EXPORT_SYMBOL(ioremap);
 
 void __iomem *
-ioremap_nocache (unsigned long offset, unsigned long size)
+ioremap_nocache (unsigned long phys_addr, unsigned long size)
 {
-	return __ioremap(offset, size);
+	if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB)
+		return NULL;
+
+	return __ioremap_uc(phys_addr);
 }
 EXPORT_SYMBOL(ioremap_nocache);
+
+void
+early_iounmap (volatile void __iomem *addr, unsigned long size)
+{
+}
+
+void
+iounmap (volatile void __iomem *addr)
+{
+	if (REGION_NUMBER(addr) == RGN_GATE)
+		vunmap((void *) ((unsigned long) addr & PAGE_MASK));
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 4e5c8b36ad9..ea21d4cad54 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -10,13 +10,13 @@
  *                         2002/08/07 Erich Focht <efocht@ess.nec.de>
  */
 
-#include <linux/config.h>
 #include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/node.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
+#include <linux/module.h>
 #include <asm/mmzone.h>
 #include <asm/numa.h>
 
@@ -27,7 +27,9 @@
  */
 int num_node_memblks;
 struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
-struct node_cpuid_s node_cpuid[NR_CPUS];
+struct node_cpuid_s node_cpuid[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
+
 /*
  * This is a matrix with "distances" between nodes, they should be
  * proportional to the memory access latency ratios.
@@ -56,18 +58,53 @@ paddr_to_nid(unsigned long paddr)
  * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
  * the section resides.
  */
-int early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn)
 {
 	int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
+	/*
+	 * NOTE: The following SMP-unsafe globals are only used early in boot
+	 * when the kernel is running single-threaded.
+	 */
+	static int __meminitdata last_ssec, last_esec;
+	static int __meminitdata last_nid;
+
+	if (section >= last_ssec && section < last_esec)
+		return last_nid;
 
 	for (i = 0; i < num_node_memblks; i++) {
 		ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
 		esec = (node_memblk[i].start_paddr + node_memblk[i].size +
 			((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
-		if (section >= ssec && section < esec)
+		if (section >= ssec && section < esec) {
+			last_ssec = ssec;
+			last_esec = esec;
+			last_nid = node_memblk[i].nid;
 			return node_memblk[i].nid;
+		}
 	}
 
-	return 0;
+	return -1;
 }
+
+void numa_clear_node(int cpu)
+{
+	unmap_cpu_from_node(cpu, NUMA_NO_NODE);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ *  SRAT information is stored in node_memblk[], then we can use SRAT
+ *  information at memory-hot-add if necessary.
+ */
+
+int memory_add_physaddr_to_nid(u64 addr)
+{
+	int nid = paddr_to_nid(addr);
+	if (nid < 0)
+		return 0;
+	return nid;
+}
+
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
 #endif
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 4dbbca0b5e9..ed612976868 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -10,8 +10,11 @@
  *              IPI based ptc implementation and A-step IPI implementation.
  * Rohit Seth <rohit.seth@intel.com>
  * Ken Chen <kenneth.w.chen@intel.com>
+ * Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation
+ * Copyright (C) 2007 Intel Corp
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *	Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
  */
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -19,6 +22,7 @@
 #include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/bootmem.h>
+#include <linux/slab.h>
 
 #include <asm/delay.h>
 #include <asm/mmu_context.h>
@@ -26,19 +30,26 @@
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
 #include <asm/dma.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/tlb.h>
 
 static struct {
-	unsigned long mask;	/* mask of supported purge page-sizes */
+	u64 mask;		/* mask of supported purge page-sizes */
 	unsigned long max_bits;	/* log2 of largest supported purge page-size */
 } purge;
 
 struct ia64_ctx ia64_ctx = {
-	.lock =		SPIN_LOCK_UNLOCKED,
-	.next =		1,
-	.max_ctx =	~0U
+	.lock =	__SPIN_LOCK_UNLOCKED(ia64_ctx.lock),
+	.next =	1,
+	.max_ctx = ~0U
 };
 
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
+DEFINE_PER_CPU(u8, ia64_tr_num);  /*Number of TR slots in current processor*/
+DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/
+
+struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
 
 /*
  * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
@@ -84,30 +95,183 @@ wrap_mmu_context (struct mm_struct *mm)
 	local_flush_tlb_all();
 }
 
+/*
+ * Implement "spinaphores" ... like counting semaphores, but they
+ * spin instead of sleeping.  If there are ever any other users for
+ * this primitive it can be moved up to a spinaphore.h header.
+ */
+struct spinaphore {
+	unsigned long	ticket;
+	unsigned long	serve;
+};
+
+static inline void spinaphore_init(struct spinaphore *ss, int val)
+{
+	ss->ticket = 0;
+	ss->serve = val;
+}
+
+static inline void down_spin(struct spinaphore *ss)
+{
+	unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve;
+
+	if (time_before(t, ss->serve))
+		return;
+
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld8.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory");
+		if (time_before(t, serve))
+			return;
+		cpu_relax();
+	}
+}
+
+static inline void up_spin(struct spinaphore *ss)
+{
+	ia64_fetchadd(1, &ss->serve, rel);
+}
+
+static struct spinaphore ptcg_sem;
+static u16 nptcg = 1;
+static int need_ptcg_sem = 1;
+static int toolatetochangeptcgsem = 0;
+
+/*
+ * Kernel parameter "nptcg=" overrides max number of concurrent global TLB
+ * purges which is reported from either PAL or SAL PALO.
+ *
+ * We don't have sanity checking for nptcg value. It's the user's responsibility
+ * for valid nptcg value on the platform. Otherwise, kernel may hang in some
+ * cases.
+ */
+static int __init
+set_nptcg(char *str)
+{
+	int value = 0;
+
+	get_option(&str, &value);
+	setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER);
+
+	return 1;
+}
+
+__setup("nptcg=", set_nptcg);
+
+/*
+ * Maximum number of simultaneous ptc.g purges in the system can
+ * be defined by PAL_VM_SUMMARY (in which case we should take
+ * the smallest value for any cpu in the system) or by the PAL
+ * override table (in which case we should ignore the value from
+ * PAL_VM_SUMMARY).
+ *
+ * Kernel parameter "nptcg=" overrides maximum number of simultanesous ptc.g
+ * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case,
+ * we should ignore the value from either PAL_VM_SUMMARY or PAL override table.
+ *
+ * Complicating the logic here is the fact that num_possible_cpus()
+ * isn't fully setup until we start bringing cpus online.
+ */
 void
-ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
-		       unsigned long end, unsigned long nbits)
+setup_ptcg_sem(int max_purges, int nptcg_from)
 {
-	static DEFINE_SPINLOCK(ptcg_lock);
+	static int kp_override;
+	static int palo_override;
+	static int firstcpu = 1;
 
-	if (mm != current->active_mm || !current->mm) {
-		flush_tlb_all();
+	if (toolatetochangeptcgsem) {
+		if (nptcg_from == NPTCG_FROM_PAL && max_purges == 0)
+			BUG_ON(1 < nptcg);
+		else
+			BUG_ON(max_purges < nptcg);
 		return;
 	}
 
-	/* HW requires global serialization of ptc.ga.  */
-	spin_lock(&ptcg_lock);
-	{
-		do {
-			/*
-			 * Flush ALAT entries also.
-			 */
-			ia64_ptcga(start, (nbits<<2));
-			ia64_srlz_i();
-			start += (1UL << nbits);
-		} while (start < end);
+	if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
+		kp_override = 1;
+		nptcg = max_purges;
+		goto resetsema;
+	}
+	if (kp_override) {
+		need_ptcg_sem = num_possible_cpus() > nptcg;
+		return;
+	}
+
+	if (nptcg_from == NPTCG_FROM_PALO) {
+		palo_override = 1;
+
+		/* In PALO max_purges == 0 really means it! */
+		if (max_purges == 0)
+			panic("Whoa! Platform does not support global TLB purges.\n");
+		nptcg = max_purges;
+		if (nptcg == PALO_MAX_TLB_PURGES) {
+			need_ptcg_sem = 0;
+			return;
+		}
+		goto resetsema;
+	}
+	if (palo_override) {
+		if (nptcg != PALO_MAX_TLB_PURGES)
+			need_ptcg_sem = (num_possible_cpus() > nptcg);
+		return;
+	}
+
+	/* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
+	if (max_purges == 0) max_purges = 1;
+
+	if (firstcpu) {
+		nptcg = max_purges;
+		firstcpu = 0;
 	}
-	spin_unlock(&ptcg_lock);
+	if (max_purges < nptcg)
+		nptcg = max_purges;
+	if (nptcg == PAL_MAX_PURGES) {
+		need_ptcg_sem = 0;
+		return;
+	} else
+		need_ptcg_sem = (num_possible_cpus() > nptcg);
+
+resetsema:
+	spinaphore_init(&ptcg_sem, max_purges);
+}
+
+void
+ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
+		       unsigned long end, unsigned long nbits)
+{
+	struct mm_struct *active_mm = current->active_mm;
+
+	toolatetochangeptcgsem = 1;
+
+	if (mm != active_mm) {
+		/* Restore region IDs for mm */
+		if (mm && active_mm) {
+			activate_context(mm);
+		} else {
+			flush_tlb_all();
+			return;
+		}
+	}
+
+	if (need_ptcg_sem)
+		down_spin(&ptcg_sem);
+
+	do {
+		/*
+		 * Flush ALAT entries also.
+		 */
+		ia64_ptcga(start, (nbits << 2));
+		ia64_srlz_i();
+		start += (1UL << nbits);
+	} while (start < end);
+
+	if (need_ptcg_sem)
+		up_spin(&ptcg_sem);
+
+        if (mm != active_mm) {
+                activate_context(active_mm);
+        }
 }
 
 void
@@ -158,7 +322,7 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
 
 	preempt_disable();
 #ifdef CONFIG_SMP
-	if (mm != current->active_mm || cpus_weight(mm->cpu_vm_mask) != 1) {
+	if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) {
 		platform_global_tlb_purge(mm, start, end, nbits);
 		preempt_enable();
 		return;
@@ -173,15 +337,17 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
 }
 EXPORT_SYMBOL(flush_tlb_range);
 
-void __devinit
-ia64_tlb_init (void)
+void ia64_tlb_init(void)
 {
-	ia64_ptce_info_t ptce_info;
-	unsigned long tr_pgbits;
+	ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
+	u64 tr_pgbits;
 	long status;
+	pal_vm_info_1_u_t vm_info_1;
+	pal_vm_info_2_u_t vm_info_2;
+	int cpu = smp_processor_id();
 
 	if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
-		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld;"
+		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
 		       "defaulting to architected purge page-sizes.\n", status);
 		purge.mask = 0x115557000UL;
 	}
@@ -195,4 +361,201 @@ ia64_tlb_init (void)
 	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
 
 	local_flush_tlb_all();	/* nuke left overs from bootstrapping... */
+	status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);
+
+	if (status) {
+		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
+		per_cpu(ia64_tr_num, cpu) = 8;
+		return;
+	}
+	per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
+	if (per_cpu(ia64_tr_num, cpu) >
+				(vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
+		per_cpu(ia64_tr_num, cpu) =
+				vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
+	if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
+		static int justonce = 1;
+		per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
+		if (justonce) {
+			justonce = 0;
+			printk(KERN_DEBUG "TR register number exceeds "
+			       "IA64_TR_ALLOC_MAX!\n");
+		}
+	}
+}
+
+/*
+ * is_tr_overlap
+ *
+ * Check overlap with inserted TRs.
+ */
+static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
+{
+	u64 tr_log_size;
+	u64 tr_end;
+	u64 va_rr = ia64_get_rr(va);
+	u64 va_rid = RR_TO_RID(va_rr);
+	u64 va_end = va + (1<<log_size) - 1;
+
+	if (va_rid != RR_TO_RID(p->rr))
+		return 0;
+	tr_log_size = (p->itir & 0xff) >> 2;
+	tr_end = p->ifa + (1<<tr_log_size) - 1;
+
+	if (va > tr_end || p->ifa > va_end)
+		return 0;
+	return 1;
+
+}
+
+/*
+ * ia64_insert_tr in virtual mode. Allocate a TR slot
+ *
+ * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
+ *
+ * va 	: virtual address.
+ * pte 	: pte entries inserted.
+ * log_size: range to be covered.
+ *
+ * Return value:  <0 :  error No.
+ *
+ *		  >=0 : slot number allocated for TR.
+ * Must be called with preemption disabled.
+ */
+int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
+{
+	int i, r;
+	unsigned long psr;
+	struct ia64_tr_entry *p;
+	int cpu = smp_processor_id();
+
+	if (!ia64_idtrs[cpu]) {
+		ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX *
+				sizeof (struct ia64_tr_entry), GFP_KERNEL);
+		if (!ia64_idtrs[cpu])
+			return -ENOMEM;
+	}
+	r = -EINVAL;
+	/*Check overlap with existing TR entries*/
+	if (target_mask & 0x1) {
+		p = ia64_idtrs[cpu];
+		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+								i++, p++) {
+			if (p->pte & 0x1)
+				if (is_tr_overlap(p, va, log_size)) {
+					printk(KERN_DEBUG "Overlapped Entry"
+						"Inserted for TR Reigster!!\n");
+					goto out;
+			}
+		}
+	}
+	if (target_mask & 0x2) {
+		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX;
+		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+								i++, p++) {
+			if (p->pte & 0x1)
+				if (is_tr_overlap(p, va, log_size)) {
+					printk(KERN_DEBUG "Overlapped Entry"
+						"Inserted for TR Reigster!!\n");
+					goto out;
+				}
+		}
+	}
+
+	for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
+		switch (target_mask & 0x3) {
+		case 1:
+			if (!((ia64_idtrs[cpu] + i)->pte & 0x1))
+				goto found;
+			continue;
+		case 2:
+			if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
+				goto found;
+			continue;
+		case 3:
+			if (!((ia64_idtrs[cpu] + i)->pte & 0x1) &&
+			    !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
+				goto found;
+			continue;
+		default:
+			r = -EINVAL;
+			goto out;
+		}
+	}
+found:
+	if (i >= per_cpu(ia64_tr_num, cpu))
+		return -EBUSY;
+
+	/*Record tr info for mca hander use!*/
+	if (i > per_cpu(ia64_tr_used, cpu))
+		per_cpu(ia64_tr_used, cpu) = i;
+
+	psr = ia64_clear_ic();
+	if (target_mask & 0x1) {
+		ia64_itr(0x1, i, va, pte, log_size);
+		ia64_srlz_i();
+		p = ia64_idtrs[cpu] + i;
+		p->ifa = va;
+		p->pte = pte;
+		p->itir = log_size << 2;
+		p->rr = ia64_get_rr(va);
+	}
+	if (target_mask & 0x2) {
+		ia64_itr(0x2, i, va, pte, log_size);
+		ia64_srlz_i();
+		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i;
+		p->ifa = va;
+		p->pte = pte;
+		p->itir = log_size << 2;
+		p->rr = ia64_get_rr(va);
+	}
+	ia64_set_psr(psr);
+	r = i;
+out:
+	return r;
+}
+EXPORT_SYMBOL_GPL(ia64_itr_entry);
+
+/*
+ * ia64_purge_tr
+ *
+ * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr.
+ * slot: slot number to be freed.
+ *
+ * Must be called with preemption disabled.
+ */
+void ia64_ptr_entry(u64 target_mask, int slot)
+{
+	int cpu = smp_processor_id();
+	int i;
+	struct ia64_tr_entry *p;
+
+	if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
+		return;
+
+	if (target_mask & 0x1) {
+		p = ia64_idtrs[cpu] + slot;
+		if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+			p->pte = 0;
+			ia64_ptr(0x1, p->ifa, p->itir>>2);
+			ia64_srlz_i();
+		}
+	}
+
+	if (target_mask & 0x2) {
+		p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot;
+		if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+			p->pte = 0;
+			ia64_ptr(0x2, p->ifa, p->itir>>2);
+			ia64_srlz_i();
+		}
+	}
+
+	for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
+		if (((ia64_idtrs[cpu] + i)->pte & 0x1) ||
+		    ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
+			break;
+	}
+	per_cpu(ia64_tr_used, cpu) = i;
 }
+EXPORT_SYMBOL_GPL(ia64_ptr_entry);
diff --git a/arch/ia64/oprofile/Kconfig b/arch/ia64/oprofile/Kconfig
deleted file mode 100644
index 97271ab484d..00000000000
--- a/arch/ia64/oprofile/Kconfig
+++ /dev/null
@@ -1,20 +0,0 @@
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  Due to firmware bugs, you may need to use the "nohalt" boot
-	  option if you're using OProfile with the hardware performance
-	  counters.
-
-	  If unsure, say N.
-
diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c
index adb01566bd5..6a219a94605 100644
--- a/arch/ia64/oprofile/backtrace.c
+++ b/arch/ia64/oprofile/backtrace.c
@@ -14,7 +14,6 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 
 /*
  * For IA64 we need to perform a complex little dance to get both
@@ -29,27 +28,9 @@ typedef struct
 	unsigned int depth;
 	struct pt_regs *regs;
 	struct unw_frame_info frame;
-	u64 *prev_pfs_loc;	/* state for WAR for old spinlock ool code */
+	unsigned long *prev_pfs_loc;	/* state for WAR for old spinlock ool code */
 } ia64_backtrace_t;
 
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-/*
- * Returns non-zero if the PC is in the spinlock contention out-of-line code
- * with non-standard calling sequence (on older compilers).
- */
-static __inline__ int in_old_ool_spinlock_code(unsigned long pc)
-{
-	extern const char ia64_spinlock_contention_pre3_4[] __attribute__ ((weak));
-	extern const char ia64_spinlock_contention_pre3_4_end[] __attribute__ ((weak));
-	unsigned long sc_start = (unsigned long)ia64_spinlock_contention_pre3_4;
-	unsigned long sc_end = (unsigned long)ia64_spinlock_contention_pre3_4_end;
-	return (sc_start && sc_end && pc >= sc_start && pc < sc_end);
-}
-#else
-/* Newer spinlock code does a proper br.call and works fine with the unwinder */
-#define in_old_ool_spinlock_code(pc)	0
-#endif
-
 /* Returns non-zero if the PC is in the Interrupt Vector Table */
 static __inline__ int in_ivt_code(unsigned long pc)
 {
@@ -80,7 +61,7 @@ static __inline__ int next_frame(ia64_backtrace_t *bt)
 	 */
 	if (bt->prev_pfs_loc && bt->regs && bt->frame.pfs_loc == bt->prev_pfs_loc)
 		bt->frame.pfs_loc = &bt->regs->ar_pfs;
-	bt->prev_pfs_loc = (in_old_ool_spinlock_code(bt->frame.ip) ? bt->frame.pfs_loc : NULL);
+	bt->prev_pfs_loc = NULL;
 
 	return unw_unwind(&bt->frame) == 0;
 }
diff --git a/arch/ia64/oprofile/init.c b/arch/ia64/oprofile/init.c
index 125a602a660..31b545c3546 100644
--- a/arch/ia64/oprofile/init.c
+++ b/arch/ia64/oprofile/init.c
@@ -12,11 +12,11 @@
 #include <linux/init.h>
 #include <linux/errno.h>
  
-extern int perfmon_init(struct oprofile_operations * ops);
+extern int perfmon_init(struct oprofile_operations *ops);
 extern void perfmon_exit(void);
 extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth);
 
-int __init oprofile_arch_init(struct oprofile_operations * ops)
+int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
 	int ret = -ENODEV;
 
diff --git a/arch/ia64/oprofile/perfmon.c b/arch/ia64/oprofile/perfmon.c
index b7975a469fb..192d3e8e1f6 100644
--- a/arch/ia64/oprofile/perfmon.c
+++ b/arch/ia64/oprofile/perfmon.c
@@ -8,7 +8,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/config.h>
 #include <linux/oprofile.h>
 #include <linux/sched.h>
 #include <asm/perfmon.h>
@@ -57,7 +56,7 @@ static pfm_buffer_fmt_t oprofile_fmt = {
 };
 
 
-static char * get_cpu_type(void)
+static char *get_cpu_type(void)
 {
 	__u8 family = local_cpu_data->family;
 
@@ -76,7 +75,7 @@ static char * get_cpu_type(void)
 
 static int using_perfmon;
 
-int perfmon_init(struct oprofile_operations * ops)
+int perfmon_init(struct oprofile_operations *ops)
 {
 	int ret = pfm_register_buffer_fmt(&oprofile_fmt);
 	if (ret)
diff --git a/arch/ia64/pci/Makefile b/arch/ia64/pci/Makefile
index e66889e6922..fb14dc520d2 100644
--- a/arch/ia64/pci/Makefile
+++ b/arch/ia64/pci/Makefile
@@ -1,4 +1,4 @@
 #
 # Makefile for the ia64-specific parts of the pci bus
 #
-obj-y		:= pci.o
+obj-y		:= pci.o fixup.o
diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c
new file mode 100644
index 00000000000..1fe9aa5068e
--- /dev/null
+++ b/arch/ia64/pci/fixup.c
@@ -0,0 +1,70 @@
+/*
+ * Exceptions for specific devices. Usually work-arounds for fatal design flaws.
+ * Derived from fixup.c of i386 tree.
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/vgaarb.h>
+
+#include <asm/machvec.h>
+
+/*
+ * Fixup to mark boot BIOS video selected by BIOS before it changes
+ *
+ * From information provided by "Jon Smirl" <jonsmirl@gmail.com>
+ *
+ * The standard boot ROM sequence for an x86 machine uses the BIOS
+ * to select an initial video card for boot display. This boot video
+ * card will have it's BIOS copied to C0000 in system RAM.
+ * IORESOURCE_ROM_SHADOW is used to associate the boot video
+ * card with this copy. On laptops this copy has to be used since
+ * the main ROM may be compressed or combined with another image.
+ * See pci_map_rom() for use of this flag. Before marking the device
+ * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set
+ * by either arch cde or vga-arbitration, if so only apply the fixup to this
+ * already determined primary video card.
+ */
+
+static void pci_fixup_video(struct pci_dev *pdev)
+{
+	struct pci_dev *bridge;
+	struct pci_bus *bus;
+	u16 config;
+
+	if ((strcmp(ia64_platform_name, "dig") != 0)
+	    && (strcmp(ia64_platform_name, "hpzx1")  != 0))
+		return;
+	/* Maybe, this machine supports legacy memory map. */
+
+	/* Is VGA routed to us? */
+	bus = pdev->bus;
+	while (bus) {
+		bridge = bus->self;
+
+		/*
+		 * From information provided by
+		 * "David Miller" <davem@davemloft.net>
+		 * The bridge control register is valid for PCI header
+		 * type BRIDGE, or CARDBUS. Host to PCI controllers use
+		 * PCI header type NORMAL.
+		 */
+		if (bridge && (pci_is_bridge(bridge))) {
+			pci_read_config_word(bridge, PCI_BRIDGE_CONTROL,
+						&config);
+			if (!(config & PCI_BRIDGE_CTL_VGA))
+				return;
+		}
+		bus = bus->parent;
+	}
+	if (!vga_default_device() || pdev == vga_default_device()) {
+		pci_read_config_word(pdev, PCI_COMMAND, &config);
+		if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
+			pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
+			dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n");
+			vga_set_default_device(pdev);
+		}
+	}
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
+				PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index ab829a22f8a..291a582777c 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -10,21 +10,21 @@
  *
  * Note: Above list of copyright holders is incomplete...
  */
-#include <linux/config.h>
 
 #include <linux/acpi.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/pci-acpi.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/export.h>
 
 #include <asm/machvec.h>
 #include <asm/page.h>
-#include <asm/system.h>
 #include <asm/io.h>
 #include <asm/sal.h>
 #include <asm/smp.h>
@@ -45,8 +45,7 @@
 #define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg)	\
 	(((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg))
 
-static int
-pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn,
+int raw_pci_read(unsigned int seg, unsigned int bus, unsigned int devfn,
 	      int reg, int len, u32 *value)
 {
 	u64 addr, data = 0;
@@ -58,10 +57,13 @@ pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn,
 	if ((seg | reg) <= 255) {
 		addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
 		mode = 0;
-	} else {
+	} else if (sal_revision >= SAL_VERSION_CODE(3,2)) {
 		addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
 		mode = 1;
+	} else {
+		return -EINVAL;
 	}
+
 	result = ia64_sal_pci_config_read(addr, mode, len, &data);
 	if (result != 0)
 		return -EINVAL;
@@ -70,8 +72,7 @@ pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn,
 	return 0;
 }
 
-static int
-pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn,
+int raw_pci_write(unsigned int seg, unsigned int bus, unsigned int devfn,
 	       int reg, int len, u32 value)
 {
 	u64 addr;
@@ -83,9 +84,11 @@ pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn,
 	if ((seg | reg) <= 255) {
 		addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
 		mode = 0;
-	} else {
+	} else if (sal_revision >= SAL_VERSION_CODE(3,2)) {
 		addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
 		mode = 1;
+	} else {
+		return -EINVAL;
 	}
 	result = ia64_sal_pci_config_write(addr, mode, len, value);
 	if (result != 0)
@@ -93,24 +96,17 @@ pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn,
 	return 0;
 }
 
-static struct pci_raw_ops pci_sal_ops = {
-	.read =		pci_sal_read,
-	.write =	pci_sal_write
-};
-
-struct pci_raw_ops *raw_pci_ops = &pci_sal_ops;
-
-static int
-pci_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
+							int size, u32 *value)
 {
-	return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
+	return raw_pci_read(pci_domain_nr(bus), bus->number,
 				 devfn, where, size, value);
 }
 
-static int
-pci_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
+							int size, u32 value)
 {
-	return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
+	return raw_pci_write(pci_domain_nr(bus), bus->number,
 				  devfn, where, size, value);
 }
 
@@ -121,23 +117,26 @@ struct pci_ops pci_root_ops = {
 
 /* Called by ACPI when it finds a new root bus.  */
 
-static struct pci_controller * __devinit
-alloc_pci_controller (int seg)
+static struct pci_controller *alloc_pci_controller(int seg)
 {
 	struct pci_controller *controller;
 
-	controller = kmalloc(sizeof(*controller), GFP_KERNEL);
+	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
 	if (!controller)
 		return NULL;
 
-	memset(controller, 0, sizeof(*controller));
 	controller->segment = seg;
-	controller->node = -1;
 	return controller;
 }
 
 struct pci_root_info {
+	struct acpi_device *bridge;
 	struct pci_controller *controller;
+	struct list_head resources;
+	struct resource *res;
+	resource_size_t *res_offset;
+	unsigned int res_num;
+	struct list_head io_resources;
 	char *name;
 };
 
@@ -157,7 +156,7 @@ new_space (u64 phys_base, int sparse)
 			return i;
 
 	if (num_io_spaces == MAX_IO_SPACES) {
-		printk(KERN_ERR "PCI: Too many IO port spaces "
+		pr_err("PCI: Too many IO port spaces "
 			"(MAX_IO_SPACES=%lu)\n", MAX_IO_SPACES);
 		return ~0;
 	}
@@ -169,28 +168,25 @@ new_space (u64 phys_base, int sparse)
 	return i;
 }
 
-static u64 __devinit
-add_io_space (struct pci_root_info *info, struct acpi_resource_address64 *addr)
+static u64 add_io_space(struct pci_root_info *info,
+			struct acpi_resource_address64 *addr)
 {
+	struct iospace_resource *iospace;
 	struct resource *resource;
 	char *name;
-	u64 base, min, max, base_port;
+	unsigned long base, min, max, base_port;
 	unsigned int sparse = 0, space_nr, len;
 
-	resource = kzalloc(sizeof(*resource), GFP_KERNEL);
-	if (!resource) {
-		printk(KERN_ERR "PCI: No memory for %s I/O port space\n",
-			info->name);
+	len = strlen(info->name) + 32;
+	iospace = kzalloc(sizeof(*iospace) + len, GFP_KERNEL);
+	if (!iospace) {
+		dev_err(&info->bridge->dev,
+				"PCI: No memory for %s I/O port space\n",
+				info->name);
 		goto out;
 	}
 
-	len = strlen(info->name) + 32;
-	name = kzalloc(len, GFP_KERNEL);
-	if (!name) {
-		printk(KERN_ERR "PCI: No memory for %s I/O port space name\n",
-			info->name);
-		goto free_resource;
-	}
+	name = (char *)(iospace + 1);
 
 	min = addr->minimum;
 	max = min + addr->address_length - 1;
@@ -199,7 +195,7 @@ add_io_space (struct pci_root_info *info, struct acpi_resource_address64 *addr)
 
 	space_nr = new_space(addr->translation_offset, sparse);
 	if (space_nr == ~0)
-		goto free_name;
+		goto free_resource;
 
 	base = __pa(io_space[space_nr].mmio_base);
 	base_port = IO_SPACE_BASE(space_nr);
@@ -214,24 +210,29 @@ add_io_space (struct pci_root_info *info, struct acpi_resource_address64 *addr)
 	if (space_nr == 0)
 		sparse = 1;
 
+	resource = &iospace->res;
 	resource->name  = name;
 	resource->flags = IORESOURCE_MEM;
 	resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min);
 	resource->end   = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max);
-	insert_resource(&iomem_resource, resource);
+	if (insert_resource(&iomem_resource, resource)) {
+		dev_err(&info->bridge->dev,
+				"can't allocate host bridge io space resource  %pR\n",
+				resource);
+		goto free_resource;
+	}
 
+	list_add_tail(&iospace->list, &info->io_resources);
 	return base_port;
 
-free_name:
-	kfree(name);
 free_resource:
-	kfree(resource);
+	kfree(iospace);
 out:
 	return ~0;
 }
 
-static acpi_status __devinit resource_to_window(struct acpi_resource *resource,
-	struct acpi_resource_address64 *addr)
+static acpi_status resource_to_window(struct acpi_resource *resource,
+				      struct acpi_resource_address64 *addr)
 {
 	acpi_status status;
 
@@ -253,8 +254,7 @@ static acpi_status __devinit resource_to_window(struct acpi_resource *resource,
 	return AE_ERROR;
 }
 
-static acpi_status __devinit
-count_window (struct acpi_resource *resource, void *data)
+static acpi_status count_window(struct acpi_resource *resource, void *data)
 {
 	unsigned int *windows = (unsigned int *) data;
 	struct acpi_resource_address64 addr;
@@ -267,10 +267,10 @@ count_window (struct acpi_resource *resource, void *data)
 	return AE_OK;
 }
 
-static __devinit acpi_status add_window(struct acpi_resource *res, void *data)
+static acpi_status add_window(struct acpi_resource *res, void *data)
 {
 	struct pci_root_info *info = data;
-	struct pci_window *window;
+	struct resource *resource;
 	struct acpi_resource_address64 addr;
 	acpi_status status;
 	unsigned long flags, offset = 0;
@@ -294,156 +294,208 @@ static __devinit acpi_status add_window(struct acpi_resource *res, void *data)
 	} else
 		return AE_OK;
 
-	window = &info->controller->window[info->controller->windows++];
-	window->resource.name = info->name;
-	window->resource.flags = flags;
-	window->resource.start = addr.minimum + offset;
-	window->resource.end = window->resource.start + addr.address_length - 1;
-	window->resource.child = NULL;
-	window->offset = offset;
-
-	if (insert_resource(root, &window->resource)) {
-		printk(KERN_ERR "alloc 0x%lx-0x%lx from %s for %s failed\n",
-			window->resource.start, window->resource.end,
-			root->name, info->name);
+	resource = &info->res[info->res_num];
+	resource->name = info->name;
+	resource->flags = flags;
+	resource->start = addr.minimum + offset;
+	resource->end = resource->start + addr.address_length - 1;
+	info->res_offset[info->res_num] = offset;
+
+	if (insert_resource(root, resource)) {
+		dev_err(&info->bridge->dev,
+			"can't allocate host bridge window %pR\n",
+			resource);
+	} else {
+		if (offset)
+			dev_info(&info->bridge->dev, "host bridge window %pR "
+				 "(PCI address [%#llx-%#llx])\n",
+				 resource,
+				 resource->start - offset,
+				 resource->end - offset);
+		else
+			dev_info(&info->bridge->dev,
+				 "host bridge window %pR\n", resource);
 	}
-
+	/* HP's firmware has a hack to work around a Windows bug.
+	 * Ignore these tiny memory ranges */
+	if (!((resource->flags & IORESOURCE_MEM) &&
+	      (resource->end - resource->start < 16)))
+		pci_add_resource_offset(&info->resources, resource,
+					info->res_offset[info->res_num]);
+
+	info->res_num++;
 	return AE_OK;
 }
 
-static void __devinit
-pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
+static void free_pci_root_info_res(struct pci_root_info *info)
 {
-	int i, j;
+	struct iospace_resource *iospace, *tmp;
 
-	j = 0;
-	for (i = 0; i < ctrl->windows; i++) {
-		struct resource *res = &ctrl->window[i].resource;
-		/* HP's firmware has a hack to work around a Windows bug.
-		 * Ignore these tiny memory ranges */
-		if ((res->flags & IORESOURCE_MEM) &&
-		    (res->end - res->start < 16))
+	list_for_each_entry_safe(iospace, tmp, &info->io_resources, list)
+		kfree(iospace);
+
+	kfree(info->name);
+	kfree(info->res);
+	info->res = NULL;
+	kfree(info->res_offset);
+	info->res_offset = NULL;
+	info->res_num = 0;
+	kfree(info->controller);
+	info->controller = NULL;
+}
+
+static void __release_pci_root_info(struct pci_root_info *info)
+{
+	int i;
+	struct resource *res;
+	struct iospace_resource *iospace;
+
+	list_for_each_entry(iospace, &info->io_resources, list)
+		release_resource(&iospace->res);
+
+	for (i = 0; i < info->res_num; i++) {
+		res = &info->res[i];
+
+		if (!res->parent)
 			continue;
-		if (j >= PCI_BUS_NUM_RESOURCES) {
-			printk("Ignoring range [%lx-%lx] (%lx)\n", res->start,
-					res->end, res->flags);
+
+		if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO)))
 			continue;
-		}
-		bus->resource[j++] = res;
+
+		release_resource(res);
 	}
+
+	free_pci_root_info_res(info);
+	kfree(info);
 }
 
-struct pci_bus * __devinit
-pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
+static void release_pci_root_info(struct pci_host_bridge *bridge)
 {
-	struct pci_root_info info;
-	struct pci_controller *controller;
-	unsigned int windows = 0;
-	struct pci_bus *pbus;
-	char *name;
-	int pxm;
-
-	controller = alloc_pci_controller(domain);
-	if (!controller)
-		goto out1;
-
-	controller->acpi_handle = device->handle;
+	struct pci_root_info *info = bridge->release_data;
 
-	pxm = acpi_get_pxm(controller->acpi_handle);
-#ifdef CONFIG_NUMA
-	if (pxm >= 0)
-		controller->node = pxm_to_nid_map[pxm];
-#endif
+	__release_pci_root_info(info);
+}
 
-	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
-			&windows);
-	controller->window = kmalloc_node(sizeof(*controller->window) * windows,
-			GFP_KERNEL, controller->node);
-	if (!controller->window)
-		goto out2;
+static int
+probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
+		int busnum, int domain)
+{
+	char *name;
 
 	name = kmalloc(16, GFP_KERNEL);
 	if (!name)
-		goto out3;
+		return -ENOMEM;
 
-	sprintf(name, "PCI Bus %04x:%02x", domain, bus);
-	info.controller = controller;
-	info.name = name;
-	acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window,
-			&info);
+	sprintf(name, "PCI Bus %04x:%02x", domain, busnum);
+	info->bridge = device;
+	info->name = name;
 
-	pbus = pci_scan_bus_parented(NULL, bus, &pci_root_ops, controller);
-	if (pbus)
-		pcibios_setup_root_windows(pbus, controller);
+	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
+			&info->res_num);
+	if (info->res_num) {
+		info->res =
+			kzalloc_node(sizeof(*info->res) * info->res_num,
+				     GFP_KERNEL, info->controller->node);
+		if (!info->res) {
+			kfree(name);
+			return -ENOMEM;
+		}
 
-	return pbus;
+		info->res_offset =
+			kzalloc_node(sizeof(*info->res_offset) * info->res_num,
+					GFP_KERNEL, info->controller->node);
+		if (!info->res_offset) {
+			kfree(name);
+			kfree(info->res);
+			info->res = NULL;
+			return -ENOMEM;
+		}
+
+		info->res_num = 0;
+		acpi_walk_resources(device->handle, METHOD_NAME__CRS,
+			add_window, info);
+	} else
+		kfree(name);
 
-out3:
-	kfree(controller->window);
-out2:
-	kfree(controller);
-out1:
-	return NULL;
+	return 0;
 }
 
-void pcibios_resource_to_bus(struct pci_dev *dev,
-		struct pci_bus_region *region, struct resource *res)
+struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 {
-	struct pci_controller *controller = PCI_CONTROLLER(dev);
-	unsigned long offset = 0;
-	int i;
+	struct acpi_device *device = root->device;
+	int domain = root->segment;
+	int bus = root->secondary.start;
+	struct pci_controller *controller;
+	struct pci_root_info *info = NULL;
+	int busnum = root->secondary.start;
+	struct pci_bus *pbus;
+	int ret;
 
-	for (i = 0; i < controller->windows; i++) {
-		struct pci_window *window = &controller->window[i];
-		if (!(window->resource.flags & res->flags))
-			continue;
-		if (window->resource.start > res->start)
-			continue;
-		if (window->resource.end < res->end)
-			continue;
-		offset = window->offset;
-		break;
+	controller = alloc_pci_controller(domain);
+	if (!controller)
+		return NULL;
+
+	controller->companion = device;
+	controller->node = acpi_get_node(device->handle);
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		dev_err(&device->dev,
+				"pci_bus %04x:%02x: ignored (out of memory)\n",
+				domain, busnum);
+		kfree(controller);
+		return NULL;
 	}
 
-	region->start = res->start - offset;
-	region->end = res->end - offset;
+	info->controller = controller;
+	INIT_LIST_HEAD(&info->io_resources);
+	INIT_LIST_HEAD(&info->resources);
+
+	ret = probe_pci_root_info(info, device, busnum, domain);
+	if (ret) {
+		kfree(info->controller);
+		kfree(info);
+		return NULL;
+	}
+	/* insert busn resource at first */
+	pci_add_resource(&info->resources, &root->secondary);
+	/*
+	 * See arch/x86/pci/acpi.c.
+	 * The desired pci bus might already be scanned in a quirk. We
+	 * should handle the case here, but it appears that IA64 hasn't
+	 * such quirk. So we just ignore the case now.
+	 */
+	pbus = pci_create_root_bus(NULL, bus, &pci_root_ops, controller,
+				   &info->resources);
+	if (!pbus) {
+		pci_free_resource_list(&info->resources);
+		__release_pci_root_info(info);
+		return NULL;
+	}
+
+	pci_set_host_bridge_release(to_pci_host_bridge(pbus->bridge),
+			release_pci_root_info, info);
+	pci_scan_child_bus(pbus);
+	return pbus;
 }
-EXPORT_SYMBOL(pcibios_resource_to_bus);
 
-void pcibios_bus_to_resource(struct pci_dev *dev,
-		struct resource *res, struct pci_bus_region *region)
+int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
-	struct pci_controller *controller = PCI_CONTROLLER(dev);
-	unsigned long offset = 0;
-	int i;
+	struct pci_controller *controller = bridge->bus->sysdata;
 
-	for (i = 0; i < controller->windows; i++) {
-		struct pci_window *window = &controller->window[i];
-		if (!(window->resource.flags & res->flags))
-			continue;
-		if (window->resource.start - window->offset > region->start)
-			continue;
-		if (window->resource.end - window->offset < region->end)
-			continue;
-		offset = window->offset;
-		break;
-	}
-
-	res->start = region->start + offset;
-	res->end = region->end + offset;
+	ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+	return 0;
 }
-EXPORT_SYMBOL(pcibios_bus_to_resource);
 
-static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
+static int is_valid_resource(struct pci_dev *dev, int idx)
 {
 	unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-	struct resource *devr = &dev->resource[idx];
+	struct resource *devr = &dev->resource[idx], *busr;
 
 	if (!dev->bus)
 		return 0;
-	for (i=0; i<PCI_BUS_NUM_RESOURCES; i++) {
-		struct resource *busr = dev->bus->resource[i];
 
+	pci_bus_for_each_resource(dev->bus, busr, i) {
 		if (!busr || ((busr->flags ^ devr->flags) & type_mask))
 			continue;
 		if ((devr->start) && (devr->start >= busr->start) &&
@@ -453,29 +505,25 @@ static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
 	return 0;
 }
 
-static void __devinit
-pcibios_fixup_resources(struct pci_dev *dev, int start, int limit)
+static void pcibios_fixup_resources(struct pci_dev *dev, int start, int limit)
 {
-	struct pci_bus_region region;
 	int i;
 
 	for (i = start; i < limit; i++) {
 		if (!dev->resource[i].flags)
 			continue;
-		region.start = dev->resource[i].start;
-		region.end = dev->resource[i].end;
-		pcibios_bus_to_resource(dev, &dev->resource[i], &region);
 		if ((is_valid_resource(dev, i)))
 			pci_claim_resource(dev, i);
 	}
 }
 
-static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+void pcibios_fixup_device_resources(struct pci_dev *dev)
 {
 	pcibios_fixup_resources(dev, 0, PCI_BRIDGE_RESOURCES);
 }
+EXPORT_SYMBOL_GPL(pcibios_fixup_device_resources);
 
-static void __devinit pcibios_fixup_bridge_resources(struct pci_dev *dev)
+static void pcibios_fixup_bridge_resources(struct pci_dev *dev)
 {
 	pcibios_fixup_resources(dev, PCI_BRIDGE_RESOURCES, PCI_NUM_RESOURCES);
 }
@@ -483,8 +531,7 @@ static void __devinit pcibios_fixup_bridge_resources(struct pci_dev *dev)
 /*
  *  Called after each bus is probed, but before its children are examined.
  */
-void __devinit
-pcibios_fixup_bus (struct pci_bus *b)
+void pcibios_fixup_bus(struct pci_bus *b)
 {
 	struct pci_dev *dev;
 
@@ -494,58 +541,22 @@ pcibios_fixup_bus (struct pci_bus *b)
 	}
 	list_for_each_entry(dev, &b->devices, bus_list)
 		pcibios_fixup_device_resources(dev);
-
-	return;
+	platform_pci_fixup_bus(b);
 }
 
-void __devinit
-pcibios_update_irq (struct pci_dev *dev, int irq)
+void pcibios_add_bus(struct pci_bus *bus)
 {
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-
-	/* ??? FIXME -- record old value for shutdown.  */
+	acpi_pci_add_bus(bus);
 }
 
-static inline int
-pcibios_enable_resources (struct pci_dev *dev, int mask)
+void pcibios_remove_bus(struct pci_bus *bus)
 {
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
-	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-
-	if (!dev)
-		return -EINVAL;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for (idx=0; idx<PCI_NUM_RESOURCES; idx++) {
-		/* Only set up the desired resources.  */
-		if (!(mask & (1 << idx)))
-			continue;
+	acpi_pci_remove_bus(bus);
+}
 
-		r = &dev->resource[idx];
-		if (!(r->flags & type_mask))
-			continue;
-		if ((idx == PCI_ROM_RESOURCE) &&
-				(!(r->flags & IORESOURCE_ROM_ENABLE)))
-			continue;
-		if (!r->start && r->end) {
-			printk(KERN_ERR
-			       "PCI: Device %s not available because of resource collisions\n",
-			       pci_name(dev));
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-	if (cmd != old_cmd) {
-		printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
+void pcibios_set_master (struct pci_dev *dev)
+{
+	/* No special bus mastering setup handling */
 }
 
 int
@@ -553,38 +564,37 @@ pcibios_enable_device (struct pci_dev *dev, int mask)
 {
 	int ret;
 
-	ret = pcibios_enable_resources(dev, mask);
+	ret = pci_enable_resources(dev, mask);
 	if (ret < 0)
 		return ret;
 
-	return acpi_pci_irq_enable(dev);
+	if (!dev->msi_enabled)
+		return acpi_pci_irq_enable(dev);
+	return 0;
 }
 
 void
 pcibios_disable_device (struct pci_dev *dev)
 {
-	acpi_pci_irq_disable(dev);
+	BUG_ON(atomic_read(&dev->enable_cnt));
+	if (!dev->msi_enabled)
+		acpi_pci_irq_disable(dev);
 }
 
-void
-pcibios_align_resource (void *data, struct resource *res,
-		        unsigned long size, unsigned long align)
+resource_size_t
+pcibios_align_resource (void *data, const struct resource *res,
+		        resource_size_t size, resource_size_t align)
 {
-}
-
-/*
- * PCI BIOS setup, always defaults to SAL interface
- */
-char * __init
-pcibios_setup (char *str)
-{
-	return str;
+	return res->start;
 }
 
 int
 pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
 		     enum pci_mmap_state mmap_state, int write_combine)
 {
+	unsigned long size = vma->vm_end - vma->vm_start;
+	pgprot_t prot;
+
 	/*
 	 * I/O space cannot be accessed via normal processor loads and
 	 * stores on this platform.
@@ -598,17 +608,24 @@ pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
 		 */
 		return -EINVAL;
 
+	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
+		return -EINVAL;
+
+	prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size,
+				    vma->vm_page_prot);
+
 	/*
-	 * Leave vm_pgoff as-is, the PCI space address is the physical
-	 * address on this platform.
+	 * If the user requested WC, the kernel uses UC or WC for this region,
+	 * and the chipset supports WC, we can use WC. Otherwise, we have to
+	 * use the same attribute the kernel uses.
 	 */
-	vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO);
-
-	if (write_combine && efi_range_is_wc(vma->vm_start,
-					     vma->vm_end - vma->vm_start))
+	if (write_combine &&
+	    ((pgprot_val(prot) & _PAGE_MA_MASK) == _PAGE_MA_UC ||
+	     (pgprot_val(prot) & _PAGE_MA_MASK) == _PAGE_MA_WC) &&
+	    efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start))
 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	else
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		vma->vm_page_prot = prot;
 
 	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
 			     vma->vm_end - vma->vm_start, vma->vm_page_prot))
@@ -643,20 +660,35 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
  * vector to get the base address.
  */
 int
-pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
+pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
+			   enum pci_mmap_state mmap_state)
 {
+	unsigned long size = vma->vm_end - vma->vm_start;
+	pgprot_t prot;
 	char *addr;
 
+	/* We only support mmap'ing of legacy memory space */
+	if (mmap_state != pci_mmap_mem)
+		return -ENOSYS;
+
+	/*
+	 * Avoid attribute aliasing.  See Documentation/ia64/aliasing.txt
+	 * for more details.
+	 */
+	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
+		return -EINVAL;
+	prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size,
+				    vma->vm_page_prot);
+
 	addr = pci_get_legacy_mem(bus);
 	if (IS_ERR(addr))
 		return PTR_ERR(addr);
 
 	vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT;
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO);
+	vma->vm_page_prot = prot;
 
 	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-			    vma->vm_end - vma->vm_start, vma->vm_page_prot))
+			    size, vma->vm_page_prot))
 		return -EAGAIN;
 
 	return 0;
@@ -729,83 +761,66 @@ int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
 }
 
 /**
- * pci_cacheline_size - determine cacheline size for PCI devices
- * @dev: void
+ * set_pci_cacheline_size - determine cacheline size for PCI devices
  *
  * We want to use the line-size of the outer-most cache.  We assume
  * that this line-size is the same for all CPUs.
  *
  * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info().
- *
- * RETURNS: An appropriate -ERRNO error value on eror, or zero for success.
  */
-static unsigned long
-pci_cacheline_size (void)
+static void __init set_pci_dfl_cacheline_size(void)
 {
-	u64 levels, unique_caches;
-	s64 status;
+	unsigned long levels, unique_caches;
+	long status;
 	pal_cache_config_info_t cci;
-	static u8 cacheline_size;
-
-	if (cacheline_size)
-		return cacheline_size;
 
 	status = ia64_pal_cache_summary(&levels, &unique_caches);
 	if (status != 0) {
-		printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
-		       __FUNCTION__, status);
-		return SMP_CACHE_BYTES;
+		pr_err("%s: ia64_pal_cache_summary() failed "
+			"(status=%ld)\n", __func__, status);
+		return;
 	}
 
-	status = ia64_pal_cache_config_info(levels - 1, /* cache_type (data_or_unified)= */ 2,
-					    &cci);
+	status = ia64_pal_cache_config_info(levels - 1,
+				/* cache_type (data_or_unified)= */ 2, &cci);
 	if (status != 0) {
-		printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed (status=%ld)\n",
-		       __FUNCTION__, status);
-		return SMP_CACHE_BYTES;
+		pr_err("%s: ia64_pal_cache_config_info() failed "
+			"(status=%ld)\n", __func__, status);
+		return;
 	}
-	cacheline_size = 1 << cci.pcci_line_size;
-	return cacheline_size;
+	pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4;
 }
 
-/**
- * pcibios_prep_mwi - helper function for drivers/pci/pci.c:pci_set_mwi()
- * @dev: the PCI device for which MWI is enabled
- *
- * For ia64, we can get the cacheline sizes from PAL.
- *
- * RETURNS: An appropriate -ERRNO error value on eror, or zero for success.
- */
-int
-pcibios_prep_mwi (struct pci_dev *dev)
-{
-	unsigned long desired_linesize, current_linesize;
-	int rc = 0;
-	u8 pci_linesize;
-
-	desired_linesize = pci_cacheline_size();
-
-	pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &pci_linesize);
-	current_linesize = 4 * pci_linesize;
-	if (desired_linesize != current_linesize) {
-		printk(KERN_WARNING "PCI: slot %s has incorrect PCI cache line size of %lu bytes,",
-		       pci_name(dev), current_linesize);
-		if (current_linesize > desired_linesize) {
-			printk(" expected %lu bytes instead\n", desired_linesize);
-			rc = -EINVAL;
-		} else {
-			printk(" correcting to %lu\n", desired_linesize);
-			pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, desired_linesize / 4);
-		}
+u64 ia64_dma_get_required_mask(struct device *dev)
+{
+	u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
+	u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
+	u64 mask;
+
+	if (!high_totalram) {
+		/* convert to mask just covering totalram */
+		low_totalram = (1 << (fls(low_totalram) - 1));
+		low_totalram += low_totalram - 1;
+		mask = low_totalram;
+	} else {
+		high_totalram = (1 << (fls(high_totalram) - 1));
+		high_totalram += high_totalram - 1;
+		mask = (((u64)high_totalram) << 32) + 0xffffffff;
 	}
-	return rc;
+	return mask;
 }
+EXPORT_SYMBOL_GPL(ia64_dma_get_required_mask);
 
-int pci_vector_resources(int last, int nr_released)
+u64 dma_get_required_mask(struct device *dev)
 {
-	int count = nr_released;
-
-	count += (IA64_LAST_DEVICE_VECTOR - last);
+	return platform_dma_get_required_mask(dev);
+}
+EXPORT_SYMBOL_GPL(dma_get_required_mask);
 
-	return count;
+static int __init pcibios_init(void)
+{
+	set_pci_dfl_cacheline_size();
+	return 0;
 }
+
+subsys_initcall(pcibios_init);
diff --git a/arch/ia64/scripts/check-segrel.lds b/arch/ia64/scripts/check-segrel.lds
index 1c2f13e181d..85a0d54fb5a 100644
--- a/arch/ia64/scripts/check-segrel.lds
+++ b/arch/ia64/scripts/check-segrel.lds
@@ -1,6 +1,7 @@
 SECTIONS {
 	. = SIZEOF_HEADERS;
 	.rodata : { *(.rodata) } :ro
+	.note : { *(.note*) }
 	. = 0xa0000;
 	.data : { *(.data) } :dat
 	/DISCARD/ : { *(*) }
diff --git a/arch/ia64/scripts/pvcheck.sed b/arch/ia64/scripts/pvcheck.sed
new file mode 100644
index 00000000000..e59809a3fc0
--- /dev/null
+++ b/arch/ia64/scripts/pvcheck.sed
@@ -0,0 +1,33 @@
+#
+# Checker for paravirtualizations of privileged operations.
+#
+s/ssm.*psr\.ic.*/.warning \"ssm psr.ic should not be used directly\"/g
+s/rsm.*psr\.ic.*/.warning \"rsm psr.ic should not be used directly\"/g
+s/ssm.*psr\.i.*/.warning \"ssm psr.i should not be used directly\"/g
+s/rsm.*psr\.i.*/.warning \"rsm psr.i should not be used directly\"/g
+s/ssm.*psr\.dt.*/.warning \"ssm psr.dt should not be used directly\"/g
+s/rsm.*psr\.dt.*/.warning \"rsm psr.dt should not be used directly\"/g
+s/mov.*=.*cr\.ifa/.warning \"cr.ifa should not used directly\"/g
+s/mov.*=.*cr\.itir/.warning \"cr.itir should not used directly\"/g
+s/mov.*=.*cr\.isr/.warning \"cr.isr should not used directly\"/g
+s/mov.*=.*cr\.iha/.warning \"cr.iha should not used directly\"/g
+s/mov.*=.*cr\.ipsr/.warning \"cr.ipsr should not used directly\"/g
+s/mov.*=.*cr\.iim/.warning \"cr.iim should not used directly\"/g
+s/mov.*=.*cr\.iip/.warning \"cr.iip should not used directly\"/g
+s/mov.*=.*cr\.ivr/.warning \"cr.ivr should not used directly\"/g
+s/mov.*=[^\.]*psr/.warning \"psr should not used directly\"/g	# avoid ar.fpsr
+s/mov.*=.*ar\.eflags/.warning \"ar.eflags should not used directly\"/g
+s/mov.*=.*ar\.itc.*/.warning \"ar.itc should not used directly\"/g
+s/mov.*cr\.ifa.*=.*/.warning \"cr.ifa should not used directly\"/g
+s/mov.*cr\.itir.*=.*/.warning \"cr.itir should not used directly\"/g
+s/mov.*cr\.iha.*=.*/.warning \"cr.iha should not used directly\"/g
+s/mov.*cr\.ipsr.*=.*/.warning \"cr.ipsr should not used directly\"/g
+s/mov.*cr\.ifs.*=.*/.warning \"cr.ifs should not used directly\"/g
+s/mov.*cr\.iip.*=.*/.warning \"cr.iip should not used directly\"/g
+s/mov.*cr\.kr.*=.*/.warning \"cr.kr should not used directly\"/g
+s/mov.*ar\.eflags.*=.*/.warning \"ar.eflags should not used directly\"/g
+s/itc\.i.*/.warning \"itc.i should not be used directly.\"/g
+s/itc\.d.*/.warning \"itc.d should not be used directly.\"/g
+s/bsw\.0/.warning \"bsw.0 should not be used directly.\"/g
+s/bsw\.1/.warning \"bsw.1 should not be used directly.\"/g
+s/ptc\.ga.*/.warning \"ptc.ga should not be used directly.\"/g
diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py
index c27849889e1..2bfd941ff7c 100755..100644
--- a/arch/ia64/scripts/unwcheck.py
+++ b/arch/ia64/scripts/unwcheck.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python
 #
 # Usage: unwcheck.py FILE
 #
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index ab9c48c8801..d27df1d45da 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -4,18 +4,15 @@
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.
 #
-# Copyright (C) 1999,2001-2005 Silicon Graphics, Inc.  All Rights Reserved.
+# Copyright (C) 1999,2001-2006,2008 Silicon Graphics, Inc.  All Rights Reserved.
 #
 
-CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+ccflags-y := -Iarch/ia64/sn/include
 
 obj-y				+= setup.o bte.o bte_error.o irq.o mca.o idle.o \
-				   huberror.o io_init.o iomv.o klconflib.o pio_phys.o \
+				   huberror.o io_acpi_init.o io_common.o \
+				   io_init.o iomv.o klconflib.o pio_phys.o \
 				   sn2/
 obj-$(CONFIG_IA64_GENERIC)      += machvec.o
 obj-$(CONFIG_SGI_TIOCX)		+= tiocx.o
-obj-$(CONFIG_IA64_SGI_SN_XP)	+= xp.o
-xp-y				:= xp_main.o xp_nofault.o
-obj-$(CONFIG_IA64_SGI_SN_XP)	+= xpc.o
-xpc-y				:= xpc_main.o xpc_channel.o xpc_partition.o
-obj-$(CONFIG_IA64_SGI_SN_XP)	+= xpnet.o
+obj-$(CONFIG_PCI_MSI)		+= msi_sn.o
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index e952ef4f6d9..cad775a1a15 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -3,10 +3,9 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2007 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <asm/sn/nodepda.h>
 #include <asm/sn/addrs.h>
@@ -20,6 +19,7 @@
 #include <linux/bootmem.h>
 #include <linux/string.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 #include <asm/sn/bte.h>
 
@@ -64,7 +64,7 @@ static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode)
  * Use the block transfer engine to move kernel memory from src to dest
  * using the assigned mode.
  *
- * Paramaters:
+ * Parameters:
  *   src - physical address of the transfer source.
  *   dest - physical address of the transfer destination.
  *   len - number of bytes to transfer from source to dest.
@@ -98,9 +98,10 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
 		return BTE_SUCCESS;
 	}
 
-	BUG_ON((len & L1_CACHE_MASK) ||
-		 (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK));
-	BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT)));
+	BUG_ON(len & L1_CACHE_MASK);
+	BUG_ON(src & L1_CACHE_MASK);
+	BUG_ON(dest & L1_CACHE_MASK);
+	BUG_ON(len > BTE_MAX_XFER);
 
 	/*
 	 * Start with interface corresponding to cpu number
@@ -228,7 +229,7 @@ retry_bteop:
 		     BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));
 
 	if (transfer_stat & IBLS_ERROR) {
-		bte_status = transfer_stat & ~IBLS_ERROR;
+		bte_status = BTE_GET_ERROR_STATUS(transfer_stat);
 	} else {
 		bte_status = BTE_SUCCESS;
 	}
@@ -248,7 +249,7 @@ EXPORT_SYMBOL(bte_copy);
  * use the block transfer engine to move kernel
  * memory from src to dest using the assigned mode.
  *
- * Paramaters:
+ * Parameters:
  *   src - physical address of the transfer source.
  *   dest - physical address of the transfer destination.
  *   len - number of bytes to transfer from source to dest.
@@ -256,7 +257,7 @@ EXPORT_SYMBOL(bte_copy);
  *          for IBCT0/1 in the SGI documentation.
  *
  * NOTE: If the source, dest, and len are all cache line aligned,
- * then it would be _FAR_ preferrable to use bte_copy instead.
+ * then it would be _FAR_ preferable to use bte_copy instead.
  */
 bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 {
@@ -278,8 +279,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 	}
 
 	/* temporary buffer used during unaligned transfers */
-	bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES,
-				     GFP_KERNEL | GFP_DMA);
+	bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL);
 	if (bteBlock_unaligned == NULL) {
 		return BTEFAIL_NOTAVAIL;
 	}
@@ -302,7 +302,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 	 * a standard bte copy.
 	 *
 	 * One nasty exception to the above rule is when the
-	 * source and destination are not symetrically
+	 * source and destination are not symmetrically
 	 * mis-aligned.  If the source offset from the first
 	 * cache line is different from the destination offset,
 	 * we make the first section be the entire transfer
@@ -339,7 +339,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 
 			if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
 				/*
-				 * We have two contigous bcopy
+				 * We have two contiguous bcopy
 				 * blocks.  Merge them.
 				 */
 				headBcopyLen += footBcopyLen;
@@ -377,20 +377,19 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
 	} else {
 
 		/*
-		 * The transfer is not symetric, we will
+		 * The transfer is not symmetric, we will
 		 * allocate a buffer large enough for all the
 		 * data, bte_copy into that buffer and then
 		 * bcopy to the destination.
 		 */
 
-		/* Add the leader from source */
-		headBteLen = len + (src & L1_CACHE_MASK);
-		/* Add the trailing bytes from footer. */
-		headBteLen += L1_CACHE_BYTES - (headBteLen & L1_CACHE_MASK);
-		headBteSource = src & ~L1_CACHE_MASK;
 		headBcopySrcOffset = src & L1_CACHE_MASK;
 		headBcopyDest = dest;
 		headBcopyLen = len;
+
+		headBteSource = src - headBcopySrcOffset;
+		/* Add the leading and trailing bytes from source */
+		headBteLen = L1_CACHE_ALIGN(len + headBcopySrcOffset);
 	}
 
 	if (headBcopyLen > 0) {
diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
index f1ec1370b3e..4cb09f3f1ef 100644
--- a/arch/ia64/sn/kernel/bte_error.c
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2007 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/types.h>
@@ -78,7 +78,7 @@ int shub1_bte_error_handler(unsigned long _nodepda)
 		 * There are errors which still need to be cleaned up by
 		 * hubiio_crb_error_handler
 		 */
-		mod_timer(recovery_timer, HZ * 5);
+		mod_timer(recovery_timer, jiffies + (HZ * 5));
 		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
 			    smp_processor_id()));
 		return 1;
@@ -95,7 +95,7 @@ int shub1_bte_error_handler(unsigned long _nodepda)
 			icrbd.ii_icrb0_d_regval =
 			    REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
 			if (icrbd.d_bteop) {
-				mod_timer(recovery_timer, HZ * 5);
+				mod_timer(recovery_timer, jiffies + (HZ * 5));
 				BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
 					    err_nodepda, smp_processor_id(),
 					    i));
@@ -105,7 +105,7 @@ int shub1_bte_error_handler(unsigned long _nodepda)
 	}
 
 	BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id()));
-	/* Reenable both bte interfaces */
+	/* Re-enable both bte interfaces */
 	imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM);
 	imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1;
 	REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval);
@@ -148,9 +148,13 @@ int shub2_bte_error_handler(unsigned long _nodepda)
 	for (i = 0; i < BTES_PER_NODE; i++) {
 		bte = &err_nodepda->bte_if[i];
 		status = BTE_LNSTAT_LOAD(bte);
-		if ((status & IBLS_ERROR) || !(status & IBLS_BUSY))
+		if (status & IBLS_ERROR) {
+			bte->bh_error = BTE_SHUB2_ERROR(status);
 			continue;
-		mod_timer(recovery_timer, HZ * 5);
+		}
+		if (!(status & IBLS_BUSY))
+			continue;
+		mod_timer(recovery_timer, jiffies + (HZ * 5));
 		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
 			    smp_processor_id()));
 		return 1;
@@ -243,7 +247,7 @@ bte_crb_error_handler(cnodeid_t cnode, int btenum,
 
 	/*
 	 * The caller has already figured out the error type, we save that
-	 * in the bte handle structure for the thread excercising the
+	 * in the bte handle structure for the thread exercising the
 	 * interface to consume.
 	 */
 	bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET;
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
index 56ab6bae00e..f925dec2da9 100644
--- a/arch/ia64/sn/kernel/huberror.c
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -3,12 +3,11 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1992 - 1997, 2000,2002-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1992 - 1997, 2000,2002-2007 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/types.h>
 #include <linux/interrupt.h>
-#include <linux/pci.h>
 #include <asm/delay.h>
 #include <asm/sn/sn_sal.h>
 #include "ioerror.h"
@@ -22,7 +21,7 @@
 void hubiio_crb_error_handler(struct hubdev_info *hubdev_info);
 extern void bte_crb_error_handler(cnodeid_t, int, int, ioerror_t *,
 				  int);
-static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep)
+static irqreturn_t hub_eint_handler(int irq, void *arg)
 {
 	struct hubdev_info *hubdev_info;
 	struct ia64_sal_retval ret_stuff;
@@ -38,12 +37,20 @@ static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep)
 			(u64) nasid, 0, 0, 0, 0, 0, 0);
 
 		if ((int)ret_stuff.v0)
-			panic("hubii_eint_handler(): Fatal TIO Error");
+			panic("%s: Fatal %s Error", __func__,
+				((nasid & 1) ? "TIO" : "HUBII"));
 
 		if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
 			(void)hubiio_crb_error_handler(hubdev_info);
-	} else 
-		bte_error_handler((unsigned long)NODEPDA(nasid_to_cnodeid(nasid)));
+	} else
+		if (nasid & 1) {	/* TIO errors */
+			SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
+				(u64) nasid, 0, 0, 0, 0, 0, 0);
+
+			if ((int)ret_stuff.v0)
+				panic("%s: Fatal TIO Error", __func__);
+		} else
+			bte_error_handler((unsigned long)NODEPDA(nasid_to_cnodeid(nasid)));
 
 	return IRQ_HANDLED;
 }
@@ -178,11 +185,15 @@ void hubiio_crb_error_handler(struct hubdev_info *hubdev_info)
  */
 void hub_error_init(struct hubdev_info *hubdev_info)
 {
-	if (request_irq(SGI_II_ERROR, (void *)hub_eint_handler, SA_SHIRQ,
-			"SN_hub_error", (void *)hubdev_info))
-		printk("hub_error_init: Failed to request_irq for 0x%p\n",
+
+	if (request_irq(SGI_II_ERROR, hub_eint_handler, IRQF_SHARED,
+			"SN_hub_error", hubdev_info)) {
+		printk(KERN_ERR "hub_error_init: Failed to request_irq for 0x%p\n",
 		    hubdev_info);
-	return;
+		return;
+	}
+	irq_set_handler(SGI_II_ERROR, handle_level_irq);
+	sn_set_err_irq_affinity(SGI_II_ERROR);
 }
 
 
@@ -195,11 +206,15 @@ void hub_error_init(struct hubdev_info *hubdev_info)
  */
 void ice_error_init(struct hubdev_info *hubdev_info)
 {
+
         if (request_irq
-            (SGI_TIO_ERROR, (void *)hub_eint_handler, SA_SHIRQ, "SN_TIO_error",
-             (void *)hubdev_info))
+            (SGI_TIO_ERROR, (void *)hub_eint_handler, IRQF_SHARED, "SN_TIO_error",
+             (void *)hubdev_info)) {
                 printk("ice_error_init: request_irq() error hubdev_info 0x%p\n",
                        hubdev_info);
-        return;
+		return;
+	}
+	irq_set_handler(SGI_TIO_ERROR, handle_level_irq);
+	sn_set_err_irq_affinity(SGI_TIO_ERROR);
 }
 
diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c
new file mode 100644
index 00000000000..0640739cc20
--- /dev/null
+++ b/arch/ia64/sn/kernel/io_acpi_init.c
@@ -0,0 +1,510 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <asm/sn/types.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/sn_sal.h>
+#include "xtalk/hubdev.h"
+#include <linux/acpi.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+
+/*
+ * The code in this file will only be executed when running with
+ * a PROM that has ACPI IO support. (i.e., SN_ACPI_BASE_SUPPORT() == 1)
+ */
+
+
+/*
+ * This value must match the UUID the PROM uses
+ * (io/acpi/defblk.c) when building a vendor descriptor.
+ */
+struct acpi_vendor_uuid sn_uuid = {
+	.subtype = 0,
+	.data	= { 0x2c, 0xc6, 0xa6, 0xfe, 0x9c, 0x44, 0xda, 0x11,
+		    0xa2, 0x7c, 0x08, 0x00, 0x69, 0x13, 0xea, 0x51 },
+};
+
+struct sn_pcidev_match {
+	u8 bus;
+	unsigned int devfn;
+	acpi_handle handle;
+};
+
+/*
+ * Perform the early IO init in PROM.
+ */
+static long
+sal_ioif_init(u64 *result)
+{
+	struct ia64_sal_retval isrv = {0,0,0,0};
+
+	SAL_CALL_NOLOCK(isrv,
+			SN_SAL_IOIF_INIT, 0, 0, 0, 0, 0, 0, 0);
+	*result = isrv.v0;
+	return isrv.status;
+}
+
+/*
+ * sn_acpi_hubdev_init() - This function is called by acpi_ns_get_device_callback()
+ *			   for all SGIHUB and SGITIO acpi devices defined in the
+ *			   DSDT. It obtains the hubdev_info pointer from the
+ *			   ACPI vendor resource, which the PROM setup, and sets up the
+ *			   hubdev_info in the pda.
+ */
+
+static acpi_status __init
+sn_acpi_hubdev_init(acpi_handle handle, u32 depth, void *context, void **ret)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	u64 addr;
+	struct hubdev_info *hubdev;
+	struct hubdev_info *hubdev_ptr;
+	int i;
+	u64 nasid;
+	struct acpi_resource *resource;
+	acpi_status status;
+	struct acpi_resource_vendor_typed *vendor;
+	extern void sn_common_hubdev_init(struct hubdev_info *);
+
+	status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
+					  &sn_uuid, &buffer);
+	if (ACPI_FAILURE(status)) {
+		acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR
+		       "sn_acpi_hubdev_init: acpi_get_vendor_resource() "
+		       "(0x%x) failed for: %s\n", status,
+			(char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		return AE_OK;		/* Continue walking namespace */
+	}
+
+	resource = buffer.pointer;
+	vendor = &resource->data.vendor_typed;
+	if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) !=
+	    sizeof(struct hubdev_info *)) {
+		acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR
+		       "sn_acpi_hubdev_init: Invalid vendor data length: "
+		       "%d for: %s\n",
+			vendor->byte_length, (char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		goto exit;
+	}
+
+	memcpy(&addr, vendor->byte_data, sizeof(struct hubdev_info *));
+	hubdev_ptr = __va((struct hubdev_info *) addr);
+
+	nasid = hubdev_ptr->hdi_nasid;
+	i = nasid_to_cnodeid(nasid);
+	hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo);
+	*hubdev = *hubdev_ptr;
+	sn_common_hubdev_init(hubdev);
+
+exit:
+	kfree(buffer.pointer);
+	return AE_OK;		/* Continue walking namespace */
+}
+
+/*
+ * sn_get_bussoft_ptr() - The pcibus_bussoft pointer is found in
+ *			  the ACPI Vendor resource for this bus.
+ */
+static struct pcibus_bussoft *
+sn_get_bussoft_ptr(struct pci_bus *bus)
+{
+	u64 addr;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	acpi_handle handle;
+	struct pcibus_bussoft *prom_bussoft_ptr;
+	struct acpi_resource *resource;
+	acpi_status status;
+	struct acpi_resource_vendor_typed *vendor;
+
+
+	handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
+	status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
+					  &sn_uuid, &buffer);
+	if (ACPI_FAILURE(status)) {
+		acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR "%s: "
+		       "acpi_get_vendor_resource() failed (0x%x) for: %s\n",
+		       __func__, status, (char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		return NULL;
+	}
+	resource = buffer.pointer;
+	vendor = &resource->data.vendor_typed;
+
+	if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) !=
+	     sizeof(struct pcibus_bussoft *)) {
+		printk(KERN_ERR
+		       "%s: Invalid vendor data length %d\n",
+			__func__, vendor->byte_length);
+		kfree(buffer.pointer);
+		return NULL;
+	}
+	memcpy(&addr, vendor->byte_data, sizeof(struct pcibus_bussoft *));
+	prom_bussoft_ptr = __va((struct pcibus_bussoft *) addr);
+	kfree(buffer.pointer);
+
+	return prom_bussoft_ptr;
+}
+
+/*
+ * sn_extract_device_info - Extract the pcidev_info and the sn_irq_info
+ *			    pointers from the vendor resource using the
+ *			    provided acpi handle, and copy the structures
+ *			    into the argument buffers.
+ */
+static int
+sn_extract_device_info(acpi_handle handle, struct pcidev_info **pcidev_info,
+		    struct sn_irq_info **sn_irq_info)
+{
+	u64 addr;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct sn_irq_info *irq_info, *irq_info_prom;
+	struct pcidev_info *pcidev_ptr, *pcidev_prom_ptr;
+	struct acpi_resource *resource;
+	int ret = 0;
+	acpi_status status;
+	struct acpi_resource_vendor_typed *vendor;
+
+	/*
+	 * The pointer to this device's pcidev_info structure in
+	 * the PROM, is in the vendor resource.
+	 */
+	status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
+					  &sn_uuid, &buffer);
+	if (ACPI_FAILURE(status)) {
+		acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR
+		       "%s: acpi_get_vendor_resource() failed (0x%x) for: %s\n",
+			__func__, status, (char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		return 1;
+	}
+
+	resource = buffer.pointer;
+	vendor = &resource->data.vendor_typed;
+	if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) !=
+	    sizeof(struct pci_devdev_info *)) {
+		acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR
+		       "%s: Invalid vendor data length: %d for: %s\n",
+			 __func__, vendor->byte_length,
+			(char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		ret = 1;
+		goto exit;
+	}
+
+	pcidev_ptr = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL);
+	if (!pcidev_ptr)
+		panic("%s: Unable to alloc memory for pcidev_info", __func__);
+
+	memcpy(&addr, vendor->byte_data, sizeof(struct pcidev_info *));
+	pcidev_prom_ptr = __va(addr);
+	memcpy(pcidev_ptr, pcidev_prom_ptr, sizeof(struct pcidev_info));
+
+	/* Get the IRQ info */
+	irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+	if (!irq_info)
+		 panic("%s: Unable to alloc memory for sn_irq_info", __func__);
+
+	if (pcidev_ptr->pdi_sn_irq_info) {
+		irq_info_prom = __va(pcidev_ptr->pdi_sn_irq_info);
+		memcpy(irq_info, irq_info_prom, sizeof(struct sn_irq_info));
+	}
+
+	*pcidev_info = pcidev_ptr;
+	*sn_irq_info = irq_info;
+
+exit:
+	kfree(buffer.pointer);
+	return ret;
+}
+
+static unsigned int
+get_host_devfn(acpi_handle device_handle, acpi_handle rootbus_handle)
+{
+	unsigned long long adr;
+	acpi_handle child;
+	unsigned int devfn;
+	int function;
+	acpi_handle parent;
+	int slot;
+	acpi_status status;
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	acpi_get_name(device_handle, ACPI_FULL_PATHNAME, &name_buffer);
+
+	/*
+	 * Do an upward search to find the root bus device, and
+	 * obtain the host devfn from the previous child device.
+	 */
+	child = device_handle;
+	while (child) {
+		status = acpi_get_parent(child, &parent);
+		if (ACPI_FAILURE(status)) {
+			printk(KERN_ERR "%s: acpi_get_parent() failed "
+			       "(0x%x) for: %s\n", __func__, status,
+				(char *)name_buffer.pointer);
+			panic("%s: Unable to find host devfn\n", __func__);
+		}
+		if (parent == rootbus_handle)
+			break;
+		child = parent;
+	}
+	if (!child) {
+		printk(KERN_ERR "%s: Unable to find root bus for: %s\n",
+		       __func__, (char *)name_buffer.pointer);
+		BUG();
+	}
+
+	status = acpi_evaluate_integer(child, METHOD_NAME__ADR, NULL, &adr);
+	if (ACPI_FAILURE(status)) {
+		printk(KERN_ERR "%s: Unable to get _ADR (0x%x) for: %s\n",
+		       __func__, status, (char *)name_buffer.pointer);
+		panic("%s: Unable to find host devfn\n", __func__);
+	}
+
+	kfree(name_buffer.pointer);
+
+	slot = (adr >> 16) & 0xffff;
+	function = adr & 0xffff;
+	devfn = PCI_DEVFN(slot, function);
+	return devfn;
+}
+
+/*
+ * find_matching_device - Callback routine to find the ACPI device
+ *			  that matches up with our pci_dev device.
+ *			  Matching is done on bus number and devfn.
+ *			  To find the bus number for a particular
+ *			  ACPI device, we must look at the _BBN method
+ *			  of its parent.
+ */
+static acpi_status
+find_matching_device(acpi_handle handle, u32 lvl, void *context, void **rv)
+{
+	unsigned long long bbn = -1;
+	unsigned long long adr;
+	acpi_handle parent = NULL;
+	acpi_status status;
+	unsigned int devfn;
+	int function;
+	int slot;
+	struct sn_pcidev_match *info = context;
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+        status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL,
+                                       &adr);
+        if (ACPI_SUCCESS(status)) {
+		status = acpi_get_parent(handle, &parent);
+		if (ACPI_FAILURE(status)) {
+			acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+			printk(KERN_ERR
+			       "%s: acpi_get_parent() failed (0x%x) for: %s\n",
+				__func__, status, (char *)name_buffer.pointer);
+			kfree(name_buffer.pointer);
+			return AE_OK;
+		}
+		status = acpi_evaluate_integer(parent, METHOD_NAME__BBN,
+					       NULL, &bbn);
+		if (ACPI_FAILURE(status)) {
+			acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer);
+			printk(KERN_ERR
+			  "%s: Failed to find _BBN in parent of: %s\n",
+					__func__, (char *)name_buffer.pointer);
+			kfree(name_buffer.pointer);
+			return AE_OK;
+		}
+
+                slot = (adr >> 16) & 0xffff;
+                function = adr & 0xffff;
+                devfn = PCI_DEVFN(slot, function);
+                if ((info->devfn == devfn) && (info->bus == bbn)) {
+			/* We have a match! */
+			info->handle = handle;
+			return 1;
+		}
+	}
+	return AE_OK;
+}
+
+/*
+ * sn_acpi_get_pcidev_info - Search ACPI namespace for the acpi
+ *			     device matching the specified pci_dev,
+ *			     and return the pcidev info and irq info.
+ */
+int
+sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info,
+			struct sn_irq_info **sn_irq_info)
+{
+	unsigned int host_devfn;
+	struct sn_pcidev_match pcidev_match;
+	acpi_handle rootbus_handle;
+	unsigned long long segment;
+	acpi_status status;
+	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	rootbus_handle = acpi_device_handle(PCI_CONTROLLER(dev)->companion);
+        status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL,
+                                       &segment);
+        if (ACPI_SUCCESS(status)) {
+		if (segment != pci_domain_nr(dev)) {
+			acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME,
+				&name_buffer);
+			printk(KERN_ERR
+			       "%s: Segment number mismatch, 0x%llx vs 0x%x for: %s\n",
+			       __func__, segment, pci_domain_nr(dev),
+			       (char *)name_buffer.pointer);
+			kfree(name_buffer.pointer);
+			return 1;
+		}
+	} else {
+		acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME, &name_buffer);
+		printk(KERN_ERR "%s: Unable to get __SEG from: %s\n",
+		       __func__, (char *)name_buffer.pointer);
+		kfree(name_buffer.pointer);
+		return 1;
+	}
+
+	/*
+	 * We want to search all devices in this segment/domain
+	 * of the ACPI namespace for the matching ACPI device,
+	 * which holds the pcidev_info pointer in its vendor resource.
+	 */
+	pcidev_match.bus = dev->bus->number;
+	pcidev_match.devfn = dev->devfn;
+	pcidev_match.handle = NULL;
+
+	acpi_walk_namespace(ACPI_TYPE_DEVICE, rootbus_handle, ACPI_UINT32_MAX,
+			    find_matching_device, NULL, &pcidev_match, NULL);
+
+	if (!pcidev_match.handle) {
+		printk(KERN_ERR
+		       "%s: Could not find matching ACPI device for %s.\n",
+		       __func__, pci_name(dev));
+		return 1;
+	}
+
+	if (sn_extract_device_info(pcidev_match.handle, pcidev_info, sn_irq_info))
+		return 1;
+
+	/* Build up the pcidev_info.pdi_slot_host_handle */
+	host_devfn = get_host_devfn(pcidev_match.handle, rootbus_handle);
+	(*pcidev_info)->pdi_slot_host_handle =
+			((unsigned long) pci_domain_nr(dev) << 40) |
+					/* bus == 0 */
+					host_devfn;
+	return 0;
+}
+
+/*
+ * sn_acpi_slot_fixup - Obtain the pcidev_info and sn_irq_info.
+ *			Perform any SN specific slot fixup.
+ *			At present there does not appear to be
+ *			any generic way to handle a ROM image
+ *			that has been shadowed by the PROM, so
+ *			we pass a pointer to it	within the
+ *			pcidev_info structure.
+ */
+
+void
+sn_acpi_slot_fixup(struct pci_dev *dev)
+{
+	void __iomem *addr;
+	struct pcidev_info *pcidev_info = NULL;
+	struct sn_irq_info *sn_irq_info = NULL;
+	size_t image_size, size;
+
+	if (sn_acpi_get_pcidev_info(dev, &pcidev_info, &sn_irq_info)) {
+		panic("%s:  Failure obtaining pcidev_info for %s\n",
+		      __func__, pci_name(dev));
+	}
+
+	if (pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE]) {
+		/*
+		 * A valid ROM image exists and has been shadowed by the
+		 * PROM. Setup the pci_dev ROM resource with the address
+		 * of the shadowed copy, and the actual length of the ROM image.
+		 */
+		size = pci_resource_len(dev, PCI_ROM_RESOURCE);
+		addr = ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE],
+			       size);
+		image_size = pci_get_rom_size(dev, addr, size);
+		dev->resource[PCI_ROM_RESOURCE].start = (unsigned long) addr;
+		dev->resource[PCI_ROM_RESOURCE].end =
+					(unsigned long) addr + image_size - 1;
+		dev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_BIOS_COPY;
+	}
+	sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info);
+}
+
+EXPORT_SYMBOL(sn_acpi_slot_fixup);
+
+
+/*
+ * sn_acpi_bus_fixup -  Perform SN specific setup of software structs
+ *			(pcibus_bussoft, pcidev_info) and hardware
+ *			registers, for the specified bus and devices under it.
+ */
+void
+sn_acpi_bus_fixup(struct pci_bus *bus)
+{
+	struct pci_dev *pci_dev = NULL;
+	struct pcibus_bussoft *prom_bussoft_ptr;
+
+	if (!bus->parent) {	/* If root bus */
+		prom_bussoft_ptr = sn_get_bussoft_ptr(bus);
+		if (prom_bussoft_ptr == NULL) {
+			printk(KERN_ERR
+			       "%s: 0x%04x:0x%02x Unable to "
+			       "obtain prom_bussoft_ptr\n",
+			       __func__, pci_domain_nr(bus), bus->number);
+			return;
+		}
+		sn_common_bus_fixup(bus, prom_bussoft_ptr);
+	}
+	list_for_each_entry(pci_dev, &bus->devices, bus_list) {
+		sn_acpi_slot_fixup(pci_dev);
+	}
+}
+
+/*
+ * sn_io_acpi_init - PROM has ACPI support for IO, defining at a minimum the
+ *		     nodes and root buses in the DSDT. As a result, bus scanning
+ *		     will be initiated by the Linux ACPI code.
+ */
+
+void __init
+sn_io_acpi_init(void)
+{
+	u64 result;
+	long status;
+
+	/* SN Altix does not follow the IOSAPIC IRQ routing model */
+	acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
+
+	/* Setup hubdev_info for all SGIHUB/SGITIO devices */
+	acpi_get_devices("SGIHUB", sn_acpi_hubdev_init, NULL, NULL);
+	acpi_get_devices("SGITIO", sn_acpi_hubdev_init, NULL, NULL);
+
+	status = sal_ioif_init(&result);
+	if (status || result)
+		panic("sal_ioif_init failed: [%lx] %s\n",
+		      status, ia64_sal_strerror(status));
+}
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
new file mode 100644
index 00000000000..11f2275570f
--- /dev/null
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -0,0 +1,564 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <asm/sn/types.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/sn_feature_sets.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/io.h>
+#include <asm/sn/l1.h>
+#include <asm/sn/module.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/simulator.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/tioca_provider.h>
+#include <asm/sn/tioce_provider.h>
+#include "xtalk/hubdev.h"
+#include "xtalk/xwidgetdev.h"
+#include <linux/acpi.h>
+#include <asm/sn/sn2/sn_hwperf.h>
+#include <asm/sn/acpi.h>
+
+extern void sn_init_cpei_timer(void);
+extern void register_sn_procfs(void);
+extern void sn_io_acpi_init(void);
+extern void sn_io_init(void);
+
+
+static struct list_head sn_sysdata_list;
+
+/* sysdata list struct */
+struct sysdata_el {
+	struct list_head entry;
+	void *sysdata;
+};
+
+int sn_ioif_inited;		/* SN I/O infrastructure initialized? */
+
+int sn_acpi_rev;		/* SN ACPI revision */
+EXPORT_SYMBOL_GPL(sn_acpi_rev);
+
+struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES];	/* indexed by asic type */
+
+/*
+ * Hooks and struct for unsupported pci providers
+ */
+
+static dma_addr_t
+sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int type)
+{
+	return 0;
+}
+
+static void
+sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction)
+{
+	return;
+}
+
+static void *
+sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller)
+{
+	return NULL;
+}
+
+static struct sn_pcibus_provider sn_pci_default_provider = {
+	.dma_map = sn_default_pci_map,
+	.dma_map_consistent = sn_default_pci_map,
+	.dma_unmap = sn_default_pci_unmap,
+	.bus_fixup = sn_default_pci_bus_fixup,
+};
+
+/*
+ * Retrieve the DMA Flush List given nasid, widget, and device.
+ * This list is needed to implement the WAR - Flush DMA data on PIO Reads.
+ */
+static inline u64
+sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
+			     u64 address)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST,
+			(u64) nasid, (u64) widget_num,
+			(u64) device_num, (u64) address, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+/*
+ * sn_pcidev_info_get() - Retrieve the pcidev_info struct for the specified
+ *			  device.
+ */
+inline struct pcidev_info *
+sn_pcidev_info_get(struct pci_dev *dev)
+{
+	struct pcidev_info *pcidev;
+
+	list_for_each_entry(pcidev,
+			    &(SN_PLATFORM_DATA(dev)->pcidev_info), pdi_list) {
+		if (pcidev->pdi_linux_pcidev == dev)
+			return pcidev;
+	}
+	return NULL;
+}
+
+/* Older PROM flush WAR
+ *
+ * 01/16/06 -- This war will be in place until a new official PROM is released.
+ * Additionally note that the struct sn_flush_device_war also has to be
+ * removed from arch/ia64/sn/include/xtalk/hubdev.h
+ */
+
+static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device,
+			       struct sn_flush_device_common *common)
+{
+	struct sn_flush_device_war *war_list;
+	struct sn_flush_device_war *dev_entry;
+	struct ia64_sal_retval isrv = {0,0,0,0};
+
+	printk_once(KERN_WARNING
+		"PROM version < 4.50 -- implementing old PROM flush WAR\n");
+
+	war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL);
+	BUG_ON(!war_list);
+
+	SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
+			nasid, widget, __pa(war_list), 0, 0, 0 ,0);
+	if (isrv.status)
+		panic("sn_device_fixup_war failed: %s\n",
+		      ia64_sal_strerror(isrv.status));
+
+	dev_entry = war_list + device;
+	memcpy(common,dev_entry, sizeof(*common));
+	kfree(war_list);
+
+	return isrv.status;
+}
+
+/*
+ * sn_common_hubdev_init() - This routine is called to initialize the HUB data
+ *			     structure for each node in the system.
+ */
+void __init
+sn_common_hubdev_init(struct hubdev_info *hubdev)
+{
+
+	struct sn_flush_device_kernel *sn_flush_device_kernel;
+	struct sn_flush_device_kernel *dev_entry;
+	s64 status;
+	int widget, device, size;
+
+	/* Attach the error interrupt handlers */
+	if (hubdev->hdi_nasid & 1)	/* If TIO */
+		ice_error_init(hubdev);
+	else
+		hub_error_init(hubdev);
+
+	for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++)
+		hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev;
+
+	if (!hubdev->hdi_flush_nasid_list.widget_p)
+		return;
+
+	size = (HUB_WIDGET_ID_MAX + 1) *
+		sizeof(struct sn_flush_device_kernel *);
+	hubdev->hdi_flush_nasid_list.widget_p =
+		kzalloc(size, GFP_KERNEL);
+	BUG_ON(!hubdev->hdi_flush_nasid_list.widget_p);
+
+	for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
+		size = DEV_PER_WIDGET *
+			sizeof(struct sn_flush_device_kernel);
+		sn_flush_device_kernel = kzalloc(size, GFP_KERNEL);
+		BUG_ON(!sn_flush_device_kernel);
+
+		dev_entry = sn_flush_device_kernel;
+		for (device = 0; device < DEV_PER_WIDGET;
+		     device++, dev_entry++) {
+			size = sizeof(struct sn_flush_device_common);
+			dev_entry->common = kzalloc(size, GFP_KERNEL);
+			BUG_ON(!dev_entry->common);
+			if (sn_prom_feature_available(PRF_DEVICE_FLUSH_LIST))
+				status = sal_get_device_dmaflush_list(
+					     hubdev->hdi_nasid, widget, device,
+					     (u64)(dev_entry->common));
+			else
+				status = sn_device_fixup_war(hubdev->hdi_nasid,
+							     widget, device,
+							     dev_entry->common);
+			if (status != SALRET_OK)
+				panic("SAL call failed: %s\n",
+				      ia64_sal_strerror(status));
+
+			spin_lock_init(&dev_entry->sfdl_flush_lock);
+		}
+
+		if (sn_flush_device_kernel)
+			hubdev->hdi_flush_nasid_list.widget_p[widget] =
+							 sn_flush_device_kernel;
+	}
+}
+
+void sn_pci_unfixup_slot(struct pci_dev *dev)
+{
+	struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev;
+
+	sn_irq_unfixup(dev);
+	pci_dev_put(host_pci_dev);
+	pci_dev_put(dev);
+}
+
+/*
+ * sn_pci_fixup_slot()
+ */
+void sn_pci_fixup_slot(struct pci_dev *dev, struct pcidev_info *pcidev_info,
+		       struct sn_irq_info *sn_irq_info)
+{
+	int segment = pci_domain_nr(dev->bus);
+	struct pcibus_bussoft *bs;
+	struct pci_dev *host_pci_dev;
+	unsigned int bus_no, devfn;
+
+	pci_dev_get(dev); /* for the sysdata pointer */
+
+	/* Add pcidev_info to list in pci_controller.platform_data */
+	list_add_tail(&pcidev_info->pdi_list,
+		      &(SN_PLATFORM_DATA(dev->bus)->pcidev_info));
+	/*
+	 * Using the PROMs values for the PCI host bus, get the Linux
+	 * PCI host_pci_dev struct and set up host bus linkages
+ 	 */
+
+	bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff;
+	devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff;
+	host_pci_dev = pci_get_domain_bus_and_slot(segment, bus_no, devfn);
+
+	pcidev_info->host_pci_dev = host_pci_dev;
+	pcidev_info->pdi_linux_pcidev = dev;
+	pcidev_info->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev);
+	bs = SN_PCIBUS_BUSSOFT(dev->bus);
+	pcidev_info->pdi_pcibus_info = bs;
+
+	if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) {
+		SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type];
+	} else {
+		SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider;
+	}
+
+	/* Only set up IRQ stuff if this device has a host bus context */
+	if (bs && sn_irq_info->irq_irq) {
+		pcidev_info->pdi_sn_irq_info = sn_irq_info;
+		dev->irq = pcidev_info->pdi_sn_irq_info->irq_irq;
+		sn_irq_fixup(dev, sn_irq_info);
+	} else {
+		pcidev_info->pdi_sn_irq_info = NULL;
+		kfree(sn_irq_info);
+	}
+}
+
+/*
+ * sn_common_bus_fixup - Perform platform specific bus fixup.
+ *			 Execute the ASIC specific fixup routine
+ *			 for this bus.
+ */
+void
+sn_common_bus_fixup(struct pci_bus *bus,
+		    struct pcibus_bussoft *prom_bussoft_ptr)
+{
+	int cnode;
+	struct pci_controller *controller;
+	struct hubdev_info *hubdev_info;
+	int nasid;
+	void *provider_soft;
+	struct sn_pcibus_provider *provider;
+	struct sn_platform_data *sn_platform_data;
+
+	controller = PCI_CONTROLLER(bus);
+	/*
+	 * Per-provider fixup.  Copies the bus soft structure from prom
+	 * to local area and links SN_PCIBUS_BUSSOFT().
+	 */
+
+	if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) {
+		printk(KERN_WARNING "sn_common_bus_fixup: Unsupported asic type, %d",
+		       prom_bussoft_ptr->bs_asic_type);
+		return;
+	}
+
+	if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB)
+		return;	/* no further fixup necessary */
+
+	provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type];
+	if (provider == NULL)
+		panic("sn_common_bus_fixup: No provider registered for this asic type, %d",
+		      prom_bussoft_ptr->bs_asic_type);
+
+	if (provider->bus_fixup)
+		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr,
+				 controller);
+	else
+		provider_soft = NULL;
+
+	/*
+	 * Generic bus fixup goes here.  Don't reference prom_bussoft_ptr
+	 * after this point.
+	 */
+	controller->platform_data = kzalloc(sizeof(struct sn_platform_data),
+					    GFP_KERNEL);
+	BUG_ON(controller->platform_data == NULL);
+	sn_platform_data =
+			(struct sn_platform_data *) controller->platform_data;
+	sn_platform_data->provider_soft = provider_soft;
+	INIT_LIST_HEAD(&((struct sn_platform_data *)
+			 controller->platform_data)->pcidev_info);
+	nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base);
+	cnode = nasid_to_cnodeid(nasid);
+	hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+	SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info =
+	    &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]);
+
+	/*
+	 * If the node information we obtained during the fixup phase is
+	 * invalid then set controller->node to -1 (undetermined)
+	 */
+	if (controller->node >= num_online_nodes()) {
+		struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus);
+
+		printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u "
+		       "L_IO=%llx L_MEM=%llx BASE=%llx\n",
+		       b->bs_asic_type, b->bs_xid, b->bs_persist_busnum,
+		       b->bs_legacy_io, b->bs_legacy_mem, b->bs_base);
+		printk(KERN_WARNING "on node %d but only %d nodes online."
+		       "Association set to undetermined.\n",
+		       controller->node, num_online_nodes());
+		controller->node = -1;
+	}
+}
+
+void sn_bus_store_sysdata(struct pci_dev *dev)
+{
+	struct sysdata_el *element;
+
+	element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL);
+	if (!element) {
+		dev_dbg(&dev->dev, "%s: out of memory!\n", __func__);
+		return;
+	}
+	element->sysdata = SN_PCIDEV_INFO(dev);
+	list_add(&element->entry, &sn_sysdata_list);
+}
+
+void sn_bus_free_sysdata(void)
+{
+	struct sysdata_el *element;
+	struct list_head *list, *safe;
+
+	list_for_each_safe(list, safe, &sn_sysdata_list) {
+		element = list_entry(list, struct sysdata_el, entry);
+		list_del(&element->entry);
+		list_del(&(((struct pcidev_info *)
+			     (element->sysdata))->pdi_list));
+		kfree(element->sysdata);
+		kfree(element);
+	}
+	return;
+}
+
+/*
+ * hubdev_init_node() - Creates the HUB data structure and link them to it's
+ *			own NODE specific data area.
+ */
+void __init hubdev_init_node(nodepda_t * npda, cnodeid_t node)
+{
+	struct hubdev_info *hubdev_info;
+	int size;
+	pg_data_t *pg;
+
+	size = sizeof(struct hubdev_info);
+
+	if (node >= num_online_nodes())	/* Headless/memless IO nodes */
+		pg = NODE_DATA(0);
+	else
+		pg = NODE_DATA(node);
+
+	hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size);
+
+	npda->pdinfo = (void *)hubdev_info;
+}
+
+geoid_t
+cnodeid_get_geoid(cnodeid_t cnode)
+{
+	struct hubdev_info *hubdev;
+
+	hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+	return hubdev->hdi_geoid;
+}
+
+void sn_generate_path(struct pci_bus *pci_bus, char *address)
+{
+	nasid_t nasid;
+	cnodeid_t cnode;
+	geoid_t geoid;
+	moduleid_t moduleid;
+	u16 bricktype;
+
+	nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
+	cnode = nasid_to_cnodeid(nasid);
+	geoid = cnodeid_get_geoid(cnode);
+	moduleid = geo_module(geoid);
+
+	sprintf(address, "module_%c%c%c%c%.2d",
+		'0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)),
+		'0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)),
+		'0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)),
+		MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid));
+
+	/* Tollhouse requires slot id to be displayed */
+	bricktype = MODULE_GET_BTYPE(moduleid);
+	if ((bricktype == L1_BRICKTYPE_191010) ||
+	    (bricktype == L1_BRICKTYPE_1932))
+			sprintf(address + strlen(address), "^%d",
+						geo_slot(geoid));
+}
+
+void sn_pci_fixup_bus(struct pci_bus *bus)
+{
+
+	if (SN_ACPI_BASE_SUPPORT())
+		sn_acpi_bus_fixup(bus);
+	else
+		sn_bus_fixup(bus);
+}
+
+/*
+ * sn_io_early_init - Perform early IO (and some non-IO) initialization.
+ *		      In particular, setup the sn_pci_provider[] array.
+ *		      This needs to be done prior to any bus scanning
+ *		      (acpi_scan_init()) in the ACPI case, as the SN
+ *		      bus fixup code will reference the array.
+ */
+static int __init
+sn_io_early_init(void)
+{
+	int i;
+
+	if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM())
+		return 0;
+
+	/* we set the acpi revision to that of the DSDT table OEM rev. */
+	{
+		struct acpi_table_header *header = NULL;
+
+		acpi_get_table(ACPI_SIG_DSDT, 1, &header);
+		BUG_ON(header == NULL);
+		sn_acpi_rev = header->oem_revision;
+	}
+
+	/*
+	 * prime sn_pci_provider[].  Individual provider init routines will
+	 * override their respective default entries.
+	 */
+
+	for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++)
+		sn_pci_provider[i] = &sn_pci_default_provider;
+
+	pcibr_init_provider();
+	tioca_init_provider();
+	tioce_init_provider();
+
+	/*
+	 * This is needed to avoid bounce limit checks in the blk layer
+	 */
+	ia64_max_iommu_merge_mask = ~PAGE_MASK;
+
+	sn_irq_lh_init();
+	INIT_LIST_HEAD(&sn_sysdata_list);
+	sn_init_cpei_timer();
+
+#ifdef CONFIG_PROC_FS
+	register_sn_procfs();
+#endif
+
+	{
+		struct acpi_table_header *header;
+		(void)acpi_get_table(ACPI_SIG_DSDT, 1, &header);
+		printk(KERN_INFO "ACPI  DSDT OEM Rev 0x%x\n",
+			header->oem_revision);
+	}
+	if (SN_ACPI_BASE_SUPPORT())
+		sn_io_acpi_init();
+	else
+		sn_io_init();
+	return 0;
+}
+
+arch_initcall(sn_io_early_init);
+
+/*
+ * sn_io_late_init() - Perform any final platform specific IO initialization.
+ */
+
+int __init
+sn_io_late_init(void)
+{
+	struct pci_bus *bus;
+	struct pcibus_bussoft *bussoft;
+	cnodeid_t cnode;
+	nasid_t nasid;
+	cnodeid_t near_cnode;
+
+	if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM())
+		return 0;
+
+	/*
+	 * Setup closest node in pci_controller->node for
+	 * PIC, TIOCP, TIOCE (TIOCA does it during bus fixup using
+	 * info from the PROM).
+	 */
+	bus = NULL;
+	while ((bus = pci_find_next_bus(bus)) != NULL) {
+		bussoft = SN_PCIBUS_BUSSOFT(bus);
+		nasid = NASID_GET(bussoft->bs_base);
+		cnode = nasid_to_cnodeid(nasid);
+		if ((bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) ||
+		    (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCE) ||
+		    (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_PIC)) {
+			/* PCI Bridge: find nearest node with CPUs */
+			int e = sn_hwperf_get_nearest_node(cnode, NULL,
+							   &near_cnode);
+			if (e < 0) {
+				near_cnode = (cnodeid_t)-1; /* use any node */
+				printk(KERN_WARNING "sn_io_late_init: failed "
+				       "to find near node with CPUs for "
+				       "node %d, err=%d\n", cnode, e);
+			}
+			PCI_CONTROLLER(bus)->node = near_cnode;
+		}
+	}
+
+	sn_ioif_inited = 1;	/* SN I/O infrastructure now initialized */
+
+	return 0;
+}
+
+fs_initcall(sn_io_late_init);
+
+EXPORT_SYMBOL(sn_pci_unfixup_slot);
+EXPORT_SYMBOL(sn_bus_store_sysdata);
+EXPORT_SYMBOL(sn_bus_free_sysdata);
+EXPORT_SYMBOL(sn_generate_path);
+
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 5101ac46264..0b5ce82d203 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -3,103 +3,30 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
-#include <linux/bootmem.h>
-#include <linux/nodemask.h>
+#include <linux/slab.h>
+#include <linux/export.h>
 #include <asm/sn/types.h>
 #include <asm/sn/addrs.h>
-#include <asm/sn/sn_feature_sets.h>
-#include <asm/sn/geo.h>
 #include <asm/sn/io.h>
-#include <asm/sn/l1.h>
 #include <asm/sn/module.h>
-#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/intr.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/pcidev.h>
-#include <asm/sn/simulator.h>
 #include <asm/sn/sn_sal.h>
-#include <asm/sn/tioca_provider.h>
-#include <asm/sn/tioce_provider.h>
 #include "xtalk/hubdev.h"
-#include "xtalk/xwidgetdev.h"
-
-
-extern void sn_init_cpei_timer(void);
-extern void register_sn_procfs(void);
-
-static struct list_head sn_sysdata_list;
-
-/* sysdata list struct */
-struct sysdata_el {
-	struct list_head entry;
-	void *sysdata;
-};
-
-struct slab_info {
-	struct hubdev_info hubdev;
-};
-
-struct brick {
-	moduleid_t id;		/* Module ID of this module        */
-	struct slab_info slab_info[MAX_SLABS + 1];
-};
-
-int sn_ioif_inited;		/* SN I/O infrastructure initialized? */
-
-struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES];	/* indexed by asic type */
-
-static int max_segment_number;		 /* Default highest segment number */
-static int max_pcibus_number = 255;	/* Default highest pci bus number */
 
 /*
- * Hooks and struct for unsupported pci providers
+ * The code in this file will only be executed when running with
+ * a PROM that does _not_ have base ACPI IO support.
+ * (i.e., SN_ACPI_BASE_SUPPORT() == 0)
  */
 
-static dma_addr_t
-sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size)
-{
-	return 0;
-}
-
-static void
-sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction)
-{
-	return;
-}
-
-static void *
-sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller)
-{
-	return NULL;
-}
-
-static struct sn_pcibus_provider sn_pci_default_provider = {
-	.dma_map = sn_default_pci_map,
-	.dma_map_consistent = sn_default_pci_map,
-	.dma_unmap = sn_default_pci_unmap,
-	.bus_fixup = sn_default_pci_bus_fixup,
-};
-
-/*
- * Retrieve the DMA Flush List given nasid, widget, and device.
- * This list is needed to implement the WAR - Flush DMA data on PIO Reads.
- */
-static inline u64
-sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
-			     u64 address)
-{
-	struct ia64_sal_retval ret_stuff;
-	ret_stuff.status = 0;
-	ret_stuff.v0 = 0;
+static int max_segment_number;		 /* Default highest segment number */
+static int max_pcibus_number = 255;	/* Default highest pci bus number */
 
-	SAL_CALL_NOLOCK(ret_stuff,
-			(u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST,
-			(u64) nasid, (u64) widget_num,
-			(u64) device_num, (u64) address, 0, 0, 0);
-	return ret_stuff.status;
-}
 
 /*
  * Retrieve the hub device info structure for the given nasid.
@@ -150,74 +77,20 @@ sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
 	return ret_stuff.v0;
 }
 
-/*
- * sn_pcidev_info_get() - Retrieve the pcidev_info struct for the specified
- *			  device.
- */
-inline struct pcidev_info *
-sn_pcidev_info_get(struct pci_dev *dev)
-{
-	struct pcidev_info *pcidev;
-
-	list_for_each_entry(pcidev,
-			    &(SN_PCI_CONTROLLER(dev)->pcidev_info), pdi_list) {
-		if (pcidev->pdi_linux_pcidev == dev) {
-			return pcidev;
-		}
-	}
-	return NULL;
-}
-
-/* Older PROM flush WAR
- *
- * 01/16/06 -- This war will be in place until a new official PROM is released.
- * Additionally note that the struct sn_flush_device_war also has to be
- * removed from arch/ia64/sn/include/xtalk/hubdev.h
- */
-static u8 war_implemented = 0;
-
-static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device,
-			       struct sn_flush_device_common *common)
-{
-	struct sn_flush_device_war *war_list;
-	struct sn_flush_device_war *dev_entry;
-	struct ia64_sal_retval isrv = {0,0,0,0};
-
-	if (!war_implemented) {
-		printk(KERN_WARNING "PROM version < 4.50 -- implementing old "
-		       "PROM flush WAR\n");
-		war_implemented = 1;
-	}
-
-	war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL);
-	if (!war_list)
-		BUG();
-
-	SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
-			nasid, widget, __pa(war_list), 0, 0, 0 ,0);
-	if (isrv.status)
-		panic("sn_device_fixup_war failed: %s\n",
-		      ia64_sal_strerror(isrv.status));
-
-	dev_entry = war_list + device;
-	memcpy(common,dev_entry, sizeof(*common));
-	kfree(war_list);
-
-	return isrv.status;
-}
 
 /*
- * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for
- *	each node in the system.
+ * sn_fixup_ionodes() - This routine initializes the HUB data structure for
+ *			each node in the system. This function is only
+ *			executed when running with a non-ACPI capable PROM.
  */
 static void __init sn_fixup_ionodes(void)
 {
-	struct sn_flush_device_kernel *sn_flush_device_kernel;
-	struct sn_flush_device_kernel *dev_entry;
+
 	struct hubdev_info *hubdev;
 	u64 status;
 	u64 nasid;
-	int i, widget, device, size;
+	int i;
+	extern void sn_common_hubdev_init(struct hubdev_info *);
 
 	/*
 	 * Get SGI Specific HUB chipset information.
@@ -240,165 +113,70 @@ static void __init sn_fixup_ionodes(void)
 			max_segment_number = hubdev->max_segment_number;
 			max_pcibus_number = hubdev->max_pcibus_number;
 		}
-
-		/* Attach the error interrupt handlers */
-		if (nasid & 1)
-			ice_error_init(hubdev);
-		else
-			hub_error_init(hubdev);
-
-		for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++)
-			hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev;
-
-		if (!hubdev->hdi_flush_nasid_list.widget_p)
-			continue;
-
-		size = (HUB_WIDGET_ID_MAX + 1) *
-			sizeof(struct sn_flush_device_kernel *);
-		hubdev->hdi_flush_nasid_list.widget_p =
-			kzalloc(size, GFP_KERNEL);
-		if (!hubdev->hdi_flush_nasid_list.widget_p)
-			BUG();
-
-		for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
-			size = DEV_PER_WIDGET *
-				sizeof(struct sn_flush_device_kernel);
-			sn_flush_device_kernel = kzalloc(size, GFP_KERNEL);
-			if (!sn_flush_device_kernel)
-				BUG();
-
-			dev_entry = sn_flush_device_kernel;
-			for (device = 0; device < DEV_PER_WIDGET;
-			     device++,dev_entry++) {
-				size = sizeof(struct sn_flush_device_common);
-				dev_entry->common = kzalloc(size, GFP_KERNEL);
-				if (!dev_entry->common)
-					BUG();
-
-				if (sn_prom_feature_available(
-						       PRF_DEVICE_FLUSH_LIST))
-					status = sal_get_device_dmaflush_list(
-						     nasid, widget, device,
-						     (u64)(dev_entry->common));
-				else
-					status = sn_device_fixup_war(nasid,
-						     widget, device,
-						     dev_entry->common);
-				if (status != SALRET_OK)
-					panic("SAL call failed: %s\n",
-					      ia64_sal_strerror(status));
-
-				spin_lock_init(&dev_entry->sfdl_flush_lock);
-			}
-
-			if (sn_flush_device_kernel)
-				hubdev->hdi_flush_nasid_list.widget_p[widget] =
-						       sn_flush_device_kernel;
-	        }
+		sn_common_hubdev_init(hubdev);
 	}
 }
 
 /*
- * sn_pci_window_fixup() - Create a pci_window for each device resource.
- *			   Until ACPI support is added, we need this code
- *			   to setup pci_windows for use by
- *			   pcibios_bus_to_resource(),
- *			   pcibios_resource_to_bus(), etc.
+ * sn_pci_legacy_window_fixup - Setup PCI resources for
+ *				legacy IO and MEM space. This needs to
+ *				be done here, as the PROM does not have
+ *				ACPI support defining the root buses
+ *				and their resources (_CRS),
  */
 static void
-sn_pci_window_fixup(struct pci_dev *dev, unsigned int count,
-		    s64 * pci_addrs)
-{
-	struct pci_controller *controller = PCI_CONTROLLER(dev->bus);
-	unsigned int i;
-	unsigned int idx;
-	unsigned int new_count;
-	struct pci_window *new_window;
-
-	if (count == 0)
-		return;
-	idx = controller->windows;
-	new_count = controller->windows + count;
-	new_window = kcalloc(new_count, sizeof(struct pci_window), GFP_KERNEL);
-	if (new_window == NULL)
-		BUG();
-	if (controller->window) {
-		memcpy(new_window, controller->window,
-		       sizeof(struct pci_window) * controller->windows);
-		kfree(controller->window);
-	}
-
-	/* Setup a pci_window for each device resource. */
-	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-		if (pci_addrs[i] == -1)
-			continue;
-
-		new_window[idx].offset = dev->resource[i].start - pci_addrs[i];
-		new_window[idx].resource = dev->resource[i];
-		idx++;
-	}
-
-	controller->windows = new_count;
-	controller->window = new_window;
-}
-
-void sn_pci_unfixup_slot(struct pci_dev *dev)
+sn_legacy_pci_window_fixup(struct resource *res,
+		u64 legacy_io, u64 legacy_mem)
 {
-	struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev;
-
-	sn_irq_unfixup(dev);
-	pci_dev_put(host_pci_dev);
-	pci_dev_put(dev);
+		res[0].name = "legacy_io";
+		res[0].flags = IORESOURCE_IO;
+		res[0].start = legacy_io;
+		res[0].end = res[0].start + 0xffff;
+		res[0].parent = &ioport_resource;
+		res[1].name = "legacy_mem";
+		res[1].flags = IORESOURCE_MEM;
+		res[1].start = legacy_mem;
+		res[1].end = res[1].start + (1024 * 1024) - 1;
+		res[1].parent = &iomem_resource;
 }
 
 /*
- * sn_pci_fixup_slot() - This routine sets up a slot's resources
- * consistent with the Linux PCI abstraction layer.  Resources acquired
- * from our PCI provider include PIO maps to BAR space and interrupt
- * objects.
+ * sn_io_slot_fixup() -   We are not running with an ACPI capable PROM,
+ *			  and need to convert the pci_dev->resource
+ *			  'start' and 'end' addresses to mapped addresses,
+ *			  and setup the pci_controller->window array entries.
  */
-void sn_pci_fixup_slot(struct pci_dev *dev)
+void
+sn_io_slot_fixup(struct pci_dev *dev)
 {
-	unsigned int count = 0;
 	int idx;
-	int segment = pci_domain_nr(dev->bus);
-	int status = 0;
-	struct pcibus_bussoft *bs;
- 	struct pci_bus *host_pci_bus;
- 	struct pci_dev *host_pci_dev;
+	unsigned long addr, end, size, start;
 	struct pcidev_info *pcidev_info;
-	s64 pci_addrs[PCI_ROM_RESOURCE + 1];
- 	struct sn_irq_info *sn_irq_info;
- 	unsigned long size;
- 	unsigned int bus_no, devfn;
+	struct sn_irq_info *sn_irq_info;
+	int status;
 
-	pci_dev_get(dev); /* for the sysdata pointer */
 	pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL);
 	if (!pcidev_info)
-		BUG();		/* Cannot afford to run out of memory */
+		panic("%s: Unable to alloc memory for pcidev_info", __func__);
 
 	sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
 	if (!sn_irq_info)
-		BUG();		/* Cannot afford to run out of memory */
+		panic("%s: Unable to alloc memory for sn_irq_info", __func__);
 
 	/* Call to retrieve pci device information needed by kernel. */
-	status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number, 
-				     dev->devfn,
-				     (u64) __pa(pcidev_info),
-				     (u64) __pa(sn_irq_info));
-	if (status)
-		BUG(); /* Cannot get platform pci device information */
+	status = sal_get_pcidev_info((u64) pci_domain_nr(dev),
+		(u64) dev->bus->number,
+		dev->devfn,
+		(u64) __pa(pcidev_info),
+		(u64) __pa(sn_irq_info));
+
+	BUG_ON(status); /* Cannot get platform pci device information */
 
-	/* Add pcidev_info to list in sn_pci_controller struct */
-	list_add_tail(&pcidev_info->pdi_list,
-		      &(SN_PCI_CONTROLLER(dev->bus)->pcidev_info));
 
 	/* Copy over PIO Mapped Addresses */
 	for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
-		unsigned long start, end, addr;
 
 		if (!pcidev_info->pdi_pio_mapped_addr[idx]) {
-			pci_addrs[idx] = -1;
 			continue;
 		}
 
@@ -406,80 +184,60 @@ void sn_pci_fixup_slot(struct pci_dev *dev)
 		end = dev->resource[idx].end;
 		size = end - start;
 		if (size == 0) {
-			pci_addrs[idx] = -1;
 			continue;
 		}
-		pci_addrs[idx] = start;
-		count++;
 		addr = pcidev_info->pdi_pio_mapped_addr[idx];
 		addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET;
 		dev->resource[idx].start = addr;
 		dev->resource[idx].end = addr + size;
-		if (dev->resource[idx].flags & IORESOURCE_IO)
-			dev->resource[idx].parent = &ioport_resource;
-		else
-			dev->resource[idx].parent = &iomem_resource;
-	}
-	/* Create a pci_window in the pci_controller struct for
-	 * each device resource.
-	 */
-	if (count > 0)
-		sn_pci_window_fixup(dev, count, pci_addrs);
 
-	/*
-	 * Using the PROMs values for the PCI host bus, get the Linux
- 	 * PCI host_pci_dev struct and set up host bus linkages
- 	 */
-
-	bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff;
-	devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff;
- 	host_pci_bus = pci_find_bus(segment, bus_no);
- 	host_pci_dev = pci_get_slot(host_pci_bus, devfn);
-
-	pcidev_info->host_pci_dev = host_pci_dev;
-	pcidev_info->pdi_linux_pcidev = dev;
-	pcidev_info->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev);
-	bs = SN_PCIBUS_BUSSOFT(dev->bus);
-	pcidev_info->pdi_pcibus_info = bs;
-
-	if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) {
-		SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type];
-	} else {
-		SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider;
-	}
+		/*
+		 * if it's already in the device structure, remove it before
+		 * inserting
+		 */
+		if (dev->resource[idx].parent && dev->resource[idx].parent->child)
+			release_resource(&dev->resource[idx]);
 
-	/* Only set up IRQ stuff if this device has a host bus context */
-	if (bs && sn_irq_info->irq_irq) {
-		pcidev_info->pdi_sn_irq_info = sn_irq_info;
-		dev->irq = pcidev_info->pdi_sn_irq_info->irq_irq;
-		sn_irq_fixup(dev, sn_irq_info);
-	} else {
-		pcidev_info->pdi_sn_irq_info = NULL;
-		kfree(sn_irq_info);
+		if (dev->resource[idx].flags & IORESOURCE_IO)
+			insert_resource(&ioport_resource, &dev->resource[idx]);
+		else
+			insert_resource(&iomem_resource, &dev->resource[idx]);
+		/*
+		 * If ROM, set the actual ROM image size, and mark as
+		 * shadowed in PROM.
+		 */
+		if (idx == PCI_ROM_RESOURCE) {
+			size_t image_size;
+			void __iomem *rom;
+
+			rom = ioremap(pci_resource_start(dev, PCI_ROM_RESOURCE),
+				      size + 1);
+			image_size = pci_get_rom_size(dev, rom, size + 1);
+			dev->resource[PCI_ROM_RESOURCE].end =
+				dev->resource[PCI_ROM_RESOURCE].start +
+				image_size - 1;
+			dev->resource[PCI_ROM_RESOURCE].flags |=
+						 IORESOURCE_ROM_BIOS_COPY;
+		}
 	}
 
-	/*
-	 * MSI currently not supported on altix.  Remove this when
-	 * the MSI abstraction patches are integrated into the kernel
-	 * (sometime after 2.6.16 releases)
-	 */
-	dev->no_msi = 1;
+	sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info);
 }
 
+EXPORT_SYMBOL(sn_io_slot_fixup);
+
 /*
  * sn_pci_controller_fixup() - This routine sets up a bus's resources
- * consistent with the Linux PCI abstraction layer.
+ *			       consistent with the Linux PCI abstraction layer.
  */
-void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
+static void __init
+sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 {
-	int status;
-	int nasid, cnode;
+	s64 status = 0;
 	struct pci_controller *controller;
-	struct sn_pci_controller *sn_controller;
 	struct pcibus_bussoft *prom_bussoft_ptr;
-	struct hubdev_info *hubdev_info;
-	void *provider_soft;
-	struct sn_pcibus_provider *provider;
+	struct resource *res;
+	LIST_HEAD(resources);
 
  	status = sal_get_pcibus_info((u64) segment, (u64) busnum,
  				     (u64) ia64_tpa(&prom_bussoft_ptr));
@@ -487,261 +245,75 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 		return;		/*bus # does not exist */
 	prom_bussoft_ptr = __va(prom_bussoft_ptr);
 
-	/* Allocate a sn_pci_controller, which has a pci_controller struct
-	 * as the first member.
-	 */
-	sn_controller = kzalloc(sizeof(struct sn_pci_controller), GFP_KERNEL);
-	if (!sn_controller)
-		BUG();
-	INIT_LIST_HEAD(&sn_controller->pcidev_info);
-	controller = &sn_controller->pci_controller;
+	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
+	BUG_ON(!controller);
 	controller->segment = segment;
 
-	if (bus == NULL) {
- 		bus = pci_scan_bus(busnum, &pci_root_ops, controller);
- 		if (bus == NULL)
- 			goto error_return; /* error, or bus already scanned */
- 		bus->sysdata = NULL;
-	}
-
-	if (bus->sysdata)
-		goto error_return; /* sysdata already alloc'd */
+	res = kcalloc(2, sizeof(struct resource), GFP_KERNEL);
+	BUG_ON(!res);
 
 	/*
-	 * Per-provider fixup.  Copies the contents from prom to local
-	 * area and links SN_PCIBUS_BUSSOFT().
+	 * Temporarily save the prom_bussoft_ptr for use by sn_bus_fixup().
+	 * (platform_data will be overwritten later in sn_common_bus_fixup())
 	 */
-
-	if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES)
-		goto error_return; /* unsupported asic type */
-
-	if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB)
-		goto error_return; /* no further fixup necessary */
-
-	provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type];
-	if (provider == NULL)
-		goto error_return; /* no provider registerd for this asic */
-
-	bus->sysdata = controller;
-	if (provider->bus_fixup)
-		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller);
-	else
-		provider_soft = NULL;
-
-	if (provider_soft == NULL) {
-		/* fixup failed or not applicable */
-		bus->sysdata = NULL;
-		goto error_return;
+	controller->platform_data = prom_bussoft_ptr;
+
+	sn_legacy_pci_window_fixup(res,
+			prom_bussoft_ptr->bs_legacy_io,
+			prom_bussoft_ptr->bs_legacy_mem);
+	pci_add_resource_offset(&resources,	&res[0],
+			prom_bussoft_ptr->bs_legacy_io);
+	pci_add_resource_offset(&resources,	&res[1],
+			prom_bussoft_ptr->bs_legacy_mem);
+
+	bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, controller,
+				&resources);
+ 	if (bus == NULL) {
+		kfree(res);
+		kfree(controller);
 	}
-
-	/*
-	 * Setup pci_windows for legacy IO and MEM space.
-	 * (Temporary until ACPI support is in place.)
-	 */
-	controller->window = kcalloc(2, sizeof(struct pci_window), GFP_KERNEL);
-	if (controller->window == NULL)
-		BUG();
-	controller->window[0].offset = prom_bussoft_ptr->bs_legacy_io;
-	controller->window[0].resource.name = "legacy_io";
-	controller->window[0].resource.flags = IORESOURCE_IO;
-	controller->window[0].resource.start = prom_bussoft_ptr->bs_legacy_io;
-	controller->window[0].resource.end =
-	    controller->window[0].resource.start + 0xffff;
-	controller->window[0].resource.parent = &ioport_resource;
-	controller->window[1].offset = prom_bussoft_ptr->bs_legacy_mem;
-	controller->window[1].resource.name = "legacy_mem";
-	controller->window[1].resource.flags = IORESOURCE_MEM;
-	controller->window[1].resource.start = prom_bussoft_ptr->bs_legacy_mem;
-	controller->window[1].resource.end =
-	    controller->window[1].resource.start + (1024 * 1024) - 1;
-	controller->window[1].resource.parent = &iomem_resource;
-	controller->windows = 2;
-
-	/*
-	 * Generic bus fixup goes here.  Don't reference prom_bussoft_ptr
-	 * after this point.
-	 */
-
-	PCI_CONTROLLER(bus)->platform_data = provider_soft;
-	nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base);
-	cnode = nasid_to_cnodeid(nasid);
-	hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
-	SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info =
-	    &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]);
-
-	/*
-	 * If the node information we obtained during the fixup phase is invalid
-	 * then set controller->node to -1 (undetermined)
-	 */
-	if (controller->node >= num_online_nodes()) {
-		struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus);
-
-		printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u"
-				    "L_IO=%lx L_MEM=%lx BASE=%lx\n",
-			b->bs_asic_type, b->bs_xid, b->bs_persist_busnum,
-			b->bs_legacy_io, b->bs_legacy_mem, b->bs_base);
-		printk(KERN_WARNING "on node %d but only %d nodes online."
-			"Association set to undetermined.\n",
-			controller->node, num_online_nodes());
-		controller->node = -1;
-	}
-	return;
-
-error_return:
-
-	kfree(sn_controller);
-	return;
 }
 
-void sn_bus_store_sysdata(struct pci_dev *dev)
+/*
+ * sn_bus_fixup
+ */
+void
+sn_bus_fixup(struct pci_bus *bus)
 {
-	struct sysdata_el *element;
+	struct pci_dev *pci_dev = NULL;
+	struct pcibus_bussoft *prom_bussoft_ptr;
 
-	element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL);
-	if (!element) {
-		dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__);
-		return;
-	}
-	element->sysdata = SN_PCIDEV_INFO(dev);
-	list_add(&element->entry, &sn_sysdata_list);
-}
+	if (!bus->parent) {  /* If root bus */
+		prom_bussoft_ptr = PCI_CONTROLLER(bus)->platform_data;
+		if (prom_bussoft_ptr == NULL) {
+			printk(KERN_ERR
+			       "sn_bus_fixup: 0x%04x:0x%02x Unable to "
+			       "obtain prom_bussoft_ptr\n",
+			       pci_domain_nr(bus), bus->number);
+			return;
+		}
+		sn_common_bus_fixup(bus, prom_bussoft_ptr);
+        }
+        list_for_each_entry(pci_dev, &bus->devices, bus_list) {
+                sn_io_slot_fixup(pci_dev);
+        }
 
-void sn_bus_free_sysdata(void)
-{
-	struct sysdata_el *element;
-	struct list_head *list, *safe;
-
-	list_for_each_safe(list, safe, &sn_sysdata_list) {
-		element = list_entry(list, struct sysdata_el, entry);
-		list_del(&element->entry);
-		list_del(&(((struct pcidev_info *)
-			     (element->sysdata))->pdi_list));
-		kfree(element->sysdata);
-		kfree(element);
-	}
-	return;
 }
 
 /*
- * Ugly hack to get PCI setup until we have a proper ACPI namespace.
+ * sn_io_init - PROM does not have ACPI support to define nodes or root buses,
+ *		so we need to do things the hard way, including initiating the
+ *		bus scanning ourselves.
  */
 
-#define PCI_BUSES_TO_SCAN 256
-
-static int __init sn_pci_init(void)
+void __init sn_io_init(void)
 {
 	int i, j;
-	struct pci_dev *pci_dev = NULL;
-
-	if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM())
-		return 0;
-
-	/*
-	 * prime sn_pci_provider[].  Individial provider init routines will
-	 * override their respective default entries.
-	 */
-
-	for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++)
-		sn_pci_provider[i] = &sn_pci_default_provider;
-
-	pcibr_init_provider();
-	tioca_init_provider();
-	tioce_init_provider();
 
-	/*
-	 * This is needed to avoid bounce limit checks in the blk layer
-	 */
-	ia64_max_iommu_merge_mask = ~PAGE_MASK;
 	sn_fixup_ionodes();
-	sn_irq_lh_init();
-	INIT_LIST_HEAD(&sn_sysdata_list);
-	sn_init_cpei_timer();
-
-#ifdef CONFIG_PROC_FS
-	register_sn_procfs();
-#endif
 
 	/* busses are not known yet ... */
 	for (i = 0; i <= max_segment_number; i++)
 		for (j = 0; j <= max_pcibus_number; j++)
 			sn_pci_controller_fixup(i, j, NULL);
-
-	/*
-	 * Generic Linux PCI Layer has created the pci_bus and pci_dev 
-	 * structures - time for us to add our SN PLatform specific 
-	 * information.
-	 */
-
-	while ((pci_dev =
-		pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL)
-		sn_pci_fixup_slot(pci_dev);
-
-	sn_ioif_inited = 1;	/* sn I/O infrastructure now initialized */
-
-	return 0;
 }
-
-/*
- * hubdev_init_node() - Creates the HUB data structure and link them to it's 
- *	own NODE specific data area.
- */
-void hubdev_init_node(nodepda_t * npda, cnodeid_t node)
-{
-	struct hubdev_info *hubdev_info;
-	int size;
-	pg_data_t *pg;
-
-	size = sizeof(struct hubdev_info);
-
-	if (node >= num_online_nodes())	/* Headless/memless IO nodes */
-		pg = NODE_DATA(0);
-	else
-		pg = NODE_DATA(node);
-
-	hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size);
-
-	npda->pdinfo = (void *)hubdev_info;
-}
-
-geoid_t
-cnodeid_get_geoid(cnodeid_t cnode)
-{
-	struct hubdev_info *hubdev;
-
-	hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
-	return hubdev->hdi_geoid;
-}
-
-void sn_generate_path(struct pci_bus *pci_bus, char *address)
-{
-	nasid_t nasid;
-	cnodeid_t cnode;
-	geoid_t geoid;
-	moduleid_t moduleid;
-	u16 bricktype;
-
-	nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
-	cnode = nasid_to_cnodeid(nasid);
-	geoid = cnodeid_get_geoid(cnode);
-	moduleid = geo_module(geoid);
-
-	sprintf(address, "module_%c%c%c%c%.2d",
-		'0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)),
-		'0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)),
-		'0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)),
-		MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid));
-
-	/* Tollhouse requires slot id to be displayed */
-	bricktype = MODULE_GET_BTYPE(moduleid);
-	if ((bricktype == L1_BRICKTYPE_191010) ||
-	    (bricktype == L1_BRICKTYPE_1932))
-			sprintf(address, "%s^%d", address, geo_slot(geoid));
-}
-
-subsys_initcall(sn_pci_init);
-EXPORT_SYMBOL(sn_pci_fixup_slot);
-EXPORT_SYMBOL(sn_pci_unfixup_slot);
-EXPORT_SYMBOL(sn_pci_controller_fixup);
-EXPORT_SYMBOL(sn_bus_store_sysdata);
-EXPORT_SYMBOL(sn_bus_free_sysdata);
-EXPORT_SYMBOL(sn_generate_path);
diff --git a/arch/ia64/sn/kernel/iomv.c b/arch/ia64/sn/kernel/iomv.c
index 7ce3cdad627..c77ebdf9811 100644
--- a/arch/ia64/sn/kernel/iomv.c
+++ b/arch/ia64/sn/kernel/iomv.c
@@ -1,12 +1,13 @@
-/* 
+/*
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2003, 2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/module.h>
+#include <linux/acpi.h>
 #include <asm/io.h>
 #include <asm/delay.h>
 #include <asm/vga.h>
@@ -15,6 +16,7 @@
 #include <asm/sn/pda.h>
 #include <asm/sn/sn_cpuid.h>
 #include <asm/sn/shub_mmr.h>
+#include <asm/sn/acpi.h>
 
 #define IS_LEGACY_VGA_IOPORT(p) \
 	(((p) >= 0x3b0 && (p) <= 0x3bb) || ((p) >= 0x3c0 && (p) <= 0x3df))
@@ -24,18 +26,22 @@
  * @port: port to convert
  *
  * Legacy in/out instructions are converted to ld/st instructions
- * on IA64.  This routine will convert a port number into a valid 
+ * on IA64.  This routine will convert a port number into a valid
  * SN i/o address.  Used by sn_in*() and sn_out*().
  */
+
 void *sn_io_addr(unsigned long port)
 {
 	if (!IS_RUNNING_ON_SIMULATOR()) {
 		if (IS_LEGACY_VGA_IOPORT(port))
-			port += vga_console_iobase;
+			return (__ia64_mk_io_addr(port));
 		/* On sn2, legacy I/O ports don't point at anything */
 		if (port < (64 * 1024))
 			return NULL;
-		return ((void *)(port | __IA64_UNCACHED_OFFSET));
+		if (SN_ACPI_BASE_SUPPORT())
+			return (__ia64_mk_io_addr(port));
+		else
+			return ((void *)(port | __IA64_UNCACHED_OFFSET));
 	} else {
 		/* but the simulator uses them... */
 		unsigned long addr;
@@ -57,7 +63,7 @@ EXPORT_SYMBOL(sn_io_addr);
 /**
  * __sn_mmiowb - I/O space memory barrier
  *
- * See include/asm-ia64/io.h and Documentation/DocBook/deviceiobook.tmpl
+ * See arch/ia64/include/asm/io.h and Documentation/DocBook/deviceiobook.tmpl
  * for details.
  *
  * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear.
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index c265e02f503..85d09515490 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -5,12 +5,14 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2008 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/irq.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/intr.h>
@@ -19,18 +21,17 @@
 #include <asm/sn/pcidev.h>
 #include <asm/sn/shub_mmr.h>
 #include <asm/sn/sn_sal.h>
+#include <asm/sn/sn_feature_sets.h>
 
-static void force_interrupt(int irq);
 static void register_intr_pda(struct sn_irq_info *sn_irq_info);
 static void unregister_intr_pda(struct sn_irq_info *sn_irq_info);
 
-int sn_force_interrupt_flag = 1;
 extern int sn_ioif_inited;
-static struct list_head **sn_irq_lh;
-static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */
+struct list_head **sn_irq_lh;
+static DEFINE_SPINLOCK(sn_irq_info_lock); /* non-IRQ lock */
 
-static inline u64 sn_intr_alloc(nasid_t local_nasid, int local_widget,
-				     u64 sn_irq_info,
+u64 sn_intr_alloc(nasid_t local_nasid, int local_widget,
+				     struct sn_irq_info *sn_irq_info,
 				     int req_irq, nasid_t req_nasid,
 				     int req_slice)
 {
@@ -40,12 +41,13 @@ static inline u64 sn_intr_alloc(nasid_t local_nasid, int local_widget,
 
 	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
 			(u64) SAL_INTR_ALLOC, (u64) local_nasid,
-			(u64) local_widget, (u64) sn_irq_info, (u64) req_irq,
+			(u64) local_widget, __pa(sn_irq_info), (u64) req_irq,
 			(u64) req_nasid, (u64) req_slice);
+
 	return ret_stuff.status;
 }
 
-static inline void sn_intr_free(nasid_t local_nasid, int local_widget,
+void sn_intr_free(nasid_t local_nasid, int local_widget,
 				struct sn_irq_info *sn_irq_info)
 {
 	struct ia64_sal_retval ret_stuff;
@@ -58,140 +60,208 @@ static inline void sn_intr_free(nasid_t local_nasid, int local_widget,
 			(u64) sn_irq_info->irq_cookie, 0, 0);
 }
 
-static unsigned int sn_startup_irq(unsigned int irq)
+u64 sn_intr_redirect(nasid_t local_nasid, int local_widget,
+		      struct sn_irq_info *sn_irq_info,
+		      nasid_t req_nasid, int req_slice)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+			(u64) SAL_INTR_REDIRECT, (u64) local_nasid,
+			(u64) local_widget, __pa(sn_irq_info),
+			(u64) req_nasid, (u64) req_slice, 0);
+
+	return ret_stuff.status;
+}
+
+static unsigned int sn_startup_irq(struct irq_data *data)
 {
 	return 0;
 }
 
-static void sn_shutdown_irq(unsigned int irq)
+static void sn_shutdown_irq(struct irq_data *data)
 {
 }
 
-static void sn_disable_irq(unsigned int irq)
+extern void ia64_mca_register_cpev(int);
+
+static void sn_disable_irq(struct irq_data *data)
 {
+	if (data->irq == local_vector_to_irq(IA64_CPE_VECTOR))
+		ia64_mca_register_cpev(0);
 }
 
-static void sn_enable_irq(unsigned int irq)
+static void sn_enable_irq(struct irq_data *data)
 {
+	if (data->irq == local_vector_to_irq(IA64_CPE_VECTOR))
+		ia64_mca_register_cpev(data->irq);
 }
 
-static void sn_ack_irq(unsigned int irq)
+static void sn_ack_irq(struct irq_data *data)
 {
 	u64 event_occurred, mask;
+	unsigned int irq = data->irq & 0xff;
 
-	irq = irq & 0xff;
 	event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED));
 	mask = event_occurred & SH_ALL_INT_MASK;
 	HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), mask);
 	__set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
 
-	move_native_irq(irq);
+	irq_move_irq(data);
 }
 
-static void sn_end_irq(unsigned int irq)
+struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info,
+				       nasid_t nasid, int slice)
 {
-	int ivec;
-	u64 event_occurred;
-
-	ivec = irq & 0xff;
-	if (ivec == SGI_UART_VECTOR) {
-		event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR (SH_EVENT_OCCURRED));
-		/* If the UART bit is set here, we may have received an
-		 * interrupt from the UART that the driver missed.  To
-		 * make sure, we IPI ourselves to force us to look again.
-		 */
-		if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) {
-			platform_send_ipi(smp_processor_id(), SGI_UART_VECTOR,
-					  IA64_IPI_DM_INT, 0);
-		}
+	int vector;
+	int cpuid;
+#ifdef CONFIG_SMP
+	int cpuphys;
+#endif
+	int64_t bridge;
+	int local_widget, status;
+	nasid_t local_nasid;
+	struct sn_irq_info *new_irq_info;
+	struct sn_pcibus_provider *pci_provider;
+
+	bridge = (u64) sn_irq_info->irq_bridge;
+	if (!bridge) {
+		return NULL; /* irq is not a device interrupt */
 	}
-	__clear_bit(ivec, (volatile void *)pda->sn_in_service_ivecs);
-	if (sn_force_interrupt_flag)
-		force_interrupt(irq);
-}
 
-static void sn_irq_info_free(struct rcu_head *head);
+	local_nasid = NASID_GET(bridge);
 
-static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
-{
-	struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
-	int cpuid, cpuphys;
+	if (local_nasid & 1)
+		local_widget = TIO_SWIN_WIDGETNUM(bridge);
+	else
+		local_widget = SWIN_WIDGETNUM(bridge);
+	vector = sn_irq_info->irq_irq;
 
-	cpuid = first_cpu(mask);
-	cpuphys = cpu_physical_id(cpuid);
+	/* Make use of SAL_INTR_REDIRECT if PROM supports it */
+	status = sn_intr_redirect(local_nasid, local_widget, sn_irq_info, nasid, slice);
+	if (!status) {
+		new_irq_info = sn_irq_info;
+		goto finish_up;
+	}
 
-	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
-				 sn_irq_lh[irq], list) {
-		u64 bridge;
-		int local_widget, status;
-		nasid_t local_nasid;
-		struct sn_irq_info *new_irq_info;
-		struct sn_pcibus_provider *pci_provider;
-
-		new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC);
-		if (new_irq_info == NULL)
-			break;
-		memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info));
-
-		bridge = (u64) new_irq_info->irq_bridge;
-		if (!bridge) {
-			kfree(new_irq_info);
-			break; /* irq is not a device interrupt */
-		}
+	/*
+	 * PROM does not support SAL_INTR_REDIRECT, or it failed.
+	 * Revert to old method.
+	 */
+	new_irq_info = kmemdup(sn_irq_info, sizeof(struct sn_irq_info),
+			       GFP_ATOMIC);
+	if (new_irq_info == NULL)
+		return NULL;
+
+	/* Free the old PROM new_irq_info structure */
+	sn_intr_free(local_nasid, local_widget, new_irq_info);
+	unregister_intr_pda(new_irq_info);
+
+	/* allocate a new PROM new_irq_info struct */
+	status = sn_intr_alloc(local_nasid, local_widget,
+			       new_irq_info, vector,
+			       nasid, slice);
+
+	/* SAL call failed */
+	if (status) {
+		kfree(new_irq_info);
+		return NULL;
+	}
 
-		local_nasid = NASID_GET(bridge);
+	register_intr_pda(new_irq_info);
+	spin_lock(&sn_irq_info_lock);
+	list_replace_rcu(&sn_irq_info->list, &new_irq_info->list);
+	spin_unlock(&sn_irq_info_lock);
+	kfree_rcu(sn_irq_info, rcu);
 
-		if (local_nasid & 1)
-			local_widget = TIO_SWIN_WIDGETNUM(bridge);
-		else
-			local_widget = SWIN_WIDGETNUM(bridge);
 
-		/* Free the old PROM new_irq_info structure */
-		sn_intr_free(local_nasid, local_widget, new_irq_info);
-		/* Update kernels new_irq_info with new target info */
-		unregister_intr_pda(new_irq_info);
+finish_up:
+	/* Update kernels new_irq_info with new target info */
+	cpuid = nasid_slice_to_cpuid(new_irq_info->irq_nasid,
+				     new_irq_info->irq_slice);
+	new_irq_info->irq_cpuid = cpuid;
 
-		/* allocate a new PROM new_irq_info struct */
-		status = sn_intr_alloc(local_nasid, local_widget,
-				       __pa(new_irq_info), irq,
-				       cpuid_to_nasid(cpuid),
-				       cpuid_to_slice(cpuid));
+	pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type];
 
-		/* SAL call failed */
-		if (status) {
-			kfree(new_irq_info);
-			break;
-		}
+	/*
+	 * If this represents a line interrupt, target it.  If it's
+	 * an msi (irq_int_bit < 0), it's already targeted.
+	 */
+	if (new_irq_info->irq_int_bit >= 0 &&
+	    pci_provider && pci_provider->target_interrupt)
+		(pci_provider->target_interrupt)(new_irq_info);
+
+#ifdef CONFIG_SMP
+	cpuphys = cpu_physical_id(cpuid);
+	set_irq_affinity_info((vector & 0xff), cpuphys, 0);
+#endif
+
+	return new_irq_info;
+}
+
+static int sn_set_affinity_irq(struct irq_data *data,
+			       const struct cpumask *mask, bool force)
+{
+	struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
+	unsigned int irq = data->irq;
+	nasid_t nasid;
+	int slice;
 
-		new_irq_info->irq_cpuid = cpuid;
-		register_intr_pda(new_irq_info);
+	nasid = cpuid_to_nasid(cpumask_first_and(mask, cpu_online_mask));
+	slice = cpuid_to_slice(cpumask_first_and(mask, cpu_online_mask));
 
-		pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type];
-		if (pci_provider && pci_provider->target_interrupt)
-			(pci_provider->target_interrupt)(new_irq_info);
+	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
+				 sn_irq_lh[irq], list)
+		(void)sn_retarget_vector(sn_irq_info, nasid, slice);
 
-		spin_lock(&sn_irq_info_lock);
-		list_replace_rcu(&sn_irq_info->list, &new_irq_info->list);
-		spin_unlock(&sn_irq_info_lock);
-		call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
+	return 0;
+}
 
 #ifdef CONFIG_SMP
-		set_irq_affinity_info((irq & 0xff), cpuphys, 0);
+void sn_set_err_irq_affinity(unsigned int irq)
+{
+        /*
+         * On systems which support CPU disabling (SHub2), all error interrupts
+         * are targeted at the boot CPU.
+         */
+        if (is_shub2() && sn_prom_feature_available(PRF_CPU_DISABLE_SUPPORT))
+                set_irq_affinity_info(irq, cpu_physical_id(0), 0);
+}
+#else
+void sn_set_err_irq_affinity(unsigned int irq) { }
 #endif
-	}
+
+static void
+sn_mask_irq(struct irq_data *data)
+{
+}
+
+static void
+sn_unmask_irq(struct irq_data *data)
+{
 }
 
-struct hw_interrupt_type irq_type_sn = {
-	.typename	= "SN hub",
-	.startup	= sn_startup_irq,
-	.shutdown	= sn_shutdown_irq,
-	.enable		= sn_enable_irq,
-	.disable	= sn_disable_irq,
-	.ack		= sn_ack_irq,
-	.end		= sn_end_irq,
-	.set_affinity	= sn_set_affinity_irq
+struct irq_chip irq_type_sn = {
+	.name			= "SN hub",
+	.irq_startup		= sn_startup_irq,
+	.irq_shutdown		= sn_shutdown_irq,
+	.irq_enable		= sn_enable_irq,
+	.irq_disable		= sn_disable_irq,
+	.irq_ack		= sn_ack_irq,
+	.irq_mask		= sn_mask_irq,
+	.irq_unmask		= sn_unmask_irq,
+	.irq_set_affinity	= sn_set_affinity_irq
 };
 
+ia64_vector sn_irq_to_vector(int irq)
+{
+	if (irq >= IA64_NUM_VECTORS)
+		return 0;
+	return (ia64_vector)irq;
+}
+
 unsigned int sn_local_vector_to_irq(u8 vector)
 {
 	return (CPU_VECTOR_TO_IRQ(smp_processor_id(), vector));
@@ -200,12 +270,13 @@ unsigned int sn_local_vector_to_irq(u8 vector)
 void sn_irq_init(void)
 {
 	int i;
-	irq_desc_t *base_desc = irq_desc;
+
+	ia64_first_device_vector = IA64_SN2_FIRST_DEVICE_VECTOR;
+	ia64_last_device_vector = IA64_SN2_LAST_DEVICE_VECTOR;
 
 	for (i = 0; i < NR_IRQS; i++) {
-		if (base_desc[i].handler == &no_irq_type) {
-			base_desc[i].handler = &irq_type_sn;
-		}
+		if (irq_get_chip(i) == &no_irq_chip)
+			irq_set_chip(i, &irq_type_sn);
 	}
 }
 
@@ -264,19 +335,14 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
 	rcu_read_unlock();
 }
 
-static void sn_irq_info_free(struct rcu_head *head)
-{
-	struct sn_irq_info *sn_irq_info;
-
-	sn_irq_info = container_of(head, struct sn_irq_info, rcu);
-	kfree(sn_irq_info);
-}
-
 void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
 {
 	nasid_t nasid = sn_irq_info->irq_nasid;
 	int slice = sn_irq_info->irq_slice;
 	int cpu = nasid_slice_to_cpuid(nasid, slice);
+#ifdef CONFIG_SMP
+	int cpuphys;
+#endif
 
 	pci_dev_get(pci_dev);
 	sn_irq_info->irq_cpuid = cpu;
@@ -285,9 +351,21 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
 	/* link it into the sn_irq[irq] list */
 	spin_lock(&sn_irq_info_lock);
 	list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]);
+	reserve_irq_vector(sn_irq_info->irq_irq);
+	if (sn_irq_info->irq_int_bit != -1)
+		irq_set_handler(sn_irq_info->irq_irq, handle_level_irq);
 	spin_unlock(&sn_irq_info_lock);
 
 	register_intr_pda(sn_irq_info);
+#ifdef CONFIG_SMP
+	cpuphys = cpu_physical_id(cpu);
+	set_irq_affinity_info(sn_irq_info->irq_irq, cpuphys, 0);
+	/*
+	 * Affinity was set by the PROM, prevent it from
+	 * being reset by the request_irq() path.
+	 */
+	irqd_mark_affinity_was_set(irq_get_irq_data(sn_irq_info->irq_irq));
+#endif
 }
 
 void sn_irq_unfixup(struct pci_dev *pci_dev)
@@ -310,8 +388,11 @@ void sn_irq_unfixup(struct pci_dev *pci_dev)
 	spin_lock(&sn_irq_info_lock);
 	list_del_rcu(&sn_irq_info->list);
 	spin_unlock(&sn_irq_info_lock);
-	call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
+	if (list_empty(sn_irq_lh[sn_irq_info->irq_irq]))
+		free_irq_vector(sn_irq_info->irq_irq);
+	kfree_rcu(sn_irq_info, rcu);
 	pci_dev_put(pci_dev);
+
 }
 
 static inline void
@@ -320,22 +401,11 @@ sn_call_force_intr_provider(struct sn_irq_info *sn_irq_info)
 	struct sn_pcibus_provider *pci_provider;
 
 	pci_provider = sn_pci_provider[sn_irq_info->irq_bridge_type];
-	if (pci_provider && pci_provider->force_interrupt)
-		(*pci_provider->force_interrupt)(sn_irq_info);
-}
-
-static void force_interrupt(int irq)
-{
-	struct sn_irq_info *sn_irq_info;
-
-	if (!sn_ioif_inited)
-		return;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list)
-		sn_call_force_intr_provider(sn_irq_info);
 
-	rcu_read_unlock();
+	/* Don't force an interrupt if the irq has been disabled */
+	if (!irqd_irq_disabled(irq_get_irq_data(sn_irq_info->irq_irq)) &&
+	    pci_provider && pci_provider->force_interrupt)
+		(*pci_provider->force_interrupt)(sn_irq_info);
 }
 
 /*
@@ -356,7 +426,7 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
 	/*
 	 * Bridge types attached to TIO (anything but PIC) do not need this WAR
 	 * since they do not target Shub II interrupt registers.  If that
-	 * ever changes, this check needs to accomodate.
+	 * ever changes, this check needs to accommodate.
 	 */
 	if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_PIC)
 		return;
diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c
index 3db62f24596..27793f7aa99 100644
--- a/arch/ia64/sn/kernel/mca.c
+++ b/arch/ia64/sn/kernel/mca.c
@@ -98,8 +98,9 @@ sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata,
 	while (*sn_oemdata_size > sn_oemdata_bufsize) {
 		u8 *newbuf = vmalloc(*sn_oemdata_size);
 		if (!newbuf) {
+			mutex_unlock(&sn_oemdata_mutex);
 			printk(KERN_ERR "%s: unable to extend sn_oemdata\n",
-			       __FUNCTION__);
+			       __func__);
 			return 1;
 		}
 		vfree(*sn_oemdata);
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
new file mode 100644
index 00000000000..afc58d2799a
--- /dev/null
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -0,0 +1,238 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
+#include <linux/cpumask.h>
+#include <linux/msi.h>
+#include <linux/slab.h>
+
+#include <asm/sn/addrs.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#include <asm/sn/pcidev.h>
+#include <asm/sn/nodepda.h>
+
+struct sn_msi_info {
+	u64 pci_addr;
+	struct sn_irq_info *sn_irq_info;
+};
+
+static struct sn_msi_info sn_msi_info[NR_IRQS];
+
+static struct irq_chip sn_msi_chip;
+
+void sn_teardown_msi_irq(unsigned int irq)
+{
+	nasid_t nasid;
+	int widget;
+	struct pci_dev *pdev;
+	struct pcidev_info *sn_pdev;
+	struct sn_irq_info *sn_irq_info;
+	struct pcibus_bussoft *bussoft;
+	struct sn_pcibus_provider *provider;
+
+	sn_irq_info = sn_msi_info[irq].sn_irq_info;
+	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
+		return;
+
+	sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	pdev = sn_pdev->pdi_linux_pcidev;
+	provider = SN_PCIDEV_BUSPROVIDER(pdev);
+
+	(*provider->dma_unmap)(pdev,
+			       sn_msi_info[irq].pci_addr,
+			       PCI_DMA_FROMDEVICE);
+	sn_msi_info[irq].pci_addr = 0;
+
+	bussoft = SN_PCIDEV_BUSSOFT(pdev);
+	nasid = NASID_GET(bussoft->bs_base);
+	widget = (nasid & 1) ?
+			TIO_SWIN_WIDGETNUM(bussoft->bs_base) :
+			SWIN_WIDGETNUM(bussoft->bs_base);
+
+	sn_intr_free(nasid, widget, sn_irq_info);
+	sn_msi_info[irq].sn_irq_info = NULL;
+
+	destroy_irq(irq);
+}
+
+int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
+{
+	struct msi_msg msg;
+	int widget;
+	int status;
+	nasid_t nasid;
+	u64 bus_addr;
+	struct sn_irq_info *sn_irq_info;
+	struct pcibus_bussoft *bussoft = SN_PCIDEV_BUSSOFT(pdev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+	int irq;
+
+	if (!entry->msi_attrib.is_64)
+		return -EINVAL;
+
+	if (bussoft == NULL)
+		return -EINVAL;
+
+	if (provider == NULL || provider->dma_map_consistent == NULL)
+		return -EINVAL;
+
+	irq = create_irq();
+	if (irq < 0)
+		return irq;
+
+	/*
+	 * Set up the vector plumbing.  Let the prom (via sn_intr_alloc)
+	 * decide which cpu to direct this msi at by default.
+	 */
+
+	nasid = NASID_GET(bussoft->bs_base);
+	widget = (nasid & 1) ?
+			TIO_SWIN_WIDGETNUM(bussoft->bs_base) :
+			SWIN_WIDGETNUM(bussoft->bs_base);
+
+	sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+	if (! sn_irq_info) {
+		destroy_irq(irq);
+		return -ENOMEM;
+	}
+
+	status = sn_intr_alloc(nasid, widget, sn_irq_info, irq, -1, -1);
+	if (status) {
+		kfree(sn_irq_info);
+		destroy_irq(irq);
+		return -ENOMEM;
+	}
+
+	sn_irq_info->irq_int_bit = -1;		/* mark this as an MSI irq */
+	sn_irq_fixup(pdev, sn_irq_info);
+
+	/* Prom probably should fill these in, but doesn't ... */
+	sn_irq_info->irq_bridge_type = bussoft->bs_asic_type;
+	sn_irq_info->irq_bridge = (void *)bussoft->bs_base;
+
+	/*
+	 * Map the xio address into bus space
+	 */
+	bus_addr = (*provider->dma_map_consistent)(pdev,
+					sn_irq_info->irq_xtalkaddr,
+					sizeof(sn_irq_info->irq_xtalkaddr),
+					SN_DMA_MSI|SN_DMA_ADDR_XIO);
+	if (! bus_addr) {
+		sn_intr_free(nasid, widget, sn_irq_info);
+		kfree(sn_irq_info);
+		destroy_irq(irq);
+		return -ENOMEM;
+	}
+
+	sn_msi_info[irq].sn_irq_info = sn_irq_info;
+	sn_msi_info[irq].pci_addr = bus_addr;
+
+	msg.address_hi = (u32)(bus_addr >> 32);
+	msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
+
+	/*
+	 * In the SN platform, bit 16 is a "send vector" bit which
+	 * must be present in order to move the vector through the system.
+	 */
+	msg.data = 0x100 + irq;
+
+	irq_set_msi_desc(irq, entry);
+	write_msi_msg(irq, &msg);
+	irq_set_chip_and_handler(irq, &sn_msi_chip, handle_edge_irq);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+static int sn_set_msi_irq_affinity(struct irq_data *data,
+				   const struct cpumask *cpu_mask, bool force)
+{
+	struct msi_msg msg;
+	int slice;
+	nasid_t nasid;
+	u64 bus_addr;
+	struct pci_dev *pdev;
+	struct pcidev_info *sn_pdev;
+	struct sn_irq_info *sn_irq_info;
+	struct sn_irq_info *new_irq_info;
+	struct sn_pcibus_provider *provider;
+	unsigned int cpu, irq = data->irq;
+
+	cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
+	sn_irq_info = sn_msi_info[irq].sn_irq_info;
+	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
+		return -1;
+
+	/*
+	 * Release XIO resources for the old MSI PCI address
+	 */
+
+	get_cached_msi_msg(irq, &msg);
+        sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	pdev = sn_pdev->pdi_linux_pcidev;
+	provider = SN_PCIDEV_BUSPROVIDER(pdev);
+
+	bus_addr = (u64)(msg.address_hi) << 32 | (u64)(msg.address_lo);
+	(*provider->dma_unmap)(pdev, bus_addr, PCI_DMA_FROMDEVICE);
+	sn_msi_info[irq].pci_addr = 0;
+
+	nasid = cpuid_to_nasid(cpu);
+	slice = cpuid_to_slice(cpu);
+
+	new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice);
+	sn_msi_info[irq].sn_irq_info = new_irq_info;
+	if (new_irq_info == NULL)
+		return -1;
+
+	/*
+	 * Map the xio address into bus space
+	 */
+
+	bus_addr = (*provider->dma_map_consistent)(pdev,
+					new_irq_info->irq_xtalkaddr,
+					sizeof(new_irq_info->irq_xtalkaddr),
+					SN_DMA_MSI|SN_DMA_ADDR_XIO);
+
+	sn_msi_info[irq].pci_addr = bus_addr;
+	msg.address_hi = (u32)(bus_addr >> 32);
+	msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
+
+	write_msi_msg(irq, &msg);
+	cpumask_copy(data->affinity, cpu_mask);
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+static void sn_ack_msi_irq(struct irq_data *data)
+{
+	irq_move_irq(data);
+	ia64_eoi();
+}
+
+static int sn_msi_retrigger_irq(struct irq_data *data)
+{
+	unsigned int vector = data->irq;
+	ia64_resend_irq(vector);
+
+	return 1;
+}
+
+static struct irq_chip sn_msi_chip = {
+	.name			= "PCI-MSI",
+	.irq_mask		= mask_msi_irq,
+	.irq_unmask		= unmask_msi_irq,
+	.irq_ack		= sn_ack_msi_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= sn_set_msi_irq_affinity,
+#endif
+	.irq_retrigger		= sn_msi_retrigger_irq,
+};
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 30988dfbddf..53b01b8e2f1 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -6,14 +6,13 @@
  * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/kdev_t.h>
 #include <linux/string.h>
-#include <linux/tty.h>
+#include <linux/screen_info.h>
 #include <linux/console.h>
 #include <linux/timex.h>
 #include <linux/sched.h>
@@ -26,7 +25,6 @@
 #include <linux/interrupt.h>
 #include <linux/acpi.h>
 #include <linux/compiler.h>
-#include <linux/sched.h>
 #include <linux/root_dev.h>
 #include <linux/nodemask.h>
 #include <linux/pm.h>
@@ -35,9 +33,9 @@
 #include <asm/io.h>
 #include <asm/sal.h>
 #include <asm/machvec.h>
-#include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/vga.h>
+#include <asm/setup.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/pda.h>
@@ -66,8 +64,6 @@ extern void sn_timer_init(void);
 extern unsigned long last_time_offset;
 extern void (*ia64_mark_idle) (int);
 extern void snidle(int);
-extern unsigned char acpi_kbd_controller_present;
-extern unsigned long long (*ia64_printk_clock)(void);
 
 unsigned long sn_rtc_cycles_per_second;
 EXPORT_SYMBOL(sn_rtc_cycles_per_second);
@@ -139,7 +135,7 @@ static int __init pxm_to_nasid(int pxm)
 	int i;
 	int nid;
 
-	nid = pxm_to_nid_map[pxm];
+	nid = pxm_to_node(pxm);
 	for (i = 0; i < num_node_memblks; i++) {
 		if (node_memblk[i].nid == nid) {
 			return NASID_GET(node_memblk[i].start_paddr);
@@ -169,7 +165,7 @@ void __init early_sn_setup(void)
 	 * IO on SN2 is done via SAL calls, early_printk won't work without this.
 	 *
 	 * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
-	 * Any changes to those file may have to be made hereas well.
+	 * Any changes to those file may have to be made here as well.
 	 */
 	efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
 	config_tables = __va(efi_systab->tables);
@@ -196,7 +192,7 @@ void __init early_sn_setup(void)
 }
 
 extern int platform_intr_list[];
-static int __initdata shub_1_1_found;
+static int shub_1_1_found;
 
 /*
  * sn_check_for_wars
@@ -204,7 +200,7 @@ static int __initdata shub_1_1_found;
  * Set flag for enabling shub specific wars
  */
 
-static inline int __init is_shub_1_1(int nasid)
+static inline int is_shub_1_1(int nasid)
 {
 	unsigned long id;
 	int rev;
@@ -216,7 +212,7 @@ static inline int __init is_shub_1_1(int nasid)
 	return rev <= 2;
 }
 
-static void __init sn_check_for_wars(void)
+static void sn_check_for_wars(void)
 {
 	int cnode;
 
@@ -245,7 +241,7 @@ static void __init sn_check_for_wars(void)
  * Note:  This stuff is duped here because Altix requires the PCDP to
  * locate a usable VGA device due to lack of proper ACPI support.  Structures
  * could be used from drivers/firmware/pcdp.h, but it was decided that moving
- * this file to a more public location just for Altix use was undesireable.
+ * this file to a more public location just for Altix use was undesirable.
  */
 
 struct hcdp_uart_desc {
@@ -350,8 +346,7 @@ sn_scan_pcdp(void)
 			continue;	/* not PCI interconnect */
 
 		if (if_pci.translation & PCDP_PCI_TRANS_IOPORT)
-			vga_console_iobase =
-				if_pci.ioport_tra | __IA64_UNCACHED_OFFSET;
+			vga_console_iobase = if_pci.ioport_tra;
 
 		if (if_pci.translation & PCDP_PCI_TRANS_MMIO)
 			vga_console_membase =
@@ -364,14 +359,6 @@ sn_scan_pcdp(void)
 
 static unsigned long sn2_rtc_initial;
 
-static unsigned long long ia64_sn2_printk_clock(void)
-{
-	unsigned long rtc_now = rtc_time();
-
-	return (rtc_now - sn2_rtc_initial) *
-		(1000000000 / sn_rtc_cycles_per_second);
-}
-
 /**
  * sn_setup - SN platform setup routine
  * @cmdline_p: kernel command line
@@ -390,7 +377,17 @@ void __init sn_setup(char **cmdline_p)
 	ia64_sn_plat_set_error_handling_features();	// obsolete
 	ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV);
 	ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES);
+	/*
+	 * Note: The calls to notify the PROM of ACPI and PCI Segment
+	 *	 support must be done prior to acpi_load_tables(), as
+	 *	 an ACPI capable PROM will rebuild the DSDT as result
+	 *	 of the call.
+	 */
+	ia64_sn_set_os_feature(OSF_PCISEGMENT_ENABLE);
+	ia64_sn_set_os_feature(OSF_ACPI_ENABLE);
 
+	/* Load the new DSDT and SSDT tables into the global table list. */
+	acpi_table_init();
 
 #if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
 	/*
@@ -415,6 +412,17 @@ void __init sn_setup(char **cmdline_p)
 	if (! vga_console_membase)
 		sn_scan_pcdp();
 
+	/*
+	 *	Setup legacy IO space.
+	 *	vga_console_iobase maps to PCI IO Space address 0 on the
+	 * 	bus containing the VGA console.
+	 */
+	if (vga_console_iobase) {
+		io_space[0].mmio_base =
+			(unsigned long) ioremap(vga_console_iobase, 0);
+		io_space[0].sparse = 0;
+	}
+
 	if (vga_console_membase) {
 		/* usable vga ... make tty0 the preferred default console */
 		if (!strstr(*cmdline_p, "console="))
@@ -451,19 +459,6 @@ void __init sn_setup(char **cmdline_p)
 
 	platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
 
-	ia64_printk_clock = ia64_sn2_printk_clock;
-
-	/*
-	 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
-	 * support here so we don't have to listen to failed keyboard probe
-	 * messages.
-	 */
-	if (version <= 0x0209 && acpi_kbd_controller_present) {
-		printk(KERN_INFO "Disabling legacy keyboard support as prom "
-		       "is too old and doesn't provide FADT\n");
-		acpi_kbd_controller_present = 0;
-	}
-
 	printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
 
 	/*
@@ -512,12 +507,11 @@ static void __init sn_init_pdas(char **cmdline_p)
 	cnodeid_t cnode;
 
 	/*
-	 * Allocate & initalize the nodepda for each node.
+	 * Allocate & initialize the nodepda for each node.
 	 */
 	for_each_online_node(cnode) {
 		nodepdaindr[cnode] =
 		    alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t));
-		memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
 		memset(nodepdaindr[cnode]->phys_cpuid, -1,
 		    sizeof(nodepdaindr[cnode]->phys_cpuid));
 		spin_lock_init(&nodepdaindr[cnode]->ptc_lock);
@@ -526,11 +520,9 @@ static void __init sn_init_pdas(char **cmdline_p)
 	/*
 	 * Allocate & initialize nodepda for TIOs.  For now, put them on node 0.
 	 */
-	for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) {
+	for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++)
 		nodepdaindr[cnode] =
 		    alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
-		memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
-	}
 
 	/*
 	 * Now copy the array of nodepda pointers to each nodepda.
@@ -566,7 +558,7 @@ static void __init sn_init_pdas(char **cmdline_p)
  * Also sets up a few fields in the nodepda.  Also known as
  * platform_cpu_init() by the ia64 machvec code.
  */
-void __init sn_cpu_init(void)
+void sn_cpu_init(void)
 {
 	int cpuid;
 	int cpuphyid;
@@ -575,9 +567,10 @@ void __init sn_cpu_init(void)
 	int slice;
 	int cnode;
 	int i;
-	static int wars_have_been_checked;
+	static int wars_have_been_checked, set_cpu0_number;
 
-	if (smp_processor_id() == 0 && IS_MEDUSA()) {
+	cpuid = smp_processor_id();
+	if (cpuid == 0 && IS_MEDUSA()) {
 		if (ia64_sn_is_fake_prom())
 			sn_prom_type = 2;
 		else
@@ -597,6 +590,20 @@ void __init sn_cpu_init(void)
 	sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
 
 	/*
+	 * Don't check status. The SAL call is not supported on all PROMs
+	 * but a failure is harmless.
+	 * Architecturally, cpu_init is always called twice on cpu 0. We
+	 * should set cpu_number on cpu 0 once.
+	 */
+	if (cpuid == 0) {
+		if (!set_cpu0_number) {
+			(void) ia64_sn_set_cpu_number(cpuid);
+			set_cpu0_number = 1;
+		}
+	} else
+		(void) ia64_sn_set_cpu_number(cpuid);
+
+	/*
 	 * The boot cpu makes this call again after platform initialization is
 	 * complete.
 	 */
@@ -607,7 +614,6 @@ void __init sn_cpu_init(void)
 		if (ia64_sn_get_prom_feature_set(i, &sn_prom_features[i]) != 0)
 			break;
 
-	cpuid = smp_processor_id();
 	cpuphyid = get_sapicid();
 
 	if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice))
@@ -704,7 +710,7 @@ void __init build_cnode_tables(void)
 	 * cnode == node for all C & M bricks.
 	 */
 	for_each_online_node(node) {
-		nasid = pxm_to_nasid(nid_to_pxm_map[node]);
+		nasid = pxm_to_nasid(node_to_pxm(node));
 		sn_cnodeid_to_nasid[node] = nasid;
 		physical_node_map[nasid] = node;
 	}
@@ -726,8 +732,7 @@ void __init build_cnode_tables(void)
 		kl_config_hdr_t *klgraph_header;
 		nasid = cnodeid_to_nasid(node);
 		klgraph_header = ia64_sn_get_klconfig_addr(nasid);
-		if (klgraph_header == NULL)
-			BUG();
+		BUG_ON(klgraph_header == NULL);
 		brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
 		while (brd) {
 			if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) {
@@ -744,7 +749,7 @@ nasid_slice_to_cpuid(int nasid, int slice)
 {
 	long cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		if (cpuid_to_nasid(cpu) == nasid &&
 					cpuid_to_slice(cpu) == slice)
 			return cpu;
@@ -758,5 +763,13 @@ int sn_prom_feature_available(int id)
 		return 0;
 	return test_bit(id, sn_prom_features);
 }
+
+void
+sn_kernel_launch_event(void)
+{
+	/* ignore status until we understand possible failure, if any*/
+	if (ia64_sn_kernel_launch_event())
+		printk(KERN_ERR "KEXEC is not supported in this PROM, Please update the PROM.\n");
+}
 EXPORT_SYMBOL(sn_prom_feature_available);
 
diff --git a/arch/ia64/sn/kernel/sn2/Makefile b/arch/ia64/sn/kernel/sn2/Makefile
index 99e17769323..3d09108d427 100644
--- a/arch/ia64/sn/kernel/sn2/Makefile
+++ b/arch/ia64/sn/kernel/sn2/Makefile
@@ -9,7 +9,7 @@
 # sn2 specific kernel files
 #
 
-CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+ccflags-y := -Iarch/ia64/sn/include
 
 obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \
 	 prominfo_proc.o timer.o timer_interrupt.o sn_hwperf.o
diff --git a/arch/ia64/sn/kernel/sn2/cache.c b/arch/ia64/sn/kernel/sn2/cache.c
index bc3cfa17cd0..2862cb33026 100644
--- a/arch/ia64/sn/kernel/sn2/cache.c
+++ b/arch/ia64/sn/kernel/sn2/cache.c
@@ -3,11 +3,12 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  * 
- * Copyright (C) 2001-2003 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2001-2003, 2006 Silicon Graphics, Inc. All rights reserved.
  *
  */
 #include <linux/module.h>
 #include <asm/pgalloc.h>
+#include <asm/sn/arch.h>
 
 /**
  * sn_flush_all_caches - flush a range of address from all caches (incl. L4)
@@ -17,18 +18,24 @@
  * Flush a range of addresses from all caches including L4. 
  * All addresses fully or partially contained within 
  * @flush_addr to @flush_addr + @bytes are flushed
- * from the all caches.
+ * from all caches.
  */
 void
 sn_flush_all_caches(long flush_addr, long bytes)
 {
-	flush_icache_range(flush_addr, flush_addr+bytes);
+	unsigned long addr = flush_addr;
+
+	/* SHub1 requires a cached address */
+	if (is_shub1() && (addr & RGN_BITS) == RGN_BASE(RGN_UNCACHED))
+		addr = (addr - RGN_BASE(RGN_UNCACHED)) + RGN_BASE(RGN_KERNEL);
+
+	flush_icache_range(addr, addr + bytes);
 	/*
 	 * The last call may have returned before the caches
 	 * were actually flushed, so we call it again to make
 	 * sure.
 	 */
-	flush_icache_range(flush_addr, flush_addr+bytes);
+	flush_icache_range(addr, addr + bytes);
 	mb();
 }
 EXPORT_SYMBOL(sn_flush_all_caches);
diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
index 6ae276d5d50..ec4de2b0965 100644
--- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c
+++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
@@ -8,12 +8,11 @@
  * Module to export the system's Firmware Interface Tables, including
  * PROM revision numbers and banners, in /proc
  */
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/nodemask.h>
-#include <asm/system.h>
 #include <asm/io.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/sn_cpuid.h>
@@ -103,18 +102,18 @@ get_fit_entry(unsigned long nasid, int index, unsigned long *fentry,
 /*
  * These two routines display the FIT table for each node.
  */
-static int dump_fit_entry(char *page, unsigned long *fentry)
+static void dump_fit_entry(struct seq_file *m, unsigned long *fentry)
 {
 	unsigned type;
 
 	type = FIT_TYPE(fentry[1]);
-	return sprintf(page, "%02x %-25s %x.%02x %016lx %u\n",
-		       type,
-		       fit_type_name(type),
-		       FIT_MAJOR(fentry[1]), FIT_MINOR(fentry[1]),
-		       fentry[0],
-		       /* mult by sixteen to get size in bytes */
-		       (unsigned)(fentry[1] & 0xffffff) * 16);
+	seq_printf(m, "%02x %-25s %x.%02x %016lx %u\n",
+		   type,
+		   fit_type_name(type),
+		   FIT_MAJOR(fentry[1]), FIT_MINOR(fentry[1]),
+		   fentry[0],
+		   /* mult by sixteen to get size in bytes */
+		   (unsigned)(fentry[1] & 0xffffff) * 16);
 }
 
 
@@ -126,31 +125,39 @@ static int dump_fit_entry(char *page, unsigned long *fentry)
  * OK except for 4kB pages (and no one is going to do that on SN
  * anyway).
  */
-static int
-dump_fit(char *page, unsigned long nasid)
+static int proc_fit_show(struct seq_file *m, void *v)
 {
+	unsigned long nasid = (unsigned long)m->private;
 	unsigned long fentry[2];
 	int index;
-	char *p;
 
-	p = page;
 	for (index=0;;index++) {
 		BUG_ON(index * 60 > PAGE_SIZE);
 		if (get_fit_entry(nasid, index, fentry, NULL, 0))
 			break;
-		p += dump_fit_entry(p, fentry);
+		dump_fit_entry(m, fentry);
 	}
+	return 0;
+}
 
-	return p - page;
+static int proc_fit_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_fit_show, PDE_DATA(inode));
 }
 
-static int
-dump_version(char *page, unsigned long nasid)
+static const struct file_operations proc_fit_fops = {
+	.open		= proc_fit_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int proc_version_show(struct seq_file *m, void *v)
 {
+	unsigned long nasid = (unsigned long)m->private;
 	unsigned long fentry[2];
 	char banner[128];
 	int index;
-	int len;
 
 	for (index = 0; ; index++) {
 		if (get_fit_entry(nasid, index, fentry, banner,
@@ -160,56 +167,24 @@ dump_version(char *page, unsigned long nasid)
 			break;
 	}
 
-	len = sprintf(page, "%x.%02x\n", FIT_MAJOR(fentry[1]),
-		      FIT_MINOR(fentry[1]));
-	page += len;
+	seq_printf(m, "%x.%02x\n", FIT_MAJOR(fentry[1]), FIT_MINOR(fentry[1]));
 
 	if (banner[0])
-		len += snprintf(page, PAGE_SIZE-len, "%s\n", banner);
-
-	return len;
-}
-
-/* same as in proc_misc.c */
-static int
-proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof,
-		  int len)
-{
-	if (len <= off + count)
-		*eof = 1;
-	*start = page + off;
-	len -= off;
-	if (len > count)
-		len = count;
-	if (len < 0)
-		len = 0;
-	return len;
+		seq_printf(m, "%s\n", banner);
+	return 0;
 }
 
-static int
-read_version_entry(char *page, char **start, off_t off, int count, int *eof,
-		   void *data)
+static int proc_version_open(struct inode *inode, struct file *file)
 {
-	int len;
-
-	/* data holds the NASID of the node */
-	len = dump_version(page, (unsigned long)data);
-	len = proc_calc_metrics(page, start, off, count, eof, len);
-	return len;
+	return single_open(file, proc_version_show, PDE_DATA(inode));
 }
 
-static int
-read_fit_entry(char *page, char **start, off_t off, int count, int *eof,
-	       void *data)
-{
-	int len;
-
-	/* data holds the NASID of the node */
-	len = dump_fit(page, (unsigned long)data);
-	len = proc_calc_metrics(page, start, off, count, eof, len);
-
-	return len;
-}
+static const struct file_operations proc_version_fops = {
+	.open		= proc_version_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
 
 /* module entry points */
 int __init prominfo_init(void);
@@ -218,63 +193,39 @@ void __exit prominfo_exit(void);
 module_init(prominfo_init);
 module_exit(prominfo_exit);
 
-static struct proc_dir_entry **proc_entries;
-static struct proc_dir_entry *sgi_prominfo_entry;
-
 #define NODE_NAME_LEN 11
 
 int __init prominfo_init(void)
 {
-	struct proc_dir_entry **entp;
-	struct proc_dir_entry *p;
+	struct proc_dir_entry *sgi_prominfo_entry;
 	cnodeid_t cnodeid;
-	unsigned long nasid;
-	int size;
-	char name[NODE_NAME_LEN];
 
 	if (!ia64_platform_is("sn2"))
 		return 0;
 
-	size = num_online_nodes() * sizeof(struct proc_dir_entry *);
-	proc_entries = kzalloc(size, GFP_KERNEL);
-	if (!proc_entries)
-		return -ENOMEM;
-
 	sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL);
+	if (!sgi_prominfo_entry)
+		return -ENOMEM;
 
-	entp = proc_entries;
 	for_each_online_node(cnodeid) {
+		struct proc_dir_entry *dir;
+		unsigned long nasid;
+		char name[NODE_NAME_LEN];
+
 		sprintf(name, "node%d", cnodeid);
-		*entp = proc_mkdir(name, sgi_prominfo_entry);
+		dir = proc_mkdir(name, sgi_prominfo_entry);
+		if (!dir)
+			continue;
 		nasid = cnodeid_to_nasid(cnodeid);
-		p = create_proc_read_entry("fit", 0, *entp, read_fit_entry,
-					   (void *)nasid);
-		if (p)
-			p->owner = THIS_MODULE;
-		p = create_proc_read_entry("version", 0, *entp,
-					   read_version_entry, (void *)nasid);
-		if (p)
-			p->owner = THIS_MODULE;
-		entp++;
+		proc_create_data("fit", 0, dir,
+				 &proc_fit_fops, (void *)nasid);
+		proc_create_data("version", 0, dir,
+				 &proc_version_fops, (void *)nasid);
 	}
-
 	return 0;
 }
 
 void __exit prominfo_exit(void)
 {
-	struct proc_dir_entry **entp;
-	unsigned int cnodeid;
-	char name[NODE_NAME_LEN];
-
-	entp = proc_entries;
-	for_each_online_node(cnodeid) {
-		remove_proc_entry("fit", *entp);
-		remove_proc_entry("version", *entp);
-		sprintf(name, "node%d", cnodeid);
-		remove_proc_entry(name, sgi_prominfo_entry);
-		entp++;
-	}
-	remove_proc_entry("sgi_prominfo", NULL);
-	kfree(proc_entries);
+	remove_proc_subtree("sgi_prominfo", NULL);
 }
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index d9d306c79f2..68c84541162 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -26,7 +26,6 @@
 #include <asm/processor.h>
 #include <asm/irq.h>
 #include <asm/sal.h>
-#include <asm/system.h>
 #include <asm/delay.h>
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -40,12 +39,16 @@
 #include <asm/sn/shub_mmr.h>
 #include <asm/sn/nodepda.h>
 #include <asm/sn/rw_mmr.h>
+#include <asm/sn/sn_feature_sets.h>
 
 DEFINE_PER_CPU(struct ptc_stats, ptcstats);
 DECLARE_PER_CPU(struct ptc_stats, ptcstats);
 
 static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
 
+/* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */
+static int sn2_flush_opt = 0;
+
 extern unsigned long
 sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
 			       volatile unsigned long *, unsigned long,
@@ -76,6 +79,8 @@ struct ptc_stats {
 	unsigned long shub_itc_clocks;
 	unsigned long shub_itc_clocks_max;
 	unsigned long shub_ptc_flushes_not_my_mm;
+	unsigned long shub_ipi_flushes;
+	unsigned long shub_ipi_flushes_itc_clocks;
 };
 
 #define sn2_ptctest	0
@@ -99,7 +104,7 @@ static inline unsigned long wait_piowc(void)
  *
  * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
  * Context switching user threads which have memory-mapped MMIO may cause
- * PIOs to issue from seperate CPUs, thus the PIO writes must be drained
+ * PIOs to issue from separate CPUs, thus the PIO writes must be drained
  * from the previous CPU's Shub before execution resumes on the new CPU.
  */
 void sn_migrate(struct task_struct *task)
@@ -121,6 +126,18 @@ void sn_tlb_migrate_finish(struct mm_struct *mm)
 		flush_tlb_mm(mm);
 }
 
+static void
+sn2_ipi_flush_all_tlb(struct mm_struct *mm)
+{
+	unsigned long itc;
+
+	itc = ia64_get_itc();
+	smp_flush_tlb_cpumask(*mm_cpumask(mm));
+	itc = ia64_get_itc() - itc;
+	__get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc;
+	__get_cpu_var(ptcstats).shub_ipi_flushes++;
+}
+
 /**
  * sn2_global_tlb_purge - globally purge translation cache of virtual address range
  * @mm: mm_struct containing virtual address range
@@ -154,12 +171,17 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
 	short nasids[MAX_NUMNODES], nix;
 	nodemask_t nodes_flushed;
-	int active, max_active, deadlock;
+	int active, max_active, deadlock, flush_opt = sn2_flush_opt;
+
+	if (flush_opt > 2) {
+		sn2_ipi_flush_all_tlb(mm);
+		return;
+	}
 
 	nodes_clear(nodes_flushed);
 	i = 0;
 
-	for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
+	for_each_cpu(cpu, mm_cpumask(mm)) {
 		cnode = cpu_to_node(cpu);
 		node_set(cnode, nodes_flushed);
 		lcpu = cpu;
@@ -189,6 +211,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 		return;
 	}
 
+	if (flush_opt == 2) {
+		sn2_ipi_flush_all_tlb(mm);
+		preempt_enable();
+		return;
+	}
+
 	itc = ia64_get_itc();
 	nix = 0;
 	for_each_node_mask(cnode, nodes_flushed)
@@ -256,6 +284,8 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 			}
 			if (active >= max_active || i == (nix - 1)) {
 				if ((deadlock = wait_piowc())) {
+					if (flush_opt == 1)
+						goto done;
 					sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
 					if (reset_max_active_on_deadlock())
 						max_active = 1;
@@ -267,6 +297,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 		start += (1UL << nbits);
 	} while (start < end);
 
+done:
 	itc2 = ia64_get_itc() - itc2;
 	__get_cpu_var(ptcstats).shub_itc_clocks += itc2;
 	if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
@@ -279,6 +310,11 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 
 	spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
 
+	if (flush_opt == 1 && deadlock) {
+		__get_cpu_var(ptcstats).deadlocks++;
+		sn2_ipi_flush_all_tlb(mm);
+	}
+
 	preempt_enable();
 }
 
@@ -393,13 +429,38 @@ void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
 	sn_send_IPI_phys(nasid, physid, vector, delivery_mode);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+/**
+ * sn_cpu_disable_allowed - Determine if a CPU can be disabled.
+ * @cpu - CPU that is requested to be disabled.
+ *
+ * CPU disable is only allowed on SHub2 systems running with a PROM
+ * that supports CPU disable. It is not permitted to disable the boot processor.
+ */
+bool sn_cpu_disable_allowed(int cpu)
+{
+	if (is_shub2() && sn_prom_feature_available(PRF_CPU_DISABLE_SUPPORT)) {
+		if (cpu != 0)
+			return true;
+		else
+			printk(KERN_WARNING
+			      "Disabling the boot processor is not allowed.\n");
+
+	} else
+		printk(KERN_WARNING
+		       "CPU disable is not supported on this system.\n");
+
+	return false;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
 #ifdef CONFIG_PROC_FS
 
 #define PTC_BASENAME	"sgi_sn/ptc_statistics"
 
 static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset)
 {
-	if (*offset < NR_CPUS)
+	if (*offset < nr_cpu_ids)
 		return offset;
 	return NULL;
 }
@@ -407,7 +468,7 @@ static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset)
 static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset)
 {
 	(*offset)++;
-	if (*offset < NR_CPUS)
+	if (*offset < nr_cpu_ids)
 		return offset;
 	return NULL;
 }
@@ -425,25 +486,45 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data)
 
 	if (!cpu) {
 		seq_printf(file,
-			   "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n");
-		seq_printf(file, "# ptctest %d\n", sn2_ptctest);
+			   "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_fluches ipi_nsec\n");
+		seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt);
 	}
 
-	if (cpu < NR_CPUS && cpu_online(cpu)) {
+	if (cpu < nr_cpu_ids && cpu_online(cpu)) {
 		stat = &per_cpu(ptcstats, cpu);
-		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
+		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
 				stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
 				stat->deadlocks,
-				1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
-				1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
-				1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec,
+				1000 * stat->lock_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
+				1000 * stat->shub_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
+				1000 * stat->shub_itc_clocks_max / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
 				stat->shub_ptc_flushes_not_my_mm,
-				stat->deadlocks2);
+				stat->deadlocks2,
+				stat->shub_ipi_flushes,
+				1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec);
 	}
 	return 0;
 }
 
-static struct seq_operations sn2_ptc_seq_ops = {
+static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data)
+{
+	int cpu;
+	char optstr[64];
+
+	if (count == 0 || count > sizeof(optstr))
+		return -EINVAL;
+	if (copy_from_user(optstr, user, count))
+		return -EFAULT;
+	optstr[count - 1] = '\0';
+	sn2_flush_opt = simple_strtoul(optstr, NULL, 0);
+
+	for_each_online_cpu(cpu)
+		memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats));
+
+	return count;
+}
+
+static const struct seq_operations sn2_ptc_seq_ops = {
 	.start = sn2_ptc_seq_start,
 	.next = sn2_ptc_seq_next,
 	.stop = sn2_ptc_seq_stop,
@@ -455,9 +536,10 @@ static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
 	return seq_open(file, &sn2_ptc_seq_ops);
 }
 
-static struct file_operations proc_sn2_ptc_operations = {
+static const struct file_operations proc_sn2_ptc_operations = {
 	.open = sn2_ptc_proc_open,
 	.read = seq_read,
+	.write = sn2_ptc_proc_write,
 	.llseek = seq_lseek,
 	.release = seq_release,
 };
@@ -469,11 +551,12 @@ static int __init sn2_ptc_init(void)
 	if (!ia64_platform_is("sn2"))
 		return 0;
 
-	if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) {
+	proc_sn2_ptc = proc_create(PTC_BASENAME, 0444,
+				   NULL, &proc_sn2_ptc_operations);
+	if (!proc_sn2_ptc) {
 		printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
 		return -EINVAL;
 	}
-	proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations;
 	spin_lock_init(&sn2_global_ptc_lock);
 	return 0;
 }
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 739c948dc50..b9992571c03 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -25,17 +25,18 @@
 
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/seq_file.h>
 #include <linux/miscdevice.h>
 #include <linux/utsname.h>
 #include <linux/cpumask.h>
-#include <linux/smp_lock.h>
 #include <linux/nodemask.h>
+#include <linux/smp.h>
+#include <linux/mutex.h>
+
 #include <asm/processor.h>
 #include <asm/topology.h>
-#include <asm/smp.h>
-#include <asm/semaphore.h>
 #include <asm/uaccess.h>
 #include <asm/sal.h>
 #include <asm/sn/io.h>
@@ -49,7 +50,9 @@ static void *sn_hwperf_salheap = NULL;
 static int sn_hwperf_obj_cnt = 0;
 static nasid_t sn_hwperf_master_nasid = INVALID_NASID;
 static int sn_hwperf_init(void);
-static DECLARE_MUTEX(sn_hwperf_init_mutex);
+static DEFINE_MUTEX(sn_hwperf_init_mutex);
+
+#define cnode_possible(n)	((n) < num_cnodes)
 
 static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret)
 {
@@ -63,7 +66,8 @@ static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret)
 	}
 
 	sz = sn_hwperf_obj_cnt * sizeof(struct sn_hwperf_object_info);
-	if ((objbuf = (struct sn_hwperf_object_info *) vmalloc(sz)) == NULL) {
+	objbuf = vmalloc(sz);
+	if (objbuf == NULL) {
 		printk("sn_hwperf_enum_objects: vmalloc(%d) failed\n", (int)sz);
 		e = -ENOMEM;
 		goto out;
@@ -127,14 +131,14 @@ static int sn_hwperf_geoid_to_cnode(char *location)
 		}
 	}
 
-	return node_possible(cnode) ? cnode : -1;
+	return cnode_possible(cnode) ? cnode : -1;
 }
 
 static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj)
 {
 	if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))
 		BUG();
-	if (!obj->sn_hwp_this_part)
+	if (SN_HWPERF_FOREIGN(obj))
 		return -1;
 	return sn_hwperf_geoid_to_cnode(obj->location);
 }
@@ -186,7 +190,7 @@ static void print_pci_topology(struct seq_file *s)
 	int e;
 
 	for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) {
-		if (!(p = (char *)kmalloc(sz, GFP_KERNEL)))
+		if (!(p = kmalloc(sz, GFP_KERNEL)))
 			break;
 		e = ia64_sn_ioif_get_pci_topology(__pa(p), sz);
 		if (e == SALRET_OK)
@@ -199,12 +203,12 @@ static void print_pci_topology(struct seq_file *s)
 
 static inline int sn_hwperf_has_cpus(cnodeid_t node)
 {
-	return node_online(node) && nr_cpus_node(node);
+	return node < MAX_NUMNODES && node_online(node) && nr_cpus_node(node);
 }
 
 static inline int sn_hwperf_has_mem(cnodeid_t node)
 {
-	return node_online(node) && NODE_DATA(node)->node_present_pages;
+	return node < MAX_NUMNODES && node_online(node) && NODE_DATA(node)->node_present_pages;
 }
 
 static struct sn_hwperf_object_info *
@@ -237,7 +241,7 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb
 	int found_mem = 0;
 	int found_cpu = 0;
 
-	if (!node_possible(node))
+	if (!cnode_possible(node))
 		return -EINVAL;
 
 	if (sn_hwperf_has_cpus(node)) {
@@ -271,8 +275,7 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb
 
 	/* get it's interconnect topology */
 	sz = op->ports * sizeof(struct sn_hwperf_port_info);
-	if (sz > sizeof(ptdata))
-		BUG();
+	BUG_ON(sz > sizeof(ptdata));
 	e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
 			      SN_HWPERF_ENUM_PORTS, nodeobj->id, sz,
 			      (u64)&ptdata, 0, 0, NULL);
@@ -306,8 +309,7 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb
 	if (router && (!found_cpu || !found_mem)) {
 		/* search for a node connected to the same router */
 		sz = router->ports * sizeof(struct sn_hwperf_port_info);
-		if (sz > sizeof(ptdata))
-			BUG();
+		BUG_ON(sz > sizeof(ptdata));
 		e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
 				      SN_HWPERF_ENUM_PORTS, router->id, sz,
 				      (u64)&ptdata, 0, 0, NULL);
@@ -381,7 +383,6 @@ static int sn_topology_show(struct seq_file *s, void *d)
 	int j;
 	const char *slabname;
 	int ordinal;
-	cpumask_t cpumask;
 	char slice;
 	struct cpuinfo_ia64 *c;
 	struct sn_hwperf_port_info *ptdata;
@@ -413,14 +414,14 @@ static int sn_topology_show(struct seq_file *s, void *d)
 		}
 		seq_printf(s, "partition %u %s local "
 			"shubtype %s, "
-			"nasid_mask 0x%016lx, "
+			"nasid_mask 0x%016llx, "
 			"nasid_bits %d:%d, "
 			"system_size %d, "
 			"sharing_size %d, "
 			"coherency_domain %d, "
 			"region_size %d\n",
 
-			partid, system_utsname.nodename,
+			partid, utsname()->nodename,
 			shubtype ? "shub2" : "shub1", 
 			(u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift,
 			system_size, sharing_size, coher, region_size);
@@ -442,7 +443,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
 	seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location,
 		obj->sn_hwp_this_part ? "local" : "shared", obj->name);
 
-	if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))
+	if (ordinal < 0 || (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)))
 		seq_putc(s, '\n');
 	else {
 		cnodeid_t near_mem = -1;
@@ -468,20 +469,20 @@ static int sn_topology_show(struct seq_file *s, void *d)
 		/*
 		 * CPUs on this node, if any
 		 */
-		cpumask = node_to_cpumask(ordinal);
-		for_each_online_cpu(i) {
-			if (cpu_isset(i, cpumask)) {
+		if (!SN_HWPERF_IS_IONODE(obj)) {
+			for_each_cpu_and(i, cpu_online_mask,
+					 cpumask_of_node(ordinal)) {
 				slice = 'a' + cpuid_to_slice(i);
 				c = cpu_data(i);
 				seq_printf(s, "cpu %d %s%c local"
-					" freq %luMHz, arch ia64",
-					i, obj->location, slice,
-					c->proc_freq / 1000000);
+					   " freq %luMHz, arch ia64",
+					   i, obj->location, slice,
+					   c->proc_freq / 1000000);
 				for_each_online_cpu(j) {
 					seq_printf(s, j ? ":%d" : ", dist %d",
-						node_distance(
-						    cpu_to_node(i),
-						    cpu_to_node(j)));
+						   node_distance(
+						    	cpu_to_node(i),
+						    	cpu_to_node(j)));
 				}
 				seq_putc(s, '\n');
 			}
@@ -523,7 +524,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
 			if (obj->sn_hwp_this_part && p->sn_hwp_this_part)
 				/* both ends local to this partition */
 				seq_puts(s, " local");
-			else if (!obj->sn_hwp_this_part && !p->sn_hwp_this_part)
+			else if (SN_HWPERF_FOREIGN(p))
 				/* both ends of the link in foreign partiton */
 				seq_puts(s, " foreign");
 			else
@@ -571,7 +572,7 @@ static void sn_topology_stop(struct seq_file *m, void *v)
 /*
  * /proc/sgi_sn/sn_topology, read-only using seq_file
  */
-static struct seq_operations sn_topology_seq_ops = {
+static const struct seq_operations sn_topology_seq_ops = {
 	.start = sn_topology_start,
 	.next = sn_topology_next,
 	.stop = sn_topology_stop,
@@ -609,28 +610,32 @@ static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
 	op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK;
 
 	if (cpu != SN_HWPERF_ARG_ANY_CPU) {
-		if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+		if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 			r = -EINVAL;
 			goto out;
 		}
 	}
 
-	if (cpu == SN_HWPERF_ARG_ANY_CPU || cpu == get_cpu()) {
-		/* don't care, or already on correct cpu */
+	if (cpu == SN_HWPERF_ARG_ANY_CPU) {
+		/* don't care which cpu */
 		sn_hwperf_call_sal(op_info);
-	}
-	else {
+	} else if (cpu == get_cpu()) {
+		/* already on correct cpu */
+		sn_hwperf_call_sal(op_info);
+		put_cpu();
+	} else {
+		put_cpu();
 		if (use_ipi) {
 			/* use an interprocessor interrupt to call SAL */
 			smp_call_function_single(cpu, sn_hwperf_call_sal,
-				op_info, 1, 1);
+				op_info, 1);
 		}
 		else {
 			/* migrate the task before calling SAL */ 
 			save_allowed = current->cpus_allowed;
-			set_cpus_allowed(current, cpumask_of_cpu(cpu));
+			set_cpus_allowed_ptr(current, cpumask_of(cpu));
 			sn_hwperf_call_sal(op_info);
-			set_cpus_allowed(current, save_allowed);
+			set_cpus_allowed_ptr(current, &save_allowed);
 		}
 	}
 	r = op_info->ret;
@@ -681,8 +686,7 @@ static int sn_hwperf_map_err(int hwperf_err)
 /*
  * ioctl for "sn_hwperf" misc device
  */
-static int
-sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
+static long sn_hwperf_ioctl(struct file *fp, u32 op, unsigned long arg)
 {
 	struct sn_hwperf_ioctl_args a;
 	struct cpuinfo_ia64 *cdata;
@@ -698,8 +702,6 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
 	int i;
 	int j;
 
-	unlock_kernel();
-
 	/* only user requests are allowed here */
 	if ((op & SN_HWPERF_OP_MASK) < 10) {
 		r = -EINVAL;
@@ -745,9 +747,10 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
 			goto error;
 		} else
 		if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
+			int cpuobj_index = 0;
+
 			memset(p, 0, a.sz);
 			for (i = 0; i < nobj; i++) {
-				int cpuobj_index = 0;
 				if (!SN_HWPERF_IS_NODE(objs + i))
 					continue;
 				node = sn_hwperf_obj_to_cnode(objs + i);
@@ -776,7 +779,7 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
 
 	case SN_HWPERF_GET_NODE_NASID:
 		if (a.sz != sizeof(u64) ||
-		   (node = a.arg) < 0 || !node_possible(node)) {
+		   (node = a.arg) < 0 || !cnode_possible(node)) {
 			r = -EINVAL;
 			goto error;
 		}
@@ -784,17 +787,18 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
 		break;
 
 	case SN_HWPERF_GET_OBJ_NODE:
-		if (a.sz != sizeof(u64) || a.arg < 0) {
+		i = a.arg;
+		if (a.sz != sizeof(u64) || i < 0) {
 			r = -EINVAL;
 			goto error;
 		}
 		if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
-			if (a.arg >= nobj) {
+			if (i >= nobj) {
 				r = -EINVAL;
 				vfree(objs);
 				goto error;
 			}
-			if (objs[(i = a.arg)].id != a.arg) {
+			if (objs[i].id != a.arg) {
 				for (i = 0; i < nobj; i++) {
 					if (objs[i].id == a.arg)
 						break;
@@ -856,12 +860,12 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
 error:
 	vfree(p);
 
-	lock_kernel();
 	return r;
 }
 
-static struct file_operations sn_hwperf_fops = {
-	.ioctl = sn_hwperf_ioctl,
+static const struct file_operations sn_hwperf_fops = {
+	.unlocked_ioctl = sn_hwperf_ioctl,
+	.llseek = noop_llseek,
 };
 
 static struct miscdevice sn_hwperf_dev = {
@@ -877,10 +881,10 @@ static int sn_hwperf_init(void)
 	int e = 0;
 
 	/* single threaded, once-only initialization */
-	down(&sn_hwperf_init_mutex);
+	mutex_lock(&sn_hwperf_init_mutex);
 
 	if (sn_hwperf_salheap) {
-		up(&sn_hwperf_init_mutex);
+		mutex_unlock(&sn_hwperf_init_mutex);
 		return e;
 	}
 
@@ -929,7 +933,7 @@ out:
 		sn_hwperf_salheap = NULL;
 		sn_hwperf_obj_cnt = 0;
 	}
-	up(&sn_hwperf_init_mutex);
+	mutex_unlock(&sn_hwperf_init_mutex);
 	return e;
 }
 
@@ -973,7 +977,7 @@ int sn_hwperf_get_nearest_node(cnodeid_t node,
 	return e;
 }
 
-static int __devinit sn_hwperf_misc_register_init(void)
+static int sn_hwperf_misc_register_init(void)
 {
 	int e;
 
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index 5100261310f..7aab87f4806 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -5,7 +5,6 @@
  *
  * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
  */
-#include <linux/config.h>
 
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
@@ -37,7 +36,7 @@ static int system_serial_number_open(struct inode *inode, struct file *file)
 
 static int licenseID_show(struct seq_file *s, void *p)
 {
-	seq_printf(s, "0x%lx\n", sn_partition_serial_number_val());
+	seq_printf(s, "0x%llx\n", sn_partition_serial_number_val());
 	return 0;
 }
 
@@ -46,38 +45,6 @@ static int licenseID_open(struct inode *inode, struct file *file)
 	return single_open(file, licenseID_show, NULL);
 }
 
-/*
- * Enable forced interrupt by default.
- * When set, the sn interrupt handler writes the force interrupt register on
- * the bridge chip.  The hardware will then send an interrupt message if the
- * interrupt line is active.  This mimics a level sensitive interrupt.
- */
-extern int sn_force_interrupt_flag;
-
-static int sn_force_interrupt_show(struct seq_file *s, void *p)
-{
-	seq_printf(s, "Force interrupt is %s\n",
-		sn_force_interrupt_flag ? "enabled" : "disabled");
-	return 0;
-}
-
-static ssize_t sn_force_interrupt_write_proc(struct file *file,
-		const char __user *buffer, size_t count, loff_t *data)
-{
-	char val;
-
-	if (copy_from_user(&val, buffer, 1))
-		return -EFAULT;
-
-	sn_force_interrupt_flag = (val == '0') ? 0 : 1;
-	return count;
-}
-
-static int sn_force_interrupt_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sn_force_interrupt_show, NULL);
-}
-
 static int coherence_id_show(struct seq_file *s, void *p)
 {
 	seq_printf(s, "%d\n", partition_coherence_id());
@@ -90,35 +57,45 @@ static int coherence_id_open(struct inode *inode, struct file *file)
 	return single_open(file, coherence_id_show, NULL);
 }
 
-static struct proc_dir_entry
-*sn_procfs_create_entry(const char *name, struct proc_dir_entry *parent,
-			int (*openfunc)(struct inode *, struct file *),
-	int (*releasefunc)(struct inode *, struct file *),
-	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *))
-{
-	struct proc_dir_entry *e = create_proc_entry(name, 0444, parent);
-
-	if (e) {
-		struct file_operations *f;
-
-		f = kzalloc(sizeof(*f), GFP_KERNEL);
-		if (f) {
-			f->open = openfunc;
-			f->read = seq_read;
-			f->llseek = seq_lseek;
-			f->release = releasefunc;
-			f->write = write;
-			e->proc_fops = f;
-		}
-	}
-
-	return e;
-}
-
 /* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */
 extern int sn_topology_open(struct inode *, struct file *);
 extern int sn_topology_release(struct inode *, struct file *);
 
+static const struct file_operations proc_partition_id_fops = {
+	.open		= partition_id_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations proc_system_sn_fops = {
+	.open		= system_serial_number_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations proc_license_id_fops = {
+	.open		= licenseID_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations proc_coherence_id_fops = {
+	.open		= coherence_id_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations proc_sn_topo_fops = {
+	.open		= sn_topology_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= sn_topology_release,
+};
+
 void register_sn_procfs(void)
 {
 	static struct proc_dir_entry *sgi_proc_dir = NULL;
@@ -127,24 +104,14 @@ void register_sn_procfs(void)
 	if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL)))
 		return;
 
-	sn_procfs_create_entry("partition_id", sgi_proc_dir,
-		partition_id_open, single_release, NULL);
-
-	sn_procfs_create_entry("system_serial_number", sgi_proc_dir,
-		system_serial_number_open, single_release, NULL);
-
-	sn_procfs_create_entry("licenseID", sgi_proc_dir, 
-		licenseID_open, single_release, NULL);
-
-	sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir,
-		sn_force_interrupt_open, single_release,
-		sn_force_interrupt_write_proc);
-
-	sn_procfs_create_entry("coherence_id", sgi_proc_dir, 
-		coherence_id_open, single_release, NULL);
-	
-	sn_procfs_create_entry("sn_topology", sgi_proc_dir,
-		sn_topology_open, sn_topology_release, NULL);
+	proc_create("partition_id", 0444, sgi_proc_dir,
+		    &proc_partition_id_fops);
+	proc_create("system_serial_number", 0444, sgi_proc_dir,
+		    &proc_system_sn_fops);
+	proc_create("licenseID", 0444, sgi_proc_dir, &proc_license_id_fops);
+	proc_create("coherence_id", 0444, sgi_proc_dir,
+		    &proc_coherence_id_fops);
+	proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops);
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c
index 56a88b6df4b..abab8f99e91 100644
--- a/arch/ia64/sn/kernel/sn2/timer.c
+++ b/arch/ia64/sn/kernel/sn2/timer.c
@@ -11,9 +11,9 @@
 #include <linux/sched.h>
 #include <linux/time.h>
 #include <linux/interrupt.h>
+#include <linux/clocksource.h>
 
 #include <asm/hw_irq.h>
-#include <asm/system.h>
 #include <asm/timex.h>
 
 #include <asm/sn/leds.h>
@@ -22,11 +22,17 @@
 
 extern unsigned long sn_rtc_cycles_per_second;
 
-static struct time_interpolator sn2_interpolator = {
-	.drift = -1,
-	.shift = 10,
-	.mask = (1LL << 55) - 1,
-	.source = TIME_SOURCE_MMIO64
+static cycle_t read_sn2(struct clocksource *cs)
+{
+	return (cycle_t)readq(RTC_COUNTER_ADDR);
+}
+
+static struct clocksource clocksource_sn2 = {
+        .name           = "sn2_rtc",
+        .rating         = 450,
+        .read           = read_sn2,
+        .mask           = (1LL << 55) - 1,
+        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 /*
@@ -47,9 +53,8 @@ ia64_sn_udelay (unsigned long usecs)
 
 void __init sn_timer_init(void)
 {
-	sn2_interpolator.frequency = sn_rtc_cycles_per_second;
-	sn2_interpolator.addr = RTC_COUNTER_ADDR;
-	register_time_interpolator(&sn2_interpolator);
+	clocksource_sn2.archdata.fsys_mmio = RTC_COUNTER_ADDR;
+	clocksource_register_hz(&clocksource_sn2, sn_rtc_cycles_per_second);
 
 	ia64_udelay = &ia64_sn_udelay;
 }
diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
index fa7f6994591..103d6ea8e94 100644
--- a/arch/ia64/sn/kernel/sn2/timer_interrupt.c
+++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
@@ -36,7 +36,7 @@ extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs);
 
 #define SN_LB_INT_WAR_INTERVAL 100
 
-void sn_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+void sn_timer_interrupt(int irq, void *dev_id)
 {
 	/* LED blinking */
 	if (!pda->hb_count--) {
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index feaf1a6e810..e35f6485c1f 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -14,7 +14,6 @@
 #include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/delay.h>
-#include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/addrs.h>
@@ -66,8 +65,7 @@ static int tiocx_match(struct device *dev, struct device_driver *drv)
 
 }
 
-static int tiocx_uevent(struct device *dev, char **envp, int num_envp,
-			 char *buffer, int buffer_size)
+static int tiocx_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	return -ENODEV;
 }
@@ -192,6 +190,7 @@ cx_device_register(nasid_t nasid, int part_num, int mfg_num,
 		   struct hubdev_info *hubdev, int bt)
 {
 	struct cx_dev *cx_dev;
+	int r;
 
 	cx_dev = kzalloc(sizeof(struct cx_dev), GFP_KERNEL);
 	DBG("cx_dev= 0x%p\n", cx_dev);
@@ -207,9 +206,12 @@ cx_device_register(nasid_t nasid, int part_num, int mfg_num,
 	cx_dev->dev.parent = NULL;
 	cx_dev->dev.bus = &tiocx_bus_type;
 	cx_dev->dev.release = tiocx_bus_release;
-	snprintf(cx_dev->dev.bus_id, BUS_ID_SIZE, "%d",
-		 cx_dev->cx_id.nasid);
-	device_register(&cx_dev->dev);
+	dev_set_name(&cx_dev->dev, "%d", cx_dev->cx_id.nasid);
+	r = device_register(&cx_dev->dev);
+	if (r) {
+		kfree(cx_dev);
+		return r;
+	}
 	get_device(&cx_dev->dev);
 
 	device_create_file(&cx_dev->dev, &dev_attr_cxdev_control);
@@ -369,8 +371,8 @@ static void tio_corelet_reset(nasid_t nasid, int corelet)
 
 static int is_fpga_tio(int nasid, int *bt)
 {
-	u16 ioboard_type;
-	s64 rc;
+	u16 uninitialized_var(ioboard_type);	/* GCC be quiet */
+	long rc;
 
 	rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard_type);
 	if (rc) {
@@ -488,11 +490,14 @@ static int __init tiocx_init(void)
 {
 	cnodeid_t cnodeid;
 	int found_tiocx_device = 0;
+	int err;
 
 	if (!ia64_platform_is("sn2"))
 		return 0;
 
-	bus_register(&tiocx_bus_type);
+	err = bus_register(&tiocx_bus_type);
+	if (err)
+		return err;
 
 	for (cnodeid = 0; cnodeid < num_cnodes; cnodeid++) {
 		nasid_t nasid;
@@ -552,7 +557,7 @@ static void __exit tiocx_exit(void)
 	bus_unregister(&tiocx_bus_type);
 }
 
-subsys_initcall(tiocx_init);
+fs_initcall(tiocx_init);
 module_exit(tiocx_exit);
 
 /************************************************************************
diff --git a/arch/ia64/sn/kernel/xp_main.c b/arch/ia64/sn/kernel/xp_main.c
deleted file mode 100644
index b7ea46645e1..00000000000
--- a/arch/ia64/sn/kernel/xp_main.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-
-/*
- * Cross Partition (XP) base.
- *
- *	XP provides a base from which its users can interact
- *	with XPC, yet not be dependent on XPC.
- *
- */
-
-
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/sn/xp.h>
-
-
-/*
- * Target of nofault PIO read.
- */
-u64 xp_nofault_PIOR_target;
-
-
-/*
- * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
- * users of XPC.
- */
-struct xpc_registration xpc_registrations[XPC_NCHANNELS];
-
-
-/*
- * Initialize the XPC interface to indicate that XPC isn't loaded.
- */
-static enum xpc_retval xpc_notloaded(void) { return xpcNotLoaded; }
-
-struct xpc_interface xpc_interface = {
-	(void (*)(int)) xpc_notloaded,
-	(void (*)(int)) xpc_notloaded,
-	(enum xpc_retval (*)(partid_t, int, u32, void **)) xpc_notloaded,
-	(enum xpc_retval (*)(partid_t, int, void *)) xpc_notloaded,
-	(enum xpc_retval (*)(partid_t, int, void *, xpc_notify_func, void *))
-							xpc_notloaded,
-	(void (*)(partid_t, int, void *)) xpc_notloaded,
-	(enum xpc_retval (*)(partid_t, void *)) xpc_notloaded
-};
-
-
-/*
- * XPC calls this when it (the XPC module) has been loaded.
- */
-void
-xpc_set_interface(void (*connect)(int),
-		void (*disconnect)(int),
-		enum xpc_retval (*allocate)(partid_t, int, u32, void **),
-		enum xpc_retval (*send)(partid_t, int, void *),
-		enum xpc_retval (*send_notify)(partid_t, int, void *,
-						xpc_notify_func, void *),
-		void (*received)(partid_t, int, void *),
-		enum xpc_retval (*partid_to_nasids)(partid_t, void *))
-{
-	xpc_interface.connect = connect;
-	xpc_interface.disconnect = disconnect;
-	xpc_interface.allocate = allocate;
-	xpc_interface.send = send;
-	xpc_interface.send_notify = send_notify;
-	xpc_interface.received = received;
-	xpc_interface.partid_to_nasids = partid_to_nasids;
-}
-
-
-/*
- * XPC calls this when it (the XPC module) is being unloaded.
- */
-void
-xpc_clear_interface(void)
-{
-	xpc_interface.connect = (void (*)(int)) xpc_notloaded;
-	xpc_interface.disconnect = (void (*)(int)) xpc_notloaded;
-	xpc_interface.allocate = (enum xpc_retval (*)(partid_t, int, u32,
-					void **)) xpc_notloaded;
-	xpc_interface.send = (enum xpc_retval (*)(partid_t, int, void *))
-					xpc_notloaded;
-	xpc_interface.send_notify = (enum xpc_retval (*)(partid_t, int, void *,
-				    xpc_notify_func, void *)) xpc_notloaded;
-	xpc_interface.received = (void (*)(partid_t, int, void *))
-					xpc_notloaded;
-	xpc_interface.partid_to_nasids = (enum xpc_retval (*)(partid_t, void *))
-					xpc_notloaded;
-}
-
-
-/*
- * Register for automatic establishment of a channel connection whenever
- * a partition comes up.
- *
- * Arguments:
- *
- *	ch_number - channel # to register for connection.
- *	func - function to call for asynchronous notification of channel
- *	       state changes (i.e., connection, disconnection, error) and
- *	       the arrival of incoming messages.
- *      key - pointer to optional user-defined value that gets passed back
- *	      to the user on any callouts made to func.
- *	payload_size - size in bytes of the XPC message's payload area which
- *		       contains a user-defined message. The user should make
- *		       this large enough to hold their largest message.
- *	nentries - max #of XPC message entries a message queue can contain.
- *		   The actual number, which is determined when a connection
- * 		   is established and may be less then requested, will be
- *		   passed to the user via the xpcConnected callout.
- *	assigned_limit - max number of kthreads allowed to be processing
- * 			 messages (per connection) at any given instant.
- *	idle_limit - max number of kthreads allowed to be idle at any given
- * 		     instant.
- */
-enum xpc_retval
-xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
-		u16 nentries, u32 assigned_limit, u32 idle_limit)
-{
-	struct xpc_registration *registration;
-
-
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
-	DBUG_ON(payload_size == 0 || nentries == 0);
-	DBUG_ON(func == NULL);
-	DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
-
-	registration = &xpc_registrations[ch_number];
-
-	if (mutex_lock_interruptible(&registration->mutex) != 0) {
-		return xpcInterrupted;
-	}
-
-	/* if XPC_CHANNEL_REGISTERED(ch_number) */
-	if (registration->func != NULL) {
-		mutex_unlock(&registration->mutex);
-		return xpcAlreadyRegistered;
-	}
-
-	/* register the channel for connection */
-	registration->msg_size = XPC_MSG_SIZE(payload_size);
-	registration->nentries = nentries;
-	registration->assigned_limit = assigned_limit;
-	registration->idle_limit = idle_limit;
-	registration->key = key;
-	registration->func = func;
-
-	mutex_unlock(&registration->mutex);
-
-	xpc_interface.connect(ch_number);
-
-	return xpcSuccess;
-}
-
-
-/*
- * Remove the registration for automatic connection of the specified channel
- * when a partition comes up.
- *
- * Before returning this xpc_disconnect() will wait for all connections on the
- * specified channel have been closed/torndown. So the caller can be assured
- * that they will not be receiving any more callouts from XPC to their
- * function registered via xpc_connect().
- *
- * Arguments:
- *
- *	ch_number - channel # to unregister.
- */
-void
-xpc_disconnect(int ch_number)
-{
-	struct xpc_registration *registration;
-
-
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
-
-	registration = &xpc_registrations[ch_number];
-
-	/*
-	 * We've decided not to make this a down_interruptible(), since we
-	 * figured XPC's users will just turn around and call xpc_disconnect()
-	 * again anyways, so we might as well wait, if need be.
-	 */
-	mutex_lock(&registration->mutex);
-
-	/* if !XPC_CHANNEL_REGISTERED(ch_number) */
-	if (registration->func == NULL) {
-		mutex_unlock(&registration->mutex);
-		return;
-	}
-
-	/* remove the connection registration for the specified channel */
-	registration->func = NULL;
-	registration->key = NULL;
-	registration->nentries = 0;
-	registration->msg_size = 0;
-	registration->assigned_limit = 0;
-	registration->idle_limit = 0;
-
-	xpc_interface.disconnect(ch_number);
-
-	mutex_unlock(&registration->mutex);
-
-	return;
-}
-
-
-int __init
-xp_init(void)
-{
-	int ret, ch_number;
-	u64 func_addr = *(u64 *) xp_nofault_PIOR;
-	u64 err_func_addr = *(u64 *) xp_error_PIOR;
-
-
-	if (!ia64_platform_is("sn2")) {
-		return -ENODEV;
-	}
-
-	/*
-	 * Register a nofault code region which performs a cross-partition
-	 * PIO read. If the PIO read times out, the MCA handler will consume
-	 * the error and return to a kernel-provided instruction to indicate
-	 * an error. This PIO read exists because it is guaranteed to timeout
-	 * if the destination is down (AMO operations do not timeout on at
-	 * least some CPUs on Shubs <= v1.2, which unfortunately we have to
-	 * work around).
-	 */
-	if ((ret = sn_register_nofault_code(func_addr, err_func_addr,
-						err_func_addr, 1, 1)) != 0) {
-		printk(KERN_ERR "XP: can't register nofault code, error=%d\n",
-			ret);
-	}
-	/*
-	 * Setup the nofault PIO read target. (There is no special reason why
-	 * SH_IPI_ACCESS was selected.)
-	 */
-	if (is_shub2()) {
-		xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
-	} else {
-		xp_nofault_PIOR_target = SH1_IPI_ACCESS;
-	}
-
-	/* initialize the connection registration mutex */
-	for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
-		mutex_init(&xpc_registrations[ch_number].mutex);
-	}
-
-	return 0;
-}
-module_init(xp_init);
-
-
-void __exit
-xp_exit(void)
-{
-	u64 func_addr = *(u64 *) xp_nofault_PIOR;
-	u64 err_func_addr = *(u64 *) xp_error_PIOR;
-
-
-	/* unregister the PIO read nofault code region */
-	(void) sn_register_nofault_code(func_addr, err_func_addr,
-					err_func_addr, 1, 0);
-}
-module_exit(xp_exit);
-
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION("Cross Partition (XP) base");
-MODULE_LICENSE("GPL");
-
-EXPORT_SYMBOL(xp_nofault_PIOR);
-EXPORT_SYMBOL(xp_nofault_PIOR_target);
-EXPORT_SYMBOL(xpc_registrations);
-EXPORT_SYMBOL(xpc_interface);
-EXPORT_SYMBOL(xpc_clear_interface);
-EXPORT_SYMBOL(xpc_set_interface);
-EXPORT_SYMBOL(xpc_connect);
-EXPORT_SYMBOL(xpc_disconnect);
-
diff --git a/arch/ia64/sn/kernel/xp_nofault.S b/arch/ia64/sn/kernel/xp_nofault.S
deleted file mode 100644
index b772543053c..00000000000
--- a/arch/ia64/sn/kernel/xp_nofault.S
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-
-/*
- * The xp_nofault_PIOR function takes a pointer to a remote PIO register
- * and attempts to load and consume a value from it.  This function
- * will be registered as a nofault code block.  In the event that the
- * PIO read fails, the MCA handler will force the error to look
- * corrected and vector to the xp_error_PIOR which will return an error.
- *
- *	extern int xp_nofault_PIOR(void *remote_register);
- */
-
-	.global xp_nofault_PIOR
-xp_nofault_PIOR:
-	mov	r8=r0			// Stage a success return value
-	ld8.acq	r9=[r32];;		// PIO Read the specified register
-	adds	r9=1,r9			// Add to force a consume
-	br.ret.sptk.many b0;;		// Return success
-
-	.global xp_error_PIOR
-xp_error_PIOR:
-	mov	r8=1			// Return value of 1
-	br.ret.sptk.many b0;;		// Return failure
-
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
deleted file mode 100644
index 8255a9be463..00000000000
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ /dev/null
@@ -1,2374 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-
-/*
- * Cross Partition Communication (XPC) channel support.
- *
- *	This is the part of XPC that manages the channels and
- *	sends/receives messages across them to/from other partitions.
- *
- */
-
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-#include <linux/mutex.h>
-#include <linux/completion.h>
-#include <asm/sn/bte.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/sn/xpc.h>
-
-
-/*
- * Guarantee that the kzalloc'd memory is cacheline aligned.
- */
-static void *
-xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
-{
-	/* see if kzalloc will give us cachline aligned memory by default */
-	*base = kzalloc(size, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
-		return *base;
-	}
-	kfree(*base);
-
-	/* nope, we'll have to do it ourselves */
-	*base = kzalloc(size + L1_CACHE_BYTES, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	return (void *) L1_CACHE_ALIGN((u64) *base);
-}
-
-
-/*
- * Set up the initial values for the XPartition Communication channels.
- */
-static void
-xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
-{
-	int ch_number;
-	struct xpc_channel *ch;
-
-
-	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
-		ch = &part->channels[ch_number];
-
-		ch->partid = partid;
-		ch->number = ch_number;
-		ch->flags = XPC_C_DISCONNECTED;
-
-		ch->local_GP = &part->local_GPs[ch_number];
-		ch->local_openclose_args =
-					&part->local_openclose_args[ch_number];
-
-		atomic_set(&ch->kthreads_assigned, 0);
-		atomic_set(&ch->kthreads_idle, 0);
-		atomic_set(&ch->kthreads_active, 0);
-
-		atomic_set(&ch->references, 0);
-		atomic_set(&ch->n_to_notify, 0);
-
-		spin_lock_init(&ch->lock);
-		mutex_init(&ch->msg_to_pull_mutex);
-		init_completion(&ch->wdisconnect_wait);
-
-		atomic_set(&ch->n_on_msg_allocate_wq, 0);
-		init_waitqueue_head(&ch->msg_allocate_wq);
-		init_waitqueue_head(&ch->idle_wq);
-	}
-}
-
-
-/*
- * Setup the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
- */
-enum xpc_retval
-xpc_setup_infrastructure(struct xpc_partition *part)
-{
-	int ret, cpuid;
-	struct timer_list *timer;
-	partid_t partid = XPC_PARTID(part);
-
-
-	/*
-	 * Zero out MOST of the entry for this partition. Only the fields
-	 * starting with `nchannels' will be zeroed. The preceding fields must
-	 * remain `viable' across partition ups and downs, since they may be
-	 * referenced during this memset() operation.
-	 */
-	memset(&part->nchannels, 0, sizeof(struct xpc_partition) -
-				offsetof(struct xpc_partition, nchannels));
-
-	/*
-	 * Allocate all of the channel structures as a contiguous chunk of
-	 * memory.
-	 */
-	part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
-								GFP_KERNEL);
-	if (part->channels == NULL) {
-		dev_err(xpc_chan, "can't get memory for channels\n");
-		return xpcNoMemory;
-	}
-
-	part->nchannels = XPC_NCHANNELS;
-
-
-	/* allocate all the required GET/PUT values */
-
-	part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
-					GFP_KERNEL, &part->local_GPs_base);
-	if (part->local_GPs == NULL) {
-		kfree(part->channels);
-		part->channels = NULL;
-		dev_err(xpc_chan, "can't get memory for local get/put "
-			"values\n");
-		return xpcNoMemory;
-	}
-
-	part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
-					GFP_KERNEL, &part->remote_GPs_base);
-	if (part->remote_GPs == NULL) {
-		dev_err(xpc_chan, "can't get memory for remote get/put "
-			"values\n");
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpcNoMemory;
-	}
-
-
-	/* allocate all the required open and close args */
-
-	part->local_openclose_args = xpc_kzalloc_cacheline_aligned(
-					XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
-					&part->local_openclose_args_base);
-	if (part->local_openclose_args == NULL) {
-		dev_err(xpc_chan, "can't get memory for local connect args\n");
-		kfree(part->remote_GPs_base);
-		part->remote_GPs = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpcNoMemory;
-	}
-
-	part->remote_openclose_args = xpc_kzalloc_cacheline_aligned(
-					XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
-					&part->remote_openclose_args_base);
-	if (part->remote_openclose_args == NULL) {
-		dev_err(xpc_chan, "can't get memory for remote connect args\n");
-		kfree(part->local_openclose_args_base);
-		part->local_openclose_args = NULL;
-		kfree(part->remote_GPs_base);
-		part->remote_GPs = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpcNoMemory;
-	}
-
-
-	xpc_initialize_channels(part, partid);
-
-	atomic_set(&part->nchannels_active, 0);
-	atomic_set(&part->nchannels_engaged, 0);
-
-
-	/* local_IPI_amo were set to 0 by an earlier memset() */
-
-	/* Initialize this partitions AMO_t structure */
-	part->local_IPI_amo_va = xpc_IPI_init(partid);
-
-	spin_lock_init(&part->IPI_lock);
-
-	atomic_set(&part->channel_mgr_requests, 1);
-	init_waitqueue_head(&part->channel_mgr_wq);
-
-	sprintf(part->IPI_owner, "xpc%02d", partid);
-	ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ,
-				part->IPI_owner, (void *) (u64) partid);
-	if (ret != 0) {
-		dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
-			"errno=%d\n", -ret);
-		kfree(part->remote_openclose_args_base);
-		part->remote_openclose_args = NULL;
-		kfree(part->local_openclose_args_base);
-		part->local_openclose_args = NULL;
-		kfree(part->remote_GPs_base);
-		part->remote_GPs = NULL;
-		kfree(part->local_GPs_base);
-		part->local_GPs = NULL;
-		kfree(part->channels);
-		part->channels = NULL;
-		return xpcLackOfResources;
-	}
-
-	/* Setup a timer to check for dropped IPIs */
-	timer = &part->dropped_IPI_timer;
-	init_timer(timer);
-	timer->function = (void (*)(unsigned long)) xpc_dropped_IPI_check;
-	timer->data = (unsigned long) part;
-	timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT;
-	add_timer(timer);
-
-	/*
-	 * With the setting of the partition setup_state to XPC_P_SETUP, we're
-	 * declaring that this partition is ready to go.
-	 */
-	part->setup_state = XPC_P_SETUP;
-
-
-	/*
-	 * Setup the per partition specific variables required by the
-	 * remote partition to establish channel connections with us.
-	 *
-	 * The setting of the magic # indicates that these per partition
-	 * specific variables are ready to be used.
-	 */
-	xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs);
-	xpc_vars_part[partid].openclose_args_pa =
-					__pa(part->local_openclose_args);
-	xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
-	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
-	xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
-	xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
-	xpc_vars_part[partid].nchannels = part->nchannels;
-	xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
-
-	return xpcSuccess;
-}
-
-
-/*
- * Create a wrapper that hides the underlying mechanism for pulling a cacheline
- * (or multiple cachelines) from a remote partition.
- *
- * src must be a cacheline aligned physical address on the remote partition.
- * dst must be a cacheline aligned virtual address on this partition.
- * cnt must be an cacheline sized
- */
-static enum xpc_retval
-xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
-				const void *src, size_t cnt)
-{
-	bte_result_t bte_ret;
-
-
-	DBUG_ON((u64) src != L1_CACHE_ALIGN((u64) src));
-	DBUG_ON((u64) dst != L1_CACHE_ALIGN((u64) dst));
-	DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
-
-	if (part->act_state == XPC_P_DEACTIVATING) {
-		return part->reason;
-	}
-
-	bte_ret = xp_bte_copy((u64) src, (u64) ia64_tpa((u64) dst),
-				(u64) cnt, (BTE_NORMAL | BTE_WACQUIRE), NULL);
-	if (bte_ret == BTE_SUCCESS) {
-		return xpcSuccess;
-	}
-
-	dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n",
-		XPC_PARTID(part), bte_ret);
-
-	return xpc_map_bte_errors(bte_ret);
-}
-
-
-/*
- * Pull the remote per partititon specific variables from the specified
- * partition.
- */
-enum xpc_retval
-xpc_pull_remote_vars_part(struct xpc_partition *part)
-{
-	u8 buffer[L1_CACHE_BYTES * 2];
-	struct xpc_vars_part *pulled_entry_cacheline =
-			(struct xpc_vars_part *) L1_CACHE_ALIGN((u64) buffer);
-	struct xpc_vars_part *pulled_entry;
-	u64 remote_entry_cacheline_pa, remote_entry_pa;
-	partid_t partid = XPC_PARTID(part);
-	enum xpc_retval ret;
-
-
-	/* pull the cacheline that contains the variables we're interested in */
-
-	DBUG_ON(part->remote_vars_part_pa !=
-				L1_CACHE_ALIGN(part->remote_vars_part_pa));
-	DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2);
-
-	remote_entry_pa = part->remote_vars_part_pa +
-			sn_partition_id * sizeof(struct xpc_vars_part);
-
-	remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
-
-	pulled_entry = (struct xpc_vars_part *) ((u64) pulled_entry_cacheline +
-				(remote_entry_pa & (L1_CACHE_BYTES - 1)));
-
-	ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline,
-					(void *) remote_entry_cacheline_pa,
-					L1_CACHE_BYTES);
-	if (ret != xpcSuccess) {
-		dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
-			"partition %d, ret=%d\n", partid, ret);
-		return ret;
-	}
-
-
-	/* see if they've been set up yet */
-
-	if (pulled_entry->magic != XPC_VP_MAGIC1 &&
-				pulled_entry->magic != XPC_VP_MAGIC2) {
-
-		if (pulled_entry->magic != 0) {
-			dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
-				"partition %d has bad magic value (=0x%lx)\n",
-				partid, sn_partition_id, pulled_entry->magic);
-			return xpcBadMagic;
-		}
-
-		/* they've not been initialized yet */
-		return xpcRetry;
-	}
-
-	if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) {
-
-		/* validate the variables */
-
-		if (pulled_entry->GPs_pa == 0 ||
-				pulled_entry->openclose_args_pa == 0 ||
-					pulled_entry->IPI_amo_pa == 0) {
-
-			dev_err(xpc_chan, "partition %d's XPC vars_part for "
-				"partition %d are not valid\n", partid,
-				sn_partition_id);
-			return xpcInvalidAddress;
-		}
-
-		/* the variables we imported look to be valid */
-
-		part->remote_GPs_pa = pulled_entry->GPs_pa;
-		part->remote_openclose_args_pa =
-					pulled_entry->openclose_args_pa;
-		part->remote_IPI_amo_va =
-				      (AMO_t *) __va(pulled_entry->IPI_amo_pa);
-		part->remote_IPI_nasid = pulled_entry->IPI_nasid;
-		part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
-
-		if (part->nchannels > pulled_entry->nchannels) {
-			part->nchannels = pulled_entry->nchannels;
-		}
-
-		/* let the other side know that we've pulled their variables */
-
-		xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
-	}
-
-	if (pulled_entry->magic == XPC_VP_MAGIC1) {
-		return xpcRetry;
-	}
-
-	return xpcSuccess;
-}
-
-
-/*
- * Get the IPI flags and pull the openclose args and/or remote GPs as needed.
- */
-static u64
-xpc_get_IPI_flags(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	u64 IPI_amo;
-	enum xpc_retval ret;
-
-
-	/*
-	 * See if there are any IPI flags to be handled.
-	 */
-
-	spin_lock_irqsave(&part->IPI_lock, irq_flags);
-	if ((IPI_amo = part->local_IPI_amo) != 0) {
-		part->local_IPI_amo = 0;
-	}
-	spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
-
-
-	if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) {
-		ret = xpc_pull_remote_cachelines(part,
-					part->remote_openclose_args,
-					(void *) part->remote_openclose_args_pa,
-					XPC_OPENCLOSE_ARGS_SIZE);
-		if (ret != xpcSuccess) {
-			XPC_DEACTIVATE_PARTITION(part, ret);
-
-			dev_dbg(xpc_chan, "failed to pull openclose args from "
-				"partition %d, ret=%d\n", XPC_PARTID(part),
-				ret);
-
-			/* don't bother processing IPIs anymore */
-			IPI_amo = 0;
-		}
-	}
-
-	if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) {
-		ret = xpc_pull_remote_cachelines(part, part->remote_GPs,
-						(void *) part->remote_GPs_pa,
-						XPC_GP_SIZE);
-		if (ret != xpcSuccess) {
-			XPC_DEACTIVATE_PARTITION(part, ret);
-
-			dev_dbg(xpc_chan, "failed to pull GPs from partition "
-				"%d, ret=%d\n", XPC_PARTID(part), ret);
-
-			/* don't bother processing IPIs anymore */
-			IPI_amo = 0;
-		}
-	}
-
-	return IPI_amo;
-}
-
-
-/*
- * Allocate the local message queue and the notify queue.
- */
-static enum xpc_retval
-xpc_allocate_local_msgqueue(struct xpc_channel *ch)
-{
-	unsigned long irq_flags;
-	int nentries;
-	size_t nbytes;
-
-
-	// >>> may want to check for ch->flags & XPC_C_DISCONNECTING between
-	// >>> iterations of the for-loop, bail if set?
-
-	// >>> should we impose a minumum #of entries? like 4 or 8?
-	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
-
-		nbytes = nentries * ch->msg_size;
-		ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
-						GFP_KERNEL,
-						&ch->local_msgqueue_base);
-		if (ch->local_msgqueue == NULL) {
-			continue;
-		}
-
-		nbytes = nentries * sizeof(struct xpc_notify);
-		ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
-		if (ch->notify_queue == NULL) {
-			kfree(ch->local_msgqueue_base);
-			ch->local_msgqueue = NULL;
-			continue;
-		}
-
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		if (nentries < ch->local_nentries) {
-			dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
-				"partid=%d, channel=%d\n", nentries,
-				ch->local_nentries, ch->partid, ch->number);
-
-			ch->local_nentries = nentries;
-		}
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		return xpcSuccess;
-	}
-
-	dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
-		"queue, partid=%d, channel=%d\n", ch->partid, ch->number);
-	return xpcNoMemory;
-}
-
-
-/*
- * Allocate the cached remote message queue.
- */
-static enum xpc_retval
-xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
-{
-	unsigned long irq_flags;
-	int nentries;
-	size_t nbytes;
-
-
-	DBUG_ON(ch->remote_nentries <= 0);
-
-	// >>> may want to check for ch->flags & XPC_C_DISCONNECTING between
-	// >>> iterations of the for-loop, bail if set?
-
-	// >>> should we impose a minumum #of entries? like 4 or 8?
-	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
-
-		nbytes = nentries * ch->msg_size;
-		ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
-						GFP_KERNEL,
-						&ch->remote_msgqueue_base);
-		if (ch->remote_msgqueue == NULL) {
-			continue;
-		}
-
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		if (nentries < ch->remote_nentries) {
-			dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
-				"partid=%d, channel=%d\n", nentries,
-				ch->remote_nentries, ch->partid, ch->number);
-
-			ch->remote_nentries = nentries;
-		}
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		return xpcSuccess;
-	}
-
-	dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
-		"partid=%d, channel=%d\n", ch->partid, ch->number);
-	return xpcNoMemory;
-}
-
-
-/*
- * Allocate message queues and other stuff associated with a channel.
- *
- * Note: Assumes all of the channel sizes are filled in.
- */
-static enum xpc_retval
-xpc_allocate_msgqueues(struct xpc_channel *ch)
-{
-	unsigned long irq_flags;
-	enum xpc_retval ret;
-
-
-	DBUG_ON(ch->flags & XPC_C_SETUP);
-
-	if ((ret = xpc_allocate_local_msgqueue(ch)) != xpcSuccess) {
-		return ret;
-	}
-
-	if ((ret = xpc_allocate_remote_msgqueue(ch)) != xpcSuccess) {
-		kfree(ch->local_msgqueue_base);
-		ch->local_msgqueue = NULL;
-		kfree(ch->notify_queue);
-		ch->notify_queue = NULL;
-		return ret;
-	}
-
-	spin_lock_irqsave(&ch->lock, irq_flags);
-	ch->flags |= XPC_C_SETUP;
-	spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-	return xpcSuccess;
-}
-
-
-/*
- * Process a connect message from a remote partition.
- *
- * Note: xpc_process_connect() is expecting to be called with the
- * spin_lock_irqsave held and will leave it locked upon return.
- */
-static void
-xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
-{
-	enum xpc_retval ret;
-
-
-	DBUG_ON(!spin_is_locked(&ch->lock));
-
-	if (!(ch->flags & XPC_C_OPENREQUEST) ||
-				!(ch->flags & XPC_C_ROPENREQUEST)) {
-		/* nothing more to do for now */
-		return;
-	}
-	DBUG_ON(!(ch->flags & XPC_C_CONNECTING));
-
-	if (!(ch->flags & XPC_C_SETUP)) {
-		spin_unlock_irqrestore(&ch->lock, *irq_flags);
-		ret = xpc_allocate_msgqueues(ch);
-		spin_lock_irqsave(&ch->lock, *irq_flags);
-
-		if (ret != xpcSuccess) {
-			XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
-		}
-		if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING)) {
-			return;
-		}
-
-		DBUG_ON(!(ch->flags & XPC_C_SETUP));
-		DBUG_ON(ch->local_msgqueue == NULL);
-		DBUG_ON(ch->remote_msgqueue == NULL);
-	}
-
-	if (!(ch->flags & XPC_C_OPENREPLY)) {
-		ch->flags |= XPC_C_OPENREPLY;
-		xpc_IPI_send_openreply(ch, irq_flags);
-	}
-
-	if (!(ch->flags & XPC_C_ROPENREPLY)) {
-		return;
-	}
-
-	DBUG_ON(ch->remote_msgqueue_pa == 0);
-
-	ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP);	/* clear all else */
-
-	dev_info(xpc_chan, "channel %d to partition %d connected\n",
-		ch->number, ch->partid);
-
-	spin_unlock_irqrestore(&ch->lock, *irq_flags);
-	xpc_create_kthreads(ch, 1);
-	spin_lock_irqsave(&ch->lock, *irq_flags);
-}
-
-
-/*
- * Notify those who wanted to be notified upon delivery of their message.
- */
-static void
-xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
-{
-	struct xpc_notify *notify;
-	u8 notify_type;
-	s64 get = ch->w_remote_GP.get - 1;
-
-
-	while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
-
-		notify = &ch->notify_queue[get % ch->local_nentries];
-
-		/*
-		 * See if the notify entry indicates it was associated with
-		 * a message who's sender wants to be notified. It is possible
-		 * that it is, but someone else is doing or has done the
-		 * notification.
-		 */
-		notify_type = notify->type;
-		if (notify_type == 0 ||
-				cmpxchg(&notify->type, notify_type, 0) !=
-								notify_type) {
-			continue;
-		}
-
-		DBUG_ON(notify_type != XPC_N_CALL);
-
-		atomic_dec(&ch->n_to_notify);
-
-		if (notify->func != NULL) {
-			dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
-				"msg_number=%ld, partid=%d, channel=%d\n",
-				(void *) notify, get, ch->partid, ch->number);
-
-			notify->func(reason, ch->partid, ch->number,
-								notify->key);
-
-			dev_dbg(xpc_chan, "notify->func() returned, "
-				"notify=0x%p, msg_number=%ld, partid=%d, "
-				"channel=%d\n", (void *) notify, get,
-				ch->partid, ch->number);
-		}
-	}
-}
-
-
-/*
- * Free up message queues and other stuff that were allocated for the specified
- * channel.
- *
- * Note: ch->reason and ch->reason_line are left set for debugging purposes,
- * they're cleared when XPC_C_DISCONNECTED is cleared.
- */
-static void
-xpc_free_msgqueues(struct xpc_channel *ch)
-{
-	DBUG_ON(!spin_is_locked(&ch->lock));
-	DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
-
-	ch->remote_msgqueue_pa = 0;
-	ch->func = NULL;
-	ch->key = NULL;
-	ch->msg_size = 0;
-	ch->local_nentries = 0;
-	ch->remote_nentries = 0;
-	ch->kthreads_assigned_limit = 0;
-	ch->kthreads_idle_limit = 0;
-
-	ch->local_GP->get = 0;
-	ch->local_GP->put = 0;
-	ch->remote_GP.get = 0;
-	ch->remote_GP.put = 0;
-	ch->w_local_GP.get = 0;
-	ch->w_local_GP.put = 0;
-	ch->w_remote_GP.get = 0;
-	ch->w_remote_GP.put = 0;
-	ch->next_msg_to_pull = 0;
-
-	if (ch->flags & XPC_C_SETUP) {
-		ch->flags &= ~XPC_C_SETUP;
-
-		dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
-			ch->flags, ch->partid, ch->number);
-
-		kfree(ch->local_msgqueue_base);
-		ch->local_msgqueue = NULL;
-		kfree(ch->remote_msgqueue_base);
-		ch->remote_msgqueue = NULL;
-		kfree(ch->notify_queue);
-		ch->notify_queue = NULL;
-	}
-}
-
-
-/*
- * spin_lock_irqsave() is expected to be held on entry.
- */
-static void
-xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
-{
-	struct xpc_partition *part = &xpc_partitions[ch->partid];
-	u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
-
-
-	DBUG_ON(!spin_is_locked(&ch->lock));
-
-	if (!(ch->flags & XPC_C_DISCONNECTING)) {
-		return;
-	}
-
-	DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
-
-	/* make sure all activity has settled down first */
-
-	if (atomic_read(&ch->references) > 0 ||
-			((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
-			!(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE))) {
-		return;
-	}
-	DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
-
-	if (part->act_state == XPC_P_DEACTIVATING) {
-		/* can't proceed until the other side disengages from us */
-		if (xpc_partition_engaged(1UL << ch->partid)) {
-			return;
-		}
-
-	} else {
-
-		/* as long as the other side is up do the full protocol */
-
-		if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
-			return;
-		}
-
-		if (!(ch->flags & XPC_C_CLOSEREPLY)) {
-			ch->flags |= XPC_C_CLOSEREPLY;
-			xpc_IPI_send_closereply(ch, irq_flags);
-		}
-
-		if (!(ch->flags & XPC_C_RCLOSEREPLY)) {
-			return;
-		}
-	}
-
-	/* wake those waiting for notify completion */
-	if (atomic_read(&ch->n_to_notify) > 0) {
-		/* >>> we do callout while holding ch->lock */
-		xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
-	}
-
-	/* both sides are disconnected now */
-
-	if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) {
-		spin_unlock_irqrestore(&ch->lock, *irq_flags);
-		xpc_disconnect_callout(ch, xpcDisconnected);
-		spin_lock_irqsave(&ch->lock, *irq_flags);
-	}
-
-	/* it's now safe to free the channel's message queues */
-	xpc_free_msgqueues(ch);
-
-	/* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
-	ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
-
-	atomic_dec(&part->nchannels_active);
-
-	if (channel_was_connected) {
-		dev_info(xpc_chan, "channel %d to partition %d disconnected, "
-			"reason=%d\n", ch->number, ch->partid, ch->reason);
-	}
-
-	if (ch->flags & XPC_C_WDISCONNECT) {
-		/* we won't lose the CPU since we're holding ch->lock */
-		complete(&ch->wdisconnect_wait);
-	} else if (ch->delayed_IPI_flags) {
-		if (part->act_state != XPC_P_DEACTIVATING) {
-			/* time to take action on any delayed IPI flags */
-			spin_lock(&part->IPI_lock);
-			XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number,
-							ch->delayed_IPI_flags);
-			spin_unlock(&part->IPI_lock);
-		}
-		ch->delayed_IPI_flags = 0;
-	}
-}
-
-
-/*
- * Process a change in the channel's remote connection state.
- */
-static void
-xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
-				u8 IPI_flags)
-{
-	unsigned long irq_flags;
-	struct xpc_openclose_args *args =
-				&part->remote_openclose_args[ch_number];
-	struct xpc_channel *ch = &part->channels[ch_number];
-	enum xpc_retval reason;
-
-
-
-	spin_lock_irqsave(&ch->lock, irq_flags);
-
-again:
-
-	if ((ch->flags & XPC_C_DISCONNECTED) &&
-					(ch->flags & XPC_C_WDISCONNECT)) {
-		/*
-		 * Delay processing IPI flags until thread waiting disconnect
-		 * has had a chance to see that the channel is disconnected.
-		 */
-		ch->delayed_IPI_flags |= IPI_flags;
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		return;
-	}
-
-
-	if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
-
-		dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received "
-			"from partid=%d, channel=%d\n", args->reason,
-			ch->partid, ch->number);
-
-		/*
-		 * If RCLOSEREQUEST is set, we're probably waiting for
-		 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
-		 * with this RCLOSEREQUEST in the IPI_flags.
-		 */
-
-		if (ch->flags & XPC_C_RCLOSEREQUEST) {
-			DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
-			DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
-			DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY));
-			DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY);
-
-			DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY));
-			IPI_flags &= ~XPC_IPI_CLOSEREPLY;
-			ch->flags |= XPC_C_RCLOSEREPLY;
-
-			/* both sides have finished disconnecting */
-			xpc_process_disconnect(ch, &irq_flags);
-			DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
-			goto again;
-		}
-
-		if (ch->flags & XPC_C_DISCONNECTED) {
-			if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
-				if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo,
-					 ch_number) & XPC_IPI_OPENREQUEST)) {
-
-					DBUG_ON(ch->delayed_IPI_flags != 0);
-					spin_lock(&part->IPI_lock);
-					XPC_SET_IPI_FLAGS(part->local_IPI_amo,
-							ch_number,
-							XPC_IPI_CLOSEREQUEST);
-					spin_unlock(&part->IPI_lock);
-				}
-				spin_unlock_irqrestore(&ch->lock, irq_flags);
-				return;
-			}
-
-			XPC_SET_REASON(ch, 0, 0);
-			ch->flags &= ~XPC_C_DISCONNECTED;
-
-			atomic_inc(&part->nchannels_active);
-			ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
-		}
-
-		IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY);
-
-		/*
-		 * The meaningful CLOSEREQUEST connection state fields are:
-		 *      reason = reason connection is to be closed
-		 */
-
-		ch->flags |= XPC_C_RCLOSEREQUEST;
-
-		if (!(ch->flags & XPC_C_DISCONNECTING)) {
-			reason = args->reason;
-			if (reason <= xpcSuccess || reason > xpcUnknownReason) {
-				reason = xpcUnknownReason;
-			} else if (reason == xpcUnregistering) {
-				reason = xpcOtherUnregistering;
-			}
-
-			XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
-
-			DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY);
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		xpc_process_disconnect(ch, &irq_flags);
-	}
-
-
-	if (IPI_flags & XPC_IPI_CLOSEREPLY) {
-
-		dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d,"
-			" channel=%d\n", ch->partid, ch->number);
-
-		if (ch->flags & XPC_C_DISCONNECTED) {
-			DBUG_ON(part->act_state != XPC_P_DEACTIVATING);
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
-
-		if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
-			if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number)
-						& XPC_IPI_CLOSEREQUEST)) {
-
-				DBUG_ON(ch->delayed_IPI_flags != 0);
-				spin_lock(&part->IPI_lock);
-				XPC_SET_IPI_FLAGS(part->local_IPI_amo,
-						ch_number, XPC_IPI_CLOSEREPLY);
-				spin_unlock(&part->IPI_lock);
-			}
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		ch->flags |= XPC_C_RCLOSEREPLY;
-
-		if (ch->flags & XPC_C_CLOSEREPLY) {
-			/* both sides have finished disconnecting */
-			xpc_process_disconnect(ch, &irq_flags);
-		}
-	}
-
-
-	if (IPI_flags & XPC_IPI_OPENREQUEST) {
-
-		dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, "
-			"local_nentries=%d) received from partid=%d, "
-			"channel=%d\n", args->msg_size, args->local_nentries,
-			ch->partid, ch->number);
-
-		if (part->act_state == XPC_P_DEACTIVATING ||
-					(ch->flags & XPC_C_ROPENREQUEST)) {
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
-			ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST;
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-		DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED |
-							XPC_C_OPENREQUEST)));
-		DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
-					XPC_C_OPENREPLY | XPC_C_CONNECTED));
-
-		/*
-		 * The meaningful OPENREQUEST connection state fields are:
-		 *      msg_size = size of channel's messages in bytes
-		 *      local_nentries = remote partition's local_nentries
-		 */
-		if (args->msg_size == 0 || args->local_nentries == 0) {
-			/* assume OPENREQUEST was delayed by mistake */
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
-		ch->remote_nentries = args->local_nentries;
-
-
-		if (ch->flags & XPC_C_OPENREQUEST) {
-			if (args->msg_size != ch->msg_size) {
-				XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes,
-								&irq_flags);
-				spin_unlock_irqrestore(&ch->lock, irq_flags);
-				return;
-			}
-		} else {
-			ch->msg_size = args->msg_size;
-
-			XPC_SET_REASON(ch, 0, 0);
-			ch->flags &= ~XPC_C_DISCONNECTED;
-
-			atomic_inc(&part->nchannels_active);
-		}
-
-		xpc_process_connect(ch, &irq_flags);
-	}
-
-
-	if (IPI_flags & XPC_IPI_OPENREPLY) {
-
-		dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, "
-			"local_nentries=%d, remote_nentries=%d) received from "
-			"partid=%d, channel=%d\n", args->local_msgqueue_pa,
-			args->local_nentries, args->remote_nentries,
-			ch->partid, ch->number);
-
-		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-		if (!(ch->flags & XPC_C_OPENREQUEST)) {
-			XPC_DISCONNECT_CHANNEL(ch, xpcOpenCloseError,
-								&irq_flags);
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return;
-		}
-
-		DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
-		DBUG_ON(ch->flags & XPC_C_CONNECTED);
-
-		/*
-		 * The meaningful OPENREPLY connection state fields are:
-		 *      local_msgqueue_pa = physical address of remote
-		 *			    partition's local_msgqueue
-		 *      local_nentries = remote partition's local_nentries
-		 *      remote_nentries = remote partition's remote_nentries
-		 */
-		DBUG_ON(args->local_msgqueue_pa == 0);
-		DBUG_ON(args->local_nentries == 0);
-		DBUG_ON(args->remote_nentries == 0);
-
-		ch->flags |= XPC_C_ROPENREPLY;
-		ch->remote_msgqueue_pa = args->local_msgqueue_pa;
-
-		if (args->local_nentries < ch->remote_nentries) {
-			dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
-				"remote_nentries=%d, old remote_nentries=%d, "
-				"partid=%d, channel=%d\n",
-				args->local_nentries, ch->remote_nentries,
-				ch->partid, ch->number);
-
-			ch->remote_nentries = args->local_nentries;
-		}
-		if (args->remote_nentries < ch->local_nentries) {
-			dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
-				"local_nentries=%d, old local_nentries=%d, "
-				"partid=%d, channel=%d\n",
-				args->remote_nentries, ch->local_nentries,
-				ch->partid, ch->number);
-
-			ch->local_nentries = args->remote_nentries;
-		}
-
-		xpc_process_connect(ch, &irq_flags);
-	}
-
-	spin_unlock_irqrestore(&ch->lock, irq_flags);
-}
-
-
-/*
- * Attempt to establish a channel connection to a remote partition.
- */
-static enum xpc_retval
-xpc_connect_channel(struct xpc_channel *ch)
-{
-	unsigned long irq_flags;
-	struct xpc_registration *registration = &xpc_registrations[ch->number];
-
-
-	if (mutex_trylock(&registration->mutex) == 0) {
-		return xpcRetry;
-	}
-
-	if (!XPC_CHANNEL_REGISTERED(ch->number)) {
-		mutex_unlock(&registration->mutex);
-		return xpcUnregistered;
-	}
-
-	spin_lock_irqsave(&ch->lock, irq_flags);
-
-	DBUG_ON(ch->flags & XPC_C_CONNECTED);
-	DBUG_ON(ch->flags & XPC_C_OPENREQUEST);
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		mutex_unlock(&registration->mutex);
-		return ch->reason;
-	}
-
-
-	/* add info from the channel connect registration to the channel */
-
-	ch->kthreads_assigned_limit = registration->assigned_limit;
-	ch->kthreads_idle_limit = registration->idle_limit;
-	DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
-	DBUG_ON(atomic_read(&ch->kthreads_idle) != 0);
-	DBUG_ON(atomic_read(&ch->kthreads_active) != 0);
-
-	ch->func = registration->func;
-	DBUG_ON(registration->func == NULL);
-	ch->key = registration->key;
-
-	ch->local_nentries = registration->nentries;
-
-	if (ch->flags & XPC_C_ROPENREQUEST) {
-		if (registration->msg_size != ch->msg_size) {
-			/* the local and remote sides aren't the same */
-
-			/*
-			 * Because XPC_DISCONNECT_CHANNEL() can block we're
-			 * forced to up the registration sema before we unlock
-			 * the channel lock. But that's okay here because we're
-			 * done with the part that required the registration
-			 * sema. XPC_DISCONNECT_CHANNEL() requires that the
-			 * channel lock be locked and will unlock and relock
-			 * the channel lock as needed.
-			 */
-			mutex_unlock(&registration->mutex);
-			XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes,
-								&irq_flags);
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			return xpcUnequalMsgSizes;
-		}
-	} else {
-		ch->msg_size = registration->msg_size;
-
-		XPC_SET_REASON(ch, 0, 0);
-		ch->flags &= ~XPC_C_DISCONNECTED;
-
-		atomic_inc(&xpc_partitions[ch->partid].nchannels_active);
-	}
-
-	mutex_unlock(&registration->mutex);
-
-
-	/* initiate the connection */
-
-	ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
-	xpc_IPI_send_openrequest(ch, &irq_flags);
-
-	xpc_process_connect(ch, &irq_flags);
-
-	spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-	return xpcSuccess;
-}
-
-
-/*
- * Clear some of the msg flags in the local message queue.
- */
-static inline void
-xpc_clear_local_msgqueue_flags(struct xpc_channel *ch)
-{
-	struct xpc_msg *msg;
-	s64 get;
-
-
-	get = ch->w_remote_GP.get;
-	do {
-		msg = (struct xpc_msg *) ((u64) ch->local_msgqueue +
-				(get % ch->local_nentries) * ch->msg_size);
-		msg->flags = 0;
-	} while (++get < (volatile s64) ch->remote_GP.get);
-}
-
-
-/*
- * Clear some of the msg flags in the remote message queue.
- */
-static inline void
-xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch)
-{
-	struct xpc_msg *msg;
-	s64 put;
-
-
-	put = ch->w_remote_GP.put;
-	do {
-		msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue +
-				(put % ch->remote_nentries) * ch->msg_size);
-		msg->flags = 0;
-	} while (++put < (volatile s64) ch->remote_GP.put);
-}
-
-
-static void
-xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
-{
-	struct xpc_channel *ch = &part->channels[ch_number];
-	int nmsgs_sent;
-
-
-	ch->remote_GP = part->remote_GPs[ch_number];
-
-
-	/* See what, if anything, has changed for each connected channel */
-
-	xpc_msgqueue_ref(ch);
-
-	if (ch->w_remote_GP.get == ch->remote_GP.get &&
-				ch->w_remote_GP.put == ch->remote_GP.put) {
-		/* nothing changed since GPs were last pulled */
-		xpc_msgqueue_deref(ch);
-		return;
-	}
-
-	if (!(ch->flags & XPC_C_CONNECTED)){
-		xpc_msgqueue_deref(ch);
-		return;
-	}
-
-
-	/*
-	 * First check to see if messages recently sent by us have been
-	 * received by the other side. (The remote GET value will have
-	 * changed since we last looked at it.)
-	 */
-
-	if (ch->w_remote_GP.get != ch->remote_GP.get) {
-
-		/*
-		 * We need to notify any senders that want to be notified
-		 * that their sent messages have been received by their
-		 * intended recipients. We need to do this before updating
-		 * w_remote_GP.get so that we don't allocate the same message
-		 * queue entries prematurely (see xpc_allocate_msg()).
-		 */
-		if (atomic_read(&ch->n_to_notify) > 0) {
-			/*
-			 * Notify senders that messages sent have been
-			 * received and delivered by the other side.
-			 */
-			xpc_notify_senders(ch, xpcMsgDelivered,
-							ch->remote_GP.get);
-		}
-
-		/*
-		 * Clear msg->flags in previously sent messages, so that
-		 * they're ready for xpc_allocate_msg().
-		 */
-		xpc_clear_local_msgqueue_flags(ch);
-
-		ch->w_remote_GP.get = ch->remote_GP.get;
-
-		dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
-			"channel=%d\n", ch->w_remote_GP.get, ch->partid,
-			ch->number);
-
-		/*
-		 * If anyone was waiting for message queue entries to become
-		 * available, wake them up.
-		 */
-		if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) {
-			wake_up(&ch->msg_allocate_wq);
-		}
-	}
-
-
-	/*
-	 * Now check for newly sent messages by the other side. (The remote
-	 * PUT value will have changed since we last looked at it.)
-	 */
-
-	if (ch->w_remote_GP.put != ch->remote_GP.put) {
-		/*
-		 * Clear msg->flags in previously received messages, so that
-		 * they're ready for xpc_get_deliverable_msg().
-		 */
-		xpc_clear_remote_msgqueue_flags(ch);
-
-		ch->w_remote_GP.put = ch->remote_GP.put;
-
-		dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
-			"channel=%d\n", ch->w_remote_GP.put, ch->partid,
-			ch->number);
-
-		nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get;
-		if (nmsgs_sent > 0) {
-			dev_dbg(xpc_chan, "msgs waiting to be copied and "
-				"delivered=%d, partid=%d, channel=%d\n",
-				nmsgs_sent, ch->partid, ch->number);
-
-			if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
-				xpc_activate_kthreads(ch, nmsgs_sent);
-			}
-		}
-	}
-
-	xpc_msgqueue_deref(ch);
-}
-
-
-void
-xpc_process_channel_activity(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	u64 IPI_amo, IPI_flags;
-	struct xpc_channel *ch;
-	int ch_number;
-	u32 ch_flags;
-
-
-	IPI_amo = xpc_get_IPI_flags(part);
-
-	/*
-	 * Initiate channel connections for registered channels.
-	 *
-	 * For each connected channel that has pending messages activate idle
-	 * kthreads and/or create new kthreads as needed.
-	 */
-
-	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
-		ch = &part->channels[ch_number];
-
-
-		/*
-		 * Process any open or close related IPI flags, and then deal
-		 * with connecting or disconnecting the channel as required.
-		 */
-
-		IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number);
-
-		if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags)) {
-			xpc_process_openclose_IPI(part, ch_number, IPI_flags);
-		}
-
-		ch_flags = ch->flags;	/* need an atomic snapshot of flags */
-
-		if (ch_flags & XPC_C_DISCONNECTING) {
-			spin_lock_irqsave(&ch->lock, irq_flags);
-			xpc_process_disconnect(ch, &irq_flags);
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-			continue;
-		}
-
-		if (part->act_state == XPC_P_DEACTIVATING) {
-			continue;
-		}
-
-		if (!(ch_flags & XPC_C_CONNECTED)) {
-			if (!(ch_flags & XPC_C_OPENREQUEST)) {
-				DBUG_ON(ch_flags & XPC_C_SETUP);
-				(void) xpc_connect_channel(ch);
-			} else {
-				spin_lock_irqsave(&ch->lock, irq_flags);
-				xpc_process_connect(ch, &irq_flags);
-				spin_unlock_irqrestore(&ch->lock, irq_flags);
-			}
-			continue;
-		}
-
-
-		/*
-		 * Process any message related IPI flags, this may involve the
-		 * activation of kthreads to deliver any pending messages sent
-		 * from the other partition.
-		 */
-
-		if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags)) {
-			xpc_process_msg_IPI(part, ch_number);
-		}
-	}
-}
-
-
-/*
- * XPC's heartbeat code calls this function to inform XPC that a partition is
- * going down.  XPC responds by tearing down the XPartition Communication
- * infrastructure used for the just downed partition.
- *
- * XPC's heartbeat code will never call this function and xpc_partition_up()
- * at the same time. Nor will it ever make multiple calls to either function
- * at the same time.
- */
-void
-xpc_partition_going_down(struct xpc_partition *part, enum xpc_retval reason)
-{
-	unsigned long irq_flags;
-	int ch_number;
-	struct xpc_channel *ch;
-
-
-	dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n",
-		XPC_PARTID(part), reason);
-
-	if (!xpc_part_ref(part)) {
-		/* infrastructure for this partition isn't currently set up */
-		return;
-	}
-
-
-	/* disconnect channels associated with the partition going down */
-
-	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
-		ch = &part->channels[ch_number];
-
-		xpc_msgqueue_ref(ch);
-		spin_lock_irqsave(&ch->lock, irq_flags);
-
-		XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
-
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		xpc_msgqueue_deref(ch);
-	}
-
-	xpc_wakeup_channel_mgr(part);
-
-	xpc_part_deref(part);
-}
-
-
-/*
- * Teardown the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
- */
-void
-xpc_teardown_infrastructure(struct xpc_partition *part)
-{
-	partid_t partid = XPC_PARTID(part);
-
-
-	/*
-	 * We start off by making this partition inaccessible to local
-	 * processes by marking it as no longer setup. Then we make it
-	 * inaccessible to remote processes by clearing the XPC per partition
-	 * specific variable's magic # (which indicates that these variables
-	 * are no longer valid) and by ignoring all XPC notify IPIs sent to
-	 * this partition.
-	 */
-
-	DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
-	DBUG_ON(atomic_read(&part->nchannels_active) != 0);
-	DBUG_ON(part->setup_state != XPC_P_SETUP);
-	part->setup_state = XPC_P_WTEARDOWN;
-
-	xpc_vars_part[partid].magic = 0;
-
-
-	free_irq(SGI_XPC_NOTIFY, (void *) (u64) partid);
-
-
-	/*
-	 * Before proceding with the teardown we have to wait until all
-	 * existing references cease.
-	 */
-	wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
-
-
-	/* now we can begin tearing down the infrastructure */
-
-	part->setup_state = XPC_P_TORNDOWN;
-
-	/* in case we've still got outstanding timers registered... */
-	del_timer_sync(&part->dropped_IPI_timer);
-
-	kfree(part->remote_openclose_args_base);
-	part->remote_openclose_args = NULL;
-	kfree(part->local_openclose_args_base);
-	part->local_openclose_args = NULL;
-	kfree(part->remote_GPs_base);
-	part->remote_GPs = NULL;
-	kfree(part->local_GPs_base);
-	part->local_GPs = NULL;
-	kfree(part->channels);
-	part->channels = NULL;
-	part->local_IPI_amo_va = NULL;
-}
-
-
-/*
- * Called by XP at the time of channel connection registration to cause
- * XPC to establish connections to all currently active partitions.
- */
-void
-xpc_initiate_connect(int ch_number)
-{
-	partid_t partid;
-	struct xpc_partition *part;
-	struct xpc_channel *ch;
-
-
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
-
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-		part = &xpc_partitions[partid];
-
-		if (xpc_part_ref(part)) {
-			ch = &part->channels[ch_number];
-
-			/*
-			 * Initiate the establishment of a connection on the
-			 * newly registered channel to the remote partition.
-			 */
-			xpc_wakeup_channel_mgr(part);
-			xpc_part_deref(part);
-		}
-	}
-}
-
-
-void
-xpc_connected_callout(struct xpc_channel *ch)
-{
-	/* let the registerer know that a connection has been established */
-
-	if (ch->func != NULL) {
-		dev_dbg(xpc_chan, "ch->func() called, reason=xpcConnected, "
-			"partid=%d, channel=%d\n", ch->partid, ch->number);
-
-		ch->func(xpcConnected, ch->partid, ch->number,
-				(void *) (u64) ch->local_nentries, ch->key);
-
-		dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, "
-			"partid=%d, channel=%d\n", ch->partid, ch->number);
-	}
-}
-
-
-/*
- * Called by XP at the time of channel connection unregistration to cause
- * XPC to teardown all current connections for the specified channel.
- *
- * Before returning xpc_initiate_disconnect() will wait until all connections
- * on the specified channel have been closed/torndown. So the caller can be
- * assured that they will not be receiving any more callouts from XPC to the
- * function they registered via xpc_connect().
- *
- * Arguments:
- *
- *	ch_number - channel # to unregister.
- */
-void
-xpc_initiate_disconnect(int ch_number)
-{
-	unsigned long irq_flags;
-	partid_t partid;
-	struct xpc_partition *part;
-	struct xpc_channel *ch;
-
-
-	DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
-
-	/* initiate the channel disconnect for every active partition */
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-		part = &xpc_partitions[partid];
-
-		if (xpc_part_ref(part)) {
-			ch = &part->channels[ch_number];
-			xpc_msgqueue_ref(ch);
-
-			spin_lock_irqsave(&ch->lock, irq_flags);
-
-			if (!(ch->flags & XPC_C_DISCONNECTED)) {
-				ch->flags |= XPC_C_WDISCONNECT;
-
-				XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering,
-								&irq_flags);
-			}
-
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-			xpc_msgqueue_deref(ch);
-			xpc_part_deref(part);
-		}
-	}
-
-	xpc_disconnect_wait(ch_number);
-}
-
-
-/*
- * To disconnect a channel, and reflect it back to all who may be waiting.
- *
- * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
- * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
- * xpc_disconnect_wait().
- *
- * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
- */
-void
-xpc_disconnect_channel(const int line, struct xpc_channel *ch,
-			enum xpc_retval reason, unsigned long *irq_flags)
-{
-	u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
-
-
-	DBUG_ON(!spin_is_locked(&ch->lock));
-
-	if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
-		return;
-	}
-	DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED)));
-
-	dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n",
-		reason, line, ch->partid, ch->number);
-
-	XPC_SET_REASON(ch, reason, line);
-
-	ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
-	/* some of these may not have been set */
-	ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
-			XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
-			XPC_C_CONNECTING | XPC_C_CONNECTED);
-
-	xpc_IPI_send_closerequest(ch, irq_flags);
-
-	if (channel_was_connected) {
-		ch->flags |= XPC_C_WASCONNECTED;
-	}
-
-	spin_unlock_irqrestore(&ch->lock, *irq_flags);
-
-	/* wake all idle kthreads so they can exit */
-	if (atomic_read(&ch->kthreads_idle) > 0) {
-		wake_up_all(&ch->idle_wq);
-	}
-
-	/* wake those waiting to allocate an entry from the local msg queue */
-	if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) {
-		wake_up(&ch->msg_allocate_wq);
-	}
-
-	spin_lock_irqsave(&ch->lock, *irq_flags);
-}
-
-
-void
-xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason)
-{
-	/*
-	 * Let the channel's registerer know that the channel is being
-	 * disconnected. We don't want to do this if the registerer was never
-	 * informed of a connection being made.
-	 */
-
-	if (ch->func != NULL) {
-		dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
-			"channel=%d\n", reason, ch->partid, ch->number);
-
-		ch->func(reason, ch->partid, ch->number, NULL, ch->key);
-
-		dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
-			"channel=%d\n", reason, ch->partid, ch->number);
-	}
-}
-
-
-/*
- * Wait for a message entry to become available for the specified channel,
- * but don't wait any longer than 1 jiffy.
- */
-static enum xpc_retval
-xpc_allocate_msg_wait(struct xpc_channel *ch)
-{
-	enum xpc_retval ret;
-
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		DBUG_ON(ch->reason == xpcInterrupted);  // >>> Is this true?
-		return ch->reason;
-	}
-
-	atomic_inc(&ch->n_on_msg_allocate_wq);
-	ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1);
-	atomic_dec(&ch->n_on_msg_allocate_wq);
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		ret = ch->reason;
-		DBUG_ON(ch->reason == xpcInterrupted);  // >>> Is this true?
-	} else if (ret == 0) {
-		ret = xpcTimeout;
-	} else {
-		ret = xpcInterrupted;
-	}
-
-	return ret;
-}
-
-
-/*
- * Allocate an entry for a message from the message queue associated with the
- * specified channel.
- */
-static enum xpc_retval
-xpc_allocate_msg(struct xpc_channel *ch, u32 flags,
-			struct xpc_msg **address_of_msg)
-{
-	struct xpc_msg *msg;
-	enum xpc_retval ret;
-	s64 put;
-
-
-	/* this reference will be dropped in xpc_send_msg() */
-	xpc_msgqueue_ref(ch);
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		xpc_msgqueue_deref(ch);
-		return ch->reason;
-	}
-	if (!(ch->flags & XPC_C_CONNECTED)) {
-		xpc_msgqueue_deref(ch);
-		return xpcNotConnected;
-	}
-
-
-	/*
-	 * Get the next available message entry from the local message queue.
-	 * If none are available, we'll make sure that we grab the latest
-	 * GP values.
-	 */
-	ret = xpcTimeout;
-
-	while (1) {
-
-		put = (volatile s64) ch->w_local_GP.put;
-		if (put - (volatile s64) ch->w_remote_GP.get <
-							ch->local_nentries) {
-
-			/* There are available message entries. We need to try
-			 * to secure one for ourselves. We'll do this by trying
-			 * to increment w_local_GP.put as long as someone else
-			 * doesn't beat us to it. If they do, we'll have to
-			 * try again.
-		 	 */
-			if (cmpxchg(&ch->w_local_GP.put, put, put + 1) ==
-									put) {
-				/* we got the entry referenced by put */
-				break;
-			}
-			continue;	/* try again */
-		}
-
-
-		/*
-		 * There aren't any available msg entries at this time.
-		 *
-		 * In waiting for a message entry to become available,
-		 * we set a timeout in case the other side is not
-		 * sending completion IPIs. This lets us fake an IPI
-		 * that will cause the IPI handler to fetch the latest
-		 * GP values as if an IPI was sent by the other side.
-		 */
-		if (ret == xpcTimeout) {
-			xpc_IPI_send_local_msgrequest(ch);
-		}
-
-		if (flags & XPC_NOWAIT) {
-			xpc_msgqueue_deref(ch);
-			return xpcNoWait;
-		}
-
-		ret = xpc_allocate_msg_wait(ch);
-		if (ret != xpcInterrupted && ret != xpcTimeout) {
-			xpc_msgqueue_deref(ch);
-			return ret;
-		}
-	}
-
-
-	/* get the message's address and initialize it */
-	msg = (struct xpc_msg *) ((u64) ch->local_msgqueue +
-				(put % ch->local_nentries) * ch->msg_size);
-
-
-	DBUG_ON(msg->flags != 0);
-	msg->number = put;
-
-	dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
-		"msg_number=%ld, partid=%d, channel=%d\n", put + 1,
-		(void *) msg, msg->number, ch->partid, ch->number);
-
-	*address_of_msg = msg;
-
-	return xpcSuccess;
-}
-
-
-/*
- * Allocate an entry for a message from the message queue associated with the
- * specified channel. NOTE that this routine can sleep waiting for a message
- * entry to become available. To not sleep, pass in the XPC_NOWAIT flag.
- *
- * Arguments:
- *
- *	partid - ID of partition to which the channel is connected.
- *	ch_number - channel #.
- *	flags - see xpc.h for valid flags.
- *	payload - address of the allocated payload area pointer (filled in on
- * 	          return) in which the user-defined message is constructed.
- */
-enum xpc_retval
-xpc_initiate_allocate(partid_t partid, int ch_number, u32 flags, void **payload)
-{
-	struct xpc_partition *part = &xpc_partitions[partid];
-	enum xpc_retval ret = xpcUnknownReason;
-	struct xpc_msg *msg = NULL;
-
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-
-	*payload = NULL;
-
-	if (xpc_part_ref(part)) {
-		ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg);
-		xpc_part_deref(part);
-
-		if (msg != NULL) {
-			*payload = &msg->payload;
-		}
-	}
-
-	return ret;
-}
-
-
-/*
- * Now we actually send the messages that are ready to be sent by advancing
- * the local message queue's Put value and then send an IPI to the recipient
- * partition.
- */
-static void
-xpc_send_msgs(struct xpc_channel *ch, s64 initial_put)
-{
-	struct xpc_msg *msg;
-	s64 put = initial_put + 1;
-	int send_IPI = 0;
-
-
-	while (1) {
-
-		while (1) {
-			if (put == (volatile s64) ch->w_local_GP.put) {
-				break;
-			}
-
-			msg = (struct xpc_msg *) ((u64) ch->local_msgqueue +
-			       (put % ch->local_nentries) * ch->msg_size);
-
-			if (!(msg->flags & XPC_M_READY)) {
-				break;
-			}
-
-			put++;
-		}
-
-		if (put == initial_put) {
-			/* nothing's changed */
-			break;
-		}
-
-		if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) !=
-								initial_put) {
-			/* someone else beat us to it */
-			DBUG_ON((volatile s64) ch->local_GP->put < initial_put);
-			break;
-		}
-
-		/* we just set the new value of local_GP->put */
-
-		dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
-			"channel=%d\n", put, ch->partid, ch->number);
-
-		send_IPI = 1;
-
-		/*
-		 * We need to ensure that the message referenced by
-		 * local_GP->put is not XPC_M_READY or that local_GP->put
-		 * equals w_local_GP.put, so we'll go have a look.
-		 */
-		initial_put = put;
-	}
-
-	if (send_IPI) {
-		xpc_IPI_send_msgrequest(ch);
-	}
-}
-
-
-/*
- * Common code that does the actual sending of the message by advancing the
- * local message queue's Put value and sends an IPI to the partition the
- * message is being sent to.
- */
-static enum xpc_retval
-xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
-			xpc_notify_func func, void *key)
-{
-	enum xpc_retval ret = xpcSuccess;
-	struct xpc_notify *notify = notify;
-	s64 put, msg_number = msg->number;
-
-
-	DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
-	DBUG_ON((((u64) msg - (u64) ch->local_msgqueue) / ch->msg_size) !=
-					msg_number % ch->local_nentries);
-	DBUG_ON(msg->flags & XPC_M_READY);
-
-	if (ch->flags & XPC_C_DISCONNECTING) {
-		/* drop the reference grabbed in xpc_allocate_msg() */
-		xpc_msgqueue_deref(ch);
-		return ch->reason;
-	}
-
-	if (notify_type != 0) {
-		/*
-		 * Tell the remote side to send an ACK interrupt when the
-		 * message has been delivered.
-		 */
-		msg->flags |= XPC_M_INTERRUPT;
-
-		atomic_inc(&ch->n_to_notify);
-
-		notify = &ch->notify_queue[msg_number % ch->local_nentries];
-		notify->func = func;
-		notify->key = key;
-		notify->type = notify_type;
-
-		// >>> is a mb() needed here?
-
-		if (ch->flags & XPC_C_DISCONNECTING) {
-			/*
-			 * An error occurred between our last error check and
-			 * this one. We will try to clear the type field from
-			 * the notify entry. If we succeed then
-			 * xpc_disconnect_channel() didn't already process
-			 * the notify entry.
-			 */
-			if (cmpxchg(&notify->type, notify_type, 0) ==
-								notify_type) {
-				atomic_dec(&ch->n_to_notify);
-				ret = ch->reason;
-			}
-
-			/* drop the reference grabbed in xpc_allocate_msg() */
-			xpc_msgqueue_deref(ch);
-			return ret;
-		}
-	}
-
-	msg->flags |= XPC_M_READY;
-
-	/*
-	 * The preceding store of msg->flags must occur before the following
-	 * load of ch->local_GP->put.
-	 */
-	mb();
-
-	/* see if the message is next in line to be sent, if so send it */
-
-	put = ch->local_GP->put;
-	if (put == msg_number) {
-		xpc_send_msgs(ch, put);
-	}
-
-	/* drop the reference grabbed in xpc_allocate_msg() */
-	xpc_msgqueue_deref(ch);
-	return ret;
-}
-
-
-/*
- * Send a message previously allocated using xpc_initiate_allocate() on the
- * specified channel connected to the specified partition.
- *
- * This routine will not wait for the message to be received, nor will
- * notification be given when it does happen. Once this routine has returned
- * the message entry allocated via xpc_initiate_allocate() is no longer
- * accessable to the caller.
- *
- * This routine, although called by users, does not call xpc_part_ref() to
- * ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
- *
- * Arguments:
- *
- *	partid - ID of partition to which the channel is connected.
- *	ch_number - channel # to send message on.
- *	payload - pointer to the payload area allocated via
- *			xpc_initiate_allocate().
- */
-enum xpc_retval
-xpc_initiate_send(partid_t partid, int ch_number, void *payload)
-{
-	struct xpc_partition *part = &xpc_partitions[partid];
-	struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
-	enum xpc_retval ret;
-
-
-	dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg,
-		partid, ch_number);
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-	DBUG_ON(msg == NULL);
-
-	ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL);
-
-	return ret;
-}
-
-
-/*
- * Send a message previously allocated using xpc_initiate_allocate on the
- * specified channel connected to the specified partition.
- *
- * This routine will not wait for the message to be sent. Once this routine
- * has returned the message entry allocated via xpc_initiate_allocate() is no
- * longer accessable to the caller.
- *
- * Once the remote end of the channel has received the message, the function
- * passed as an argument to xpc_initiate_send_notify() will be called. This
- * allows the sender to free up or re-use any buffers referenced by the
- * message, but does NOT mean the message has been processed at the remote
- * end by a receiver.
- *
- * If this routine returns an error, the caller's function will NOT be called.
- *
- * This routine, although called by users, does not call xpc_part_ref() to
- * ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
- *
- * Arguments:
- *
- *	partid - ID of partition to which the channel is connected.
- *	ch_number - channel # to send message on.
- *	payload - pointer to the payload area allocated via
- *			xpc_initiate_allocate().
- *	func - function to call with asynchronous notification of message
- *		  receipt. THIS FUNCTION MUST BE NON-BLOCKING.
- *	key - user-defined key to be passed to the function when it's called.
- */
-enum xpc_retval
-xpc_initiate_send_notify(partid_t partid, int ch_number, void *payload,
-				xpc_notify_func func, void *key)
-{
-	struct xpc_partition *part = &xpc_partitions[partid];
-	struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
-	enum xpc_retval ret;
-
-
-	dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg,
-		partid, ch_number);
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-	DBUG_ON(msg == NULL);
-	DBUG_ON(func == NULL);
-
-	ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL,
-								func, key);
-	return ret;
-}
-
-
-static struct xpc_msg *
-xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
-{
-	struct xpc_partition *part = &xpc_partitions[ch->partid];
-	struct xpc_msg *remote_msg, *msg;
-	u32 msg_index, nmsgs;
-	u64 msg_offset;
-	enum xpc_retval ret;
-
-
-	if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
-		/* we were interrupted by a signal */
-		return NULL;
-	}
-
-	while (get >= ch->next_msg_to_pull) {
-
-		/* pull as many messages as are ready and able to be pulled */
-
-		msg_index = ch->next_msg_to_pull % ch->remote_nentries;
-
-		DBUG_ON(ch->next_msg_to_pull >=
-					(volatile s64) ch->w_remote_GP.put);
-		nmsgs =  (volatile s64) ch->w_remote_GP.put -
-						ch->next_msg_to_pull;
-		if (msg_index + nmsgs > ch->remote_nentries) {
-			/* ignore the ones that wrap the msg queue for now */
-			nmsgs = ch->remote_nentries - msg_index;
-		}
-
-		msg_offset = msg_index * ch->msg_size;
-		msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue +
-								msg_offset);
-		remote_msg = (struct xpc_msg *) (ch->remote_msgqueue_pa +
-								msg_offset);
-
-		if ((ret = xpc_pull_remote_cachelines(part, msg, remote_msg,
-				nmsgs * ch->msg_size)) != xpcSuccess) {
-
-			dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
-				" msg %ld from partition %d, channel=%d, "
-				"ret=%d\n", nmsgs, ch->next_msg_to_pull,
-				ch->partid, ch->number, ret);
-
-			XPC_DEACTIVATE_PARTITION(part, ret);
-
-			mutex_unlock(&ch->msg_to_pull_mutex);
-			return NULL;
-		}
-
-		mb();	/* >>> this may not be needed, we're not sure */
-
-		ch->next_msg_to_pull += nmsgs;
-	}
-
-	mutex_unlock(&ch->msg_to_pull_mutex);
-
-	/* return the message we were looking for */
-	msg_offset = (get % ch->remote_nentries) * ch->msg_size;
-	msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + msg_offset);
-
-	return msg;
-}
-
-
-/*
- * Get a message to be delivered.
- */
-static struct xpc_msg *
-xpc_get_deliverable_msg(struct xpc_channel *ch)
-{
-	struct xpc_msg *msg = NULL;
-	s64 get;
-
-
-	do {
-		if ((volatile u32) ch->flags & XPC_C_DISCONNECTING) {
-			break;
-		}
-
-		get = (volatile s64) ch->w_local_GP.get;
-		if (get == (volatile s64) ch->w_remote_GP.put) {
-			break;
-		}
-
-		/* There are messages waiting to be pulled and delivered.
-		 * We need to try to secure one for ourselves. We'll do this
-		 * by trying to increment w_local_GP.get and hope that no one
-		 * else beats us to it. If they do, we'll we'll simply have
-		 * to try again for the next one.
-	 	 */
-
-		if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) {
-			/* we got the entry referenced by get */
-
-			dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
-				"partid=%d, channel=%d\n", get + 1,
-				ch->partid, ch->number);
-
-			/* pull the message from the remote partition */
-
-			msg = xpc_pull_remote_msg(ch, get);
-
-			DBUG_ON(msg != NULL && msg->number != get);
-			DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE));
-			DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY));
-
-			break;
-		}
-
-	} while (1);
-
-	return msg;
-}
-
-
-/*
- * Deliver a message to its intended recipient.
- */
-void
-xpc_deliver_msg(struct xpc_channel *ch)
-{
-	struct xpc_msg *msg;
-
-
-	if ((msg = xpc_get_deliverable_msg(ch)) != NULL) {
-
-		/*
-		 * This ref is taken to protect the payload itself from being
-		 * freed before the user is finished with it, which the user
-		 * indicates by calling xpc_initiate_received().
-		 */
-		xpc_msgqueue_ref(ch);
-
-		atomic_inc(&ch->kthreads_active);
-
-		if (ch->func != NULL) {
-			dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, "
-				"msg_number=%ld, partid=%d, channel=%d\n",
-				(void *) msg, msg->number, ch->partid,
-				ch->number);
-
-			/* deliver the message to its intended recipient */
-			ch->func(xpcMsgReceived, ch->partid, ch->number,
-					&msg->payload, ch->key);
-
-			dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, "
-				"msg_number=%ld, partid=%d, channel=%d\n",
-				(void *) msg, msg->number, ch->partid,
-				ch->number);
-		}
-
-		atomic_dec(&ch->kthreads_active);
-	}
-}
-
-
-/*
- * Now we actually acknowledge the messages that have been delivered and ack'd
- * by advancing the cached remote message queue's Get value and if requested
- * send an IPI to the message sender's partition.
- */
-static void
-xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
-{
-	struct xpc_msg *msg;
-	s64 get = initial_get + 1;
-	int send_IPI = 0;
-
-
-	while (1) {
-
-		while (1) {
-			if (get == (volatile s64) ch->w_local_GP.get) {
-				break;
-			}
-
-			msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue +
-			       (get % ch->remote_nentries) * ch->msg_size);
-
-			if (!(msg->flags & XPC_M_DONE)) {
-				break;
-			}
-
-			msg_flags |= msg->flags;
-			get++;
-		}
-
-		if (get == initial_get) {
-			/* nothing's changed */
-			break;
-		}
-
-		if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) !=
-								initial_get) {
-			/* someone else beat us to it */
-			DBUG_ON((volatile s64) ch->local_GP->get <=
-								initial_get);
-			break;
-		}
-
-		/* we just set the new value of local_GP->get */
-
-		dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
-			"channel=%d\n", get, ch->partid, ch->number);
-
-		send_IPI = (msg_flags & XPC_M_INTERRUPT);
-
-		/*
-		 * We need to ensure that the message referenced by
-		 * local_GP->get is not XPC_M_DONE or that local_GP->get
-		 * equals w_local_GP.get, so we'll go have a look.
-		 */
-		initial_get = get;
-	}
-
-	if (send_IPI) {
-		xpc_IPI_send_msgrequest(ch);
-	}
-}
-
-
-/*
- * Acknowledge receipt of a delivered message.
- *
- * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition
- * that sent the message.
- *
- * This function, although called by users, does not call xpc_part_ref() to
- * ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg().
- *
- * Arguments:
- *
- *	partid - ID of partition to which the channel is connected.
- *	ch_number - channel # message received on.
- *	payload - pointer to the payload area allocated via
- *			xpc_initiate_allocate().
- */
-void
-xpc_initiate_received(partid_t partid, int ch_number, void *payload)
-{
-	struct xpc_partition *part = &xpc_partitions[partid];
-	struct xpc_channel *ch;
-	struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
-	s64 get, msg_number = msg->number;
-
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-
-	ch = &part->channels[ch_number];
-
-	dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
-		(void *) msg, msg_number, ch->partid, ch->number);
-
-	DBUG_ON((((u64) msg - (u64) ch->remote_msgqueue) / ch->msg_size) !=
-					msg_number % ch->remote_nentries);
-	DBUG_ON(msg->flags & XPC_M_DONE);
-
-	msg->flags |= XPC_M_DONE;
-
-	/*
-	 * The preceding store of msg->flags must occur before the following
-	 * load of ch->local_GP->get.
-	 */
-	mb();
-
-	/*
-	 * See if this message is next in line to be acknowledged as having
-	 * been delivered.
-	 */
-	get = ch->local_GP->get;
-	if (get == msg_number) {
-		xpc_acknowledge_msgs(ch, get, msg->flags);
-	}
-
-	/* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg()  */
-	xpc_msgqueue_deref(ch);
-}
-
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
deleted file mode 100644
index 99b123a6421..00000000000
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ /dev/null
@@ -1,1424 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-
-/*
- * Cross Partition Communication (XPC) support - standard version.
- *
- *	XPC provides a message passing capability that crosses partition
- *	boundaries. This module is made up of two parts:
- *
- *	    partition	This part detects the presence/absence of other
- *			partitions. It provides a heartbeat and monitors
- *			the heartbeats of other partitions.
- *
- *	    channel	This part manages the channels and sends/receives
- *			messages across them to/from other partitions.
- *
- *	There are a couple of additional functions residing in XP, which
- *	provide an interface to XPC for its users.
- *
- *
- *	Caveats:
- *
- *	  . We currently have no way to determine which nasid an IPI came
- *	    from. Thus, xpc_IPI_send() does a remote AMO write followed by
- *	    an IPI. The AMO indicates where data is to be pulled from, so
- *	    after the IPI arrives, the remote partition checks the AMO word.
- *	    The IPI can actually arrive before the AMO however, so other code
- *	    must periodically check for this case. Also, remote AMO operations
- *	    do not reliably time out. Thus we do a remote PIO read solely to
- *	    know whether the remote partition is down and whether we should
- *	    stop sending IPIs to it. This remote PIO read operation is set up
- *	    in a special nofault region so SAL knows to ignore (and cleanup)
- *	    any errors due to the remote AMO write, PIO read, and/or PIO
- *	    write operations.
- *
- *	    If/when new hardware solves this IPI problem, we should abandon
- *	    the current approach.
- *
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/syscalls.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/reboot.h>
-#include <linux/completion.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/kdebug.h>
-#include <asm/uaccess.h>
-#include <asm/sn/xpc.h>
-
-
-/* define two XPC debug device structures to be used with dev_dbg() et al */
-
-struct device_driver xpc_dbg_name = {
-	.name = "xpc"
-};
-
-struct device xpc_part_dbg_subname = {
-	.bus_id = {0},		/* set to "part" at xpc_init() time */
-	.driver = &xpc_dbg_name
-};
-
-struct device xpc_chan_dbg_subname = {
-	.bus_id = {0},		/* set to "chan" at xpc_init() time */
-	.driver = &xpc_dbg_name
-};
-
-struct device *xpc_part = &xpc_part_dbg_subname;
-struct device *xpc_chan = &xpc_chan_dbg_subname;
-
-
-static int xpc_kdebug_ignore;
-
-
-/* systune related variables for /proc/sys directories */
-
-static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
-static int xpc_hb_min_interval = 1;
-static int xpc_hb_max_interval = 10;
-
-static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
-static int xpc_hb_check_min_interval = 10;
-static int xpc_hb_check_max_interval = 120;
-
-int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
-static int xpc_disengage_request_min_timelimit = 0;
-static int xpc_disengage_request_max_timelimit = 120;
-
-static ctl_table xpc_sys_xpc_hb_dir[] = {
-	{
-		1,
-		"hb_interval",
-		&xpc_hb_interval,
-		sizeof(int),
-		0644,
-		NULL,
-		&proc_dointvec_minmax,
-		&sysctl_intvec,
-		NULL,
-		&xpc_hb_min_interval,
-		&xpc_hb_max_interval
-	},
-	{
-		2,
-		"hb_check_interval",
-		&xpc_hb_check_interval,
-		sizeof(int),
-		0644,
-		NULL,
-		&proc_dointvec_minmax,
-		&sysctl_intvec,
-		NULL,
-		&xpc_hb_check_min_interval,
-		&xpc_hb_check_max_interval
-	},
-	{0}
-};
-static ctl_table xpc_sys_xpc_dir[] = {
-	{
-		1,
-		"hb",
-		NULL,
-		0,
-		0555,
-		xpc_sys_xpc_hb_dir
-	},
-	{
-		2,
-		"disengage_request_timelimit",
-		&xpc_disengage_request_timelimit,
-		sizeof(int),
-		0644,
-		NULL,
-		&proc_dointvec_minmax,
-		&sysctl_intvec,
-		NULL,
-		&xpc_disengage_request_min_timelimit,
-		&xpc_disengage_request_max_timelimit
-	},
-	{0}
-};
-static ctl_table xpc_sys_dir[] = {
-	{
-		1,
-		"xpc",
-		NULL,
-		0,
-		0555,
-		xpc_sys_xpc_dir
-	},
-	{0}
-};
-static struct ctl_table_header *xpc_sysctl;
-
-/* non-zero if any remote partition disengage request was timed out */
-int xpc_disengage_request_timedout;
-
-/* #of IRQs received */
-static atomic_t xpc_act_IRQ_rcvd;
-
-/* IRQ handler notifies this wait queue on receipt of an IRQ */
-static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
-
-static unsigned long xpc_hb_check_timeout;
-
-/* notification that the xpc_hb_checker thread has exited */
-static DECLARE_COMPLETION(xpc_hb_checker_exited);
-
-/* notification that the xpc_discovery thread has exited */
-static DECLARE_COMPLETION(xpc_discovery_exited);
-
-
-static struct timer_list xpc_hb_timer;
-
-
-static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
-
-
-static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
-static struct notifier_block xpc_reboot_notifier = {
-	.notifier_call = xpc_system_reboot,
-};
-
-static int xpc_system_die(struct notifier_block *, unsigned long, void *);
-static struct notifier_block xpc_die_notifier = {
-	.notifier_call = xpc_system_die,
-};
-
-
-/*
- * Timer function to enforce the timelimit on the partition disengage request.
- */
-static void
-xpc_timeout_partition_disengage_request(unsigned long data)
-{
-	struct xpc_partition *part = (struct xpc_partition *) data;
-
-
-	DBUG_ON(jiffies < part->disengage_request_timeout);
-
-	(void) xpc_partition_disengaged(part);
-
-	DBUG_ON(part->disengage_request_timeout != 0);
-	DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
-}
-
-
-/*
- * Notify the heartbeat check thread that an IRQ has been received.
- */
-static irqreturn_t
-xpc_act_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs)
-{
-	atomic_inc(&xpc_act_IRQ_rcvd);
-	wake_up_interruptible(&xpc_act_IRQ_wq);
-	return IRQ_HANDLED;
-}
-
-
-/*
- * Timer to produce the heartbeat.  The timer structures function is
- * already set when this is initially called.  A tunable is used to
- * specify when the next timeout should occur.
- */
-static void
-xpc_hb_beater(unsigned long dummy)
-{
-	xpc_vars->heartbeat++;
-
-	if (jiffies >= xpc_hb_check_timeout) {
-		wake_up_interruptible(&xpc_act_IRQ_wq);
-	}
-
-	xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
-	add_timer(&xpc_hb_timer);
-}
-
-
-/*
- * This thread is responsible for nearly all of the partition
- * activation/deactivation.
- */
-static int
-xpc_hb_checker(void *ignore)
-{
-	int last_IRQ_count = 0;
-	int new_IRQ_count;
-	int force_IRQ=0;
-
-
-	/* this thread was marked active by xpc_hb_init() */
-
-	daemonize(XPC_HB_CHECK_THREAD_NAME);
-
-	set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU));
-
-	xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
-
-	while (!(volatile int) xpc_exiting) {
-
-		dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
-			"been received\n",
-			(int) (xpc_hb_check_timeout - jiffies),
-			atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);
-
-
-		/* checking of remote heartbeats is skewed by IRQ handling */
-		if (jiffies >= xpc_hb_check_timeout) {
-			dev_dbg(xpc_part, "checking remote heartbeats\n");
-			xpc_check_remote_hb();
-
-			/*
-			 * We need to periodically recheck to ensure no
-			 * IPI/AMO pairs have been missed.  That check
-			 * must always reset xpc_hb_check_timeout.
-			 */
-			force_IRQ = 1;
-		}
-
-
-		/* check for outstanding IRQs */
-		new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
-		if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
-			force_IRQ = 0;
-
-			dev_dbg(xpc_part, "found an IRQ to process; will be "
-				"resetting xpc_hb_check_timeout\n");
-
-			last_IRQ_count += xpc_identify_act_IRQ_sender();
-			if (last_IRQ_count < new_IRQ_count) {
-				/* retry once to help avoid missing AMO */
-				(void) xpc_identify_act_IRQ_sender();
-			}
-			last_IRQ_count = new_IRQ_count;
-
-			xpc_hb_check_timeout = jiffies +
-					   (xpc_hb_check_interval * HZ);
-		}
-
-		/* wait for IRQ or timeout */
-		(void) wait_event_interruptible(xpc_act_IRQ_wq,
-			    (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
-					jiffies >= xpc_hb_check_timeout ||
-						(volatile int) xpc_exiting));
-	}
-
-	dev_dbg(xpc_part, "heartbeat checker is exiting\n");
-
-
-	/* mark this thread as having exited */
-	complete(&xpc_hb_checker_exited);
-	return 0;
-}
-
-
-/*
- * This thread will attempt to discover other partitions to activate
- * based on info provided by SAL. This new thread is short lived and
- * will exit once discovery is complete.
- */
-static int
-xpc_initiate_discovery(void *ignore)
-{
-	daemonize(XPC_DISCOVERY_THREAD_NAME);
-
-	xpc_discovery();
-
-	dev_dbg(xpc_part, "discovery thread is exiting\n");
-
-	/* mark this thread as having exited */
-	complete(&xpc_discovery_exited);
-	return 0;
-}
-
-
-/*
- * Establish first contact with the remote partititon. This involves pulling
- * the XPC per partition variables from the remote partition and waiting for
- * the remote partition to pull ours.
- */
-static enum xpc_retval
-xpc_make_first_contact(struct xpc_partition *part)
-{
-	enum xpc_retval ret;
-
-
-	while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) {
-		if (ret != xpcRetry) {
-			XPC_DEACTIVATE_PARTITION(part, ret);
-			return ret;
-		}
-
-		dev_dbg(xpc_chan, "waiting to make first contact with "
-			"partition %d\n", XPC_PARTID(part));
-
-		/* wait a 1/4 of a second or so */
-		(void) msleep_interruptible(250);
-
-		if (part->act_state == XPC_P_DEACTIVATING) {
-			return part->reason;
-		}
-	}
-
-	return xpc_mark_partition_active(part);
-}
-
-
-/*
- * The first kthread assigned to a newly activated partition is the one
- * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to
- * that kthread until the partition is brought down, at which time that kthread
- * returns back to XPC HB. (The return of that kthread will signify to XPC HB
- * that XPC has dismantled all communication infrastructure for the associated
- * partition.) This kthread becomes the channel manager for that partition.
- *
- * Each active partition has a channel manager, who, besides connecting and
- * disconnecting channels, will ensure that each of the partition's connected
- * channels has the required number of assigned kthreads to get the work done.
- */
-static void
-xpc_channel_mgr(struct xpc_partition *part)
-{
-	while (part->act_state != XPC_P_DEACTIVATING ||
-			atomic_read(&part->nchannels_active) > 0 ||
-					!xpc_partition_disengaged(part)) {
-
-		xpc_process_channel_activity(part);
-
-
-		/*
-		 * Wait until we've been requested to activate kthreads or
-		 * all of the channel's message queues have been torn down or
-		 * a signal is pending.
-		 *
-		 * The channel_mgr_requests is set to 1 after being awakened,
-		 * This is done to prevent the channel mgr from making one pass
-		 * through the loop for each request, since he will
-		 * be servicing all the requests in one pass. The reason it's
-		 * set to 1 instead of 0 is so that other kthreads will know
-		 * that the channel mgr is running and won't bother trying to
-		 * wake him up.
-		 */
-		atomic_dec(&part->channel_mgr_requests);
-		(void) wait_event_interruptible(part->channel_mgr_wq,
-				(atomic_read(&part->channel_mgr_requests) > 0 ||
-				(volatile u64) part->local_IPI_amo != 0 ||
-				((volatile u8) part->act_state ==
-							XPC_P_DEACTIVATING &&
-				atomic_read(&part->nchannels_active) == 0 &&
-				xpc_partition_disengaged(part))));
-		atomic_set(&part->channel_mgr_requests, 1);
-
-		// >>> Does it need to wakeup periodically as well? In case we
-		// >>> miscalculated the #of kthreads to wakeup or create?
-	}
-}
-
-
-/*
- * When XPC HB determines that a partition has come up, it will create a new
- * kthread and that kthread will call this function to attempt to set up the
- * basic infrastructure used for Cross Partition Communication with the newly
- * upped partition.
- *
- * The kthread that was created by XPC HB and which setup the XPC
- * infrastructure will remain assigned to the partition until the partition
- * goes down. At which time the kthread will teardown the XPC infrastructure
- * and then exit.
- *
- * XPC HB will put the remote partition's XPC per partition specific variables
- * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
- * calling xpc_partition_up().
- */
-static void
-xpc_partition_up(struct xpc_partition *part)
-{
-	DBUG_ON(part->channels != NULL);
-
-	dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part));
-
-	if (xpc_setup_infrastructure(part) != xpcSuccess) {
-		return;
-	}
-
-	/*
-	 * The kthread that XPC HB called us with will become the
-	 * channel manager for this partition. It will not return
-	 * back to XPC HB until the partition's XPC infrastructure
-	 * has been dismantled.
-	 */
-
-	(void) xpc_part_ref(part);	/* this will always succeed */
-
-	if (xpc_make_first_contact(part) == xpcSuccess) {
-		xpc_channel_mgr(part);
-	}
-
-	xpc_part_deref(part);
-
-	xpc_teardown_infrastructure(part);
-}
-
-
-static int
-xpc_activating(void *__partid)
-{
-	partid_t partid = (u64) __partid;
-	struct xpc_partition *part = &xpc_partitions[partid];
-	unsigned long irq_flags;
-	struct sched_param param = { sched_priority: MAX_RT_PRIO - 1 };
-	int ret;
-
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-
-	spin_lock_irqsave(&part->act_lock, irq_flags);
-
-	if (part->act_state == XPC_P_DEACTIVATING) {
-		part->act_state = XPC_P_INACTIVE;
-		spin_unlock_irqrestore(&part->act_lock, irq_flags);
-		part->remote_rp_pa = 0;
-		return 0;
-	}
-
-	/* indicate the thread is activating */
-	DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ);
-	part->act_state = XPC_P_ACTIVATING;
-
-	XPC_SET_REASON(part, 0, 0);
-	spin_unlock_irqrestore(&part->act_lock, irq_flags);
-
-	dev_dbg(xpc_part, "bringing partition %d up\n", partid);
-
-	daemonize("xpc%02d", partid);
-
-	/*
-	 * This thread needs to run at a realtime priority to prevent a
-	 * significant performance degradation.
-	 */
-	ret = sched_setscheduler(current, SCHED_FIFO, &param);
-	if (ret != 0) {
-		dev_warn(xpc_part, "unable to set pid %d to a realtime "
-			"priority, ret=%d\n", current->pid, ret);
-	}
-
-	/* allow this thread and its children to run on any CPU */
-	set_cpus_allowed(current, CPU_MASK_ALL);
-
-	/*
-	 * Register the remote partition's AMOs with SAL so it can handle
-	 * and cleanup errors within that address range should the remote
-	 * partition go down. We don't unregister this range because it is
-	 * difficult to tell when outstanding writes to the remote partition
-	 * are finished and thus when it is safe to unregister. This should
-	 * not result in wasted space in the SAL xp_addr_region table because
-	 * we should get the same page for remote_amos_page_pa after module
-	 * reloads and system reboots.
-	 */
-	if (sn_register_xp_addr_region(part->remote_amos_page_pa,
-							PAGE_SIZE, 1) < 0) {
-		dev_warn(xpc_part, "xpc_partition_up(%d) failed to register "
-			"xp_addr region\n", partid);
-
-		spin_lock_irqsave(&part->act_lock, irq_flags);
-		part->act_state = XPC_P_INACTIVE;
-		XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__);
-		spin_unlock_irqrestore(&part->act_lock, irq_flags);
-		part->remote_rp_pa = 0;
-		return 0;
-	}
-
-	xpc_allow_hb(partid, xpc_vars);
-	xpc_IPI_send_activated(part);
-
-
-	/*
-	 * xpc_partition_up() holds this thread and marks this partition as
-	 * XPC_P_ACTIVE by calling xpc_hb_mark_active().
-	 */
-	(void) xpc_partition_up(part);
-
-	xpc_disallow_hb(partid, xpc_vars);
-	xpc_mark_partition_inactive(part);
-
-	if (part->reason == xpcReactivating) {
-		/* interrupting ourselves results in activating partition */
-		xpc_IPI_send_reactivate(part);
-	}
-
-	return 0;
-}
-
-
-void
-xpc_activate_partition(struct xpc_partition *part)
-{
-	partid_t partid = XPC_PARTID(part);
-	unsigned long irq_flags;
-	pid_t pid;
-
-
-	spin_lock_irqsave(&part->act_lock, irq_flags);
-
-	DBUG_ON(part->act_state != XPC_P_INACTIVE);
-
-	part->act_state = XPC_P_ACTIVATION_REQ;
-	XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
-
-	spin_unlock_irqrestore(&part->act_lock, irq_flags);
-
-	pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0);
-
-	if (unlikely(pid <= 0)) {
-		spin_lock_irqsave(&part->act_lock, irq_flags);
-		part->act_state = XPC_P_INACTIVE;
-		XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
-		spin_unlock_irqrestore(&part->act_lock, irq_flags);
-	}
-}
-
-
-/*
- * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
- * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
- * than one partition, we use an AMO_t structure per partition to indicate
- * whether a partition has sent an IPI or not.  >>> If it has, then wake up the
- * associated kthread to handle it.
- *
- * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
- * running on other partitions.
- *
- * Noteworthy Arguments:
- *
- *	irq - Interrupt ReQuest number. NOT USED.
- *
- *	dev_id - partid of IPI's potential sender.
- *
- *	regs - processor's context before the processor entered
- *	       interrupt code. NOT USED.
- */
-irqreturn_t
-xpc_notify_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs)
-{
-	partid_t partid = (partid_t) (u64) dev_id;
-	struct xpc_partition *part = &xpc_partitions[partid];
-
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-
-	if (xpc_part_ref(part)) {
-		xpc_check_for_channel_activity(part);
-
-		xpc_part_deref(part);
-	}
-	return IRQ_HANDLED;
-}
-
-
-/*
- * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor
- * because the write to their associated IPI amo completed after the IRQ/IPI
- * was received.
- */
-void
-xpc_dropped_IPI_check(struct xpc_partition *part)
-{
-	if (xpc_part_ref(part)) {
-		xpc_check_for_channel_activity(part);
-
-		part->dropped_IPI_timer.expires = jiffies +
-							XPC_P_DROPPED_IPI_WAIT;
-		add_timer(&part->dropped_IPI_timer);
-		xpc_part_deref(part);
-	}
-}
-
-
-void
-xpc_activate_kthreads(struct xpc_channel *ch, int needed)
-{
-	int idle = atomic_read(&ch->kthreads_idle);
-	int assigned = atomic_read(&ch->kthreads_assigned);
-	int wakeup;
-
-
-	DBUG_ON(needed <= 0);
-
-	if (idle > 0) {
-		wakeup = (needed > idle) ? idle : needed;
-		needed -= wakeup;
-
-		dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
-			"channel=%d\n", wakeup, ch->partid, ch->number);
-
-		/* only wakeup the requested number of kthreads */
-		wake_up_nr(&ch->idle_wq, wakeup);
-	}
-
-	if (needed <= 0) {
-		return;
-	}
-
-	if (needed + assigned > ch->kthreads_assigned_limit) {
-		needed = ch->kthreads_assigned_limit - assigned;
-		// >>>should never be less than 0
-		if (needed <= 0) {
-			return;
-		}
-	}
-
-	dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
-		needed, ch->partid, ch->number);
-
-	xpc_create_kthreads(ch, needed);
-}
-
-
-/*
- * This function is where XPC's kthreads wait for messages to deliver.
- */
-static void
-xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
-{
-	do {
-		/* deliver messages to their intended recipients */
-
-		while ((volatile s64) ch->w_local_GP.get <
-				(volatile s64) ch->w_remote_GP.put &&
-					!((volatile u32) ch->flags &
-						XPC_C_DISCONNECTING)) {
-			xpc_deliver_msg(ch);
-		}
-
-		if (atomic_inc_return(&ch->kthreads_idle) >
-						ch->kthreads_idle_limit) {
-			/* too many idle kthreads on this channel */
-			atomic_dec(&ch->kthreads_idle);
-			break;
-		}
-
-		dev_dbg(xpc_chan, "idle kthread calling "
-			"wait_event_interruptible_exclusive()\n");
-
-		(void) wait_event_interruptible_exclusive(ch->idle_wq,
-				((volatile s64) ch->w_local_GP.get <
-					(volatile s64) ch->w_remote_GP.put ||
-				((volatile u32) ch->flags &
-						XPC_C_DISCONNECTING)));
-
-		atomic_dec(&ch->kthreads_idle);
-
-	} while (!((volatile u32) ch->flags & XPC_C_DISCONNECTING));
-}
-
-
-static int
-xpc_daemonize_kthread(void *args)
-{
-	partid_t partid = XPC_UNPACK_ARG1(args);
-	u16 ch_number = XPC_UNPACK_ARG2(args);
-	struct xpc_partition *part = &xpc_partitions[partid];
-	struct xpc_channel *ch;
-	int n_needed;
-	unsigned long irq_flags;
-
-
-	daemonize("xpc%02dc%d", partid, ch_number);
-
-	dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
-		partid, ch_number);
-
-	ch = &part->channels[ch_number];
-
-	if (!(ch->flags & XPC_C_DISCONNECTING)) {
-
-		/* let registerer know that connection has been established */
-
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
-			ch->flags |= XPC_C_CONNECTEDCALLOUT;
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-			xpc_connected_callout(ch);
-
-			spin_lock_irqsave(&ch->lock, irq_flags);
-			ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-			/*
-			 * It is possible that while the callout was being
-			 * made that the remote partition sent some messages.
-			 * If that is the case, we may need to activate
-			 * additional kthreads to help deliver them. We only
-			 * need one less than total #of messages to deliver.
-			 */
-			n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1;
-			if (n_needed > 0 &&
-					!(ch->flags & XPC_C_DISCONNECTING)) {
-				xpc_activate_kthreads(ch, n_needed);
-			}
-		} else {
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-		}
-
-		xpc_kthread_waitmsgs(part, ch);
-	}
-
-	if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
-				!(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
-			ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-			xpc_disconnect_callout(ch, xpcDisconnecting);
-
-			spin_lock_irqsave(&ch->lock, irq_flags);
-			ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
-		}
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-		if (atomic_dec_return(&part->nchannels_engaged) == 0) {
-			xpc_mark_partition_disengaged(part);
-			xpc_IPI_send_disengage(part);
-		}
-	}
-
-
-	xpc_msgqueue_deref(ch);
-
-	dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
-		partid, ch_number);
-
-	xpc_part_deref(part);
-	return 0;
-}
-
-
-/*
- * For each partition that XPC has established communications with, there is
- * a minimum of one kernel thread assigned to perform any operation that
- * may potentially sleep or block (basically the callouts to the asynchronous
- * functions registered via xpc_connect()).
- *
- * Additional kthreads are created and destroyed by XPC as the workload
- * demands.
- *
- * A kthread is assigned to one of the active channels that exists for a given
- * partition.
- */
-void
-xpc_create_kthreads(struct xpc_channel *ch, int needed)
-{
-	unsigned long irq_flags;
-	pid_t pid;
-	u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
-	struct xpc_partition *part = &xpc_partitions[ch->partid];
-
-
-	while (needed-- > 0) {
-
-		/*
-		 * The following is done on behalf of the newly created
-		 * kthread. That kthread is responsible for doing the
-		 * counterpart to the following before it exits.
-		 */
-		(void) xpc_part_ref(part);
-		xpc_msgqueue_ref(ch);
-		if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
-		    atomic_inc_return(&part->nchannels_engaged) == 1) {
-			xpc_mark_partition_engaged(part);
-		}
-
-		pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
-		if (pid < 0) {
-			/* the fork failed */
-			if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
-			    atomic_dec_return(&part->nchannels_engaged) == 0) {
-				xpc_mark_partition_disengaged(part);
-				xpc_IPI_send_disengage(part);
-			}
-			xpc_msgqueue_deref(ch);
-			xpc_part_deref(part);
-
-			if (atomic_read(&ch->kthreads_assigned) <
-						ch->kthreads_idle_limit) {
-				/*
-				 * Flag this as an error only if we have an
-				 * insufficient #of kthreads for the channel
-				 * to function.
-				 *
-				 * No xpc_msgqueue_ref() is needed here since
-				 * the channel mgr is doing this.
-				 */
-				spin_lock_irqsave(&ch->lock, irq_flags);
-				XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources,
-								&irq_flags);
-				spin_unlock_irqrestore(&ch->lock, irq_flags);
-			}
-			break;
-		}
-
-		ch->kthreads_created++;	// >>> temporary debug only!!!
-	}
-}
-
-
-void
-xpc_disconnect_wait(int ch_number)
-{
-	unsigned long irq_flags;
-	partid_t partid;
-	struct xpc_partition *part;
-	struct xpc_channel *ch;
-	int wakeup_channel_mgr;
-
-
-	/* now wait for all callouts to the caller's function to cease */
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-		part = &xpc_partitions[partid];
-
-		if (!xpc_part_ref(part)) {
-			continue;
-		}
-
-		ch = &part->channels[ch_number];
-
-		if (!(ch->flags & XPC_C_WDISCONNECT)) {
-			xpc_part_deref(part);
-			continue;
-		}
-
-		wait_for_completion(&ch->wdisconnect_wait);
-
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
-		wakeup_channel_mgr = 0;
-
-		if (ch->delayed_IPI_flags) {
-			if (part->act_state != XPC_P_DEACTIVATING) {
-				spin_lock(&part->IPI_lock);
-				XPC_SET_IPI_FLAGS(part->local_IPI_amo,
-					ch->number, ch->delayed_IPI_flags);
-				spin_unlock(&part->IPI_lock);
-				wakeup_channel_mgr = 1;
-			}
-			ch->delayed_IPI_flags = 0;
-		}
-
-		ch->flags &= ~XPC_C_WDISCONNECT;
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
-
-		if (wakeup_channel_mgr) {
-			xpc_wakeup_channel_mgr(part);
-		}
-
-		xpc_part_deref(part);
-	}
-}
-
-
-static void
-xpc_do_exit(enum xpc_retval reason)
-{
-	partid_t partid;
-	int active_part_count, printed_waiting_msg = 0;
-	struct xpc_partition *part;
-	unsigned long printmsg_time, disengage_request_timeout = 0;
-
-
-	/* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
-	DBUG_ON(xpc_exiting == 1);
-
-	/*
-	 * Let the heartbeat checker thread and the discovery thread
-	 * (if one is running) know that they should exit. Also wake up
-	 * the heartbeat checker thread in case it's sleeping.
-	 */
-	xpc_exiting = 1;
-	wake_up_interruptible(&xpc_act_IRQ_wq);
-
-	/* ignore all incoming interrupts */
-	free_irq(SGI_XPC_ACTIVATE, NULL);
-
-	/* wait for the discovery thread to exit */
-	wait_for_completion(&xpc_discovery_exited);
-
-	/* wait for the heartbeat checker thread to exit */
-	wait_for_completion(&xpc_hb_checker_exited);
-
-
-	/* sleep for a 1/3 of a second or so */
-	(void) msleep_interruptible(300);
-
-
-	/* wait for all partitions to become inactive */
-
-	printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
-	xpc_disengage_request_timedout = 0;
-
-	do {
-		active_part_count = 0;
-
-		for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-			part = &xpc_partitions[partid];
-
-			if (xpc_partition_disengaged(part) &&
-					part->act_state == XPC_P_INACTIVE) {
-				continue;
-			}
-
-			active_part_count++;
-
-			XPC_DEACTIVATE_PARTITION(part, reason);
-
-			if (part->disengage_request_timeout >
-						disengage_request_timeout) {
-				disengage_request_timeout =
-						part->disengage_request_timeout;
-			}
-		}
-
-		if (xpc_partition_engaged(-1UL)) {
-			if (time_after(jiffies, printmsg_time)) {
-				dev_info(xpc_part, "waiting for remote "
-					"partitions to disengage, timeout in "
-					"%ld seconds\n",
-					(disengage_request_timeout - jiffies)
-									/ HZ);
-				printmsg_time = jiffies +
-					(XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
-				printed_waiting_msg = 1;
-			}
-
-		} else if (active_part_count > 0) {
-			if (printed_waiting_msg) {
-				dev_info(xpc_part, "waiting for local partition"
-					" to disengage\n");
-				printed_waiting_msg = 0;
-			}
-
-		} else {
-			if (!xpc_disengage_request_timedout) {
-				dev_info(xpc_part, "all partitions have "
-					"disengaged\n");
-			}
-			break;
-		}
-
-		/* sleep for a 1/3 of a second or so */
-		(void) msleep_interruptible(300);
-
-	} while (1);
-
-	DBUG_ON(xpc_partition_engaged(-1UL));
-
-
-	/* indicate to others that our reserved page is uninitialized */
-	xpc_rsvd_page->vars_pa = 0;
-
-	/* now it's time to eliminate our heartbeat */
-	del_timer_sync(&xpc_hb_timer);
-	DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
-
-	if (reason == xpcUnloading) {
-		/* take ourselves off of the reboot_notifier_list */
-		(void) unregister_reboot_notifier(&xpc_reboot_notifier);
-
-		/* take ourselves off of the die_notifier list */
-		(void) unregister_die_notifier(&xpc_die_notifier);
-	}
-
-	/* close down protections for IPI operations */
-	xpc_restrict_IPI_ops();
-
-
-	/* clear the interface to XPC's functions */
-	xpc_clear_interface();
-
-	if (xpc_sysctl) {
-		unregister_sysctl_table(xpc_sysctl);
-	}
-}
-
-
-/*
- * This function is called when the system is being rebooted.
- */
-static int
-xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
-{
-	enum xpc_retval reason;
-
-
-	switch (event) {
-	case SYS_RESTART:
-		reason = xpcSystemReboot;
-		break;
-	case SYS_HALT:
-		reason = xpcSystemHalt;
-		break;
-	case SYS_POWER_OFF:
-		reason = xpcSystemPoweroff;
-		break;
-	default:
-		reason = xpcSystemGoingDown;
-	}
-
-	xpc_do_exit(reason);
-	return NOTIFY_DONE;
-}
-
-
-/*
- * Notify other partitions to disengage from all references to our memory.
- */
-static void
-xpc_die_disengage(void)
-{
-	struct xpc_partition *part;
-	partid_t partid;
-	unsigned long engaged;
-	long time, printmsg_time, disengage_request_timeout;
-
-
-	/* keep xpc_hb_checker thread from doing anything (just in case) */
-	xpc_exiting = 1;
-
-	xpc_vars->heartbeating_to_mask = 0;  /* indicate we're deactivated */
-
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-		part = &xpc_partitions[partid];
-
-		if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
-							remote_vars_version)) {
-
-			/* just in case it was left set by an earlier XPC */
-			xpc_clear_partition_engaged(1UL << partid);
-			continue;
-		}
-
-		if (xpc_partition_engaged(1UL << partid) ||
-					part->act_state != XPC_P_INACTIVE) {
-			xpc_request_partition_disengage(part);
-			xpc_mark_partition_disengaged(part);
-			xpc_IPI_send_disengage(part);
-		}
-	}
-
-	time = rtc_time();
-	printmsg_time = time +
-		(XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
-	disengage_request_timeout = time +
-		(xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
-
-	/* wait for all other partitions to disengage from us */
-
-	while (1) {
-		engaged = xpc_partition_engaged(-1UL);
-		if (!engaged) {
-			dev_info(xpc_part, "all partitions have disengaged\n");
-			break;
-		}
-
-		time = rtc_time();
-		if (time >= disengage_request_timeout) {
-			for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-				if (engaged & (1UL << partid)) {
-					dev_info(xpc_part, "disengage from "
-						"remote partition %d timed "
-						"out\n", partid);
-				}
-			}
-			break;
-		}
-
-		if (time >= printmsg_time) {
-			dev_info(xpc_part, "waiting for remote partitions to "
-				"disengage, timeout in %ld seconds\n",
-				(disengage_request_timeout - time) /
-						sn_rtc_cycles_per_second);
-			printmsg_time = time +
-					(XPC_DISENGAGE_PRINTMSG_INTERVAL *
-						sn_rtc_cycles_per_second);
-		}
-	}
-}
-
-
-/*
- * This function is called when the system is being restarted or halted due
- * to some sort of system failure. If this is the case we need to notify the
- * other partitions to disengage from all references to our memory.
- * This function can also be called when our heartbeater could be offlined
- * for a time. In this case we need to notify other partitions to not worry
- * about the lack of a heartbeat.
- */
-static int
-xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
-{
-	switch (event) {
-	case DIE_MACHINE_RESTART:
-	case DIE_MACHINE_HALT:
-		xpc_die_disengage();
-		break;
-
-	case DIE_KDEBUG_ENTER:
-		/* Should lack of heartbeat be ignored by other partitions? */
-		if (!xpc_kdebug_ignore) {
-			break;
-		}
-		/* fall through */
-	case DIE_MCA_MONARCH_ENTER:
-	case DIE_INIT_MONARCH_ENTER:
-		xpc_vars->heartbeat++;
-		xpc_vars->heartbeat_offline = 1;
-		break;
-
-	case DIE_KDEBUG_LEAVE:
-		/* Is lack of heartbeat being ignored by other partitions? */
-		if (!xpc_kdebug_ignore) {
-			break;
-		}
-		/* fall through */
-	case DIE_MCA_MONARCH_LEAVE:
-	case DIE_INIT_MONARCH_LEAVE:
-		xpc_vars->heartbeat++;
-		xpc_vars->heartbeat_offline = 0;
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-
-int __init
-xpc_init(void)
-{
-	int ret;
-	partid_t partid;
-	struct xpc_partition *part;
-	pid_t pid;
-
-
-	if (!ia64_platform_is("sn2")) {
-		return -ENODEV;
-	}
-
-	/*
-	 * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
-	 * various portions of a partition's reserved page. Its size is based
-	 * on the size of the reserved page header and part_nasids mask. So we
-	 * need to ensure that the other items will fit as well.
-	 */
-	if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) {
-		dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
-		return -EPERM;
-	}
-	DBUG_ON((u64) xpc_remote_copy_buffer !=
-				L1_CACHE_ALIGN((u64) xpc_remote_copy_buffer));
-
-	snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
-	snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
-
-	xpc_sysctl = register_sysctl_table(xpc_sys_dir, 1);
-
-	/*
-	 * The first few fields of each entry of xpc_partitions[] need to
-	 * be initialized now so that calls to xpc_connect() and
-	 * xpc_disconnect() can be made prior to the activation of any remote
-	 * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
-	 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
-	 * PARTITION HAS BEEN ACTIVATED.
-	 */
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-		part = &xpc_partitions[partid];
-
-		DBUG_ON((u64) part != L1_CACHE_ALIGN((u64) part));
-
-		part->act_IRQ_rcvd = 0;
-		spin_lock_init(&part->act_lock);
-		part->act_state = XPC_P_INACTIVE;
-		XPC_SET_REASON(part, 0, 0);
-
-		init_timer(&part->disengage_request_timer);
-		part->disengage_request_timer.function =
-				xpc_timeout_partition_disengage_request;
-		part->disengage_request_timer.data = (unsigned long) part;
-
-		part->setup_state = XPC_P_UNSET;
-		init_waitqueue_head(&part->teardown_wq);
-		atomic_set(&part->references, 0);
-	}
-
-	/*
-	 * Open up protections for IPI operations (and AMO operations on
-	 * Shub 1.1 systems).
-	 */
-	xpc_allow_IPI_ops();
-
-	/*
-	 * Interrupts being processed will increment this atomic variable and
-	 * awaken the heartbeat thread which will process the interrupts.
-	 */
-	atomic_set(&xpc_act_IRQ_rcvd, 0);
-
-	/*
-	 * This is safe to do before the xpc_hb_checker thread has started
-	 * because the handler releases a wait queue.  If an interrupt is
-	 * received before the thread is waiting, it will not go to sleep,
-	 * but rather immediately process the interrupt.
-	 */
-	ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0,
-							"xpc hb", NULL);
-	if (ret != 0) {
-		dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
-			"errno=%d\n", -ret);
-
-		xpc_restrict_IPI_ops();
-
-		if (xpc_sysctl) {
-			unregister_sysctl_table(xpc_sysctl);
-		}
-		return -EBUSY;
-	}
-
-	/*
-	 * Fill the partition reserved page with the information needed by
-	 * other partitions to discover we are alive and establish initial
-	 * communications.
-	 */
-	xpc_rsvd_page = xpc_rsvd_page_init();
-	if (xpc_rsvd_page == NULL) {
-		dev_err(xpc_part, "could not setup our reserved page\n");
-
-		free_irq(SGI_XPC_ACTIVATE, NULL);
-		xpc_restrict_IPI_ops();
-
-		if (xpc_sysctl) {
-			unregister_sysctl_table(xpc_sysctl);
-		}
-		return -EBUSY;
-	}
-
-
-	/* add ourselves to the reboot_notifier_list */
-	ret = register_reboot_notifier(&xpc_reboot_notifier);
-	if (ret != 0) {
-		dev_warn(xpc_part, "can't register reboot notifier\n");
-	}
-
-	/* add ourselves to the die_notifier list (i.e., ia64die_chain) */
-	ret = register_die_notifier(&xpc_die_notifier);
-	if (ret != 0) {
-		dev_warn(xpc_part, "can't register die notifier\n");
-	}
-
-
-	/*
-	 * Set the beating to other partitions into motion.  This is
-	 * the last requirement for other partitions' discovery to
-	 * initiate communications with us.
-	 */
-	init_timer(&xpc_hb_timer);
-	xpc_hb_timer.function = xpc_hb_beater;
-	xpc_hb_beater(0);
-
-
-	/*
-	 * The real work-horse behind xpc.  This processes incoming
-	 * interrupts and monitors remote heartbeats.
-	 */
-	pid = kernel_thread(xpc_hb_checker, NULL, 0);
-	if (pid < 0) {
-		dev_err(xpc_part, "failed while forking hb check thread\n");
-
-		/* indicate to others that our reserved page is uninitialized */
-		xpc_rsvd_page->vars_pa = 0;
-
-		/* take ourselves off of the reboot_notifier_list */
-		(void) unregister_reboot_notifier(&xpc_reboot_notifier);
-
-		/* take ourselves off of the die_notifier list */
-		(void) unregister_die_notifier(&xpc_die_notifier);
-
-		del_timer_sync(&xpc_hb_timer);
-		free_irq(SGI_XPC_ACTIVATE, NULL);
-		xpc_restrict_IPI_ops();
-
-		if (xpc_sysctl) {
-			unregister_sysctl_table(xpc_sysctl);
-		}
-		return -EBUSY;
-	}
-
-
-	/*
-	 * Startup a thread that will attempt to discover other partitions to
-	 * activate based on info provided by SAL. This new thread is short
-	 * lived and will exit once discovery is complete.
-	 */
-	pid = kernel_thread(xpc_initiate_discovery, NULL, 0);
-	if (pid < 0) {
-		dev_err(xpc_part, "failed while forking discovery thread\n");
-
-		/* mark this new thread as a non-starter */
-		complete(&xpc_discovery_exited);
-
-		xpc_do_exit(xpcUnloading);
-		return -EBUSY;
-	}
-
-
-	/* set the interface to point at XPC's functions */
-	xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
-			  xpc_initiate_allocate, xpc_initiate_send,
-			  xpc_initiate_send_notify, xpc_initiate_received,
-			  xpc_initiate_partid_to_nasids);
-
-	return 0;
-}
-module_init(xpc_init);
-
-
-void __exit
-xpc_exit(void)
-{
-	xpc_do_exit(xpcUnloading);
-}
-module_exit(xpc_exit);
-
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
-MODULE_LICENSE("GPL");
-
-module_param(xpc_hb_interval, int, 0);
-MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
-		"heartbeat increments.");
-
-module_param(xpc_hb_check_interval, int, 0);
-MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
-		"heartbeat checks.");
-
-module_param(xpc_disengage_request_timelimit, int, 0);
-MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
-		"for disengage request to complete.");
-
-module_param(xpc_kdebug_ignore, int, 0);
-MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
-		"other partitions when dropping into kdebug.");
-
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
deleted file mode 100644
index 2a89cfce495..00000000000
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ /dev/null
@@ -1,1247 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-
-/*
- * Cross Partition Communication (XPC) partition support.
- *
- *	This is the part of XPC that detects the presence/absence of
- *	other partitions. It provides a heartbeat and monitors the
- *	heartbeats of other partitions.
- *
- */
-
-
-#include <linux/kernel.h>
-#include <linux/sysctl.h>
-#include <linux/cache.h>
-#include <linux/mmzone.h>
-#include <linux/nodemask.h>
-#include <asm/uncached.h>
-#include <asm/sn/bte.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/sn/nodepda.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/xpc.h>
-
-
-/* XPC is exiting flag */
-int xpc_exiting;
-
-
-/* SH_IPI_ACCESS shub register value on startup */
-static u64 xpc_sh1_IPI_access;
-static u64 xpc_sh2_IPI_access0;
-static u64 xpc_sh2_IPI_access1;
-static u64 xpc_sh2_IPI_access2;
-static u64 xpc_sh2_IPI_access3;
-
-
-/* original protection values for each node */
-u64 xpc_prot_vec[MAX_NUMNODES];
-
-
-/* this partition's reserved page pointers */
-struct xpc_rsvd_page *xpc_rsvd_page;
-static u64 *xpc_part_nasids;
-static u64 *xpc_mach_nasids;
-struct xpc_vars *xpc_vars;
-struct xpc_vars_part *xpc_vars_part;
-
-static int xp_nasid_mask_bytes;	/* actual size in bytes of nasid mask */
-static int xp_nasid_mask_words;	/* actual size in words of nasid mask */
-
-
-/*
- * For performance reasons, each entry of xpc_partitions[] is cacheline
- * aligned. And xpc_partitions[] is padded with an additional entry at the
- * end so that the last legitimate entry doesn't share its cacheline with
- * another variable.
- */
-struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
-
-
-/*
- * Generic buffer used to store a local copy of portions of a remote
- * partition's reserved page (either its header and part_nasids mask,
- * or its vars).
- *
- * xpc_discovery runs only once and is a seperate thread that is
- * very likely going to be processing in parallel with receiving
- * interrupts.
- */
-char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
-							XP_NASID_MASK_BYTES];
-
-
-/*
- * Guarantee that the kmalloc'd memory is cacheline aligned.
- */
-static void *
-xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
-{
-	/* see if kmalloc will give us cachline aligned memory by default */
-	*base = kmalloc(size, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
-		return *base;
-	}
-	kfree(*base);
-
-	/* nope, we'll have to do it ourselves */
-	*base = kmalloc(size + L1_CACHE_BYTES, flags);
-	if (*base == NULL) {
-		return NULL;
-	}
-	return (void *) L1_CACHE_ALIGN((u64) *base);
-}
-
-
-/*
- * Given a nasid, get the physical address of the  partition's reserved page
- * for that nasid. This function returns 0 on any error.
- */
-static u64
-xpc_get_rsvd_page_pa(int nasid)
-{
-	bte_result_t bte_res;
-	s64 status;
-	u64 cookie = 0;
-	u64 rp_pa = nasid;	/* seed with nasid */
-	u64 len = 0;
-	u64 buf = buf;
-	u64 buf_len = 0;
-	void *buf_base = NULL;
-
-
-	while (1) {
-
-		status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
-								&len);
-
-		dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
-			"0x%016lx, address=0x%016lx, len=0x%016lx\n",
-			status, cookie, rp_pa, len);
-
-		if (status != SALRET_MORE_PASSES) {
-			break;
-		}
-
-		if (L1_CACHE_ALIGN(len) > buf_len) {
-			kfree(buf_base);
-			buf_len = L1_CACHE_ALIGN(len);
-			buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len,
-							GFP_KERNEL, &buf_base);
-			if (buf_base == NULL) {
-				dev_err(xpc_part, "unable to kmalloc "
-					"len=0x%016lx\n", buf_len);
-				status = SALRET_ERROR;
-				break;
-			}
-		}
-
-		bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_len,
-					(BTE_NOTIFY | BTE_WACQUIRE), NULL);
-		if (bte_res != BTE_SUCCESS) {
-			dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
-			status = SALRET_ERROR;
-			break;
-		}
-	}
-
-	kfree(buf_base);
-
-	if (status != SALRET_OK) {
-		rp_pa = 0;
-	}
-	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
-	return rp_pa;
-}
-
-
-/*
- * Fill the partition reserved page with the information needed by
- * other partitions to discover we are alive and establish initial
- * communications.
- */
-struct xpc_rsvd_page *
-xpc_rsvd_page_init(void)
-{
-	struct xpc_rsvd_page *rp;
-	AMO_t *amos_page;
-	u64 rp_pa, nasid_array = 0;
-	int i, ret;
-
-
-	/* get the local reserved page's address */
-
-	preempt_disable();
-	rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
-	preempt_enable();
-	if (rp_pa == 0) {
-		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
-		return NULL;
-	}
-	rp = (struct xpc_rsvd_page *) __va(rp_pa);
-
-	if (rp->partid != sn_partition_id) {
-		dev_err(xpc_part, "the reserved page's partid of %d should be "
-			"%d\n", rp->partid, sn_partition_id);
-		return NULL;
-	}
-
-	rp->version = XPC_RP_VERSION;
-
-	/* establish the actual sizes of the nasid masks */
-	if (rp->SAL_version == 1) {
-		/* SAL_version 1 didn't set the nasids_size field */
-		rp->nasids_size = 128;
-	}
-	xp_nasid_mask_bytes = rp->nasids_size;
-	xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
-
-	/* setup the pointers to the various items in the reserved page */
-	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
-	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
-	xpc_vars = XPC_RP_VARS(rp);
-	xpc_vars_part = XPC_RP_VARS_PART(rp);
-
-	/*
-	 * Before clearing xpc_vars, see if a page of AMOs had been previously
-	 * allocated. If not we'll need to allocate one and set permissions
-	 * so that cross-partition AMOs are allowed.
-	 *
-	 * The allocated AMO page needs MCA reporting to remain disabled after
-	 * XPC has unloaded.  To make this work, we keep a copy of the pointer
-	 * to this page (i.e., amos_page) in the struct xpc_vars structure,
-	 * which is pointed to by the reserved page, and re-use that saved copy
-	 * on subsequent loads of XPC. This AMO page is never freed, and its
-	 * memory protections are never restricted.
-	 */
-	if ((amos_page = xpc_vars->amos_page) == NULL) {
-		amos_page = (AMO_t *) TO_AMO(uncached_alloc_page(0));
-		if (amos_page == NULL) {
-			dev_err(xpc_part, "can't allocate page of AMOs\n");
-			return NULL;
-		}
-
-		/*
-		 * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
-		 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
-		 */
-		if (!enable_shub_wars_1_1()) {
-			ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
-					PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
-					&nasid_array);
-			if (ret != 0) {
-				dev_err(xpc_part, "can't change memory "
-					"protections\n");
-				uncached_free_page(__IA64_UNCACHED_OFFSET |
-						   TO_PHYS((u64) amos_page));
-				return NULL;
-			}
-		}
-	} else if (!IS_AMO_ADDRESS((u64) amos_page)) {
-		/*
-		 * EFI's XPBOOT can also set amos_page in the reserved page,
-		 * but it happens to leave it as an uncached physical address
-		 * and we need it to be an uncached virtual, so we'll have to
-		 * convert it.
-		 */
-		if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) {
-			dev_err(xpc_part, "previously used amos_page address "
-				"is bad = 0x%p\n", (void *) amos_page);
-			return NULL;
-		}
-		amos_page = (AMO_t *) TO_AMO((u64) amos_page);
-	}
-
-	/* clear xpc_vars */
-	memset(xpc_vars, 0, sizeof(struct xpc_vars));
-
-	xpc_vars->version = XPC_V_VERSION;
-	xpc_vars->act_nasid = cpuid_to_nasid(0);
-	xpc_vars->act_phys_cpuid = cpu_physical_id(0);
-	xpc_vars->vars_part_pa = __pa(xpc_vars_part);
-	xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page);
-	xpc_vars->amos_page = amos_page;  /* save for next load of XPC */
-
-
-	/* clear xpc_vars_part */
-	memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
-							XP_MAX_PARTITIONS);
-
-	/* initialize the activate IRQ related AMO variables */
-	for (i = 0; i < xp_nasid_mask_words; i++) {
-		(void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
-	}
-
-	/* initialize the engaged remote partitions related AMO variables */
-	(void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
-	(void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
-
-	/* timestamp of when reserved page was setup by XPC */
-	rp->stamp = CURRENT_TIME;
-
-	/*
-	 * This signifies to the remote partition that our reserved
-	 * page is initialized.
-	 */
-	rp->vars_pa = __pa(xpc_vars);
-
-	return rp;
-}
-
-
-/*
- * Change protections to allow IPI operations (and AMO operations on
- * Shub 1.1 systems).
- */
-void
-xpc_allow_IPI_ops(void)
-{
-	int node;
-	int nasid;
-
-
-	// >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
-
-	if (is_shub2()) {
-		xpc_sh2_IPI_access0 =
-			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
-		xpc_sh2_IPI_access1 =
-			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
-		xpc_sh2_IPI_access2 =
-			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
-		xpc_sh2_IPI_access3 =
-			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
-								-1UL);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
-								-1UL);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
-								-1UL);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
-								-1UL);
-		}
-
-	} else {
-		xpc_sh1_IPI_access =
-			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
-								-1UL);
-
-			/*
-			 * Since the BIST collides with memory operations on
-			 * SHUB 1.1 sn_change_memprotect() cannot be used.
-			 */
-			if (enable_shub_wars_1_1()) {
-				/* open up everything */
-				xpc_prot_vec[node] = (u64) HUB_L((u64 *)
-						GLOBAL_MMR_ADDR(nasid,
-						SH1_MD_DQLP_MMR_DIR_PRIVEC0));
-				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
-						SH1_MD_DQLP_MMR_DIR_PRIVEC0),
-								-1UL);
-				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
-						SH1_MD_DQRP_MMR_DIR_PRIVEC0),
-								-1UL);
-			}
-		}
-	}
-}
-
-
-/*
- * Restrict protections to disallow IPI operations (and AMO operations on
- * Shub 1.1 systems).
- */
-void
-xpc_restrict_IPI_ops(void)
-{
-	int node;
-	int nasid;
-
-
-	// >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
-
-	if (is_shub2()) {
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
-							xpc_sh2_IPI_access0);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
-							xpc_sh2_IPI_access1);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
-							xpc_sh2_IPI_access2);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
-							xpc_sh2_IPI_access3);
-		}
-
-	} else {
-
-		for_each_online_node(node) {
-			nasid = cnodeid_to_nasid(node);
-			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
-							xpc_sh1_IPI_access);
-
-			if (enable_shub_wars_1_1()) {
-				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
-						SH1_MD_DQLP_MMR_DIR_PRIVEC0),
-							xpc_prot_vec[node]);
-				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
-						SH1_MD_DQRP_MMR_DIR_PRIVEC0),
-							xpc_prot_vec[node]);
-			}
-		}
-	}
-}
-
-
-/*
- * At periodic intervals, scan through all active partitions and ensure
- * their heartbeat is still active.  If not, the partition is deactivated.
- */
-void
-xpc_check_remote_hb(void)
-{
-	struct xpc_vars *remote_vars;
-	struct xpc_partition *part;
-	partid_t partid;
-	bte_result_t bres;
-
-
-	remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
-
-	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-
-		if (xpc_exiting) {
-			break;
-		}
-
-		if (partid == sn_partition_id) {
-			continue;
-		}
-
-		part = &xpc_partitions[partid];
-
-		if (part->act_state == XPC_P_INACTIVE ||
-				part->act_state == XPC_P_DEACTIVATING) {
-			continue;
-		}
-
-		/* pull the remote_hb cache line */
-		bres = xp_bte_copy(part->remote_vars_pa,
-					ia64_tpa((u64) remote_vars),
-					XPC_RP_VARS_SIZE,
-					(BTE_NOTIFY | BTE_WACQUIRE), NULL);
-		if (bres != BTE_SUCCESS) {
-			XPC_DEACTIVATE_PARTITION(part,
-						xpc_map_bte_errors(bres));
-			continue;
-		}
-
-		dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
-			" = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
-			partid, remote_vars->heartbeat, part->last_heartbeat,
-			remote_vars->heartbeat_offline,
-			remote_vars->heartbeating_to_mask);
-
-		if (((remote_vars->heartbeat == part->last_heartbeat) &&
-			(remote_vars->heartbeat_offline == 0)) ||
-			     !xpc_hb_allowed(sn_partition_id, remote_vars)) {
-
-			XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
-			continue;
-		}
-
-		part->last_heartbeat = remote_vars->heartbeat;
-	}
-}
-
-
-/*
- * Get a copy of a portion of the remote partition's rsvd page.
- *
- * remote_rp points to a buffer that is cacheline aligned for BTE copies and
- * is large enough to contain a copy of their reserved page header and
- * part_nasids mask.
- */
-static enum xpc_retval
-xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
-		struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
-{
-	int bres, i;
-
-
-	/* get the reserved page's physical address */
-
-	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
-	if (*remote_rp_pa == 0) {
-		return xpcNoRsvdPageAddr;
-	}
-
-
-	/* pull over the reserved page header and part_nasids mask */
-
-	bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
-				XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
-				(BTE_NOTIFY | BTE_WACQUIRE), NULL);
-	if (bres != BTE_SUCCESS) {
-		return xpc_map_bte_errors(bres);
-	}
-
-
-	if (discovered_nasids != NULL) {
-		u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
-
-
-		for (i = 0; i < xp_nasid_mask_words; i++) {
-			discovered_nasids[i] |= remote_part_nasids[i];
-		}
-	}
-
-
-	/* check that the partid is for another partition */
-
-	if (remote_rp->partid < 1 ||
-				remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
-		return xpcInvalidPartid;
-	}
-
-	if (remote_rp->partid == sn_partition_id) {
-		return xpcLocalPartid;
-	}
-
-
-	if (XPC_VERSION_MAJOR(remote_rp->version) !=
-					XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
-		return xpcBadVersion;
-	}
-
-	return xpcSuccess;
-}
-
-
-/*
- * Get a copy of the remote partition's XPC variables from the reserved page.
- *
- * remote_vars points to a buffer that is cacheline aligned for BTE copies and
- * assumed to be of size XPC_RP_VARS_SIZE.
- */
-static enum xpc_retval
-xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
-{
-	int bres;
-
-
-	if (remote_vars_pa == 0) {
-		return xpcVarsNotSet;
-	}
-
-
-	/* pull over the cross partition variables */
-
-	bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
-				XPC_RP_VARS_SIZE,
-				(BTE_NOTIFY | BTE_WACQUIRE), NULL);
-	if (bres != BTE_SUCCESS) {
-		return xpc_map_bte_errors(bres);
-	}
-
-	if (XPC_VERSION_MAJOR(remote_vars->version) !=
-					XPC_VERSION_MAJOR(XPC_V_VERSION)) {
-		return xpcBadVersion;
-	}
-
-	return xpcSuccess;
-}
-
-
-/*
- * Update the remote partition's info.
- */
-static void
-xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
-		struct timespec *remote_rp_stamp, u64 remote_rp_pa,
-		u64 remote_vars_pa, struct xpc_vars *remote_vars)
-{
-	part->remote_rp_version = remote_rp_version;
-	dev_dbg(xpc_part, "  remote_rp_version = 0x%016lx\n",
-		part->remote_rp_version);
-
-	part->remote_rp_stamp = *remote_rp_stamp;
-	dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
-		part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
-
-	part->remote_rp_pa = remote_rp_pa;
-	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
-
-	part->remote_vars_pa = remote_vars_pa;
-	dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
-		part->remote_vars_pa);
-
-	part->last_heartbeat = remote_vars->heartbeat;
-	dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
-		part->last_heartbeat);
-
-	part->remote_vars_part_pa = remote_vars->vars_part_pa;
-	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
-		part->remote_vars_part_pa);
-
-	part->remote_act_nasid = remote_vars->act_nasid;
-	dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
-		part->remote_act_nasid);
-
-	part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
-	dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
-		part->remote_act_phys_cpuid);
-
-	part->remote_amos_page_pa = remote_vars->amos_page_pa;
-	dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
-		part->remote_amos_page_pa);
-
-	part->remote_vars_version = remote_vars->version;
-	dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
-		part->remote_vars_version);
-}
-
-
-/*
- * Prior code has determined the nasid which generated an IPI.  Inspect
- * that nasid to determine if its partition needs to be activated or
- * deactivated.
- *
- * A partition is consider "awaiting activation" if our partition
- * flags indicate it is not active and it has a heartbeat.  A
- * partition is considered "awaiting deactivation" if our partition
- * flags indicate it is active but it has no heartbeat or it is not
- * sending its heartbeat to us.
- *
- * To determine the heartbeat, the remote nasid must have a properly
- * initialized reserved page.
- */
-static void
-xpc_identify_act_IRQ_req(int nasid)
-{
-	struct xpc_rsvd_page *remote_rp;
-	struct xpc_vars *remote_vars;
-	u64 remote_rp_pa;
-	u64 remote_vars_pa;
-	int remote_rp_version;
-	int reactivate = 0;
-	int stamp_diff;
-	struct timespec remote_rp_stamp = { 0, 0 };
-	partid_t partid;
-	struct xpc_partition *part;
-	enum xpc_retval ret;
-
-
-	/* pull over the reserved page structure */
-
-	remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
-
-	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
-	if (ret != xpcSuccess) {
-		dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
-			"which sent interrupt, reason=%d\n", nasid, ret);
-		return;
-	}
-
-	remote_vars_pa = remote_rp->vars_pa;
-	remote_rp_version = remote_rp->version;
-	if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
-		remote_rp_stamp = remote_rp->stamp;
-	}
-	partid = remote_rp->partid;
-	part = &xpc_partitions[partid];
-
-
-	/* pull over the cross partition variables */
-
-	remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
-
-	ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
-	if (ret != xpcSuccess) {
-
-		dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
-			"which sent interrupt, reason=%d\n", nasid, ret);
-
-		XPC_DEACTIVATE_PARTITION(part, ret);
-		return;
-	}
-
-
-	part->act_IRQ_rcvd++;
-
-	dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
-		"%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
-		remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
-
-	if (xpc_partition_disengaged(part) &&
-					part->act_state == XPC_P_INACTIVE) {
-
-		xpc_update_partition_info(part, remote_rp_version,
-					&remote_rp_stamp, remote_rp_pa,
-					remote_vars_pa, remote_vars);
-
-		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
-			if (xpc_partition_disengage_requested(1UL << partid)) {
-				/*
-				 * Other side is waiting on us to disengage,
-				 * even though we already have.
-				 */
-				return;
-			}
-		} else {
-			/* other side doesn't support disengage requests */
-			xpc_clear_partition_disengage_request(1UL << partid);
-		}
-
-		xpc_activate_partition(part);
-		return;
-	}
-
-	DBUG_ON(part->remote_rp_version == 0);
-	DBUG_ON(part->remote_vars_version == 0);
-
-	if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
-		DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
-							remote_vars_version));
-
-		if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
-			DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
-								version));
-			/* see if the other side rebooted */
-			if (part->remote_amos_page_pa ==
-				remote_vars->amos_page_pa &&
-					xpc_hb_allowed(sn_partition_id,
-								remote_vars)) {
-				/* doesn't look that way, so ignore the IPI */
-				return;
-			}
-		}
-
-		/*
-		 * Other side rebooted and previous XPC didn't support the
-		 * disengage request, so we don't need to do anything special.
-		 */
-
-		xpc_update_partition_info(part, remote_rp_version,
-						&remote_rp_stamp, remote_rp_pa,
-						remote_vars_pa, remote_vars);
-		part->reactivate_nasid = nasid;
-		XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
-		return;
-	}
-
-	DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
-
-	if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
-		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
-
-		/*
-		 * Other side rebooted and previous XPC did support the
-		 * disengage request, but the new one doesn't.
-		 */
-
-		xpc_clear_partition_engaged(1UL << partid);
-		xpc_clear_partition_disengage_request(1UL << partid);
-
-		xpc_update_partition_info(part, remote_rp_version,
-						&remote_rp_stamp, remote_rp_pa,
-						remote_vars_pa, remote_vars);
-		reactivate = 1;
-
-	} else {
-		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
-
-		stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
-							&remote_rp_stamp);
-		if (stamp_diff != 0) {
-			DBUG_ON(stamp_diff >= 0);
-
-			/*
-			 * Other side rebooted and the previous XPC did support
-			 * the disengage request, as does the new one.
-			 */
-
-			DBUG_ON(xpc_partition_engaged(1UL << partid));
-			DBUG_ON(xpc_partition_disengage_requested(1UL <<
-								partid));
-
-			xpc_update_partition_info(part, remote_rp_version,
-						&remote_rp_stamp, remote_rp_pa,
-						remote_vars_pa, remote_vars);
-			reactivate = 1;
-		}
-	}
-
-	if (part->disengage_request_timeout > 0 &&
-					!xpc_partition_disengaged(part)) {
-		/* still waiting on other side to disengage from us */
-		return;
-	}
-
-	if (reactivate) {
-		part->reactivate_nasid = nasid;
-		XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
-
-	} else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
-			xpc_partition_disengage_requested(1UL << partid)) {
-		XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
-	}
-}
-
-
-/*
- * Loop through the activation AMO variables and process any bits
- * which are set.  Each bit indicates a nasid sending a partition
- * activation or deactivation request.
- *
- * Return #of IRQs detected.
- */
-int
-xpc_identify_act_IRQ_sender(void)
-{
-	int word, bit;
-	u64 nasid_mask;
-	u64 nasid;			/* remote nasid */
-	int n_IRQs_detected = 0;
-	AMO_t *act_amos;
-
-
-	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
-
-
-	/* scan through act AMO variable looking for non-zero entries */
-	for (word = 0; word < xp_nasid_mask_words; word++) {
-
-		if (xpc_exiting) {
-			break;
-		}
-
-		nasid_mask = xpc_IPI_receive(&act_amos[word]);
-		if (nasid_mask == 0) {
-			/* no IRQs from nasids in this variable */
-			continue;
-		}
-
-		dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
-			nasid_mask);
-
-
-		/*
-		 * If this nasid has been added to the machine since
-		 * our partition was reset, this will retain the
-		 * remote nasid in our reserved pages machine mask.
-		 * This is used in the event of module reload.
-		 */
-		xpc_mach_nasids[word] |= nasid_mask;
-
-
-		/* locate the nasid(s) which sent interrupts */
-
-		for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
-			if (nasid_mask & (1UL << bit)) {
-				n_IRQs_detected++;
-				nasid = XPC_NASID_FROM_W_B(word, bit);
-				dev_dbg(xpc_part, "interrupt from nasid %ld\n",
-					nasid);
-				xpc_identify_act_IRQ_req(nasid);
-			}
-		}
-	}
-	return n_IRQs_detected;
-}
-
-
-/*
- * See if the other side has responded to a partition disengage request
- * from us.
- */
-int
-xpc_partition_disengaged(struct xpc_partition *part)
-{
-	partid_t partid = XPC_PARTID(part);
-	int disengaged;
-
-
-	disengaged = (xpc_partition_engaged(1UL << partid) == 0);
-	if (part->disengage_request_timeout) {
-		if (!disengaged) {
-			if (jiffies < part->disengage_request_timeout) {
-				/* timelimit hasn't been reached yet */
-				return 0;
-			}
-
-			/*
-			 * Other side hasn't responded to our disengage
-			 * request in a timely fashion, so assume it's dead.
-			 */
-
-			dev_info(xpc_part, "disengage from remote partition %d "
-				"timed out\n", partid);
-			xpc_disengage_request_timedout = 1;
-			xpc_clear_partition_engaged(1UL << partid);
-			disengaged = 1;
-		}
-		part->disengage_request_timeout = 0;
-
-		/* cancel the timer function, provided it's not us */
-		if (!in_interrupt()) {
-			del_singleshot_timer_sync(&part->
-						      disengage_request_timer);
-		}
-
-		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
-					part->act_state != XPC_P_INACTIVE);
-		if (part->act_state != XPC_P_INACTIVE) {
-			xpc_wakeup_channel_mgr(part);
-		}
-
-		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
-			xpc_cancel_partition_disengage_request(part);
-		}
-	}
-	return disengaged;
-}
-
-
-/*
- * Mark specified partition as active.
- */
-enum xpc_retval
-xpc_mark_partition_active(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-	enum xpc_retval ret;
-
-
-	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
-
-	spin_lock_irqsave(&part->act_lock, irq_flags);
-	if (part->act_state == XPC_P_ACTIVATING) {
-		part->act_state = XPC_P_ACTIVE;
-		ret = xpcSuccess;
-	} else {
-		DBUG_ON(part->reason == xpcSuccess);
-		ret = part->reason;
-	}
-	spin_unlock_irqrestore(&part->act_lock, irq_flags);
-
-	return ret;
-}
-
-
-/*
- * Notify XPC that the partition is down.
- */
-void
-xpc_deactivate_partition(const int line, struct xpc_partition *part,
-				enum xpc_retval reason)
-{
-	unsigned long irq_flags;
-
-
-	spin_lock_irqsave(&part->act_lock, irq_flags);
-
-	if (part->act_state == XPC_P_INACTIVE) {
-		XPC_SET_REASON(part, reason, line);
-		spin_unlock_irqrestore(&part->act_lock, irq_flags);
-		if (reason == xpcReactivating) {
-			/* we interrupt ourselves to reactivate partition */
-			xpc_IPI_send_reactivate(part);
-		}
-		return;
-	}
-	if (part->act_state == XPC_P_DEACTIVATING) {
-		if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
-					reason == xpcReactivating) {
-			XPC_SET_REASON(part, reason, line);
-		}
-		spin_unlock_irqrestore(&part->act_lock, irq_flags);
-		return;
-	}
-
-	part->act_state = XPC_P_DEACTIVATING;
-	XPC_SET_REASON(part, reason, line);
-
-	spin_unlock_irqrestore(&part->act_lock, irq_flags);
-
-	if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
-		xpc_request_partition_disengage(part);
-		xpc_IPI_send_disengage(part);
-
-		/* set a timelimit on the disengage request */
-		part->disengage_request_timeout = jiffies +
-					(xpc_disengage_request_timelimit * HZ);
-		part->disengage_request_timer.expires =
-					part->disengage_request_timeout;
-		add_timer(&part->disengage_request_timer);
-	}
-
-	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
-		XPC_PARTID(part), reason);
-
-	xpc_partition_going_down(part, reason);
-}
-
-
-/*
- * Mark specified partition as inactive.
- */
-void
-xpc_mark_partition_inactive(struct xpc_partition *part)
-{
-	unsigned long irq_flags;
-
-
-	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
-		XPC_PARTID(part));
-
-	spin_lock_irqsave(&part->act_lock, irq_flags);
-	part->act_state = XPC_P_INACTIVE;
-	spin_unlock_irqrestore(&part->act_lock, irq_flags);
-	part->remote_rp_pa = 0;
-}
-
-
-/*
- * SAL has provided a partition and machine mask.  The partition mask
- * contains a bit for each even nasid in our partition.  The machine
- * mask contains a bit for each even nasid in the entire machine.
- *
- * Using those two bit arrays, we can determine which nasids are
- * known in the machine.  Each should also have a reserved page
- * initialized if they are available for partitioning.
- */
-void
-xpc_discovery(void)
-{
-	void *remote_rp_base;
-	struct xpc_rsvd_page *remote_rp;
-	struct xpc_vars *remote_vars;
-	u64 remote_rp_pa;
-	u64 remote_vars_pa;
-	int region;
-	int region_size;
-	int max_regions;
-	int nasid;
-	struct xpc_rsvd_page *rp;
-	partid_t partid;
-	struct xpc_partition *part;
-	u64 *discovered_nasids;
-	enum xpc_retval ret;
-
-
-	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
-						xp_nasid_mask_bytes,
-						GFP_KERNEL, &remote_rp_base);
-	if (remote_rp == NULL) {
-		return;
-	}
-	remote_vars = (struct xpc_vars *) remote_rp;
-
-
-	discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
-							GFP_KERNEL);
-	if (discovered_nasids == NULL) {
-		kfree(remote_rp_base);
-		return;
-	}
-
-	rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
-
-	/*
-	 * The term 'region' in this context refers to the minimum number of
-	 * nodes that can comprise an access protection grouping. The access
-	 * protection is in regards to memory, IOI and IPI.
-	 */
-	max_regions = 64;
-	region_size = sn_region_size;
-
-	switch (region_size) {
-	case 128:
-		max_regions *= 2;
-	case 64:
-		max_regions *= 2;
-	case 32:
-		max_regions *= 2;
-		region_size = 16;
-		DBUG_ON(!is_shub2());
-	}
-
-	for (region = 0; region < max_regions; region++) {
-
-		if ((volatile int) xpc_exiting) {
-			break;
-		}
-
-		dev_dbg(xpc_part, "searching region %d\n", region);
-
-		for (nasid = (region * region_size * 2);
-		     nasid < ((region + 1) * region_size * 2);
-		     nasid += 2) {
-
-			if ((volatile int) xpc_exiting) {
-				break;
-			}
-
-			dev_dbg(xpc_part, "checking nasid %d\n", nasid);
-
-
-			if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
-				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
-					"part of the local partition; skipping "
-					"region\n", nasid);
-				break;
-			}
-
-			if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
-				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
-					"not on Numa-Link network at reset\n",
-					nasid);
-				continue;
-			}
-
-			if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
-				dev_dbg(xpc_part, "Nasid %d is part of a "
-					"partition which was previously "
-					"discovered\n", nasid);
-				continue;
-			}
-
-
-			/* pull over the reserved page structure */
-
-			ret = xpc_get_remote_rp(nasid, discovered_nasids,
-					      remote_rp, &remote_rp_pa);
-			if (ret != xpcSuccess) {
-				dev_dbg(xpc_part, "unable to get reserved page "
-					"from nasid %d, reason=%d\n", nasid,
-					ret);
-
-				if (ret == xpcLocalPartid) {
-					break;
-				}
-				continue;
-			}
-
-			remote_vars_pa = remote_rp->vars_pa;
-
-			partid = remote_rp->partid;
-			part = &xpc_partitions[partid];
-
-
-			/* pull over the cross partition variables */
-
-			ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
-			if (ret != xpcSuccess) {
-				dev_dbg(xpc_part, "unable to get XPC variables "
-					"from nasid %d, reason=%d\n", nasid,
-					ret);
-
-				XPC_DEACTIVATE_PARTITION(part, ret);
-				continue;
-			}
-
-			if (part->act_state != XPC_P_INACTIVE) {
-				dev_dbg(xpc_part, "partition %d on nasid %d is "
-					"already activating\n", partid, nasid);
-				break;
-			}
-
-			/*
-			 * Register the remote partition's AMOs with SAL so it
-			 * can handle and cleanup errors within that address
-			 * range should the remote partition go down. We don't
-			 * unregister this range because it is difficult to
-			 * tell when outstanding writes to the remote partition
-			 * are finished and thus when it is thus safe to
-			 * unregister. This should not result in wasted space
-			 * in the SAL xp_addr_region table because we should
-			 * get the same page for remote_act_amos_pa after
-			 * module reloads and system reboots.
-			 */
-			if (sn_register_xp_addr_region(
-					    remote_vars->amos_page_pa,
-							PAGE_SIZE, 1) < 0) {
-				dev_dbg(xpc_part, "partition %d failed to "
-					"register xp_addr region 0x%016lx\n",
-					partid, remote_vars->amos_page_pa);
-
-				XPC_SET_REASON(part, xpcPhysAddrRegFailed,
-						__LINE__);
-				break;
-			}
-
-			/*
-			 * The remote nasid is valid and available.
-			 * Send an interrupt to that nasid to notify
-			 * it that we are ready to begin activation.
-			 */
-			dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
-				"nasid %d, phys_cpuid 0x%x\n",
-				remote_vars->amos_page_pa,
-				remote_vars->act_nasid,
-				remote_vars->act_phys_cpuid);
-
-			if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
-								version)) {
-				part->remote_amos_page_pa =
-						remote_vars->amos_page_pa;
-				xpc_mark_partition_disengaged(part);
-				xpc_cancel_partition_disengage_request(part);
-			}
-			xpc_IPI_send_activate(remote_vars);
-		}
-	}
-
-	kfree(discovered_nasids);
-	kfree(remote_rp_base);
-}
-
-
-/*
- * Given a partid, get the nasids owned by that partition from the
- * remote partition's reserved page.
- */
-enum xpc_retval
-xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
-{
-	struct xpc_partition *part;
-	u64 part_nasid_pa;
-	int bte_res;
-
-
-	part = &xpc_partitions[partid];
-	if (part->remote_rp_pa == 0) {
-		return xpcPartitionDown;
-	}
-
-	memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
-
-	part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
-
-	bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
-			xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
-
-	return xpc_map_bte_errors(bte_res);
-}
-
diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c
deleted file mode 100644
index e5c6d3c0a8e..00000000000
--- a/arch/ia64/sn/kernel/xpnet.c
+++ /dev/null
@@ -1,719 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
- */
-
-
-/*
- * Cross Partition Network Interface (XPNET) support
- *
- *	XPNET provides a virtual network layered on top of the Cross
- *	Partition communication layer.
- *
- *	XPNET provides direct point-to-point and broadcast-like support
- *	for an ethernet-like device.  The ethernet broadcast medium is
- *	replaced with a point-to-point message structure which passes
- *	pointers to a DMA-capable block that a remote partition should
- *	retrieve and pass to the upper level networking layer.
- *
- */
-
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <asm/sn/bte.h>
-#include <asm/sn/io.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/types.h>
-#include <asm/atomic.h>
-#include <asm/sn/xp.h>
-
-
-/*
- * The message payload transferred by XPC.
- *
- * buf_pa is the physical address where the DMA should pull from.
- *
- * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
- * cacheline boundary.  To accomplish this, we record the number of
- * bytes from the beginning of the first cacheline to the first useful
- * byte of the skb (leadin_ignore) and the number of bytes from the
- * last useful byte of the skb to the end of the last cacheline
- * (tailout_ignore).
- *
- * size is the number of bytes to transfer which includes the skb->len
- * (useful bytes of the senders skb) plus the leadin and tailout
- */
-struct xpnet_message {
-	u16 version;		/* Version for this message */
-	u16 embedded_bytes;	/* #of bytes embedded in XPC message */
-	u32 magic;		/* Special number indicating this is xpnet */
-	u64 buf_pa;		/* phys address of buffer to retrieve */
-	u32 size;		/* #of bytes in buffer */
-	u8 leadin_ignore;	/* #of bytes to ignore at the beginning */
-	u8 tailout_ignore;	/* #of bytes to ignore at the end */
-	unsigned char data;	/* body of small packets */
-};
-
-/*
- * Determine the size of our message, the cacheline aligned size,
- * and then the number of message will request from XPC.
- *
- * XPC expects each message to exist in an individual cacheline.
- */
-#define XPNET_MSG_SIZE		(L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET)
-#define XPNET_MSG_DATA_MAX	\
-		(XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data))
-#define XPNET_MSG_ALIGNED_SIZE	(L1_CACHE_ALIGN(XPNET_MSG_SIZE))
-#define XPNET_MSG_NENTRIES	(PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE)
-
-
-#define XPNET_MAX_KTHREADS	(XPNET_MSG_NENTRIES + 1)
-#define XPNET_MAX_IDLE_KTHREADS	(XPNET_MSG_NENTRIES + 1)
-
-/*
- * Version number of XPNET implementation. XPNET can always talk to versions
- * with same major #, and never talk to versions with a different version.
- */
-#define _XPNET_VERSION(_major, _minor)	(((_major) << 4) | (_minor))
-#define XPNET_VERSION_MAJOR(_v)		((_v) >> 4)
-#define XPNET_VERSION_MINOR(_v)		((_v) & 0xf)
-
-#define	XPNET_VERSION _XPNET_VERSION(1,0)		/* version 1.0 */
-#define	XPNET_VERSION_EMBED _XPNET_VERSION(1,1)		/* version 1.1 */
-#define XPNET_MAGIC	0x88786984 /* "XNET" */
-
-#define XPNET_VALID_MSG(_m)						     \
-   ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \
-    && (msg->magic == XPNET_MAGIC))
-
-#define XPNET_DEVICE_NAME		"xp0"
-
-
-/*
- * When messages are queued with xpc_send_notify, a kmalloc'd buffer
- * of the following type is passed as a notification cookie.  When the
- * notification function is called, we use the cookie to decide
- * whether all outstanding message sends have completed.  The skb can
- * then be released.
- */
-struct xpnet_pending_msg {
-	struct list_head free_list;
-	struct sk_buff *skb;
-	atomic_t use_count;
-};
-
-/* driver specific structure pointed to by the device structure */
-struct xpnet_dev_private {
-	struct net_device_stats stats;
-};
-
-struct net_device *xpnet_device;
-
-/*
- * When we are notified of other partitions activating, we add them to
- * our bitmask of partitions to which we broadcast.
- */
-static u64 xpnet_broadcast_partitions;
-/* protect above */
-static DEFINE_SPINLOCK(xpnet_broadcast_lock);
-
-/*
- * Since the Block Transfer Engine (BTE) is being used for the transfer
- * and it relies upon cache-line size transfers, we need to reserve at
- * least one cache-line for head and tail alignment.  The BTE is
- * limited to 8MB transfers.
- *
- * Testing has shown that changing MTU to greater than 64KB has no effect
- * on TCP as the two sides negotiate a Max Segment Size that is limited
- * to 64K.  Other protocols May use packets greater than this, but for
- * now, the default is 64KB.
- */
-#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
-/* 32KB has been determined to be the ideal */
-#define XPNET_DEF_MTU (0x8000UL)
-
-
-/*
- * The partition id is encapsulated in the MAC address.  The following
- * define locates the octet the partid is in.
- */
-#define XPNET_PARTID_OCTET	1
-#define XPNET_LICENSE_OCTET	2
-
-
-/*
- * Define the XPNET debug device structure that is to be used with dev_dbg(),
- * dev_err(), dev_warn(), and dev_info().
- */
-struct device_driver xpnet_dbg_name = {
-	.name = "xpnet"
-};
-
-struct device xpnet_dbg_subname = {
-	.bus_id = {0},			/* set to "" */
-	.driver = &xpnet_dbg_name
-};
-
-struct device *xpnet = &xpnet_dbg_subname;
-
-/*
- * Packet was recevied by XPC and forwarded to us.
- */
-static void
-xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
-{
-	struct sk_buff *skb;
-	bte_result_t bret;
-	struct xpnet_dev_private *priv =
-		(struct xpnet_dev_private *) xpnet_device->priv;
-
-
-	if (!XPNET_VALID_MSG(msg)) {
-		/*
-		 * Packet with a different XPC version.  Ignore.
-		 */
-		xpc_received(partid, channel, (void *) msg);
-
-		priv->stats.rx_errors++;
-
-		return;
-	}
-	dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
-		msg->leadin_ignore, msg->tailout_ignore);
-
-
-	/* reserve an extra cache line */
-	skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
-	if (!skb) {
-		dev_err(xpnet, "failed on dev_alloc_skb(%d)\n",
-			msg->size + L1_CACHE_BYTES);
-
-		xpc_received(partid, channel, (void *) msg);
-
-		priv->stats.rx_errors++;
-
-		return;
-	}
-
-	/*
-	 * The allocated skb has some reserved space.
-	 * In order to use bte_copy, we need to get the
-	 * skb->data pointer moved forward.
-	 */
-	skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
-					    (L1_CACHE_BYTES - 1)) +
-			  msg->leadin_ignore));
-
-	/*
-	 * Update the tail pointer to indicate data actually
-	 * transferred.
-	 */
-	skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore));
-
-	/*
-	 * Move the data over from the the other side.
-	 */
-	if ((XPNET_VERSION_MINOR(msg->version) == 1) &&
-						(msg->embedded_bytes != 0)) {
-		dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, "
-			"%lu)\n", skb->data, &msg->data,
-			(size_t) msg->embedded_bytes);
-
-		memcpy(skb->data, &msg->data, (size_t) msg->embedded_bytes);
-	} else {
-		dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
-			"bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa,
-			(void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
-			msg->size);
-
-		bret = bte_copy(msg->buf_pa,
-				__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
-				msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
-
-		if (bret != BTE_SUCCESS) {
-			// >>> Need better way of cleaning skb.  Currently skb
-			// >>> appears in_use and we can't just call
-			// >>> dev_kfree_skb.
-			dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned "
-				"error=0x%x\n", (void *)msg->buf_pa,
-				(void *)__pa((u64)skb->data &
-							~(L1_CACHE_BYTES - 1)),
-				msg->size, bret);
-
-			xpc_received(partid, channel, (void *) msg);
-
-			priv->stats.rx_errors++;
-
-			return;
-		}
-	}
-
-	dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
-		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
-		(void *) skb->data, (void *) skb->tail, (void *) skb->end,
-		skb->len);
-
-	skb->dev = xpnet_device;
-	skb->protocol = eth_type_trans(skb, xpnet_device);
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-	dev_dbg(xpnet, "passing skb to network layer; \n\tskb->head=0x%p "
-		"skb->data=0x%p skb->tail=0x%p skb->end=0x%p skb->len=%d\n",
-		(void *) skb->head, (void *) skb->data, (void *) skb->tail,
-		(void *) skb->end, skb->len);
-
-
-	xpnet_device->last_rx = jiffies;
-	priv->stats.rx_packets++;
-	priv->stats.rx_bytes += skb->len + ETH_HLEN;
-
-	netif_rx_ni(skb);
-	xpc_received(partid, channel, (void *) msg);
-}
-
-
-/*
- * This is the handler which XPC calls during any sort of change in
- * state or message reception on a connection.
- */
-static void
-xpnet_connection_activity(enum xpc_retval reason, partid_t partid, int channel,
-			  void *data, void *key)
-{
-	long bp;
-
-
-	DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-	DBUG_ON(channel != XPC_NET_CHANNEL);
-
-	switch(reason) {
-	case xpcMsgReceived:	/* message received */
-		DBUG_ON(data == NULL);
-
-		xpnet_receive(partid, channel, (struct xpnet_message *) data);
-		break;
-
-	case xpcConnected:	/* connection completed to a partition */
-		spin_lock_bh(&xpnet_broadcast_lock);
-		xpnet_broadcast_partitions |= 1UL << (partid -1 );
-		bp = xpnet_broadcast_partitions;
-		spin_unlock_bh(&xpnet_broadcast_lock);
-
-		netif_carrier_on(xpnet_device);
-
-		dev_dbg(xpnet, "%s connection created to partition %d; "
-			"xpnet_broadcast_partitions=0x%lx\n",
-			xpnet_device->name, partid, bp);
-		break;
-
-	default:
-		spin_lock_bh(&xpnet_broadcast_lock);
-		xpnet_broadcast_partitions &= ~(1UL << (partid -1 ));
-		bp = xpnet_broadcast_partitions;
-		spin_unlock_bh(&xpnet_broadcast_lock);
-
-		if (bp == 0) {
-			netif_carrier_off(xpnet_device);
-		}
-
-		dev_dbg(xpnet, "%s disconnected from partition %d; "
-			"xpnet_broadcast_partitions=0x%lx\n",
-			xpnet_device->name, partid, bp);
-		break;
-
-	}
-}
-
-
-static int
-xpnet_dev_open(struct net_device *dev)
-{
-	enum xpc_retval ret;
-
-
-	dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %d, "
-		"%d)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
-		XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS,
-		XPNET_MAX_IDLE_KTHREADS);
-
-	ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
-			  XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
-			  XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS);
-	if (ret != xpcSuccess) {
-		dev_err(xpnet, "ifconfig up of %s failed on XPC connect, "
-			"ret=%d\n", dev->name, ret);
-
-		return -ENOMEM;
-	}
-
-	dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name);
-
-	return 0;
-}
-
-
-static int
-xpnet_dev_stop(struct net_device *dev)
-{
-	xpc_disconnect(XPC_NET_CHANNEL);
-
-	dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name);
-
-	return 0;
-}
-
-
-static int
-xpnet_dev_change_mtu(struct net_device *dev, int new_mtu)
-{
-	/* 68 comes from min TCP+IP+MAC header */
-	if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) {
-		dev_err(xpnet, "ifconfig %s mtu %d failed; value must be "
-			"between 68 and %ld\n", dev->name, new_mtu,
-			XPNET_MAX_MTU);
-		return -EINVAL;
-	}
-
-	dev->mtu = new_mtu;
-	dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu);
-	return 0;
-}
-
-
-/*
- * Required for the net_device structure.
- */
-static int
-xpnet_dev_set_config(struct net_device *dev, struct ifmap *new_map)
-{
-	return 0;
-}
-
-
-/*
- * Return statistics to the caller.
- */
-static struct net_device_stats *
-xpnet_dev_get_stats(struct net_device *dev)
-{
-	struct xpnet_dev_private *priv;
-
-
-	priv = (struct xpnet_dev_private *) dev->priv;
-
-	return &priv->stats;
-}
-
-
-/*
- * Notification that the other end has received the message and
- * DMA'd the skb information.  At this point, they are done with
- * our side.  When all recipients are done processing, we
- * release the skb and then release our pending message structure.
- */
-static void
-xpnet_send_completed(enum xpc_retval reason, partid_t partid, int channel,
-			void *__qm)
-{
-	struct xpnet_pending_msg *queued_msg =
-		(struct xpnet_pending_msg *) __qm;
-
-
-	DBUG_ON(queued_msg == NULL);
-
-	dev_dbg(xpnet, "message to %d notified with reason %d\n",
-		partid, reason);
-
-	if (atomic_dec_return(&queued_msg->use_count) == 0) {
-		dev_dbg(xpnet, "all acks for skb->head=-x%p\n",
-			(void *) queued_msg->skb->head);
-
-		dev_kfree_skb_any(queued_msg->skb);
-		kfree(queued_msg);
-	}
-}
-
-
-/*
- * Network layer has formatted a packet (skb) and is ready to place it
- * "on the wire".  Prepare and send an xpnet_message to all partitions
- * which have connected with us and are targets of this packet.
- *
- * MAC-NOTE:  For the XPNET driver, the MAC address contains the
- * destination partition_id.  If the destination partition id word
- * is 0xff, this packet is to broadcast to all partitions.
- */
-static int
-xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct xpnet_pending_msg *queued_msg;
-	enum xpc_retval ret;
-	struct xpnet_message *msg;
-	u64 start_addr, end_addr;
-	long dp;
-	u8 second_mac_octet;
-	partid_t dest_partid;
-	struct xpnet_dev_private *priv;
-	u16 embedded_bytes;
-
-
-	priv = (struct xpnet_dev_private *) dev->priv;
-
-
-	dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
-		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
-		(void *) skb->data, (void *) skb->tail, (void *) skb->end,
-		skb->len);
-
-
-	/*
-	 * The xpnet_pending_msg tracks how many outstanding
-	 * xpc_send_notifies are relying on this skb.  When none
-	 * remain, release the skb.
-	 */
-	queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC);
-	if (queued_msg == NULL) {
-		dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping "
-			"packet\n", sizeof(struct xpnet_pending_msg));
-
-		priv->stats.tx_errors++;
-
-		return -ENOMEM;
-	}
-
-
-	/* get the beginning of the first cacheline and end of last */
-	start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1));
-	end_addr = L1_CACHE_ALIGN((u64) skb->tail);
-
-	/* calculate how many bytes to embed in the XPC message */
-	embedded_bytes = 0;
-	if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
-		/* skb->data does fit so embed */
-		embedded_bytes = skb->len;
-	}
-
-
-	/*
-	 * Since the send occurs asynchronously, we set the count to one
-	 * and begin sending.  Any sends that happen to complete before
-	 * we are done sending will not free the skb.  We will be left
-	 * with that task during exit.  This also handles the case of
-	 * a packet destined for a partition which is no longer up.
-	 */
-	atomic_set(&queued_msg->use_count, 1);
-	queued_msg->skb = skb;
-
-
-	second_mac_octet = skb->data[XPNET_PARTID_OCTET];
-	if (second_mac_octet == 0xff) {
-		/* we are being asked to broadcast to all partitions */
-		dp = xpnet_broadcast_partitions;
-	} else if (second_mac_octet != 0) {
-		dp = xpnet_broadcast_partitions &
-					(1UL << (second_mac_octet - 1));
-	} else {
-		/* 0 is an invalid partid.  Ignore */
-		dp = 0;
-	}
-	dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp);
-
-	/*
-	 * If we wanted to allow promiscous mode to work like an
-	 * unswitched network, this would be a good point to OR in a
-	 * mask of partitions which should be receiving all packets.
-	 */
-
-	/*
-	 * Main send loop.
-	 */
-	for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS;
-	     dest_partid++) {
-
-
-		if (!(dp & (1UL << (dest_partid - 1)))) {
-			/* not destined for this partition */
-			continue;
-		}
-
-		/* remove this partition from the destinations mask */
-		dp &= ~(1UL << (dest_partid - 1));
-
-
-		/* found a partition to send to */
-
-		ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL,
-				   XPC_NOWAIT, (void **)&msg);
-		if (unlikely(ret != xpcSuccess)) {
-			continue;
-		}
-
-		msg->embedded_bytes = embedded_bytes;
-		if (unlikely(embedded_bytes != 0)) {
-			msg->version = XPNET_VERSION_EMBED;
-			dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
-				&msg->data, skb->data, (size_t) embedded_bytes);
-			memcpy(&msg->data, skb->data, (size_t) embedded_bytes);
-		} else {
-			msg->version = XPNET_VERSION;
-		}
-		msg->magic = XPNET_MAGIC;
-		msg->size = end_addr - start_addr;
-		msg->leadin_ignore = (u64) skb->data - start_addr;
-		msg->tailout_ignore = end_addr - (u64) skb->tail;
-		msg->buf_pa = __pa(start_addr);
-
-		dev_dbg(xpnet, "sending XPC message to %d:%d\nmsg->buf_pa="
-			"0x%lx, msg->size=%u, msg->leadin_ignore=%u, "
-			"msg->tailout_ignore=%u\n", dest_partid,
-			XPC_NET_CHANNEL, msg->buf_pa, msg->size,
-			msg->leadin_ignore, msg->tailout_ignore);
-
-
-		atomic_inc(&queued_msg->use_count);
-
-		ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg,
-				      xpnet_send_completed, queued_msg);
-		if (unlikely(ret != xpcSuccess)) {
-			atomic_dec(&queued_msg->use_count);
-			continue;
-		}
-
-	}
-
-	if (atomic_dec_return(&queued_msg->use_count) == 0) {
-		dev_dbg(xpnet, "no partitions to receive packet destined for "
-			"%d\n", dest_partid);
-
-
-		dev_kfree_skb(skb);
-		kfree(queued_msg);
-	}
-
-	priv->stats.tx_packets++;
-	priv->stats.tx_bytes += skb->len;
-
-	return 0;
-}
-
-
-/*
- * Deal with transmit timeouts coming from the network layer.
- */
-static void
-xpnet_dev_tx_timeout (struct net_device *dev)
-{
-	struct xpnet_dev_private *priv;
-
-
-	priv = (struct xpnet_dev_private *) dev->priv;
-
-	priv->stats.tx_errors++;
-	return;
-}
-
-
-static int __init
-xpnet_init(void)
-{
-	int i;
-	u32 license_num;
-	int result = -ENOMEM;
-
-
-	if (!ia64_platform_is("sn2")) {
-		return -ENODEV;
-	}
-
-	dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
-
-	/*
-	 * use ether_setup() to init the majority of our device
-	 * structure and then override the necessary pieces.
-	 */
-	xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private),
-				    XPNET_DEVICE_NAME, ether_setup);
-	if (xpnet_device == NULL) {
-		return -ENOMEM;
-	}
-
-	netif_carrier_off(xpnet_device);
-
-	xpnet_device->mtu = XPNET_DEF_MTU;
-	xpnet_device->change_mtu = xpnet_dev_change_mtu;
-	xpnet_device->open = xpnet_dev_open;
-	xpnet_device->get_stats = xpnet_dev_get_stats;
-	xpnet_device->stop = xpnet_dev_stop;
-	xpnet_device->hard_start_xmit = xpnet_dev_hard_start_xmit;
-	xpnet_device->tx_timeout = xpnet_dev_tx_timeout;
-	xpnet_device->set_config = xpnet_dev_set_config;
-
-	/*
-	 * Multicast assumes the LSB of the first octet is set for multicast
-	 * MAC addresses.  We chose the first octet of the MAC to be unlikely
-	 * to collide with any vendor's officially issued MAC.
-	 */
-	xpnet_device->dev_addr[0] = 0xfe;
-	xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id;
-	license_num = sn_partition_serial_number_val();
-	for (i = 3; i >= 0; i--) {
-		xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] =
-							license_num & 0xff;
-		license_num = license_num >> 8;
-	}
-
-	/*
-	 * ether_setup() sets this to a multicast device.  We are
-	 * really not supporting multicast at this time.
-	 */
-	xpnet_device->flags &= ~IFF_MULTICAST;
-
-	/*
-	 * No need to checksum as it is a DMA transfer.  The BTE will
-	 * report an error if the data is not retrievable and the
-	 * packet will be dropped.
-	 */
-	xpnet_device->features = NETIF_F_NO_CSUM;
-
-	result = register_netdev(xpnet_device);
-	if (result != 0) {
-		free_netdev(xpnet_device);
-	}
-
-	return result;
-}
-module_init(xpnet_init);
-
-
-static void __exit
-xpnet_exit(void)
-{
-	dev_info(xpnet, "unregistering network device %s\n",
-		xpnet_device[0].name);
-
-	unregister_netdev(xpnet_device);
-
-	free_netdev(xpnet_device);
-}
-module_exit(xpnet_exit);
-
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)");
-MODULE_LICENSE("GPL");
-
diff --git a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile
index c6946784a6a..df2a9014542 100644
--- a/arch/ia64/sn/pci/Makefile
+++ b/arch/ia64/sn/pci/Makefile
@@ -7,6 +7,6 @@
 #
 # Makefile for the sn pci general routines.
 
-CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+ccflags-y := -Iarch/ia64/sn/include
 
 obj-y := pci_dma.o tioca_provider.o tioce_provider.o pcibr/
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index b4b84c26921..d0853e8e862 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -9,14 +9,16 @@
  * a description of how these routines should be used.
  */
 
+#include <linux/gfp.h>
 #include <linux/module.h>
+#include <linux/dma-mapping.h>
 #include <asm/dma.h>
-#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/intr.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/pcidev.h>
 #include <asm/sn/sn_sal.h>
 
-#define SG_ENT_VIRT_ADDRESS(sg)	(page_address((sg)->page) + (sg)->offset)
+#define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))
 #define SG_ENT_PHYS_ADDRESS(SG)	virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
 
 /**
@@ -30,15 +32,14 @@
  * this function.  Of course, SN only supports devices that have 32 or more
  * address bits when using the PMU.
  */
-int sn_dma_supported(struct device *dev, u64 mask)
+static int sn_dma_supported(struct device *dev, u64 mask)
 {
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 
 	if (mask < 0x7fffffff)
 		return 0;
 	return 1;
 }
-EXPORT_SYMBOL(sn_dma_supported);
 
 /**
  * sn_dma_set_mask - set the DMA mask
@@ -49,7 +50,7 @@ EXPORT_SYMBOL(sn_dma_supported);
  */
 int sn_dma_set_mask(struct device *dev, u64 dma_mask)
 {
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 
 	if (!sn_dma_supported(dev, dma_mask))
 		return 0;
@@ -74,8 +75,9 @@ EXPORT_SYMBOL(sn_dma_set_mask);
  * queue for a SCSI controller).  See Documentation/DMA-API.txt for
  * more information.
  */
-void *sn_dma_alloc_coherent(struct device *dev, size_t size,
-			    dma_addr_t * dma_handle, gfp_t flags)
+static void *sn_dma_alloc_coherent(struct device *dev, size_t size,
+				   dma_addr_t * dma_handle, gfp_t flags,
+				   struct dma_attrs *attrs)
 {
 	void *cpuaddr;
 	unsigned long phys_addr;
@@ -83,14 +85,15 @@ void *sn_dma_alloc_coherent(struct device *dev, size_t size,
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 
 	/*
 	 * Allocate the memory.
 	 */
 	node = pcibus_to_node(pdev->bus);
 	if (likely(node >=0)) {
-		struct page *p = alloc_pages_node(node, flags, get_order(size));
+		struct page *p = alloc_pages_exact_node(node,
+						flags, get_order(size));
 
 		if (likely(p))
 			cpuaddr = page_address(p);
@@ -113,16 +116,16 @@ void *sn_dma_alloc_coherent(struct device *dev, size_t size,
 	 * resources.
 	 */
 
-	*dma_handle = provider->dma_map_consistent(pdev, phys_addr, size);
+	*dma_handle = provider->dma_map_consistent(pdev, phys_addr, size,
+						   SN_DMA_ADDR_PHYS);
 	if (!*dma_handle) {
-		printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
+		printk(KERN_ERR "%s: out of ATEs\n", __func__);
 		free_pages((unsigned long)cpuaddr, get_order(size));
 		return NULL;
 	}
 
 	return cpuaddr;
 }
-EXPORT_SYMBOL(sn_dma_alloc_coherent);
 
 /**
  * sn_pci_free_coherent - free memory associated with coherent DMAable region
@@ -134,25 +137,25 @@ EXPORT_SYMBOL(sn_dma_alloc_coherent);
  * Frees the memory allocated by dma_alloc_coherent(), potentially unmapping
  * any associated IOMMU mappings.
  */
-void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
-			  dma_addr_t dma_handle)
+static void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+				 dma_addr_t dma_handle, struct dma_attrs *attrs)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 
 	provider->dma_unmap(pdev, dma_handle, 0);
 	free_pages((unsigned long)cpu_addr, get_order(size));
 }
-EXPORT_SYMBOL(sn_dma_free_coherent);
 
 /**
- * sn_dma_map_single - map a single page for DMA
+ * sn_dma_map_single_attrs - map a single page for DMA
  * @dev: device to map for
  * @cpu_addr: kernel virtual address of the region to map
  * @size: size of the region
  * @direction: DMA direction
+ * @attrs: optional dma attributes
  *
  * Map the region pointed to by @cpu_addr for DMA and return the
  * DMA address.
@@ -162,51 +165,68 @@ EXPORT_SYMBOL(sn_dma_free_coherent);
  * no way of saving the dmamap handle from the alloc to later free
  * (which is pretty much unacceptable).
  *
+ * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with
+ * dma_map_consistent() so that writes force a flush of pending DMA.
+ * (See "SGI Altix Architecture Considerations for Linux Device Drivers",
+ * Document Number: 007-4763-001)
+ *
  * TODO: simplify our interface;
  *       figure out how to save dmamap handle so can use two step.
  */
-dma_addr_t sn_dma_map_single(struct device *dev, void *cpu_addr, size_t size,
-			     int direction)
+static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page,
+				  unsigned long offset, size_t size,
+				  enum dma_data_direction dir,
+				  struct dma_attrs *attrs)
 {
+	void *cpu_addr = page_address(page) + offset;
 	dma_addr_t dma_addr;
 	unsigned long phys_addr;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+	int dmabarr;
+
+	dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs);
 
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 
 	phys_addr = __pa(cpu_addr);
-	dma_addr = provider->dma_map(pdev, phys_addr, size);
+	if (dmabarr)
+		dma_addr = provider->dma_map_consistent(pdev, phys_addr,
+							size, SN_DMA_ADDR_PHYS);
+	else
+		dma_addr = provider->dma_map(pdev, phys_addr, size,
+					     SN_DMA_ADDR_PHYS);
+
 	if (!dma_addr) {
-		printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
+		printk(KERN_ERR "%s: out of ATEs\n", __func__);
 		return 0;
 	}
 	return dma_addr;
 }
-EXPORT_SYMBOL(sn_dma_map_single);
 
 /**
- * sn_dma_unmap_single - unamp a DMA mapped page
+ * sn_dma_unmap_single_attrs - unamp a DMA mapped page
  * @dev: device to sync
  * @dma_addr: DMA address to sync
  * @size: size of region
  * @direction: DMA direction
+ * @attrs: optional dma attributes
  *
  * This routine is supposed to sync the DMA region specified
  * by @dma_handle into the coherence domain.  On SN, we're always cache
  * coherent, so we just need to free any ATEs associated with this mapping.
  */
-void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-			 int direction)
+static void sn_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+			      size_t size, enum dma_data_direction dir,
+			      struct dma_attrs *attrs)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 
-	provider->dma_unmap(pdev, dma_addr, direction);
+	provider->dma_unmap(pdev, dma_addr, dir);
 }
-EXPORT_SYMBOL(sn_dma_unmap_single);
 
 /**
  * sn_dma_unmap_sg - unmap a DMA scatterlist
@@ -214,25 +234,27 @@ EXPORT_SYMBOL(sn_dma_unmap_single);
  * @sg: scatterlist to unmap
  * @nhwentries: number of scatterlist entries
  * @direction: DMA direction
+ * @attrs: optional dma attributes
  *
  * Unmap a set of streaming mode DMA translations.
  */
-void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-		     int nhwentries, int direction)
+static void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
+			    int nhwentries, enum dma_data_direction dir,
+			    struct dma_attrs *attrs)
 {
 	int i;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+	struct scatterlist *sg;
 
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 
-	for (i = 0; i < nhwentries; i++, sg++) {
-		provider->dma_unmap(pdev, sg->dma_address, direction);
+	for_each_sg(sgl, sg, nhwentries, i) {
+		provider->dma_unmap(pdev, sg->dma_address, dir);
 		sg->dma_address = (dma_addr_t) NULL;
 		sg->dma_length = 0;
 	}
 }
-EXPORT_SYMBOL(sn_dma_unmap_sg);
 
 /**
  * sn_dma_map_sg - map a scatterlist for DMA
@@ -240,36 +262,55 @@ EXPORT_SYMBOL(sn_dma_unmap_sg);
  * @sg: scatterlist to map
  * @nhwentries: number of entries
  * @direction: direction of the DMA transaction
+ * @attrs: optional dma attributes
+ *
+ * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with
+ * dma_map_consistent() so that writes force a flush of pending DMA.
+ * (See "SGI Altix Architecture Considerations for Linux Device Drivers",
+ * Document Number: 007-4763-001)
  *
  * Maps each entry of @sg for DMA.
  */
-int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
-		  int direction)
+static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl,
+			 int nhwentries, enum dma_data_direction dir,
+			 struct dma_attrs *attrs)
 {
 	unsigned long phys_addr;
-	struct scatterlist *saved_sg = sg;
+	struct scatterlist *saved_sg = sgl, *sg;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 	int i;
+	int dmabarr;
+
+	dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs);
 
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 
 	/*
 	 * Setup a DMA address for each entry in the scatterlist.
 	 */
-	for (i = 0; i < nhwentries; i++, sg++) {
+	for_each_sg(sgl, sg, nhwentries, i) {
+		dma_addr_t dma_addr;
 		phys_addr = SG_ENT_PHYS_ADDRESS(sg);
-		sg->dma_address = provider->dma_map(pdev,
-						    phys_addr, sg->length);
+		if (dmabarr)
+			dma_addr = provider->dma_map_consistent(pdev,
+								phys_addr,
+								sg->length,
+								SN_DMA_ADDR_PHYS);
+		else
+			dma_addr = provider->dma_map(pdev, phys_addr,
+						     sg->length,
+						     SN_DMA_ADDR_PHYS);
 
+		sg->dma_address = dma_addr;
 		if (!sg->dma_address) {
-			printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
+			printk(KERN_ERR "%s: out of ATEs\n", __func__);
 
 			/*
 			 * Free any successfully allocated entries.
 			 */
 			if (i > 0)
-				sn_dma_unmap_sg(dev, saved_sg, i, direction);
+				sn_dma_unmap_sg(dev, saved_sg, i, dir, attrs);
 			return 0;
 		}
 
@@ -278,41 +319,42 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
 
 	return nhwentries;
 }
-EXPORT_SYMBOL(sn_dma_map_sg);
 
-void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
-				size_t size, int direction)
+static void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+				       size_t size, enum dma_data_direction dir)
 {
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 }
-EXPORT_SYMBOL(sn_dma_sync_single_for_cpu);
 
-void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
-				   size_t size, int direction)
+static void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+					  size_t size,
+					  enum dma_data_direction dir)
 {
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 }
-EXPORT_SYMBOL(sn_dma_sync_single_for_device);
 
-void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
-			    int nelems, int direction)
+static void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+				   int nelems, enum dma_data_direction dir)
 {
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 }
-EXPORT_SYMBOL(sn_dma_sync_sg_for_cpu);
 
-void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
-			       int nelems, int direction)
+static void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+				      int nelems, enum dma_data_direction dir)
 {
-	BUG_ON(dev->bus != &pci_bus_type);
+	BUG_ON(!dev_is_pci(dev));
 }
-EXPORT_SYMBOL(sn_dma_sync_sg_for_device);
 
-int sn_dma_mapping_error(dma_addr_t dma_addr)
+static int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return 0;
 }
-EXPORT_SYMBOL(sn_dma_mapping_error);
+
+u64 sn_dma_get_required_mask(struct device *dev)
+{
+	return DMA_BIT_MASK(64);
+}
+EXPORT_SYMBOL_GPL(sn_dma_get_required_mask);
 
 char *sn_pci_get_legacy_mem(struct pci_bus *bus)
 {
@@ -331,7 +373,7 @@ int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
 	/*
 	 * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work
 	 * around hw issues at the pci bus level.  SGI proms older than
-	 * 4.10 don't implment this.
+	 * 4.10 don't implement this.
 	 */
 
 	SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE,
@@ -346,7 +388,7 @@ int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
 	/*
 	 * If the above failed, retry using the SAL_PROBE call which should
 	 * be present in all proms (but which cannot work round PCI chipset
-	 * bugs).  This code is retained for compatability with old
+	 * bugs).  This code is retained for compatibility with old
 	 * pre-4.10 proms, and should be removed at some point in the future.
 	 */
 
@@ -377,7 +419,7 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
 	/*
 	 * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work
 	 * around hw issues at the pci bus level.  SGI proms older than
-	 * 4.10 don't implment this.
+	 * 4.10 don't implement this.
 	 */
 
 	SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE,
@@ -392,7 +434,7 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
 	/*
 	 * If the above failed, retry using the SAL_PROBE call which should
 	 * be present in all proms (but which cannot work round PCI chipset
-	 * bugs).  This code is retained for compatability with old
+	 * bugs).  This code is retained for compatibility with old
 	 * pre-4.10 proms, and should be removed at some point in the future.
 	 */
 
@@ -423,3 +465,23 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
  out:
 	return ret;
 }
+
+static struct dma_map_ops sn_dma_ops = {
+	.alloc			= sn_dma_alloc_coherent,
+	.free			= sn_dma_free_coherent,
+	.map_page		= sn_dma_map_page,
+	.unmap_page		= sn_dma_unmap_page,
+	.map_sg			= sn_dma_map_sg,
+	.unmap_sg		= sn_dma_unmap_sg,
+	.sync_single_for_cpu 	= sn_dma_sync_single_for_cpu,
+	.sync_sg_for_cpu	= sn_dma_sync_sg_for_cpu,
+	.sync_single_for_device = sn_dma_sync_single_for_device,
+	.sync_sg_for_device	= sn_dma_sync_sg_for_device,
+	.mapping_error		= sn_dma_mapping_error,
+	.dma_supported		= sn_dma_supported,
+};
+
+void sn_dma_init(void)
+{
+	dma_ops = &sn_dma_ops;
+}
diff --git a/arch/ia64/sn/pci/pcibr/Makefile b/arch/ia64/sn/pci/pcibr/Makefile
index 3b403ea456f..396bcae3630 100644
--- a/arch/ia64/sn/pci/pcibr/Makefile
+++ b/arch/ia64/sn/pci/pcibr/Makefile
@@ -7,7 +7,7 @@
 #
 # Makefile for the sn2 io routines.
 
-CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+ccflags-y := -Iarch/ia64/sn/include
 
 obj-y				+=  pcibr_dma.o pcibr_reg.o \
 				    pcibr_ate.o pcibr_provider.o
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
index 1f0253bfe0a..5bc34eac9e0 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
@@ -30,7 +30,7 @@ static void mark_ate(struct ate_resource *ate_resource, int start, int number,
 
 /*
  * find_free_ate:  Find the first free ate index starting from the given
- *		   index for the desired consequtive count.
+ *		   index for the desired consecutive count.
  */
 static int find_free_ate(struct ate_resource *ate_resource, int start,
 			 int count)
@@ -54,6 +54,8 @@ static int find_free_ate(struct ate_resource *ate_resource, int start,
 					break;
 				}
 			}
+			if (i >= ate_resource->num_ate)
+				return -1;
 		} else
 			index++;	/* Try next ate */
 	}
@@ -88,7 +90,7 @@ static inline int alloc_ate_resource(struct ate_resource *ate_resource,
 		return -1;
 
 	/*
-	 * Find the required number of free consequtive ates.
+	 * Find the required number of free consecutive ates.
 	 */
 	start_index =
 	    find_free_ate(ate_resource, ate_resource->lowest_free_index,
@@ -105,7 +107,7 @@ static inline int alloc_ate_resource(struct ate_resource *ate_resource,
 /*
  * Allocate "count" contiguous Bridge Address Translation Entries
  * on the specified bridge to be used for PCI to XTALK mappings.
- * Indices in rm map range from 1..num_entries.  Indicies returned
+ * Indices in rm map range from 1..num_entries.  Indices returned
  * to caller range from 0..num_entries-1.
  *
  * Return the start index on success, -1 on failure.
@@ -126,7 +128,7 @@ int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count)
  * Setup an Address Translation Entry as specified.  Use either the Bridge
  * internal maps or the external map RAM, as appropriate.
  */
-static inline u64 *pcibr_ate_addr(struct pcibus_info *pcibus_info,
+static inline u64 __iomem *pcibr_ate_addr(struct pcibus_info *pcibus_info,
 				       int ate_index)
 {
 	if (ate_index < pcibus_info->pbi_int_ate_size) {
@@ -160,7 +162,7 @@ void pcibr_ate_free(struct pcibus_info *pcibus_info, int index)
 
 	volatile u64 ate;
 	int count;
-	u64 flags;
+	unsigned long flags;
 
 	if (pcibr_invalidate_ate) {
 		/* For debugging purposes, clear the valid bit in the ATE */
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index 9f86bb6519a..1e863b277ac 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -8,6 +8,7 @@
 
 #include <linux/types.h>
 #include <linux/pci.h>
+#include <linux/export.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/geo.h>
 #include <asm/sn/pcibr_provider.h>
@@ -41,7 +42,7 @@ extern int sn_ioif_inited;
 
 static dma_addr_t
 pcibr_dmamap_ate32(struct pcidev_info *info,
-		   u64 paddr, size_t req_size, u64 flags)
+		   u64 paddr, size_t req_size, u64 flags, int dma_flags)
 {
 
 	struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info;
@@ -81,9 +82,12 @@ pcibr_dmamap_ate32(struct pcidev_info *info,
 	if (IS_PCIX(pcibus_info))
 		ate_flags &= ~(PCI32_ATE_PREF);
 
-	xio_addr =
-	    IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
-	    PHYS_TO_TIODMA(paddr);
+	if (SN_DMA_ADDRTYPE(dma_flags == SN_DMA_ADDR_PHYS))
+		xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
+	    					      PHYS_TO_TIODMA(paddr);
+	else
+		xio_addr = paddr;
+
 	offset = IOPGOFF(xio_addr);
 	ate = ate_flags | (xio_addr - offset);
 
@@ -91,6 +95,17 @@ pcibr_dmamap_ate32(struct pcidev_info *info,
 	if (IS_PIC_SOFT(pcibus_info)) {
 		ate |= (pcibus_info->pbi_hub_xid << PIC_ATE_TARGETID_SHFT);
 	}
+
+	/*
+	 * If we're mapping for MSI, set the MSI bit in the ATE.  If it's a
+	 * TIOCP based pci bus, we also need to set the PIO bit in the ATE.
+	 */
+	if (dma_flags & SN_DMA_MSI) {
+		ate |= PCI32_ATE_MSI;
+		if (IS_TIOCP_SOFT(pcibus_info))
+			ate |= PCI32_ATE_PIO;
+	}
+
 	ate_write(pcibus_info, ate_index, ate_count, ate);
 
 	/*
@@ -105,20 +120,26 @@ pcibr_dmamap_ate32(struct pcidev_info *info,
 	if (pcibus_info->pbi_devreg[internal_device] & PCIBR_DEV_SWAP_DIR)
 		ATE_SWAP_ON(pci_addr);
 
+
 	return pci_addr;
 }
 
 static dma_addr_t
 pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr,
-			u64 dma_attributes)
+			u64 dma_attributes, int dma_flags)
 {
 	struct pcibus_info *pcibus_info = (struct pcibus_info *)
 	    ((info->pdi_host_pcidev_info)->pdi_pcibus_info);
 	u64 pci_addr;
 
 	/* Translate to Crosstalk View of Physical Address */
-	pci_addr = (IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
-		    PHYS_TO_TIODMA(paddr)) | dma_attributes;
+	if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS)
+		pci_addr = IS_PIC_SOFT(pcibus_info) ?
+				PHYS_TO_DMA(paddr) :
+				PHYS_TO_TIODMA(paddr);
+	else
+		pci_addr = paddr;
+	pci_addr |= dma_attributes;
 
 	/* Handle Bus mode */
 	if (IS_PCIX(pcibus_info))
@@ -130,7 +151,9 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr,
 		    ((u64) pcibus_info->
 		     pbi_hub_xid << PIC_PCI64_ATTR_TARG_SHFT);
 	} else
-		pci_addr |= TIOCP_PCI64_CMDTYPE_MEM;
+		pci_addr |= (dma_flags & SN_DMA_MSI) ?
+				TIOCP_PCI64_CMDTYPE_MSI :
+				TIOCP_PCI64_CMDTYPE_MEM;
 
 	/* If PCI mode, func zero uses VCHAN0, every other func uses VCHAN1 */
 	if (!IS_PCIX(pcibus_info) && PCI_FUNC(info->pdi_linux_pcidev->devfn))
@@ -141,7 +164,7 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr,
 
 static dma_addr_t
 pcibr_dmatrans_direct32(struct pcidev_info * info,
-			u64 paddr, size_t req_size, u64 flags)
+			u64 paddr, size_t req_size, u64 flags, int dma_flags)
 {
 	struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info;
 	struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info->
@@ -156,8 +179,14 @@ pcibr_dmatrans_direct32(struct pcidev_info * info,
 		return 0;
 	}
 
-	xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
-	    PHYS_TO_TIODMA(paddr);
+	if (dma_flags & SN_DMA_MSI)
+		return 0;
+
+	if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS)
+		xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
+	    					      PHYS_TO_TIODMA(paddr);
+	else
+		xio_addr = paddr;
 
 	xio_base = pcibus_info->pbi_dir_xbase;
 	offset = xio_addr - xio_base;
@@ -172,7 +201,7 @@ pcibr_dmatrans_direct32(struct pcidev_info * info,
 }
 
 /*
- * Wrapper routine for free'ing DMA maps
+ * Wrapper routine for freeing DMA maps
  * DMA mappings for Direct 64 and 32 do not have any DMA maps.
  */
 void
@@ -199,7 +228,7 @@ pcibr_dma_unmap(struct pci_dev *hwdev, dma_addr_t dma_handle, int direction)
  * after doing the read.  For PIC this routine then forces a fake interrupt
  * on another line, which is logically associated with the slot that the PIO
  * is addressed to.  It then spins while watching the memory location that
- * the interrupt is targetted to.  When the interrupt response arrives, we 
+ * the interrupt is targeted to.  When the interrupt response arrives, we 
  * are sure that the DMA has landed in memory and it is safe for the driver
  * to proceed.	For TIOCP use the Device(x) Write Request Buffer Flush 
  * Bridge register since it ensures the data has entered the coherence domain,
@@ -212,7 +241,7 @@ void sn_dma_flush(u64 addr)
 	int is_tio;
 	int wid_num;
 	int i, j;
-	u64 flags;
+	unsigned long flags;
 	u64 itte;
 	struct hubdev_info *hubinfo;
 	struct sn_flush_device_kernel *p;
@@ -228,9 +257,7 @@ void sn_dma_flush(u64 addr)
 
 	hubinfo = (NODEPDA(nasid_to_cnodeid(nasid)))->pdinfo;
 
-	if (!hubinfo) {
-		BUG();
-	}
+	BUG_ON(!hubinfo);
 
 	flush_nasid_list = &hubinfo->hdi_flush_nasid_list;
 	if (flush_nasid_list->widget_p == NULL)
@@ -327,7 +354,7 @@ void sn_dma_flush(u64 addr)
  */
 
 dma_addr_t
-pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size)
+pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size, int dma_flags)
 {
 	dma_addr_t dma_handle;
 	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
@@ -344,11 +371,11 @@ pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size)
 		 */
 
 		dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr,
-						     PCI64_ATTR_PREF);
+						     PCI64_ATTR_PREF, dma_flags);
 	} else {
 		/* Handle 32-63 bit cards via direct mapping */
 		dma_handle = pcibr_dmatrans_direct32(pcidev_info, phys_addr,
-						     size, 0);
+						     size, 0, dma_flags);
 		if (!dma_handle) {
 			/*
 			 * It is a 32 bit card and we cannot do direct mapping,
@@ -356,7 +383,8 @@ pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size)
 			 */
 
 			dma_handle = pcibr_dmamap_ate32(pcidev_info, phys_addr,
-							size, PCI32_ATE_PREF);
+							size, PCI32_ATE_PREF,
+							dma_flags);
 		}
 	}
 
@@ -365,18 +393,18 @@ pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size)
 
 dma_addr_t
 pcibr_dma_map_consistent(struct pci_dev * hwdev, unsigned long phys_addr,
-			 size_t size)
+			 size_t size, int dma_flags)
 {
 	dma_addr_t dma_handle;
 	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
 
 	if (hwdev->dev.coherent_dma_mask == ~0UL) {
 		dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr,
-					    PCI64_ATTR_BAR);
+					    PCI64_ATTR_BAR, dma_flags);
 	} else {
 		dma_handle = (dma_addr_t) pcibr_dmamap_ate32(pcidev_info,
 						    phys_addr, size,
-						    PCI32_ATE_BAR);
+						    PCI32_ATE_BAR, dma_flags);
 	}
 
 	return dma_handle;
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index ab1211ef017..8dbbef4a4f4 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -3,24 +3,28 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2001-2004, 2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/interrupt.h>
 #include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/pci.h>
+#include <linux/export.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/geo.h>
 #include <asm/sn/pcibr_provider.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/pcidev.h>
 #include <asm/sn/sn_sal.h>
+#include <asm/sn/pic.h>
 #include <asm/sn/sn2/sn_hwperf.h>
 #include "xtalk/xwidgetdev.h"
 #include "xtalk/hubdev.h"
 
 int
-sal_pcibr_slot_enable(struct pcibus_info *soft, int device, void *resp)
+sal_pcibr_slot_enable(struct pcibus_info *soft, int device, void *resp,
+                      char **ssdt)
 {
 	struct ia64_sal_retval ret_stuff;
 	u64 busnum;
@@ -32,7 +36,8 @@ sal_pcibr_slot_enable(struct pcibus_info *soft, int device, void *resp)
 	segment = soft->pbi_buscommon.bs_persist_segment;
 	busnum = soft->pbi_buscommon.bs_persist_busnum;
 	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_ENABLE, segment,
-			busnum, (u64) device, (u64) resp, 0, 0, 0);
+			busnum, (u64) device, (u64) resp, (u64)ia64_tpa(ssdt),
+			0, 0);
 
 	return (int)ret_stuff.v0;
 }
@@ -76,8 +81,8 @@ static int sal_pcibr_error_interrupt(struct pcibus_info *soft)
 
 u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus)
 {
-	s64 rc;
-	u16 ioboard;
+	long rc;
+	u16 uninitialized_var(ioboard);		/* GCC be quiet */
 	nasid_t nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
 
 	rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard);
@@ -95,13 +100,13 @@ u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus)
  * bridge sends an error interrupt.
  */
 static irqreturn_t
-pcibr_error_intr_handler(int irq, void *arg, struct pt_regs *regs)
+pcibr_error_intr_handler(int irq, void *arg)
 {
-	struct pcibus_info *soft = (struct pcibus_info *)arg;
+	struct pcibus_info *soft = arg;
 
-	if (sal_pcibr_error_interrupt(soft) < 0) {
+	if (sal_pcibr_error_interrupt(soft) < 0)
 		panic("pcibr_error_intr_handler(): Fatal Bridge Error");
-	}
+
 	return IRQ_HANDLED;
 }
 
@@ -109,7 +114,6 @@ void *
 pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
 {
 	int nasid, cnode, j;
-	cnodeid_t near_cnode;
 	struct hubdev_info *hubdev_info;
 	struct pcibus_info *soft;
 	struct sn_flush_device_kernel *sn_flush_device_kernel;
@@ -123,26 +127,27 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 	 * Allocate kernel bus soft and copy from prom.
 	 */
 
-	soft = kmalloc(sizeof(struct pcibus_info), GFP_KERNEL);
+	soft = kmemdup(prom_bussoft, sizeof(struct pcibus_info), GFP_KERNEL);
 	if (!soft) {
 		return NULL;
 	}
 
-	memcpy(soft, prom_bussoft, sizeof(struct pcibus_info));
-	soft->pbi_buscommon.bs_base =
-	    (((u64) soft->pbi_buscommon.
-	      bs_base << 4) >> 4) | __IA64_UNCACHED_OFFSET;
+	soft->pbi_buscommon.bs_base = (unsigned long)
+		ioremap(REGION_OFFSET(soft->pbi_buscommon.bs_base),
+			sizeof(struct pic));
 
 	spin_lock_init(&soft->pbi_lock);
 
 	/*
 	 * register the bridge's error interrupt handler
 	 */
-	if (request_irq(SGI_PCIASIC_ERROR, (void *)pcibr_error_intr_handler,
-			SA_SHIRQ, "PCIBR error", (void *)(soft))) {
+	if (request_irq(SGI_PCIASIC_ERROR, pcibr_error_intr_handler,
+			IRQF_SHARED, "PCIBR error", (void *)(soft))) {
 		printk(KERN_WARNING
 		       "pcibr cannot allocate interrupt for error handler\n");
 	}
+	irq_set_handler(SGI_PCIASIC_ERROR, handle_level_irq);
+	sn_set_err_irq_affinity(SGI_PCIASIC_ERROR);
 
 	/* 
 	 * Update the Bridge with the "kernel" pagesize 
@@ -186,20 +191,6 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 		return NULL;
 	}
 
-	if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) {
-		/* TIO PCI Bridge: find nearest node with CPUs */
-		int e = sn_hwperf_get_nearest_node(cnode, NULL, &near_cnode);
-
-		if (e < 0) {
-			near_cnode = (cnodeid_t)-1; /* use any node */
-			printk(KERN_WARNING "pcibr_bus_fixup: failed to find "
-				"near node with CPUs to TIO node %d, err=%d\n",
-				cnode, e);
-		}
-		controller->node = near_cnode;
-	}
-	else
-		controller->node = cnode;
 	return soft;
 }
 
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index be017691296..a70b11fd57d 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -9,6 +9,9 @@
 #include <linux/types.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+#include <linux/export.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/io.h>
@@ -88,7 +91,7 @@ tioca_gart_init(struct tioca_kernel *tioca_kern)
 		break;
 	default:
 		printk(KERN_ERR "%s:  Invalid CA_APERATURE_SIZE "
-		       "0x%lx\n", __FUNCTION__, (ulong) CA_APERATURE_SIZE);
+		       "0x%lx\n", __func__, (ulong) CA_APERATURE_SIZE);
 		return -1;
 	}
 
@@ -123,8 +126,8 @@ tioca_gart_init(struct tioca_kernel *tioca_kern)
 
 	if (!tmp) {
 		printk(KERN_ERR "%s:  Could not allocate "
-		       "%lu bytes (order %d) for GART\n",
-		       __FUNCTION__,
+		       "%llu bytes (order %d) for GART\n",
+		       __func__,
 		       tioca_kern->ca_gart_size,
 		       get_order(tioca_kern->ca_gart_size));
 		return -ENOMEM;
@@ -223,7 +226,7 @@ tioca_fastwrite_enable(struct tioca_kernel *tioca_kern)
 
 	/*
 	 * Scan all vga controllers on this bus making sure they all
-	 * suport FW.  If not, return.
+	 * support FW.  If not, return.
 	 */
 
 	list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
@@ -341,15 +344,15 @@ tioca_dma_d48(struct pci_dev *pdev, u64 paddr)
 
 	if (node_upper > 64) {
 		printk(KERN_ERR "%s:  coretalk addr 0x%p node id out "
-		       "of range\n", __FUNCTION__, (void *)ct_addr);
+		       "of range\n", __func__, (void *)ct_addr);
 		return 0;
 	}
 
 	agp_dma_extn = __sn_readq_relaxed(&ca_base->ca_agp_dma_addr_extn);
 	if (node_upper != (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT)) {
 		printk(KERN_ERR "%s:  coretalk upper node (%u) "
-		       "mismatch with ca_agp_dma_addr_extn (%lu)\n",
-		       __FUNCTION__,
+		       "mismatch with ca_agp_dma_addr_extn (%llu)\n",
+		       __func__,
 		       node_upper, (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT));
 		return 0;
 	}
@@ -364,12 +367,12 @@ tioca_dma_d48(struct pci_dev *pdev, u64 paddr)
  * @req_size: len (bytes) to map
  *
  * Map @paddr into CA address space using the GART mechanism.  The mapped
- * dma_addr_t is guarenteed to be contiguous in CA bus space.
+ * dma_addr_t is guaranteed to be contiguous in CA bus space.
  */
 static dma_addr_t
-tioca_dma_mapped(struct pci_dev *pdev, u64 paddr, size_t req_size)
+tioca_dma_mapped(struct pci_dev *pdev, unsigned long paddr, size_t req_size)
 {
-	int i, ps, ps_shift, entry, entries, mapsize, last_entry;
+	int ps, ps_shift, entry, entries, mapsize;
 	u64 xio_addr, end_xio_addr;
 	struct tioca_common *tioca_common;
 	struct tioca_kernel *tioca_kern;
@@ -410,21 +413,13 @@ tioca_dma_mapped(struct pci_dev *pdev, u64 paddr, size_t req_size)
 	map = tioca_kern->ca_pcigart_pagemap;
 	mapsize = tioca_kern->ca_pcigart_entries;
 
-	entry = find_first_zero_bit(map, mapsize);
-	while (entry < mapsize) {
-		last_entry = find_next_bit(map, mapsize, entry);
-
-		if (last_entry - entry >= entries)
-			break;
-
-		entry = find_next_zero_bit(map, mapsize, last_entry);
-	}
-
-	if (entry > mapsize)
+	entry = bitmap_find_next_zero_area(map, mapsize, 0, entries, 0);
+	if (entry >= mapsize) {
+		kfree(ca_dmamap);
 		goto map_return;
+	}
 
-	for (i = 0; i < entries; i++)
-		set_bit(entry + i, map);
+	bitmap_set(map, entry, entries);
 
 	bus_addr = tioca_kern->ca_pciap_base + (entry * ps);
 
@@ -515,12 +510,18 @@ tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
  * use the GART mapped mode.
  */
 static u64
-tioca_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count)
+tioca_dma_map(struct pci_dev *pdev, unsigned long paddr, size_t byte_count, int dma_flags)
 {
 	u64 mapaddr;
 
 	/*
-	 * If card is 64 or 48 bit addresable, use a direct mapping.  32
+	 * Not supported for now ...
+	 */
+	if (dma_flags & SN_DMA_MSI)
+		return 0;
+
+	/*
+	 * If card is 64 or 48 bit addressable, use a direct mapping.  32
 	 * bit direct is so restrictive w.r.t. where the memory resides that
 	 * we don't use it even though CA has some support.
 	 */
@@ -544,13 +545,12 @@ tioca_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count)
  * tioca_error_intr_handler - SGI TIO CA error interrupt handler
  * @irq: unused
  * @arg: pointer to tioca_common struct for the given CA
- * @pt: unused
  *
  * Handle a CA error interrupt.  Simply a wrapper around a SAL call which
  * defers processing to the SGI prom.
  */
 static irqreturn_t
-tioca_error_intr_handler(int irq, void *arg, struct pt_regs *pt)
+tioca_error_intr_handler(int irq, void *arg)
 {
 	struct tioca_common *soft = arg;
 	struct ia64_sal_retval ret_stuff;
@@ -589,10 +589,10 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 
 	/* sanity check prom rev */
 
-	if (sn_sal_rev() < 0x0406) {
+	if (is_shub1() && sn_sal_rev() < 0x0406) {
 		printk
 		    (KERN_ERR "%s:  SGI prom rev 4.06 or greater required "
-		     "for tioca support\n", __FUNCTION__);
+		     "for tioca support\n", __func__);
 		return NULL;
 	}
 
@@ -600,12 +600,14 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 	 * Allocate kernel bus soft and copy from prom.
 	 */
 
-	tioca_common = kzalloc(sizeof(struct tioca_common), GFP_KERNEL);
+	tioca_common = kmemdup(prom_bussoft, sizeof(struct tioca_common),
+			       GFP_KERNEL);
 	if (!tioca_common)
 		return NULL;
 
-	memcpy(tioca_common, prom_bussoft, sizeof(struct tioca_common));
-	tioca_common->ca_common.bs_base |= __IA64_UNCACHED_OFFSET;
+	tioca_common->ca_common.bs_base = (unsigned long)
+		ioremap(REGION_OFFSET(tioca_common->ca_common.bs_base),
+			sizeof(struct tioca_common));
 
 	/* init kernel-private area */
 
@@ -640,13 +642,16 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 
 	if (request_irq(SGI_TIOCA_ERROR,
 			tioca_error_intr_handler,
-			SA_SHIRQ, "TIOCA error", (void *)tioca_common))
+			IRQF_SHARED, "TIOCA error", (void *)tioca_common))
 		printk(KERN_WARNING
 		       "%s:  Unable to get irq %d.  "
 		       "Error interrupts won't be routed for TIOCA bus %d\n",
-		       __FUNCTION__, SGI_TIOCA_ERROR,
+		       __func__, SGI_TIOCA_ERROR,
 		       (int)tioca_common->ca_common.bs_persist_busnum);
 
+	irq_set_handler(SGI_TIOCA_ERROR, handle_level_irq);
+	sn_set_err_irq_affinity(SGI_TIOCA_ERROR);
+
 	/* Setup locality information */
 	controller->node = tioca_kern->ca_closest_node;
 	return tioca_common;
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index 833295624e5..46d3df4b03a 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -3,11 +3,12 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2003-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (C) 2003-2006 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/types.h>
 #include <linux/interrupt.h>
+#include <linux/slab.h>
 #include <linux/pci.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/addrs.h>
@@ -15,7 +16,6 @@
 #include <asm/sn/pcidev.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/tioce_provider.h>
-#include <asm/sn/sn2/sn_hwperf.h>
 
 /*
  * 1/26/2006
@@ -42,7 +42,7 @@
  *	} else
  *		do desired mmr access
  *
- * According to hw, we can use reads instead of writes to the above addres
+ * According to hw, we can use reads instead of writes to the above address
  *
  * Note this WAR can only to be used for accessing internal MMR's in the
  * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff.  This includes the
@@ -53,7 +53,7 @@
  */
 
 static void inline
-tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr)
+tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr)
 {
 	u64 mmr_base;
 	u64 mmr_offset;
@@ -62,7 +62,7 @@ tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr)
 		return;
 
 	mmr_base = kern->ce_common->ce_pcibus.bs_base;
-	mmr_offset = (u64)mmr_addr - mmr_base;
+	mmr_offset = (unsigned long)mmr_addr - mmr_base;
 
 	if (mmr_offset < 0x45000) {
 		u64 mmr_war_offset;
@@ -74,12 +74,12 @@ tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr)
 		else
 			mmr_war_offset = 0x158;
 
-		readq_relaxed((void *)(mmr_base + mmr_war_offset));
+		readq_relaxed((void __iomem *)(mmr_base + mmr_war_offset));
 	}
 }
 
 static void inline
-tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr)
+tioce_mmr_war_post(struct tioce_kernel *kern, void __iomem *mmr_addr)
 {
 	u64 mmr_base;
 	u64 mmr_offset;
@@ -88,12 +88,12 @@ tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr)
 		return;
 
 	mmr_base = kern->ce_common->ce_pcibus.bs_base;
-	mmr_offset = (u64)mmr_addr - mmr_base;
+	mmr_offset = (unsigned long)mmr_addr - mmr_base;
 
 	if (mmr_offset < 0x45000) {
 		if (mmr_offset == 0x100)
-			readq_relaxed((void *)(mmr_base + 0x38));
-		readq_relaxed((void *)(mmr_base + 0xb050));
+			readq_relaxed((void __iomem *)(mmr_base + 0x38));
+		readq_relaxed((void __iomem *)(mmr_base + 0xb050));
 	}
 }
 
@@ -170,7 +170,8 @@ tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr)
 	(ATE_PAGE((start)+(len)-1, pagesize) - ATE_PAGE(start, pagesize) + 1)
 
 #define ATE_VALID(ate)	((ate) & (1UL << 63))
-#define ATE_MAKE(addr, ps) (((addr) & ~ATE_PAGEMASK(ps)) | (1UL << 63))
+#define ATE_MAKE(addr, ps, msi) \
+	(((addr) & ~ATE_PAGEMASK(ps)) | (1UL << 63) | ((msi)?(1UL << 62):0))
 
 /*
  * Flavors of ate-based mapping supported by tioce_alloc_map()
@@ -196,15 +197,17 @@ tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr)
  *
  * 63    - must be 1 to indicate d64 mode to CE hardware
  * 62    - barrier bit ... controlled with tioce_dma_barrier()
- * 61    - 0 since this is not an MSI transaction
+ * 61    - msi bit ... specified through dma_flags
  * 60:54 - reserved, MBZ
  */
 static u64
-tioce_dma_d64(unsigned long ct_addr)
+tioce_dma_d64(unsigned long ct_addr, int dma_flags)
 {
 	u64 bus_addr;
 
 	bus_addr = ct_addr | (1UL << 63);
+	if (dma_flags & SN_DMA_MSI)
+		bus_addr |= (1UL << 61);
 
 	return bus_addr;
 }
@@ -220,7 +223,7 @@ tioce_dma_d64(unsigned long ct_addr)
  * @pci_dev.
  */
 static inline void
-pcidev_to_tioce(struct pci_dev *pdev, struct tioce **base,
+pcidev_to_tioce(struct pci_dev *pdev, struct tioce __iomem **base,
 		struct tioce_kernel **kernel, int *port)
 {
 	struct pcidev_info *pcidev_info;
@@ -232,7 +235,7 @@ pcidev_to_tioce(struct pci_dev *pdev, struct tioce **base,
 	ce_kernel = (struct tioce_kernel *)ce_common->ce_kernel_private;
 
 	if (base)
-		*base = (struct tioce *)ce_common->ce_pcibus.bs_base;
+		*base = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base;
 	if (kernel)
 		*kernel = ce_kernel;
 
@@ -254,14 +257,14 @@ pcidev_to_tioce(struct pci_dev *pdev, struct tioce **base,
  * @ct_addr: the coretalk address to map
  * @len: number of bytes to map
  *
- * Given the addressing type, set up various paramaters that define the
+ * Given the addressing type, set up various parameters that define the
  * ATE pool to use.  Search for a contiguous block of entries to cover the
- * length, and if enough resources exist, fill in the ATE's and construct a
+ * length, and if enough resources exist, fill in the ATEs and construct a
  * tioce_dmamap struct to track the mapping.
  */
 static u64
 tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
-		u64 ct_addr, int len)
+		u64 ct_addr, int len, int dma_flags)
 {
 	int i;
 	int j;
@@ -270,14 +273,15 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
 	int entries;
 	int nates;
 	u64 pagesize;
+	int msi_capable, msi_wanted;
 	u64 *ate_shadow;
-	u64 *ate_reg;
+	u64 __iomem *ate_reg;
 	u64 addr;
-	struct tioce *ce_mmr;
+	struct tioce __iomem *ce_mmr;
 	u64 bus_base;
 	struct tioce_dmamap *map;
 
-	ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base;
+	ce_mmr = (struct tioce __iomem *)ce_kern->ce_common->ce_pcibus.bs_base;
 
 	switch (type) {
 	case TIOCE_ATE_M32:
@@ -291,6 +295,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
 		ate_reg = ce_mmr->ce_ure_ate3240;
 		pagesize = ce_kern->ce_ate3240_pagesize;
 		bus_base = TIOCE_M32_MIN;
+		msi_capable = 1;
 		break;
 	case TIOCE_ATE_M40:
 		first = 0;
@@ -299,6 +304,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
 		ate_reg = ce_mmr->ce_ure_ate40;
 		pagesize = MB(64);
 		bus_base = TIOCE_M40_MIN;
+		msi_capable = 0;
 		break;
 	case TIOCE_ATE_M40S:
 		/*
@@ -311,11 +317,16 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
 		ate_reg = ce_mmr->ce_ure_ate3240;
 		pagesize = GB(16);
 		bus_base = TIOCE_M40S_MIN;
+		msi_capable = 0;
 		break;
 	default:
 		return 0;
 	}
 
+	msi_wanted = dma_flags & SN_DMA_MSI;
+	if (msi_wanted && !msi_capable)
+		return 0;
+
 	nates = ATE_NPAGES(ct_addr, len, pagesize);
 	if (nates > entries)
 		return 0;
@@ -344,7 +355,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
 	for (j = 0; j < nates; j++) {
 		u64 ate;
 
-		ate = ATE_MAKE(addr, pagesize);
+		ate = ATE_MAKE(addr, pagesize, msi_wanted);
 		ate_shadow[i + j] = ate;
 		tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate);
 		addr += pagesize;
@@ -371,16 +382,19 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
  * Map @paddr into 32-bit bus space of the CE associated with @pcidev_info.
  */
 static u64
-tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr)
+tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr, int dma_flags)
 {
 	int dma_ok;
 	int port;
-	struct tioce *ce_mmr;
+	struct tioce __iomem *ce_mmr;
 	struct tioce_kernel *ce_kern;
 	u64 ct_upper;
 	u64 ct_lower;
 	dma_addr_t bus_addr;
 
+	if (dma_flags & SN_DMA_MSI)
+		return 0;
+
 	ct_upper = ct_addr & ~0x3fffffffUL;
 	ct_lower = ct_addr & 0x3fffffffUL;
 
@@ -447,7 +461,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
 	int i;
 	int port;
 	struct tioce_kernel *ce_kern;
-	struct tioce *ce_mmr;
+	struct tioce __iomem *ce_mmr;
 	unsigned long flags;
 
 	bus_addr = tioce_dma_barrier(bus_addr, 0);
@@ -480,8 +494,8 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
 
 		if (&map->ce_dmamap_list == &ce_kern->ce_dmamap_list) {
 			printk(KERN_WARNING
-			       "%s:  %s - no map found for bus_addr 0x%lx\n",
-			       __FUNCTION__, pci_name(pdev), bus_addr);
+			       "%s:  %s - no map found for bus_addr 0x%llx\n",
+			       __func__, pci_name(pdev), bus_addr);
 		} else if (--map->refcnt == 0) {
 			for (i = 0; i < map->ate_count; i++) {
 				map->ate_shadow[i] = 0;
@@ -507,7 +521,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
  */
 static u64
 tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
-		 int barrier)
+		 int barrier, int dma_flags)
 {
 	unsigned long flags;
 	u64 ct_addr;
@@ -523,15 +537,18 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
 	if (dma_mask < 0x7fffffffUL)
 		return 0;
 
-	ct_addr = PHYS_TO_TIODMA(paddr);
+	if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS)
+		ct_addr = PHYS_TO_TIODMA(paddr);
+	else
+		ct_addr = paddr;
 
 	/*
 	 * If the device can generate 64 bit addresses, create a D64 map.
-	 * Since this should never fail, bypass the rest of the checks.
 	 */
 	if (dma_mask == ~0UL) {
-		mapaddr = tioce_dma_d64(ct_addr);
-		goto dma_map_done;
+		mapaddr = tioce_dma_d64(ct_addr, dma_flags);
+		if (mapaddr)
+			goto dma_map_done;
 	}
 
 	pcidev_to_tioce(pdev, NULL, &ce_kern, &port);
@@ -565,8 +582,8 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
 	 */
 	if (!mapaddr && !barrier && dma_mask >= 0xffffffffffUL) {
 		/*
-		 * We have two options for 40-bit mappings:  16GB "super" ATE's
-		 * and 64MB "regular" ATE's.  We'll try both if needed for a
+		 * We have two options for 40-bit mappings:  16GB "super" ATEs
+		 * and 64MB "regular" ATEs.  We'll try both if needed for a
 		 * given mapping but which one we try first depends on the
 		 * size.  For requests >64MB, prefer to use a super page with
 		 * regular as the fallback. Otherwise, try in the reverse order.
@@ -574,18 +591,22 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
 
 		if (byte_count > MB(64)) {
 			mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40S,
-						  port, ct_addr, byte_count);
+						  port, ct_addr, byte_count,
+						  dma_flags);
 			if (!mapaddr)
 				mapaddr =
 				    tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1,
-						    ct_addr, byte_count);
+						    ct_addr, byte_count,
+						    dma_flags);
 		} else {
 			mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1,
-						  ct_addr, byte_count);
+						  ct_addr, byte_count,
+						  dma_flags);
 			if (!mapaddr)
 				mapaddr =
 				    tioce_alloc_map(ce_kern, TIOCE_ATE_M40S,
-						    port, ct_addr, byte_count);
+						    port, ct_addr, byte_count,
+						    dma_flags);
 		}
 	}
 
@@ -593,7 +614,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
 	 * 32-bit direct is the next mode to try
 	 */
 	if (!mapaddr && dma_mask >= 0xffffffffUL)
-		mapaddr = tioce_dma_d32(pdev, ct_addr);
+		mapaddr = tioce_dma_d32(pdev, ct_addr, dma_flags);
 
 	/*
 	 * Last resort, try 32-bit ATE-based map.
@@ -601,7 +622,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
 	if (!mapaddr)
 		mapaddr =
 		    tioce_alloc_map(ce_kern, TIOCE_ATE_M32, -1, ct_addr,
-				    byte_count);
+				    byte_count, dma_flags);
 
 	spin_unlock_irqrestore(&ce_kern->ce_lock, flags);
 
@@ -622,9 +643,9 @@ dma_map_done:
  * in the address.
  */
 static u64
-tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count)
+tioce_dma(struct pci_dev *pdev, unsigned long  paddr, size_t byte_count, int dma_flags)
 {
-	return tioce_do_dma_map(pdev, paddr, byte_count, 0);
+	return tioce_do_dma_map(pdev, paddr, byte_count, 0, dma_flags);
 }
 
 /**
@@ -635,22 +656,23 @@ tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count)
  *
  * Simply call tioce_do_dma_map() to create a map with the barrier bit set
  * in the address.
- */ static u64
-tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count)
+ */
+static u64
+tioce_dma_consistent(struct pci_dev *pdev, unsigned long  paddr, size_t byte_count, int dma_flags)
 {
-	return tioce_do_dma_map(pdev, paddr, byte_count, 1);
+	return tioce_do_dma_map(pdev, paddr, byte_count, 1, dma_flags);
 }
 
 /**
  * tioce_error_intr_handler - SGI TIO CE error interrupt handler
  * @irq: unused
  * @arg: pointer to tioce_common struct for the given CE
- * @pt: unused
  *
  * Handle a CE error interrupt.  Simply a wrapper around a SAL call which
  * defers processing to the SGI prom.
- */ static irqreturn_t
-tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt)
+ */
+static irqreturn_t
+tioce_error_intr_handler(int irq, void *arg)
 {
 	struct tioce_common *soft = arg;
 	struct ia64_sal_retval ret_stuff;
@@ -668,8 +690,8 @@ tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt)
 }
 
 /**
- * tioce_reserve_m32 - reserve M32 ate's for the indicated address range
- * @tioce_kernel: TIOCE context to reserve ate's for
+ * tioce_reserve_m32 - reserve M32 ATEs for the indicated address range
+ * @tioce_kernel: TIOCE context to reserve ATEs for
  * @base: starting bus address to reserve
  * @limit: last bus address to reserve
  *
@@ -680,9 +702,9 @@ static void
 tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit)
 {
 	int ate_index, last_ate, ps;
-	struct tioce *ce_mmr;
+	struct tioce __iomem *ce_mmr;
 
-	ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base;
+	ce_mmr = (struct tioce __iomem *)ce_kern->ce_common->ce_pcibus.bs_base;
 	ps = ce_kern->ce_ate3240_pagesize;
 	ate_index = ATE_PAGE(base, ps);
 	last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1;
@@ -696,7 +718,7 @@ tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit)
 	while (ate_index <= last_ate) {
 		u64 ate;
 
-		ate = ATE_MAKE(0xdeadbeef, ps);
+		ate = ATE_MAKE(0xdeadbeef, ps, 0);
 		ce_kern->ce_ate3240_shadow[ate_index] = ate;
 		tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index],
 				 ate);
@@ -716,7 +738,7 @@ tioce_kern_init(struct tioce_common *tioce_common)
 	int dev;
 	u32 tmp;
 	unsigned int seg, bus;
-	struct tioce *tioce_mmr;
+	struct tioce __iomem *tioce_mmr;
 	struct tioce_kernel *tioce_kern;
 
 	tioce_kern = kzalloc(sizeof(struct tioce_kernel), GFP_KERNEL);
@@ -733,21 +755,21 @@ tioce_kern_init(struct tioce_common *tioce_common)
 	 * Determine the secondary bus number of the port2 logical PPB.
 	 * This is used to decide whether a given pci device resides on
 	 * port1 or port2.  Note:  We don't have enough plumbing set up
-	 * here to use pci_read_config_xxx() so use the raw_pci_ops vector.
+	 * here to use pci_read_config_xxx() so use raw_pci_read().
 	 */
 
 	seg = tioce_common->ce_pcibus.bs_persist_segment;
 	bus = tioce_common->ce_pcibus.bs_persist_busnum;
 
-	raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
+	raw_pci_read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
 	tioce_kern->ce_port1_secondary = (u8) tmp;
 
 	/*
 	 * Set PMU pagesize to the largest size available, and zero out
-	 * the ate's.
+	 * the ATEs.
 	 */
 
-	tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
+	tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base;
 	tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map,
 		       CE_URE_PAGESIZE_MASK);
 	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map,
@@ -765,7 +787,7 @@ tioce_kern_init(struct tioce_common *tioce_common)
 	}
 
 	/*
-	 * Reserve ATE's corresponding to reserved address ranges.  These
+	 * Reserve ATEs corresponding to reserved address ranges.  These
 	 * include:
 	 *
 	 *	Memory space covered by each PPB mem base/limit register
@@ -780,11 +802,11 @@ tioce_kern_init(struct tioce_common *tioce_common)
 
 		/* mem base/limit */
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_MEMORY_BASE, 2, &tmp);
 		base = (u64)tmp << 16;
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_MEMORY_LIMIT, 2, &tmp);
 		limit = (u64)tmp << 16;
 		limit |= 0xfffffUL;
@@ -798,21 +820,21 @@ tioce_kern_init(struct tioce_common *tioce_common)
 		 * attributes.
 		 */
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_PREF_MEMORY_BASE, 2, &tmp);
 		base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_PREF_BASE_UPPER32, 4, &tmp);
 		base |= (u64)tmp << 32;
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_PREF_MEMORY_LIMIT, 2, &tmp);
 
 		limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
 		limit |= 0xfffffUL;
 
-		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+		raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
 				  PCI_PREF_LIMIT_UPPER32, 4, &tmp);
 		limit |= (u64)tmp << 32;
 
@@ -838,7 +860,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
 	struct pcidev_info *pcidev_info;
 	struct tioce_common *ce_common;
 	struct tioce_kernel *ce_kern;
-	struct tioce *ce_mmr;
+	struct tioce __iomem *ce_mmr;
 	u64 force_int_val;
 
 	if (!sn_irq_info->irq_bridge)
@@ -852,7 +874,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
 		return;
 
 	ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
-	ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+	ce_mmr = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base;
 	ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
 
 	/*
@@ -933,7 +955,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
 	struct pcidev_info *pcidev_info;
 	struct tioce_common *ce_common;
 	struct tioce_kernel *ce_kern;
-	struct tioce *ce_mmr;
+	struct tioce __iomem *ce_mmr;
 	int bit;
 	u64 vector;
 
@@ -942,7 +964,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
 		return;
 
 	ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
-	ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+	ce_mmr = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base;
 	ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
 
 	bit = sn_irq_info->irq_int_bit;
@@ -970,11 +992,9 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
 static void *
 tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
 {
-	int my_nasid;
-	cnodeid_t my_cnode, mem_cnode;
 	struct tioce_common *tioce_common;
 	struct tioce_kernel *tioce_kern;
-	struct tioce *tioce_mmr;
+	struct tioce __iomem *tioce_mmr;
 
 	/*
 	 * Allocate kernel bus soft and copy from prom.
@@ -985,7 +1005,9 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 		return NULL;
 
 	memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common));
-	tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET;
+	tioce_common->ce_pcibus.bs_base = (unsigned long)
+		ioremap(REGION_OFFSET(tioce_common->ce_pcibus.bs_base),
+			sizeof(struct tioce_common));
 
 	tioce_kern = tioce_kern_init(tioce_common);
 	if (tioce_kern == NULL) {
@@ -998,38 +1020,25 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 	 * interrupt handler.
 	 */
 
-	tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
+	tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base;
 	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL);
 	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias,
 		       ~0ULL);
-	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL);
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, 0ULL);
 
 	if (request_irq(SGI_PCIASIC_ERROR,
 			tioce_error_intr_handler,
-			SA_SHIRQ, "TIOCE error", (void *)tioce_common))
+			IRQF_SHARED, "TIOCE error", (void *)tioce_common))
 		printk(KERN_WARNING
 		       "%s:  Unable to get irq %d.  "
 		       "Error interrupts won't be routed for "
 		       "TIOCE bus %04x:%02x\n",
-		       __FUNCTION__, SGI_PCIASIC_ERROR,
+		       __func__, SGI_PCIASIC_ERROR,
 		       tioce_common->ce_pcibus.bs_persist_segment,
 		       tioce_common->ce_pcibus.bs_persist_busnum);
 
-	/*
-	 * identify closest nasid for memory allocations
-	 */
-
-	my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base);
-	my_cnode = nasid_to_cnodeid(my_nasid);
-
-	if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) {
-		printk(KERN_WARNING "tioce_bus_fixup: failed to find "
-		       "closest node with MEM to TIO node %d\n", my_cnode);
-		mem_cnode = (cnodeid_t)-1; /* use any node */
-	}
-
-	controller->node = mem_cnode;
-
+	irq_set_handler(SGI_PCIASIC_ERROR, handle_level_irq);
+	sn_set_err_irq_affinity(SGI_PCIASIC_ERROR);
 	return tioce_common;
 }
 
diff --git a/arch/ia64/uv/Makefile b/arch/ia64/uv/Makefile
new file mode 100644
index 00000000000..aa9f91947c4
--- /dev/null
+++ b/arch/ia64/uv/Makefile
@@ -0,0 +1,12 @@
+# arch/ia64/uv/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+#
+# Makefile for the sn uv subplatform
+#
+
+obj-y += kernel/
diff --git a/arch/ia64/uv/kernel/Makefile b/arch/ia64/uv/kernel/Makefile
new file mode 100644
index 00000000000..124e441d383
--- /dev/null
+++ b/arch/ia64/uv/kernel/Makefile
@@ -0,0 +1,13 @@
+# arch/ia64/uv/kernel/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+#
+
+ccflags-y := -Iarch/ia64/sn/include
+
+obj-y				+= setup.o
+obj-$(CONFIG_IA64_GENERIC)      += machvec.o
diff --git a/arch/ia64/uv/kernel/machvec.c b/arch/ia64/uv/kernel/machvec.c
new file mode 100644
index 00000000000..50737a9dca7
--- /dev/null
+++ b/arch/ia64/uv/kernel/machvec.c
@@ -0,0 +1,11 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#define MACHVEC_PLATFORM_NAME	uv
+#define MACHVEC_PLATFORM_HEADER	<asm/machvec_uv.h>
+#include <asm/machvec_init.h>
diff --git a/arch/ia64/uv/kernel/setup.c b/arch/ia64/uv/kernel/setup.c
new file mode 100644
index 00000000000..f1490657baf
--- /dev/null
+++ b/arch/ia64/uv/kernel/setup.c
@@ -0,0 +1,116 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV Core Functions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <asm/sn/simulator.h>
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+
+DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
+
+#ifdef CONFIG_IA64_SGI_UV
+int sn_prom_type;
+long sn_partition_id;
+EXPORT_SYMBOL(sn_partition_id);
+long sn_coherency_id;
+EXPORT_SYMBOL_GPL(sn_coherency_id);
+long sn_region_size;
+EXPORT_SYMBOL(sn_region_size);
+#endif
+
+struct redir_addr {
+	unsigned long redirect;
+	unsigned long alias;
+};
+
+#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
+
+static __initdata struct redir_addr redir_addrs[] = {
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG},
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG},
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG},
+};
+
+static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
+{
+	union uvh_si_alias0_overlay_config_u alias;
+	union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
+		alias.v = uv_read_local_mmr(redir_addrs[i].alias);
+		if (alias.s.base == 0) {
+			*size = (1UL << alias.s.m_alias);
+			redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
+			*base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
+			return;
+		}
+	}
+	BUG();
+}
+
+void __init uv_setup(char **cmdline_p)
+{
+	union uvh_si_addr_map_config_u m_n_config;
+	union uvh_node_id_u node_id;
+	unsigned long gnode_upper;
+	int nid, cpu, m_val, n_val;
+	unsigned long mmr_base, lowmem_redir_base, lowmem_redir_size;
+
+	if (IS_MEDUSA()) {
+		lowmem_redir_base = 0;
+		lowmem_redir_size = 0;
+		node_id.v = 0;
+		m_n_config.s.m_skt = 37;
+		m_n_config.s.n_skt = 0;
+		mmr_base = 0;
+#if 0
+		/* Need BIOS calls - TDB */
+		if (!ia64_sn_is_fake_prom())
+			sn_prom_type = 1;
+		else
+#endif
+			sn_prom_type = 2;
+		printk(KERN_INFO "Running on medusa with %s PROM\n",
+					(sn_prom_type == 1) ? "real" : "fake");
+	} else {
+		get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
+		node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+		m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
+		mmr_base =
+			uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+				~UV_MMR_ENABLE;
+	}
+
+	m_val = m_n_config.s.m_skt;
+	n_val = m_n_config.s.n_skt;
+	printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
+
+	gnode_upper = (((unsigned long)node_id.s.node_id) &
+		       ~((1 << n_val) - 1)) << m_val;
+
+	for_each_present_cpu(cpu) {
+		nid = cpu_to_node(cpu);
+		uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
+		uv_cpu_hub_info(cpu)->lowmem_remap_top =
+			lowmem_redir_base + lowmem_redir_size;
+		uv_cpu_hub_info(cpu)->m_val = m_val;
+		uv_cpu_hub_info(cpu)->n_val = n_val;
+		uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) -1;
+		uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
+		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
+		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
+		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+		printk(KERN_DEBUG "UV cpu %d, nid %d\n", cpu, nid);
+	}
+}
+