Diffstat (limited to 'drivers/iommu')
40 files changed, 18733 insertions, 2157 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 6bea6962f8e..d260605e6d5 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -13,6 +13,20 @@ menuconfig IOMMU_SUPPORT if IOMMU_SUPPORT +config OF_IOMMU + def_bool y + depends on OF + +config FSL_PAMU + bool "Freescale IOMMU support" + depends on PPC_E500MC + select IOMMU_API + select GENERIC_ALLOCATOR + help + Freescale PAMU support. PAMU is the IOMMU present on Freescale QorIQ platforms. + PAMU can authorize memory access, remap the memory address, and remap I/O + transaction types. + # MSM IOMMU support config MSM_IOMMU bool "MSM IOMMU Support" @@ -43,7 +57,7 @@ config AMD_IOMMU With this option you can enable support for AMD IOMMU hardware in your system. An IOMMU is a hardware component which provides remapping of DMA memory accesses from devices. With an AMD IOMMU you - can isolate the the DMA memory of different devices and protect the + can isolate the DMA memory of different devices and protect the system from misbehaving device drivers or hardware. You can find out if your system has an AMD IOMMU if you look into @@ -61,13 +75,13 @@ config AMD_IOMMU_STATS If unsure, say N. config AMD_IOMMU_V2 - tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)" - depends on AMD_IOMMU && PROFILING && EXPERIMENTAL + tristate "AMD IOMMU Version 2 driver" + depends on AMD_IOMMU && PROFILING select MMU_NOTIFIER ---help--- This option enables support for the AMD IOMMUv2 features of the IOMMU hardware. Select this option if you want to use devices that support - the the PCI PRI and PASID interface. + the PCI PRI and PASID interface. # Intel IOMMU support config DMAR_TABLE @@ -115,8 +129,8 @@ config INTEL_IOMMU_FLOPPY_WA 16MiB to make floppy (an ISA device) work. config IRQ_REMAP - bool "Support for Interrupt Remapping (EXPERIMENTAL)" - depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL + bool "Support for Interrupt Remapping" + depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI select DMAR_TABLE ---help--- Supports Interrupt remapping for IO-APIC and MSI devices. @@ -126,7 +140,7 @@ config IRQ_REMAP # OMAP IOMMU support config OMAP_IOMMU bool "OMAP IOMMU Support" - depends on ARCH_OMAP + depends on ARCH_OMAP2PLUS select IOMMU_API config OMAP_IOVMM @@ -142,4 +156,153 @@ config OMAP_IOMMU_DEBUG Say N unless you know you need this. +config TEGRA_IOMMU_GART + bool "Tegra GART IOMMU Support" + depends on ARCH_TEGRA_2x_SOC + select IOMMU_API + help + Enables support for remapping discontiguous physical memory + shared with the operating system into contiguous I/O virtual + space through the GART (Graphics Address Relocation Table) + hardware included on Tegra SoCs. + +config TEGRA_IOMMU_SMMU + bool "Tegra SMMU IOMMU Support" + depends on ARCH_TEGRA && TEGRA_AHB + select IOMMU_API + help + Enables support for remapping discontiguous physical memory + shared with the operating system into contiguous I/O virtual + space through the SMMU (System Memory Management Unit) + hardware included on Tegra SoCs. + +config EXYNOS_IOMMU + bool "Exynos IOMMU Support" + depends on ARCH_EXYNOS + select IOMMU_API + help + Support for the IOMMU (System MMU) of Samsung Exynos application + processor family. This enables H/W multimedia accelerators to see + non-linear physical memory chunks as linear memory in their + address space. + + If unsure, say N here. 
+ +config EXYNOS_IOMMU_DEBUG + bool "Debugging log for Exynos IOMMU" + depends on EXYNOS_IOMMU + help + Select this to see the detailed log message that shows what + happens in the IOMMU driver. + + Say N unless you need kernel log message for IOMMU debugging. + +config SHMOBILE_IPMMU + bool + +config SHMOBILE_IPMMU_TLB + bool + +config SHMOBILE_IOMMU + bool "IOMMU for Renesas IPMMU/IPMMUI" + default n + depends on ARM + depends on ARCH_SHMOBILE || COMPILE_TEST + select IOMMU_API + select ARM_DMA_USE_IOMMU + select SHMOBILE_IPMMU + select SHMOBILE_IPMMU_TLB + help + Support for Renesas IPMMU/IPMMUI. This option enables + remapping of DMA memory accesses from all of the IP blocks + on the ICB. + + Warning: Drivers (including userspace drivers of UIO + devices) of the IP blocks on the ICB *must* use addresses + allocated from the IPMMU (iova) for DMA with this option + enabled. + + If unsure, say N. + +choice + prompt "IPMMU/IPMMUI address space size" + default SHMOBILE_IOMMU_ADDRSIZE_2048MB + depends on SHMOBILE_IOMMU + help + This option sets IPMMU/IPMMUI address space size by + adjusting the 1st level page table size. The page table size + is calculated as follows: + + page table size = number of page table entries * 4 bytes + number of page table entries = address space size / 1 MiB + + For example, when the address space size is 2048 MiB, the + 1st level page table size is 8192 bytes. + + config SHMOBILE_IOMMU_ADDRSIZE_2048MB + bool "2 GiB" + + config SHMOBILE_IOMMU_ADDRSIZE_1024MB + bool "1 GiB" + + config SHMOBILE_IOMMU_ADDRSIZE_512MB + bool "512 MiB" + + config SHMOBILE_IOMMU_ADDRSIZE_256MB + bool "256 MiB" + + config SHMOBILE_IOMMU_ADDRSIZE_128MB + bool "128 MiB" + + config SHMOBILE_IOMMU_ADDRSIZE_64MB + bool "64 MiB" + + config SHMOBILE_IOMMU_ADDRSIZE_32MB + bool "32 MiB" + +endchoice + +config SHMOBILE_IOMMU_L1SIZE + int + default 8192 if SHMOBILE_IOMMU_ADDRSIZE_2048MB + default 4096 if SHMOBILE_IOMMU_ADDRSIZE_1024MB + default 2048 if SHMOBILE_IOMMU_ADDRSIZE_512MB + default 1024 if SHMOBILE_IOMMU_ADDRSIZE_256MB + default 512 if SHMOBILE_IOMMU_ADDRSIZE_128MB + default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB + default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB + +config IPMMU_VMSA + bool "Renesas VMSA-compatible IPMMU" + depends on ARM_LPAE + depends on ARCH_SHMOBILE || COMPILE_TEST + select IOMMU_API + select ARM_DMA_USE_IOMMU + help + Support for the Renesas VMSA-compatible IPMMU Renesas found in the + R-Mobile APE6 and R-Car H2/M2 SoCs. + + If unsure, say N. + +config SPAPR_TCE_IOMMU + bool "sPAPR TCE IOMMU Support" + depends on PPC_POWERNV || PPC_PSERIES + select IOMMU_API + help + Enables bits of IOMMU API required by VFIO. The iommu_ops + is not implemented as it is not necessary for VFIO. + +config ARM_SMMU + bool "ARM Ltd. System MMU (SMMU) Support" + depends on ARM64 || (ARM_LPAE && OF) + select IOMMU_API + select ARM_DMA_USE_IOMMU if ARM + help + Support for implementations of the ARM System MMU architecture + versions 1 and 2. The driver supports both v7l and v8l table + formats with 4k and 64k page sizes. + + Say Y here if your SoC includes an IOMMU device implementing + the ARM SMMU architecture. 
+ endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 0e36b4934af..8893bad048e 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,10 +1,21 @@ obj-$(CONFIG_IOMMU_API) += iommu.o +obj-$(CONFIG_IOMMU_API) += iommu-traces.o +obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o +obj-$(CONFIG_ARM_SMMU) += arm-smmu.o obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o -obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o +obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o +obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o +obj-$(CONFIG_OMAP_IOMMU) += omap-iommu2.o obj-$(CONFIG_OMAP_IOVMM) += omap-iovmm.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o +obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o +obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o +obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o +obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o +obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o +obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index f75e0608be5..4aec6a29e31 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -31,6 +31,12 @@ #include <linux/amd-iommu.h> #include <linux/notifier.h> #include <linux/export.h> +#include <linux/irq.h> +#include <linux/msi.h> +#include <asm/irq_remapping.h> +#include <asm/io_apic.h> +#include <asm/apic.h> +#include <asm/hw_irq.h> #include <asm/msidef.h> #include <asm/proto.h> #include <asm/iommu.h> @@ -39,6 +45,8 @@ #include "amd_iommu_proto.h" #include "amd_iommu_types.h" +#include "irq_remapping.h" +#include "pci.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -50,17 +58,9 @@ * physically contiguous memory regions it is mapping into page sizes * that we support. * - * Traditionally the IOMMU core just handed us the mappings directly, - * after making sure the size is an order of a 4KiB page and that the - * mapping has natural alignment. - * - * To retain this behavior, we currently advertise that we support - * all page sizes that are an order of 4KiB. - * - * If at some point we'd like to utilize the IOMMU core's new behavior, - * we could change this to advertise the real page sizes we support. + * 512GB Pages are not supported due to a hardware bug */ -#define AMD_IOMMU_PGSIZES (~0xFFFUL) +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) static DEFINE_RWLOCK(amd_iommu_devtable_lock); @@ -72,6 +72,9 @@ static DEFINE_SPINLOCK(iommu_pd_list_lock); static LIST_HEAD(dev_data_list); static DEFINE_SPINLOCK(dev_data_list_lock); +LIST_HEAD(ioapic_map); +LIST_HEAD(hpet_map); + /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. 
@@ -83,6 +86,8 @@ static struct iommu_ops amd_iommu_ops; static ATOMIC_NOTIFIER_HEAD(ppr_notifier); int amd_iommu_max_glx_val = -1; +static struct dma_map_ops amd_iommu_dma_ops; + /* * general struct to manage commands send to an IOMMU */ @@ -90,6 +95,8 @@ struct iommu_cmd { u32 data[4]; }; +struct kmem_cache *amd_iommu_irq_cache; + static void update_domain(struct protection_domain *domain); static int __init alloc_passthrough_domain(void); @@ -126,6 +133,9 @@ static void free_dev_data(struct iommu_dev_data *dev_data) list_del(&dev_data->dev_data_list); spin_unlock_irqrestore(&dev_data_list_lock, flags); + if (dev_data->group) + iommu_group_put(dev_data->group); + kfree(dev_data); } @@ -164,7 +174,7 @@ static inline u16 get_device_id(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); - return calc_devid(pdev->bus->number, pdev->devfn); + return PCI_DEVID(pdev->bus->number, pdev->devfn); } static struct iommu_dev_data *get_dev_data(struct device *dev) @@ -238,8 +248,8 @@ static bool check_device(struct device *dev) if (!dev || !dev->dma_mask) return false; - /* No device or no PCI device */ - if (dev->bus != &pci_bus_type) + /* No PCI device */ + if (!dev_is_pci(dev)) return false; devid = get_device_id(dev); @@ -254,11 +264,175 @@ static bool check_device(struct device *dev) return true; } +static struct pci_bus *find_hosted_bus(struct pci_bus *bus) +{ + while (!bus->self) { + if (!pci_is_root_bus(bus)) + bus = bus->parent; + else + return ERR_PTR(-ENODEV); + } + + return bus; +} + +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) + +static struct pci_dev *get_isolation_root(struct pci_dev *pdev) +{ + struct pci_dev *dma_pdev = pdev; + + /* Account for quirked devices */ + swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); + + /* + * If it's a multifunction device that does not support our + * required ACS flags, add to the same group as lowest numbered + * function that also does not suport the required ACS flags. + */ + if (dma_pdev->multifunction && + !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { + u8 i, slot = PCI_SLOT(dma_pdev->devfn); + + for (i = 0; i < 8; i++) { + struct pci_dev *tmp; + + tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); + if (!tmp) + continue; + + if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { + swap_pci_ref(&dma_pdev, tmp); + break; + } + pci_dev_put(tmp); + } + } + + /* + * Devices on the root bus go through the iommu. If that's not us, + * find the next upstream device and test ACS up to the root bus. + * Finding the next device may require skipping virtual buses. 
+ */ + while (!pci_is_root_bus(dma_pdev->bus)) { + struct pci_bus *bus = find_hosted_bus(dma_pdev->bus); + if (IS_ERR(bus)) + break; + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) + break; + + swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); + } + + return dma_pdev; +} + +static int use_pdev_iommu_group(struct pci_dev *pdev, struct device *dev) +{ + struct iommu_group *group = iommu_group_get(&pdev->dev); + int ret; + + if (!group) { + group = iommu_group_alloc(); + if (IS_ERR(group)) + return PTR_ERR(group); + + WARN_ON(&pdev->dev != dev); + } + + ret = iommu_group_add_device(group, dev); + iommu_group_put(group); + return ret; +} + +static int use_dev_data_iommu_group(struct iommu_dev_data *dev_data, + struct device *dev) +{ + if (!dev_data->group) { + struct iommu_group *group = iommu_group_alloc(); + if (IS_ERR(group)) + return PTR_ERR(group); + + dev_data->group = group; + } + + return iommu_group_add_device(dev_data->group, dev); +} + +static int init_iommu_group(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct iommu_group *group; + struct pci_dev *dma_pdev; + int ret; + + group = iommu_group_get(dev); + if (group) { + iommu_group_put(group); + return 0; + } + + dev_data = find_dev_data(get_device_id(dev)); + if (!dev_data) + return -ENOMEM; + + if (dev_data->alias_data) { + u16 alias; + struct pci_bus *bus; + + if (dev_data->alias_data->group) + goto use_group; + + /* + * If the alias device exists, it's effectively just a first + * level quirk for finding the DMA source. + */ + alias = amd_iommu_alias_table[dev_data->devid]; + dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); + if (dma_pdev) { + dma_pdev = get_isolation_root(dma_pdev); + goto use_pdev; + } + + /* + * If the alias is virtual, try to find a parent device + * and test whether the IOMMU group is actualy rooted above + * the alias. Be careful to also test the parent device if + * we think the alias is the root of the group. 
+ */ + bus = pci_find_bus(0, alias >> 8); + if (!bus) + goto use_group; + + bus = find_hosted_bus(bus); + if (IS_ERR(bus) || !bus->self) + goto use_group; + + dma_pdev = get_isolation_root(pci_dev_get(bus->self)); + if (dma_pdev != bus->self || (dma_pdev->multifunction && + !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))) + goto use_pdev; + + pci_dev_put(dma_pdev); + goto use_group; + } + + dma_pdev = get_isolation_root(pci_dev_get(to_pci_dev(dev))); +use_pdev: + ret = use_pdev_iommu_group(dma_pdev, dev); + pci_dev_put(dma_pdev); + return ret; +use_group: + return use_dev_data_iommu_group(dev_data->alias_data, dev); +} + static int iommu_init_device(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; u16 alias; + int ret; if (dev->archdata.iommu) return 0; @@ -281,6 +455,12 @@ static int iommu_init_device(struct device *dev) dev_data->alias_data = alias_data; } + ret = init_iommu_group(dev); + if (ret) { + free_dev_data(dev_data); + return ret; + } + if (pci_iommuv2_capable(pdev)) { struct amd_iommu *iommu; @@ -309,6 +489,8 @@ static void iommu_ignore_device(struct device *dev) static void iommu_uninit_device(struct device *dev) { + iommu_group_remove_device(dev); + /* * Nothing to do here - we keep dev_data around for unplugged devices * and reuse it when the device is re-plugged - not doing so would @@ -382,7 +564,6 @@ DECLARE_STATS_COUNTER(invalidate_iotlb); DECLARE_STATS_COUNTER(invalidate_iotlb_all); DECLARE_STATS_COUNTER(pri_requests); - static struct dentry *stats_dir; static struct dentry *de_fflush; @@ -402,7 +583,7 @@ static void amd_iommu_stats_init(void) return; de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, - (u32 *)&amd_iommu_unmap_flush); + &amd_iommu_unmap_flush); amd_iommu_stats_add(&compl_wait); amd_iommu_stats_add(&cnt_map_single); @@ -450,12 +631,27 @@ static void dump_command(unsigned long phys_addr) static void iommu_print_event(struct amd_iommu *iommu, void *__evt) { - u32 *event = __evt; - int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; - int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; - int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; - int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; - u64 address = (u64)(((u64)event[3]) << 32) | event[2]; + int type, devid, domid, flags; + volatile u32 *event = __evt; + int count = 0; + u64 address; + +retry: + type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; + devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; + domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; + flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; + address = (u64)(((u64)event[3]) << 32) | event[2]; + + if (type == 0) { + /* Did we hit the erratum? 
*/ + if (++count == LOOP_TIMEOUT) { + pr_err("AMD-Vi: No event written to event log\n"); + return; + } + udelay(1); + goto retry; + } printk(KERN_ERR "AMD-Vi: Event logged ["); @@ -463,26 +659,26 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) case EVENT_TYPE_ILL_DEV: printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " "address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); dump_dte_entry(devid); break; case EVENT_TYPE_IO_FAULT: printk("IO_PAGE_FAULT device=%02x:%02x.%x " "domain=0x%04x address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), domid, address, flags); break; case EVENT_TYPE_DEV_TAB_ERR: printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " "address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " "domain=0x%04x address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), domid, address, flags); break; case EVENT_TYPE_ILL_CMD: @@ -496,26 +692,25 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) case EVENT_TYPE_IOTLB_INV_TO: printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " "address=0x%016llx]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address); break; case EVENT_TYPE_INV_DEV_REQ: printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " "address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); break; default: printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); } + + memset(__evt, 0, 4 * sizeof(u32)); } static void iommu_poll_events(struct amd_iommu *iommu) { u32 head, tail; - unsigned long flags; - - spin_lock_irqsave(&iommu->lock, flags); head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); @@ -526,30 +721,14 @@ static void iommu_poll_events(struct amd_iommu *iommu) } writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); - - spin_unlock_irqrestore(&iommu->lock, flags); } -static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head) +static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) { struct amd_iommu_fault fault; - volatile u64 *raw; - int i; INC_STATS_COUNTER(pri_requests); - raw = (u64 *)(iommu->ppr_log + head); - - /* - * Hardware bug: Interrupt may arrive before the entry is written to - * memory. If this happens we need to wait for the entry to arrive. - */ - for (i = 0; i < LOOP_TIMEOUT; ++i) { - if (PPR_REQ_TYPE(raw[0]) != 0) - break; - udelay(1); - } - if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n"); return; @@ -561,54 +740,95 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head) fault.tag = PPR_TAG(raw[0]); fault.flags = PPR_FLAGS(raw[0]); - /* - * To detect the hardware bug we need to clear the entry - * to back to zero. 
- */ - raw[0] = raw[1] = 0; - atomic_notifier_call_chain(&ppr_notifier, 0, &fault); } static void iommu_poll_ppr_log(struct amd_iommu *iommu) { - unsigned long flags; u32 head, tail; if (iommu->ppr_log == NULL) return; - spin_lock_irqsave(&iommu->lock, flags); - head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); while (head != tail) { + volatile u64 *raw; + u64 entry[2]; + int i; - /* Handle PPR entry */ - iommu_handle_ppr_entry(iommu, head); + raw = (u64 *)(iommu->ppr_log + head); + + /* + * Hardware bug: Interrupt may arrive before the entry is + * written to memory. If this happens we need to wait for the + * entry to arrive. + */ + for (i = 0; i < LOOP_TIMEOUT; ++i) { + if (PPR_REQ_TYPE(raw[0]) != 0) + break; + udelay(1); + } - /* Update and refresh ring-buffer state*/ + /* Avoid memcpy function-call overhead */ + entry[0] = raw[0]; + entry[1] = raw[1]; + + /* + * To detect the hardware bug we need to clear the entry + * back to zero. + */ + raw[0] = raw[1] = 0UL; + + /* Update head pointer of hardware ring-buffer */ head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); - } - /* enable ppr interrupts again */ - writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); + /* Handle PPR entry */ + iommu_handle_ppr_entry(iommu, entry); - spin_unlock_irqrestore(&iommu->lock, flags); + /* Refresh ring-buffer information */ + head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); + } } irqreturn_t amd_iommu_int_thread(int irq, void *data) { - struct amd_iommu *iommu; + struct amd_iommu *iommu = (struct amd_iommu *) data; + u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - for_each_iommu(iommu) { - iommu_poll_events(iommu); - iommu_poll_ppr_log(iommu); - } + while (status & (MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK)) { + /* Enable EVT and PPR interrupts again */ + writel((MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK), + iommu->mmio_base + MMIO_STATUS_OFFSET); + + if (status & MMIO_STATUS_EVT_INT_MASK) { + pr_devel("AMD-Vi: Processing IOMMU Event Log\n"); + iommu_poll_events(iommu); + } + + if (status & MMIO_STATUS_PPR_INT_MASK) { + pr_devel("AMD-Vi: Processing IOMMU PPR Log\n"); + iommu_poll_ppr_log(iommu); + } + /* + * Hardware bug: ERBT1312 + * When re-enabling interrupt (by writing 1 + * to clear the bit), the hardware might also try to set + * the interrupt bit in the event status register. + * In this scenario, the bit will be set, and disable + * subsequent interrupts. + * + * Workaround: The IOMMU driver should read back the + * status register and check if the interrupt bits are cleared. 
+ * If not, driver will need to go through the interrupt handler + * again and re-clear the bits + */ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + } return IRQ_HANDLED; } @@ -701,7 +921,7 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); if (s) /* size bit - we flush more than one 4kb page */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ + if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; } @@ -743,7 +963,7 @@ static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid, address &= ~(0xfffULL); - cmd->data[0] = pasid & PASID_MASK; + cmd->data[0] = pasid; cmd->data[1] = domid; cmd->data[2] = lower_32_bits(address); cmd->data[3] = upper_32_bits(address); @@ -762,10 +982,10 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid, address &= ~(0xfffULL); cmd->data[0] = devid; - cmd->data[0] |= (pasid & 0xff) << 16; + cmd->data[0] |= ((pasid >> 8) & 0xff) << 16; cmd->data[0] |= (qdep & 0xff) << 24; cmd->data[1] = devid; - cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16; + cmd->data[1] |= (pasid & 0xff) << 16; cmd->data[2] = lower_32_bits(address); cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; cmd->data[3] = upper_32_bits(address); @@ -781,7 +1001,7 @@ static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid, cmd->data[0] = devid; if (gn) { - cmd->data[1] = pasid & PASID_MASK; + cmd->data[1] = pasid; cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK; } cmd->data[3] = tag & 0x1ff; @@ -796,6 +1016,13 @@ static void build_inv_all(struct iommu_cmd *cmd) CMD_SET_TYPE(cmd, CMD_INV_ALL); } +static void build_inv_irt(struct iommu_cmd *cmd, u16 devid) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + CMD_SET_TYPE(cmd, CMD_INV_IRT); +} + /* * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. 
@@ -917,12 +1144,32 @@ static void iommu_flush_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } +static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) +{ + struct iommu_cmd cmd; + + build_inv_irt(&cmd, devid); + + iommu_queue_command(iommu, &cmd); +} + +static void iommu_flush_irt_all(struct amd_iommu *iommu) +{ + u32 devid; + + for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++) + iommu_flush_irt(iommu, devid); + + iommu_completion_wait(iommu); +} + void iommu_flush_all_caches(struct amd_iommu *iommu) { if (iommu_feature(iommu, FEATURE_IA)) { iommu_flush_all(iommu); } else { iommu_flush_dte_all(iommu); + iommu_flush_irt_all(iommu); iommu_flush_tlb_all(iommu); } } @@ -1252,6 +1499,10 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, /* Large PTE found which maps this address */ unmap_size = PTE_PAGE_SIZE(*pte); + + /* Only unmap from the first pte in the page */ + if ((unmap_size - 1) & bus_addr) + break; count = PAGE_SIZE_PTE_COUNT(unmap_size); for (i = 0; i < count; i++) pte[i] = 0ULL; @@ -1261,7 +1512,7 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, unmapped += unmap_size; } - BUG_ON(!is_power_of_2(unmapped)); + BUG_ON(unmapped && !is_power_of_2(unmapped)); return unmapped; } @@ -1661,34 +1912,59 @@ static void domain_id_free(int id) write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } +#define DEFINE_FREE_PT_FN(LVL, FN) \ +static void free_pt_##LVL (unsigned long __pt) \ +{ \ + unsigned long p; \ + u64 *pt; \ + int i; \ + \ + pt = (u64 *)__pt; \ + \ + for (i = 0; i < 512; ++i) { \ + if (!IOMMU_PTE_PRESENT(pt[i])) \ + continue; \ + \ + p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ + FN(p); \ + } \ + free_page((unsigned long)pt); \ +} + +DEFINE_FREE_PT_FN(l2, free_page) +DEFINE_FREE_PT_FN(l3, free_pt_l2) +DEFINE_FREE_PT_FN(l4, free_pt_l3) +DEFINE_FREE_PT_FN(l5, free_pt_l4) +DEFINE_FREE_PT_FN(l6, free_pt_l5) + static void free_pagetable(struct protection_domain *domain) { - int i, j; - u64 *p1, *p2, *p3; - - p1 = domain->pt_root; + unsigned long root = (unsigned long)domain->pt_root; - if (!p1) - return; - - for (i = 0; i < 512; ++i) { - if (!IOMMU_PTE_PRESENT(p1[i])) - continue; - - p2 = IOMMU_PTE_PAGE(p1[i]); - for (j = 0; j < 512; ++j) { - if (!IOMMU_PTE_PRESENT(p2[j])) - continue; - p3 = IOMMU_PTE_PAGE(p2[j]); - free_page((unsigned long)p3); - } - - free_page((unsigned long)p2); + switch (domain->mode) { + case PAGE_MODE_NONE: + break; + case PAGE_MODE_1_LEVEL: + free_page(root); + break; + case PAGE_MODE_2_LEVEL: + free_pt_l2(root); + break; + case PAGE_MODE_3_LEVEL: + free_pt_l3(root); + break; + case PAGE_MODE_4_LEVEL: + free_pt_l4(root); + break; + case PAGE_MODE_5_LEVEL: + free_pt_l5(root); + break; + case PAGE_MODE_6_LEVEL: + free_pt_l6(root); + break; + default: + BUG(); } - - free_page((unsigned long)p1); - - domain->pt_root = NULL; } static void free_gcr3_tbl_level1(u64 *tbl) @@ -2035,24 +2311,24 @@ out_err: } /* FIXME: Move this to PCI code */ -#define PCI_PRI_TLP_OFF (1 << 2) +#define PCI_PRI_TLP_OFF (1 << 15) -bool pci_pri_tlp_required(struct pci_dev *pdev) +static bool pci_pri_tlp_required(struct pci_dev *pdev) { - u16 control; + u16 status; int pos; pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); if (!pos) return false; - pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); + pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); - return (control & PCI_PRI_TLP_OFF) ? true : false; + return (status & PCI_PRI_TLP_OFF) ? 
true : false; } /* - * If a device is not yet associated with a domain, this function does + * If a device is not yet associated with a domain, this function * assigns it visible for the hardware */ static int attach_device(struct device *dev, @@ -2218,20 +2494,34 @@ static int device_change_notifier(struct notifier_block *nb, iommu_init_device(dev); + /* + * dev_data is still NULL and + * got initialized in iommu_init_device + */ + dev_data = get_dev_data(dev); + + if (iommu_pass_through || dev_data->iommu_v2) { + dev_data->passthrough = true; + attach_device(dev, pt_domain); + break; + } + domain = domain_for_device(dev); /* allocate a protection domain if a device is added */ dma_domain = find_protection_domain(devid); - if (dma_domain) - goto out; - dma_domain = dma_ops_domain_alloc(); - if (!dma_domain) - goto out; - dma_domain->target_dev = devid; + if (!dma_domain) { + dma_domain = dma_ops_domain_alloc(); + if (!dma_domain) + goto out; + dma_domain->target_dev = devid; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + list_add_tail(&dma_domain->list, &iommu_pd_list); + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + } - spin_lock_irqsave(&iommu_pd_list_lock, flags); - list_add_tail(&dma_domain->list, &iommu_pd_list); - spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + dev->archdata.dma_ops = &amd_iommu_dma_ops; break; case BUS_NOTIFY_DEL_DEVICE: @@ -2286,7 +2576,7 @@ static struct protection_domain *get_domain(struct device *dev) if (domain != NULL) return domain; - /* Device not bount yet - bind it */ + /* Device not bound yet - bind it */ dma_dom = find_protection_domain(devid); if (!dma_dom) dma_dom = amd_iommu_rlookup_table[devid]->default_dom; @@ -2593,24 +2883,6 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, } /* - * This is a special map_sg function which is used if we should map a - * device which is not handled by an AMD IOMMU in the system. - */ -static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) -{ - struct scatterlist *s; - int i; - - for_each_sg(sglist, s, nelems, i) { - s->dma_address = (dma_addr_t)sg_phys(s); - s->dma_length = s->length; - } - - return nelems; -} - -/* * The exported map_sg function for dma_ops (handles scatter-gather * lists). */ @@ -2629,9 +2901,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_map_sg); domain = get_domain(dev); - if (PTR_ERR(domain) == -EINVAL) - return map_sg_no_iommu(dev, sglist, nelems, dir); - else if (IS_ERR(domain)) + if (IS_ERR(domain)) return 0; dma_mask = *dev->dma_mask; @@ -2707,7 +2977,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, * The exported alloc_coherent function for dma_ops. */ static void *alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag) + dma_addr_t *dma_addr, gfp_t flag, + struct dma_attrs *attrs) { unsigned long flags; void *virt_addr; @@ -2765,7 +3036,8 @@ out_free: * The exported free_coherent function for dma_ops. */ static void free_coherent(struct device *dev, size_t size, - void *virt_addr, dma_addr_t dma_addr) + void *virt_addr, dma_addr_t dma_addr, + struct dma_attrs *attrs) { unsigned long flags; struct protection_domain *domain; @@ -2804,7 +3076,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) * we don't need to preallocate the protection domains anymore. * For now we have to. 
*/ -static void prealloc_protection_domains(void) +static void __init prealloc_protection_domains(void) { struct iommu_dev_data *dev_data; struct dma_ops_domain *dma_dom; @@ -2823,7 +3095,7 @@ static void prealloc_protection_domains(void) alloc_passthrough_domain(); dev_data->passthrough = true; attach_device(&dev->dev, pt_domain); - pr_info("AMD-Vi: Using passthough domain for device %s\n", + pr_info("AMD-Vi: Using passthrough domain for device %s\n", dev_name(&dev->dev)); } @@ -2846,8 +3118,8 @@ static void prealloc_protection_domains(void) } static struct dma_map_ops amd_iommu_dma_ops = { - .alloc_coherent = alloc_coherent, - .free_coherent = free_coherent, + .alloc = alloc_coherent, + .free = free_coherent, .map_page = map_page, .unmap_page = unmap_page, .map_sg = map_sg, @@ -2927,13 +3199,17 @@ int __init amd_iommu_init_dma_ops(void) amd_iommu_stats_init(); + if (amd_iommu_unmap_flush) + pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n"); + else + pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n"); + return 0; free_domains: for_each_iommu(iommu) { - if (iommu->default_dom) - dma_ops_domain_free(iommu->default_dom); + dma_ops_domain_free(iommu->default_dom); } return ret; @@ -3033,6 +3309,10 @@ static int amd_iommu_domain_init(struct iommu_domain *dom) dom->priv = domain; + dom->geometry.aperture_start = 0; + dom->geometry.aperture_end = ~0ULL; + dom->geometry.force_aperture = true; + return 0; out_free: @@ -3154,7 +3434,7 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, - unsigned long iova) + dma_addr_t iova) { struct protection_domain *domain = dom->priv; unsigned long offset_mask; @@ -3186,31 +3466,13 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return 1; + case IOMMU_CAP_INTR_REMAP: + return irq_remapping_enabled; } return 0; } -static int amd_iommu_device_group(struct device *dev, unsigned int *groupid) -{ - struct iommu_dev_data *dev_data = dev->archdata.iommu; - struct pci_dev *pdev = to_pci_dev(dev); - u16 devid; - - if (!dev_data) - return -ENODEV; - - if (pdev->is_virtfn || !iommu_group_mf) - devid = dev_data->devid; - else - devid = calc_devid(pdev->bus->number, - PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); - - *groupid = amd_iommu_alias_table[devid]; - - return 0; -} - static struct iommu_ops amd_iommu_ops = { .domain_init = amd_iommu_domain_init, .domain_destroy = amd_iommu_domain_destroy, @@ -3220,7 +3482,6 @@ static struct iommu_ops amd_iommu_ops = { .unmap = amd_iommu_unmap, .iova_to_phys = amd_iommu_iova_to_phys, .domain_has_cap = amd_iommu_domain_has_cap, - .device_group = amd_iommu_device_group, .pgsize_bitmap = AMD_IOMMU_PGSIZES, }; @@ -3238,8 +3499,6 @@ int __init amd_iommu_init_passthrough(void) { struct iommu_dev_data *dev_data; struct pci_dev *dev = NULL; - struct amd_iommu *iommu; - u16 devid; int ret; ret = alloc_passthrough_domain(); @@ -3253,12 +3512,6 @@ int __init amd_iommu_init_passthrough(void) dev_data = get_dev_data(&dev->dev); dev_data->passthrough = true; - devid = get_device_id(&dev->dev); - - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) - continue; - attach_device(&dev->dev, pt_domain); } @@ -3634,3 +3887,469 @@ int amd_iommu_device_info(struct pci_dev *pdev, return 0; } EXPORT_SYMBOL(amd_iommu_device_info); + +#ifdef CONFIG_IRQ_REMAP + +/***************************************************************************** + * + * Interrupt Remapping Implementation + * + 
*****************************************************************************/ + +union irte { + u32 val; + struct { + u32 valid : 1, + no_fault : 1, + int_type : 3, + rq_eoi : 1, + dm : 1, + rsvd_1 : 1, + destination : 8, + vector : 8, + rsvd_2 : 8; + } fields; +}; + +#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) +#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) +#define DTE_IRQ_TABLE_LEN (8ULL << 1) +#define DTE_IRQ_REMAP_ENABLE 1ULL + +static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) +{ + u64 dte; + + dte = amd_iommu_dev_table[devid].data[2]; + dte &= ~DTE_IRQ_PHYS_ADDR_MASK; + dte |= virt_to_phys(table->table); + dte |= DTE_IRQ_REMAP_INTCTL; + dte |= DTE_IRQ_TABLE_LEN; + dte |= DTE_IRQ_REMAP_ENABLE; + + amd_iommu_dev_table[devid].data[2] = dte; +} + +#define IRTE_ALLOCATED (~1U) + +static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) +{ + struct irq_remap_table *table = NULL; + struct amd_iommu *iommu; + unsigned long flags; + u16 alias; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + goto out_unlock; + + table = irq_lookup_table[devid]; + if (table) + goto out; + + alias = amd_iommu_alias_table[devid]; + table = irq_lookup_table[alias]; + if (table) { + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + goto out; + } + + /* Nothing there yet, allocate new irq remapping table */ + table = kzalloc(sizeof(*table), GFP_ATOMIC); + if (!table) + goto out; + + /* Initialize table spin-lock */ + spin_lock_init(&table->lock); + + if (ioapic) + /* Keep the first 32 indexes free for IOAPIC interrupts */ + table->min_index = 32; + + table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC); + if (!table->table) { + kfree(table); + table = NULL; + goto out; + } + + memset(table->table, 0, MAX_IRQS_PER_TABLE * sizeof(u32)); + + if (ioapic) { + int i; + + for (i = 0; i < 32; ++i) + table->table[i] = IRTE_ALLOCATED; + } + + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + if (devid != alias) { + irq_lookup_table[alias] = table; + set_dte_irq_entry(alias, table); + iommu_flush_dte(iommu, alias); + } + +out: + iommu_completion_wait(iommu); + +out_unlock: + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return table; +} + +static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) +{ + struct irq_remap_table *table; + unsigned long flags; + int index, c; + + table = get_irq_table(devid, false); + if (!table) + return -ENODEV; + + spin_lock_irqsave(&table->lock, flags); + + /* Scan table for free entries */ + for (c = 0, index = table->min_index; + index < MAX_IRQS_PER_TABLE; + ++index) { + if (table->table[index] == 0) + c += 1; + else + c = 0; + + if (c == count) { + struct irq_2_irte *irte_info; + + for (; c != 0; --c) + table->table[index - c + 1] = IRTE_ALLOCATED; + + index -= count - 1; + + cfg->remapped = 1; + irte_info = &cfg->irq_2_irte; + irte_info->devid = devid; + irte_info->index = index; + + goto out; + } + } + + index = -ENOSPC; + +out: + spin_unlock_irqrestore(&table->lock, flags); + + return index; +} + +static int get_irte(u16 devid, int index, union irte *irte) +{ + struct irq_remap_table *table; + unsigned long flags; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + irte->val = table->table[index]; + spin_unlock_irqrestore(&table->lock, flags); + + return 0; +} + +static 
int modify_irte(u16 devid, int index, union irte irte) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return -EINVAL; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = irte.val; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); + + return 0; +} + +static void free_irte(u16 devid, int index) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return; + + table = get_irq_table(devid, false); + if (!table) + return; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = 0; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); +} + +static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) +{ + struct irq_remap_table *table; + struct irq_2_irte *irte_info; + struct irq_cfg *cfg; + union irte irte; + int ioapic_id; + int index; + int devid; + int ret; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_irte; + ioapic_id = mpc_ioapic_id(attr->ioapic); + devid = get_ioapic_devid(ioapic_id); + + if (devid < 0) + return devid; + + table = get_irq_table(devid, true); + if (table == NULL) + return -ENOMEM; + + index = attr->ioapic_pin; + + /* Setup IRQ remapping info */ + cfg->remapped = 1; + irte_info->devid = devid; + irte_info->index = index; + + /* Setup IRTE for IOMMU */ + irte.val = 0; + irte.fields.vector = vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = destination; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + ret = modify_irte(devid, index, irte); + if (ret) + return ret; + + /* Setup IOAPIC entry */ + memset(entry, 0, sizeof(*entry)); + + entry->vector = index; + entry->mask = 0; + entry->trigger = attr->trigger; + entry->polarity = attr->polarity; + + /* + * Mask level triggered irqs. 
+ */ + if (attr->trigger) + entry->mask = 1; + + return 0; +} + +static int set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_2_irte *irte_info; + unsigned int dest, irq; + struct irq_cfg *cfg; + union irte irte; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -1; + + cfg = data->chip_data; + irq = data->irq; + irte_info = &cfg->irq_2_irte; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + if (get_irte(irte_info->devid, irte_info->index, &irte)) + return -EBUSY; + + if (assign_irq_vector(irq, cfg, mask)) + return -EBUSY; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("AMD-Vi: Failed to recover vector for irq %d\n", irq); + return err; + } + + irte.fields.vector = cfg->vector; + irte.fields.destination = dest; + + modify_irte(irte_info->devid, irte_info->index, irte); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + cpumask_copy(data->affinity, mask); + + return 0; +} + +static int free_irq(int irq) +{ + struct irq_2_irte *irte_info; + struct irq_cfg *cfg; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_irte; + + free_irte(irte_info->devid, irte_info->index); + + return 0; +} + +static void compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_2_irte *irte_info; + struct irq_cfg *cfg; + union irte irte; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return; + + irte_info = &cfg->irq_2_irte; + + irte.val = 0; + irte.fields.vector = cfg->vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = dest; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + modify_irte(irte_info->devid, irte_info->index, irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO; + msg->data = irte_info->index; +} + +static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) +{ + struct irq_cfg *cfg; + int index; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + devid = get_device_id(&pdev->dev); + index = alloc_irq_index(cfg, devid, nvec); + + return index < 0 ? 
MAX_IRQS_PER_TABLE : index; +} + +static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, + int index, int offset) +{ + struct irq_2_irte *irte_info; + struct irq_cfg *cfg; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + if (index >= MAX_IRQS_PER_TABLE) + return 0; + + devid = get_device_id(&pdev->dev); + irte_info = &cfg->irq_2_irte; + + cfg->remapped = 1; + irte_info->devid = devid; + irte_info->index = index + offset; + + return 0; +} + +static int setup_hpet_msi(unsigned int irq, unsigned int id) +{ + struct irq_2_irte *irte_info; + struct irq_cfg *cfg; + int index, devid; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_irte; + devid = get_hpet_devid(id); + if (devid < 0) + return devid; + + index = alloc_irq_index(cfg, devid, 1); + if (index < 0) + return index; + + cfg->remapped = 1; + irte_info->devid = devid; + irte_info->index = index; + + return 0; +} + +struct irq_remap_ops amd_iommu_irq_ops = { + .supported = amd_iommu_supported, + .prepare = amd_iommu_prepare, + .enable = amd_iommu_enable, + .disable = amd_iommu_disable, + .reenable = amd_iommu_reenable, + .enable_faulting = amd_iommu_enable_faulting, + .setup_ioapic_entry = setup_ioapic_entry, + .set_affinity = set_affinity, + .free_irq = free_irq, + .compose_msi_msg = compose_msi_msg, + .msi_alloc_irq = msi_alloc_irq, + .msi_setup_irq = msi_setup_irq, + .setup_hpet_msi = setup_hpet_msi, +}; +#endif diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index a35e98ad972..0e08545d729 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -31,9 +31,12 @@ #include <asm/gart.h> #include <asm/x86_init.h> #include <asm/iommu_table.h> +#include <asm/io_apic.h> +#include <asm/irq_remapping.h> #include "amd_iommu_proto.h" #include "amd_iommu_types.h" +#include "irq_remapping.h" /* * definitions for the ACPI scanning code @@ -53,6 +56,10 @@ #define IVHD_DEV_ALIAS_RANGE 0x43 #define IVHD_DEV_EXT_SELECT 0x46 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 +#define IVHD_DEV_SPECIAL 0x48 + +#define IVHD_SPECIAL_IOAPIC 1 +#define IVHD_SPECIAL_HPET 2 #define IVHD_FLAG_HT_TUN_EN_MASK 0x01 #define IVHD_FLAG_PASSPW_EN_MASK 0x02 @@ -91,7 +98,7 @@ struct ivhd_header { u64 mmio_phys; u16 pci_seg; u16 info; - u32 reserved; + u32 efr; } __attribute__((packed)); /* @@ -121,15 +128,16 @@ struct ivmd_header { } __attribute__((packed)); bool amd_iommu_dump; +bool amd_iommu_irq_remap __read_mostly; -static int __initdata amd_iommu_detected; +static bool amd_iommu_detected; static bool __initdata amd_iommu_disabled; u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ -bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ +u32 amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -142,18 +150,14 @@ int amd_iommus_present; bool amd_iommu_np_cache __read_mostly; bool amd_iommu_iotlb_sup __read_mostly = true; -u32 amd_iommu_max_pasids __read_mostly = ~0; +u32 amd_iommu_max_pasid __read_mostly = ~0; bool amd_iommu_v2_present __read_mostly; +bool amd_iommu_pc_present __read_mostly; bool amd_iommu_force_isolation __read_mostly; /* - * The ACPI table parsing functions set this variable on an error - */ -static int __initdata amd_iommu_init_err; - -/* * List of protection domains - used during resume */ 
LIST_HEAD(amd_iommu_pd_list); @@ -181,7 +185,13 @@ u16 *amd_iommu_alias_table; struct amd_iommu **amd_iommu_rlookup_table; /* - * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap + * This table is used to find the irq remapping table for a given device id + * quickly. + */ +struct irq_remap_table **irq_lookup_table; + +/* + * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. */ unsigned long *amd_iommu_pd_alloc_bitmap; @@ -190,11 +200,31 @@ static u32 dev_table_size; /* size of the device table */ static u32 alias_table_size; /* size of the alias table */ static u32 rlookup_table_size; /* size if the rlookup table */ -/* - * This function flushes all internal caches of - * the IOMMU used by this driver. - */ -extern void iommu_flush_all_caches(struct amd_iommu *iommu); +enum iommu_init_state { + IOMMU_START_STATE, + IOMMU_IVRS_DETECTED, + IOMMU_ACPI_FINISHED, + IOMMU_ENABLED, + IOMMU_PCI_INIT, + IOMMU_INTERRUPTS_EN, + IOMMU_DMA_OPS, + IOMMU_INITIALIZED, + IOMMU_NOT_FOUND, + IOMMU_INIT_ERROR, +}; + +/* Early ioapic and hpet maps from kernel command line */ +#define EARLY_MAP_SIZE 4 +static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE]; +static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE]; +static int __initdata early_ioapic_map_size; +static int __initdata early_hpet_map_size; +static bool __initdata cmdline_maps; + +static enum iommu_init_state init_state = IOMMU_START_STATE; + +static int amd_iommu_enable_interrupts(void); +static int __init iommu_go_to_state(enum iommu_init_state state); static inline void update_last_devid(u16 devid) { @@ -319,23 +349,6 @@ static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) /* Function to enable the hardware */ static void iommu_enable(struct amd_iommu *iommu) { - static const char * const feat_str[] = { - "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", - "IA", "GA", "HE", "PC", NULL - }; - int i; - - printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx", - dev_name(&iommu->dev->dev), iommu->cap_ptr); - - if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - printk(KERN_CONT " extended features: "); - for (i = 0; feat_str[i]; ++i) - if (iommu_feature(iommu, (1ULL << i))) - printk(KERN_CONT " %s", feat_str[i]); - } - printk(KERN_CONT "\n"); - iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } @@ -356,31 +369,23 @@ static void iommu_disable(struct amd_iommu *iommu) * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in * the system has one. */ -static u8 * __init iommu_map_mmio_space(u64 address) +static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) { - u8 *ret; - - if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) { - pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n", - address); + if (!request_mem_region(address, end, "amd_iommu")) { + pr_err("AMD-Vi: Can not reserve memory region %llx-%llx for mmio\n", + address, end); pr_err("AMD-Vi: This is a BIOS bug. 
Please contact your hardware vendor\n"); return NULL; } - ret = ioremap_nocache(address, MMIO_REGION_LENGTH); - if (ret != NULL) - return ret; - - release_mem_region(address, MMIO_REGION_LENGTH); - - return NULL; + return (u8 __iomem *)ioremap_nocache(address, end); } static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) { if (iommu->mmio_base) iounmap(iommu->mmio_base); - release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); + release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end); } /**************************************************************************** @@ -409,7 +414,7 @@ static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) u32 cap; cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); + update_last_devid(PCI_DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); return 0; } @@ -426,7 +431,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) p += sizeof(*h); end += h->length; - find_last_devid_on_pci(PCI_BUS(h->devid), + find_last_devid_on_pci(PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid), PCI_FUNC(h->devid), h->cap_ptr); @@ -469,11 +474,9 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) */ for (i = 0; i < table->length; ++i) checksum += p[i]; - if (checksum != 0) { + if (checksum != 0) /* ACPI table corrupt */ - amd_iommu_init_err = -ENODEV; - return 0; - } + return -ENODEV; p += IVRS_HEADER_LENGTH; @@ -496,7 +499,7 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) /**************************************************************************** * - * The following functions belong the the code path which parses the ACPI table + * The following functions belong to the code path which parses the ACPI table * the second time. In this ACPI parsing iteration we allocate IOMMU specific * data structures, initialize the device/alias/rlookup table and also * basically initialize the hardware. @@ -708,8 +711,68 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, set_iommu_for_device(iommu, devid); } +static int __init add_special_device(u8 type, u8 id, u16 devid, bool cmd_line) +{ + struct devid_map *entry; + struct list_head *list; + + if (type == IVHD_SPECIAL_IOAPIC) + list = &ioapic_map; + else if (type == IVHD_SPECIAL_HPET) + list = &hpet_map; + else + return -EINVAL; + + list_for_each_entry(entry, list, list) { + if (!(entry->id == id && entry->cmd_line)) + continue; + + pr_info("AMD-Vi: Command-line override present for %s id %d - ignoring\n", + type == IVHD_SPECIAL_IOAPIC ? 
"IOAPIC" : "HPET", id); + + return 0; + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->id = id; + entry->devid = devid; + entry->cmd_line = cmd_line; + + list_add_tail(&entry->list, list); + + return 0; +} + +static int __init add_early_maps(void) +{ + int i, ret; + + for (i = 0; i < early_ioapic_map_size; ++i) { + ret = add_special_device(IVHD_SPECIAL_IOAPIC, + early_ioapic_map[i].id, + early_ioapic_map[i].devid, + early_ioapic_map[i].cmd_line); + if (ret) + return ret; + } + + for (i = 0; i < early_hpet_map_size; ++i) { + ret = add_special_device(IVHD_SPECIAL_HPET, + early_hpet_map[i].id, + early_hpet_map[i].devid, + early_hpet_map[i].cmd_line); + if (ret) + return ret; + } + + return 0; +} + /* - * Reads the device exclusion range from ACPI and initialize IOMMU with + * Reads the device exclusion range from ACPI and initializes the IOMMU with * it */ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) @@ -725,101 +788,17 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) * per device. But we can enable the exclusion range per * device. This is done here */ - set_dev_entry_bit(m->devid, DEV_ENTRY_EX); + set_dev_entry_bit(devid, DEV_ENTRY_EX); iommu->exclusion_start = m->range_start; iommu->exclusion_length = m->range_length; } } /* - * This function reads some important data from the IOMMU PCI space and - * initializes the driver data structure with it. It reads the hardware - * capabilities and the first/last device entries - */ -static void __init init_iommu_from_pci(struct amd_iommu *iommu) -{ - int cap_ptr = iommu->cap_ptr; - u32 range, misc, low, high; - int i, j; - - pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, - &iommu->cap); - pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, - &range); - pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, - &misc); - - iommu->first_device = calc_devid(MMIO_GET_BUS(range), - MMIO_GET_FD(range)); - iommu->last_device = calc_devid(MMIO_GET_BUS(range), - MMIO_GET_LD(range)); - iommu->evt_msi_num = MMIO_MSI_NUM(misc); - - if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) - amd_iommu_iotlb_sup = false; - - /* read extended feature bits */ - low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); - high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); - - iommu->features = ((u64)high << 32) | low; - - if (iommu_feature(iommu, FEATURE_GT)) { - int glxval; - u32 pasids; - u64 shift; - - shift = iommu->features & FEATURE_PASID_MASK; - shift >>= FEATURE_PASID_SHIFT; - pasids = (1 << shift); - - amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids); - - glxval = iommu->features & FEATURE_GLXVAL_MASK; - glxval >>= FEATURE_GLXVAL_SHIFT; - - if (amd_iommu_max_glx_val == -1) - amd_iommu_max_glx_val = glxval; - else - amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); - } - - if (iommu_feature(iommu, FEATURE_GT) && - iommu_feature(iommu, FEATURE_PPR)) { - iommu->is_iommu_v2 = true; - amd_iommu_v2_present = true; - } - - if (!is_rd890_iommu(iommu->dev)) - return; - - /* - * Some rd890 systems may not be fully reconfigured by the BIOS, so - * it's necessary for us to store this information so it can be - * reprogrammed on resume - */ - - pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, - &iommu->stored_addr_lo); - pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, - &iommu->stored_addr_hi); - - /* Low bit locks writes to configuration space */ - iommu->stored_addr_lo &= ~1; - - for (i = 0; i < 6; i++) - 
for (j = 0; j < 0x12; j++) - iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); - - for (i = 0; i < 0x83; i++) - iommu->stored_l2[i] = iommu_read_l2(iommu, i); -} - -/* * Takes a pointer to an AMD IOMMU entry in the ACPI table and * initializes the hardware and our data structures with it. */ -static void __init init_iommu_from_acpi(struct amd_iommu *iommu, +static int __init init_iommu_from_acpi(struct amd_iommu *iommu, struct ivhd_header *h) { u8 *p = (u8 *)h; @@ -828,6 +807,12 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, u32 dev_i, ext_flags = 0; bool alias = false; struct ivhd_entry *e; + int ret; + + + ret = add_early_maps(); + if (ret) + return ret; /* * First save the recommended feature enable bits from ACPI @@ -848,10 +833,10 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" " last device %02x:%02x.%x flags: %02x\n", - PCI_BUS(iommu->first_device), + PCI_BUS_NUM(iommu->first_device), PCI_SLOT(iommu->first_device), PCI_FUNC(iommu->first_device), - PCI_BUS(iommu->last_device), + PCI_BUS_NUM(iommu->last_device), PCI_SLOT(iommu->last_device), PCI_FUNC(iommu->last_device), e->flags); @@ -865,7 +850,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " "flags: %02x\n", - PCI_BUS(e->devid), + PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags); @@ -877,7 +862,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, DUMP_printk(" DEV_SELECT_RANGE_START\t " "devid: %02x:%02x.%x flags: %02x\n", - PCI_BUS(e->devid), + PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags); @@ -891,11 +876,11 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " "flags: %02x devid_to: %02x:%02x.%x\n", - PCI_BUS(e->devid), + PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, - PCI_BUS(e->ext >> 8), + PCI_BUS_NUM(e->ext >> 8), PCI_SLOT(e->ext >> 8), PCI_FUNC(e->ext >> 8)); @@ -910,11 +895,11 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, DUMP_printk(" DEV_ALIAS_RANGE\t\t " "devid: %02x:%02x.%x flags: %02x " "devid_to: %02x:%02x.%x\n", - PCI_BUS(e->devid), + PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, - PCI_BUS(e->ext >> 8), + PCI_BUS_NUM(e->ext >> 8), PCI_SLOT(e->ext >> 8), PCI_FUNC(e->ext >> 8)); @@ -928,7 +913,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " "flags: %02x ext: %08x\n", - PCI_BUS(e->devid), + PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, e->ext); @@ -941,7 +926,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " "%02x:%02x.%x flags: %02x ext: %08x\n", - PCI_BUS(e->devid), + PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, e->ext); @@ -954,7 +939,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_RANGE_END: DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", - PCI_BUS(e->devid), + PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid)); @@ -969,12 +954,43 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, flags, ext_flags); } break; + case IVHD_DEV_SPECIAL: { + u8 handle, type; + const char *var; + u16 devid; + int ret; + + handle = e->ext & 0xff; + devid = (e->ext >> 8) & 0xffff; + type = (e->ext 
>> 24) & 0xff; + + if (type == IVHD_SPECIAL_IOAPIC) + var = "IOAPIC"; + else if (type == IVHD_SPECIAL_HPET) + var = "HPET"; + else + var = "UNKNOWN"; + + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n", + var, (int)handle, + PCI_BUS_NUM(devid), + PCI_SLOT(devid), + PCI_FUNC(devid)); + + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + ret = add_special_device(type, handle, devid, false); + if (ret) + return ret; + break; + } default: break; } p += ivhd_entry_length(p); } + + return 0; } /* Initializes the device->iommu mapping for the driver */ @@ -1008,12 +1024,46 @@ static void __init free_iommu_all(void) } /* + * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations) + * Workaround: + * BIOS should disable L2B micellaneous clock gating by setting + * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b + */ +static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) +{ + u32 value; + + if ((boot_cpu_data.x86 != 0x15) || + (boot_cpu_data.x86_model < 0x10) || + (boot_cpu_data.x86_model > 0x1f)) + return; + + pci_write_config_dword(iommu->dev, 0xf0, 0x90); + pci_read_config_dword(iommu->dev, 0xf4, &value); + + if (value & BIT(2)) + return; + + /* Select NB indirect register 0x90 and enable writing */ + pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); + + pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); + pr_info("AMD-Vi: Applying erratum 746 workaround for IOMMU at %s\n", + dev_name(&iommu->dev->dev)); + + /* Clear the enable writing bit */ + pci_write_config_dword(iommu->dev, 0xf0, 0x90); +} + +/* * This function clues the initialization function for one IOMMU * together and also allocates the command buffer and programs the * hardware. It does NOT enable the IOMMU. This is done afterwards. */ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) { + int ret; + spin_lock_init(&iommu->lock); /* Add IOMMU to internal data structures */ @@ -1031,14 +1081,22 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) /* * Copy data from ACPI table entry to the iommu struct */ - iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); - if (!iommu->dev) - return 1; - + iommu->devid = h->devid; iommu->cap_ptr = h->cap_ptr; iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; - iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); + + /* Check if IVHD EFR contains proper max banks/counters */ + if ((h->efr != 0) && + ((h->efr & (0xF << 13)) != 0) && + ((h->efr & (0x3F << 17)) != 0)) { + iommu->mmio_phys_end = MMIO_REG_END_OFFSET; + } else { + iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; + } + + iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, + iommu->mmio_phys_end); if (!iommu->mmio_base) return -ENOMEM; @@ -1052,20 +1110,19 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->int_enabled = false; - init_iommu_from_pci(iommu); - init_iommu_from_acpi(iommu, h); - init_iommu_devices(iommu); + ret = init_iommu_from_acpi(iommu, h); + if (ret) + return ret; - if (iommu_feature(iommu, FEATURE_PPR)) { - iommu->ppr_log = alloc_ppr_log(iommu); - if (!iommu->ppr_log) - return -ENOMEM; - } + /* + * Make sure IOMMU is not considered to translate itself. The IVRS + * table tells us so, but this is a lie! 
+ */ + amd_iommu_rlookup_table[iommu->devid] = NULL; - if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) - amd_iommu_np_cache = true; + init_iommu_devices(iommu); - return pci_enable_device(iommu->dev); + return 0; } /* @@ -1089,23 +1146,19 @@ static int __init init_iommu_all(struct acpi_table_header *table) DUMP_printk("device: %02x:%02x.%01x cap: %04x " "seg: %d flags: %01x info %04x\n", - PCI_BUS(h->devid), PCI_SLOT(h->devid), + PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid), PCI_FUNC(h->devid), h->cap_ptr, h->pci_seg, h->flags, h->info); DUMP_printk(" mmio-addr: %016llx\n", h->mmio_phys); iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); - if (iommu == NULL) { - amd_iommu_init_err = -ENOMEM; - return 0; - } + if (iommu == NULL) + return -ENOMEM; ret = init_iommu_one(iommu, h); - if (ret) { - amd_iommu_init_err = ret; - return 0; - } + if (ret) + return ret; break; default: break; @@ -1118,10 +1171,184 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } + +static void init_iommu_perf_ctr(struct amd_iommu *iommu) +{ + u64 val = 0xabcd, val2 = 0; + + if (!iommu_feature(iommu, FEATURE_PC)) + return; + + amd_iommu_pc_present = true; + + /* Check if the performance counters can be written to */ + if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) || + (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) || + (val != val2)) { + pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); + amd_iommu_pc_present = false; + return; + } + + pr_info("AMD-Vi: IOMMU performance counters supported\n"); + + val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); + iommu->max_banks = (u8) ((val >> 12) & 0x3f); + iommu->max_counters = (u8) ((val >> 7) & 0xf); +} + + +static int iommu_init_pci(struct amd_iommu *iommu) +{ + int cap_ptr = iommu->cap_ptr; + u32 range, misc, low, high; + + iommu->dev = pci_get_bus_and_slot(PCI_BUS_NUM(iommu->devid), + iommu->devid & 0xff); + if (!iommu->dev) + return -ENODEV; + + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, + &iommu->cap); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, + &range); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, + &misc); + + iommu->first_device = PCI_DEVID(MMIO_GET_BUS(range), + MMIO_GET_FD(range)); + iommu->last_device = PCI_DEVID(MMIO_GET_BUS(range), + MMIO_GET_LD(range)); + + if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) + amd_iommu_iotlb_sup = false; + + /* read extended feature bits */ + low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); + high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); + + iommu->features = ((u64)high << 32) | low; + + if (iommu_feature(iommu, FEATURE_GT)) { + int glxval; + u32 max_pasid; + u64 pasmax; + + pasmax = iommu->features & FEATURE_PASID_MASK; + pasmax >>= FEATURE_PASID_SHIFT; + max_pasid = (1 << (pasmax + 1)) - 1; + + amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid); + + BUG_ON(amd_iommu_max_pasid & ~PASID_MASK); + + glxval = iommu->features & FEATURE_GLXVAL_MASK; + glxval >>= FEATURE_GLXVAL_SHIFT; + + if (amd_iommu_max_glx_val == -1) + amd_iommu_max_glx_val = glxval; + else + amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); + } + + if (iommu_feature(iommu, FEATURE_GT) && + iommu_feature(iommu, FEATURE_PPR)) { + iommu->is_iommu_v2 = true; + amd_iommu_v2_present = true; + } + + if (iommu_feature(iommu, FEATURE_PPR)) { + iommu->ppr_log = alloc_ppr_log(iommu); + if (!iommu->ppr_log) + return -ENOMEM; + } + + if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) + amd_iommu_np_cache = true; + + 
init_iommu_perf_ctr(iommu); + + if (is_rd890_iommu(iommu->dev)) { + int i, j; + + iommu->root_pdev = pci_get_bus_and_slot(iommu->dev->bus->number, + PCI_DEVFN(0, 0)); + + /* + * Some rd890 systems may not be fully reconfigured by the + * BIOS, so it's necessary for us to store this information so + * it can be reprogrammed on resume + */ + pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, + &iommu->stored_addr_lo); + pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, + &iommu->stored_addr_hi); + + /* Low bit locks writes to configuration space */ + iommu->stored_addr_lo &= ~1; + + for (i = 0; i < 6; i++) + for (j = 0; j < 0x12; j++) + iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); + + for (i = 0; i < 0x83; i++) + iommu->stored_l2[i] = iommu_read_l2(iommu, i); + } + + amd_iommu_erratum_746_workaround(iommu); + + return pci_enable_device(iommu->dev); +} + +static void print_iommu_info(void) +{ + static const char * const feat_str[] = { + "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", + "IA", "GA", "HE", "PC" + }; + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + int i; + + pr_info("AMD-Vi: Found IOMMU at %s cap 0x%hx\n", + dev_name(&iommu->dev->dev), iommu->cap_ptr); + + if (iommu->cap & (1 << IOMMU_CAP_EFR)) { + pr_info("AMD-Vi: Extended features: "); + for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { + if (iommu_feature(iommu, (1ULL << i))) + pr_cont(" %s", feat_str[i]); + } + pr_cont("\n"); + } + } + if (irq_remapping_enabled) + pr_info("AMD-Vi: Interrupt remapping enabled\n"); +} + +static int __init amd_iommu_init_pci(void) +{ + struct amd_iommu *iommu; + int ret = 0; + + for_each_iommu(iommu) { + ret = iommu_init_pci(iommu); + if (ret) + break; + } + + ret = amd_iommu_init_devices(); + + print_iommu_info(); + + return ret; +} + /**************************************************************************** * * The following functions initialize the MSI interrupts for all IOMMUs - * in the system. Its a bit challenging because there could be multiple + * in the system. It's a bit challenging because there could be multiple * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per * pci_dev. * @@ -1131,45 +1358,55 @@ static int iommu_setup_msi(struct amd_iommu *iommu) { int r; - if (pci_enable_msi(iommu->dev)) - return 1; + r = pci_enable_msi(iommu->dev); + if (r) + return r; r = request_threaded_irq(iommu->dev->irq, amd_iommu_int_handler, amd_iommu_int_thread, 0, "AMD-Vi", - iommu->dev); + iommu); if (r) { pci_disable_msi(iommu->dev); - return 1; + return r; } iommu->int_enabled = true; - iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); - - if (iommu->ppr_log != NULL) - iommu_feature_enable(iommu, CONTROL_PPFINT_EN); return 0; } static int iommu_init_msi(struct amd_iommu *iommu) { + int ret; + if (iommu->int_enabled) - return 0; + goto enable_faults; - if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) - return iommu_setup_msi(iommu); + if (iommu->dev->msi_cap) + ret = iommu_setup_msi(iommu); + else + ret = -ENODEV; - return 1; + if (ret) + return ret; + +enable_faults: + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); + + if (iommu->ppr_log != NULL) + iommu_feature_enable(iommu, CONTROL_PPFINT_EN); + + return 0; } /**************************************************************************** * * The next functions belong to the third pass of parsing the ACPI * table. In this last pass the memory mapping requirements are - * gathered (like exclusion and unity mapping reanges). + * gathered (like exclusion and unity mapping ranges). 
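
As a side note on the extended-feature decode in iommu_init_pci() above: the PASmax-to-max_pasid arithmetic is easier to follow in isolation. The sketch below is a plain-C illustration only; the field position (bits 32-36 of the EFR) and the sample EFR value are assumptions for the example, the real constants being FEATURE_PASID_SHIFT/FEATURE_PASID_MASK in amd_iommu_types.h.

#include <stdint.h>
#include <stdio.h>

/* Assumed PASmax field position inside the EFR register (see
 * FEATURE_PASID_SHIFT/FEATURE_PASID_MASK in amd_iommu_types.h). */
#define EXAMPLE_PASID_SHIFT	32
#define EXAMPLE_PASID_MASK	(0x1fULL << EXAMPLE_PASID_SHIFT)

int main(void)
{
	uint64_t efr = 15ULL << EXAMPLE_PASID_SHIFT;	/* hypothetical EFR */
	unsigned int pasmax = (efr & EXAMPLE_PASID_MASK) >> EXAMPLE_PASID_SHIFT;
	/* Same formula as iommu_init_pci(): 2^(PASmax + 1) - 1 PASIDs */
	uint32_t max_pasid = (1U << (pasmax + 1)) - 1;

	printf("PASmax %u -> max_pasid 0x%x\n", pasmax, max_pasid);
	return 0;
}

With PASmax = 15 this yields 0xffff, which lines up with the 16-bit PASID limit noted in the PASID_MASK comment further down.
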
* ****************************************************************************/ @@ -1210,7 +1447,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) /* called for unity map ACPI definition */ static int __init init_unity_map_range(struct ivmd_header *m) { - struct unity_map_entry *e = 0; + struct unity_map_entry *e = NULL; char *s; e = kzalloc(sizeof(*e), GFP_KERNEL); @@ -1242,8 +1479,8 @@ static int __init init_unity_map_range(struct ivmd_header *m) DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" " range_start: %016llx range_end: %016llx flags: %x\n", s, - PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start), - PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end), + PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), + PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end), PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), e->address_start, e->address_end, m->flags); @@ -1278,7 +1515,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) * Init the device table to not allow DMA access for devices and * suppress all page faults */ -static void init_device_table(void) +static void init_device_table_dma(void) { u32 devid; @@ -1288,6 +1525,27 @@ static void init_device_table(void) } } +static void __init uninit_device_table_dma(void) +{ + u32 devid; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + amd_iommu_dev_table[devid].data[0] = 0ULL; + amd_iommu_dev_table[devid].data[1] = 0ULL; + } +} + +static void init_device_table(void) +{ + u32 devid; + + if (!amd_iommu_irq_remap) + return; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) + set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN); +} + static void iommu_init_flags(struct amd_iommu *iommu) { iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? @@ -1319,20 +1577,16 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu) { int i, j; u32 ioc_feature_control; - struct pci_dev *pdev = NULL; + struct pci_dev *pdev = iommu->root_pdev; /* RD890 BIOSes may not have completely reconfigured the iommu */ - if (!is_rd890_iommu(iommu->dev)) + if (!is_rd890_iommu(iommu->dev) || !pdev) return; /* * First, we need to ensure that the iommu is enabled. 
This is * controlled by a register in the northbridge */ - pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0)); - - if (!pdev) - return; /* Select Northbridge indirect register 0x75 and enable writing */ pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); @@ -1342,8 +1596,6 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu) if (!(ioc_feature_control & 0x1)) pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); - pci_dev_put(pdev); - /* Restore the iommu BAR */ pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, iommu->stored_addr_lo); @@ -1368,7 +1620,7 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu) * This function finally enables all IOMMUs found in the system after * they have been initialized */ -static void enable_iommus(void) +static void early_enable_iommus(void) { struct amd_iommu *iommu; @@ -1378,15 +1630,29 @@ static void enable_iommus(void) iommu_set_device_table(iommu); iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); - iommu_enable_ppr_log(iommu); - iommu_enable_gt(iommu); iommu_set_exclusion_range(iommu); - iommu_init_msi(iommu); iommu_enable(iommu); iommu_flush_all_caches(iommu); } } +static void enable_iommus_v2(void) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + iommu_enable_ppr_log(iommu); + iommu_enable_gt(iommu); + } +} + +static void enable_iommus(void) +{ + early_enable_iommus(); + + enable_iommus_v2(); +} + static void disable_iommus(void) { struct amd_iommu *iommu; @@ -1409,6 +1675,8 @@ static void amd_iommu_resume(void) /* re-load the hardware */ enable_iommus(); + + amd_iommu_enable_interrupts(); } static int amd_iommu_suspend(void) @@ -1424,10 +1692,104 @@ static struct syscore_ops amd_iommu_syscore_ops = { .resume = amd_iommu_resume, }; +static void __init free_on_init_error(void) +{ + free_pages((unsigned long)irq_lookup_table, + get_order(rlookup_table_size)); + + if (amd_iommu_irq_cache) { + kmem_cache_destroy(amd_iommu_irq_cache); + amd_iommu_irq_cache = NULL; + + } + + free_pages((unsigned long)amd_iommu_rlookup_table, + get_order(rlookup_table_size)); + + free_pages((unsigned long)amd_iommu_alias_table, + get_order(alias_table_size)); + + free_pages((unsigned long)amd_iommu_dev_table, + get_order(dev_table_size)); + + free_iommu_all(); + +#ifdef CONFIG_GART_IOMMU + /* + * We failed to initialize the AMD IOMMU - try fallback to GART + * if possible. + */ + gart_iommu_init(); + +#endif +} + +/* SB IOAPIC is always on this device in AMD systems */ +#define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0)) + +static bool __init check_ioapic_information(void) +{ + const char *fw_bug = FW_BUG; + bool ret, has_sb_ioapic; + int idx; + + has_sb_ioapic = false; + ret = false; + + /* + * If we have map overrides on the kernel command line the + * messages in this function might not describe firmware bugs + * anymore - so be careful + */ + if (cmdline_maps) + fw_bug = ""; + + for (idx = 0; idx < nr_ioapics; idx++) { + int devid, id = mpc_ioapic_id(idx); + + devid = get_ioapic_devid(id); + if (devid < 0) { + pr_err("%sAMD-Vi: IOAPIC[%d] not in IVRS table\n", + fw_bug, id); + ret = false; + } else if (devid == IOAPIC_SB_DEVID) { + has_sb_ioapic = true; + ret = true; + } + } + + if (!has_sb_ioapic) { + /* + * We expect the SB IOAPIC to be listed in the IVRS + * table. The system timer is connected to the SB IOAPIC + * and if we don't have it in the list the system will + * panic at boot time. 
This situation usually happens + * when the BIOS is buggy and provides us the wrong + * device id for the IOAPIC in the system. + */ + pr_err("%sAMD-Vi: No southbridge IOAPIC found\n", fw_bug); + } + + if (!ret) + pr_err("AMD-Vi: Disabling interrupt remapping\n"); + + return ret; +} + +static void __init free_dma_resources(void) +{ + amd_iommu_uninit_devices(); + + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID/8)); + + free_unity_maps(); +} + /* - * This is the core init function for AMD IOMMU hardware in the system. - * This function is called from the generic x86 DMA layer initialization - * code. + * This is the hardware init function for AMD IOMMU in the system. + * This function is called either from amd_iommu_init or from the interrupt + * remapping setup code. * * This function basically parses the ACPI table for AMD IOMMU (IVRS) * three times: @@ -1446,25 +1808,34 @@ static struct syscore_ops amd_iommu_syscore_ops = { * remapping requirements parsed out of the ACPI table in * this last pass. * - * After that the hardware is initialized and ready to go. In the last - * step we do some Linux specific things like registering the driver in - * the dma_ops interface and initializing the suspend/resume support - * functions. Finally it prints some information about AMD IOMMUs and - * the driver state and enables the hardware. + * After everything is set up the IOMMUs are enabled and the necessary + * hotplug and suspend notifiers are registered. */ -static int __init amd_iommu_init(void) +static int __init early_amd_iommu_init(void) { + struct acpi_table_header *ivrs_base; + acpi_size ivrs_size; + acpi_status status; int i, ret = 0; + if (!amd_iommu_detected) + return -ENODEV; + + status = acpi_get_table_with_size("IVRS", 0, &ivrs_base, &ivrs_size); + if (status == AE_NOT_FOUND) + return -ENODEV; + else if (ACPI_FAILURE(status)) { + const char *err = acpi_format_exception(status); + pr_err("AMD-Vi: IVRS table error: %s\n", err); + return -EINVAL; + } + /* * First parse ACPI tables to find the largest Bus/Dev/Func * we need to handle. 
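
To give a sense of scale for those allocations, here is a small self-contained sketch of the rounding that tbl_size() (used just below) performs; the 32-byte and 2-byte entry sizes and the last-bdf value are assumptions for illustration, not taken from this hunk.

#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096UL

/* Round a table up to a power-of-two number of pages, mimicking the
 * driver's tbl_size()/get_order() combination. */
static unsigned long example_tbl_size(unsigned long last_bdf,
				      unsigned long entry_size)
{
	unsigned long bytes = (last_bdf + 1) * entry_size;
	unsigned long size = EXAMPLE_PAGE_SIZE;

	while (size < bytes)
		size <<= 1;
	return size;
}

int main(void)
{
	unsigned long last_bdf = 0xffff;	/* hypothetical largest devid */

	printf("device table: %lu KiB\n", example_tbl_size(last_bdf, 32) >> 10);
	printf("alias table:  %lu KiB\n", example_tbl_size(last_bdf, 2) >> 10);
	return 0;
}

A fully populated 16-bit device ID space therefore costs on the order of 2 MiB for the device table and 128 KiB for the alias table.
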
Upon this information the shared data * structures for the IOMMUs in the system will be allocated */ - if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) - return -ENODEV; - - ret = amd_iommu_init_err; + ret = find_last_devid_acpi(ivrs_base); if (ret) goto out; @@ -1472,9 +1843,8 @@ static int __init amd_iommu_init(void) alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); - ret = -ENOMEM; - /* Device table - directly used by all IOMMUs */ + ret = -ENOMEM; amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(dev_table_size)); if (amd_iommu_dev_table == NULL) @@ -1487,23 +1857,20 @@ static int __init amd_iommu_init(void) amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, get_order(alias_table_size)); if (amd_iommu_alias_table == NULL) - goto free; + goto out; /* IOMMU rlookup table - find the IOMMU for a specific device */ amd_iommu_rlookup_table = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, get_order(rlookup_table_size)); if (amd_iommu_rlookup_table == NULL) - goto free; + goto out; amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, get_order(MAX_DOMAIN_ID/8)); if (amd_iommu_pd_alloc_bitmap == NULL) - goto free; - - /* init the device table */ - init_device_table(); + goto out; /* * let all alias entries point to itself @@ -1523,28 +1890,93 @@ static int __init amd_iommu_init(void) * now the data structures are allocated and basically initialized * start the real acpi table scan */ - ret = -ENODEV; - if (acpi_table_parse("IVRS", init_iommu_all) != 0) - goto free; + ret = init_iommu_all(ivrs_base); + if (ret) + goto out; - if (amd_iommu_init_err) { - ret = amd_iommu_init_err; - goto free; + if (amd_iommu_irq_remap) + amd_iommu_irq_remap = check_ioapic_information(); + + if (amd_iommu_irq_remap) { + /* + * Interrupt remapping enabled, create kmem_cache for the + * remapping tables. 
+ */ + ret = -ENOMEM; + amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", + MAX_IRQS_PER_TABLE * sizeof(u32), + IRQ_TABLE_ALIGNMENT, + 0, NULL); + if (!amd_iommu_irq_cache) + goto out; + + irq_lookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + if (!irq_lookup_table) + goto out; } - if (acpi_table_parse("IVRS", init_memory_definitions) != 0) - goto free; + ret = init_memory_definitions(ivrs_base); + if (ret) + goto out; - if (amd_iommu_init_err) { - ret = amd_iommu_init_err; - goto free; + /* init the device table */ + init_device_table(); + +out: + /* Don't leak any ACPI memory */ + early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size); + ivrs_base = NULL; + + return ret; +} + +static int amd_iommu_enable_interrupts(void) +{ + struct amd_iommu *iommu; + int ret = 0; + + for_each_iommu(iommu) { + ret = iommu_init_msi(iommu); + if (ret) + goto out; } - ret = amd_iommu_init_devices(); - if (ret) - goto free; +out: + return ret; +} - enable_iommus(); +static bool detect_ivrs(void) +{ + struct acpi_table_header *ivrs_base; + acpi_size ivrs_size; + acpi_status status; + + status = acpi_get_table_with_size("IVRS", 0, &ivrs_base, &ivrs_size); + if (status == AE_NOT_FOUND) + return false; + else if (ACPI_FAILURE(status)) { + const char *err = acpi_format_exception(status); + pr_err("AMD-Vi: IVRS table error: %s\n", err); + return false; + } + + early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size); + + /* Make sure ACS will be enabled during PCI probe */ + pci_request_acs(); + + if (!disable_irq_remap) + amd_iommu_irq_remap = true; + + return true; +} + +static int amd_iommu_init_dma(void) +{ + struct amd_iommu *iommu; + int ret; if (iommu_pass_through) ret = amd_iommu_init_passthrough(); @@ -1552,58 +1984,163 @@ static int __init amd_iommu_init(void) ret = amd_iommu_init_dma_ops(); if (ret) - goto free_disable; + return ret; + + init_device_table_dma(); + + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); amd_iommu_init_api(); amd_iommu_init_notifier(); - register_syscore_ops(&amd_iommu_syscore_ops); + return 0; +} - if (iommu_pass_through) - goto out; +/**************************************************************************** + * + * AMD IOMMU Initialization State Machine + * + ****************************************************************************/ - if (amd_iommu_unmap_flush) - printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); - else - printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); +static int __init state_next(void) +{ + int ret = 0; + + switch (init_state) { + case IOMMU_START_STATE: + if (!detect_ivrs()) { + init_state = IOMMU_NOT_FOUND; + ret = -ENODEV; + } else { + init_state = IOMMU_IVRS_DETECTED; + } + break; + case IOMMU_IVRS_DETECTED: + ret = early_amd_iommu_init(); + init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; + break; + case IOMMU_ACPI_FINISHED: + early_enable_iommus(); + register_syscore_ops(&amd_iommu_syscore_ops); + x86_platform.iommu_shutdown = disable_iommus; + init_state = IOMMU_ENABLED; + break; + case IOMMU_ENABLED: + ret = amd_iommu_init_pci(); + init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; + enable_iommus_v2(); + break; + case IOMMU_PCI_INIT: + ret = amd_iommu_enable_interrupts(); + init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN; + break; + case IOMMU_INTERRUPTS_EN: + ret = amd_iommu_init_dma(); + init_state = ret ? 
IOMMU_INIT_ERROR : IOMMU_DMA_OPS; + break; + case IOMMU_DMA_OPS: + init_state = IOMMU_INITIALIZED; + break; + case IOMMU_INITIALIZED: + /* Nothing to do */ + break; + case IOMMU_NOT_FOUND: + case IOMMU_INIT_ERROR: + /* Error states => do nothing */ + ret = -EINVAL; + break; + default: + /* Unknown state */ + BUG(); + } - x86_platform.iommu_shutdown = disable_iommus; -out: return ret; +} -free_disable: - disable_iommus(); +static int __init iommu_go_to_state(enum iommu_init_state state) +{ + int ret = 0; -free: - amd_iommu_uninit_devices(); + while (init_state != state) { + ret = state_next(); + if (init_state == IOMMU_NOT_FOUND || + init_state == IOMMU_INIT_ERROR) + break; + } - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, - get_order(MAX_DOMAIN_ID/8)); + return ret; +} - free_pages((unsigned long)amd_iommu_rlookup_table, - get_order(rlookup_table_size)); +#ifdef CONFIG_IRQ_REMAP +int __init amd_iommu_prepare(void) +{ + return iommu_go_to_state(IOMMU_ACPI_FINISHED); +} - free_pages((unsigned long)amd_iommu_alias_table, - get_order(alias_table_size)); +int __init amd_iommu_supported(void) +{ + return amd_iommu_irq_remap ? 1 : 0; +} - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); +int __init amd_iommu_enable(void) +{ + int ret; - free_iommu_all(); + ret = iommu_go_to_state(IOMMU_ENABLED); + if (ret) + return ret; - free_unity_maps(); + irq_remapping_enabled = 1; -#ifdef CONFIG_GART_IOMMU - /* - * We failed to initialize the AMD IOMMU - try fallback to GART - * if possible. - */ - gart_iommu_init(); + return 0; +} +void amd_iommu_disable(void) +{ + amd_iommu_suspend(); +} + +int amd_iommu_reenable(int mode) +{ + amd_iommu_resume(); + + return 0; +} + +int __init amd_iommu_enable_faulting(void) +{ + /* We enable MSI later when PCI is initialized */ + return 0; +} #endif - goto out; +/* + * This is the core init function for AMD IOMMU hardware in the system. + * This function is called from the generic x86 DMA layer initialization + * code. 
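
Before the definition of amd_iommu_init() that follows, it may help to see the initialization state machine above reduced to its bare pattern. The sketch below is a simplified model with invented state names and no error states; it only shows why two different entry points (the interrupt-remapping code via amd_iommu_prepare() and the DMA-layer path via amd_iommu_init()) can safely drive the same forward-only sequence.

#include <stdio.h>

enum example_state { E_START, E_DETECTED, E_ACPI_DONE, E_ENABLED, E_DONE };

static enum example_state cur = E_START;

/* Advance exactly one step; a finished machine simply stays put. */
static int example_state_next(void)
{
	if (cur < E_DONE)
		cur++;
	return 0;	/* a real step could fail and park in an error state */
}

static int example_go_to_state(enum example_state target)
{
	while (cur < target) {
		if (example_state_next())
			return -1;
	}
	return 0;
}

int main(void)
{
	example_go_to_state(E_ACPI_DONE);	/* early caller stops here */
	example_go_to_state(E_DONE);		/* late caller resumes from there */
	printf("final state %d\n", cur);
	return 0;
}
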
+ */ +static int __init amd_iommu_init(void) +{ + int ret; + + ret = iommu_go_to_state(IOMMU_INITIALIZED); + if (ret) { + free_dma_resources(); + if (!irq_remapping_enabled) { + disable_iommus(); + free_on_init_error(); + } else { + struct amd_iommu *iommu; + + uninit_device_table_dma(); + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); + } + } + + return ret; } /**************************************************************************** @@ -1613,29 +2150,25 @@ free: * IOMMUs * ****************************************************************************/ -static int __init early_amd_iommu_detect(struct acpi_table_header *table) -{ - return 0; -} - int __init amd_iommu_detect(void) { + int ret; + if (no_iommu || (iommu_detected && !gart_iommu_aperture)) return -ENODEV; if (amd_iommu_disabled) return -ENODEV; - if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { - iommu_detected = 1; - amd_iommu_detected = 1; - x86_init.iommu.iommu_init = amd_iommu_init; + ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); + if (ret) + return ret; + + amd_iommu_detected = true; + iommu_detected = 1; + x86_init.iommu.iommu_init = amd_iommu_init; - /* Make sure ACS will be enabled */ - pci_request_acs(); - return 1; - } - return -ENODEV; + return 0; } /**************************************************************************** @@ -1666,16 +2199,159 @@ static int __init parse_amd_iommu_options(char *str) return 1; } -__setup("amd_iommu_dump", parse_amd_iommu_dump); -__setup("amd_iommu=", parse_amd_iommu_options); +static int __init parse_ivrs_ioapic(char *str) +{ + unsigned int bus, dev, fn; + int ret, id, i; + u16 devid; + + ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); + + if (ret != 4) { + pr_err("AMD-Vi: Invalid command line: ivrs_ioapic%s\n", str); + return 1; + } + + if (early_ioapic_map_size == EARLY_MAP_SIZE) { + pr_err("AMD-Vi: Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", + str); + return 1; + } + + devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + + cmdline_maps = true; + i = early_ioapic_map_size++; + early_ioapic_map[i].id = id; + early_ioapic_map[i].devid = devid; + early_ioapic_map[i].cmd_line = true; + + return 1; +} + +static int __init parse_ivrs_hpet(char *str) +{ + unsigned int bus, dev, fn; + int ret, id, i; + u16 devid; + + ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); + + if (ret != 4) { + pr_err("AMD-Vi: Invalid command line: ivrs_hpet%s\n", str); + return 1; + } + + if (early_hpet_map_size == EARLY_MAP_SIZE) { + pr_err("AMD-Vi: Early HPET map overflow - ignoring ivrs_hpet%s\n", + str); + return 1; + } + + devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + + cmdline_maps = true; + i = early_hpet_map_size++; + early_hpet_map[i].id = id; + early_hpet_map[i].devid = devid; + early_hpet_map[i].cmd_line = true; + + return 1; +} + +__setup("amd_iommu_dump", parse_amd_iommu_dump); +__setup("amd_iommu=", parse_amd_iommu_options); +__setup("ivrs_ioapic", parse_ivrs_ioapic); +__setup("ivrs_hpet", parse_ivrs_hpet); IOMMU_INIT_FINISH(amd_iommu_detect, gart_iommu_hole_init, - 0, - 0); + NULL, + NULL); bool amd_iommu_v2_supported(void) { return amd_iommu_v2_present; } EXPORT_SYMBOL(amd_iommu_v2_supported); + +/**************************************************************************** + * + * IOMMU EFR Performance Counter support functionality. This code allows + * access to the IOMMU PC functionality. 
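
The register addressing used by the performance-counter accessors below packs bank, counter and function into a single MMIO offset. Here is a minimal stand-alone illustration of that packing, using the same formula as amd_iommu_pc_get_set_reg_val(); the sample bank/counter/function values are made up.

#include <stdio.h>
#include <stdint.h>

/* Mirror of the offset computation in amd_iommu_pc_get_set_reg_val(). */
static uint32_t pc_reg_offset(uint8_t bank, uint8_t cntr, uint8_t fxn)
{
	return ((0x40u | bank) << 12) | ((uint32_t)cntr << 8) | fxn;
}

int main(void)
{
	/* Bank 0, counter 0, function 0 lands exactly on
	 * MMIO_CNTR_REG_OFFSET (0x40000); banks are 0x1000 apart and
	 * counters 0x100 apart. */
	printf("0x%x\n", pc_reg_offset(0, 0, 0));	/* 0x40000 */
	printf("0x%x\n", pc_reg_offset(1, 2, 0x10));	/* 0x41210 */
	return 0;
}

The driver additionally requires fxn to be 8-byte aligned and no larger than 0x28, and clamps the resulting offset against the max_banks/max_counters read at probe time.
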
+ * + ****************************************************************************/ + +u8 amd_iommu_pc_get_max_banks(u16 devid) +{ + struct amd_iommu *iommu; + u8 ret = 0; + + /* locate the iommu governing the devid */ + iommu = amd_iommu_rlookup_table[devid]; + if (iommu) + ret = iommu->max_banks; + + return ret; +} +EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); + +bool amd_iommu_pc_supported(void) +{ + return amd_iommu_pc_present; +} +EXPORT_SYMBOL(amd_iommu_pc_supported); + +u8 amd_iommu_pc_get_max_counters(u16 devid) +{ + struct amd_iommu *iommu; + u8 ret = 0; + + /* locate the iommu governing the devid */ + iommu = amd_iommu_rlookup_table[devid]; + if (iommu) + ret = iommu->max_counters; + + return ret; +} +EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); + +int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write) +{ + struct amd_iommu *iommu; + u32 offset; + u32 max_offset_lim; + + /* Make sure the IOMMU PC resource is available */ + if (!amd_iommu_pc_present) + return -ENODEV; + + /* Locate the iommu associated with the device ID */ + iommu = amd_iommu_rlookup_table[devid]; + + /* Check for valid iommu and pc register indexing */ + if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7))) + return -ENODEV; + + offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn); + + /* Limit the offset to the hw defined mmio region aperture */ + max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) | + (iommu->max_counters << 8) | 0x28); + if ((offset < MMIO_CNTR_REG_OFFSET) || + (offset > max_offset_lim)) + return -EINVAL; + + if (is_write) { + writel((u32)*value, iommu->mmio_base + offset); + writel((*value >> 32), iommu->mmio_base + offset + 4); + } else { + *value = readl(iommu->mmio_base + offset + 4); + *value <<= 32; + *value = readl(iommu->mmio_base + offset); + } + + return 0; +} +EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val); diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 1a7f41c6cc6..95ed6deae47 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -32,6 +32,14 @@ extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); extern void amd_iommu_init_api(void); +/* Needed for interrupt remapping */ +extern int amd_iommu_supported(void); +extern int amd_iommu_prepare(void); +extern int amd_iommu_enable(void); +extern void amd_iommu_disable(void); +extern int amd_iommu_reenable(int); +extern int amd_iommu_enable_faulting(void); + /* IOMMUv2 specific functions */ struct iommu_domain; @@ -48,6 +56,13 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); +/* IOMMU Performance Counter functions */ +extern bool amd_iommu_pc_supported(void); +extern u8 amd_iommu_pc_get_max_banks(u16 devid); +extern u8 amd_iommu_pc_get_max_counters(u16 devid); +extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write); + #define PPR_SUCCESS 0x0 #define PPR_INVALID 0x1 #define PPR_FAILURE 0xf diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 2452f3b7173..f1a5abf11ac 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -24,6 +24,8 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/spinlock.h> +#include <linux/pci.h> +#include <linux/irqreturn.h> /* * Maximum number of IOMMUs 
supported @@ -37,9 +39,6 @@ #define ALIAS_TABLE_ENTRY_SIZE 2 #define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) -/* Length of the MMIO region for the AMD IOMMU */ -#define MMIO_REGION_LENGTH 0x4000 - /* Capability offsets used by the driver */ #define MMIO_CAP_HDR_OFFSET 0x00 #define MMIO_RANGE_OFFSET 0x0c @@ -77,6 +76,10 @@ #define MMIO_STATUS_OFFSET 0x2020 #define MMIO_PPR_HEAD_OFFSET 0x2030 #define MMIO_PPR_TAIL_OFFSET 0x2038 +#define MMIO_CNTR_CONF_OFFSET 0x4000 +#define MMIO_CNTR_REG_OFFSET 0x40000 +#define MMIO_REG_END_OFFSET 0x80000 + /* Extended Feature Bits */ @@ -96,9 +99,15 @@ #define FEATURE_GLXVAL_SHIFT 14 #define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT) -#define PASID_MASK 0x000fffff +/* Note: + * The current driver only support 16-bit PASID. + * Currently, hardware only implement upto 16-bit PASID + * even though the spec says it could have upto 20 bits. + */ +#define PASID_MASK 0x0000ffff /* MMIO status bits */ +#define MMIO_STATUS_EVT_INT_MASK (1 << 1) #define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2) #define MMIO_STATUS_PPR_INT_MASK (1 << 6) @@ -152,6 +161,7 @@ #define CMD_INV_DEV_ENTRY 0x02 #define CMD_INV_IOMMU_PAGES 0x03 #define CMD_INV_IOTLB_PAGES 0x04 +#define CMD_INV_IRT 0x05 #define CMD_COMPLETE_PPR 0x07 #define CMD_INV_ALL 0x08 @@ -175,6 +185,7 @@ #define DEV_ENTRY_EX 0x67 #define DEV_ENTRY_SYSMGT1 0x68 #define DEV_ENTRY_SYSMGT2 0x69 +#define DEV_ENTRY_IRQ_TBL_EN 0x80 #define DEV_ENTRY_INIT_PASS 0xb8 #define DEV_ENTRY_EINT_PASS 0xb9 #define DEV_ENTRY_NMI_PASS 0xba @@ -183,6 +194,8 @@ #define DEV_ENTRY_MODE_MASK 0x07 #define DEV_ENTRY_MODE_SHIFT 0x09 +#define MAX_DEV_TABLE_ENTRIES 0xffff + /* constants to configure the command buffer */ #define CMD_BUFFER_SIZE 8192 #define CMD_BUFFER_UNINITIALIZED 1 @@ -255,7 +268,7 @@ #define PAGE_SIZE_ALIGN(address, pagesize) \ ((address) & ~((pagesize) - 1)) /* - * Creates an IOMMU PTE for an address an a given pagesize + * Creates an IOMMU PTE for an address and a given pagesize * The PTE has no permission bits set * Pagesize is expected to be a power-of-two larger than 4096 */ @@ -311,9 +324,6 @@ #define MAX_DOMAIN_ID 65536 -/* FIXME: move this macro to <linux/pci.h> */ -#define PCI_BUS(x) (((x) >> 8) & 0xff) - /* Protection domain flags */ #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops @@ -334,6 +344,23 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup; +#define MAX_IRQS_PER_TABLE 256 +#define IRQ_TABLE_ALIGNMENT 128 + +struct irq_remap_table { + spinlock_t lock; + unsigned min_index; + u32 *table; +}; + +extern struct irq_remap_table **irq_lookup_table; + +/* Interrupt remapping feature used? 
*/ +extern bool amd_iommu_irq_remap; + +/* kmem_cache to get tables with 128 byte alignement */ +extern struct kmem_cache *amd_iommu_irq_cache; + /* * Make iterating over all IOMMUs easier */ @@ -404,7 +431,8 @@ struct iommu_dev_data { struct list_head dev_data_list; /* For global dev_data_list */ struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ - atomic_t bind; /* Domain attach reverent count */ + atomic_t bind; /* Domain attach reference count */ + struct iommu_group *group; /* IOMMU group for virtual aliases */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Default for device is pt_domain */ @@ -481,10 +509,17 @@ struct amd_iommu { /* Pointer to PCI device of this IOMMU */ struct pci_dev *dev; + /* Cache pdev to root device for resume quirks */ + struct pci_dev *root_pdev; + /* physical address of MMIO space */ u64 mmio_phys; + + /* physical end address of MMIO space */ + u64 mmio_phys_end; + /* virtual address of MMIO space */ - u8 *mmio_base; + u8 __iomem *mmio_base; /* capabilities of that IOMMU read from ACPI */ u32 cap; @@ -498,6 +533,9 @@ struct amd_iommu { /* IOMMUv2 */ bool is_iommu_v2; + /* PCI device id of the IOMMU device */ + u16 devid; + /* * Capability pointer. There could be more than one IOMMU per PCI * device function if there are more than one AMD IOMMU capability @@ -527,8 +565,6 @@ struct amd_iommu { u32 evt_buf_size; /* event buffer virtual address */ u8 *evt_buf; - /* MSI number for event interrupt */ - u16 evt_msi_num; /* Base of the PPR log, if present */ u8 *ppr_log; @@ -559,8 +595,23 @@ struct amd_iommu { /* The l2 indirect registers */ u32 stored_l2[0x83]; + + /* The maximum PC banks and counters/bank (PCSup=1) */ + u8 max_banks; + u8 max_counters; }; +struct devid_map { + struct list_head list; + u8 id; + u16 devid; + bool cmd_line; +}; + +/* Map HPET and IOAPIC ids to the devid used by the IOMMU */ +extern struct list_head ioapic_map; +extern struct list_head hpet_map; + /* * List with all IOMMUs in the system. This list is not locked because it is * only written and read at driver initialization or suspend time @@ -649,10 +700,10 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap; * If true, the addresses will be flushed on unmap time, not when * they are reused */ -extern bool amd_iommu_unmap_flush; +extern u32 amd_iommu_unmap_flush; -/* Smallest number of PASIDs supported by any IOMMU in the system */ -extern u32 amd_iommu_max_pasids; +/* Smallest max PASID supported by any IOMMU in the system */ +extern u32 amd_iommu_max_pasid; extern bool amd_iommu_v2_present; @@ -661,11 +712,34 @@ extern bool amd_iommu_force_isolation; /* Max levels of glxval supported */ extern int amd_iommu_max_glx_val; -/* takes bus and device/function and returns the device id - * FIXME: should that be in generic PCI code? */ -static inline u16 calc_devid(u8 bus, u8 devfn) +/* + * This function flushes all internal caches of + * the IOMMU used by this driver. 
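
The devid values stored in these ioapic_map/hpet_map entries use the usual 16-bit IOMMU device ID layout, the same one PCI_BUS_NUM()/PCI_SLOT()/PCI_FUNC() take apart throughout this patch and that parse_ivrs_ioapic() builds from the command line. A small stand-alone illustration follows; the 00:14.0 value is the southbridge IOAPIC location assumed by IOAPIC_SB_DEVID earlier.

#include <stdio.h>
#include <stdint.h>

/* devid layout: bus[15:8] slot[7:3] func[2:0] */
static uint16_t pack_devid(unsigned int bus, unsigned int slot, unsigned int fn)
{
	return ((bus & 0xff) << 8) | ((slot & 0x1f) << 3) | (fn & 0x7);
}

int main(void)
{
	uint16_t devid = pack_devid(0x00, 0x14, 0);	/* 00:14.0 */

	printf("devid 0x%04x = %02x:%02x.%x\n", devid,
	       (devid >> 8) & 0xff,	/* PCI_BUS_NUM() */
	       (devid >> 3) & 0x1f,	/* PCI_SLOT()    */
	       devid & 0x7);		/* PCI_FUNC()    */
	return 0;
}

This prints devid 0x00a0, which is exactly the IOAPIC_SB_DEVID constant used by check_ioapic_information().
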
+ */ +extern void iommu_flush_all_caches(struct amd_iommu *iommu); + +static inline int get_ioapic_devid(int id) +{ + struct devid_map *entry; + + list_for_each_entry(entry, &ioapic_map, list) { + if (entry->id == id) + return entry->devid; + } + + return -EINVAL; +} + +static inline int get_hpet_devid(int id) { - return (((u16)bus) << 8) | devfn; + struct devid_map *entry; + + list_for_each_entry(entry, &hpet_map, list) { + if (entry->id == id) + return entry->devid; + } + + return -EINVAL; } #ifdef CONFIG_AMD_IOMMU_STATS diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 8add9f125d3..499b4366a98 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -45,17 +45,22 @@ struct pri_queue { struct pasid_state { struct list_head list; /* For global state-list */ atomic_t count; /* Reference count */ + unsigned mmu_notifier_count; /* Counting nested mmu_notifier + calls */ struct task_struct *task; /* Task bound to this PASID */ struct mm_struct *mm; /* mm_struct for the faults */ struct mmu_notifier mn; /* mmu_otifier handle */ struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ struct device_state *device_state; /* Link to our device_state */ int pasid; /* PASID index */ - spinlock_t lock; /* Protect pri_queues */ + spinlock_t lock; /* Protect pri_queues and + mmu_notifer_count */ wait_queue_head_t wq; /* To wait for count == 0 */ }; struct device_state { + struct list_head list; + u16 devid; atomic_t count; struct pci_dev *pdev; struct pasid_state **states; @@ -81,13 +86,9 @@ struct fault { u16 flags; }; -struct device_state **state_table; +static LIST_HEAD(state_list); static spinlock_t state_lock; -/* List and lock for all pasid_states */ -static LIST_HEAD(pasid_state_list); -static DEFINE_SPINLOCK(ps_lock); - static struct workqueue_struct *iommu_wq; /* @@ -99,7 +100,6 @@ static u64 *empty_page_table; static void free_pasid_states(struct device_state *dev_state); static void unbind_pasid(struct device_state *dev_state, int pasid); -static int task_exit(struct notifier_block *nb, unsigned long e, void *data); static u16 device_id(struct pci_dev *pdev) { @@ -111,13 +111,25 @@ static u16 device_id(struct pci_dev *pdev) return devid; } +static struct device_state *__get_device_state(u16 devid) +{ + struct device_state *dev_state; + + list_for_each_entry(dev_state, &state_list, list) { + if (dev_state->devid == devid) + return dev_state; + } + + return NULL; +} + static struct device_state *get_device_state(u16 devid) { struct device_state *dev_state; unsigned long flags; spin_lock_irqsave(&state_lock, flags); - dev_state = state_table[devid]; + dev_state = __get_device_state(devid); if (dev_state != NULL) atomic_inc(&dev_state->count); spin_unlock_irqrestore(&state_lock, flags); @@ -158,29 +170,6 @@ static void put_device_state_wait(struct device_state *dev_state) free_device_state(dev_state); } -static struct notifier_block profile_nb = { - .notifier_call = task_exit, -}; - -static void link_pasid_state(struct pasid_state *pasid_state) -{ - spin_lock(&ps_lock); - list_add_tail(&pasid_state->list, &pasid_state_list); - spin_unlock(&ps_lock); -} - -static void __unlink_pasid_state(struct pasid_state *pasid_state) -{ - list_del(&pasid_state->list); -} - -static void unlink_pasid_state(struct pasid_state *pasid_state) -{ - spin_lock(&ps_lock); - __unlink_pasid_state(pasid_state); - spin_unlock(&ps_lock); -} - /* Must be called under dev_state->lock */ static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, int 
pasid, bool alloc) @@ -337,7 +326,6 @@ static void unbind_pasid(struct device_state *dev_state, int pasid) if (pasid_state == NULL) return; - unlink_pasid_state(pasid_state); __unbind_pasid(pasid_state); put_pasid_state_wait(pasid_state); /* Reference taken in this function */ } @@ -379,7 +367,12 @@ static void free_pasid_states(struct device_state *dev_state) continue; put_pasid_state(pasid_state); - unbind_pasid(dev_state, i); + + /* + * This will call the mn_release function and + * unbind the PASID + */ + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); } if (dev_state->pasid_levels == 2) @@ -439,12 +432,19 @@ static void mn_invalidate_range_start(struct mmu_notifier *mn, { struct pasid_state *pasid_state; struct device_state *dev_state; + unsigned long flags; pasid_state = mn_to_state(mn); dev_state = pasid_state->device_state; - amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid, - __pa(empty_page_table)); + spin_lock_irqsave(&pasid_state->lock, flags); + if (pasid_state->mmu_notifier_count == 0) { + amd_iommu_domain_set_gcr3(dev_state->domain, + pasid_state->pasid, + __pa(empty_page_table)); + } + pasid_state->mmu_notifier_count += 1; + spin_unlock_irqrestore(&pasid_state->lock, flags); } static void mn_invalidate_range_end(struct mmu_notifier *mn, @@ -453,15 +453,39 @@ static void mn_invalidate_range_end(struct mmu_notifier *mn, { struct pasid_state *pasid_state; struct device_state *dev_state; + unsigned long flags; + + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + + spin_lock_irqsave(&pasid_state->lock, flags); + pasid_state->mmu_notifier_count -= 1; + if (pasid_state->mmu_notifier_count == 0) { + amd_iommu_domain_set_gcr3(dev_state->domain, + pasid_state->pasid, + __pa(pasid_state->mm->pgd)); + } + spin_unlock_irqrestore(&pasid_state->lock, flags); +} + +static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct pasid_state *pasid_state; + struct device_state *dev_state; + + might_sleep(); pasid_state = mn_to_state(mn); dev_state = pasid_state->device_state; - amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid, - __pa(pasid_state->mm->pgd)); + if (pasid_state->device_state->inv_ctx_cb) + dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); + + unbind_pasid(dev_state, pasid_state->pasid); } static struct mmu_notifier_ops iommu_mn = { + .release = mn_release, .clear_flush_young = mn_clear_flush_young, .change_pte = mn_change_pte, .invalidate_page = mn_invalidate_page, @@ -504,8 +528,10 @@ static void do_fault(struct work_struct *work) write = !!(fault->flags & PPR_FAULT_WRITE); + down_read(&fault->state->mm->mmap_sem); npages = get_user_pages(fault->state->task, fault->state->mm, fault->address, 1, write, 0, &page, NULL); + up_read(&fault->state->mm->mmap_sem); if (npages == 1) { put_page(page); @@ -604,53 +630,6 @@ static struct notifier_block ppr_nb = { .notifier_call = ppr_notifier, }; -static int task_exit(struct notifier_block *nb, unsigned long e, void *data) -{ - struct pasid_state *pasid_state; - struct task_struct *task; - - task = data; - - /* - * Using this notifier is a hack - but there is no other choice - * at the moment. What I really want is a sleeping notifier that - * is called when an MM goes down. But such a notifier doesn't - * exist yet. The notifier needs to sleep because it has to make - * sure that the device does not use the PASID and the address - * space anymore before it is destroyed. This includes waiting - * for pending PRI requests to pass the workqueue. 
The - * MMU-Notifiers would be a good fit, but they use RCU and so - * they are not allowed to sleep. Lets see how we can solve this - * in a more intelligent way in the future. - */ -again: - spin_lock(&ps_lock); - list_for_each_entry(pasid_state, &pasid_state_list, list) { - struct device_state *dev_state; - int pasid; - - if (pasid_state->task != task) - continue; - - /* Drop Lock and unbind */ - spin_unlock(&ps_lock); - - dev_state = pasid_state->device_state; - pasid = pasid_state->pasid; - - if (pasid_state->device_state->inv_ctx_cb) - dev_state->inv_ctx_cb(dev_state->pdev, pasid); - - unbind_pasid(dev_state, pasid); - - /* Task may be in the list multiple times */ - goto again; - } - spin_unlock(&ps_lock); - - return NOTIFY_OK; -} - int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, struct task_struct *task) { @@ -681,6 +660,8 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, atomic_set(&pasid_state->count, 1); init_waitqueue_head(&pasid_state->wq); + spin_lock_init(&pasid_state->lock); + pasid_state->task = task; pasid_state->mm = get_task_mm(task); pasid_state->device_state = dev_state; @@ -701,8 +682,6 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, if (ret) goto out_clear_state; - link_pasid_state(pasid_state); - return 0; out_clear_state: @@ -723,6 +702,7 @@ EXPORT_SYMBOL(amd_iommu_bind_pasid); void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) { + struct pasid_state *pasid_state; struct device_state *dev_state; u16 devid; @@ -739,7 +719,17 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) if (pasid < 0 || pasid >= dev_state->max_pasids) goto out; - unbind_pasid(dev_state, pasid); + pasid_state = get_pasid_state(dev_state, pasid); + if (pasid_state == NULL) + goto out; + /* + * Drop reference taken here. We are safe because we still hold + * the reference taken in the amd_iommu_bind_pasid function. 
+ */ + put_pasid_state(pasid_state); + + /* This will call the mn_release function and unbind the PASID */ + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); out: put_device_state(dev_state); @@ -769,7 +759,8 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) spin_lock_init(&dev_state->lock); init_waitqueue_head(&dev_state->wq); - dev_state->pdev = pdev; + dev_state->pdev = pdev; + dev_state->devid = devid; tmp = pasids; for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) @@ -799,13 +790,13 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) spin_lock_irqsave(&state_lock, flags); - if (state_table[devid] != NULL) { + if (__get_device_state(devid) != NULL) { spin_unlock_irqrestore(&state_lock, flags); ret = -EBUSY; goto out_free_domain; } - state_table[devid] = dev_state; + list_add_tail(&dev_state->list, &state_list); spin_unlock_irqrestore(&state_lock, flags); @@ -837,13 +828,13 @@ void amd_iommu_free_device(struct pci_dev *pdev) spin_lock_irqsave(&state_lock, flags); - dev_state = state_table[devid]; + dev_state = __get_device_state(devid); if (dev_state == NULL) { spin_unlock_irqrestore(&state_lock, flags); return; } - state_table[devid] = NULL; + list_del(&dev_state->list); spin_unlock_irqrestore(&state_lock, flags); @@ -870,7 +861,7 @@ int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, spin_lock_irqsave(&state_lock, flags); ret = -EINVAL; - dev_state = state_table[devid]; + dev_state = __get_device_state(devid); if (dev_state == NULL) goto out_unlock; @@ -901,7 +892,7 @@ int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, spin_lock_irqsave(&state_lock, flags); ret = -EINVAL; - dev_state = state_table[devid]; + dev_state = __get_device_state(devid); if (dev_state == NULL) goto out_unlock; @@ -918,23 +909,25 @@ EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb); static int __init amd_iommu_v2_init(void) { - size_t state_table_size; int ret; - pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>"); + pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>\n"); - spin_lock_init(&state_lock); + if (!amd_iommu_v2_supported()) { + pr_info("AMD IOMMUv2 functionality not available on this system\n"); + /* + * Load anyway to provide the symbols to other modules + * which may use AMD IOMMUv2 optionally. 
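
For context, this is roughly how a driver module would consume the exported IOMMUv2 interface once this module has loaded. It is a hedged sketch only: the PASID value and PASID count are made up, the helper names are invented, and real users would add proper error handling.

#include <linux/amd-iommu.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/sched.h>

static int example_enable_pasid(struct pci_dev *pdev)
{
	int ret;

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	/* Reserve room for up to 16 PASIDs on this device. */
	ret = amd_iommu_init_device(pdev, 16);
	if (ret)
		return ret;

	/* Bind PASID 1 to the current task's address space. */
	ret = amd_iommu_bind_pasid(pdev, 1, current);
	if (ret)
		amd_iommu_free_device(pdev);

	return ret;
}

static void example_disable_pasid(struct pci_dev *pdev)
{
	amd_iommu_unbind_pasid(pdev, 1);
	amd_iommu_free_device(pdev);
}

With the mn_release-based teardown introduced above, the unbind also happens automatically when the bound task's mm goes away.
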
+ */ + return 0; + } - state_table_size = MAX_DEVICES * sizeof(struct device_state *); - state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(state_table_size)); - if (state_table == NULL) - return -ENOMEM; + spin_lock_init(&state_lock); ret = -ENOMEM; iommu_wq = create_workqueue("amd_iommu_v2"); if (iommu_wq == NULL) - goto out_free; + goto out; ret = -ENOMEM; empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL); @@ -942,26 +935,24 @@ static int __init amd_iommu_v2_init(void) goto out_destroy_wq; amd_iommu_register_ppr_notifier(&ppr_nb); - profile_event_register(PROFILE_TASK_EXIT, &profile_nb); return 0; out_destroy_wq: destroy_workqueue(iommu_wq); -out_free: - free_pages((unsigned long)state_table, get_order(state_table_size)); - +out: return ret; } static void __exit amd_iommu_v2_exit(void) { struct device_state *dev_state; - size_t state_table_size; int i; - profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb); + if (!amd_iommu_v2_supported()) + return; + amd_iommu_unregister_ppr_notifier(&ppr_nb); flush_workqueue(iommu_wq); @@ -984,9 +975,6 @@ static void __exit amd_iommu_v2_exit(void) destroy_workqueue(iommu_wq); - state_table_size = MAX_DEVICES * sizeof(struct device_state *); - free_pages((unsigned long)state_table, get_order(state_table_size)); - free_page((unsigned long)empty_page_table); } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c new file mode 100644 index 00000000000..1599354e974 --- /dev/null +++ b/drivers/iommu/arm-smmu.c @@ -0,0 +1,2064 @@ +/* + * IOMMU API for ARM architected SMMU implementations. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2013 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + * + * This driver currently supports: + * - SMMUv1 and v2 implementations + * - Stream-matching and stream-indexing + * - v7/v8 long-descriptor format + * - Non-secure access to the SMMU + * - 4k and 64k pages, with contiguous pte hints. 
+ * - Up to 42-bit addressing (dependent on VA_BITS) + * - Context fault reporting + */ + +#define pr_fmt(fmt) "arm-smmu: " fmt + +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#include <linux/amba/bus.h> + +#include <asm/pgalloc.h> + +/* Maximum number of stream IDs assigned to a single device */ +#define MAX_MASTER_STREAMIDS MAX_PHANDLE_ARGS + +/* Maximum number of context banks per SMMU */ +#define ARM_SMMU_MAX_CBS 128 + +/* Maximum number of mapping groups per SMMU */ +#define ARM_SMMU_MAX_SMRS 128 + +/* SMMU global address space */ +#define ARM_SMMU_GR0(smmu) ((smmu)->base) +#define ARM_SMMU_GR1(smmu) ((smmu)->base + (smmu)->pagesize) + +/* + * SMMU global address space with conditional offset to access secure + * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448, + * nsGFSYNR0: 0x450) + */ +#define ARM_SMMU_GR0_NS(smmu) \ + ((smmu)->base + \ + ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \ + ? 0x400 : 0)) + +/* Page table bits */ +#define ARM_SMMU_PTE_XN (((pteval_t)3) << 53) +#define ARM_SMMU_PTE_CONT (((pteval_t)1) << 52) +#define ARM_SMMU_PTE_AF (((pteval_t)1) << 10) +#define ARM_SMMU_PTE_SH_NS (((pteval_t)0) << 8) +#define ARM_SMMU_PTE_SH_OS (((pteval_t)2) << 8) +#define ARM_SMMU_PTE_SH_IS (((pteval_t)3) << 8) +#define ARM_SMMU_PTE_PAGE (((pteval_t)3) << 0) + +#if PAGE_SIZE == SZ_4K +#define ARM_SMMU_PTE_CONT_ENTRIES 16 +#elif PAGE_SIZE == SZ_64K +#define ARM_SMMU_PTE_CONT_ENTRIES 32 +#else +#define ARM_SMMU_PTE_CONT_ENTRIES 1 +#endif + +#define ARM_SMMU_PTE_CONT_SIZE (PAGE_SIZE * ARM_SMMU_PTE_CONT_ENTRIES) +#define ARM_SMMU_PTE_CONT_MASK (~(ARM_SMMU_PTE_CONT_SIZE - 1)) + +/* Stage-1 PTE */ +#define ARM_SMMU_PTE_AP_UNPRIV (((pteval_t)1) << 6) +#define ARM_SMMU_PTE_AP_RDONLY (((pteval_t)2) << 6) +#define ARM_SMMU_PTE_ATTRINDX_SHIFT 2 +#define ARM_SMMU_PTE_nG (((pteval_t)1) << 11) + +/* Stage-2 PTE */ +#define ARM_SMMU_PTE_HAP_FAULT (((pteval_t)0) << 6) +#define ARM_SMMU_PTE_HAP_READ (((pteval_t)1) << 6) +#define ARM_SMMU_PTE_HAP_WRITE (((pteval_t)2) << 6) +#define ARM_SMMU_PTE_MEMATTR_OIWB (((pteval_t)0xf) << 2) +#define ARM_SMMU_PTE_MEMATTR_NC (((pteval_t)0x5) << 2) +#define ARM_SMMU_PTE_MEMATTR_DEV (((pteval_t)0x1) << 2) + +/* Configuration registers */ +#define ARM_SMMU_GR0_sCR0 0x0 +#define sCR0_CLIENTPD (1 << 0) +#define sCR0_GFRE (1 << 1) +#define sCR0_GFIE (1 << 2) +#define sCR0_GCFGFRE (1 << 4) +#define sCR0_GCFGFIE (1 << 5) +#define sCR0_USFCFG (1 << 10) +#define sCR0_VMIDPNE (1 << 11) +#define sCR0_PTM (1 << 12) +#define sCR0_FB (1 << 13) +#define sCR0_BSU_SHIFT 14 +#define sCR0_BSU_MASK 0x3 + +/* Identification registers */ +#define ARM_SMMU_GR0_ID0 0x20 +#define ARM_SMMU_GR0_ID1 0x24 +#define ARM_SMMU_GR0_ID2 0x28 +#define ARM_SMMU_GR0_ID3 0x2c +#define ARM_SMMU_GR0_ID4 0x30 +#define ARM_SMMU_GR0_ID5 0x34 +#define ARM_SMMU_GR0_ID6 0x38 +#define ARM_SMMU_GR0_ID7 0x3c +#define ARM_SMMU_GR0_sGFSR 0x48 +#define ARM_SMMU_GR0_sGFSYNR0 0x50 +#define ARM_SMMU_GR0_sGFSYNR1 0x54 +#define ARM_SMMU_GR0_sGFSYNR2 0x58 +#define ARM_SMMU_GR0_PIDR0 0xfe0 +#define ARM_SMMU_GR0_PIDR1 0xfe4 +#define ARM_SMMU_GR0_PIDR2 0xfe8 + +#define ID0_S1TS (1 << 30) +#define ID0_S2TS (1 << 29) +#define ID0_NTS (1 << 28) +#define ID0_SMS (1 << 27) +#define ID0_PTFS_SHIFT 24 +#define ID0_PTFS_MASK 0x2 
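
As an aside on the address-space helpers defined above (the remaining ID register fields continue below): this is how they are typically combined to touch a global register. The sketch assumes an already-probed arm_smmu_device and the sCR0 manipulation is only an example, not code from this patch.

/* Clear sCR0.CLIENTPD so client transactions are translated rather
 * than bypassed; illustrative helper only. */
static void example_clear_clientpd(struct arm_smmu_device *smmu)
{
	void __iomem *gr0 = ARM_SMMU_GR0_NS(smmu);
	u32 reg = readl_relaxed(gr0 + ARM_SMMU_GR0_sCR0);

	reg &= ~sCR0_CLIENTPD;
	writel_relaxed(reg, gr0 + ARM_SMMU_GR0_sCR0);
}
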
+#define ID0_PTFS_V8_ONLY 0x2 +#define ID0_CTTW (1 << 14) +#define ID0_NUMIRPT_SHIFT 16 +#define ID0_NUMIRPT_MASK 0xff +#define ID0_NUMSMRG_SHIFT 0 +#define ID0_NUMSMRG_MASK 0xff + +#define ID1_PAGESIZE (1 << 31) +#define ID1_NUMPAGENDXB_SHIFT 28 +#define ID1_NUMPAGENDXB_MASK 7 +#define ID1_NUMS2CB_SHIFT 16 +#define ID1_NUMS2CB_MASK 0xff +#define ID1_NUMCB_SHIFT 0 +#define ID1_NUMCB_MASK 0xff + +#define ID2_OAS_SHIFT 4 +#define ID2_OAS_MASK 0xf +#define ID2_IAS_SHIFT 0 +#define ID2_IAS_MASK 0xf +#define ID2_UBS_SHIFT 8 +#define ID2_UBS_MASK 0xf +#define ID2_PTFS_4K (1 << 12) +#define ID2_PTFS_16K (1 << 13) +#define ID2_PTFS_64K (1 << 14) + +#define PIDR2_ARCH_SHIFT 4 +#define PIDR2_ARCH_MASK 0xf + +/* Global TLB invalidation */ +#define ARM_SMMU_GR0_STLBIALL 0x60 +#define ARM_SMMU_GR0_TLBIVMID 0x64 +#define ARM_SMMU_GR0_TLBIALLNSNH 0x68 +#define ARM_SMMU_GR0_TLBIALLH 0x6c +#define ARM_SMMU_GR0_sTLBGSYNC 0x70 +#define ARM_SMMU_GR0_sTLBGSTATUS 0x74 +#define sTLBGSTATUS_GSACTIVE (1 << 0) +#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ + +/* Stream mapping registers */ +#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2)) +#define SMR_VALID (1 << 31) +#define SMR_MASK_SHIFT 16 +#define SMR_MASK_MASK 0x7fff +#define SMR_ID_SHIFT 0 +#define SMR_ID_MASK 0x7fff + +#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2)) +#define S2CR_CBNDX_SHIFT 0 +#define S2CR_CBNDX_MASK 0xff +#define S2CR_TYPE_SHIFT 16 +#define S2CR_TYPE_MASK 0x3 +#define S2CR_TYPE_TRANS (0 << S2CR_TYPE_SHIFT) +#define S2CR_TYPE_BYPASS (1 << S2CR_TYPE_SHIFT) +#define S2CR_TYPE_FAULT (2 << S2CR_TYPE_SHIFT) + +/* Context bank attribute registers */ +#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2)) +#define CBAR_VMID_SHIFT 0 +#define CBAR_VMID_MASK 0xff +#define CBAR_S1_BPSHCFG_SHIFT 8 +#define CBAR_S1_BPSHCFG_MASK 3 +#define CBAR_S1_BPSHCFG_NSH 3 +#define CBAR_S1_MEMATTR_SHIFT 12 +#define CBAR_S1_MEMATTR_MASK 0xf +#define CBAR_S1_MEMATTR_WB 0xf +#define CBAR_TYPE_SHIFT 16 +#define CBAR_TYPE_MASK 0x3 +#define CBAR_TYPE_S2_TRANS (0 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_BYPASS (1 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_FAULT (2 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_TRANS (3 << CBAR_TYPE_SHIFT) +#define CBAR_IRPTNDX_SHIFT 24 +#define CBAR_IRPTNDX_MASK 0xff + +#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) +#define CBA2R_RW64_32BIT (0 << 0) +#define CBA2R_RW64_64BIT (1 << 0) + +/* Translation context bank */ +#define ARM_SMMU_CB_BASE(smmu) ((smmu)->base + ((smmu)->size >> 1)) +#define ARM_SMMU_CB(smmu, n) ((n) * (smmu)->pagesize) + +#define ARM_SMMU_CB_SCTLR 0x0 +#define ARM_SMMU_CB_RESUME 0x8 +#define ARM_SMMU_CB_TTBCR2 0x10 +#define ARM_SMMU_CB_TTBR0_LO 0x20 +#define ARM_SMMU_CB_TTBR0_HI 0x24 +#define ARM_SMMU_CB_TTBCR 0x30 +#define ARM_SMMU_CB_S1_MAIR0 0x38 +#define ARM_SMMU_CB_FSR 0x58 +#define ARM_SMMU_CB_FAR_LO 0x60 +#define ARM_SMMU_CB_FAR_HI 0x64 +#define ARM_SMMU_CB_FSYNR0 0x68 +#define ARM_SMMU_CB_S1_TLBIASID 0x610 + +#define SCTLR_S1_ASIDPNE (1 << 12) +#define SCTLR_CFCFG (1 << 7) +#define SCTLR_CFIE (1 << 6) +#define SCTLR_CFRE (1 << 5) +#define SCTLR_E (1 << 4) +#define SCTLR_AFE (1 << 2) +#define SCTLR_TRE (1 << 1) +#define SCTLR_M (1 << 0) +#define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE) + +#define RESUME_RETRY (0 << 0) +#define RESUME_TERMINATE (1 << 0) + +#define TTBCR_EAE (1 << 31) + +#define TTBCR_PASIZE_SHIFT 16 +#define TTBCR_PASIZE_MASK 0x7 + +#define TTBCR_TG0_4K (0 << 14) +#define TTBCR_TG0_64K (1 << 14) + +#define TTBCR_SH0_SHIFT 12 +#define TTBCR_SH0_MASK 0x3 +#define TTBCR_SH_NS 0 
+#define TTBCR_SH_OS 2 +#define TTBCR_SH_IS 3 + +#define TTBCR_ORGN0_SHIFT 10 +#define TTBCR_IRGN0_SHIFT 8 +#define TTBCR_RGN_MASK 0x3 +#define TTBCR_RGN_NC 0 +#define TTBCR_RGN_WBWA 1 +#define TTBCR_RGN_WT 2 +#define TTBCR_RGN_WB 3 + +#define TTBCR_SL0_SHIFT 6 +#define TTBCR_SL0_MASK 0x3 +#define TTBCR_SL0_LVL_2 0 +#define TTBCR_SL0_LVL_1 1 + +#define TTBCR_T1SZ_SHIFT 16 +#define TTBCR_T0SZ_SHIFT 0 +#define TTBCR_SZ_MASK 0xf + +#define TTBCR2_SEP_SHIFT 15 +#define TTBCR2_SEP_MASK 0x7 + +#define TTBCR2_PASIZE_SHIFT 0 +#define TTBCR2_PASIZE_MASK 0x7 + +/* Common definitions for PASize and SEP fields */ +#define TTBCR2_ADDR_32 0 +#define TTBCR2_ADDR_36 1 +#define TTBCR2_ADDR_40 2 +#define TTBCR2_ADDR_42 3 +#define TTBCR2_ADDR_44 4 +#define TTBCR2_ADDR_48 5 + +#define TTBRn_HI_ASID_SHIFT 16 + +#define MAIR_ATTR_SHIFT(n) ((n) << 3) +#define MAIR_ATTR_MASK 0xff +#define MAIR_ATTR_DEVICE 0x04 +#define MAIR_ATTR_NC 0x44 +#define MAIR_ATTR_WBRWA 0xff +#define MAIR_ATTR_IDX_NC 0 +#define MAIR_ATTR_IDX_CACHE 1 +#define MAIR_ATTR_IDX_DEV 2 + +#define FSR_MULTI (1 << 31) +#define FSR_SS (1 << 30) +#define FSR_UUT (1 << 8) +#define FSR_ASF (1 << 7) +#define FSR_TLBLKF (1 << 6) +#define FSR_TLBMCF (1 << 5) +#define FSR_EF (1 << 4) +#define FSR_PF (1 << 3) +#define FSR_AFF (1 << 2) +#define FSR_TF (1 << 1) + +#define FSR_IGN (FSR_AFF | FSR_ASF | FSR_TLBMCF | \ + FSR_TLBLKF) +#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ + FSR_EF | FSR_PF | FSR_TF | FSR_IGN) + +#define FSYNR0_WNR (1 << 4) + +struct arm_smmu_smr { + u8 idx; + u16 mask; + u16 id; +}; + +struct arm_smmu_master { + struct device_node *of_node; + + /* + * The following is specific to the master's position in the + * SMMU chain. + */ + struct rb_node node; + int num_streamids; + u16 streamids[MAX_MASTER_STREAMIDS]; + + /* + * We only need to allocate these on the root SMMU, as we + * configure unmatched streams to bypass translation. + */ + struct arm_smmu_smr *smrs; +}; + +struct arm_smmu_device { + struct device *dev; + struct device_node *parent_of_node; + + void __iomem *base; + unsigned long size; + unsigned long pagesize; + +#define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0) +#define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1) +#define ARM_SMMU_FEAT_TRANS_S1 (1 << 2) +#define ARM_SMMU_FEAT_TRANS_S2 (1 << 3) +#define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4) + u32 features; + +#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) + u32 options; + int version; + + u32 num_context_banks; + u32 num_s2_context_banks; + DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS); + atomic_t irptndx; + + u32 num_mapping_groups; + DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS); + + unsigned long input_size; + unsigned long s1_output_size; + unsigned long s2_output_size; + + u32 num_global_irqs; + u32 num_context_irqs; + unsigned int *irqs; + + struct list_head list; + struct rb_root masters; +}; + +struct arm_smmu_cfg { + struct arm_smmu_device *smmu; + u8 cbndx; + u8 irptndx; + u32 cbar; + pgd_t *pgd; +}; +#define INVALID_IRPTNDX 0xff + +#define ARM_SMMU_CB_ASID(cfg) ((cfg)->cbndx) +#define ARM_SMMU_CB_VMID(cfg) ((cfg)->cbndx + 1) + +struct arm_smmu_domain { + /* + * A domain can span across multiple, chained SMMUs and requires + * all devices within the domain to follow the same translation + * path. 
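+ * Only the root SMMU of the chain gets a context bank (root_cfg);
+ * leaf_smmu records the SMMU that the domain's masters sit behind, and
+ * any intermediate SMMUs are programmed to bypass the transaction (see
+ * arm_smmu_domain_add_master()).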
+ */ + struct arm_smmu_device *leaf_smmu; + struct arm_smmu_cfg root_cfg; + phys_addr_t output_mask; + + spinlock_t lock; +}; + +static DEFINE_SPINLOCK(arm_smmu_devices_lock); +static LIST_HEAD(arm_smmu_devices); + +struct arm_smmu_option_prop { + u32 opt; + const char *prop; +}; + +static struct arm_smmu_option_prop arm_smmu_options [] = { + { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" }, + { 0, NULL}, +}; + +static void parse_driver_options(struct arm_smmu_device *smmu) +{ + int i = 0; + do { + if (of_property_read_bool(smmu->dev->of_node, + arm_smmu_options[i].prop)) { + smmu->options |= arm_smmu_options[i].opt; + dev_notice(smmu->dev, "option %s\n", + arm_smmu_options[i].prop); + } + } while (arm_smmu_options[++i].opt); +} + +static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu, + struct device_node *dev_node) +{ + struct rb_node *node = smmu->masters.rb_node; + + while (node) { + struct arm_smmu_master *master; + master = container_of(node, struct arm_smmu_master, node); + + if (dev_node < master->of_node) + node = node->rb_left; + else if (dev_node > master->of_node) + node = node->rb_right; + else + return master; + } + + return NULL; +} + +static int insert_smmu_master(struct arm_smmu_device *smmu, + struct arm_smmu_master *master) +{ + struct rb_node **new, *parent; + + new = &smmu->masters.rb_node; + parent = NULL; + while (*new) { + struct arm_smmu_master *this; + this = container_of(*new, struct arm_smmu_master, node); + + parent = *new; + if (master->of_node < this->of_node) + new = &((*new)->rb_left); + else if (master->of_node > this->of_node) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + rb_link_node(&master->node, parent, new); + rb_insert_color(&master->node, &smmu->masters); + return 0; +} + +static int register_smmu_master(struct arm_smmu_device *smmu, + struct device *dev, + struct of_phandle_args *masterspec) +{ + int i; + struct arm_smmu_master *master; + + master = find_smmu_master(smmu, masterspec->np); + if (master) { + dev_err(dev, + "rejecting multiple registrations for master device %s\n", + masterspec->np->name); + return -EBUSY; + } + + if (masterspec->args_count > MAX_MASTER_STREAMIDS) { + dev_err(dev, + "reached maximum number (%d) of stream IDs for master device %s\n", + MAX_MASTER_STREAMIDS, masterspec->np->name); + return -ENOSPC; + } + + master = devm_kzalloc(dev, sizeof(*master), GFP_KERNEL); + if (!master) + return -ENOMEM; + + master->of_node = masterspec->np; + master->num_streamids = masterspec->args_count; + + for (i = 0; i < master->num_streamids; ++i) + master->streamids[i] = masterspec->args[i]; + + return insert_smmu_master(smmu, master); +} + +static struct arm_smmu_device *find_parent_smmu(struct arm_smmu_device *smmu) +{ + struct arm_smmu_device *parent; + + if (!smmu->parent_of_node) + return NULL; + + spin_lock(&arm_smmu_devices_lock); + list_for_each_entry(parent, &arm_smmu_devices, list) + if (parent->dev->of_node == smmu->parent_of_node) + goto out_unlock; + + parent = NULL; + dev_warn(smmu->dev, + "Failed to find SMMU parent despite parent in DT\n"); +out_unlock: + spin_unlock(&arm_smmu_devices_lock); + return parent; +} + +static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end) +{ + int idx; + + do { + idx = find_next_zero_bit(map, end, start); + if (idx == end) + return -ENOSPC; + } while (test_and_set_bit(idx, map)); + + return idx; +} + +static void __arm_smmu_free_bitmap(unsigned long *map, int idx) +{ + clear_bit(idx, map); +} + +/* Wait for any 
pending TLB invalidations to complete */ +static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu) +{ + int count = 0; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + + writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC); + while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS) + & sTLBGSTATUS_GSACTIVE) { + cpu_relax(); + if (++count == TLB_LOOP_TIMEOUT) { + dev_err_ratelimited(smmu->dev, + "TLB sync timed out -- SMMU may be deadlocked\n"); + return; + } + udelay(1); + } +} + +static void arm_smmu_tlb_inv_context(struct arm_smmu_cfg *cfg) +{ + struct arm_smmu_device *smmu = cfg->smmu; + void __iomem *base = ARM_SMMU_GR0(smmu); + bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; + + if (stage1) { + base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + writel_relaxed(ARM_SMMU_CB_ASID(cfg), + base + ARM_SMMU_CB_S1_TLBIASID); + } else { + base = ARM_SMMU_GR0(smmu); + writel_relaxed(ARM_SMMU_CB_VMID(cfg), + base + ARM_SMMU_GR0_TLBIVMID); + } + + arm_smmu_tlb_sync(smmu); +} + +static irqreturn_t arm_smmu_context_fault(int irq, void *dev) +{ + int flags, ret; + u32 fsr, far, fsynr, resume; + unsigned long iova; + struct iommu_domain *domain = dev; + struct arm_smmu_domain *smmu_domain = domain->priv; + struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; + struct arm_smmu_device *smmu = root_cfg->smmu; + void __iomem *cb_base; + + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx); + fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR); + + if (!(fsr & FSR_FAULT)) + return IRQ_NONE; + + if (fsr & FSR_IGN) + dev_err_ratelimited(smmu->dev, + "Unexpected context fault (fsr 0x%u)\n", + fsr); + + fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); + flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ; + + far = readl_relaxed(cb_base + ARM_SMMU_CB_FAR_LO); + iova = far; +#ifdef CONFIG_64BIT + far = readl_relaxed(cb_base + ARM_SMMU_CB_FAR_HI); + iova |= ((unsigned long)far << 32); +#endif + + if (!report_iommu_fault(domain, smmu->dev, iova, flags)) { + ret = IRQ_HANDLED; + resume = RESUME_RETRY; + } else { + dev_err_ratelimited(smmu->dev, + "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n", + iova, fsynr, root_cfg->cbndx); + ret = IRQ_NONE; + resume = RESUME_TERMINATE; + } + + /* Clear the faulting FSR */ + writel(fsr, cb_base + ARM_SMMU_CB_FSR); + + /* Retry or terminate any stalled transactions */ + if (fsr & FSR_SS) + writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME); + + return ret; +} + +static irqreturn_t arm_smmu_global_fault(int irq, void *dev) +{ + u32 gfsr, gfsynr0, gfsynr1, gfsynr2; + struct arm_smmu_device *smmu = dev; + void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu); + + gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR); + gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0); + gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1); + gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2); + + if (!gfsr) + return IRQ_NONE; + + dev_err_ratelimited(smmu->dev, + "Unexpected global fault, this could be serious\n"); + dev_err_ratelimited(smmu->dev, + "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n", + gfsr, gfsynr0, gfsynr1, gfsynr2); + + writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR); + return IRQ_HANDLED; +} + +static void arm_smmu_flush_pgtable(struct arm_smmu_device *smmu, void *addr, + size_t size) +{ + unsigned long offset = (unsigned long)addr & ~PAGE_MASK; + + + /* Ensure new page tables are visible to the hardware walker */ + if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) { + dsb(ishst); + } else 
{ + /* + * If the SMMU can't walk tables in the CPU caches, treat them + * like non-coherent DMA since we need to flush the new entries + * all the way out to memory. There's no possibility of + * recursion here as the SMMU table walker will not be wired + * through another SMMU. + */ + dma_map_page(smmu->dev, virt_to_page(addr), offset, size, + DMA_TO_DEVICE); + } +} + +static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) +{ + u32 reg; + bool stage1; + struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; + struct arm_smmu_device *smmu = root_cfg->smmu; + void __iomem *cb_base, *gr0_base, *gr1_base; + + gr0_base = ARM_SMMU_GR0(smmu); + gr1_base = ARM_SMMU_GR1(smmu); + stage1 = root_cfg->cbar != CBAR_TYPE_S2_TRANS; + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx); + + /* CBAR */ + reg = root_cfg->cbar; + if (smmu->version == 1) + reg |= root_cfg->irptndx << CBAR_IRPTNDX_SHIFT; + + /* + * Use the weakest shareability/memory types, so they are + * overridden by the ttbcr/pte. + */ + if (stage1) { + reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) | + (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT); + } else { + reg |= ARM_SMMU_CB_VMID(root_cfg) << CBAR_VMID_SHIFT; + } + writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(root_cfg->cbndx)); + + if (smmu->version > 1) { + /* CBA2R */ +#ifdef CONFIG_64BIT + reg = CBA2R_RW64_64BIT; +#else + reg = CBA2R_RW64_32BIT; +#endif + writel_relaxed(reg, + gr1_base + ARM_SMMU_GR1_CBA2R(root_cfg->cbndx)); + + /* TTBCR2 */ + switch (smmu->input_size) { + case 32: + reg = (TTBCR2_ADDR_32 << TTBCR2_SEP_SHIFT); + break; + case 36: + reg = (TTBCR2_ADDR_36 << TTBCR2_SEP_SHIFT); + break; + case 39: + reg = (TTBCR2_ADDR_40 << TTBCR2_SEP_SHIFT); + break; + case 42: + reg = (TTBCR2_ADDR_42 << TTBCR2_SEP_SHIFT); + break; + case 44: + reg = (TTBCR2_ADDR_44 << TTBCR2_SEP_SHIFT); + break; + case 48: + reg = (TTBCR2_ADDR_48 << TTBCR2_SEP_SHIFT); + break; + } + + switch (smmu->s1_output_size) { + case 32: + reg |= (TTBCR2_ADDR_32 << TTBCR2_PASIZE_SHIFT); + break; + case 36: + reg |= (TTBCR2_ADDR_36 << TTBCR2_PASIZE_SHIFT); + break; + case 39: + reg |= (TTBCR2_ADDR_40 << TTBCR2_PASIZE_SHIFT); + break; + case 42: + reg |= (TTBCR2_ADDR_42 << TTBCR2_PASIZE_SHIFT); + break; + case 44: + reg |= (TTBCR2_ADDR_44 << TTBCR2_PASIZE_SHIFT); + break; + case 48: + reg |= (TTBCR2_ADDR_48 << TTBCR2_PASIZE_SHIFT); + break; + } + + if (stage1) + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2); + } + + /* TTBR0 */ + arm_smmu_flush_pgtable(smmu, root_cfg->pgd, + PTRS_PER_PGD * sizeof(pgd_t)); + reg = __pa(root_cfg->pgd); + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); + reg = (phys_addr_t)__pa(root_cfg->pgd) >> 32; + if (stage1) + reg |= ARM_SMMU_CB_ASID(root_cfg) << TTBRn_HI_ASID_SHIFT; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); + + /* + * TTBCR + * We use long descriptor, with inner-shareable WBWA tables in TTBR0. 
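+ * (TTBCR_EAE selects the long-descriptor format, while the SH0, ORGN0
+ * and IRGN0 fields set below encode inner-shareable, write-back
+ * write-allocate walks and SL0 starts the walk at level 1.)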
+ */ + if (smmu->version > 1) { + if (PAGE_SIZE == SZ_4K) + reg = TTBCR_TG0_4K; + else + reg = TTBCR_TG0_64K; + + if (!stage1) { + switch (smmu->s2_output_size) { + case 32: + reg |= (TTBCR2_ADDR_32 << TTBCR_PASIZE_SHIFT); + break; + case 36: + reg |= (TTBCR2_ADDR_36 << TTBCR_PASIZE_SHIFT); + break; + case 40: + reg |= (TTBCR2_ADDR_40 << TTBCR_PASIZE_SHIFT); + break; + case 42: + reg |= (TTBCR2_ADDR_42 << TTBCR_PASIZE_SHIFT); + break; + case 44: + reg |= (TTBCR2_ADDR_44 << TTBCR_PASIZE_SHIFT); + break; + case 48: + reg |= (TTBCR2_ADDR_48 << TTBCR_PASIZE_SHIFT); + break; + } + } else { + reg |= (64 - smmu->s1_output_size) << TTBCR_T0SZ_SHIFT; + } + } else { + reg = 0; + } + + reg |= TTBCR_EAE | + (TTBCR_SH_IS << TTBCR_SH0_SHIFT) | + (TTBCR_RGN_WBWA << TTBCR_ORGN0_SHIFT) | + (TTBCR_RGN_WBWA << TTBCR_IRGN0_SHIFT) | + (TTBCR_SL0_LVL_1 << TTBCR_SL0_SHIFT); + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); + + /* MAIR0 (stage-1 only) */ + if (stage1) { + reg = (MAIR_ATTR_NC << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_NC)) | + (MAIR_ATTR_WBRWA << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_CACHE)) | + (MAIR_ATTR_DEVICE << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_DEV)); + writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0); + } + + /* SCTLR */ + reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP; + if (stage1) + reg |= SCTLR_S1_ASIDPNE; +#ifdef __BIG_ENDIAN + reg |= SCTLR_E; +#endif + writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR); +} + +static int arm_smmu_init_domain_context(struct iommu_domain *domain, + struct device *dev) +{ + int irq, ret, start; + struct arm_smmu_domain *smmu_domain = domain->priv; + struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; + struct arm_smmu_device *smmu, *parent; + + /* + * Walk the SMMU chain to find the root device for this chain. + * We assume that no masters have translations which terminate + * early, and therefore check that the root SMMU does indeed have + * a StreamID for the master in question. + */ + parent = dev->archdata.iommu; + smmu_domain->output_mask = -1; + do { + smmu = parent; + smmu_domain->output_mask &= (1ULL << smmu->s2_output_size) - 1; + } while ((parent = find_parent_smmu(smmu))); + + if (!find_smmu_master(smmu, dev->of_node)) { + dev_err(dev, "unable to find root SMMU for device\n"); + return -ENODEV; + } + + if (smmu->features & ARM_SMMU_FEAT_TRANS_NESTED) { + /* + * We will likely want to change this if/when KVM gets + * involved. 
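+ * For now a nesting-capable SMMU is driven like a stage-1-only one:
+ * the context bank is set up for stage-1 translation with stage-2
+ * bypass, and bank allocation starts above the stage-2-only banks.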
+ */ + root_cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS; + start = smmu->num_s2_context_banks; + } else if (smmu->features & ARM_SMMU_FEAT_TRANS_S2) { + root_cfg->cbar = CBAR_TYPE_S2_TRANS; + start = 0; + } else { + root_cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS; + start = smmu->num_s2_context_banks; + } + + ret = __arm_smmu_alloc_bitmap(smmu->context_map, start, + smmu->num_context_banks); + if (IS_ERR_VALUE(ret)) + return ret; + + root_cfg->cbndx = ret; + if (smmu->version == 1) { + root_cfg->irptndx = atomic_inc_return(&smmu->irptndx); + root_cfg->irptndx %= smmu->num_context_irqs; + } else { + root_cfg->irptndx = root_cfg->cbndx; + } + + irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx]; + ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED, + "arm-smmu-context-fault", domain); + if (IS_ERR_VALUE(ret)) { + dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", + root_cfg->irptndx, irq); + root_cfg->irptndx = INVALID_IRPTNDX; + goto out_free_context; + } + + root_cfg->smmu = smmu; + arm_smmu_init_context_bank(smmu_domain); + return ret; + +out_free_context: + __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx); + return ret; +} + +static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = domain->priv; + struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; + struct arm_smmu_device *smmu = root_cfg->smmu; + void __iomem *cb_base; + int irq; + + if (!smmu) + return; + + /* Disable the context bank and nuke the TLB before freeing it. */ + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx); + writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + arm_smmu_tlb_inv_context(root_cfg); + + if (root_cfg->irptndx != INVALID_IRPTNDX) { + irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx]; + free_irq(irq, domain); + } + + __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx); +} + +static int arm_smmu_domain_init(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain; + pgd_t *pgd; + + /* + * Allocate the domain and initialise some of its data structures. + * We can't really do anything meaningful until we've added a + * master. + */ + smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); + if (!smmu_domain) + return -ENOMEM; + + pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + if (!pgd) + goto out_free_domain; + smmu_domain->root_cfg.pgd = pgd; + + spin_lock_init(&smmu_domain->lock); + domain->priv = smmu_domain; + return 0; + +out_free_domain: + kfree(smmu_domain); + return -ENOMEM; +} + +static void arm_smmu_free_ptes(pmd_t *pmd) +{ + pgtable_t table = pmd_pgtable(*pmd); + pgtable_page_dtor(table); + __free_page(table); +} + +static void arm_smmu_free_pmds(pud_t *pud) +{ + int i; + pmd_t *pmd, *pmd_base = pmd_offset(pud, 0); + + pmd = pmd_base; + for (i = 0; i < PTRS_PER_PMD; ++i) { + if (pmd_none(*pmd)) + continue; + + arm_smmu_free_ptes(pmd); + pmd++; + } + + pmd_free(NULL, pmd_base); +} + +static void arm_smmu_free_puds(pgd_t *pgd) +{ + int i; + pud_t *pud, *pud_base = pud_offset(pgd, 0); + + pud = pud_base; + for (i = 0; i < PTRS_PER_PUD; ++i) { + if (pud_none(*pud)) + continue; + + arm_smmu_free_pmds(pud); + pud++; + } + + pud_free(NULL, pud_base); +} + +static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain) +{ + int i; + struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; + pgd_t *pgd, *pgd_base = root_cfg->pgd; + + /* + * Recursively free the page tables for this domain. 
We don't + * care about speculative TLB filling because the tables should + * not be active in any context bank at this point (SCTLR.M is 0). + */ + pgd = pgd_base; + for (i = 0; i < PTRS_PER_PGD; ++i) { + if (pgd_none(*pgd)) + continue; + arm_smmu_free_puds(pgd); + pgd++; + } + + kfree(pgd_base); +} + +static void arm_smmu_domain_destroy(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = domain->priv; + + /* + * Free the domain resources. We assume that all devices have + * already been detached. + */ + arm_smmu_destroy_domain_context(domain); + arm_smmu_free_pgtables(smmu_domain); + kfree(smmu_domain); +} + +static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu, + struct arm_smmu_master *master) +{ + int i; + struct arm_smmu_smr *smrs; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + + if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH)) + return 0; + + if (master->smrs) + return -EEXIST; + + smrs = kmalloc(sizeof(*smrs) * master->num_streamids, GFP_KERNEL); + if (!smrs) { + dev_err(smmu->dev, "failed to allocate %d SMRs for master %s\n", + master->num_streamids, master->of_node->name); + return -ENOMEM; + } + + /* Allocate the SMRs on the root SMMU */ + for (i = 0; i < master->num_streamids; ++i) { + int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0, + smmu->num_mapping_groups); + if (IS_ERR_VALUE(idx)) { + dev_err(smmu->dev, "failed to allocate free SMR\n"); + goto err_free_smrs; + } + + smrs[i] = (struct arm_smmu_smr) { + .idx = idx, + .mask = 0, /* We don't currently share SMRs */ + .id = master->streamids[i], + }; + } + + /* It worked! Now, poke the actual hardware */ + for (i = 0; i < master->num_streamids; ++i) { + u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT | + smrs[i].mask << SMR_MASK_SHIFT; + writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx)); + } + + master->smrs = smrs; + return 0; + +err_free_smrs: + while (--i >= 0) + __arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx); + kfree(smrs); + return -ENOSPC; +} + +static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu, + struct arm_smmu_master *master) +{ + int i; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + struct arm_smmu_smr *smrs = master->smrs; + + /* Invalidate the SMRs before freeing back to the allocator */ + for (i = 0; i < master->num_streamids; ++i) { + u8 idx = smrs[i].idx; + writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx)); + __arm_smmu_free_bitmap(smmu->smr_map, idx); + } + + master->smrs = NULL; + kfree(smrs); +} + +static void arm_smmu_bypass_stream_mapping(struct arm_smmu_device *smmu, + struct arm_smmu_master *master) +{ + int i; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + + for (i = 0; i < master->num_streamids; ++i) { + u16 sid = master->streamids[i]; + writel_relaxed(S2CR_TYPE_BYPASS, + gr0_base + ARM_SMMU_GR0_S2CR(sid)); + } +} + +static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master *master) +{ + int i, ret; + struct arm_smmu_device *parent, *smmu = smmu_domain->root_cfg.smmu; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + + ret = arm_smmu_master_configure_smrs(smmu, master); + if (ret) + return ret; + + /* Bypass the leaves */ + smmu = smmu_domain->leaf_smmu; + while ((parent = find_parent_smmu(smmu))) { + /* + * We won't have a StreamID match for anything but the root + * smmu, so we only need to worry about StreamID indexing, + * where we must install bypass entries in the S2CRs. 
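+ * (Stream-indexed SMMUs use the StreamID itself as the S2CR index, so
+ * arm_smmu_bypass_stream_mapping() writes S2CR_TYPE_BYPASS at each of
+ * the master's StreamIDs.)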
+ */ + if (smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) + continue; + + arm_smmu_bypass_stream_mapping(smmu, master); + smmu = parent; + } + + /* Now we're at the root, time to point at our context bank */ + for (i = 0; i < master->num_streamids; ++i) { + u32 idx, s2cr; + idx = master->smrs ? master->smrs[i].idx : master->streamids[i]; + s2cr = S2CR_TYPE_TRANS | + (smmu_domain->root_cfg.cbndx << S2CR_CBNDX_SHIFT); + writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx)); + } + + return 0; +} + +static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master *master) +{ + struct arm_smmu_device *smmu = smmu_domain->root_cfg.smmu; + + /* + * We *must* clear the S2CR first, because freeing the SMR means + * that it can be re-allocated immediately. + */ + arm_smmu_bypass_stream_mapping(smmu, master); + arm_smmu_master_free_smrs(smmu, master); +} + +static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + int ret = -EINVAL; + struct arm_smmu_domain *smmu_domain = domain->priv; + struct arm_smmu_device *device_smmu = dev->archdata.iommu; + struct arm_smmu_master *master; + unsigned long flags; + + if (!device_smmu) { + dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n"); + return -ENXIO; + } + + /* + * Sanity check the domain. We don't currently support domains + * that cross between different SMMU chains. + */ + spin_lock_irqsave(&smmu_domain->lock, flags); + if (!smmu_domain->leaf_smmu) { + /* Now that we have a master, we can finalise the domain */ + ret = arm_smmu_init_domain_context(domain, dev); + if (IS_ERR_VALUE(ret)) + goto err_unlock; + + smmu_domain->leaf_smmu = device_smmu; + } else if (smmu_domain->leaf_smmu != device_smmu) { + dev_err(dev, + "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", + dev_name(smmu_domain->leaf_smmu->dev), + dev_name(device_smmu->dev)); + goto err_unlock; + } + spin_unlock_irqrestore(&smmu_domain->lock, flags); + + /* Looks ok, so add the device to the domain */ + master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node); + if (!master) + return -ENODEV; + + return arm_smmu_domain_add_master(smmu_domain, master); + +err_unlock: + spin_unlock_irqrestore(&smmu_domain->lock, flags); + return ret; +} + +static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct arm_smmu_domain *smmu_domain = domain->priv; + struct arm_smmu_master *master; + + master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node); + if (master) + arm_smmu_domain_remove_master(smmu_domain, master); +} + +static bool arm_smmu_pte_is_contiguous_range(unsigned long addr, + unsigned long end) +{ + return !(addr & ~ARM_SMMU_PTE_CONT_MASK) && + (addr + ARM_SMMU_PTE_CONT_SIZE <= end); +} + +static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, + unsigned long addr, unsigned long end, + unsigned long pfn, int prot, int stage) +{ + pte_t *pte, *start; + pteval_t pteval = ARM_SMMU_PTE_PAGE | ARM_SMMU_PTE_AF | ARM_SMMU_PTE_XN; + + if (pmd_none(*pmd)) { + /* Allocate a new set of tables */ + pgtable_t table = alloc_page(GFP_ATOMIC|__GFP_ZERO); + if (!table) + return -ENOMEM; + + arm_smmu_flush_pgtable(smmu, page_address(table), PAGE_SIZE); + if (!pgtable_page_ctor(table)) { + __free_page(table); + return -ENOMEM; + } + pmd_populate(NULL, pmd, table); + arm_smmu_flush_pgtable(smmu, pmd, sizeof(*pmd)); + } + + if (stage == 1) { + pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG; + if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) + 
pteval |= ARM_SMMU_PTE_AP_RDONLY; + + if (prot & IOMMU_CACHE) + pteval |= (MAIR_ATTR_IDX_CACHE << + ARM_SMMU_PTE_ATTRINDX_SHIFT); + } else { + pteval |= ARM_SMMU_PTE_HAP_FAULT; + if (prot & IOMMU_READ) + pteval |= ARM_SMMU_PTE_HAP_READ; + if (prot & IOMMU_WRITE) + pteval |= ARM_SMMU_PTE_HAP_WRITE; + if (prot & IOMMU_CACHE) + pteval |= ARM_SMMU_PTE_MEMATTR_OIWB; + else + pteval |= ARM_SMMU_PTE_MEMATTR_NC; + } + + /* If no access, create a faulting entry to avoid TLB fills */ + if (prot & IOMMU_EXEC) + pteval &= ~ARM_SMMU_PTE_XN; + else if (!(prot & (IOMMU_READ | IOMMU_WRITE))) + pteval &= ~ARM_SMMU_PTE_PAGE; + + pteval |= ARM_SMMU_PTE_SH_IS; + start = pmd_page_vaddr(*pmd) + pte_index(addr); + pte = start; + + /* + * Install the page table entries. This is fairly complicated + * since we attempt to make use of the contiguous hint in the + * ptes where possible. The contiguous hint indicates a series + * of ARM_SMMU_PTE_CONT_ENTRIES ptes mapping a physically + * contiguous region with the following constraints: + * + * - The region start is aligned to ARM_SMMU_PTE_CONT_SIZE + * - Each pte in the region has the contiguous hint bit set + * + * This complicates unmapping (also handled by this code, when + * neither IOMMU_READ or IOMMU_WRITE are set) because it is + * possible, yet highly unlikely, that a client may unmap only + * part of a contiguous range. This requires clearing of the + * contiguous hint bits in the range before installing the new + * faulting entries. + * + * Note that re-mapping an address range without first unmapping + * it is not supported, so TLB invalidation is not required here + * and is instead performed at unmap and domain-init time. + */ + do { + int i = 1; + pteval &= ~ARM_SMMU_PTE_CONT; + + if (arm_smmu_pte_is_contiguous_range(addr, end)) { + i = ARM_SMMU_PTE_CONT_ENTRIES; + pteval |= ARM_SMMU_PTE_CONT; + } else if (pte_val(*pte) & + (ARM_SMMU_PTE_CONT | ARM_SMMU_PTE_PAGE)) { + int j; + pte_t *cont_start; + unsigned long idx = pte_index(addr); + + idx &= ~(ARM_SMMU_PTE_CONT_ENTRIES - 1); + cont_start = pmd_page_vaddr(*pmd) + idx; + for (j = 0; j < ARM_SMMU_PTE_CONT_ENTRIES; ++j) + pte_val(*(cont_start + j)) &= ~ARM_SMMU_PTE_CONT; + + arm_smmu_flush_pgtable(smmu, cont_start, + sizeof(*pte) * + ARM_SMMU_PTE_CONT_ENTRIES); + } + + do { + *pte = pfn_pte(pfn, __pgprot(pteval)); + } while (pte++, pfn++, addr += PAGE_SIZE, --i); + } while (addr != end); + + arm_smmu_flush_pgtable(smmu, start, sizeof(*pte) * (pte - start)); + return 0; +} + +static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud, + unsigned long addr, unsigned long end, + phys_addr_t phys, int prot, int stage) +{ + int ret; + pmd_t *pmd; + unsigned long next, pfn = __phys_to_pfn(phys); + +#ifndef __PAGETABLE_PMD_FOLDED + if (pud_none(*pud)) { + pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC); + if (!pmd) + return -ENOMEM; + + arm_smmu_flush_pgtable(smmu, pmd, PAGE_SIZE); + pud_populate(NULL, pud, pmd); + arm_smmu_flush_pgtable(smmu, pud, sizeof(*pud)); + + pmd += pmd_index(addr); + } else +#endif + pmd = pmd_offset(pud, addr); + + do { + next = pmd_addr_end(addr, end); + ret = arm_smmu_alloc_init_pte(smmu, pmd, addr, next, pfn, + prot, stage); + phys += next - addr; + } while (pmd++, addr = next, addr < end); + + return ret; +} + +static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd, + unsigned long addr, unsigned long end, + phys_addr_t phys, int prot, int stage) +{ + int ret = 0; + pud_t *pud; + unsigned long next; + +#ifndef __PAGETABLE_PUD_FOLDED + if 
(pgd_none(*pgd)) { + pud = (pud_t *)get_zeroed_page(GFP_ATOMIC); + if (!pud) + return -ENOMEM; + + arm_smmu_flush_pgtable(smmu, pud, PAGE_SIZE); + pgd_populate(NULL, pgd, pud); + arm_smmu_flush_pgtable(smmu, pgd, sizeof(*pgd)); + + pud += pud_index(addr); + } else +#endif + pud = pud_offset(pgd, addr); + + do { + next = pud_addr_end(addr, end); + ret = arm_smmu_alloc_init_pmd(smmu, pud, addr, next, phys, + prot, stage); + phys += next - addr; + } while (pud++, addr = next, addr < end); + + return ret; +} + +static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain, + unsigned long iova, phys_addr_t paddr, + size_t size, int prot) +{ + int ret, stage; + unsigned long end; + phys_addr_t input_mask, output_mask; + struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; + pgd_t *pgd = root_cfg->pgd; + struct arm_smmu_device *smmu = root_cfg->smmu; + unsigned long flags; + + if (root_cfg->cbar == CBAR_TYPE_S2_TRANS) { + stage = 2; + output_mask = (1ULL << smmu->s2_output_size) - 1; + } else { + stage = 1; + output_mask = (1ULL << smmu->s1_output_size) - 1; + } + + if (!pgd) + return -EINVAL; + + if (size & ~PAGE_MASK) + return -EINVAL; + + input_mask = (1ULL << smmu->input_size) - 1; + if ((phys_addr_t)iova & ~input_mask) + return -ERANGE; + + if (paddr & ~output_mask) + return -ERANGE; + + spin_lock_irqsave(&smmu_domain->lock, flags); + pgd += pgd_index(iova); + end = iova + size; + do { + unsigned long next = pgd_addr_end(iova, end); + + ret = arm_smmu_alloc_init_pud(smmu, pgd, iova, next, paddr, + prot, stage); + if (ret) + goto out_unlock; + + paddr += next - iova; + iova = next; + } while (pgd++, iova != end); + +out_unlock: + spin_unlock_irqrestore(&smmu_domain->lock, flags); + + return ret; +} + +static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct arm_smmu_domain *smmu_domain = domain->priv; + + if (!smmu_domain) + return -ENODEV; + + /* Check for silent address truncation up the SMMU chain. */ + if ((phys_addr_t)iova & ~smmu_domain->output_mask) + return -ERANGE; + + return arm_smmu_handle_mapping(smmu_domain, iova, paddr, size, prot); +} + +static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + int ret; + struct arm_smmu_domain *smmu_domain = domain->priv; + + ret = arm_smmu_handle_mapping(smmu_domain, iova, 0, size, 0); + arm_smmu_tlb_inv_context(&smmu_domain->root_cfg); + return ret ? 
0 : size; +} + +static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + pgd_t *pgdp, pgd; + pud_t pud; + pmd_t pmd; + pte_t pte; + struct arm_smmu_domain *smmu_domain = domain->priv; + struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; + + pgdp = root_cfg->pgd; + if (!pgdp) + return 0; + + pgd = *(pgdp + pgd_index(iova)); + if (pgd_none(pgd)) + return 0; + + pud = *pud_offset(&pgd, iova); + if (pud_none(pud)) + return 0; + + pmd = *pmd_offset(&pud, iova); + if (pmd_none(pmd)) + return 0; + + pte = *(pmd_page_vaddr(pmd) + pte_index(iova)); + if (pte_none(pte)) + return 0; + + return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK); +} + +static int arm_smmu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + unsigned long caps = 0; + struct arm_smmu_domain *smmu_domain = domain->priv; + + if (smmu_domain->root_cfg.smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + caps |= IOMMU_CAP_CACHE_COHERENCY; + + return !!(cap & caps); +} + +static int arm_smmu_add_device(struct device *dev) +{ + struct arm_smmu_device *child, *parent, *smmu; + struct arm_smmu_master *master = NULL; + struct iommu_group *group; + int ret; + + if (dev->archdata.iommu) { + dev_warn(dev, "IOMMU driver already assigned to device\n"); + return -EINVAL; + } + + spin_lock(&arm_smmu_devices_lock); + list_for_each_entry(parent, &arm_smmu_devices, list) { + smmu = parent; + + /* Try to find a child of the current SMMU. */ + list_for_each_entry(child, &arm_smmu_devices, list) { + if (child->parent_of_node == parent->dev->of_node) { + /* Does the child sit above our master? */ + master = find_smmu_master(child, dev->of_node); + if (master) { + smmu = NULL; + break; + } + } + } + + /* We found some children, so keep searching. */ + if (!smmu) { + master = NULL; + continue; + } + + master = find_smmu_master(smmu, dev->of_node); + if (master) + break; + } + spin_unlock(&arm_smmu_devices_lock); + + if (!master) + return -ENODEV; + + group = iommu_group_alloc(); + if (IS_ERR(group)) { + dev_err(dev, "Failed to allocate IOMMU group\n"); + return PTR_ERR(group); + } + + ret = iommu_group_add_device(group, dev); + iommu_group_put(group); + dev->archdata.iommu = smmu; + + return ret; +} + +static void arm_smmu_remove_device(struct device *dev) +{ + dev->archdata.iommu = NULL; + iommu_group_remove_device(dev); +} + +static struct iommu_ops arm_smmu_ops = { + .domain_init = arm_smmu_domain_init, + .domain_destroy = arm_smmu_domain_destroy, + .attach_dev = arm_smmu_attach_dev, + .detach_dev = arm_smmu_detach_dev, + .map = arm_smmu_map, + .unmap = arm_smmu_unmap, + .iova_to_phys = arm_smmu_iova_to_phys, + .domain_has_cap = arm_smmu_domain_has_cap, + .add_device = arm_smmu_add_device, + .remove_device = arm_smmu_remove_device, + .pgsize_bitmap = (SECTION_SIZE | + ARM_SMMU_PTE_CONT_SIZE | + PAGE_SIZE), +}; + +static void arm_smmu_device_reset(struct arm_smmu_device *smmu) +{ + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + void __iomem *cb_base; + int i = 0; + u32 reg; + + /* clear global FSR */ + reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR); + writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR); + + /* Mark all SMRn as invalid and all S2CRn as bypass */ + for (i = 0; i < smmu->num_mapping_groups; ++i) { + writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(i)); + writel_relaxed(S2CR_TYPE_BYPASS, gr0_base + ARM_SMMU_GR0_S2CR(i)); + } + + /* Make sure all context banks are disabled and clear CB_FSR */ + for (i = 0; i < smmu->num_context_banks; ++i) { + cb_base = 
ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i); + writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR); + } + + /* Invalidate the TLB, just in case */ + writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL); + writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH); + writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH); + + reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); + + /* Enable fault reporting */ + reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE); + + /* Disable TLB broadcasting. */ + reg |= (sCR0_VMIDPNE | sCR0_PTM); + + /* Enable client access, but bypass when no mapping is found */ + reg &= ~(sCR0_CLIENTPD | sCR0_USFCFG); + + /* Disable forced broadcasting */ + reg &= ~sCR0_FB; + + /* Don't upgrade barriers */ + reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT); + + /* Push the button */ + arm_smmu_tlb_sync(smmu); + writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); +} + +static int arm_smmu_id_size_to_bits(int size) +{ + switch (size) { + case 0: + return 32; + case 1: + return 36; + case 2: + return 40; + case 3: + return 42; + case 4: + return 44; + case 5: + default: + return 48; + } +} + +static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) +{ + unsigned long size; + void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + u32 id; + + dev_notice(smmu->dev, "probing hardware configuration...\n"); + + /* Primecell ID */ + id = readl_relaxed(gr0_base + ARM_SMMU_GR0_PIDR2); + smmu->version = ((id >> PIDR2_ARCH_SHIFT) & PIDR2_ARCH_MASK) + 1; + dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version); + + /* ID0 */ + id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0); +#ifndef CONFIG_64BIT + if (((id >> ID0_PTFS_SHIFT) & ID0_PTFS_MASK) == ID0_PTFS_V8_ONLY) { + dev_err(smmu->dev, "\tno v7 descriptor support!\n"); + return -ENODEV; + } +#endif + if (id & ID0_S1TS) { + smmu->features |= ARM_SMMU_FEAT_TRANS_S1; + dev_notice(smmu->dev, "\tstage 1 translation\n"); + } + + if (id & ID0_S2TS) { + smmu->features |= ARM_SMMU_FEAT_TRANS_S2; + dev_notice(smmu->dev, "\tstage 2 translation\n"); + } + + if (id & ID0_NTS) { + smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED; + dev_notice(smmu->dev, "\tnested translation\n"); + } + + if (!(smmu->features & + (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2 | + ARM_SMMU_FEAT_TRANS_NESTED))) { + dev_err(smmu->dev, "\tno translation support!\n"); + return -ENODEV; + } + + if (id & ID0_CTTW) { + smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK; + dev_notice(smmu->dev, "\tcoherent table walk\n"); + } + + if (id & ID0_SMS) { + u32 smr, sid, mask; + + smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH; + smmu->num_mapping_groups = (id >> ID0_NUMSMRG_SHIFT) & + ID0_NUMSMRG_MASK; + if (smmu->num_mapping_groups == 0) { + dev_err(smmu->dev, + "stream-matching supported, but no SMRs present!\n"); + return -ENODEV; + } + + smr = SMR_MASK_MASK << SMR_MASK_SHIFT; + smr |= (SMR_ID_MASK << SMR_ID_SHIFT); + writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0)); + smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0)); + + mask = (smr >> SMR_MASK_SHIFT) & SMR_MASK_MASK; + sid = (smr >> SMR_ID_SHIFT) & SMR_ID_MASK; + if ((mask & sid) != sid) { + dev_err(smmu->dev, + "SMR mask bits (0x%x) insufficient for ID field (0x%x)\n", + mask, sid); + return -ENODEV; + } + + dev_notice(smmu->dev, + "\tstream matching with %u register groups, mask 0x%x", + smmu->num_mapping_groups, mask); + } + + /* ID1 */ + id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1); + smmu->pagesize = (id & ID1_PAGESIZE) ? 
SZ_64K : SZ_4K; + + /* Check for size mismatch of SMMU address space from mapped region */ + size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1); + size *= (smmu->pagesize << 1); + if (smmu->size != size) + dev_warn(smmu->dev, "SMMU address space size (0x%lx) differs " + "from mapped region size (0x%lx)!\n", size, smmu->size); + + smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & + ID1_NUMS2CB_MASK; + smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK; + if (smmu->num_s2_context_banks > smmu->num_context_banks) { + dev_err(smmu->dev, "impossible number of S2 context banks!\n"); + return -ENODEV; + } + dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n", + smmu->num_context_banks, smmu->num_s2_context_banks); + + /* ID2 */ + id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2); + size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK); + + /* + * Stage-1 output limited by stage-2 input size due to pgd + * allocation (PTRS_PER_PGD). + */ +#ifdef CONFIG_64BIT + smmu->s1_output_size = min((unsigned long)VA_BITS, size); +#else + smmu->s1_output_size = min(32UL, size); +#endif + + /* The stage-2 output mask is also applied for bypass */ + size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK); + smmu->s2_output_size = min((unsigned long)PHYS_MASK_SHIFT, size); + + if (smmu->version == 1) { + smmu->input_size = 32; + } else { +#ifdef CONFIG_64BIT + size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK; + size = min(VA_BITS, arm_smmu_id_size_to_bits(size)); +#else + size = 32; +#endif + smmu->input_size = size; + + if ((PAGE_SIZE == SZ_4K && !(id & ID2_PTFS_4K)) || + (PAGE_SIZE == SZ_64K && !(id & ID2_PTFS_64K)) || + (PAGE_SIZE != SZ_4K && PAGE_SIZE != SZ_64K)) { + dev_err(smmu->dev, "CPU page size 0x%lx unsupported\n", + PAGE_SIZE); + return -ENODEV; + } + } + + dev_notice(smmu->dev, + "\t%lu-bit VA, %lu-bit IPA, %lu-bit PA\n", + smmu->input_size, smmu->s1_output_size, smmu->s2_output_size); + return 0; +} + +static int arm_smmu_device_dt_probe(struct platform_device *pdev) +{ + struct resource *res; + struct arm_smmu_device *smmu; + struct device_node *dev_node; + struct device *dev = &pdev->dev; + struct rb_node *node; + struct of_phandle_args masterspec; + int num_irqs, i, err; + + smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); + if (!smmu) { + dev_err(dev, "failed to allocate arm_smmu_device\n"); + return -ENOMEM; + } + smmu->dev = dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + smmu->base = devm_ioremap_resource(dev, res); + if (IS_ERR(smmu->base)) + return PTR_ERR(smmu->base); + smmu->size = resource_size(res); + + if (of_property_read_u32(dev->of_node, "#global-interrupts", + &smmu->num_global_irqs)) { + dev_err(dev, "missing #global-interrupts property\n"); + return -ENODEV; + } + + num_irqs = 0; + while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) { + num_irqs++; + if (num_irqs > smmu->num_global_irqs) + smmu->num_context_irqs++; + } + + if (!smmu->num_context_irqs) { + dev_err(dev, "found %d interrupts but expected at least %d\n", + num_irqs, smmu->num_global_irqs + 1); + return -ENODEV; + } + + smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs, + GFP_KERNEL); + if (!smmu->irqs) { + dev_err(dev, "failed to allocate %d irqs\n", num_irqs); + return -ENOMEM; + } + + for (i = 0; i < num_irqs; ++i) { + int irq = platform_get_irq(pdev, i); + if (irq < 0) { + dev_err(dev, "failed to get irq index %d\n", i); + return -ENODEV; + } + smmu->irqs[i] = irq; + } + + i = 0; + 
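+ /*
+ * Each "mmu-masters" entry is a phandle to a master device node
+ * followed by that master's StreamIDs; the number of IDs per entry
+ * is given by the master's #stream-id-cells property. A purely
+ * illustrative binding fragment (not taken from this patch) might
+ * look like:
+ *
+ *	smmu {
+ *		...
+ *		mmu-masters = <&dma0 0xd01d 0xd01e>,
+ *			      <&dma1 0xd01f>;
+ *	};
+ */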
smmu->masters = RB_ROOT; + while (!of_parse_phandle_with_args(dev->of_node, "mmu-masters", + "#stream-id-cells", i, + &masterspec)) { + err = register_smmu_master(smmu, dev, &masterspec); + if (err) { + dev_err(dev, "failed to add master %s\n", + masterspec.np->name); + goto out_put_masters; + } + + i++; + } + dev_notice(dev, "registered %d master devices\n", i); + + if ((dev_node = of_parse_phandle(dev->of_node, "smmu-parent", 0))) + smmu->parent_of_node = dev_node; + + err = arm_smmu_device_cfg_probe(smmu); + if (err) + goto out_put_parent; + + parse_driver_options(smmu); + + if (smmu->version > 1 && + smmu->num_context_banks != smmu->num_context_irqs) { + dev_err(dev, + "found only %d context interrupt(s) but %d required\n", + smmu->num_context_irqs, smmu->num_context_banks); + err = -ENODEV; + goto out_put_parent; + } + + for (i = 0; i < smmu->num_global_irqs; ++i) { + err = request_irq(smmu->irqs[i], + arm_smmu_global_fault, + IRQF_SHARED, + "arm-smmu global fault", + smmu); + if (err) { + dev_err(dev, "failed to request global IRQ %d (%u)\n", + i, smmu->irqs[i]); + goto out_free_irqs; + } + } + + INIT_LIST_HEAD(&smmu->list); + spin_lock(&arm_smmu_devices_lock); + list_add(&smmu->list, &arm_smmu_devices); + spin_unlock(&arm_smmu_devices_lock); + + arm_smmu_device_reset(smmu); + return 0; + +out_free_irqs: + while (i--) + free_irq(smmu->irqs[i], smmu); + +out_put_parent: + if (smmu->parent_of_node) + of_node_put(smmu->parent_of_node); + +out_put_masters: + for (node = rb_first(&smmu->masters); node; node = rb_next(node)) { + struct arm_smmu_master *master; + master = container_of(node, struct arm_smmu_master, node); + of_node_put(master->of_node); + } + + return err; +} + +static int arm_smmu_device_remove(struct platform_device *pdev) +{ + int i; + struct device *dev = &pdev->dev; + struct arm_smmu_device *curr, *smmu = NULL; + struct rb_node *node; + + spin_lock(&arm_smmu_devices_lock); + list_for_each_entry(curr, &arm_smmu_devices, list) { + if (curr->dev == dev) { + smmu = curr; + list_del(&smmu->list); + break; + } + } + spin_unlock(&arm_smmu_devices_lock); + + if (!smmu) + return -ENODEV; + + if (smmu->parent_of_node) + of_node_put(smmu->parent_of_node); + + for (node = rb_first(&smmu->masters); node; node = rb_next(node)) { + struct arm_smmu_master *master; + master = container_of(node, struct arm_smmu_master, node); + of_node_put(master->of_node); + } + + if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) + dev_err(dev, "removing device with active domains!\n"); + + for (i = 0; i < smmu->num_global_irqs; ++i) + free_irq(smmu->irqs[i], smmu); + + /* Turn the thing off */ + writel(sCR0_CLIENTPD,ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); + return 0; +} + +#ifdef CONFIG_OF +static struct of_device_id arm_smmu_of_match[] = { + { .compatible = "arm,smmu-v1", }, + { .compatible = "arm,smmu-v2", }, + { .compatible = "arm,mmu-400", }, + { .compatible = "arm,mmu-500", }, + { }, +}; +MODULE_DEVICE_TABLE(of, arm_smmu_of_match); +#endif + +static struct platform_driver arm_smmu_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "arm-smmu", + .of_match_table = of_match_ptr(arm_smmu_of_match), + }, + .probe = arm_smmu_device_dt_probe, + .remove = arm_smmu_device_remove, +}; + +static int __init arm_smmu_init(void) +{ + int ret; + + ret = platform_driver_register(&arm_smmu_driver); + if (ret) + return ret; + + /* Oh, for a proper bus abstraction */ + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &arm_smmu_ops); + +#ifdef CONFIG_ARM_AMBA + if 
(!iommu_present(&amba_bustype)) + bus_set_iommu(&amba_bustype, &arm_smmu_ops); +#endif + + return 0; +} + +static void __exit arm_smmu_exit(void) +{ + return platform_driver_unregister(&arm_smmu_driver); +} + +subsys_initcall(arm_smmu_init); +module_exit(arm_smmu_exit); + +MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations"); +MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 35c1e17fce1..9a4f05e5b23 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -26,6 +26,8 @@ * These routines are used by both DMA-remapping and Interrupt-remapping */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */ + #include <linux/pci.h> #include <linux/dmar.h> #include <linux/iova.h> @@ -36,18 +38,32 @@ #include <linux/tboot.h> #include <linux/dmi.h> #include <linux/slab.h> +#include <asm/irq_remapping.h> #include <asm/iommu_table.h> -#define PREFIX "DMAR: " +#include "irq_remapping.h" -/* No locks are needed as DMA remapping hardware unit - * list is constructed at boot time and hotplug of - * these units are not supported by the architecture. +/* + * Assumptions: + * 1) The hotplug framework guarantees that DMAR unit will be hot-added + * before IO devices managed by that unit. + * 2) The hotplug framework guarantees that DMAR unit will be hot-removed + * after IO devices managed by that unit. + * 3) Hotplug events are rare. + * + * Locking rules for DMA and interrupt remapping related global data structures: + * 1) Use dmar_global_lock in process context + * 2) Use RCU in interrupt context */ +DECLARE_RWSEM(dmar_global_lock); LIST_HEAD(dmar_drhd_units); struct acpi_table_header * __initdata dmar_tbl; static acpi_size dmar_tbl_size; +static int dmar_dev_scope_status = 1; + +static int alloc_iommu(struct dmar_drhd_unit *drhd); +static void free_iommu(struct intel_iommu *iommu); static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) { @@ -56,115 +72,260 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) * the very end.
*/ if (drhd->include_all) - list_add_tail(&drhd->list, &dmar_drhd_units); + list_add_tail_rcu(&drhd->list, &dmar_drhd_units); else - list_add(&drhd->list, &dmar_drhd_units); + list_add_rcu(&drhd->list, &dmar_drhd_units); } -static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, - struct pci_dev **dev, u16 segment) +void *dmar_alloc_dev_scope(void *start, void *end, int *cnt) { - struct pci_bus *bus; - struct pci_dev *pdev = NULL; - struct acpi_dmar_pci_path *path; - int count; - - bus = pci_find_bus(segment, scope->bus); - path = (struct acpi_dmar_pci_path *)(scope + 1); - count = (scope->length - sizeof(struct acpi_dmar_device_scope)) - / sizeof(struct acpi_dmar_pci_path); + struct acpi_dmar_device_scope *scope; - while (count) { - if (pdev) - pci_dev_put(pdev); - /* - * Some BIOSes list non-exist devices in DMAR table, just - * ignore it - */ - if (!bus) { - printk(KERN_WARNING - PREFIX "Device scope bus [%d] not found\n", - scope->bus); - break; + *cnt = 0; + while (start < end) { + scope = start; + if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ACPI || + scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || + scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) + (*cnt)++; + else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC && + scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) { + pr_warn("Unsupported device scope\n"); } - pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn)); - if (!pdev) { - printk(KERN_WARNING PREFIX - "Device scope device [%04x:%02x:%02x.%02x] not found\n", - segment, bus->number, path->dev, path->fn); - break; + start += scope->length; + } + if (*cnt == 0) + return NULL; + + return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL); +} + +void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt) +{ + int i; + struct device *tmp_dev; + + if (*devices && *cnt) { + for_each_active_dev_scope(*devices, *cnt, i, tmp_dev) + put_device(tmp_dev); + kfree(*devices); + } + + *devices = NULL; + *cnt = 0; +} + +/* Optimize out kzalloc()/kfree() for normal cases */ +static char dmar_pci_notify_info_buf[64]; + +static struct dmar_pci_notify_info * +dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) +{ + int level = 0; + size_t size; + struct pci_dev *tmp; + struct dmar_pci_notify_info *info; + + BUG_ON(dev->is_virtfn); + + /* Only generate path[] for device addition event */ + if (event == BUS_NOTIFY_ADD_DEVICE) + for (tmp = dev; tmp; tmp = tmp->bus->self) + level++; + + size = sizeof(*info) + level * sizeof(struct acpi_dmar_pci_path); + if (size <= sizeof(dmar_pci_notify_info_buf)) { + info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf; + } else { + info = kzalloc(size, GFP_KERNEL); + if (!info) { + pr_warn("Out of memory when allocating notify_info " + "for %s.\n", pci_name(dev)); + if (dmar_dev_scope_status == 0) + dmar_dev_scope_status = -ENOMEM; + return NULL; } - path ++; - count --; - bus = pdev->subordinate; } - if (!pdev) { - printk(KERN_WARNING PREFIX - "Device scope device [%04x:%02x:%02x.%02x] not found\n", - segment, scope->bus, path->dev, path->fn); - *dev = NULL; - return 0; + + info->event = event; + info->dev = dev; + info->seg = pci_domain_nr(dev->bus); + info->level = level; + if (event == BUS_NOTIFY_ADD_DEVICE) { + for (tmp = dev; tmp; tmp = tmp->bus->self) { + level--; + info->path[level].device = PCI_SLOT(tmp->devfn); + info->path[level].function = PCI_FUNC(tmp->devfn); + if (pci_is_root_bus(tmp->bus)) + info->bus = tmp->bus->number; + } } - if ((scope->entry_type == 
ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \ - pdev->subordinate) || (scope->entry_type == \ - ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) { - pci_dev_put(pdev); - printk(KERN_WARNING PREFIX - "Device scope type does not match for %s\n", - pci_name(pdev)); - return -EINVAL; + + return info; +} + +static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info) +{ + if ((void *)info != dmar_pci_notify_info_buf) + kfree(info); +} + +static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus, + struct acpi_dmar_pci_path *path, int count) +{ + int i; + + if (info->bus != bus) + return false; + if (info->level != count) + return false; + + for (i = 0; i < count; i++) { + if (path[i].device != info->path[i].device || + path[i].function != info->path[i].function) + return false; } - *dev = pdev; - return 0; + + return true; } -int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, - struct pci_dev ***devices, u16 segment) +/* Return: > 0 if match found, 0 if no match found, < 0 if error happens */ +int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, + void *start, void*end, u16 segment, + struct dmar_dev_scope *devices, + int devices_cnt) { + int i, level; + struct device *tmp, *dev = &info->dev->dev; struct acpi_dmar_device_scope *scope; - void * tmp = start; - int index; - int ret; + struct acpi_dmar_pci_path *path; - *cnt = 0; - while (start < end) { + if (segment != info->seg) + return 0; + + for (; start < end; start += scope->length) { scope = start; - if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || - scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) - (*cnt)++; - else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) { - printk(KERN_WARNING PREFIX - "Unsupported device scope\n"); + if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT && + scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE) + continue; + + path = (struct acpi_dmar_pci_path *)(scope + 1); + level = (scope->length - sizeof(*scope)) / sizeof(*path); + if (!dmar_match_pci_path(info, scope->bus, path, level)) + continue; + + if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^ + (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) { + pr_warn("Device scope type does not match for %s\n", + pci_name(info->dev)); + return -EINVAL; } - start += scope->length; + + for_each_dev_scope(devices, devices_cnt, i, tmp) + if (tmp == NULL) { + devices[i].bus = info->dev->bus->number; + devices[i].devfn = info->dev->devfn; + rcu_assign_pointer(devices[i].dev, + get_device(dev)); + return 1; + } + BUG_ON(i >= devices_cnt); } - if (*cnt == 0) - return 0; - *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL); - if (!*devices) - return -ENOMEM; + return 0; +} - start = tmp; - index = 0; - while (start < end) { - scope = start; - if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || - scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) { - ret = dmar_parse_one_dev_scope(scope, - &(*devices)[index], segment); - if (ret) { - kfree(*devices); - return ret; - } - index ++; +int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment, + struct dmar_dev_scope *devices, int count) +{ + int index; + struct device *tmp; + + if (info->seg != segment) + return 0; + + for_each_active_dev_scope(devices, count, index, tmp) + if (tmp == &info->dev->dev) { + rcu_assign_pointer(devices[index].dev, NULL); + synchronize_rcu(); + put_device(tmp); + return 1; } - start += scope->length; - } return 0; } +static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info) +{ + int 
ret = 0; + struct dmar_drhd_unit *dmaru; + struct acpi_dmar_hardware_unit *drhd; + + for_each_drhd_unit(dmaru) { + if (dmaru->include_all) + continue; + + drhd = container_of(dmaru->hdr, + struct acpi_dmar_hardware_unit, header); + ret = dmar_insert_dev_scope(info, (void *)(drhd + 1), + ((void *)drhd) + drhd->header.length, + dmaru->segment, + dmaru->devices, dmaru->devices_cnt); + if (ret != 0) + break; + } + if (ret >= 0) + ret = dmar_iommu_notify_scope_dev(info); + if (ret < 0 && dmar_dev_scope_status == 0) + dmar_dev_scope_status = ret; + + return ret; +} + +static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info) +{ + struct dmar_drhd_unit *dmaru; + + for_each_drhd_unit(dmaru) + if (dmar_remove_dev_scope(info, dmaru->segment, + dmaru->devices, dmaru->devices_cnt)) + break; + dmar_iommu_notify_scope_dev(info); +} + +static int dmar_pci_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pci_dev *pdev = to_pci_dev(data); + struct dmar_pci_notify_info *info; + + /* Only care about add/remove events for physical functions */ + if (pdev->is_virtfn) + return NOTIFY_DONE; + if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE) + return NOTIFY_DONE; + + info = dmar_alloc_pci_notify_info(pdev, action); + if (!info) + return NOTIFY_DONE; + + down_write(&dmar_global_lock); + if (action == BUS_NOTIFY_ADD_DEVICE) + dmar_pci_bus_add_dev(info); + else if (action == BUS_NOTIFY_DEL_DEVICE) + dmar_pci_bus_del_dev(info); + up_write(&dmar_global_lock); + + dmar_free_pci_notify_info(info); + + return NOTIFY_OK; +} + +static struct notifier_block dmar_pci_bus_nb = { + .notifier_call = dmar_pci_bus_notifier, + .priority = INT_MIN, +}; + /** * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition * structure which uniquely represent one DMA remapping hardware unit @@ -186,9 +347,18 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) dmaru->reg_base_addr = drhd->address; dmaru->segment = drhd->segment; dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ + dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1), + ((void *)drhd) + drhd->header.length, + &dmaru->devices_cnt); + if (dmaru->devices_cnt && dmaru->devices == NULL) { + kfree(dmaru); + return -ENOMEM; + } ret = alloc_iommu(dmaru); if (ret) { + dmar_free_dev_scope(&dmaru->devices, + &dmaru->devices_cnt); kfree(dmaru); return ret; } @@ -196,25 +366,33 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) return 0; } -static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru) +static void dmar_free_drhd(struct dmar_drhd_unit *dmaru) { - struct acpi_dmar_hardware_unit *drhd; - int ret = 0; - - drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr; - - if (dmaru->include_all) - return 0; + if (dmaru->devices && dmaru->devices_cnt) + dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt); + if (dmaru->iommu) + free_iommu(dmaru->iommu); + kfree(dmaru); +} - ret = dmar_parse_dev_scope((void *)(drhd + 1), - ((void *)drhd) + drhd->header.length, - &dmaru->devices_cnt, &dmaru->devices, - drhd->segment); - if (ret) { - list_del(&dmaru->list); - kfree(dmaru); +static int __init dmar_parse_one_andd(struct acpi_dmar_header *header) +{ + struct acpi_dmar_andd *andd = (void *)header; + + /* Check for NUL termination within the designated length */ + if (strnlen(andd->object_name, header->length - 8) == header->length - 8) { + WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + "Your BIOS is broken; ANDD object name is not NUL-terminated\n" + "BIOS vendor: %s; Ver: 
%s; Product Version: %s\n", + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + return -EINVAL; } - return ret; + pr_info("ANDD device: %x name: %s\n", andd->device_number, + andd->object_name); + + return 0; } #ifdef CONFIG_ACPI_NUMA @@ -260,28 +438,30 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) case ACPI_DMAR_TYPE_HARDWARE_UNIT: drhd = container_of(header, struct acpi_dmar_hardware_unit, header); - printk (KERN_INFO PREFIX - "DRHD base: %#016Lx flags: %#x\n", + pr_info("DRHD base: %#016Lx flags: %#x\n", (unsigned long long)drhd->address, drhd->flags); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: rmrr = container_of(header, struct acpi_dmar_reserved_memory, header); - printk (KERN_INFO PREFIX - "RMRR base: %#016Lx end: %#016Lx\n", + pr_info("RMRR base: %#016Lx end: %#016Lx\n", (unsigned long long)rmrr->base_address, (unsigned long long)rmrr->end_address); break; case ACPI_DMAR_TYPE_ATSR: atsr = container_of(header, struct acpi_dmar_atsr, header); - printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags); + pr_info("ATSR flags: %#x\n", atsr->flags); break; case ACPI_DMAR_HARDWARE_AFFINITY: rhsa = container_of(header, struct acpi_dmar_rhsa, header); - printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n", + pr_info("RHSA base: %#016Lx proximity domain: %#x\n", (unsigned long long)rhsa->base_address, rhsa->proximity_domain); break; + case ACPI_DMAR_TYPE_ANDD: + /* We don't print this here because we need to sanity-check + it first. So print it in dmar_parse_one_andd() instead. */ + break; } } @@ -298,7 +478,7 @@ static int __init dmar_table_detect(void) &dmar_tbl_size); if (ACPI_SUCCESS(status) && !dmar_tbl) { - printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); + pr_warn("Unable to map DMAR\n"); status = AE_NOT_FOUND; } @@ -314,6 +494,7 @@ parse_dmar_table(void) struct acpi_table_dmar *dmar; struct acpi_dmar_header *entry_header; int ret = 0; + int drhd_count = 0; /* * Do it again, earlier dmar_tbl mapping could be mapped with @@ -332,20 +513,18 @@ parse_dmar_table(void) return -ENODEV; if (dmar->width < PAGE_SHIFT - 1) { - printk(KERN_WARNING PREFIX "Invalid DMAR haw\n"); + pr_warn("Invalid DMAR haw\n"); return -EINVAL; } - printk (KERN_INFO PREFIX "Host address width %d\n", - dmar->width + 1); + pr_info("Host address width %d\n", dmar->width + 1); entry_header = (struct acpi_dmar_header *)(dmar + 1); while (((unsigned long)entry_header) < (((unsigned long)dmar) + dmar_tbl->length)) { /* Avoid looping forever on bad ACPI tables */ if (entry_header->length == 0) { - printk(KERN_WARNING PREFIX - "Invalid 0-length structure\n"); + pr_warn("Invalid 0-length structure\n"); ret = -EINVAL; break; } @@ -354,6 +533,7 @@ parse_dmar_table(void) switch (entry_header->type) { case ACPI_DMAR_TYPE_HARDWARE_UNIT: + drhd_count++; ret = dmar_parse_one_drhd(entry_header); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: @@ -367,9 +547,11 @@ parse_dmar_table(void) ret = dmar_parse_one_rhsa(entry_header); #endif break; + case ACPI_DMAR_TYPE_ANDD: + ret = dmar_parse_one_andd(entry_header); + break; default: - printk(KERN_WARNING PREFIX - "Unknown DMAR structure type %d\n", + pr_warn("Unknown DMAR structure type %d\n", entry_header->type); ret = 0; /* for forward compatibility */ break; @@ -379,17 +561,20 @@ parse_dmar_table(void) entry_header = ((void *)entry_header + entry_header->length); } + if (drhd_count == 0) + pr_warn(FW_BUG "No DRHD structure found in DMAR table\n"); return ret; } -static int 
dmar_pci_device_match(struct pci_dev *devices[], int cnt, - struct pci_dev *dev) +static int dmar_pci_device_match(struct dmar_dev_scope devices[], + int cnt, struct pci_dev *dev) { int index; + struct device *tmp; while (dev) { - for (index = 0; index < cnt; index++) - if (dev == devices[index]) + for_each_active_dev_scope(devices, cnt, index, tmp) + if (dev_is_pci(tmp) && dev == to_pci_dev(tmp)) return 1; /* Check our parent */ @@ -402,56 +587,141 @@ static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev) { - struct dmar_drhd_unit *dmaru = NULL; + struct dmar_drhd_unit *dmaru; struct acpi_dmar_hardware_unit *drhd; dev = pci_physfn(dev); - list_for_each_entry(dmaru, &dmar_drhd_units, list) { + rcu_read_lock(); + for_each_drhd_unit(dmaru) { drhd = container_of(dmaru->hdr, struct acpi_dmar_hardware_unit, header); if (dmaru->include_all && drhd->segment == pci_domain_nr(dev->bus)) - return dmaru; + goto out; if (dmar_pci_device_match(dmaru->devices, dmaru->devices_cnt, dev)) - return dmaru; + goto out; } + dmaru = NULL; +out: + rcu_read_unlock(); - return NULL; + return dmaru; } -int __init dmar_dev_scope_init(void) +static void __init dmar_acpi_insert_dev_scope(u8 device_number, + struct acpi_device *adev) { - static int dmar_dev_scope_initialized; - struct dmar_drhd_unit *drhd, *drhd_n; - int ret = -ENODEV; - - if (dmar_dev_scope_initialized) - return dmar_dev_scope_initialized; + struct dmar_drhd_unit *dmaru; + struct acpi_dmar_hardware_unit *drhd; + struct acpi_dmar_device_scope *scope; + struct device *tmp; + int i; + struct acpi_dmar_pci_path *path; - if (list_empty(&dmar_drhd_units)) - goto fail; + for_each_drhd_unit(dmaru) { + drhd = container_of(dmaru->hdr, + struct acpi_dmar_hardware_unit, + header); - list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) { - ret = dmar_parse_dev(drhd); - if (ret) - goto fail; + for (scope = (void *)(drhd + 1); + (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length; + scope = ((void *)scope) + scope->length) { + if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ACPI) + continue; + if (scope->enumeration_id != device_number) + continue; + + path = (void *)(scope + 1); + pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n", + dev_name(&adev->dev), dmaru->reg_base_addr, + scope->bus, path->device, path->function); + for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp) + if (tmp == NULL) { + dmaru->devices[i].bus = scope->bus; + dmaru->devices[i].devfn = PCI_DEVFN(path->device, + path->function); + rcu_assign_pointer(dmaru->devices[i].dev, + get_device(&adev->dev)); + return; + } + BUG_ON(i >= dmaru->devices_cnt); + } } + pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n", + device_number, dev_name(&adev->dev)); +} - ret = dmar_parse_rmrr_atsr_dev(); - if (ret) - goto fail; +static int __init dmar_acpi_dev_scope_init(void) +{ + struct acpi_dmar_andd *andd; + + if (dmar_tbl == NULL) + return -ENODEV; - dmar_dev_scope_initialized = 1; + for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar); + ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length; + andd = ((void *)andd) + andd->header.length) { + if (andd->header.type == ACPI_DMAR_TYPE_ANDD) { + acpi_handle h; + struct acpi_device *adev; + + if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT, + andd->object_name, + &h))) { + pr_err("Failed to find handle for ACPI object %s\n", + andd->object_name); + continue; + } + acpi_bus_get_device(h, 
&adev); + if (!adev) { + pr_err("Failed to get device for ACPI object %s\n", + andd->object_name); + continue; + } + dmar_acpi_insert_dev_scope(andd->device_number, adev); + } + } return 0; +} -fail: - dmar_dev_scope_initialized = ret; - return ret; +int __init dmar_dev_scope_init(void) +{ + struct pci_dev *dev = NULL; + struct dmar_pci_notify_info *info; + + if (dmar_dev_scope_status != 1) + return dmar_dev_scope_status; + + if (list_empty(&dmar_drhd_units)) { + dmar_dev_scope_status = -ENODEV; + } else { + dmar_dev_scope_status = 0; + + dmar_acpi_dev_scope_init(); + + for_each_pci_dev(dev) { + if (dev->is_virtfn) + continue; + + info = dmar_alloc_pci_notify_info(dev, + BUS_NOTIFY_ADD_DEVICE); + if (!info) { + return dmar_dev_scope_status; + } else { + dmar_pci_bus_add_dev(info); + dmar_free_pci_notify_info(info); + } + } + + bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); + } + + return dmar_dev_scope_status; } @@ -460,24 +730,23 @@ int __init dmar_table_init(void) static int dmar_table_initialized; int ret; - if (dmar_table_initialized) - return 0; - - dmar_table_initialized = 1; - - ret = parse_dmar_table(); - if (ret) { - if (ret != -ENODEV) - printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); - return ret; - } + if (dmar_table_initialized == 0) { + ret = parse_dmar_table(); + if (ret < 0) { + if (ret != -ENODEV) + pr_info("parse DMAR table failure.\n"); + } else if (list_empty(&dmar_drhd_units)) { + pr_info("No DMAR devices found\n"); + ret = -ENODEV; + } - if (list_empty(&dmar_drhd_units)) { - printk(KERN_INFO PREFIX "No DMAR devices found\n"); - return -ENODEV; + if (ret < 0) + dmar_table_initialized = ret; + else + dmar_table_initialized = 1; } - return 0; + return dmar_table_initialized < 0 ? dmar_table_initialized : 0; } static void warn_invalid_dmar(u64 addr, const char *message) @@ -492,7 +761,7 @@ static void warn_invalid_dmar(u64 addr, const char *message) dmi_get_system_info(DMI_PRODUCT_VERSION)); } -int __init check_zero_address(void) +static int __init check_zero_address(void) { struct acpi_table_dmar *dmar; struct acpi_dmar_header *entry_header; @@ -505,8 +774,7 @@ int __init check_zero_address(void) (((unsigned long)dmar) + dmar_tbl->length)) { /* Avoid looping forever on bad ACPI tables */ if (entry_header->length == 0) { - printk(KERN_WARNING PREFIX - "Invalid 0-length structure\n"); + pr_warn("Invalid 0-length structure\n"); return 0; } @@ -547,19 +815,11 @@ int __init detect_intel_iommu(void) { int ret; + down_write(&dmar_global_lock); ret = dmar_table_detect(); if (ret) ret = check_zero_address(); { - struct acpi_table_dmar *dmar; - - dmar = (struct acpi_table_dmar *) dmar_tbl; - - if (ret && intr_remapping_enabled && cpu_has_x2apic && - dmar->flags & 0x1) - printk(KERN_INFO - "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); - if (ret && !no_iommu && !iommu_detected && !dmar_disabled) { iommu_detected = 1; /* Make sure ACS will be enabled */ @@ -571,21 +831,97 @@ int __init detect_intel_iommu(void) x86_init.iommu.iommu_init = intel_iommu_init; #endif } - early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); + early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; + up_write(&dmar_global_lock); return ret ? 
1 : -ENODEV; } -int alloc_iommu(struct dmar_drhd_unit *drhd) +static void unmap_iommu(struct intel_iommu *iommu) +{ + iounmap(iommu->reg); + release_mem_region(iommu->reg_phys, iommu->reg_size); +} + +/** + * map_iommu: map the iommu's registers + * @iommu: the iommu to map + * @phys_addr: the physical address of the base resgister + * + * Memory map the iommu's registers. Start w/ a single page, and + * possibly expand if that turns out to be insufficent. + */ +static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) +{ + int map_size, err=0; + + iommu->reg_phys = phys_addr; + iommu->reg_size = VTD_PAGE_SIZE; + + if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) { + pr_err("IOMMU: can't reserve memory\n"); + err = -EBUSY; + goto out; + } + + iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); + if (!iommu->reg) { + pr_err("IOMMU: can't map the region\n"); + err = -ENOMEM; + goto release; + } + + iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); + iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + + if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { + err = -EINVAL; + warn_invalid_dmar(phys_addr, " returns all ones"); + goto unmap; + } + + /* the registers might be more than one page */ + map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), + cap_max_fault_reg_offset(iommu->cap)); + map_size = VTD_PAGE_ALIGN(map_size); + if (map_size > iommu->reg_size) { + iounmap(iommu->reg); + release_mem_region(iommu->reg_phys, iommu->reg_size); + iommu->reg_size = map_size; + if (!request_mem_region(iommu->reg_phys, iommu->reg_size, + iommu->name)) { + pr_err("IOMMU: can't reserve memory\n"); + err = -EBUSY; + goto out; + } + iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); + if (!iommu->reg) { + pr_err("IOMMU: can't map the region\n"); + err = -ENOMEM; + goto release; + } + } + err = 0; + goto out; + +unmap: + iounmap(iommu->reg); +release: + release_mem_region(iommu->reg_phys, iommu->reg_size); +out: + return err; +} + +static int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; - int map_size; - u32 ver; + u32 ver, sts; static int iommu_allocated = 0; int agaw = 0; int msagaw = 0; + int err; if (!drhd->reg_base_addr) { warn_invalid_dmar(0, ""); @@ -599,51 +935,31 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->seq_id = iommu_allocated++; sprintf (iommu->name, "dmar%d", iommu->seq_id); - iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); + err = map_iommu(iommu, drhd->reg_base_addr); + if (err) { + pr_err("IOMMU: failed to map %s\n", iommu->name); goto error; } - iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); - iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); - - if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { - warn_invalid_dmar(drhd->reg_base_addr, " returns all ones"); - goto err_unmap; - } + err = -EINVAL; agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { - printk(KERN_ERR - "Cannot get a valid agaw for iommu (seq_id = %d)\n", - iommu->seq_id); + pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n", + iommu->seq_id); goto err_unmap; } msagaw = iommu_calculate_max_sagaw(iommu); if (msagaw < 0) { - printk(KERN_ERR - "Cannot get a valid max agaw for iommu (seq_id = %d)\n", + pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n", iommu->seq_id); goto err_unmap; } iommu->agaw = agaw; iommu->msagaw = msagaw; + iommu->segment = drhd->segment; iommu->node = -1; - /* the registers might be 
more than one page */ - map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), - cap_max_fault_reg_offset(iommu->cap)); - map_size = VTD_PAGE_ALIGN(map_size); - if (map_size > VTD_PAGE_SIZE) { - iounmap(iommu->reg); - iommu->reg = ioremap(drhd->reg_base_addr, map_size); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - } - ver = readl(iommu->reg + DMAR_VER_REG); pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n", iommu->seq_id, @@ -652,27 +968,44 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) (unsigned long long)iommu->cap, (unsigned long long)iommu->ecap); + /* Reflect status in gcmd */ + sts = readl(iommu->reg + DMAR_GSTS_REG); + if (sts & DMA_GSTS_IRES) + iommu->gcmd |= DMA_GCMD_IRE; + if (sts & DMA_GSTS_TES) + iommu->gcmd |= DMA_GCMD_TE; + if (sts & DMA_GSTS_QIES) + iommu->gcmd |= DMA_GCMD_QIE; + raw_spin_lock_init(&iommu->register_lock); drhd->iommu = iommu; return 0; err_unmap: - iounmap(iommu->reg); + unmap_iommu(iommu); error: kfree(iommu); - return -1; + return err; } -void free_iommu(struct intel_iommu *iommu) +static void free_iommu(struct intel_iommu *iommu) { - if (!iommu) - return; + if (iommu->irq) { + free_irq(iommu->irq, iommu); + irq_set_handler_data(iommu->irq, NULL); + dmar_free_hwirq(iommu->irq); + } - free_dmar_iommu(iommu); + if (iommu->qi) { + free_page((unsigned long)iommu->qi->desc); + kfree(iommu->qi->desc_status); + kfree(iommu->qi); + } if (iommu->reg) - iounmap(iommu->reg); + unmap_iommu(iommu); + kfree(iommu); } @@ -709,7 +1042,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) if (fault & DMA_FSTS_IQE) { head = readl(iommu->reg + DMAR_IQH_REG); if ((head >> DMAR_IQ_SHIFT) == index) { - printk(KERN_ERR "VT-d detected invalid descriptor: " + pr_err("VT-d detected invalid descriptor: " "low=%llx, high=%llx\n", (unsigned long long)qi->desc[index].low, (unsigned long long)qi->desc[index].high); @@ -992,17 +1325,17 @@ int dmar_enable_qi(struct intel_iommu *iommu) desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0); if (!desc_page) { kfree(qi); - iommu->qi = 0; + iommu->qi = NULL; return -ENOMEM; } qi->desc = page_address(desc_page); - qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC); + qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC); if (!qi->desc_status) { free_page((unsigned long) qi->desc); kfree(qi); - iommu->qi = 0; + iommu->qi = NULL; return -ENOMEM; } @@ -1039,9 +1372,10 @@ static const char *dma_remap_fault_reasons[] = "non-zero reserved fields in RTP", "non-zero reserved fields in CTP", "non-zero reserved fields in PTE", + "PCE for translation request specifies blocking", }; -static const char *intr_remap_fault_reasons[] = +static const char *irq_remap_fault_reasons[] = { "Detected reserved fields in the decoded interrupt-remapped request", "Interrupt index exceeded the interrupt-remapping table size", @@ -1052,14 +1386,12 @@ static const char *intr_remap_fault_reasons[] = "Blocked an interrupt request due to source-id verification failure", }; -#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) - -const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) +static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) { - if (fault_reason >= 0x20 && (fault_reason <= 0x20 + - ARRAY_SIZE(intr_remap_fault_reasons))) { + if (fault_reason >= 0x20 && (fault_reason - 0x20 < + ARRAY_SIZE(irq_remap_fault_reasons))) { *fault_type = INTR_REMAP; - return intr_remap_fault_reasons[fault_reason - 0x20]; 
+ return irq_remap_fault_reasons[fault_reason - 0x20]; } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) { *fault_type = DMA_REMAP; return dma_remap_fault_reasons[fault_reason]; @@ -1128,15 +1460,14 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, reason = dmar_get_fault_reason(fault_reason, &fault_type); if (fault_type == INTR_REMAP) - printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] " + pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] " "fault index %llx\n" "INTR-REMAP:[fault reason %02d] %s\n", (source_id >> 8), PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr >> 48, fault_reason, reason); else - printk(KERN_ERR - "DMAR:[%s] Request device [%02x:%02x.%d] " + pr_err("DMAR:[%s] Request device [%02x:%02x.%d] " "fault addr %llx \n" "DMAR:[fault reason %02d] %s\n", (type ? "DMA Read" : "DMA Write"), @@ -1156,12 +1487,11 @@ irqreturn_t dmar_fault(int irq, void *dev_id) raw_spin_lock_irqsave(&iommu->register_lock, flag); fault_status = readl(iommu->reg + DMAR_FSTS_REG); if (fault_status) - printk(KERN_ERR "DRHD: handling fault status reg %x\n", - fault_status); + pr_err("DRHD: handling fault status reg %x\n", fault_status); /* TBD: ignore advanced fault log currently */ if (!(fault_status & DMA_FSTS_PPF)) - goto clear_rest; + goto unlock_exit; fault_index = dma_fsts_fault_record_index(fault_status); reg = cap_fault_reg_offset(iommu->cap); @@ -1202,11 +1532,10 @@ irqreturn_t dmar_fault(int irq, void *dev_id) fault_index = 0; raw_spin_lock_irqsave(&iommu->register_lock, flag); } -clear_rest: - /* clear all the other faults */ - fault_status = readl(iommu->reg + DMAR_FSTS_REG); - writel(fault_status, iommu->reg + DMAR_FSTS_REG); + writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG); + +unlock_exit: raw_spin_unlock_irqrestore(&iommu->register_lock, flag); return IRQ_HANDLED; } @@ -1221,9 +1550,9 @@ int dmar_set_interrupt(struct intel_iommu *iommu) if (iommu->irq) return 0; - irq = create_irq(); - if (!irq) { - printk(KERN_ERR "IOMMU: no free vectors\n"); + irq = dmar_alloc_hwirq(); + if (irq <= 0) { + pr_err("IOMMU: no free vectors\n"); return -EINVAL; } @@ -1234,31 +1563,30 @@ int dmar_set_interrupt(struct intel_iommu *iommu) if (ret) { irq_set_handler_data(irq, NULL); iommu->irq = 0; - destroy_irq(irq); + dmar_free_hwirq(irq); return ret; } ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu); if (ret) - printk(KERN_ERR "IOMMU: can't request irq\n"); + pr_err("IOMMU: can't request irq\n"); return ret; } int __init enable_drhd_fault_handling(void) { struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; /* * Enable fault control interrupt. */ - for_each_drhd_unit(drhd) { - int ret; - struct intel_iommu *iommu = drhd->iommu; - ret = dmar_set_interrupt(iommu); + for_each_iommu(iommu, drhd) { + u32 fault_status; + int ret = dmar_set_interrupt(iommu); if (ret) { - printk(KERN_ERR "DRHD %Lx: failed to enable fault, " - " interrupt, ret %d\n", + pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n", (unsigned long long)drhd->reg_base_addr, ret); return -1; } @@ -1267,6 +1595,8 @@ int __init enable_drhd_fault_handling(void) * Clear any previous faults. 
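* Calling dmar_fault() once drains any pending primary fault records, and
* writing the value read from DMAR_FSTS_REG back to that register clears the
* remaining write-one-to-clear status bits.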
*/ dmar_fault(iommu->irq, iommu); + fault_status = readl(iommu->reg + DMAR_FSTS_REG); + writel(fault_status, iommu->reg + DMAR_FSTS_REG); } return 0; @@ -1308,4 +1638,27 @@ int __init dmar_ir_support(void) return 0; return dmar->flags & 0x1; } + +static int __init dmar_free_unused_resources(void) +{ + struct dmar_drhd_unit *dmaru, *dmaru_n; + + /* DMAR units are in use */ + if (irq_remapping_enabled || intel_iommu_enabled) + return 0; + + if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units)) + bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb); + + down_write(&dmar_global_lock); + list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) { + list_del(&dmaru->list); + dmar_free_drhd(dmaru); + } + up_write(&dmar_global_lock); + + return 0; +} + +late_initcall(dmar_free_unused_resources); IOMMU_INIT_POST(detect_intel_iommu); diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c new file mode 100644 index 00000000000..99054d2c040 --- /dev/null +++ b/drivers/iommu/exynos-iommu.c @@ -0,0 +1,1227 @@ +/* linux/drivers/iommu/exynos_iommu.c + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef CONFIG_EXYNOS_IOMMU_DEBUG +#define DEBUG +#endif + +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/pm_runtime.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/mm.h> +#include <linux/iommu.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/memblock.h> +#include <linux/export.h> + +#include <asm/cacheflush.h> +#include <asm/pgtable.h> + +typedef u32 sysmmu_iova_t; +typedef u32 sysmmu_pte_t; + +/* We does not consider super section mapping (16MB) */ +#define SECT_ORDER 20 +#define LPAGE_ORDER 16 +#define SPAGE_ORDER 12 + +#define SECT_SIZE (1 << SECT_ORDER) +#define LPAGE_SIZE (1 << LPAGE_ORDER) +#define SPAGE_SIZE (1 << SPAGE_ORDER) + +#define SECT_MASK (~(SECT_SIZE - 1)) +#define LPAGE_MASK (~(LPAGE_SIZE - 1)) +#define SPAGE_MASK (~(SPAGE_SIZE - 1)) + +#define lv1ent_fault(sent) ((*(sent) == ZERO_LV2LINK) || \ + ((*(sent) & 3) == 0) || ((*(sent) & 3) == 3)) +#define lv1ent_zero(sent) (*(sent) == ZERO_LV2LINK) +#define lv1ent_page_zero(sent) ((*(sent) & 3) == 1) +#define lv1ent_page(sent) ((*(sent) != ZERO_LV2LINK) && \ + ((*(sent) & 3) == 1)) +#define lv1ent_section(sent) ((*(sent) & 3) == 2) + +#define lv2ent_fault(pent) ((*(pent) & 3) == 0) +#define lv2ent_small(pent) ((*(pent) & 2) == 2) +#define lv2ent_large(pent) ((*(pent) & 3) == 1) + +static u32 sysmmu_page_offset(sysmmu_iova_t iova, u32 size) +{ + return iova & (size - 1); +} + +#define section_phys(sent) (*(sent) & SECT_MASK) +#define section_offs(iova) sysmmu_page_offset((iova), SECT_SIZE) +#define lpage_phys(pent) (*(pent) & LPAGE_MASK) +#define lpage_offs(iova) sysmmu_page_offset((iova), LPAGE_SIZE) +#define spage_phys(pent) (*(pent) & SPAGE_MASK) +#define spage_offs(iova) sysmmu_page_offset((iova), SPAGE_SIZE) + +#define NUM_LV1ENTRIES 4096 +#define NUM_LV2ENTRIES (SECT_SIZE / SPAGE_SIZE) + +static u32 lv1ent_offset(sysmmu_iova_t iova) +{ + return iova >> SECT_ORDER; +} + +static u32 lv2ent_offset(sysmmu_iova_t iova) +{ + return (iova >> SPAGE_ORDER) & (NUM_LV2ENTRIES - 1); +} + +#define LV2TABLE_SIZE (NUM_LV2ENTRIES * sizeof(sysmmu_pte_t)) + +#define 
SPAGES_PER_LPAGE (LPAGE_SIZE / SPAGE_SIZE) + +#define lv2table_base(sent) (*(sent) & 0xFFFFFC00) + +#define mk_lv1ent_sect(pa) ((pa) | 2) +#define mk_lv1ent_page(pa) ((pa) | 1) +#define mk_lv2ent_lpage(pa) ((pa) | 1) +#define mk_lv2ent_spage(pa) ((pa) | 2) + +#define CTRL_ENABLE 0x5 +#define CTRL_BLOCK 0x7 +#define CTRL_DISABLE 0x0 + +#define CFG_LRU 0x1 +#define CFG_QOS(n) ((n & 0xF) << 7) +#define CFG_MASK 0x0150FFFF /* Selecting bit 0-15, 20, 22 and 24 */ +#define CFG_ACGEN (1 << 24) /* System MMU 3.3 only */ +#define CFG_SYSSEL (1 << 22) /* System MMU 3.2 only */ +#define CFG_FLPDCACHE (1 << 20) /* System MMU 3.2+ only */ + +#define REG_MMU_CTRL 0x000 +#define REG_MMU_CFG 0x004 +#define REG_MMU_STATUS 0x008 +#define REG_MMU_FLUSH 0x00C +#define REG_MMU_FLUSH_ENTRY 0x010 +#define REG_PT_BASE_ADDR 0x014 +#define REG_INT_STATUS 0x018 +#define REG_INT_CLEAR 0x01C + +#define REG_PAGE_FAULT_ADDR 0x024 +#define REG_AW_FAULT_ADDR 0x028 +#define REG_AR_FAULT_ADDR 0x02C +#define REG_DEFAULT_SLAVE_ADDR 0x030 + +#define REG_MMU_VERSION 0x034 + +#define MMU_MAJ_VER(val) ((val) >> 7) +#define MMU_MIN_VER(val) ((val) & 0x7F) +#define MMU_RAW_VER(reg) (((reg) >> 21) & ((1 << 11) - 1)) /* 11 bits */ + +#define MAKE_MMU_VER(maj, min) ((((maj) & 0xF) << 7) | ((min) & 0x7F)) + +#define REG_PB0_SADDR 0x04C +#define REG_PB0_EADDR 0x050 +#define REG_PB1_SADDR 0x054 +#define REG_PB1_EADDR 0x058 + +#define has_sysmmu(dev) (dev->archdata.iommu != NULL) + +static struct kmem_cache *lv2table_kmem_cache; +static sysmmu_pte_t *zero_lv2_table; +#define ZERO_LV2LINK mk_lv1ent_page(virt_to_phys(zero_lv2_table)) + +static sysmmu_pte_t *section_entry(sysmmu_pte_t *pgtable, sysmmu_iova_t iova) +{ + return pgtable + lv1ent_offset(iova); +} + +static sysmmu_pte_t *page_entry(sysmmu_pte_t *sent, sysmmu_iova_t iova) +{ + return (sysmmu_pte_t *)phys_to_virt( + lv2table_base(sent)) + lv2ent_offset(iova); +} + +enum exynos_sysmmu_inttype { + SYSMMU_PAGEFAULT, + SYSMMU_AR_MULTIHIT, + SYSMMU_AW_MULTIHIT, + SYSMMU_BUSERROR, + SYSMMU_AR_SECURITY, + SYSMMU_AR_ACCESS, + SYSMMU_AW_SECURITY, + SYSMMU_AW_PROTECTION, /* 7 */ + SYSMMU_FAULT_UNKNOWN, + SYSMMU_FAULTS_NUM +}; + +static unsigned short fault_reg_offset[SYSMMU_FAULTS_NUM] = { + REG_PAGE_FAULT_ADDR, + REG_AR_FAULT_ADDR, + REG_AW_FAULT_ADDR, + REG_DEFAULT_SLAVE_ADDR, + REG_AR_FAULT_ADDR, + REG_AR_FAULT_ADDR, + REG_AW_FAULT_ADDR, + REG_AW_FAULT_ADDR +}; + +static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = { + "PAGE FAULT", + "AR MULTI-HIT FAULT", + "AW MULTI-HIT FAULT", + "BUS ERROR", + "AR SECURITY PROTECTION FAULT", + "AR ACCESS PROTECTION FAULT", + "AW SECURITY PROTECTION FAULT", + "AW ACCESS PROTECTION FAULT", + "UNKNOWN FAULT" +}; + +/* attached to dev.archdata.iommu of the master device */ +struct exynos_iommu_owner { + struct list_head client; /* entry of exynos_iommu_domain.clients */ + struct device *dev; + struct device *sysmmu; + struct iommu_domain *domain; + void *vmm_data; /* IO virtual memory manager's data */ + spinlock_t lock; /* Lock to preserve consistency of System MMU */ +}; + +struct exynos_iommu_domain { + struct list_head clients; /* list of sysmmu_drvdata.node */ + sysmmu_pte_t *pgtable; /* lv1 page table, 16KB */ + short *lv2entcnt; /* free lv2 entry counter for each section */ + spinlock_t lock; /* lock for this structure */ + spinlock_t pgtablelock; /* lock for modifying page table @ pgtable */ +}; + +struct sysmmu_drvdata { + struct device *sysmmu; /* System MMU's device descriptor */ + struct device *master; /* Owner of system MMU */ + void __iomem 
*sfrbase; + struct clk *clk; + struct clk *clk_master; + int activations; + spinlock_t lock; + struct iommu_domain *domain; + phys_addr_t pgtable; +}; + +static bool set_sysmmu_active(struct sysmmu_drvdata *data) +{ + /* return true if the System MMU was not active previously + and it needs to be initialized */ + return ++data->activations == 1; +} + +static bool set_sysmmu_inactive(struct sysmmu_drvdata *data) +{ + /* return true if the System MMU is needed to be disabled */ + BUG_ON(data->activations < 1); + return --data->activations == 0; +} + +static bool is_sysmmu_active(struct sysmmu_drvdata *data) +{ + return data->activations > 0; +} + +static void sysmmu_unblock(void __iomem *sfrbase) +{ + __raw_writel(CTRL_ENABLE, sfrbase + REG_MMU_CTRL); +} + +static unsigned int __raw_sysmmu_version(struct sysmmu_drvdata *data) +{ + return MMU_RAW_VER(__raw_readl(data->sfrbase + REG_MMU_VERSION)); +} + +static bool sysmmu_block(void __iomem *sfrbase) +{ + int i = 120; + + __raw_writel(CTRL_BLOCK, sfrbase + REG_MMU_CTRL); + while ((i > 0) && !(__raw_readl(sfrbase + REG_MMU_STATUS) & 1)) + --i; + + if (!(__raw_readl(sfrbase + REG_MMU_STATUS) & 1)) { + sysmmu_unblock(sfrbase); + return false; + } + + return true; +} + +static void __sysmmu_tlb_invalidate(void __iomem *sfrbase) +{ + __raw_writel(0x1, sfrbase + REG_MMU_FLUSH); +} + +static void __sysmmu_tlb_invalidate_entry(void __iomem *sfrbase, + sysmmu_iova_t iova, unsigned int num_inv) +{ + unsigned int i; + + for (i = 0; i < num_inv; i++) { + __raw_writel((iova & SPAGE_MASK) | 1, + sfrbase + REG_MMU_FLUSH_ENTRY); + iova += SPAGE_SIZE; + } +} + +static void __sysmmu_set_ptbase(void __iomem *sfrbase, + phys_addr_t pgd) +{ + __raw_writel(pgd, sfrbase + REG_PT_BASE_ADDR); + + __sysmmu_tlb_invalidate(sfrbase); +} + +static void show_fault_information(const char *name, + enum exynos_sysmmu_inttype itype, + phys_addr_t pgtable_base, sysmmu_iova_t fault_addr) +{ + sysmmu_pte_t *ent; + + if ((itype >= SYSMMU_FAULTS_NUM) || (itype < SYSMMU_PAGEFAULT)) + itype = SYSMMU_FAULT_UNKNOWN; + + pr_err("%s occurred at %#x by %s(Page table base: %pa)\n", + sysmmu_fault_name[itype], fault_addr, name, &pgtable_base); + + ent = section_entry(phys_to_virt(pgtable_base), fault_addr); + pr_err("\tLv1 entry: %#x\n", *ent); + + if (lv1ent_page(ent)) { + ent = page_entry(ent, fault_addr); + pr_err("\t Lv2 entry: %#x\n", *ent); + } +} + +static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) +{ + /* SYSMMU is in blocked when interrupt occurred. 
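The handler below reads the fault type and address, reports the fault via report_iommu_fault() when a domain is attached and, once the fault has been handled, resumes translation with sysmmu_unblock().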
*/ + struct sysmmu_drvdata *data = dev_id; + enum exynos_sysmmu_inttype itype; + sysmmu_iova_t addr = -1; + int ret = -ENOSYS; + + WARN_ON(!is_sysmmu_active(data)); + + spin_lock(&data->lock); + + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + + itype = (enum exynos_sysmmu_inttype) + __ffs(__raw_readl(data->sfrbase + REG_INT_STATUS)); + if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN)))) + itype = SYSMMU_FAULT_UNKNOWN; + else + addr = __raw_readl(data->sfrbase + fault_reg_offset[itype]); + + if (itype == SYSMMU_FAULT_UNKNOWN) { + pr_err("%s: Fault is not occurred by System MMU '%s'!\n", + __func__, dev_name(data->sysmmu)); + pr_err("%s: Please check if IRQ is correctly configured.\n", + __func__); + BUG(); + } else { + unsigned int base = + __raw_readl(data->sfrbase + REG_PT_BASE_ADDR); + show_fault_information(dev_name(data->sysmmu), + itype, base, addr); + if (data->domain) + ret = report_iommu_fault(data->domain, + data->master, addr, itype); + } + + /* fault is not recovered by fault handler */ + BUG_ON(ret != 0); + + __raw_writel(1 << itype, data->sfrbase + REG_INT_CLEAR); + + sysmmu_unblock(data->sfrbase); + + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); + + spin_unlock(&data->lock); + + return IRQ_HANDLED; +} + +static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data) +{ + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + + __raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL); + __raw_writel(0, data->sfrbase + REG_MMU_CFG); + + clk_disable(data->clk); + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); +} + +static bool __sysmmu_disable(struct sysmmu_drvdata *data) +{ + bool disabled; + unsigned long flags; + + spin_lock_irqsave(&data->lock, flags); + + disabled = set_sysmmu_inactive(data); + + if (disabled) { + data->pgtable = 0; + data->domain = NULL; + + __sysmmu_disable_nocount(data); + + dev_dbg(data->sysmmu, "Disabled\n"); + } else { + dev_dbg(data->sysmmu, "%d times left to disable\n", + data->activations); + } + + spin_unlock_irqrestore(&data->lock, flags); + + return disabled; +} + +static void __sysmmu_init_config(struct sysmmu_drvdata *data) +{ + unsigned int cfg = CFG_LRU | CFG_QOS(15); + unsigned int ver; + + ver = __raw_sysmmu_version(data); + if (MMU_MAJ_VER(ver) == 3) { + if (MMU_MIN_VER(ver) >= 2) { + cfg |= CFG_FLPDCACHE; + if (MMU_MIN_VER(ver) == 3) { + cfg |= CFG_ACGEN; + cfg &= ~CFG_LRU; + } else { + cfg |= CFG_SYSSEL; + } + } + } + + __raw_writel(cfg, data->sfrbase + REG_MMU_CFG); +} + +static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data) +{ + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + clk_enable(data->clk); + + __raw_writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL); + + __sysmmu_init_config(data); + + __sysmmu_set_ptbase(data->sfrbase, data->pgtable); + + __raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL); + + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); +} + +static int __sysmmu_enable(struct sysmmu_drvdata *data, + phys_addr_t pgtable, struct iommu_domain *domain) +{ + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&data->lock, flags); + if (set_sysmmu_active(data)) { + data->pgtable = pgtable; + data->domain = domain; + + __sysmmu_enable_nocount(data); + + dev_dbg(data->sysmmu, "Enabled\n"); + } else { + ret = (pgtable == data->pgtable) ? 
1 : -EBUSY; + + dev_dbg(data->sysmmu, "already enabled\n"); + } + + if (WARN_ON(ret < 0)) + set_sysmmu_inactive(data); /* decrement count */ + + spin_unlock_irqrestore(&data->lock, flags); + + return ret; +} + +/* __exynos_sysmmu_enable: Enables System MMU + * + * returns -error if an error occurred and System MMU is not enabled, + * 0 if the System MMU has been just enabled and 1 if System MMU was already + * enabled before. + */ +static int __exynos_sysmmu_enable(struct device *dev, phys_addr_t pgtable, + struct iommu_domain *domain) +{ + int ret = 0; + unsigned long flags; + struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data; + + BUG_ON(!has_sysmmu(dev)); + + spin_lock_irqsave(&owner->lock, flags); + + data = dev_get_drvdata(owner->sysmmu); + + ret = __sysmmu_enable(data, pgtable, domain); + if (ret >= 0) + data->master = dev; + + spin_unlock_irqrestore(&owner->lock, flags); + + return ret; +} + +int exynos_sysmmu_enable(struct device *dev, phys_addr_t pgtable) +{ + BUG_ON(!memblock_is_memory(pgtable)); + + return __exynos_sysmmu_enable(dev, pgtable, NULL); +} + +static bool exynos_sysmmu_disable(struct device *dev) +{ + unsigned long flags; + bool disabled = true; + struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data; + + BUG_ON(!has_sysmmu(dev)); + + spin_lock_irqsave(&owner->lock, flags); + + data = dev_get_drvdata(owner->sysmmu); + + disabled = __sysmmu_disable(data); + if (disabled) + data->master = NULL; + + spin_unlock_irqrestore(&owner->lock, flags); + + return disabled; +} + +static void __sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, + sysmmu_iova_t iova) +{ + if (__raw_sysmmu_version(data) == MAKE_MMU_VER(3, 3)) + __raw_writel(iova | 0x1, data->sfrbase + REG_MMU_FLUSH_ENTRY); +} + +static void sysmmu_tlb_invalidate_flpdcache(struct device *dev, + sysmmu_iova_t iova) +{ + unsigned long flags; + struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data = dev_get_drvdata(owner->sysmmu); + + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + + spin_lock_irqsave(&data->lock, flags); + if (is_sysmmu_active(data)) + __sysmmu_tlb_invalidate_flpdcache(data, iova); + spin_unlock_irqrestore(&data->lock, flags); + + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); +} + +static void sysmmu_tlb_invalidate_entry(struct device *dev, sysmmu_iova_t iova, + size_t size) +{ + struct exynos_iommu_owner *owner = dev->archdata.iommu; + unsigned long flags; + struct sysmmu_drvdata *data; + + data = dev_get_drvdata(owner->sysmmu); + + spin_lock_irqsave(&data->lock, flags); + if (is_sysmmu_active(data)) { + unsigned int num_inv = 1; + + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + + /* + * L2TLB invalidation required + * 4KB page: 1 invalidation + * 64KB page: 16 invalidation + * 1MB page: 64 invalidation + * because it is set-associative TLB + * with 8-way and 64 sets. + * 1MB page can be cached in one of all sets. + * 64KB page can be one of 16 consecutive sets. + */ + if (MMU_MAJ_VER(__raw_sysmmu_version(data)) == 2) + num_inv = min_t(unsigned int, size / PAGE_SIZE, 64); + + if (sysmmu_block(data->sfrbase)) { + __sysmmu_tlb_invalidate_entry( + data->sfrbase, iova, num_inv); + sysmmu_unblock(data->sfrbase); + } + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); + } else { + dev_dbg(dev, "disabled. 
Skipping TLB invalidation @ %#x\n", + iova); + } + spin_unlock_irqrestore(&data->lock, flags); +} + +void exynos_sysmmu_tlb_invalidate(struct device *dev) +{ + struct exynos_iommu_owner *owner = dev->archdata.iommu; + unsigned long flags; + struct sysmmu_drvdata *data; + + data = dev_get_drvdata(owner->sysmmu); + + spin_lock_irqsave(&data->lock, flags); + if (is_sysmmu_active(data)) { + if (!IS_ERR(data->clk_master)) + clk_enable(data->clk_master); + if (sysmmu_block(data->sfrbase)) { + __sysmmu_tlb_invalidate(data->sfrbase); + sysmmu_unblock(data->sfrbase); + } + if (!IS_ERR(data->clk_master)) + clk_disable(data->clk_master); + } else { + dev_dbg(dev, "disabled. Skipping TLB invalidation\n"); + } + spin_unlock_irqrestore(&data->lock, flags); +} + +static int __init exynos_sysmmu_probe(struct platform_device *pdev) +{ + int irq, ret; + struct device *dev = &pdev->dev; + struct sysmmu_drvdata *data; + struct resource *res; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->sfrbase = devm_ioremap_resource(dev, res); + if (IS_ERR(data->sfrbase)) + return PTR_ERR(data->sfrbase); + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_err(dev, "Unable to find IRQ resource\n"); + return irq; + } + + ret = devm_request_irq(dev, irq, exynos_sysmmu_irq, 0, + dev_name(dev), data); + if (ret) { + dev_err(dev, "Unabled to register handler of irq %d\n", irq); + return ret; + } + + data->clk = devm_clk_get(dev, "sysmmu"); + if (IS_ERR(data->clk)) { + dev_err(dev, "Failed to get clock!\n"); + return PTR_ERR(data->clk); + } else { + ret = clk_prepare(data->clk); + if (ret) { + dev_err(dev, "Failed to prepare clk\n"); + return ret; + } + } + + data->clk_master = devm_clk_get(dev, "master"); + if (!IS_ERR(data->clk_master)) { + ret = clk_prepare(data->clk_master); + if (ret) { + clk_unprepare(data->clk); + dev_err(dev, "Failed to prepare master's clk\n"); + return ret; + } + } + + data->sysmmu = dev; + spin_lock_init(&data->lock); + + platform_set_drvdata(pdev, data); + + pm_runtime_enable(dev); + + return 0; +} + +static const struct of_device_id sysmmu_of_match[] __initconst = { + { .compatible = "samsung,exynos-sysmmu", }, + { }, +}; + +static struct platform_driver exynos_sysmmu_driver __refdata = { + .probe = exynos_sysmmu_probe, + .driver = { + .owner = THIS_MODULE, + .name = "exynos-sysmmu", + .of_match_table = sysmmu_of_match, + } +}; + +static inline void pgtable_flush(void *vastart, void *vaend) +{ + dmac_flush_range(vastart, vaend); + outer_flush_range(virt_to_phys(vastart), + virt_to_phys(vaend)); +} + +static int exynos_iommu_domain_init(struct iommu_domain *domain) +{ + struct exynos_iommu_domain *priv; + int i; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->pgtable = (sysmmu_pte_t *)__get_free_pages(GFP_KERNEL, 2); + if (!priv->pgtable) + goto err_pgtable; + + priv->lv2entcnt = (short *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!priv->lv2entcnt) + goto err_counter; + + /* w/a of System MMU v3.3 to prevent caching 1MiB mapping */ + for (i = 0; i < NUM_LV1ENTRIES; i += 8) { + priv->pgtable[i + 0] = ZERO_LV2LINK; + priv->pgtable[i + 1] = ZERO_LV2LINK; + priv->pgtable[i + 2] = ZERO_LV2LINK; + priv->pgtable[i + 3] = ZERO_LV2LINK; + priv->pgtable[i + 4] = ZERO_LV2LINK; + priv->pgtable[i + 5] = ZERO_LV2LINK; + priv->pgtable[i + 6] = ZERO_LV2LINK; + priv->pgtable[i + 7] = ZERO_LV2LINK; + } + + pgtable_flush(priv->pgtable, priv->pgtable + 
NUM_LV1ENTRIES); + + spin_lock_init(&priv->lock); + spin_lock_init(&priv->pgtablelock); + INIT_LIST_HEAD(&priv->clients); + + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = ~0UL; + domain->geometry.force_aperture = true; + + domain->priv = priv; + return 0; + +err_counter: + free_pages((unsigned long)priv->pgtable, 2); +err_pgtable: + kfree(priv); + return -ENOMEM; +} + +static void exynos_iommu_domain_destroy(struct iommu_domain *domain) +{ + struct exynos_iommu_domain *priv = domain->priv; + struct exynos_iommu_owner *owner; + unsigned long flags; + int i; + + WARN_ON(!list_empty(&priv->clients)); + + spin_lock_irqsave(&priv->lock, flags); + + list_for_each_entry(owner, &priv->clients, client) { + while (!exynos_sysmmu_disable(owner->dev)) + ; /* until System MMU is actually disabled */ + } + + while (!list_empty(&priv->clients)) + list_del_init(priv->clients.next); + + spin_unlock_irqrestore(&priv->lock, flags); + + for (i = 0; i < NUM_LV1ENTRIES; i++) + if (lv1ent_page(priv->pgtable + i)) + kmem_cache_free(lv2table_kmem_cache, + phys_to_virt(lv2table_base(priv->pgtable + i))); + + free_pages((unsigned long)priv->pgtable, 2); + free_pages((unsigned long)priv->lv2entcnt, 1); + kfree(domain->priv); + domain->priv = NULL; +} + +static int exynos_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct exynos_iommu_domain *priv = domain->priv; + phys_addr_t pagetable = virt_to_phys(priv->pgtable); + unsigned long flags; + int ret; + + spin_lock_irqsave(&priv->lock, flags); + + ret = __exynos_sysmmu_enable(dev, pagetable, domain); + if (ret == 0) { + list_add_tail(&owner->client, &priv->clients); + owner->domain = domain; + } + + spin_unlock_irqrestore(&priv->lock, flags); + + if (ret < 0) { + dev_err(dev, "%s: Failed to attach IOMMU with pgtable %pa\n", + __func__, &pagetable); + return ret; + } + + dev_dbg(dev, "%s: Attached IOMMU with pgtable %pa %s\n", + __func__, &pagetable, (ret == 0) ? 
"" : ", again"); + + return ret; +} + +static void exynos_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct exynos_iommu_owner *owner; + struct exynos_iommu_domain *priv = domain->priv; + phys_addr_t pagetable = virt_to_phys(priv->pgtable); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + list_for_each_entry(owner, &priv->clients, client) { + if (owner == dev->archdata.iommu) { + if (exynos_sysmmu_disable(dev)) { + list_del_init(&owner->client); + owner->domain = NULL; + } + break; + } + } + + spin_unlock_irqrestore(&priv->lock, flags); + + if (owner == dev->archdata.iommu) + dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n", + __func__, &pagetable); + else + dev_err(dev, "%s: No IOMMU is attached\n", __func__); +} + +static sysmmu_pte_t *alloc_lv2entry(struct exynos_iommu_domain *priv, + sysmmu_pte_t *sent, sysmmu_iova_t iova, short *pgcounter) +{ + if (lv1ent_section(sent)) { + WARN(1, "Trying mapping on %#08x mapped with 1MiB page", iova); + return ERR_PTR(-EADDRINUSE); + } + + if (lv1ent_fault(sent)) { + sysmmu_pte_t *pent; + bool need_flush_flpd_cache = lv1ent_zero(sent); + + pent = kmem_cache_zalloc(lv2table_kmem_cache, GFP_ATOMIC); + BUG_ON((unsigned int)pent & (LV2TABLE_SIZE - 1)); + if (!pent) + return ERR_PTR(-ENOMEM); + + *sent = mk_lv1ent_page(virt_to_phys(pent)); + *pgcounter = NUM_LV2ENTRIES; + pgtable_flush(pent, pent + NUM_LV2ENTRIES); + pgtable_flush(sent, sent + 1); + + /* + * If pretched SLPD is a fault SLPD in zero_l2_table, FLPD cache + * may caches the address of zero_l2_table. This function + * replaces the zero_l2_table with new L2 page table to write + * valid mappings. + * Accessing the valid area may cause page fault since FLPD + * cache may still caches zero_l2_table for the valid area + * instead of new L2 page table that have the mapping + * information of the valid area + * Thus any replacement of zero_l2_table with other valid L2 + * page table must involve FLPD cache invalidation for System + * MMU v3.3. + * FLPD cache invalidation is performed with TLB invalidation + * by VPN without blocking. It is safe to invalidate TLB without + * blocking because the target address of TLB invalidation is + * not currently mapped. + */ + if (need_flush_flpd_cache) { + struct exynos_iommu_owner *owner; + + spin_lock(&priv->lock); + list_for_each_entry(owner, &priv->clients, client) + sysmmu_tlb_invalidate_flpdcache( + owner->dev, iova); + spin_unlock(&priv->lock); + } + } + + return page_entry(sent, iova); +} + +static int lv1set_section(struct exynos_iommu_domain *priv, + sysmmu_pte_t *sent, sysmmu_iova_t iova, + phys_addr_t paddr, short *pgcnt) +{ + if (lv1ent_section(sent)) { + WARN(1, "Trying mapping on 1MiB@%#08x that is mapped", + iova); + return -EADDRINUSE; + } + + if (lv1ent_page(sent)) { + if (*pgcnt != NUM_LV2ENTRIES) { + WARN(1, "Trying mapping on 1MiB@%#08x that is mapped", + iova); + return -EADDRINUSE; + } + + kmem_cache_free(lv2table_kmem_cache, page_entry(sent, 0)); + *pgcnt = 0; + } + + *sent = mk_lv1ent_sect(paddr); + + pgtable_flush(sent, sent + 1); + + spin_lock(&priv->lock); + if (lv1ent_page_zero(sent)) { + struct exynos_iommu_owner *owner; + /* + * Flushing FLPD cache in System MMU v3.3 that may cache a FLPD + * entry by speculative prefetch of SLPD which has no mapping. 
+ */ + list_for_each_entry(owner, &priv->clients, client) + sysmmu_tlb_invalidate_flpdcache(owner->dev, iova); + } + spin_unlock(&priv->lock); + + return 0; +} + +static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size, + short *pgcnt) +{ + if (size == SPAGE_SIZE) { + if (WARN_ON(!lv2ent_fault(pent))) + return -EADDRINUSE; + + *pent = mk_lv2ent_spage(paddr); + pgtable_flush(pent, pent + 1); + *pgcnt -= 1; + } else { /* size == LPAGE_SIZE */ + int i; + + for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) { + if (WARN_ON(!lv2ent_fault(pent))) { + if (i > 0) + memset(pent - i, 0, sizeof(*pent) * i); + return -EADDRINUSE; + } + + *pent = mk_lv2ent_lpage(paddr); + } + pgtable_flush(pent - SPAGES_PER_LPAGE, pent); + *pgcnt -= SPAGES_PER_LPAGE; + } + + return 0; +} + +/* + * *CAUTION* to the I/O virtual memory managers that support exynos-iommu: + * + * System MMU v3.x have an advanced logic to improve address translation + * performance with caching more page table entries by a page table walk. + * However, the logic has a bug that caching fault page table entries and System + * MMU reports page fault if the cached fault entry is hit even though the fault + * entry is updated to a valid entry after the entry is cached. + * To prevent caching fault page table entries which may be updated to valid + * entries later, the virtual memory manager should care about the w/a about the + * problem. The followings describe w/a. + * + * Any two consecutive I/O virtual address regions must have a hole of 128KiB + * in maximum to prevent misbehavior of System MMU 3.x. (w/a of h/w bug) + * + * Precisely, any start address of I/O virtual region must be aligned by + * the following sizes for System MMU v3.1 and v3.2. + * System MMU v3.1: 128KiB + * System MMU v3.2: 256KiB + * + * Because System MMU v3.3 caches page table entries more aggressively, it needs + * more w/a. + * - Any two consecutive I/O virtual regions must be have a hole of larger size + * than or equal size to 128KiB. + * - Start address of an I/O virtual region must be aligned by 128KiB. 
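+ *
+ * As a rough illustration of the System MMU v3.3 rules above: if one I/O
+ * virtual region ends at 0x01F00000, the following region must leave a hole
+ * of at least 128 KiB (0x20000) and must itself start on a 128 KiB boundary,
+ * so the earliest permissible start address is 0x01F20000.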
+ */ +static int exynos_iommu_map(struct iommu_domain *domain, unsigned long l_iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct exynos_iommu_domain *priv = domain->priv; + sysmmu_pte_t *entry; + sysmmu_iova_t iova = (sysmmu_iova_t)l_iova; + unsigned long flags; + int ret = -ENOMEM; + + BUG_ON(priv->pgtable == NULL); + + spin_lock_irqsave(&priv->pgtablelock, flags); + + entry = section_entry(priv->pgtable, iova); + + if (size == SECT_SIZE) { + ret = lv1set_section(priv, entry, iova, paddr, + &priv->lv2entcnt[lv1ent_offset(iova)]); + } else { + sysmmu_pte_t *pent; + + pent = alloc_lv2entry(priv, entry, iova, + &priv->lv2entcnt[lv1ent_offset(iova)]); + + if (IS_ERR(pent)) + ret = PTR_ERR(pent); + else + ret = lv2set_page(pent, paddr, size, + &priv->lv2entcnt[lv1ent_offset(iova)]); + } + + if (ret) + pr_err("%s: Failed(%d) to map %#zx bytes @ %#x\n", + __func__, ret, size, iova); + + spin_unlock_irqrestore(&priv->pgtablelock, flags); + + return ret; +} + +static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *priv, + sysmmu_iova_t iova, size_t size) +{ + struct exynos_iommu_owner *owner; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + list_for_each_entry(owner, &priv->clients, client) + sysmmu_tlb_invalidate_entry(owner->dev, iova, size); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static size_t exynos_iommu_unmap(struct iommu_domain *domain, + unsigned long l_iova, size_t size) +{ + struct exynos_iommu_domain *priv = domain->priv; + sysmmu_iova_t iova = (sysmmu_iova_t)l_iova; + sysmmu_pte_t *ent; + size_t err_pgsize; + unsigned long flags; + + BUG_ON(priv->pgtable == NULL); + + spin_lock_irqsave(&priv->pgtablelock, flags); + + ent = section_entry(priv->pgtable, iova); + + if (lv1ent_section(ent)) { + if (WARN_ON(size < SECT_SIZE)) { + err_pgsize = SECT_SIZE; + goto err; + } + + *ent = ZERO_LV2LINK; /* w/a for h/w bug in Sysmem MMU v3.3 */ + pgtable_flush(ent, ent + 1); + size = SECT_SIZE; + goto done; + } + + if (unlikely(lv1ent_fault(ent))) { + if (size > SECT_SIZE) + size = SECT_SIZE; + goto done; + } + + /* lv1ent_page(sent) == true here */ + + ent = page_entry(ent, iova); + + if (unlikely(lv2ent_fault(ent))) { + size = SPAGE_SIZE; + goto done; + } + + if (lv2ent_small(ent)) { + *ent = 0; + size = SPAGE_SIZE; + pgtable_flush(ent, ent + 1); + priv->lv2entcnt[lv1ent_offset(iova)] += 1; + goto done; + } + + /* lv1ent_large(ent) == true here */ + if (WARN_ON(size < LPAGE_SIZE)) { + err_pgsize = LPAGE_SIZE; + goto err; + } + + memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE); + pgtable_flush(ent, ent + SPAGES_PER_LPAGE); + + size = LPAGE_SIZE; + priv->lv2entcnt[lv1ent_offset(iova)] += SPAGES_PER_LPAGE; +done: + spin_unlock_irqrestore(&priv->pgtablelock, flags); + + exynos_iommu_tlb_invalidate_entry(priv, iova, size); + + return size; +err: + spin_unlock_irqrestore(&priv->pgtablelock, flags); + + pr_err("%s: Failed: size(%#zx) @ %#x is smaller than page size %#zx\n", + __func__, size, iova, err_pgsize); + + return 0; +} + +static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct exynos_iommu_domain *priv = domain->priv; + sysmmu_pte_t *entry; + unsigned long flags; + phys_addr_t phys = 0; + + spin_lock_irqsave(&priv->pgtablelock, flags); + + entry = section_entry(priv->pgtable, iova); + + if (lv1ent_section(entry)) { + phys = section_phys(entry) + section_offs(iova); + } else if (lv1ent_page(entry)) { + entry = page_entry(entry, iova); + + if (lv2ent_large(entry)) + phys = 
lpage_phys(entry) + lpage_offs(iova); + else if (lv2ent_small(entry)) + phys = spage_phys(entry) + spage_offs(iova); + } + + spin_unlock_irqrestore(&priv->pgtablelock, flags); + + return phys; +} + +static int exynos_iommu_add_device(struct device *dev) +{ + struct iommu_group *group; + int ret; + + group = iommu_group_get(dev); + + if (!group) { + group = iommu_group_alloc(); + if (IS_ERR(group)) { + dev_err(dev, "Failed to allocate IOMMU group\n"); + return PTR_ERR(group); + } + } + + ret = iommu_group_add_device(group, dev); + iommu_group_put(group); + + return ret; +} + +static void exynos_iommu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + +static struct iommu_ops exynos_iommu_ops = { + .domain_init = exynos_iommu_domain_init, + .domain_destroy = exynos_iommu_domain_destroy, + .attach_dev = exynos_iommu_attach_device, + .detach_dev = exynos_iommu_detach_device, + .map = exynos_iommu_map, + .unmap = exynos_iommu_unmap, + .iova_to_phys = exynos_iommu_iova_to_phys, + .add_device = exynos_iommu_add_device, + .remove_device = exynos_iommu_remove_device, + .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, +}; + +static int __init exynos_iommu_init(void) +{ + int ret; + + lv2table_kmem_cache = kmem_cache_create("exynos-iommu-lv2table", + LV2TABLE_SIZE, LV2TABLE_SIZE, 0, NULL); + if (!lv2table_kmem_cache) { + pr_err("%s: Failed to create kmem cache\n", __func__); + return -ENOMEM; + } + + ret = platform_driver_register(&exynos_sysmmu_driver); + if (ret) { + pr_err("%s: Failed to register driver\n", __func__); + goto err_reg_driver; + } + + zero_lv2_table = kmem_cache_zalloc(lv2table_kmem_cache, GFP_KERNEL); + if (zero_lv2_table == NULL) { + pr_err("%s: Failed to allocate zero level2 page table\n", + __func__); + ret = -ENOMEM; + goto err_zero_lv2; + } + + ret = bus_set_iommu(&platform_bus_type, &exynos_iommu_ops); + if (ret) { + pr_err("%s: Failed to register exynos-iommu driver.\n", + __func__); + goto err_set_iommu; + } + + return 0; +err_set_iommu: + kmem_cache_free(lv2table_kmem_cache, zero_lv2_table); +err_zero_lv2: + platform_driver_unregister(&exynos_sysmmu_driver); +err_reg_driver: + kmem_cache_destroy(lv2table_kmem_cache); + return ret; +} +subsys_initcall(exynos_iommu_init); diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c new file mode 100644 index 00000000000..bb446d742a2 --- /dev/null +++ b/drivers/iommu/fsl_pamu.c @@ -0,0 +1,1308 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. 
+ * + */ + +#define pr_fmt(fmt) "fsl-pamu: %s: " fmt, __func__ + +#include <linux/init.h> +#include <linux/iommu.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include <linux/of_platform.h> +#include <linux/bootmem.h> +#include <linux/genalloc.h> +#include <asm/io.h> +#include <asm/bitops.h> +#include <asm/fsl_guts.h> + +#include "fsl_pamu.h" + +/* define indexes for each operation mapping scenario */ +#define OMI_QMAN 0x00 +#define OMI_FMAN 0x01 +#define OMI_QMAN_PRIV 0x02 +#define OMI_CAAM 0x03 + +#define make64(high, low) (((u64)(high) << 32) | (low)) + +struct pamu_isr_data { + void __iomem *pamu_reg_base; /* Base address of PAMU regs*/ + unsigned int count; /* The number of PAMUs */ +}; + +static struct paace *ppaact; +static struct paace *spaact; +static struct ome *omt; + +/* + * Table for matching compatible strings, for device tree + * guts node, for QorIQ SOCs. + * "fsl,qoriq-device-config-2.0" corresponds to T4 & B4 + * SOCs. For the older SOCs "fsl,qoriq-device-config-1.0" + * string would be used. +*/ +static const struct of_device_id guts_device_ids[] = { + { .compatible = "fsl,qoriq-device-config-1.0", }, + { .compatible = "fsl,qoriq-device-config-2.0", }, + {} +}; + + +/* + * Table for matching compatible strings, for device tree + * L3 cache controller node. + * "fsl,t4240-l3-cache-controller" corresponds to T4, + * "fsl,b4860-l3-cache-controller" corresponds to B4 & + * "fsl,p4080-l3-cache-controller" corresponds to other, + * SOCs. +*/ +static const struct of_device_id l3_device_ids[] = { + { .compatible = "fsl,t4240-l3-cache-controller", }, + { .compatible = "fsl,b4860-l3-cache-controller", }, + { .compatible = "fsl,p4080-l3-cache-controller", }, + {} +}; + +/* maximum subwindows permitted per liodn */ +static u32 max_subwindow_count; + +/* Pool for fspi allocation */ +struct gen_pool *spaace_pool; + +/** + * pamu_get_max_subwin_cnt() - Return the maximum supported + * subwindow count per liodn. + * + */ +u32 pamu_get_max_subwin_cnt() +{ + return max_subwindow_count; +} + +/** + * pamu_get_ppaace() - Return the primary PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns the ppace pointer upon success else return + * null. 
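+ * A NULL return means either that the PPAACT has not been allocated yet or
+ * that @liodn falls outside the PAACE_NUMBER_ENTRIES range.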
+ */ +static struct paace *pamu_get_ppaace(int liodn) +{ + if (!ppaact || liodn >= PAACE_NUMBER_ENTRIES) { + pr_debug("PPAACT doesn't exist\n"); + return NULL; + } + + return &ppaact[liodn]; +} + +/** + * pamu_enable_liodn() - Set valid bit of PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_enable_liodn(int liodn) +{ + struct paace *ppaace; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid primary paace entry\n"); + return -ENOENT; + } + + if (!get_bf(ppaace->addr_bitfields, PPAACE_AF_WSE)) { + pr_debug("liodn %d not configured\n", liodn); + return -EINVAL; + } + + /* Ensure that all other stores to the ppaace complete first */ + mb(); + + set_bf(ppaace->addr_bitfields, PAACE_AF_V, PAACE_V_VALID); + mb(); + + return 0; +} + +/** + * pamu_disable_liodn() - Clears valid bit of PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_disable_liodn(int liodn) +{ + struct paace *ppaace; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid primary paace entry\n"); + return -ENOENT; + } + + set_bf(ppaace->addr_bitfields, PAACE_AF_V, PAACE_V_INVALID); + mb(); + + return 0; +} + +/* Derive the window size encoding for a particular PAACE entry */ +static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size) +{ + /* Bug if not a power of 2 */ + BUG_ON((addrspace_size & (addrspace_size - 1))); + + /* window size is 2^(WSE+1) bytes */ + return fls64(addrspace_size) - 2; +} + +/* Derive the PAACE window count encoding for the subwindow count */ +static unsigned int map_subwindow_cnt_to_wce(u32 subwindow_cnt) +{ + /* window count is 2^(WCE+1) bytes */ + return __ffs(subwindow_cnt) - 1; +} + +/* + * Set the PAACE type as primary and set the coherency required domain + * attribute + */ +static void pamu_init_ppaace(struct paace *ppaace) +{ + set_bf(ppaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_PRIMARY); + + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + PAACE_M_COHERENCE_REQ); +} + +/* + * Set the PAACE type as secondary and set the coherency required domain + * attribute. + */ +static void pamu_init_spaace(struct paace *spaace) +{ + set_bf(spaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_SECONDARY); + set_bf(spaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + PAACE_M_COHERENCE_REQ); +} + +/* + * Return the spaace (corresponding to the secondary window index) + * for a particular ppaace. + */ +static struct paace *pamu_get_spaace(struct paace *paace, u32 wnum) +{ + u32 subwin_cnt; + struct paace *spaace = NULL; + + subwin_cnt = 1UL << (get_bf(paace->impl_attr, PAACE_IA_WCE) + 1); + + if (wnum < subwin_cnt) + spaace = &spaact[paace->fspi + wnum]; + else + pr_debug("secondary paace out of bounds\n"); + + return spaace; +} + +/** + * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves subwindows + * required for primary PAACE in the secondary + * PAACE table. + * @subwin_cnt: Number of subwindows to be reserved. + * + * A PPAACE entry may have a number of associated subwindows. A subwindow + * corresponds to a SPAACE entry in the SPAACT table. Each PAACE entry stores + * the index (fspi) of the first SPAACE entry in the SPAACT table. This + * function returns the index of the first SPAACE entry. The remaining + * SPAACE entries are reserved contiguously from that index. 
+ * + * Returns a valid fspi index in the range of 0 - SPAACE_NUMBER_ENTRIES on success. + * If no SPAACE entry is available or the allocator can not reserve the required + * number of contiguous entries function returns ULONG_MAX indicating a failure. + * +*/ +static unsigned long pamu_get_fspi_and_allocate(u32 subwin_cnt) +{ + unsigned long spaace_addr; + + spaace_addr = gen_pool_alloc(spaace_pool, subwin_cnt * sizeof(struct paace)); + if (!spaace_addr) + return ULONG_MAX; + + return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace)); +} + +/* Release the subwindows reserved for a particular LIODN */ +void pamu_free_subwins(int liodn) +{ + struct paace *ppaace; + u32 subwin_cnt, size; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid liodn entry\n"); + return; + } + + if (get_bf(ppaace->addr_bitfields, PPAACE_AF_MW)) { + subwin_cnt = 1UL << (get_bf(ppaace->impl_attr, PAACE_IA_WCE) + 1); + size = (subwin_cnt - 1) * sizeof(struct paace); + gen_pool_free(spaace_pool, (unsigned long)&spaact[ppaace->fspi], size); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0); + } +} + +/* + * Function used for updating stash destination for the coressponding + * LIODN. + */ +int pamu_update_paace_stash(int liodn, u32 subwin, u32 value) +{ + struct paace *paace; + + paace = pamu_get_ppaace(liodn); + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + if (subwin) { + paace = pamu_get_spaace(paace, subwin - 1); + if (!paace) { + return -ENOENT; + } + } + set_bf(paace->impl_attr, PAACE_IA_CID, value); + + mb(); + + return 0; +} + +/* Disable a subwindow corresponding to the LIODN */ +int pamu_disable_spaace(int liodn, u32 subwin) +{ + struct paace *paace; + + paace = pamu_get_ppaace(liodn); + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + if (subwin) { + paace = pamu_get_spaace(paace, subwin - 1); + if (!paace) { + return -ENOENT; + } + set_bf(paace->addr_bitfields, PAACE_AF_V, + PAACE_V_INVALID); + } else { + set_bf(paace->addr_bitfields, PAACE_AF_AP, + PAACE_AP_PERMS_DENIED); + } + + mb(); + + return 0; +} + + +/** + * pamu_config_paace() - Sets up PPAACE entry for specified liodn + * + * @liodn: Logical IO device number + * @win_addr: starting address of DSA window + * @win-size: size of DSA window + * @omi: Operation mapping index -- if ~omi == 0 then omi not defined + * @rpn: real (true physical) page number + * @stashid: cache stash id for associated cpu -- if ~stashid == 0 then + * stashid not defined + * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then + * snoopid not defined + * @subwin_cnt: number of sub-windows + * @prot: window permissions + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, + u32 omi, unsigned long rpn, u32 snoopid, u32 stashid, + u32 subwin_cnt, int prot) +{ + struct paace *ppaace; + unsigned long fspi; + + if ((win_size & (win_size - 1)) || win_size < PAMU_PAGE_SIZE) { + pr_debug("window size too small or not a power of two %llx\n", win_size); + return -EINVAL; + } + + if (win_addr & (win_size - 1)) { + pr_debug("window address is not aligned with window size\n"); + return -EINVAL; + } + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + return -ENOENT; + } + + /* window size is 2^(WSE+1) bytes */ + set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, + map_addrspace_size_to_wse(win_size)); + + pamu_init_ppaace(ppaace); + + ppaace->wbah = win_addr >> (PAMU_PAGE_SHIFT + 20); + 
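/*
 * Annotation (not part of the original patch): the window base address is
 * split across two fields.  PAMU_PAGE_SHIFT + 20 == 32, so wbah above holds
 * the physical address bits above bit 31, while the WBAL bitfield written
 * below keeps bits 31:12 (the 4 KiB-aligned low part).  For a hypothetical
 * win_addr of 0x2_3456_7000 this gives wbah == 0x2 and a WBAL field value
 * of 0x34567.  The WSE value written above follows the "2^(WSE+1) bytes"
 * rule, e.g. a 1 MiB window encodes as fls64(1 << 20) - 2 == 0x13
 * (PAACE_WSE_1M in fsl_pamu.h).
 */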
set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, + (win_addr >> PAMU_PAGE_SHIFT)); + + /* set up operation mapping if it's configured */ + if (omi < OME_NUMBER_ENTRIES) { + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = omi; + } else if (~omi != 0) { + pr_debug("bad operation mapping index: %d\n", omi); + return -EINVAL; + } + + /* configure stash id */ + if (~stashid != 0) + set_bf(ppaace->impl_attr, PAACE_IA_CID, stashid); + + /* configure snoop id */ + if (~snoopid != 0) + ppaace->domain_attr.to_host.snpid = snoopid; + + if (subwin_cnt) { + /* The first entry is in the primary PAACE instead */ + fspi = pamu_get_fspi_and_allocate(subwin_cnt - 1); + if (fspi == ULONG_MAX) { + pr_debug("spaace indexes exhausted\n"); + return -EINVAL; + } + + /* window count is 2^(WCE+1) bytes */ + set_bf(ppaace->impl_attr, PAACE_IA_WCE, + map_subwindow_cnt_to_wce(subwin_cnt)); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0x1); + ppaace->fspi = fspi; + } else { + set_bf(ppaace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE); + ppaace->twbah = rpn >> 20; + set_bf(ppaace->win_bitfields, PAACE_WIN_TWBAL, rpn); + set_bf(ppaace->addr_bitfields, PAACE_AF_AP, prot); + set_bf(ppaace->impl_attr, PAACE_IA_WCE, 0); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0); + } + mb(); + + return 0; +} + +/** + * pamu_config_spaace() - Sets up SPAACE entry for specified subwindow + * + * @liodn: Logical IO device number + * @subwin_cnt: number of sub-windows associated with dma-window + * @subwin: subwindow index + * @subwin_size: size of subwindow + * @omi: Operation mapping index + * @rpn: real (true physical) page number + * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then + * snoopid not defined + * @stashid: cache stash id for associated cpu + * @enable: enable/disable subwindow after reconfiguration + * @prot: sub window permissions + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin, + phys_addr_t subwin_size, u32 omi, unsigned long rpn, + u32 snoopid, u32 stashid, int enable, int prot) +{ + struct paace *paace; + + + /* setup sub-windows */ + if (!subwin_cnt) { + pr_debug("Invalid subwindow count\n"); + return -EINVAL; + } + + paace = pamu_get_ppaace(liodn); + if (subwin > 0 && subwin < subwin_cnt && paace) { + paace = pamu_get_spaace(paace, subwin - 1); + + if (paace && !(paace->addr_bitfields & PAACE_V_VALID)) { + pamu_init_spaace(paace); + set_bf(paace->addr_bitfields, SPAACE_AF_LIODN, liodn); + } + } + + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + + if ((subwin_size & (subwin_size - 1)) || subwin_size < PAMU_PAGE_SIZE) { + pr_debug("subwindow size out of range, or not a power of 2\n"); + return -EINVAL; + } + + if (rpn == ULONG_MAX) { + pr_debug("real page number out of range\n"); + return -EINVAL; + } + + /* window size is 2^(WSE+1) bytes */ + set_bf(paace->win_bitfields, PAACE_WIN_SWSE, + map_addrspace_size_to_wse(subwin_size)); + + set_bf(paace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE); + paace->twbah = rpn >> 20; + set_bf(paace->win_bitfields, PAACE_WIN_TWBAL, rpn); + set_bf(paace->addr_bitfields, PAACE_AF_AP, prot); + + /* configure snoop id */ + if (~snoopid != 0) + paace->domain_attr.to_host.snpid = snoopid; + + /* set up operation mapping if it's configured */ + if (omi < OME_NUMBER_ENTRIES) { + set_bf(paace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + paace->op_encode.index_ot.omi = omi; + } else if (~omi != 0) { + 
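/*
 * Annotation (not part of the original patch): omi, stashid and snoopid all
 * use ~(u32)0 (all bits set) as the "not specified" sentinel, which is why
 * the code tests "~x != 0" instead of comparing against zero; an omi that is
 * neither the sentinel nor a valid OMT index (0 .. OME_NUMBER_ENTRIES - 1)
 * is rejected here.
 */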
pr_debug("bad operation mapping index: %d\n", omi); + return -EINVAL; + } + + if (~stashid != 0) + set_bf(paace->impl_attr, PAACE_IA_CID, stashid); + + smp_wmb(); + + if (enable) + set_bf(paace->addr_bitfields, PAACE_AF_V, PAACE_V_VALID); + + mb(); + + return 0; +} + +/** +* get_ome_index() - Returns the index in the operation mapping table +* for device. +* @*omi_index: pointer for storing the index value +* +*/ +void get_ome_index(u32 *omi_index, struct device *dev) +{ + if (of_device_is_compatible(dev->of_node, "fsl,qman-portal")) + *omi_index = OMI_QMAN; + if (of_device_is_compatible(dev->of_node, "fsl,qman")) + *omi_index = OMI_QMAN_PRIV; +} + +/** + * get_stash_id - Returns stash destination id corresponding to a + * cache type and vcpu. + * @stash_dest_hint: L1, L2 or L3 + * @vcpu: vpcu target for a particular cache type. + * + * Returs stash on success or ~(u32)0 on failure. + * + */ +u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) +{ + const u32 *prop; + struct device_node *node; + u32 cache_level; + int len, found = 0; + int i; + + /* Fastpath, exit early if L3/CPC cache is target for stashing */ + if (stash_dest_hint == PAMU_ATTR_CACHE_L3) { + node = of_find_matching_node(NULL, l3_device_ids); + if (node) { + prop = of_get_property(node, "cache-stash-id", 0); + if (!prop) { + pr_debug("missing cache-stash-id at %s\n", node->full_name); + of_node_put(node); + return ~(u32)0; + } + of_node_put(node); + return be32_to_cpup(prop); + } + return ~(u32)0; + } + + for_each_node_by_type(node, "cpu") { + prop = of_get_property(node, "reg", &len); + for (i = 0; i < len / sizeof(u32); i++) { + if (be32_to_cpup(&prop[i]) == vcpu) { + found = 1; + goto found_cpu_node; + } + } + } +found_cpu_node: + + /* find the hwnode that represents the cache */ + for (cache_level = PAMU_ATTR_CACHE_L1; (cache_level < PAMU_ATTR_CACHE_L3) && found; cache_level++) { + if (stash_dest_hint == cache_level) { + prop = of_get_property(node, "cache-stash-id", 0); + if (!prop) { + pr_debug("missing cache-stash-id at %s\n", node->full_name); + of_node_put(node); + return ~(u32)0; + } + of_node_put(node); + return be32_to_cpup(prop); + } + + prop = of_get_property(node, "next-level-cache", 0); + if (!prop) { + pr_debug("can't find next-level-cache at %s\n", + node->full_name); + of_node_put(node); + return ~(u32)0; /* can't traverse any further */ + } + of_node_put(node); + + /* advance to next node in cache hierarchy */ + node = of_find_node_by_phandle(*prop); + if (!node) { + pr_debug("Invalid node for cache hierarchy\n"); + return ~(u32)0; + } + } + + pr_debug("stash dest not found for %d on vcpu %d\n", + stash_dest_hint, vcpu); + return ~(u32)0; +} + +/* Identify if the PAACT table entry belongs to QMAN, BMAN or QMAN Portal */ +#define QMAN_PAACE 1 +#define QMAN_PORTAL_PAACE 2 +#define BMAN_PAACE 3 + +/** + * Setup operation mapping and stash destinations for QMAN and QMAN portal. + * Memory accesses to QMAN and BMAN private memory need not be coherent, so + * clear the PAACE entry coherency attribute for them. 
+ */ +static void setup_qbman_paace(struct paace *ppaace, int paace_type) +{ + switch (paace_type) { + case QMAN_PAACE: + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = OMI_QMAN_PRIV; + /* setup QMAN Private data stashing for the L3 cache */ + set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(PAMU_ATTR_CACHE_L3, 0)); + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + 0); + break; + case QMAN_PORTAL_PAACE: + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = OMI_QMAN; + /*Set DQRR and Frame stashing for the L3 cache */ + set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(PAMU_ATTR_CACHE_L3, 0)); + break; + case BMAN_PAACE: + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + 0); + break; + } +} + +/** + * Setup the operation mapping table for various devices. This is a static + * table where each table index corresponds to a particular device. PAMU uses + * this table to translate device transaction to appropriate corenet + * transaction. + */ +static void __init setup_omt(struct ome *omt) +{ + struct ome *ome; + + /* Configure OMI_QMAN */ + ome = &omt[OMI_QMAN]; + + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READ; + ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSAO; + + ome->moe[IOE_DIRECT0_IDX] = EOE_VALID | EOE_LDEC; + ome->moe[IOE_DIRECT1_IDX] = EOE_VALID | EOE_LDECPE; + + /* Configure OMI_FMAN */ + ome = &omt[OMI_FMAN]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READI; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + + /* Configure OMI_QMAN private */ + ome = &omt[OMI_QMAN_PRIV]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READ; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA; + ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSA; + + /* Configure OMI_CAAM */ + ome = &omt[OMI_CAAM]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READI; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; +} + +/* + * Get the maximum number of PAACT table entries + * and subwindows supported by PAMU + */ +static void get_pamu_cap_values(unsigned long pamu_reg_base) +{ + u32 pc_val; + + pc_val = in_be32((u32 *)(pamu_reg_base + PAMU_PC3)); + /* Maximum number of subwindows per liodn */ + max_subwindow_count = 1 << (1 + PAMU_PC3_MWCE(pc_val)); +} + +/* Setup PAMU registers pointing to PAACT, SPAACT and OMT */ +int setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size, + phys_addr_t ppaact_phys, phys_addr_t spaact_phys, + phys_addr_t omt_phys) +{ + u32 *pc; + struct pamu_mmap_regs *pamu_regs; + + pc = (u32 *) (pamu_reg_base + PAMU_PC); + pamu_regs = (struct pamu_mmap_regs *) + (pamu_reg_base + PAMU_MMAP_REGS_BASE); + + /* set up pointers to corenet control blocks */ + + out_be32(&pamu_regs->ppbah, upper_32_bits(ppaact_phys)); + out_be32(&pamu_regs->ppbal, lower_32_bits(ppaact_phys)); + ppaact_phys = ppaact_phys + PAACT_SIZE; + out_be32(&pamu_regs->pplah, upper_32_bits(ppaact_phys)); + out_be32(&pamu_regs->pplal, lower_32_bits(ppaact_phys)); + + out_be32(&pamu_regs->spbah, upper_32_bits(spaact_phys)); + out_be32(&pamu_regs->spbal, lower_32_bits(spaact_phys)); + spaact_phys = spaact_phys + SPAACT_SIZE; + out_be32(&pamu_regs->splah, upper_32_bits(spaact_phys)); + out_be32(&pamu_regs->splal, lower_32_bits(spaact_phys)); + + out_be32(&pamu_regs->obah, upper_32_bits(omt_phys)); + out_be32(&pamu_regs->obal, 
lower_32_bits(omt_phys)); + omt_phys = omt_phys + OMT_SIZE; + out_be32(&pamu_regs->olah, upper_32_bits(omt_phys)); + out_be32(&pamu_regs->olal, lower_32_bits(omt_phys)); + + /* + * set PAMU enable bit, + * allow ppaact & omt to be cached + * & enable PAMU access violation interrupts. + */ + + out_be32((u32 *)(pamu_reg_base + PAMU_PICS), + PAMU_ACCESS_VIOLATION_ENABLE); + out_be32(pc, PAMU_PC_PE | PAMU_PC_OCE | PAMU_PC_SPCC | PAMU_PC_PPCC); + return 0; +} + +/* Enable all device LIODNS */ +static void __init setup_liodns(void) +{ + int i, len; + struct paace *ppaace; + struct device_node *node = NULL; + const u32 *prop; + + for_each_node_with_property(node, "fsl,liodn") { + prop = of_get_property(node, "fsl,liodn", &len); + for (i = 0; i < len / sizeof(u32); i++) { + int liodn; + + liodn = be32_to_cpup(&prop[i]); + if (liodn >= PAACE_NUMBER_ENTRIES) { + pr_debug("Invalid LIODN value %d\n", liodn); + continue; + } + ppaace = pamu_get_ppaace(liodn); + pamu_init_ppaace(ppaace); + /* window size is 2^(WSE+1) bytes */ + set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35); + ppaace->wbah = 0; + set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0); + set_bf(ppaace->impl_attr, PAACE_IA_ATM, + PAACE_ATM_NO_XLATE); + set_bf(ppaace->addr_bitfields, PAACE_AF_AP, + PAACE_AP_PERMS_ALL); + if (of_device_is_compatible(node, "fsl,qman-portal")) + setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE); + if (of_device_is_compatible(node, "fsl,qman")) + setup_qbman_paace(ppaace, QMAN_PAACE); + if (of_device_is_compatible(node, "fsl,bman")) + setup_qbman_paace(ppaace, BMAN_PAACE); + mb(); + pamu_enable_liodn(liodn); + } + } +} + +irqreturn_t pamu_av_isr(int irq, void *arg) +{ + struct pamu_isr_data *data = arg; + phys_addr_t phys; + unsigned int i, j, ret; + + pr_emerg("access violation interrupt\n"); + + for (i = 0; i < data->count; i++) { + void __iomem *p = data->pamu_reg_base + i * PAMU_OFFSET; + u32 pics = in_be32(p + PAMU_PICS); + + if (pics & PAMU_ACCESS_VIOLATION_STAT) { + u32 avs1 = in_be32(p + PAMU_AVS1); + struct paace *paace; + + pr_emerg("POES1=%08x\n", in_be32(p + PAMU_POES1)); + pr_emerg("POES2=%08x\n", in_be32(p + PAMU_POES2)); + pr_emerg("AVS1=%08x\n", avs1); + pr_emerg("AVS2=%08x\n", in_be32(p + PAMU_AVS2)); + pr_emerg("AVA=%016llx\n", make64(in_be32(p + PAMU_AVAH), + in_be32(p + PAMU_AVAL))); + pr_emerg("UDAD=%08x\n", in_be32(p + PAMU_UDAD)); + pr_emerg("POEA=%016llx\n", make64(in_be32(p + PAMU_POEAH), + in_be32(p + PAMU_POEAL))); + + phys = make64(in_be32(p + PAMU_POEAH), + in_be32(p + PAMU_POEAL)); + + /* Assume that POEA points to a PAACE */ + if (phys) { + u32 *paace = phys_to_virt(phys); + + /* Only the first four words are relevant */ + for (j = 0; j < 4; j++) + pr_emerg("PAACE[%u]=%08x\n", j, in_be32(paace + j)); + } + + /* clear access violation condition */ + out_be32((p + PAMU_AVS1), avs1 & PAMU_AV_MASK); + paace = pamu_get_ppaace(avs1 >> PAMU_AVS1_LIODN_SHIFT); + BUG_ON(!paace); + /* check if we got a violation for a disabled LIODN */ + if (!get_bf(paace->addr_bitfields, PAACE_AF_V)) { + /* + * As per hardware erratum A-003638, access + * violation can be reported for a disabled + * LIODN. If we hit that condition, disable + * access violation reporting. 
+ */ + pics &= ~PAMU_ACCESS_VIOLATION_ENABLE; + } else { + /* Disable the LIODN */ + ret = pamu_disable_liodn(avs1 >> PAMU_AVS1_LIODN_SHIFT); + BUG_ON(ret); + pr_emerg("Disabling liodn %x\n", avs1 >> PAMU_AVS1_LIODN_SHIFT); + } + out_be32((p + PAMU_PICS), pics); + } + } + + + return IRQ_HANDLED; +} + +#define LAWAR_EN 0x80000000 +#define LAWAR_TARGET_MASK 0x0FF00000 +#define LAWAR_TARGET_SHIFT 20 +#define LAWAR_SIZE_MASK 0x0000003F +#define LAWAR_CSDID_MASK 0x000FF000 +#define LAWAR_CSDID_SHIFT 12 + +#define LAW_SIZE_4K 0xb + +struct ccsr_law { + u32 lawbarh; /* LAWn base address high */ + u32 lawbarl; /* LAWn base address low */ + u32 lawar; /* LAWn attributes */ + u32 reserved; +}; + +/* + * Create a coherence subdomain for a given memory block. + */ +static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) +{ + struct device_node *np; + const __be32 *iprop; + void __iomem *lac = NULL; /* Local Access Control registers */ + struct ccsr_law __iomem *law; + void __iomem *ccm = NULL; + u32 __iomem *csdids; + unsigned int i, num_laws, num_csds; + u32 law_target = 0; + u32 csd_id = 0; + int ret = 0; + + np = of_find_compatible_node(NULL, NULL, "fsl,corenet-law"); + if (!np) + return -ENODEV; + + iprop = of_get_property(np, "fsl,num-laws", NULL); + if (!iprop) { + ret = -ENODEV; + goto error; + } + + num_laws = be32_to_cpup(iprop); + if (!num_laws) { + ret = -ENODEV; + goto error; + } + + lac = of_iomap(np, 0); + if (!lac) { + ret = -ENODEV; + goto error; + } + + /* LAW registers are at offset 0xC00 */ + law = lac + 0xC00; + + of_node_put(np); + + np = of_find_compatible_node(NULL, NULL, "fsl,corenet-cf"); + if (!np) { + ret = -ENODEV; + goto error; + } + + iprop = of_get_property(np, "fsl,ccf-num-csdids", NULL); + if (!iprop) { + ret = -ENODEV; + goto error; + } + + num_csds = be32_to_cpup(iprop); + if (!num_csds) { + ret = -ENODEV; + goto error; + } + + ccm = of_iomap(np, 0); + if (!ccm) { + ret = -ENOMEM; + goto error; + } + + /* The undocumented CSDID registers are at offset 0x600 */ + csdids = ccm + 0x600; + + of_node_put(np); + np = NULL; + + /* Find an unused coherence subdomain ID */ + for (csd_id = 0; csd_id < num_csds; csd_id++) { + if (!csdids[csd_id]) + break; + } + + /* Store the Port ID in the (undocumented) proper CIDMRxx register */ + csdids[csd_id] = csd_port_id; + + /* Find the DDR LAW that maps to our buffer. */ + for (i = 0; i < num_laws; i++) { + if (law[i].lawar & LAWAR_EN) { + phys_addr_t law_start, law_end; + + law_start = make64(law[i].lawbarh, law[i].lawbarl); + law_end = law_start + + (2ULL << (law[i].lawar & LAWAR_SIZE_MASK)); + + if (law_start <= phys && phys < law_end) { + law_target = law[i].lawar & LAWAR_TARGET_MASK; + break; + } + } + } + + if (i == 0 || i == num_laws) { + /* This should never happen*/ + ret = -ENOENT; + goto error; + } + + /* Find a free LAW entry */ + while (law[--i].lawar & LAWAR_EN) { + if (i == 0) { + /* No higher priority LAW slots available */ + ret = -ENOENT; + goto error; + } + } + + law[i].lawbarh = upper_32_bits(phys); + law[i].lawbarl = lower_32_bits(phys); + wmb(); + law[i].lawar = LAWAR_EN | law_target | (csd_id << LAWAR_CSDID_SHIFT) | + (LAW_SIZE_4K + get_order(size)); + wmb(); + +error: + if (ccm) + iounmap(ccm); + + if (lac) + iounmap(lac); + + if (np) + of_node_put(np); + + return ret; +} + +/* + * Table of SVRs and the corresponding PORT_ID values. Port ID corresponds to a + * bit map of snoopers for a given range of memory mapped by a LAW. 
+ * + * All future CoreNet-enabled SOCs will have this erratum(A-004510) fixed, so this + * table should never need to be updated. SVRs are guaranteed to be unique, so + * there is no worry that a future SOC will inadvertently have one of these + * values. + */ +static const struct { + u32 svr; + u32 port_id; +} port_id_map[] = { + {0x82100010, 0xFF000000}, /* P2040 1.0 */ + {0x82100011, 0xFF000000}, /* P2040 1.1 */ + {0x82100110, 0xFF000000}, /* P2041 1.0 */ + {0x82100111, 0xFF000000}, /* P2041 1.1 */ + {0x82110310, 0xFF000000}, /* P3041 1.0 */ + {0x82110311, 0xFF000000}, /* P3041 1.1 */ + {0x82010020, 0xFFF80000}, /* P4040 2.0 */ + {0x82000020, 0xFFF80000}, /* P4080 2.0 */ + {0x82210010, 0xFC000000}, /* P5010 1.0 */ + {0x82210020, 0xFC000000}, /* P5010 2.0 */ + {0x82200010, 0xFC000000}, /* P5020 1.0 */ + {0x82050010, 0xFF800000}, /* P5021 1.0 */ + {0x82040010, 0xFF800000}, /* P5040 1.0 */ +}; + +#define SVR_SECURITY 0x80000 /* The Security (E) bit */ + +static int __init fsl_pamu_probe(struct platform_device *pdev) +{ + void __iomem *pamu_regs = NULL; + struct ccsr_guts __iomem *guts_regs = NULL; + u32 pamubypenr, pamu_counter; + unsigned long pamu_reg_off; + unsigned long pamu_reg_base; + struct pamu_isr_data *data = NULL; + struct device_node *guts_node; + u64 size; + struct page *p; + int ret = 0; + int irq; + phys_addr_t ppaact_phys; + phys_addr_t spaact_phys; + phys_addr_t omt_phys; + size_t mem_size = 0; + unsigned int order = 0; + u32 csd_port_id = 0; + unsigned i; + /* + * enumerate all PAMUs and allocate and setup PAMU tables + * for each of them, + * NOTE : All PAMUs share the same LIODN tables. + */ + + pamu_regs = of_iomap(pdev->dev.of_node, 0); + if (!pamu_regs) { + dev_err(&pdev->dev, "ioremap of PAMU node failed\n"); + return -ENOMEM; + } + of_get_address(pdev->dev.of_node, 0, &size, NULL); + + irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + if (irq == NO_IRQ) { + dev_warn(&pdev->dev, "no interrupts listed in PAMU node\n"); + goto error; + } + + data = kzalloc(sizeof(struct pamu_isr_data), GFP_KERNEL); + if (!data) { + dev_err(&pdev->dev, "PAMU isr data memory allocation failed\n"); + ret = -ENOMEM; + goto error; + } + data->pamu_reg_base = pamu_regs; + data->count = size / PAMU_OFFSET; + + /* The ISR needs access to the regs, so we won't iounmap them */ + ret = request_irq(irq, pamu_av_isr, 0, "pamu", data); + if (ret < 0) { + dev_err(&pdev->dev, "error %i installing ISR for irq %i\n", + ret, irq); + goto error; + } + + guts_node = of_find_matching_node(NULL, guts_device_ids); + if (!guts_node) { + dev_err(&pdev->dev, "could not find GUTS node %s\n", + pdev->dev.of_node->full_name); + ret = -ENODEV; + goto error; + } + + guts_regs = of_iomap(guts_node, 0); + of_node_put(guts_node); + if (!guts_regs) { + dev_err(&pdev->dev, "ioremap of GUTS node failed\n"); + ret = -ENODEV; + goto error; + } + + /* read in the PAMU capability registers */ + get_pamu_cap_values((unsigned long)pamu_regs); + /* + * To simplify the allocation of a coherency domain, we allocate the + * PAACT and the OMT in the same memory buffer. Unfortunately, this + * wastes more memory compared to allocating the buffers separately. 
+ */ + /* Determine how much memory we need */ + mem_size = (PAGE_SIZE << get_order(PAACT_SIZE)) + + (PAGE_SIZE << get_order(SPAACT_SIZE)) + + (PAGE_SIZE << get_order(OMT_SIZE)); + order = get_order(mem_size); + + p = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!p) { + dev_err(&pdev->dev, "unable to allocate PAACT/SPAACT/OMT block\n"); + ret = -ENOMEM; + goto error; + } + + ppaact = page_address(p); + ppaact_phys = page_to_phys(p); + + /* Make sure the memory is naturally aligned */ + if (ppaact_phys & ((PAGE_SIZE << order) - 1)) { + dev_err(&pdev->dev, "PAACT/OMT block is unaligned\n"); + ret = -ENOMEM; + goto error; + } + + spaact = (void *)ppaact + (PAGE_SIZE << get_order(PAACT_SIZE)); + omt = (void *)spaact + (PAGE_SIZE << get_order(SPAACT_SIZE)); + + dev_dbg(&pdev->dev, "ppaact virt=%p phys=0x%llx\n", ppaact, + (unsigned long long) ppaact_phys); + + /* Check to see if we need to implement the work-around on this SOC */ + + /* Determine the Port ID for our coherence subdomain */ + for (i = 0; i < ARRAY_SIZE(port_id_map); i++) { + if (port_id_map[i].svr == (mfspr(SPRN_SVR) & ~SVR_SECURITY)) { + csd_port_id = port_id_map[i].port_id; + dev_dbg(&pdev->dev, "found matching SVR %08x\n", + port_id_map[i].svr); + break; + } + } + + if (csd_port_id) { + dev_dbg(&pdev->dev, "creating coherency subdomain at address " + "0x%llx, size %zu, port id 0x%08x", ppaact_phys, + mem_size, csd_port_id); + + ret = create_csd(ppaact_phys, mem_size, csd_port_id); + if (ret) { + dev_err(&pdev->dev, "could not create coherence " + "subdomain\n"); + return ret; + } + } + + spaact_phys = virt_to_phys(spaact); + omt_phys = virt_to_phys(omt); + + spaace_pool = gen_pool_create(ilog2(sizeof(struct paace)), -1); + if (!spaace_pool) { + ret = -ENOMEM; + dev_err(&pdev->dev, "PAMU : failed to allocate spaace gen pool\n"); + goto error; + } + + ret = gen_pool_add(spaace_pool, (unsigned long)spaact, SPAACT_SIZE, -1); + if (ret) + goto error_genpool; + + pamubypenr = in_be32(&guts_regs->pamubypenr); + + for (pamu_reg_off = 0, pamu_counter = 0x80000000; pamu_reg_off < size; + pamu_reg_off += PAMU_OFFSET, pamu_counter >>= 1) { + + pamu_reg_base = (unsigned long) pamu_regs + pamu_reg_off; + setup_one_pamu(pamu_reg_base, pamu_reg_off, ppaact_phys, + spaact_phys, omt_phys); + /* Disable PAMU bypass for this PAMU */ + pamubypenr &= ~pamu_counter; + } + + setup_omt(omt); + + /* Enable all relevant PAMU(s) */ + out_be32(&guts_regs->pamubypenr, pamubypenr); + + iounmap(guts_regs); + + /* Enable DMA for the LIODNs in the device tree*/ + + setup_liodns(); + + return 0; + +error_genpool: + gen_pool_destroy(spaace_pool); + +error: + if (irq != NO_IRQ) + free_irq(irq, data); + + if (data) { + memset(data, 0, sizeof(struct pamu_isr_data)); + kfree(data); + } + + if (pamu_regs) + iounmap(pamu_regs); + + if (guts_regs) + iounmap(guts_regs); + + if (ppaact) + free_pages((unsigned long)ppaact, order); + + ppaact = NULL; + + return ret; +} + +static const struct of_device_id fsl_of_pamu_ids[] = { + { + .compatible = "fsl,p4080-pamu", + }, + { + .compatible = "fsl,pamu", + }, + {}, +}; + +static struct platform_driver fsl_of_pamu_driver = { + .driver = { + .name = "fsl-of-pamu", + .owner = THIS_MODULE, + }, + .probe = fsl_pamu_probe, +}; + +static __init int fsl_pamu_init(void) +{ + struct platform_device *pdev = NULL; + struct device_node *np; + int ret; + + /* + * The normal OF process calls the probe function at some + * indeterminate later time, after most drivers have loaded. 
This is + * too late for us, because PAMU clients (like the Qman driver) + * depend on PAMU being initialized early. + * + * So instead, we "manually" call our probe function by creating the + * platform devices ourselves. + */ + + /* + * We assume that there is only one PAMU node in the device tree. A + * single PAMU node represents all of the PAMU devices in the SOC + * already. Everything else already makes that assumption, and the + * binding for the PAMU nodes doesn't allow for any parent-child + * relationships anyway. In other words, support for more than one + * PAMU node would require significant changes to a lot of code. + */ + + np = of_find_compatible_node(NULL, NULL, "fsl,pamu"); + if (!np) { + pr_err("could not find a PAMU node\n"); + return -ENODEV; + } + + ret = platform_driver_register(&fsl_of_pamu_driver); + if (ret) { + pr_err("could not register driver (err=%i)\n", ret); + goto error_driver_register; + } + + pdev = platform_device_alloc("fsl-of-pamu", 0); + if (!pdev) { + pr_err("could not allocate device %s\n", + np->full_name); + ret = -ENOMEM; + goto error_device_alloc; + } + pdev->dev.of_node = of_node_get(np); + + ret = pamu_domain_init(); + if (ret) + goto error_device_add; + + ret = platform_device_add(pdev); + if (ret) { + pr_err("could not add device %s (err=%i)\n", + np->full_name, ret); + goto error_device_add; + } + + return 0; + +error_device_add: + of_node_put(pdev->dev.of_node); + pdev->dev.of_node = NULL; + + platform_device_put(pdev); + +error_device_alloc: + platform_driver_unregister(&fsl_of_pamu_driver); + +error_driver_register: + of_node_put(np); + + return ret; +} +arch_initcall(fsl_pamu_init); diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h new file mode 100644 index 00000000000..8fc1a125b16 --- /dev/null +++ b/drivers/iommu/fsl_pamu.h @@ -0,0 +1,410 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. 
+ * + */ + +#ifndef __FSL_PAMU_H +#define __FSL_PAMU_H + +#include <asm/fsl_pamu_stash.h> + +/* Bit Field macros + * v = bit field variable; m = mask, m##_SHIFT = shift, x = value to load + */ +#define set_bf(v, m, x) (v = ((v) & ~(m)) | (((x) << (m##_SHIFT)) & (m))) +#define get_bf(v, m) (((v) & (m)) >> (m##_SHIFT)) + +/* PAMU CCSR space */ +#define PAMU_PGC 0x00000000 /* Allows all peripheral accesses */ +#define PAMU_PE 0x40000000 /* enable PAMU */ + +/* PAMU_OFFSET to the next pamu space in ccsr */ +#define PAMU_OFFSET 0x1000 + +#define PAMU_MMAP_REGS_BASE 0 + +struct pamu_mmap_regs { + u32 ppbah; + u32 ppbal; + u32 pplah; + u32 pplal; + u32 spbah; + u32 spbal; + u32 splah; + u32 splal; + u32 obah; + u32 obal; + u32 olah; + u32 olal; +}; + +/* PAMU Error Registers */ +#define PAMU_POES1 0x0040 +#define PAMU_POES2 0x0044 +#define PAMU_POEAH 0x0048 +#define PAMU_POEAL 0x004C +#define PAMU_AVS1 0x0050 +#define PAMU_AVS1_AV 0x1 +#define PAMU_AVS1_OTV 0x6 +#define PAMU_AVS1_APV 0x78 +#define PAMU_AVS1_WAV 0x380 +#define PAMU_AVS1_LAV 0x1c00 +#define PAMU_AVS1_GCV 0x2000 +#define PAMU_AVS1_PDV 0x4000 +#define PAMU_AV_MASK (PAMU_AVS1_AV | PAMU_AVS1_OTV | PAMU_AVS1_APV | PAMU_AVS1_WAV \ + | PAMU_AVS1_LAV | PAMU_AVS1_GCV | PAMU_AVS1_PDV) +#define PAMU_AVS1_LIODN_SHIFT 16 +#define PAMU_LAV_LIODN_NOT_IN_PPAACT 0x400 + +#define PAMU_AVS2 0x0054 +#define PAMU_AVAH 0x0058 +#define PAMU_AVAL 0x005C +#define PAMU_EECTL 0x0060 +#define PAMU_EEDIS 0x0064 +#define PAMU_EEINTEN 0x0068 +#define PAMU_EEDET 0x006C +#define PAMU_EEATTR 0x0070 +#define PAMU_EEAHI 0x0074 +#define PAMU_EEALO 0x0078 +#define PAMU_EEDHI 0X007C +#define PAMU_EEDLO 0x0080 +#define PAMU_EECC 0x0084 +#define PAMU_UDAD 0x0090 + +/* PAMU Revision Registers */ +#define PAMU_PR1 0x0BF8 +#define PAMU_PR2 0x0BFC + +/* PAMU version mask */ +#define PAMU_PR1_MASK 0xffff + +/* PAMU Capabilities Registers */ +#define PAMU_PC1 0x0C00 +#define PAMU_PC2 0x0C04 +#define PAMU_PC3 0x0C08 +#define PAMU_PC4 0x0C0C + +/* PAMU Control Register */ +#define PAMU_PC 0x0C10 + +/* PAMU control defs */ +#define PAMU_CONTROL 0x0C10 +#define PAMU_PC_PGC 0x80000000 /* PAMU gate closed bit */ +#define PAMU_PC_PE 0x40000000 /* PAMU enable bit */ +#define PAMU_PC_SPCC 0x00000010 /* sPAACE cache enable */ +#define PAMU_PC_PPCC 0x00000001 /* pPAACE cache enable */ +#define PAMU_PC_OCE 0x00001000 /* OMT cache enable */ + +#define PAMU_PFA1 0x0C14 +#define PAMU_PFA2 0x0C18 + +#define PAMU_PC2_MLIODN(X) ((X) >> 16) +#define PAMU_PC3_MWCE(X) (((X) >> 21) & 0xf) + +/* PAMU Interrupt control and Status Register */ +#define PAMU_PICS 0x0C1C +#define PAMU_ACCESS_VIOLATION_STAT 0x8 +#define PAMU_ACCESS_VIOLATION_ENABLE 0x4 + +/* PAMU Debug Registers */ +#define PAMU_PD1 0x0F00 +#define PAMU_PD2 0x0F04 +#define PAMU_PD3 0x0F08 +#define PAMU_PD4 0x0F0C + +#define PAACE_AP_PERMS_DENIED 0x0 +#define PAACE_AP_PERMS_QUERY 0x1 +#define PAACE_AP_PERMS_UPDATE 0x2 +#define PAACE_AP_PERMS_ALL 0x3 + +#define PAACE_DD_TO_HOST 0x0 +#define PAACE_DD_TO_IO 0x1 +#define PAACE_PT_PRIMARY 0x0 +#define PAACE_PT_SECONDARY 0x1 +#define PAACE_V_INVALID 0x0 +#define PAACE_V_VALID 0x1 +#define PAACE_MW_SUBWINDOWS 0x1 + +#define PAACE_WSE_4K 0xB +#define PAACE_WSE_8K 0xC +#define PAACE_WSE_16K 0xD +#define PAACE_WSE_32K 0xE +#define PAACE_WSE_64K 0xF +#define PAACE_WSE_128K 0x10 +#define PAACE_WSE_256K 0x11 +#define PAACE_WSE_512K 0x12 +#define PAACE_WSE_1M 0x13 +#define PAACE_WSE_2M 0x14 +#define PAACE_WSE_4M 0x15 +#define PAACE_WSE_8M 0x16 +#define PAACE_WSE_16M 0x17 +#define PAACE_WSE_32M 0x18 
+#define PAACE_WSE_64M 0x19 +#define PAACE_WSE_128M 0x1A +#define PAACE_WSE_256M 0x1B +#define PAACE_WSE_512M 0x1C +#define PAACE_WSE_1G 0x1D +#define PAACE_WSE_2G 0x1E +#define PAACE_WSE_4G 0x1F + +#define PAACE_DID_PCI_EXPRESS_1 0x00 +#define PAACE_DID_PCI_EXPRESS_2 0x01 +#define PAACE_DID_PCI_EXPRESS_3 0x02 +#define PAACE_DID_PCI_EXPRESS_4 0x03 +#define PAACE_DID_LOCAL_BUS 0x04 +#define PAACE_DID_SRIO 0x0C +#define PAACE_DID_MEM_1 0x10 +#define PAACE_DID_MEM_2 0x11 +#define PAACE_DID_MEM_3 0x12 +#define PAACE_DID_MEM_4 0x13 +#define PAACE_DID_MEM_1_2 0x14 +#define PAACE_DID_MEM_3_4 0x15 +#define PAACE_DID_MEM_1_4 0x16 +#define PAACE_DID_BM_SW_PORTAL 0x18 +#define PAACE_DID_PAMU 0x1C +#define PAACE_DID_CAAM 0x21 +#define PAACE_DID_QM_SW_PORTAL 0x3C +#define PAACE_DID_CORE0_INST 0x80 +#define PAACE_DID_CORE0_DATA 0x81 +#define PAACE_DID_CORE1_INST 0x82 +#define PAACE_DID_CORE1_DATA 0x83 +#define PAACE_DID_CORE2_INST 0x84 +#define PAACE_DID_CORE2_DATA 0x85 +#define PAACE_DID_CORE3_INST 0x86 +#define PAACE_DID_CORE3_DATA 0x87 +#define PAACE_DID_CORE4_INST 0x88 +#define PAACE_DID_CORE4_DATA 0x89 +#define PAACE_DID_CORE5_INST 0x8A +#define PAACE_DID_CORE5_DATA 0x8B +#define PAACE_DID_CORE6_INST 0x8C +#define PAACE_DID_CORE6_DATA 0x8D +#define PAACE_DID_CORE7_INST 0x8E +#define PAACE_DID_CORE7_DATA 0x8F +#define PAACE_DID_BROADCAST 0xFF + +#define PAACE_ATM_NO_XLATE 0x00 +#define PAACE_ATM_WINDOW_XLATE 0x01 +#define PAACE_ATM_PAGE_XLATE 0x02 +#define PAACE_ATM_WIN_PG_XLATE \ + (PAACE_ATM_WINDOW_XLATE | PAACE_ATM_PAGE_XLATE) +#define PAACE_OTM_NO_XLATE 0x00 +#define PAACE_OTM_IMMEDIATE 0x01 +#define PAACE_OTM_INDEXED 0x02 +#define PAACE_OTM_RESERVED 0x03 + +#define PAACE_M_COHERENCE_REQ 0x01 + +#define PAACE_PID_0 0x0 +#define PAACE_PID_1 0x1 +#define PAACE_PID_2 0x2 +#define PAACE_PID_3 0x3 +#define PAACE_PID_4 0x4 +#define PAACE_PID_5 0x5 +#define PAACE_PID_6 0x6 +#define PAACE_PID_7 0x7 + +#define PAACE_TCEF_FORMAT0_8B 0x00 +#define PAACE_TCEF_FORMAT1_RSVD 0x01 +/* + * Hard coded value for the PAACT size to accomodate + * maximum LIODN value generated by u-boot. 
+ */ +#define PAACE_NUMBER_ENTRIES 0x500 +/* Hard coded value for the SPAACT size */ +#define SPAACE_NUMBER_ENTRIES 0x800 + +#define OME_NUMBER_ENTRIES 16 + +/* PAACE Bit Field Defines */ +#define PPAACE_AF_WBAL 0xfffff000 +#define PPAACE_AF_WBAL_SHIFT 12 +#define PPAACE_AF_WSE 0x00000fc0 +#define PPAACE_AF_WSE_SHIFT 6 +#define PPAACE_AF_MW 0x00000020 +#define PPAACE_AF_MW_SHIFT 5 + +#define SPAACE_AF_LIODN 0xffff0000 +#define SPAACE_AF_LIODN_SHIFT 16 + +#define PAACE_AF_AP 0x00000018 +#define PAACE_AF_AP_SHIFT 3 +#define PAACE_AF_DD 0x00000004 +#define PAACE_AF_DD_SHIFT 2 +#define PAACE_AF_PT 0x00000002 +#define PAACE_AF_PT_SHIFT 1 +#define PAACE_AF_V 0x00000001 +#define PAACE_AF_V_SHIFT 0 + +#define PAACE_DA_HOST_CR 0x80 +#define PAACE_DA_HOST_CR_SHIFT 7 + +#define PAACE_IA_CID 0x00FF0000 +#define PAACE_IA_CID_SHIFT 16 +#define PAACE_IA_WCE 0x000000F0 +#define PAACE_IA_WCE_SHIFT 4 +#define PAACE_IA_ATM 0x0000000C +#define PAACE_IA_ATM_SHIFT 2 +#define PAACE_IA_OTM 0x00000003 +#define PAACE_IA_OTM_SHIFT 0 + +#define PAACE_WIN_TWBAL 0xfffff000 +#define PAACE_WIN_TWBAL_SHIFT 12 +#define PAACE_WIN_SWSE 0x00000fc0 +#define PAACE_WIN_SWSE_SHIFT 6 + +/* PAMU Data Structures */ +/* primary / secondary paact structure */ +struct paace { + /* PAACE Offset 0x00 */ + u32 wbah; /* only valid for Primary PAACE */ + u32 addr_bitfields; /* See P/S PAACE_AF_* */ + + /* PAACE Offset 0x08 */ + /* Interpretation of first 32 bits dependent on DD above */ + union { + struct { + /* Destination ID, see PAACE_DID_* defines */ + u8 did; + /* Partition ID */ + u8 pid; + /* Snoop ID */ + u8 snpid; + /* coherency_required : 1 reserved : 7 */ + u8 coherency_required; /* See PAACE_DA_* */ + } to_host; + struct { + /* Destination ID, see PAACE_DID_* defines */ + u8 did; + u8 reserved1; + u16 reserved2; + } to_io; + } domain_attr; + + /* Implementation attributes + window count + address & operation translation modes */ + u32 impl_attr; /* See PAACE_IA_* */ + + /* PAACE Offset 0x10 */ + /* Translated window base address */ + u32 twbah; + u32 win_bitfields; /* See PAACE_WIN_* */ + + /* PAACE Offset 0x18 */ + /* first secondary paace entry */ + u32 fspi; /* only valid for Primary PAACE */ + union { + struct { + u8 ioea; + u8 moea; + u8 ioeb; + u8 moeb; + } immed_ot; + struct { + u16 reserved; + u16 omi; + } index_ot; + } op_encode; + + /* PAACE Offsets 0x20-0x38 */ + u32 reserved[8]; /* not currently implemented */ +}; + +/* OME : Operation mapping entry + * MOE : Mapped Operation Encodings + * The operation mapping table is table containing operation mapping entries (OME). + * The index of a particular OME is programmed in the PAACE entry for translation + * in bound I/O operations corresponding to an LIODN. The OMT is used for translation + * specifically in case of the indexed translation mode. Each OME contains a 128 + * byte mapped operation encoding (MOE), where each byte represents an MOE. 
+ */ +#define NUM_MOE 128 +struct ome { + u8 moe[NUM_MOE]; +} __attribute__((packed)); + +#define PAACT_SIZE (sizeof(struct paace) * PAACE_NUMBER_ENTRIES) +#define SPAACT_SIZE (sizeof(struct paace) * SPAACE_NUMBER_ENTRIES) +#define OMT_SIZE (sizeof(struct ome) * OME_NUMBER_ENTRIES) + +#define PAMU_PAGE_SHIFT 12 +#define PAMU_PAGE_SIZE 4096ULL + +#define IOE_READ 0x00 +#define IOE_READ_IDX 0x00 +#define IOE_WRITE 0x81 +#define IOE_WRITE_IDX 0x01 +#define IOE_EREAD0 0x82 /* Enhanced read type 0 */ +#define IOE_EREAD0_IDX 0x02 /* Enhanced read type 0 */ +#define IOE_EWRITE0 0x83 /* Enhanced write type 0 */ +#define IOE_EWRITE0_IDX 0x03 /* Enhanced write type 0 */ +#define IOE_DIRECT0 0x84 /* Directive type 0 */ +#define IOE_DIRECT0_IDX 0x04 /* Directive type 0 */ +#define IOE_EREAD1 0x85 /* Enhanced read type 1 */ +#define IOE_EREAD1_IDX 0x05 /* Enhanced read type 1 */ +#define IOE_EWRITE1 0x86 /* Enhanced write type 1 */ +#define IOE_EWRITE1_IDX 0x06 /* Enhanced write type 1 */ +#define IOE_DIRECT1 0x87 /* Directive type 1 */ +#define IOE_DIRECT1_IDX 0x07 /* Directive type 1 */ +#define IOE_RAC 0x8c /* Read with Atomic clear */ +#define IOE_RAC_IDX 0x0c /* Read with Atomic clear */ +#define IOE_RAS 0x8d /* Read with Atomic set */ +#define IOE_RAS_IDX 0x0d /* Read with Atomic set */ +#define IOE_RAD 0x8e /* Read with Atomic decrement */ +#define IOE_RAD_IDX 0x0e /* Read with Atomic decrement */ +#define IOE_RAI 0x8f /* Read with Atomic increment */ +#define IOE_RAI_IDX 0x0f /* Read with Atomic increment */ + +#define EOE_READ 0x00 +#define EOE_WRITE 0x01 +#define EOE_RAC 0x0c /* Read with Atomic clear */ +#define EOE_RAS 0x0d /* Read with Atomic set */ +#define EOE_RAD 0x0e /* Read with Atomic decrement */ +#define EOE_RAI 0x0f /* Read with Atomic increment */ +#define EOE_LDEC 0x10 /* Load external cache */ +#define EOE_LDECL 0x11 /* Load external cache with stash lock */ +#define EOE_LDECPE 0x12 /* Load external cache with preferred exclusive */ +#define EOE_LDECPEL 0x13 /* Load external cache with preferred exclusive and lock */ +#define EOE_LDECFE 0x14 /* Load external cache with forced exclusive */ +#define EOE_LDECFEL 0x15 /* Load external cache with forced exclusive and lock */ +#define EOE_RSA 0x16 /* Read with stash allocate */ +#define EOE_RSAU 0x17 /* Read with stash allocate and unlock */ +#define EOE_READI 0x18 /* Read with invalidate */ +#define EOE_RWNITC 0x19 /* Read with no intention to cache */ +#define EOE_WCI 0x1a /* Write cache inhibited */ +#define EOE_WWSA 0x1b /* Write with stash allocate */ +#define EOE_WWSAL 0x1c /* Write with stash allocate and lock */ +#define EOE_WWSAO 0x1d /* Write with stash allocate only */ +#define EOE_WWSAOL 0x1e /* Write with stash allocate only and lock */ +#define EOE_VALID 0x80 + +/* Function prototypes */ +int pamu_domain_init(void); +int pamu_enable_liodn(int liodn); +int pamu_disable_liodn(int liodn); +void pamu_free_subwins(int liodn); +int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, + u32 omi, unsigned long rpn, u32 snoopid, uint32_t stashid, + u32 subwin_cnt, int prot); +int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin_addr, + phys_addr_t subwin_size, u32 omi, unsigned long rpn, + uint32_t snoopid, u32 stashid, int enable, int prot); + +u32 get_stash_id(u32 stash_dest_hint, u32 vcpu); +void get_ome_index(u32 *omi_index, struct device *dev); +int pamu_update_paace_stash(int liodn, u32 subwin, u32 value); +int pamu_disable_spaace(int liodn, u32 subwin); +u32 pamu_get_max_subwin_cnt(void); + 
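A minimal sketch (not part of the patch) of how the set_bf()/get_bf() helpers and the window-size encoding above are meant to compose; example_encode_4k_window() is a hypothetical helper that only exercises macros and defines from this header:

static inline u32 example_encode_4k_window(void)
{
	u32 addr_bitfields = 0;

	/* window size is 2^(WSE+1) bytes, so a 4 KiB window encodes as 0xB */
	set_bf(addr_bitfields, PPAACE_AF_WSE, PAACE_WSE_4K);

	/* reads the field back: (addr_bitfields & 0x00000fc0) >> 6 == 0xB */
	return get_bf(addr_bitfields, PPAACE_AF_WSE);
}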
+#endif /* __FSL_PAMU_H */ diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c new file mode 100644 index 00000000000..af47648301a --- /dev/null +++ b/drivers/iommu/fsl_pamu_domain.c @@ -0,0 +1,1170 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * Author: Varun Sethi <varun.sethi@freescale.com> + * + */ + +#define pr_fmt(fmt) "fsl-pamu-domain: %s: " fmt, __func__ + +#include <linux/init.h> +#include <linux/iommu.h> +#include <linux/notifier.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include <linux/of_platform.h> +#include <linux/bootmem.h> +#include <linux/err.h> +#include <asm/io.h> +#include <asm/bitops.h> + +#include <asm/pci-bridge.h> +#include <sysdev/fsl_pci.h> + +#include "fsl_pamu_domain.h" +#include "pci.h" + +/* + * Global spinlock that needs to be held while + * configuring PAMU. + */ +static DEFINE_SPINLOCK(iommu_lock); + +static struct kmem_cache *fsl_pamu_domain_cache; +static struct kmem_cache *iommu_devinfo_cache; +static DEFINE_SPINLOCK(device_domain_lock); + +static int __init iommu_init_mempool(void) +{ + + fsl_pamu_domain_cache = kmem_cache_create("fsl_pamu_domain", + sizeof(struct fsl_dma_domain), + 0, + SLAB_HWCACHE_ALIGN, + + NULL); + if (!fsl_pamu_domain_cache) { + pr_debug("Couldn't create fsl iommu_domain cache\n"); + return -ENOMEM; + } + + iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", + sizeof(struct device_domain_info), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!iommu_devinfo_cache) { + pr_debug("Couldn't create devinfo cache\n"); + kmem_cache_destroy(fsl_pamu_domain_cache); + return -ENOMEM; + } + + return 0; +} + +static phys_addr_t get_phys_addr(struct fsl_dma_domain *dma_domain, dma_addr_t iova) +{ + u32 win_cnt = dma_domain->win_cnt; + struct dma_window *win_ptr = + &dma_domain->win_arr[0]; + struct iommu_domain_geometry *geom; + + geom = &dma_domain->iommu_domain->geometry; + + if (!win_cnt || !dma_domain->geom_size) { + pr_debug("Number of windows/geometry not configured for the domain\n"); + return 0; + } + + if (win_cnt > 1) { + u64 subwin_size; + dma_addr_t subwin_iova; + u32 wnd; + + subwin_size = dma_domain->geom_size >> ilog2(win_cnt); + subwin_iova = iova & ~(subwin_size - 1); + wnd = (subwin_iova - geom->aperture_start) >> ilog2(subwin_size); + win_ptr = &dma_domain->win_arr[wnd]; + } + + if (win_ptr->valid) + return (win_ptr->paddr + (iova & (win_ptr->size - 1))); + + return 0; +} + +static int map_subwins(int liodn, struct fsl_dma_domain *dma_domain) +{ + struct dma_window *sub_win_ptr = + &dma_domain->win_arr[0]; + int i, ret; + unsigned long rpn, flags; + + for (i = 0; i < dma_domain->win_cnt; i++) { + if (sub_win_ptr[i].valid) { + rpn = sub_win_ptr[i].paddr >> + PAMU_PAGE_SHIFT; + spin_lock_irqsave(&iommu_lock, flags); + 
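/*
 * Annotation (not part of the original patch): each valid subwindow is
 * programmed with its translated base as a real page number
 * (paddr >> PAMU_PAGE_SHIFT), with omi passed as ~(u32)0 ("no operation
 * mapping").  The enable argument is 0 for subwindow 0 because
 * pamu_config_spaace() folds that case into the primary PAACE itself, whose
 * valid bit is set separately by pamu_enable_liodn().
 */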
ret = pamu_config_spaace(liodn, dma_domain->win_cnt, i, + sub_win_ptr[i].size, + ~(u32)0, + rpn, + dma_domain->snoop_id, + dma_domain->stash_id, + (i > 0) ? 1 : 0, + sub_win_ptr[i].prot); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("PAMU SPAACE configuration failed for liodn %d\n", + liodn); + return ret; + } + } + } + + return ret; +} + +static int map_win(int liodn, struct fsl_dma_domain *dma_domain) +{ + int ret; + struct dma_window *wnd = &dma_domain->win_arr[0]; + phys_addr_t wnd_addr = dma_domain->iommu_domain->geometry.aperture_start; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_config_ppaace(liodn, wnd_addr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, dma_domain->stash_id, + 0, wnd->prot); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) + pr_debug("PAMU PAACE configuration failed for liodn %d\n", + liodn); + + return ret; +} + +/* Map the DMA window corresponding to the LIODN */ +static int map_liodn(int liodn, struct fsl_dma_domain *dma_domain) +{ + if (dma_domain->win_cnt > 1) + return map_subwins(liodn, dma_domain); + else + return map_win(liodn, dma_domain); + +} + +/* Update window/subwindow mapping for the LIODN */ +static int update_liodn(int liodn, struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + int ret; + struct dma_window *wnd = &dma_domain->win_arr[wnd_nr]; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + if (dma_domain->win_cnt > 1) { + ret = pamu_config_spaace(liodn, dma_domain->win_cnt, wnd_nr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, + dma_domain->stash_id, + (wnd_nr > 0) ? 1 : 0, + wnd->prot); + if (ret) + pr_debug("Subwindow reconfiguration failed for liodn %d\n", liodn); + } else { + phys_addr_t wnd_addr; + + wnd_addr = dma_domain->iommu_domain->geometry.aperture_start; + + ret = pamu_config_ppaace(liodn, wnd_addr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, dma_domain->stash_id, + 0, wnd->prot); + if (ret) + pr_debug("Window reconfiguration failed for liodn %d\n", liodn); + } + + spin_unlock_irqrestore(&iommu_lock, flags); + + return ret; +} + +static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain, + u32 val) +{ + int ret = 0, i; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Windows not configured, stash destination update failed for liodn %d\n", liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + return -EINVAL; + } + + for (i = 0; i < dma_domain->win_cnt; i++) { + ret = pamu_update_paace_stash(liodn, i, val); + if (ret) { + pr_debug("Failed to update SPAACE %d field for liodn %d\n ", i, liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + return ret; + } + } + + spin_unlock_irqrestore(&iommu_lock, flags); + + return ret; +} + +/* Set the geometry parameters for a LIODN */ +static int pamu_set_liodn(int liodn, struct device *dev, + struct fsl_dma_domain *dma_domain, + struct iommu_domain_geometry *geom_attr, + u32 win_cnt) +{ + phys_addr_t window_addr, window_size; + phys_addr_t subwin_size; + int ret = 0, i; + u32 omi_index = ~(u32)0; + unsigned long flags; + + /* + * Configure the omi_index at the geometry setup time. + * This is a static value which depends on the type of + * device and would not change thereafter. 
+ */ + get_ome_index(&omi_index, dev); + + window_addr = geom_attr->aperture_start; + window_size = dma_domain->geom_size; + + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_disable_liodn(liodn); + if (!ret) + ret = pamu_config_ppaace(liodn, window_addr, window_size, omi_index, + 0, dma_domain->snoop_id, + dma_domain->stash_id, win_cnt, 0); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("PAMU PAACE configuration failed for liodn %d, win_cnt =%d\n", liodn, win_cnt); + return ret; + } + + if (win_cnt > 1) { + subwin_size = window_size >> ilog2(win_cnt); + for (i = 0; i < win_cnt; i++) { + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_disable_spaace(liodn, i); + if (!ret) + ret = pamu_config_spaace(liodn, win_cnt, i, + subwin_size, omi_index, + 0, dma_domain->snoop_id, + dma_domain->stash_id, + 0, 0); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("PAMU SPAACE configuration failed for liodn %d\n", liodn); + return ret; + } + } + } + + return ret; +} + +static int check_size(u64 size, dma_addr_t iova) +{ + /* + * Size must be a power of two and at least be equal + * to PAMU page size. + */ + if ((size & (size - 1)) || size < PAMU_PAGE_SIZE) { + pr_debug("%s: size too small or not a power of two\n", __func__); + return -EINVAL; + } + + /* iova must be page size aligned*/ + if (iova & (size - 1)) { + pr_debug("%s: address is not aligned with window size\n", __func__); + return -EINVAL; + } + + return 0; +} + +static struct fsl_dma_domain *iommu_alloc_dma_domain(void) +{ + struct fsl_dma_domain *domain; + + domain = kmem_cache_zalloc(fsl_pamu_domain_cache, GFP_KERNEL); + if (!domain) + return NULL; + + domain->stash_id = ~(u32)0; + domain->snoop_id = ~(u32)0; + domain->win_cnt = pamu_get_max_subwin_cnt(); + domain->geom_size = 0; + + INIT_LIST_HEAD(&domain->devices); + + spin_lock_init(&domain->domain_lock); + + return domain; +} + +static void remove_device_ref(struct device_domain_info *info, u32 win_cnt) +{ + unsigned long flags; + + list_del(&info->link); + spin_lock_irqsave(&iommu_lock, flags); + if (win_cnt > 1) + pamu_free_subwins(info->liodn); + pamu_disable_liodn(info->liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + spin_lock_irqsave(&device_domain_lock, flags); + info->dev->archdata.iommu_domain = NULL; + kmem_cache_free(iommu_devinfo_cache, info); + spin_unlock_irqrestore(&device_domain_lock, flags); +} + +static void detach_device(struct device *dev, struct fsl_dma_domain *dma_domain) +{ + struct device_domain_info *info, *tmp; + unsigned long flags; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + /* Remove the device from the domain device list */ + list_for_each_entry_safe(info, tmp, &dma_domain->devices, link) { + if (!dev || (info->dev == dev)) + remove_device_ref(info, dma_domain->win_cnt); + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); +} + +static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct device *dev) +{ + struct device_domain_info *info, *old_domain_info; + unsigned long flags; + + spin_lock_irqsave(&device_domain_lock, flags); + /* + * Check here if the device is already attached to domain or not. + * If the device is already attached to a domain detach it. 
+ */ + old_domain_info = dev->archdata.iommu_domain; + if (old_domain_info && old_domain_info->domain != dma_domain) { + spin_unlock_irqrestore(&device_domain_lock, flags); + detach_device(dev, old_domain_info->domain); + spin_lock_irqsave(&device_domain_lock, flags); + } + + info = kmem_cache_zalloc(iommu_devinfo_cache, GFP_ATOMIC); + + info->dev = dev; + info->liodn = liodn; + info->domain = dma_domain; + + list_add(&info->link, &dma_domain->devices); + /* + * In case of devices with multiple LIODNs just store + * the info for the first LIODN as all + * LIODNs share the same domain + */ + if (!dev->archdata.iommu_domain) + dev->archdata.iommu_domain = info; + spin_unlock_irqrestore(&device_domain_lock, flags); + +} + +static phys_addr_t fsl_pamu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + + if ((iova < domain->geometry.aperture_start) || + iova > (domain->geometry.aperture_end)) + return 0; + + return get_phys_addr(dma_domain, iova); +} + +static int fsl_pamu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + return cap == IOMMU_CAP_CACHE_COHERENCY; +} + +static void fsl_pamu_domain_destroy(struct iommu_domain *domain) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + + domain->priv = NULL; + + /* remove all the devices from the device list */ + detach_device(NULL, dma_domain); + + dma_domain->enabled = 0; + dma_domain->mapped = 0; + + kmem_cache_free(fsl_pamu_domain_cache, dma_domain); +} + +static int fsl_pamu_domain_init(struct iommu_domain *domain) +{ + struct fsl_dma_domain *dma_domain; + + dma_domain = iommu_alloc_dma_domain(); + if (!dma_domain) { + pr_debug("dma_domain allocation failed\n"); + return -ENOMEM; + } + domain->priv = dma_domain; + dma_domain->iommu_domain = domain; + /* defaul geometry 64 GB i.e. 
maximum system address */ + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = (1ULL << 36) - 1; + domain->geometry.force_aperture = true; + + return 0; +} + +/* Configure geometry settings for all LIODNs associated with domain */ +static int pamu_set_domain_geometry(struct fsl_dma_domain *dma_domain, + struct iommu_domain_geometry *geom_attr, + u32 win_cnt) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = pamu_set_liodn(info->liodn, info->dev, dma_domain, + geom_attr, win_cnt); + if (ret) + break; + } + + return ret; +} + +/* Update stash destination for all LIODNs associated with the domain */ +static int update_domain_stash(struct fsl_dma_domain *dma_domain, u32 val) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = update_liodn_stash(info->liodn, dma_domain, val); + if (ret) + break; + } + + return ret; +} + +/* Update domain mappings for all LIODNs associated with the domain */ +static int update_domain_mapping(struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = update_liodn(info->liodn, dma_domain, wnd_nr); + if (ret) + break; + } + return ret; +} + +static int disable_domain_win(struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + if (dma_domain->win_cnt == 1 && dma_domain->enabled) { + ret = pamu_disable_liodn(info->liodn); + if (!ret) + dma_domain->enabled = 0; + } else { + ret = pamu_disable_spaace(info->liodn, wnd_nr); + } + } + + return ret; +} + +static void fsl_pamu_window_disable(struct iommu_domain *domain, u32 wnd_nr) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Number of windows not configured\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return; + } + + if (wnd_nr >= dma_domain->win_cnt) { + pr_debug("Invalid window index\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return; + } + + if (dma_domain->win_arr[wnd_nr].valid) { + ret = disable_domain_win(dma_domain, wnd_nr); + if (!ret) { + dma_domain->win_arr[wnd_nr].valid = 0; + dma_domain->mapped--; + } + } + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + +} + +static int fsl_pamu_window_enable(struct iommu_domain *domain, u32 wnd_nr, + phys_addr_t paddr, u64 size, int prot) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + struct dma_window *wnd; + int pamu_prot = 0; + int ret; + unsigned long flags; + u64 win_size; + + if (prot & IOMMU_READ) + pamu_prot |= PAACE_AP_PERMS_QUERY; + if (prot & IOMMU_WRITE) + pamu_prot |= PAACE_AP_PERMS_UPDATE; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Number of windows not configured\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENODEV; + } + + if (wnd_nr >= dma_domain->win_cnt) { + pr_debug("Invalid window index\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + win_size = dma_domain->geom_size >> ilog2(dma_domain->win_cnt); + if (size > win_size) { + pr_debug("Invalid window size \n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + if 
(dma_domain->win_cnt == 1) { + if (dma_domain->enabled) { + pr_debug("Disable the window before updating the mapping\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + ret = check_size(size, domain->geometry.aperture_start); + if (ret) { + pr_debug("Aperture start not aligned to the size\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + } + + wnd = &dma_domain->win_arr[wnd_nr]; + if (!wnd->valid) { + wnd->paddr = paddr; + wnd->size = size; + wnd->prot = pamu_prot; + + ret = update_domain_mapping(dma_domain, wnd_nr); + if (!ret) { + wnd->valid = 1; + dma_domain->mapped++; + } + } else { + pr_debug("Disable the window before updating the mapping\n"); + ret = -EBUSY; + } + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +/* + * Attach the LIODN to the DMA domain and configure the geometry + * and window mappings. + */ +static int handle_attach_device(struct fsl_dma_domain *dma_domain, + struct device *dev, const u32 *liodn, + int num) +{ + unsigned long flags; + struct iommu_domain *domain = dma_domain->iommu_domain; + int ret = 0; + int i; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + for (i = 0; i < num; i++) { + + /* Ensure that LIODN value is valid */ + if (liodn[i] >= PAACE_NUMBER_ENTRIES) { + pr_debug("Invalid liodn %d, attach device failed for %s\n", + liodn[i], dev->of_node->full_name); + ret = -EINVAL; + break; + } + + attach_device(dma_domain, liodn[i], dev); + /* + * Check if geometry has already been configured + * for the domain. If yes, set the geometry for + * the LIODN. + */ + if (dma_domain->win_arr) { + u32 win_cnt = dma_domain->win_cnt > 1 ? dma_domain->win_cnt : 0; + ret = pamu_set_liodn(liodn[i], dev, dma_domain, + &domain->geometry, + win_cnt); + if (ret) + break; + if (dma_domain->mapped) { + /* + * Create window/subwindow mapping for + * the LIODN. + */ + ret = map_liodn(liodn[i], dma_domain); + if (ret) + break; + } + } + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +static int fsl_pamu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + const u32 *liodn; + u32 liodn_cnt; + int len, ret = 0; + struct pci_dev *pdev = NULL; + struct pci_controller *pci_ctl; + + /* + * Use LIODN of the PCI controller while attaching a + * PCI device. + */ + if (dev_is_pci(dev)) { + pdev = to_pci_dev(dev); + pci_ctl = pci_bus_to_host(pdev->bus); + /* + * make dev point to pci controller device + * so we can get the LIODN programmed by + * u-boot. + */ + dev = pci_ctl->parent; + } + + liodn = of_get_property(dev->of_node, "fsl,liodn", &len); + if (liodn) { + liodn_cnt = len / sizeof(u32); + ret = handle_attach_device(dma_domain, dev, + liodn, liodn_cnt); + } else { + pr_debug("missing fsl,liodn property at %s\n", + dev->of_node->full_name); + ret = -EINVAL; + } + + return ret; +} + +static void fsl_pamu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + const u32 *prop; + int len; + struct pci_dev *pdev = NULL; + struct pci_controller *pci_ctl; + + /* + * Use LIODN of the PCI controller while detaching a + * PCI device. + */ + if (dev_is_pci(dev)) { + pdev = to_pci_dev(dev); + pci_ctl = pci_bus_to_host(pdev->bus); + /* + * make dev point to pci controller device + * so we can get the LIODN programmed by + * u-boot. 
+ */ + dev = pci_ctl->parent; + } + + prop = of_get_property(dev->of_node, "fsl,liodn", &len); + if (prop) + detach_device(dev, dma_domain); + else + pr_debug("missing fsl,liodn property at %s\n", + dev->of_node->full_name); +} + +static int configure_domain_geometry(struct iommu_domain *domain, void *data) +{ + struct iommu_domain_geometry *geom_attr = data; + struct fsl_dma_domain *dma_domain = domain->priv; + dma_addr_t geom_size; + unsigned long flags; + + geom_size = geom_attr->aperture_end - geom_attr->aperture_start + 1; + /* + * Sanity check the geometry size. Also, we do not support + * DMA outside of the geometry. + */ + if (check_size(geom_size, geom_attr->aperture_start) || + !geom_attr->force_aperture) { + pr_debug("Invalid PAMU geometry attributes\n"); + return -EINVAL; + } + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (dma_domain->enabled) { + pr_debug("Can't set geometry attributes as domain is active\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + /* Copy the domain geometry information */ + memcpy(&domain->geometry, geom_attr, + sizeof(struct iommu_domain_geometry)); + dma_domain->geom_size = geom_size; + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return 0; +} + +/* Set the domain stash attribute */ +static int configure_domain_stash(struct fsl_dma_domain *dma_domain, void *data) +{ + struct pamu_stash_attribute *stash_attr = data; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + + memcpy(&dma_domain->dma_stash, stash_attr, + sizeof(struct pamu_stash_attribute)); + + dma_domain->stash_id = get_stash_id(stash_attr->cache, + stash_attr->cpu); + if (dma_domain->stash_id == ~(u32)0) { + pr_debug("Invalid stash attributes\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + ret = update_domain_stash(dma_domain, dma_domain->stash_id); + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +/* Configure domain dma state i.e. enable/disable DMA*/ +static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool enable) +{ + struct device_domain_info *info; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + + if (enable && !dma_domain->mapped) { + pr_debug("Can't enable DMA domain without valid mapping\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENODEV; + } + + dma_domain->enabled = enable; + list_for_each_entry(info, &dma_domain->devices, + link) { + ret = (enable) ? 
pamu_enable_liodn(info->liodn) : + pamu_disable_liodn(info->liodn); + if (ret) + pr_debug("Unable to set dma state for liodn %d", + info->liodn); + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return 0; +} + +static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, + enum iommu_attr attr_type, void *data) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + int ret = 0; + + + switch (attr_type) { + case DOMAIN_ATTR_GEOMETRY: + ret = configure_domain_geometry(domain, data); + break; + case DOMAIN_ATTR_FSL_PAMU_STASH: + ret = configure_domain_stash(dma_domain, data); + break; + case DOMAIN_ATTR_FSL_PAMU_ENABLE: + ret = configure_domain_dma_state(dma_domain, *(int *)data); + break; + default: + pr_debug("Unsupported attribute type\n"); + ret = -EINVAL; + break; + }; + + return ret; +} + +static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, + enum iommu_attr attr_type, void *data) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + int ret = 0; + + + switch (attr_type) { + case DOMAIN_ATTR_FSL_PAMU_STASH: + memcpy((struct pamu_stash_attribute *) data, &dma_domain->dma_stash, + sizeof(struct pamu_stash_attribute)); + break; + case DOMAIN_ATTR_FSL_PAMU_ENABLE: + *(int *)data = dma_domain->enabled; + break; + case DOMAIN_ATTR_FSL_PAMUV1: + *(int *)data = DOMAIN_ATTR_FSL_PAMUV1; + break; + default: + pr_debug("Unsupported attribute type\n"); + ret = -EINVAL; + break; + }; + + return ret; +} + +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) + +static struct iommu_group *get_device_iommu_group(struct device *dev) +{ + struct iommu_group *group; + + group = iommu_group_get(dev); + if (!group) + group = iommu_group_alloc(); + + return group; +} + +static bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl) +{ + u32 version; + + /* Check the PCI controller version number by reading the BRR1 register */ + version = in_be32(pci_ctl->cfg_addr + (PCI_FSL_BRR1 >> 2)); + version &= PCI_FSL_BRR1_VER; + /* If PCI controller version is >= 0x204 we can partition endpoints */ + if (version >= 0x204) + return 1; + + return 0; +} + +/* Get iommu group information from peer devices or devices on the parent bus */ +static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev) +{ + struct pci_dev *tmp; + struct iommu_group *group; + struct pci_bus *bus = pdev->bus; + + /* + * Traverse the pci bus device list to get + * the shared iommu group.
+ */ + while (bus) { + list_for_each_entry(tmp, &bus->devices, bus_list) { + if (tmp == pdev) + continue; + group = iommu_group_get(&tmp->dev); + if (group) + return group; + } + + bus = bus->parent; + } + + return NULL; +} + +static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) +{ + struct pci_controller *pci_ctl; + bool pci_endpt_partioning; + struct iommu_group *group = NULL; + struct pci_dev *bridge, *dma_pdev = NULL; + + pci_ctl = pci_bus_to_host(pdev->bus); + pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); + /* We can partition PCIe devices so assign device group to the device */ + if (pci_endpt_partioning) { + bridge = pci_find_upstream_pcie_bridge(pdev); + if (bridge) { + if (pci_is_pcie(bridge)) + dma_pdev = pci_get_domain_bus_and_slot( + pci_domain_nr(pdev->bus), + bridge->subordinate->number, 0); + if (!dma_pdev) + dma_pdev = pci_dev_get(bridge); + } else + dma_pdev = pci_dev_get(pdev); + + /* Account for quirked devices */ + swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); + + /* + * If it's a multifunction device that does not support our + * required ACS flags, add to the same group as lowest numbered + * function that also does not support the required ACS flags. + */ + if (dma_pdev->multifunction && + !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { + u8 i, slot = PCI_SLOT(dma_pdev->devfn); + + for (i = 0; i < 8; i++) { + struct pci_dev *tmp; + + tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); + if (!tmp) + continue; + + if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { + swap_pci_ref(&dma_pdev, tmp); + break; + } + pci_dev_put(tmp); + } + } + + /* + * Devices on the root bus go through the iommu. If that's not us, + * find the next upstream device and test ACS up to the root bus. + * Finding the next device may require skipping virtual buses. + */ + while (!pci_is_root_bus(dma_pdev->bus)) { + struct pci_bus *bus = dma_pdev->bus; + + while (!bus->self) { + if (!pci_is_root_bus(bus)) + bus = bus->parent; + else + goto root_bus; + } + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) + break; + + swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); + } + +root_bus: + group = get_device_iommu_group(&dma_pdev->dev); + pci_dev_put(dma_pdev); + /* + * PCIe controller is not a partitionable entity, + * free the controller device iommu_group. + */ + if (pci_ctl->parent->iommu_group) + iommu_group_remove_device(pci_ctl->parent); + } else { + /* + * All devices connected to the controller will share the + * PCI controller's device group. If this is the first + * device to be probed for the pci controller, copy the + * device group information from the PCI controller device + * node and remove the PCI controller iommu group. + * For subsequent devices, the iommu group information can + * be obtained from sibling devices (i.e. from the bus_devices + * link list). + */ + if (pci_ctl->parent->iommu_group) { + group = get_device_iommu_group(pci_ctl->parent); + iommu_group_remove_device(pci_ctl->parent); + } else + group = get_shared_pci_device_group(pdev); + } + + if (!group) + group = ERR_PTR(-ENODEV); + + return group; +} + +static int fsl_pamu_add_device(struct device *dev) +{ + struct iommu_group *group = ERR_PTR(-ENODEV); + struct pci_dev *pdev; + const u32 *prop; + int ret, len; + + /* + * For platform devices we allocate a separate group for + * each of the devices.
+ */ + if (dev_is_pci(dev)) { + pdev = to_pci_dev(dev); + /* Don't create device groups for virtual PCI bridges */ + if (pdev->subordinate) + return 0; + + group = get_pci_device_group(pdev); + + } else { + prop = of_get_property(dev->of_node, "fsl,liodn", &len); + if (prop) + group = get_device_iommu_group(dev); + } + + if (IS_ERR(group)) + return PTR_ERR(group); + + ret = iommu_group_add_device(group, dev); + + iommu_group_put(group); + return ret; +} + +static void fsl_pamu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + +static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + /* Ensure domain is inactive i.e. DMA should be disabled for the domain */ + if (dma_domain->enabled) { + pr_debug("Can't set geometry attributes as domain is active\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + /* Ensure that the geometry has been set for the domain */ + if (!dma_domain->geom_size) { + pr_debug("Please configure geometry before setting the number of windows\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + /* + * Ensure we have valid window count i.e. it should be less than + * maximum permissible limit and should be a power of two. + */ + if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) { + pr_debug("Invalid window count\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, + ((w_count > 1) ? w_count : 0)); + if (!ret) { + if (dma_domain->win_arr) + kfree(dma_domain->win_arr); + dma_domain->win_arr = kzalloc(sizeof(struct dma_window) * + w_count, GFP_ATOMIC); + if (!dma_domain->win_arr) { + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENOMEM; + } + dma_domain->win_cnt = w_count; + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +static u32 fsl_pamu_get_windows(struct iommu_domain *domain) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + + return dma_domain->win_cnt; +} + +static struct iommu_ops fsl_pamu_ops = { + .domain_init = fsl_pamu_domain_init, + .domain_destroy = fsl_pamu_domain_destroy, + .attach_dev = fsl_pamu_attach_device, + .detach_dev = fsl_pamu_detach_device, + .domain_window_enable = fsl_pamu_window_enable, + .domain_window_disable = fsl_pamu_window_disable, + .domain_get_windows = fsl_pamu_get_windows, + .domain_set_windows = fsl_pamu_set_windows, + .iova_to_phys = fsl_pamu_iova_to_phys, + .domain_has_cap = fsl_pamu_domain_has_cap, + .domain_set_attr = fsl_pamu_set_domain_attr, + .domain_get_attr = fsl_pamu_get_domain_attr, + .add_device = fsl_pamu_add_device, + .remove_device = fsl_pamu_remove_device, +}; + +int pamu_domain_init() +{ + int ret = 0; + + ret = iommu_init_mempool(); + if (ret) + return ret; + + bus_set_iommu(&platform_bus_type, &fsl_pamu_ops); + bus_set_iommu(&pci_bus_type, &fsl_pamu_ops); + + return ret; +} diff --git a/drivers/iommu/fsl_pamu_domain.h b/drivers/iommu/fsl_pamu_domain.h new file mode 100644 index 00000000000..c90293f9970 --- /dev/null +++ b/drivers/iommu/fsl_pamu_domain.h @@ -0,0 +1,85 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + */ + +#ifndef __FSL_PAMU_DOMAIN_H +#define __FSL_PAMU_DOMAIN_H + +#include "fsl_pamu.h" + +struct dma_window { + phys_addr_t paddr; + u64 size; + int valid; + int prot; +}; + +struct fsl_dma_domain { + /* + * Indicates the geometry size for the domain. + * This would be set when the geometry is + * configured for the domain. + */ + dma_addr_t geom_size; + /* + * Number of windows associated with this domain. + * During domain initialization, it is set to + * the maximum number of subwindows allowed for a LIODN. + * Minimum value for this is 1 indicating a single PAMU + * window, without any sub windows. Value can be set/ + * queried by set_attr/get_attr API for DOMAIN_ATTR_WINDOWS. + * Value can only be set once the geometry has been configured. + */ + u32 win_cnt; + /* + * win_arr contains information of the configured + * windows for a domain. This is allocated only + * when the number of windows for the domain is + * set. + */ + struct dma_window *win_arr; + /* list of devices associated with the domain */ + struct list_head devices; + /* dma_domain states: + * mapped - A particular mapping has been created + * within the configured geometry. + * enabled - DMA has been enabled for the given + * domain. This translates to setting of the + * valid bit for the primary PAACE in the PAMU + * PAACT table. Domain geometry should be set and + * it must have a valid mapping before DMA can be + * enabled for it. + * + */ + int mapped; + int enabled; + /* stash_id obtained from the stash attribute details */ + u32 stash_id; + struct pamu_stash_attribute dma_stash; + u32 snoop_id; + struct iommu_domain *iommu_domain; + spinlock_t domain_lock; +}; + +/* domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct device *dev; + u32 liodn; + struct fsl_dma_domain *domain; /* pointer to domain */ +}; +#endif /* __FSL_PAMU_DOMAIN_H */ diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c9c6053198d..51b6b77dc3e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, Intel Corporation. + * Copyright © 2006-2014 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,15 +10,11 @@ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA.
- * - * Copyright (C) 2006-2008 Intel Corporation - * Author: Ashok Raj <ashok.raj@intel.com> - * Author: Shaohua Li <shaohua.li@intel.com> - * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> - * Author: Fenghua Yu <fenghua.yu@intel.com> + * Authors: David Woodhouse <dwmw2@infradead.org>, + * Ashok Raj <ashok.raj@intel.com>, + * Shaohua Li <shaohua.li@intel.com>, + * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>, + * Fenghua Yu <fenghua.yu@intel.com> */ #include <linux/init.h> @@ -33,6 +29,7 @@ #include <linux/dmar.h> #include <linux/dma-mapping.h> #include <linux/mempool.h> +#include <linux/memory.h> #include <linux/timer.h> #include <linux/iova.h> #include <linux/iommu.h> @@ -42,14 +39,17 @@ #include <linux/dmi.h> #include <linux/pci-ats.h> #include <linux/memblock.h> +#include <linux/dma-contiguous.h> +#include <asm/irq_remapping.h> #include <asm/cacheflush.h> #include <asm/iommu.h> +#include "irq_remapping.h" +#include "pci.h" + #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE -#define IS_BRIDGE_HOST_DEVICE(pdev) \ - ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) @@ -61,6 +61,7 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 #define MAX_AGAW_WIDTH 64 +#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT) #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1) #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1) @@ -104,12 +105,12 @@ static inline int agaw_to_level(int agaw) static inline int agaw_to_width(int agaw) { - return 30 + agaw * LEVEL_STRIDE; + return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH); } static inline int width_to_agaw(int width) { - return (width - 30) / LEVEL_STRIDE; + return DIV_ROUND_UP(width - 30, LEVEL_STRIDE); } static inline unsigned int level_to_offset_bits(int level) @@ -139,7 +140,7 @@ static inline unsigned long align_to_level(unsigned long pfn, int level) static inline unsigned long lvl_to_nr_pages(unsigned int lvl) { - return 1 << ((lvl - 1) * LEVEL_STRIDE); + return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); } /* VT-d pages must always be _smaller_ than MM pages. 
Otherwise things @@ -286,26 +287,6 @@ static inline void dma_clear_pte(struct dma_pte *pte) pte->val = 0; } -static inline void dma_set_pte_readable(struct dma_pte *pte) -{ - pte->val |= DMA_PTE_READ; -} - -static inline void dma_set_pte_writable(struct dma_pte *pte) -{ - pte->val |= DMA_PTE_WRITE; -} - -static inline void dma_set_pte_snp(struct dma_pte *pte) -{ - pte->val |= DMA_PTE_SNP; -} - -static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) -{ - pte->val = (pte->val & ~3) | (prot & 3); -} - static inline u64 dma_pte_addr(struct dma_pte *pte) { #ifdef CONFIG_64BIT @@ -316,11 +297,6 @@ static inline u64 dma_pte_addr(struct dma_pte *pte) #endif } -static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn) -{ - pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT; -} - static inline bool dma_pte_present(struct dma_pte *pte) { return (pte->val & 3) != 0; @@ -356,10 +332,18 @@ static int hw_pass_through = 1; /* si_domain contains mulitple devices */ #define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2) +/* define the limit of IOMMUs supported in each domain */ +#ifdef CONFIG_X86 +# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS +#else +# define IOMMU_UNITS_SUPPORTED 64 +#endif + struct dmar_domain { int id; /* domain id */ int nid; /* node id */ - unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ + DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED); + /* bitmap of iommus this domain uses*/ struct list_head devices; /* all devices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ @@ -386,23 +370,46 @@ struct dmar_domain { struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ - int segment; /* PCI domain */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ - struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */ + struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ }; +struct dmar_rmrr_unit { + struct list_head list; /* list of rmrr units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + u64 base_address; /* reserved base address*/ + u64 end_address; /* reserved end address */ + struct dmar_dev_scope *devices; /* target devices */ + int devices_cnt; /* target device count */ +}; + +struct dmar_atsr_unit { + struct list_head list; /* list of ATSR units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + struct dmar_dev_scope *devices; /* target devices */ + int devices_cnt; /* target device count */ + u8 include_all:1; /* include all ports */ +}; + +static LIST_HEAD(dmar_atsr_units); +static LIST_HEAD(dmar_rmrr_units); + +#define for_each_rmrr_units(rmrr) \ + list_for_each_entry(rmrr, &dmar_rmrr_units, list) + static void flush_unmaps_timeout(unsigned long data); -DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); +static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); #define HIGH_WATER_MARK 250 struct deferred_flush_tables { int next; struct iova *iova[HIGH_WATER_MARK]; struct dmar_domain *domain[HIGH_WATER_MARK]; + struct page *freelist[HIGH_WATER_MARK]; }; static struct deferred_flush_tables *deferred_flush; @@ -416,7 +423,12 @@ static LIST_HEAD(unmaps_to_do); static int timer_on; static long list_size; +static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); +static void domain_remove_one_dev_info(struct dmar_domain *domain, + struct device *dev); 
+static void iommu_detach_dependent_devices(struct intel_iommu *iommu, + struct device *dev); #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON int dmar_disabled = 0; @@ -571,7 +583,7 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE); BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY); - iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); + iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus); if (iommu_id < 0 || iommu_id >= g_num_of_iommus) return NULL; @@ -580,16 +592,31 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) static void domain_update_iommu_coherency(struct dmar_domain *domain) { - int i; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int i, found = 0; domain->iommu_coherency = 1; - for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { + for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { + found = 1; if (!ecap_coherent(g_iommus[i]->ecap)) { domain->iommu_coherency = 0; break; } } + if (found) + return; + + /* No hardware attached; use lowest common denominator */ + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!ecap_coherent(iommu->ecap)) { + domain->iommu_coherency = 0; + break; + } + } + rcu_read_unlock(); } static void domain_update_iommu_snooping(struct dmar_domain *domain) @@ -598,7 +625,7 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain) domain->iommu_snooping = 1; - for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { + for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { if (!ecap_sc_support(g_iommus[i]->ecap)) { domain->iommu_snooping = 0; break; @@ -618,12 +645,15 @@ static void domain_update_iommu_superpage(struct dmar_domain *domain) } /* set iommu_superpage to the smallest common denominator */ + rcu_read_lock(); for_each_active_iommu(iommu, drhd) { mask &= cap_super_page_val(iommu->cap); if (!mask) { break; } } + rcu_read_unlock(); + domain->iommu_superpage = fls(mask); } @@ -635,34 +665,56 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) domain_update_iommu_superpage(domain); } -static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) +static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) { struct dmar_drhd_unit *drhd = NULL; + struct intel_iommu *iommu; + struct device *tmp; + struct pci_dev *ptmp, *pdev = NULL; + u16 segment; int i; - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; - if (segment != drhd->segment) + if (dev_is_pci(dev)) { + pdev = to_pci_dev(dev); + segment = pci_domain_nr(pdev->bus); + } else if (ACPI_COMPANION(dev)) + dev = &ACPI_COMPANION(dev)->dev; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (pdev && segment != drhd->segment) continue; - for (i = 0; i < drhd->devices_cnt; i++) { - if (drhd->devices[i] && - drhd->devices[i]->bus->number == bus && - drhd->devices[i]->devfn == devfn) - return drhd->iommu; - if (drhd->devices[i] && - drhd->devices[i]->subordinate && - drhd->devices[i]->subordinate->number <= bus && - drhd->devices[i]->subordinate->subordinate >= bus) - return drhd->iommu; + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, tmp) { + if (tmp == dev) { + *bus = drhd->devices[i].bus; + *devfn = drhd->devices[i].devfn; + goto out; + } + + if (!pdev || !dev_is_pci(tmp)) + continue; + + ptmp = to_pci_dev(tmp); + if (ptmp->subordinate && + ptmp->subordinate->number <= pdev->bus->number && + ptmp->subordinate->busn_res.end >= 
pdev->bus->number) + goto got_pdev; } - if (drhd->include_all) - return drhd->iommu; + if (pdev && drhd->include_all) { + got_pdev: + *bus = pdev->bus->number; + *devfn = pdev->devfn; + goto out; + } } + iommu = NULL; + out: + rcu_read_unlock(); - return NULL; + return iommu; } static void domain_flush_cache(struct dmar_domain *domain, @@ -762,7 +814,7 @@ out: } static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn, int target_level) + unsigned long pfn, int *target_level) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; @@ -770,17 +822,21 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, int offset; BUG_ON(!domain->pgd); - BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); + + if (addr_width < BITS_PER_LONG && pfn >> addr_width) + /* Address beyond IOMMU's addressing capabilities. */ + return NULL; + parent = domain->pgd; - while (level > 0) { + while (1) { void *tmp_page; offset = pfn_level_offset(pfn, level); pte = &parent[offset]; - if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte))) + if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte))) break; - if (level == target_level) + if (level == *target_level) break; if (!dma_pte_present(pte)) { @@ -801,10 +857,16 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, pte, sizeof(*pte)); } } + if (level == 1) + break; + parent = phys_to_virt(dma_pte_addr(pte)); level--; } + if (!*target_level) + *target_level = level; + return pte; } @@ -842,14 +904,13 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, } /* clear last level pte, a tlb flush should be followed */ -static int dma_pte_clear_range(struct dmar_domain *domain, +static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned int large_page = 1; struct dma_pte *first_pte, *pte; - int order; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); @@ -873,9 +934,39 @@ static int dma_pte_clear_range(struct dmar_domain *domain, (void *)pte - (void *)first_pte); } while (start_pfn && start_pfn <= last_pfn); +} + +static void dma_pte_free_level(struct dmar_domain *domain, int level, + struct dma_pte *pte, unsigned long pfn, + unsigned long start_pfn, unsigned long last_pfn) +{ + pfn = max(start_pfn, pfn); + pte = &pte[pfn_level_offset(pfn, level)]; + + do { + unsigned long level_pfn; + struct dma_pte *level_pte; + + if (!dma_pte_present(pte) || dma_pte_superpage(pte)) + goto next; + + level_pfn = pfn & level_mask(level - 1); + level_pte = phys_to_virt(dma_pte_addr(pte)); - order = (large_page - 1) * 9; - return order; + if (level > 2) + dma_pte_free_level(domain, level - 1, level_pte, + level_pfn, start_pfn, last_pfn); + + /* If range covers entire pagetable, free it */ + if (!(start_pfn > level_pfn || + last_pfn < level_pfn + level_size(level) - 1)) { + dma_clear_pte(pte); + domain_flush_cache(domain, pte, sizeof(*pte)); + free_pgtable_page(level_pte); + } +next: + pfn += level_size(level); + } while (!first_pte_in_page(++pte) && pfn <= last_pfn); } /* free page table pages. 
last level pte should already be cleared */ @@ -884,55 +975,139 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - struct dma_pte *first_pte, *pte; - int total = agaw_to_level(domain->agaw); - int level; - unsigned long tmp; - int large_page = 2; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); BUG_ON(start_pfn > last_pfn); /* We don't need lock here; nobody else touches the iova range */ - level = 2; - while (level <= total) { - tmp = align_to_level(start_pfn, level); + dma_pte_free_level(domain, agaw_to_level(domain->agaw), + domain->pgd, 0, start_pfn, last_pfn); - /* If we can't even clear one PTE at this level, we're done */ - if (tmp + level_size(level) - 1 > last_pfn) - return; + /* free pgd */ + if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { + free_pgtable_page(domain->pgd); + domain->pgd = NULL; + } +} - do { - large_page = level; - first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); - if (large_page > level) - level = large_page + 1; - if (!pte) { - tmp = align_to_level(tmp + 1, level + 1); - continue; - } - do { - if (dma_pte_present(pte)) { - free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); - dma_clear_pte(pte); - } - pte++; - tmp += level_size(level); - } while (!first_pte_in_page(pte) && - tmp + level_size(level) - 1 <= last_pfn); +/* When a page at a given level is being unlinked from its parent, we don't + need to *modify* it at all. All we need to do is make a list of all the + pages which can be freed just as soon as we've flushed the IOTLB and we + know the hardware page-walk will no longer touch them. + The 'pte' argument is the *parent* PTE, pointing to the page that is to + be freed. */ +static struct page *dma_pte_list_pagetables(struct dmar_domain *domain, + int level, struct dma_pte *pte, + struct page *freelist) +{ + struct page *pg; + + pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT); + pg->freelist = freelist; + freelist = pg; + + if (level == 1) + return freelist; + + pte = page_address(pg); + do { + if (dma_pte_present(pte) && !dma_pte_superpage(pte)) + freelist = dma_pte_list_pagetables(domain, level - 1, + pte, freelist); + pte++; + } while (!first_pte_in_page(pte)); + + return freelist; +} + +static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, + struct dma_pte *pte, unsigned long pfn, + unsigned long start_pfn, + unsigned long last_pfn, + struct page *freelist) +{ + struct dma_pte *first_pte = NULL, *last_pte = NULL; + + pfn = max(start_pfn, pfn); + pte = &pte[pfn_level_offset(pfn, level)]; + + do { + unsigned long level_pfn; + + if (!dma_pte_present(pte)) + goto next; + + level_pfn = pfn & level_mask(level); + + /* If range covers entire pagetable, free it */ + if (start_pfn <= level_pfn && + last_pfn >= level_pfn + level_size(level) - 1) { + /* These suborbinate page tables are going away entirely. Don't + bother to clear them; we're just going to *free* them. 
*/ + if (level > 1 && !dma_pte_superpage(pte)) + freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist); + + dma_clear_pte(pte); + if (!first_pte) + first_pte = pte; + last_pte = pte; + } else if (level > 1) { + /* Recurse down into a level that isn't *entirely* obsolete */ + freelist = dma_pte_clear_level(domain, level - 1, + phys_to_virt(dma_pte_addr(pte)), + level_pfn, start_pfn, last_pfn, + freelist); + } +next: + pfn += level_size(level); + } while (!first_pte_in_page(++pte) && pfn <= last_pfn); + + if (first_pte) + domain_flush_cache(domain, first_pte, + (void *)++last_pte - (void *)first_pte); + + return freelist; +} + +/* We can't just free the pages because the IOMMU may still be walking + the page tables, and may have cached the intermediate levels. The + pages can only be freed after the IOTLB flush has been done. */ +struct page *domain_unmap(struct dmar_domain *domain, + unsigned long start_pfn, + unsigned long last_pfn) +{ + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + struct page *freelist = NULL; + + BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(start_pfn > last_pfn); + + /* we don't need lock here; nobody else touches the iova range */ + freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw), + domain->pgd, 0, start_pfn, last_pfn, NULL); - domain_flush_cache(domain, first_pte, - (void *)pte - (void *)first_pte); - - } while (tmp && tmp + level_size(level) - 1 <= last_pfn); - level++; - } /* free pgd */ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { - free_pgtable_page(domain->pgd); + struct page *pgd_page = virt_to_page(domain->pgd); + pgd_page->freelist = freelist; + freelist = pgd_page; + domain->pgd = NULL; } + + return freelist; +} + +void dma_free_pagelist(struct page *freelist) +{ + struct page *pg; + + while ((pg = freelist)) { + freelist = pg->freelist; + free_pgtable_page(page_address(pg)); + } } /* iommu handling */ @@ -1044,7 +1219,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, break; case DMA_TLB_PSI_FLUSH: val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); - /* Note: always flush non-leaf currently */ + /* IH bit is passed in as part of address */ val_iva = size_order | addr; break; default: @@ -1083,13 +1258,14 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, (unsigned long long)DMA_TLB_IAIG(val)); } -static struct device_domain_info *iommu_support_dev_iotlb( - struct dmar_domain *domain, int segment, u8 bus, u8 devfn) +static struct device_domain_info * +iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, + u8 bus, u8 devfn) { int found = 0; unsigned long flags; struct device_domain_info *info; - struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn); + struct pci_dev *pdev; if (!ecap_dev_iotlb_support(iommu->ecap)) return NULL; @@ -1105,34 +1281,35 @@ static struct device_domain_info *iommu_support_dev_iotlb( } spin_unlock_irqrestore(&device_domain_lock, flags); - if (!found || !info->dev) + if (!found || !info->dev || !dev_is_pci(info->dev)) return NULL; - if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS)) - return NULL; + pdev = to_pci_dev(info->dev); - if (!dmar_find_matched_atsr_unit(info->dev)) + if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS)) return NULL; - info->iommu = iommu; + if (!dmar_find_matched_atsr_unit(pdev)) + return NULL; return info; } static void iommu_enable_dev_iotlb(struct 
device_domain_info *info) { - if (!info) + if (!info || !dev_is_pci(info->dev)) return; - pci_enable_ats(info->dev, VTD_PAGE_SHIFT); + pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT); } static void iommu_disable_dev_iotlb(struct device_domain_info *info) { - if (!info->dev || !pci_ats_enabled(info->dev)) + if (!info->dev || !dev_is_pci(info->dev) || + !pci_ats_enabled(to_pci_dev(info->dev))) return; - pci_disable_ats(info->dev); + pci_disable_ats(to_pci_dev(info->dev)); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -1144,24 +1321,31 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &domain->devices, link) { - if (!info->dev || !pci_ats_enabled(info->dev)) + struct pci_dev *pdev; + if (!info->dev || !dev_is_pci(info->dev)) + continue; + + pdev = to_pci_dev(info->dev); + if (!pci_ats_enabled(pdev)) continue; sid = info->bus << 8 | info->devfn; - qdep = pci_ats_queue_depth(info->dev); + qdep = pci_ats_queue_depth(pdev); qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); } static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, - unsigned long pfn, unsigned int pages, int map) + unsigned long pfn, unsigned int pages, int ih, int map) { unsigned int mask = ilog2(__roundup_pow_of_two(pages)); uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; BUG_ON(pages == 0); + if (ih) + ih = 1 << 6; /* * Fallback to domain selective flush if no PSI support or the size is * too big. @@ -1172,7 +1356,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); else - iommu->flush.flush_iotlb(iommu, did, addr, mask, + iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, DMA_TLB_PSI_FLUSH); /* @@ -1241,8 +1425,8 @@ static int iommu_init_domains(struct intel_iommu *iommu) unsigned long nlongs; ndomains = cap_ndoms(iommu->cap); - pr_debug("IOMMU %d: Number of Domains supportd <%ld>\n", iommu->seq_id, - ndomains); + pr_debug("IOMMU%d: Number of Domains supported <%ld>\n", + iommu->seq_id, ndomains); nlongs = BITS_TO_LONGS(ndomains); spin_lock_init(&iommu->lock); @@ -1252,13 +1436,17 @@ static int iommu_init_domains(struct intel_iommu *iommu) */ iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL); if (!iommu->domain_ids) { - printk(KERN_ERR "Allocating domain id array failed\n"); + pr_err("IOMMU%d: allocating domain id array failed\n", + iommu->seq_id); return -ENOMEM; } iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *), GFP_KERNEL); if (!iommu->domains) { - printk(KERN_ERR "Allocating domain array failed\n"); + pr_err("IOMMU%d: allocating domain array failed\n", + iommu->seq_id); + kfree(iommu->domain_ids); + iommu->domain_ids = NULL; return -ENOMEM; } @@ -1271,62 +1459,50 @@ static int iommu_init_domains(struct intel_iommu *iommu) return 0; } - -static void domain_exit(struct dmar_domain *domain); -static void vm_domain_exit(struct dmar_domain *domain); - -void free_dmar_iommu(struct intel_iommu *iommu) +static void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; - int i; + int i, count; unsigned long flags; if ((iommu->domains) && (iommu->domain_ids)) { for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) { + /* + * Domain id 0 is reserved for invalid translation + * if hardware supports caching mode. 
+ */ + if (cap_caching_mode(iommu->cap) && i == 0) + continue; + domain = iommu->domains[i]; clear_bit(i, iommu->domain_ids); spin_lock_irqsave(&domain->iommu_lock, flags); - if (--domain->iommu_count == 0) { - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) - vm_domain_exit(domain); - else - domain_exit(domain); - } + count = --domain->iommu_count; spin_unlock_irqrestore(&domain->iommu_lock, flags); + if (count == 0) + domain_exit(domain); } } if (iommu->gcmd & DMA_GCMD_TE) iommu_disable_translation(iommu); - if (iommu->irq) { - irq_set_handler_data(iommu->irq, NULL); - /* This will mask the irq */ - free_irq(iommu->irq, iommu); - destroy_irq(iommu->irq); - } - kfree(iommu->domains); kfree(iommu->domain_ids); + iommu->domains = NULL; + iommu->domain_ids = NULL; g_iommus[iommu->seq_id] = NULL; - /* if all iommus are freed, free g_iommus */ - for (i = 0; i < g_num_of_iommus; i++) { - if (g_iommus[i]) - break; - } - - if (i == g_num_of_iommus) - kfree(g_iommus); - /* free context mapping */ free_context_table(iommu); } -static struct dmar_domain *alloc_domain(void) +static struct dmar_domain *alloc_domain(bool vm) { + /* domain id for virtual machine, it won't be set in context */ + static atomic_t vm_domid = ATOMIC_INIT(0); struct dmar_domain *domain; domain = alloc_domain_mem(); @@ -1334,8 +1510,15 @@ static struct dmar_domain *alloc_domain(void) return NULL; domain->nid = -1; - memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); + domain->iommu_count = 0; + memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp)); domain->flags = 0; + spin_lock_init(&domain->iommu_lock); + INIT_LIST_HEAD(&domain->devices); + if (vm) { + domain->id = atomic_inc_return(&vm_domid); + domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; + } return domain; } @@ -1359,8 +1542,9 @@ static int iommu_attach_domain(struct dmar_domain *domain, } domain->id = num; + domain->iommu_count++; set_bit(num, iommu->domain_ids); - set_bit(iommu->seq_id, &domain->iommu_bmp); + set_bit(iommu->seq_id, domain->iommu_bmp); iommu->domains[num] = domain; spin_unlock_irqrestore(&iommu->lock, flags); @@ -1372,22 +1556,16 @@ static void iommu_detach_domain(struct dmar_domain *domain, { unsigned long flags; int num, ndomains; - int found = 0; spin_lock_irqsave(&iommu->lock, flags); ndomains = cap_ndoms(iommu->cap); for_each_set_bit(num, iommu->domain_ids, ndomains) { if (iommu->domains[num] == domain) { - found = 1; + clear_bit(num, iommu->domain_ids); + iommu->domains[num] = NULL; break; } } - - if (found) { - clear_bit(num, iommu->domain_ids); - clear_bit(iommu->seq_id, &domain->iommu_bmp); - iommu->domains[num] = NULL; - } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -1459,8 +1637,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width) unsigned long sagaw; init_iova_domain(&domain->iovad, DMA_32BIT_PFN); - spin_lock_init(&domain->iommu_lock); - domain_reserve_special_ranges(domain); /* calculate AGAW */ @@ -1479,7 +1655,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width) return -ENODEV; } domain->agaw = agaw; - INIT_LIST_HEAD(&domain->devices); if (ecap_coherent(iommu->ecap)) domain->iommu_coherency = 1; @@ -1491,8 +1666,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width) else domain->iommu_snooping = 0; - domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); - domain->iommu_count = 1; + if (intel_iommu_superpage) + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); + else + domain->iommu_superpage = 0; + domain->nid = iommu->node; /* always allocate the 
top pgd */ @@ -1507,6 +1685,7 @@ static void domain_exit(struct dmar_domain *domain) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; + struct page *freelist = NULL; /* Domain 0 is reserved, so dont process it */ if (!domain) @@ -1516,29 +1695,33 @@ static void domain_exit(struct dmar_domain *domain) if (!intel_iommu_strict) flush_unmaps_timeout(0); + /* remove associated devices */ domain_remove_dev_info(domain); + /* destroy iovas */ put_iova_domain(&domain->iovad); - /* clear ptes */ - dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - - /* free page tables */ - dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); + freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); + /* clear attached or cached domains */ + rcu_read_lock(); for_each_active_iommu(iommu, drhd) - if (test_bit(iommu->seq_id, &domain->iommu_bmp)) + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || + test_bit(iommu->seq_id, domain->iommu_bmp)) iommu_detach_domain(domain, iommu); + rcu_read_unlock(); + + dma_free_pagelist(freelist); free_domain_mem(domain); } -static int domain_context_mapping_one(struct dmar_domain *domain, int segment, - u8 bus, u8 devfn, int translation) +static int domain_context_mapping_one(struct dmar_domain *domain, + struct intel_iommu *iommu, + u8 bus, u8 devfn, int translation) { struct context_entry *context; unsigned long flags; - struct intel_iommu *iommu; struct dma_pte *pgd; unsigned long num; unsigned long ndomains; @@ -1553,10 +1736,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, BUG_ON(translation != CONTEXT_TT_PASS_THROUGH && translation != CONTEXT_TT_MULTI_LEVEL); - iommu = device_to_iommu(segment, bus, devfn); - if (!iommu) - return -ENODEV; - context = device_to_context_entry(iommu, bus, devfn); if (!context) return -ENOMEM; @@ -1614,7 +1793,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, context_set_domain_id(context, id); if (translation != CONTEXT_TT_PASS_THROUGH) { - info = iommu_support_dev_iotlb(domain, segment, bus, devfn); + info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); translation = info ? 
CONTEXT_TT_DEV_IOTLB : CONTEXT_TT_MULTI_LEVEL; } @@ -1653,7 +1832,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, spin_unlock_irqrestore(&iommu->lock, flags); spin_lock_irqsave(&domain->iommu_lock, flags); - if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) { + if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) { domain->iommu_count++; if (domain->iommu_count == 1) domain->nid = iommu->node; @@ -1664,27 +1843,32 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, } static int -domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, - int translation) +domain_context_mapping(struct dmar_domain *domain, struct device *dev, + int translation) { int ret; - struct pci_dev *tmp, *parent; + struct pci_dev *pdev, *tmp, *parent; + struct intel_iommu *iommu; + u8 bus, devfn; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; - ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), - pdev->bus->number, pdev->devfn, + ret = domain_context_mapping_one(domain, iommu, bus, devfn, translation); - if (ret) + if (ret || !dev_is_pci(dev)) return ret; /* dependent device mapping */ + pdev = to_pci_dev(dev); tmp = pci_find_upstream_pcie_bridge(pdev); if (!tmp) return 0; /* Secondary interface's bus number and devfn 0 */ parent = pdev->bus->self; while (parent != tmp) { - ret = domain_context_mapping_one(domain, - pci_domain_nr(parent->bus), + ret = domain_context_mapping_one(domain, iommu, parent->bus->number, parent->devfn, translation); if (ret) @@ -1692,33 +1876,33 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, parent = parent->bus->self; } if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ - return domain_context_mapping_one(domain, - pci_domain_nr(tmp->subordinate), + return domain_context_mapping_one(domain, iommu, tmp->subordinate->number, 0, translation); else /* this is a legacy PCI bridge */ - return domain_context_mapping_one(domain, - pci_domain_nr(tmp->bus), + return domain_context_mapping_one(domain, iommu, tmp->bus->number, tmp->devfn, translation); } -static int domain_context_mapped(struct pci_dev *pdev) +static int domain_context_mapped(struct device *dev) { int ret; - struct pci_dev *tmp, *parent; + struct pci_dev *pdev, *tmp, *parent; struct intel_iommu *iommu; + u8 bus, devfn; - iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, - pdev->devfn); + iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; - ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn); - if (!ret) + ret = device_context_mapped(iommu, bus, devfn); + if (!ret || !dev_is_pci(dev)) return ret; + /* dependent device mapping */ + pdev = to_pci_dev(dev); tmp = pci_find_upstream_pcie_bridge(pdev); if (!tmp) return ret; @@ -1814,14 +1998,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, if (!pte) { largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl); + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); if (!pte) return -ENOMEM; /* It is large page*/ - if (largepage_lvl > 1) + if (largepage_lvl > 1) { pteval |= DMA_PTE_LARGE_PAGE; - else + /* Ensure that old small page tables are removed to make room + for superpage, if they exist. 
*/ + dma_pte_clear_range(domain, iov_pfn, + iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1); + dma_pte_free_pagetable(domain, iov_pfn, + iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1); + } else { pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; + } } /* We don't need lock here, nobody else @@ -1900,27 +2091,45 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); } +static inline void unlink_domain_info(struct device_domain_info *info) +{ + assert_spin_locked(&device_domain_lock); + list_del(&info->link); + list_del(&info->global); + if (info->dev) + info->dev->archdata.iommu = NULL; +} + static void domain_remove_dev_info(struct dmar_domain *domain) { struct device_domain_info *info; - unsigned long flags; - struct intel_iommu *iommu; + unsigned long flags, flags2; spin_lock_irqsave(&device_domain_lock, flags); while (!list_empty(&domain->devices)) { info = list_entry(domain->devices.next, struct device_domain_info, link); - list_del(&info->link); - list_del(&info->global); - if (info->dev) - info->dev->dev.archdata.iommu = NULL; + unlink_domain_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); iommu_disable_dev_iotlb(info); - iommu = device_to_iommu(info->segment, info->bus, info->devfn); - iommu_detach_dev(iommu, info->bus, info->devfn); - free_devinfo_mem(info); + iommu_detach_dev(info->iommu, info->bus, info->devfn); + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { + iommu_detach_dependent_devices(info->iommu, info->dev); + /* clear this iommu in iommu_bmp, update iommu count + * and capabilities + */ + spin_lock_irqsave(&domain->iommu_lock, flags2); + if (test_and_clear_bit(info->iommu->seq_id, + domain->iommu_bmp)) { + domain->iommu_count--; + domain_update_iommu_cap(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags2); + } + + free_devinfo_mem(info); spin_lock_irqsave(&device_domain_lock, flags); } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -1928,154 +2137,153 @@ static void domain_remove_dev_info(struct dmar_domain *domain) /* * find_domain - * Note: we use struct pci_dev->dev.archdata.iommu stores the info + * Note: we use struct device->archdata.iommu stores the info */ -static struct dmar_domain * -find_domain(struct pci_dev *pdev) +static struct dmar_domain *find_domain(struct device *dev) { struct device_domain_info *info; /* No lock here, assumes no domain exit in normal case */ - info = pdev->dev.archdata.iommu; + info = dev->archdata.iommu; if (info) return info->domain; return NULL; } +static inline struct device_domain_info * +dmar_search_domain_by_dev_info(int segment, int bus, int devfn) +{ + struct device_domain_info *info; + + list_for_each_entry(info, &device_domain_list, global) + if (info->iommu->segment == segment && info->bus == bus && + info->devfn == devfn) + return info; + + return NULL; +} + +static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, + int bus, int devfn, + struct device *dev, + struct dmar_domain *domain) +{ + struct dmar_domain *found = NULL; + struct device_domain_info *info; + unsigned long flags; + + info = alloc_devinfo_mem(); + if (!info) + return NULL; + + info->bus = bus; + info->devfn = devfn; + info->dev = dev; + info->domain = domain; + info->iommu = iommu; + if (!dev) + domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; + + spin_lock_irqsave(&device_domain_lock, flags); + if (dev) + found = find_domain(dev); + else { + struct device_domain_info *info2; + info2 = 
dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); + if (info2) + found = info2->domain; + } + if (found) { + spin_unlock_irqrestore(&device_domain_lock, flags); + free_devinfo_mem(info); + /* Caller must free the original domain */ + return found; + } + + list_add(&info->link, &domain->devices); + list_add(&info->global, &device_domain_list); + if (dev) + dev->archdata.iommu = info; + spin_unlock_irqrestore(&device_domain_lock, flags); + + return domain; +} + /* domain is initialized */ -static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) +static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw) { - struct dmar_domain *domain, *found = NULL; - struct intel_iommu *iommu; - struct dmar_drhd_unit *drhd; - struct device_domain_info *info, *tmp; - struct pci_dev *dev_tmp; + struct dmar_domain *domain, *free = NULL; + struct intel_iommu *iommu = NULL; + struct device_domain_info *info; + struct pci_dev *dev_tmp = NULL; unsigned long flags; - int bus = 0, devfn = 0; - int segment; - int ret; + u8 bus, devfn, bridge_bus, bridge_devfn; - domain = find_domain(pdev); + domain = find_domain(dev); if (domain) return domain; - segment = pci_domain_nr(pdev->bus); + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + u16 segment; - dev_tmp = pci_find_upstream_pcie_bridge(pdev); - if (dev_tmp) { - if (pci_is_pcie(dev_tmp)) { - bus = dev_tmp->subordinate->number; - devfn = 0; - } else { - bus = dev_tmp->bus->number; - devfn = dev_tmp->devfn; - } - spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry(info, &device_domain_list, global) { - if (info->segment == segment && - info->bus == bus && info->devfn == devfn) { - found = info->domain; - break; + segment = pci_domain_nr(pdev->bus); + dev_tmp = pci_find_upstream_pcie_bridge(pdev); + if (dev_tmp) { + if (pci_is_pcie(dev_tmp)) { + bridge_bus = dev_tmp->subordinate->number; + bridge_devfn = 0; + } else { + bridge_bus = dev_tmp->bus->number; + bridge_devfn = dev_tmp->devfn; } - } - spin_unlock_irqrestore(&device_domain_lock, flags); - /* pcie-pci bridge already has a domain, uses it */ - if (found) { - domain = found; - goto found_domain; + spin_lock_irqsave(&device_domain_lock, flags); + info = dmar_search_domain_by_dev_info(segment, + bridge_bus, + bridge_devfn); + if (info) { + iommu = info->iommu; + domain = info->domain; + } + spin_unlock_irqrestore(&device_domain_lock, flags); + /* pcie-pci bridge already has a domain, uses it */ + if (info) + goto found_domain; } } - domain = alloc_domain(); - if (!domain) + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) goto error; - /* Allocate new domain for the device */ - drhd = dmar_find_matched_drhd_unit(pdev); - if (!drhd) { - printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n", - pci_name(pdev)); - return NULL; - } - iommu = drhd->iommu; - - ret = iommu_attach_domain(domain, iommu); - if (ret) { + /* Allocate and initialize new domain for the device */ + domain = alloc_domain(false); + if (!domain) + goto error; + if (iommu_attach_domain(domain, iommu)) { free_domain_mem(domain); + domain = NULL; goto error; } - - if (domain_init(domain, gaw)) { - domain_exit(domain); + free = domain; + if (domain_init(domain, gaw)) goto error; - } /* register pcie-to-pci device */ if (dev_tmp) { - info = alloc_devinfo_mem(); - if (!info) { - domain_exit(domain); + domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn, + NULL, domain); + if (!domain) goto error; - } - info->segment = segment; - info->bus = bus; - 
info->devfn = devfn; - info->dev = NULL; - info->domain = domain; - /* This domain is shared by devices under p2p bridge */ - domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; - - /* pcie-to-pci bridge already has a domain, uses it */ - found = NULL; - spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry(tmp, &device_domain_list, global) { - if (tmp->segment == segment && - tmp->bus == bus && tmp->devfn == devfn) { - found = tmp->domain; - break; - } - } - if (found) { - spin_unlock_irqrestore(&device_domain_lock, flags); - free_devinfo_mem(info); - domain_exit(domain); - domain = found; - } else { - list_add(&info->link, &domain->devices); - list_add(&info->global, &device_domain_list); - spin_unlock_irqrestore(&device_domain_lock, flags); - } } found_domain: - info = alloc_devinfo_mem(); - if (!info) - goto error; - info->segment = segment; - info->bus = pdev->bus->number; - info->devfn = pdev->devfn; - info->dev = pdev; - info->domain = domain; - spin_lock_irqsave(&device_domain_lock, flags); - /* somebody is fast */ - found = find_domain(pdev); - if (found != NULL) { - spin_unlock_irqrestore(&device_domain_lock, flags); - if (found != domain) { - domain_exit(domain); - domain = found; - } - free_devinfo_mem(info); - return domain; - } - list_add(&info->link, &domain->devices); - list_add(&info->global, &device_domain_list); - pdev->dev.archdata.iommu = info; - spin_unlock_irqrestore(&device_domain_lock, flags); - return domain; + domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); error: - /* recheck it here, maybe others set it */ - return find_domain(pdev); + if (free != domain) + domain_exit(free); + + return domain; } static int iommu_identity_mapping; @@ -2109,14 +2317,14 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, DMA_PTE_READ|DMA_PTE_WRITE); } -static int iommu_prepare_identity_map(struct pci_dev *pdev, +static int iommu_prepare_identity_map(struct device *dev, unsigned long long start, unsigned long long end) { struct dmar_domain *domain; int ret; - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) return -ENOMEM; @@ -2126,13 +2334,13 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, up to start with in si_domain */ if (domain == si_domain && hw_pass_through) { printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", - pci_name(pdev), start, end); + dev_name(dev), start, end); return 0; } printk(KERN_INFO "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", - pci_name(pdev), start, end); + dev_name(dev), start, end); if (end < start) { WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" @@ -2160,7 +2368,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, goto error; /* context entry init */ - ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); + ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL); if (ret) goto error; @@ -2172,12 +2380,12 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, } static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, - struct pci_dev *pdev) + struct device *dev) { - if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) return 0; - return iommu_prepare_identity_map(pdev, rmrr->base_address, - rmrr->end_address); + return iommu_prepare_identity_map(dev, rmrr->base_address, + rmrr->end_address); } #ifdef 
CONFIG_INTEL_IOMMU_FLOPPY_WA @@ -2191,7 +2399,7 @@ static inline void iommu_prepare_isa(void) return; printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); - ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1); + ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1); if (ret) printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " @@ -2213,11 +2421,11 @@ static int __init si_domain_init(int hw) struct intel_iommu *iommu; int nid, ret = 0; - si_domain = alloc_domain(); + si_domain = alloc_domain(false); if (!si_domain) return -EFAULT; - pr_debug("Identity mapping domain is domain %d\n", si_domain->id); + si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; for_each_active_iommu(iommu, drhd) { ret = iommu_attach_domain(si_domain, iommu); @@ -2232,7 +2440,8 @@ static int __init si_domain_init(int hw) return -EFAULT; } - si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; + pr_debug("IOMMU: identity mapping domain is domain %d\n", + si_domain->id); if (hw) return 0; @@ -2252,16 +2461,14 @@ static int __init si_domain_init(int hw) return 0; } -static void domain_remove_one_dev_info(struct dmar_domain *domain, - struct pci_dev *pdev); -static int identity_mapping(struct pci_dev *pdev) +static int identity_mapping(struct device *dev) { struct device_domain_info *info; if (likely(!iommu_identity_mapping)) return 0; - info = pdev->dev.archdata.iommu; + info = dev->archdata.iommu; if (info && info != DUMMY_DEVICE_DOMAIN_INFO) return (info->domain == si_domain); @@ -2269,77 +2476,112 @@ static int identity_mapping(struct pci_dev *pdev) } static int domain_add_dev_info(struct dmar_domain *domain, - struct pci_dev *pdev, - int translation) + struct device *dev, int translation) { - struct device_domain_info *info; - unsigned long flags; + struct dmar_domain *ndomain; + struct intel_iommu *iommu; + u8 bus, devfn; int ret; - info = alloc_devinfo_mem(); - if (!info) - return -ENOMEM; + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; - ret = domain_context_mapping(domain, pdev, translation); + ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); + if (ndomain != domain) + return -EBUSY; + + ret = domain_context_mapping(domain, dev, translation); if (ret) { - free_devinfo_mem(info); + domain_remove_one_dev_info(domain, dev); return ret; } - info->segment = pci_domain_nr(pdev->bus); - info->bus = pdev->bus->number; - info->devfn = pdev->devfn; - info->dev = pdev; - info->domain = domain; + return 0; +} - spin_lock_irqsave(&device_domain_lock, flags); - list_add(&info->link, &domain->devices); - list_add(&info->global, &device_domain_list); - pdev->dev.archdata.iommu = info; - spin_unlock_irqrestore(&device_domain_lock, flags); +static bool device_has_rmrr(struct device *dev) +{ + struct dmar_rmrr_unit *rmrr; + struct device *tmp; + int i; - return 0; + rcu_read_lock(); + for_each_rmrr_units(rmrr) { + /* + * Return TRUE if this RMRR contains the device that + * is passed in. 
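
Aside (illustrative, not part of the patch): the hunks above replace the old (segment, bus, devfn) lookups with device_to_iommu(dev, &bus, &devfn), and domain_add_dev_info() now just inserts the device info and context-maps it. A hypothetical caller of those reworked helpers would look like the sketch below; example_map_device() is invented here purely to show the calling convention and mirrors what domain_add_dev_info() does.

static int example_map_device(struct dmar_domain *domain, struct device *dev)
{
        struct intel_iommu *iommu;
        u8 bus, devfn;

        /* resolve the owning IOMMU and the request's source bus/devfn */
        iommu = device_to_iommu(dev, &bus, &devfn);
        if (!iommu)
                return -ENODEV;         /* not behind any DMAR unit */

        /* record the device in the domain; another thread may win the race */
        if (dmar_insert_dev_info(iommu, bus, devfn, dev, domain) != domain)
                return -EBUSY;

        /* program the context entry so DMA from bus:devfn hits this domain */
        return domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
}
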
+ */ + for_each_active_dev_scope(rmrr->devices, + rmrr->devices_cnt, i, tmp) + if (tmp == dev) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + return false; } -static int iommu_should_identity_map(struct pci_dev *pdev, int startup) +static int iommu_should_identity_map(struct device *dev, int startup) { - if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) - return 1; - if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) - return 1; + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); - if (!(iommu_identity_mapping & IDENTMAP_ALL)) - return 0; + /* + * We want to prevent any device associated with an RMRR from + * getting placed into the SI Domain. This is done because + * problems exist when devices are moved in and out of domains + * and their respective RMRR info is lost. We exempt USB devices + * from this process due to their usage of RMRRs that are known + * to not be needed after BIOS hand-off to OS. + */ + if (device_has_rmrr(dev) && + (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) + return 0; - /* - * We want to start off with all devices in the 1:1 domain, and - * take them out later if we find they can't access all of memory. - * - * However, we can't do this for PCI devices behind bridges, - * because all PCI devices behind the same bridge will end up - * with the same source-id on their transactions. - * - * Practically speaking, we can't change things around for these - * devices at run-time, because we can't be sure there'll be no - * DMA transactions in flight for any of their siblings. - * - * So PCI devices (unless they're on the root bus) as well as - * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of - * the 1:1 domain, just in _case_ one of their siblings turns out - * not to be able to map all of memory. - */ - if (!pci_is_pcie(pdev)) { - if (!pci_is_root_bus(pdev->bus)) + if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) + return 1; + + if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) + return 1; + + if (!(iommu_identity_mapping & IDENTMAP_ALL)) return 0; - if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) + + /* + * We want to start off with all devices in the 1:1 domain, and + * take them out later if we find they can't access all of memory. + * + * However, we can't do this for PCI devices behind bridges, + * because all PCI devices behind the same bridge will end up + * with the same source-id on their transactions. + * + * Practically speaking, we can't change things around for these + * devices at run-time, because we can't be sure there'll be no + * DMA transactions in flight for any of their siblings. + * + * So PCI devices (unless they're on the root bus) as well as + * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of + * the 1:1 domain, just in _case_ one of their siblings turns out + * not to be able to map all of memory. + */ + if (!pci_is_pcie(pdev)) { + if (!pci_is_root_bus(pdev->bus)) + return 0; + if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) + return 0; + } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) return 0; - } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) - return 0; + } else { + if (device_has_rmrr(dev)) + return 0; + } - /* + /* * At boot time, we don't yet know if devices will be 64-bit capable. - * Assume that they will -- if they turn out not to be, then we can + * Assume that they will — if they turn out not to be, then we can * take them out of the 1:1 domain later. 
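
Aside (illustrative): the USB exemption above keys off (pdev->class >> 8), i.e. the 24-bit class code with the programming-interface byte stripped, leaving the base-class/sub-class pair. A standalone sketch with made-up class values:

#include <stdio.h>

#define PCI_CLASS_SERIAL_USB    0x0c03  /* as in include/linux/pci_ids.h */

int main(void)
{
        unsigned int xhci_class = 0x0c0330;     /* hypothetical xHCI controller */
        unsigned int nic_class  = 0x020000;     /* hypothetical Ethernet NIC    */

        printf("xHCI is USB class: %d\n", (xhci_class >> 8) == PCI_CLASS_SERIAL_USB);
        printf("NIC  is USB class: %d\n", (nic_class  >> 8) == PCI_CLASS_SERIAL_USB);
        return 0;
}
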
*/ if (!startup) { @@ -2347,42 +2589,77 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) * If the device's dma_mask is less than the system's memory * size then this is not a candidate for identity mapping. */ - u64 dma_mask = pdev->dma_mask; + u64 dma_mask = *dev->dma_mask; - if (pdev->dev.coherent_dma_mask && - pdev->dev.coherent_dma_mask < dma_mask) - dma_mask = pdev->dev.coherent_dma_mask; + if (dev->coherent_dma_mask && + dev->coherent_dma_mask < dma_mask) + dma_mask = dev->coherent_dma_mask; - return dma_mask >= dma_get_required_mask(&pdev->dev); + return dma_mask >= dma_get_required_mask(dev); } return 1; } +static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) +{ + int ret; + + if (!iommu_should_identity_map(dev, 1)) + return 0; + + ret = domain_add_dev_info(si_domain, dev, + hw ? CONTEXT_TT_PASS_THROUGH : + CONTEXT_TT_MULTI_LEVEL); + if (!ret) + pr_info("IOMMU: %s identity mapping for device %s\n", + hw ? "hardware" : "software", dev_name(dev)); + else if (ret == -ENODEV) + /* device not associated with an iommu */ + ret = 0; + + return ret; +} + + static int __init iommu_prepare_static_identity_mapping(int hw) { struct pci_dev *pdev = NULL; - int ret; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + struct device *dev; + int i; + int ret = 0; ret = si_domain_init(hw); if (ret) return -EFAULT; for_each_pci_dev(pdev) { - /* Skip Host/PCI Bridge devices */ - if (IS_BRIDGE_HOST_DEVICE(pdev)) - continue; - if (iommu_should_identity_map(pdev, 1)) { - printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", - hw ? "hardware" : "software", pci_name(pdev)); + ret = dev_prepare_static_identity_mapping(&pdev->dev, hw); + if (ret) + return ret; + } - ret = domain_add_dev_info(si_domain, pdev, - hw ? 
CONTEXT_TT_PASS_THROUGH : - CONTEXT_TT_MULTI_LEVEL); + for_each_active_iommu(iommu, drhd) + for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { + struct acpi_device_physical_node *pn; + struct acpi_device *adev; + + if (dev->bus != &acpi_bus_type) + continue; + + adev= to_acpi_device(dev); + mutex_lock(&adev->physical_node_lock); + list_for_each_entry(pn, &adev->physical_node_list, node) { + ret = dev_prepare_static_identity_mapping(pn->dev, hw); + if (ret) + break; + } + mutex_unlock(&adev->physical_node_lock); if (ret) return ret; } - } return 0; } @@ -2391,7 +2668,7 @@ static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; struct dmar_rmrr_unit *rmrr; - struct pci_dev *pdev; + struct device *dev; struct intel_iommu *iommu; int i, ret; @@ -2402,12 +2679,17 @@ static int __init init_dmars(void) * endfor */ for_each_drhd_unit(drhd) { - g_num_of_iommus++; /* * lock not needed as this is only incremented in the single * threaded kernel __init code path all other access are read * only */ + if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) { + g_num_of_iommus++; + continue; + } + printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n", + IOMMU_UNITS_SUPPORTED); } g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), @@ -2422,19 +2704,15 @@ static int __init init_dmars(void) sizeof(struct deferred_flush_tables), GFP_KERNEL); if (!deferred_flush) { ret = -ENOMEM; - goto error; + goto free_g_iommus; } - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; - - iommu = drhd->iommu; + for_each_active_iommu(iommu, drhd) { g_iommus[iommu->seq_id] = iommu; ret = iommu_init_domains(iommu); if (ret) - goto error; + goto free_iommu; /* * TBD: @@ -2444,7 +2722,7 @@ static int __init init_dmars(void) ret = iommu_alloc_root_entry(iommu); if (ret) { printk(KERN_ERR "IOMMU: allocate root entry failed\n"); - goto error; + goto free_iommu; } if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; @@ -2453,12 +2731,7 @@ static int __init init_dmars(void) /* * Start from the sane iommu hardware state. */ - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; - - iommu = drhd->iommu; - + for_each_active_iommu(iommu, drhd) { /* * If the queued invalidation is already initialized by us * (for example, while enabling interrupt-remapping) then @@ -2478,12 +2751,7 @@ static int __init init_dmars(void) dmar_disable_qi(iommu); } - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; - - iommu = drhd->iommu; - + for_each_active_iommu(iommu, drhd) { if (dmar_enable_qi(iommu)) { /* * Queued Invalidate not enabled, use Register Based @@ -2523,7 +2791,7 @@ static int __init init_dmars(void) ret = iommu_prepare_static_identity_mapping(hw_pass_through); if (ret) { printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); - goto error; + goto free_iommu; } } /* @@ -2542,15 +2810,10 @@ static int __init init_dmars(void) */ printk(KERN_INFO "IOMMU: Setting RMRR:\n"); for_each_rmrr_units(rmrr) { - for (i = 0; i < rmrr->devices_cnt; i++) { - pdev = rmrr->devices[i]; - /* - * some BIOS lists non-exist devices in DMAR - * table. - */ - if (!pdev) - continue; - ret = iommu_prepare_rmrr_dev(rmrr, pdev); + /* some BIOS lists non-exist devices in DMAR table. 
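
Aside (illustrative, with made-up numbers): each RMRR describes an inclusive [base_address, end_address] byte range that must stay identity-mapped for its device, and the setup loop here turns that into a 1:1 page mapping. The standalone sketch below only shows the page-frame arithmetic involved.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t base = 0xbf800000ULL;  /* example rmrr->base_address */
        uint64_t end  = 0xbf87ffffULL;  /* example rmrr->end_address (inclusive) */
        uint64_t first_pfn = base >> 12;        /* 4 KiB page frames */
        uint64_t last_pfn  = end >> 12;

        printf("identity map pfn 0x%llx..0x%llx (%llu pages, %llu KiB)\n",
               (unsigned long long)first_pfn, (unsigned long long)last_pfn,
               (unsigned long long)(last_pfn - first_pfn + 1),
               (unsigned long long)(last_pfn - first_pfn + 1) * 4);
        return 0;
}
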
*/ + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, + i, dev) { + ret = iommu_prepare_rmrr_dev(rmrr, dev); if (ret) printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); @@ -2566,23 +2829,22 @@ static int __init init_dmars(void) * global invalidate iotlb * enable translation */ - for_each_drhd_unit(drhd) { + for_each_iommu(iommu, drhd) { if (drhd->ignored) { /* * we always have to disable PMRs or DMA may fail on * this device */ if (force_on) - iommu_disable_protect_mem_regions(drhd->iommu); + iommu_disable_protect_mem_regions(iommu); continue; } - iommu = drhd->iommu; iommu_flush_write_buffer(iommu); ret = dmar_set_interrupt(iommu); if (ret) - goto error; + goto free_iommu; iommu_set_root_entry(iommu); @@ -2591,20 +2853,20 @@ static int __init init_dmars(void) ret = iommu_enable_translation(iommu); if (ret) - goto error; + goto free_iommu; iommu_disable_protect_mem_regions(iommu); } return 0; -error: - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; - iommu = drhd->iommu; - free_iommu(iommu); - } + +free_iommu: + for_each_active_iommu(iommu, drhd) + free_dmar_iommu(iommu); + kfree(deferred_flush); +free_g_iommus: kfree(g_iommus); +error: return ret; } @@ -2613,7 +2875,6 @@ static struct iova *intel_alloc_iova(struct device *dev, struct dmar_domain *domain, unsigned long nrpages, uint64_t dma_mask) { - struct pci_dev *pdev = to_pci_dev(dev); struct iova *iova = NULL; /* Restrict dma_mask to the width that the iommu can handle */ @@ -2633,34 +2894,31 @@ static struct iova *intel_alloc_iova(struct device *dev, iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); if (unlikely(!iova)) { printk(KERN_ERR "Allocating %ld-page iova for %s failed", - nrpages, pci_name(pdev)); + nrpages, dev_name(dev)); return NULL; } return iova; } -static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) +static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) { struct dmar_domain *domain; int ret; - domain = get_domain_for_dev(pdev, - DEFAULT_DOMAIN_ADDRESS_WIDTH); + domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) { - printk(KERN_ERR - "Allocating domain for %s failed", pci_name(pdev)); + printk(KERN_ERR "Allocating domain for %s failed", + dev_name(dev)); return NULL; } /* make sure context mapping is ok */ - if (unlikely(!domain_context_mapped(pdev))) { - ret = domain_context_mapping(domain, pdev, - CONTEXT_TT_MULTI_LEVEL); + if (unlikely(!domain_context_mapped(dev))) { + ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL); if (ret) { - printk(KERN_ERR - "Domain context map for %s failed", - pci_name(pdev)); + printk(KERN_ERR "Domain context map for %s failed", + dev_name(dev)); return NULL; } } @@ -2668,51 +2926,46 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) return domain; } -static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev) +static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev) { struct device_domain_info *info; /* No lock here, assumes no domain exit in normal case */ - info = dev->dev.archdata.iommu; + info = dev->archdata.iommu; if (likely(info)) return info->domain; return __get_valid_domain_for_dev(dev); } -static int iommu_dummy(struct pci_dev *pdev) +static int iommu_dummy(struct device *dev) { - return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; + return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; } -/* Check if the pdev needs to go through non-identity map and unmap 
process.*/ +/* Check if the dev needs to go through non-identity map and unmap process.*/ static int iommu_no_mapping(struct device *dev) { - struct pci_dev *pdev; int found; - if (unlikely(dev->bus != &pci_bus_type)) - return 1; - - pdev = to_pci_dev(dev); - if (iommu_dummy(pdev)) + if (iommu_dummy(dev)) return 1; if (!iommu_identity_mapping) return 0; - found = identity_mapping(pdev); + found = identity_mapping(dev); if (found) { - if (iommu_should_identity_map(pdev, 0)) + if (iommu_should_identity_map(dev, 0)) return 1; else { /* * 32 bit DMA is removed from si_domain and fall back * to non-identity mapping. */ - domain_remove_one_dev_info(si_domain, pdev); + domain_remove_one_dev_info(si_domain, dev); printk(KERN_INFO "32bit %s uses non-identity mapping\n", - pci_name(pdev)); + dev_name(dev)); return 0; } } else { @@ -2720,15 +2973,15 @@ static int iommu_no_mapping(struct device *dev) * In case of a detached 64 bit DMA device from vm, the device * is put into si_domain for identity mapping. */ - if (iommu_should_identity_map(pdev, 0)) { + if (iommu_should_identity_map(dev, 0)) { int ret; - ret = domain_add_dev_info(si_domain, pdev, + ret = domain_add_dev_info(si_domain, dev, hw_pass_through ? CONTEXT_TT_PASS_THROUGH : CONTEXT_TT_MULTI_LEVEL); if (!ret) { printk(KERN_INFO "64bit %s uses identity mapping\n", - pci_name(pdev)); + dev_name(dev)); return 1; } } @@ -2737,10 +2990,9 @@ static int iommu_no_mapping(struct device *dev) return 0; } -static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, +static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir, u64 dma_mask) { - struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; phys_addr_t start_paddr; struct iova *iova; @@ -2751,17 +3003,17 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); - if (iommu_no_mapping(hwdev)) + if (iommu_no_mapping(dev)) return paddr; - domain = get_valid_domain_for_dev(pdev); + domain = get_valid_domain_for_dev(dev); if (!domain) return 0; iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); - iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask); + iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask); if (!iova) goto error; @@ -2787,7 +3039,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, /* it's a non-present to present mapping. 
Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1); + iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1); else iommu_flush_write_buffer(iommu); @@ -2799,7 +3051,7 @@ error: if (iova) __free_iova(&domain->iovad, iova); printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n", - pci_name(pdev), size, (unsigned long long)paddr, dir); + dev_name(dev), size, (unsigned long long)paddr, dir); return 0; } @@ -2809,7 +3061,7 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { return __intel_map_single(dev, page_to_phys(page) + offset, size, - dir, to_pci_dev(dev)->dma_mask); + dir, *dev->dma_mask); } static void flush_unmaps(void) @@ -2839,13 +3091,16 @@ static void flush_unmaps(void) /* On real hardware multiple invalidations are expensive */ if (cap_caching_mode(iommu->cap)) iommu_flush_iotlb_psi(iommu, domain->id, - iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0); + iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, + !deferred_flush[i].freelist[j], 0); else { mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1)); iommu_flush_dev_iotlb(deferred_flush[i].domain[j], (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); } __free_iova(&deferred_flush[i].domain[j]->iovad, iova); + if (deferred_flush[i].freelist[j]) + dma_free_pagelist(deferred_flush[i].freelist[j]); } deferred_flush[i].next = 0; } @@ -2862,7 +3117,7 @@ static void flush_unmaps_timeout(unsigned long data) spin_unlock_irqrestore(&async_umap_flush_lock, flags); } -static void add_unmap(struct dmar_domain *dom, struct iova *iova) +static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist) { unsigned long flags; int next, iommu_id; @@ -2878,6 +3133,7 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova) next = deferred_flush[iommu_id].next; deferred_flush[iommu_id].domain[next] = dom; deferred_flush[iommu_id].iova[next] = iova; + deferred_flush[iommu_id].freelist[next] = freelist; deferred_flush[iommu_id].next++; if (!timer_on) { @@ -2892,16 +3148,16 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; unsigned long start_pfn, last_pfn; struct iova *iova; struct intel_iommu *iommu; + struct page *freelist; if (iommu_no_mapping(dev)) return; - domain = find_domain(pdev); + domain = find_domain(dev); BUG_ON(!domain); iommu = domain_get_iommu(domain); @@ -2915,21 +3171,18 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; pr_debug("Device %s unmapping: pfn %lx-%lx\n", - pci_name(pdev), start_pfn, last_pfn); + dev_name(dev), start_pfn, last_pfn); - /* clear the whole page */ - dma_pte_clear_range(domain, start_pfn, last_pfn); - - /* free page tables */ - dma_pte_free_pagetable(domain, start_pfn, last_pfn); + freelist = domain_unmap(domain, start_pfn, last_pfn); if (intel_iommu_strict) { iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, - last_pfn - start_pfn + 1, 0); + last_pfn - start_pfn + 1, !freelist, 0); /* free iova */ __free_iova(&domain->iovad, iova); + dma_free_pagelist(freelist); } else { - add_unmap(domain, iova); + add_unmap(domain, iova, freelist); /* * queue up the release of the unmap to save the 1/6th of the * cpu used up by the iotlb flush operation... 
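
Stepping back from the individual hunks: the new freelist argument threads the page-table pages unlinked by domain_unmap() through the deferred-unmap queue, so they are handed back to the allocator only after the matching IOTLB invalidation has been issued. A condensed restatement of the two unmap paths above, for orientation only:

        freelist = domain_unmap(domain, start_pfn, last_pfn);
        if (intel_iommu_strict) {
                /* synchronous path: invalidate now, then free page tables */
                iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
                                      last_pfn - start_pfn + 1, !freelist, 0);
                __free_iova(&domain->iovad, iova);
                dma_free_pagelist(freelist);
        } else {
                /* batched path: flush_unmaps() invalidates, then frees */
                add_unmap(domain, iova, freelist);
        }
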
@@ -2937,64 +3190,81 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, } } -static void *intel_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) +static void *intel_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { - void *vaddr; + struct page *page = NULL; int order; size = PAGE_ALIGN(size); order = get_order(size); - if (!iommu_no_mapping(hwdev)) + if (!iommu_no_mapping(dev)) flags &= ~(GFP_DMA | GFP_DMA32); - else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) { - if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32)) + else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) { + if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) flags |= GFP_DMA; else flags |= GFP_DMA32; } - vaddr = (void *)__get_free_pages(flags, order); - if (!vaddr) + if (flags & __GFP_WAIT) { + unsigned int count = size >> PAGE_SHIFT; + + page = dma_alloc_from_contiguous(dev, count, order); + if (page && iommu_no_mapping(dev) && + page_to_phys(page) + size > dev->coherent_dma_mask) { + dma_release_from_contiguous(dev, page, count); + page = NULL; + } + } + + if (!page) + page = alloc_pages(flags, order); + if (!page) return NULL; - memset(vaddr, 0, size); + memset(page_address(page), 0, size); - *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size, + *dma_handle = __intel_map_single(dev, page_to_phys(page), size, DMA_BIDIRECTIONAL, - hwdev->coherent_dma_mask); + dev->coherent_dma_mask); if (*dma_handle) - return vaddr; - free_pages((unsigned long)vaddr, order); + return page_address(page); + if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) + __free_pages(page, order); + return NULL; } -static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dma_handle) +static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, struct dma_attrs *attrs) { int order; + struct page *page = virt_to_page(vaddr); size = PAGE_ALIGN(size); order = get_order(size); - intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); - free_pages((unsigned long)vaddr, order); + intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); + if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) + __free_pages(page, order); } -static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, +static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; unsigned long start_pfn, last_pfn; struct iova *iova; struct intel_iommu *iommu; + struct page *freelist; - if (iommu_no_mapping(hwdev)) + if (iommu_no_mapping(dev)) return; - domain = find_domain(pdev); + domain = find_domain(dev); BUG_ON(!domain); iommu = domain_get_iommu(domain); @@ -3007,19 +3277,16 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, start_pfn = mm_to_dma_pfn(iova->pfn_lo); last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; - /* clear the whole page */ - dma_pte_clear_range(domain, start_pfn, last_pfn); - - /* free page tables */ - dma_pte_free_pagetable(domain, start_pfn, last_pfn); + freelist = domain_unmap(domain, start_pfn, last_pfn); if (intel_iommu_strict) { iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, - last_pfn - start_pfn + 1, 0); + last_pfn - start_pfn + 1, !freelist, 0); /* free iova */ __free_iova(&domain->iovad, 
iova); + dma_free_pagelist(freelist); } else { - add_unmap(domain, iova); + add_unmap(domain, iova, freelist); /* * queue up the release of the unmap to save the 1/6th of the * cpu used up by the iotlb flush operation... @@ -3041,11 +3308,10 @@ static int intel_nontranslate_map_sg(struct device *hddev, return nelems; } -static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, +static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { int i; - struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; size_t size = 0; int prot = 0; @@ -3056,10 +3322,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); - if (iommu_no_mapping(hwdev)) - return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); + if (iommu_no_mapping(dev)) + return intel_nontranslate_map_sg(dev, sglist, nelems, dir); - domain = get_valid_domain_for_dev(pdev); + domain = get_valid_domain_for_dev(dev); if (!domain) return 0; @@ -3068,8 +3334,8 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne for_each_sg(sglist, sg, nelems, i) size += aligned_nrpages(sg->offset, sg->length); - iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), - pdev->dma_mask); + iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), + *dev->dma_mask); if (!iova) { sglist->dma_length = 0; return 0; @@ -3102,7 +3368,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1); + iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1); else iommu_flush_write_buffer(iommu); @@ -3115,8 +3381,8 @@ static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr) } struct dma_map_ops intel_dma_ops = { - .alloc_coherent = intel_alloc_coherent, - .free_coherent = intel_free_coherent, + .alloc = intel_alloc_coherent, + .free = intel_free_coherent, .map_sg = intel_map_sg, .unmap_sg = intel_unmap_sg, .map_page = intel_map_page, @@ -3237,29 +3503,28 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quir static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; + struct device *dev; + int i; for_each_drhd_unit(drhd) { if (!drhd->include_all) { - int i; - for (i = 0; i < drhd->devices_cnt; i++) - if (drhd->devices[i] != NULL) - break; - /* ignore DMAR unit if no pci devices exist */ + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) + break; + /* ignore DMAR unit if no devices exist */ if (i == drhd->devices_cnt) drhd->ignored = 1; } } - for_each_drhd_unit(drhd) { - int i; - if (drhd->ignored || drhd->include_all) + for_each_active_drhd_unit(drhd) { + if (drhd->include_all) continue; - for (i = 0; i < drhd->devices_cnt; i++) - if (drhd->devices[i] && - !IS_GFX_DEVICE(drhd->devices[i])) + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) + if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev))) break; - if (i < drhd->devices_cnt) continue; @@ -3269,11 +3534,9 @@ static void __init init_no_remapping_devices(void) intel_iommu_gfx_mapped = 1; } else { drhd->ignored = 1; - for (i = 0; i < drhd->devices_cnt; i++) { - if (!drhd->devices[i]) - continue; - drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; - } + 
for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) + dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; } } } @@ -3416,13 +3679,6 @@ static void __init init_iommu_pm_ops(void) static inline void init_iommu_pm_ops(void) {} #endif /* CONFIG_PM */ -LIST_HEAD(dmar_rmrr_units); - -static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) -{ - list_add(&rmrr->list, &dmar_rmrr_units); -} - int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header) { @@ -3437,30 +3693,18 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header) rmrr = (struct acpi_dmar_reserved_memory *)header; rmrru->base_address = rmrr->base_address; rmrru->end_address = rmrr->end_address; - - dmar_register_rmrr_unit(rmrru); - return 0; -} - -static int __init -rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) -{ - struct acpi_dmar_reserved_memory *rmrr; - int ret; - - rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr; - ret = dmar_parse_dev_scope((void *)(rmrr + 1), - ((void *)rmrr) + rmrr->header.length, - &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); - - if (ret || (rmrru->devices_cnt == 0)) { - list_del(&rmrru->list); + rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), + ((void *)rmrr) + rmrr->header.length, + &rmrru->devices_cnt); + if (rmrru->devices_cnt && rmrru->devices == NULL) { kfree(rmrru); + return -ENOMEM; } - return ret; -} -static LIST_HEAD(dmar_atsr_units); + list_add(&rmrru->list, &dmar_rmrr_units); + + return 0; +} int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) { @@ -3474,91 +3718,134 @@ int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) atsru->hdr = hdr; atsru->include_all = atsr->flags & 0x1; + if (!atsru->include_all) { + atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1), + (void *)atsr + atsr->header.length, + &atsru->devices_cnt); + if (atsru->devices_cnt && atsru->devices == NULL) { + kfree(atsru); + return -ENOMEM; + } + } - list_add(&atsru->list, &dmar_atsr_units); + list_add_rcu(&atsru->list, &dmar_atsr_units); return 0; } -static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) +static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru) { - int rc; - struct acpi_dmar_atsr *atsr; + dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt); + kfree(atsru); +} - if (atsru->include_all) - return 0; +static void intel_iommu_free_dmars(void) +{ + struct dmar_rmrr_unit *rmrru, *rmrr_n; + struct dmar_atsr_unit *atsru, *atsr_n; - atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); - rc = dmar_parse_dev_scope((void *)(atsr + 1), - (void *)atsr + atsr->header.length, - &atsru->devices_cnt, &atsru->devices, - atsr->segment); - if (rc || !atsru->devices_cnt) { - list_del(&atsru->list); - kfree(atsru); + list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { + list_del(&rmrru->list); + dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); + kfree(rmrru); } - return rc; + list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) { + list_del(&atsru->list); + intel_iommu_free_atsr(atsru); + } } int dmar_find_matched_atsr_unit(struct pci_dev *dev) { - int i; + int i, ret = 1; struct pci_bus *bus; + struct pci_dev *bridge = NULL; + struct device *tmp; struct acpi_dmar_atsr *atsr; struct dmar_atsr_unit *atsru; dev = pci_physfn(dev); - - list_for_each_entry(atsru, &dmar_atsr_units, list) { - atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); - if (atsr->segment == pci_domain_nr(dev->bus)) - goto found; - } - - return 0; - -found: for (bus = dev->bus; bus; bus = 
bus->parent) { - struct pci_dev *bridge = bus->self; - + bridge = bus->self; if (!bridge || !pci_is_pcie(bridge) || - bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) + pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) return 0; - - if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) { - for (i = 0; i < atsru->devices_cnt; i++) - if (atsru->devices[i] == bridge) - return 1; + if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) break; - } } + if (!bridge) + return 0; - if (atsru->include_all) - return 1; + rcu_read_lock(); + list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + if (atsr->segment != pci_domain_nr(dev->bus)) + continue; - return 0; + for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp) + if (tmp == &bridge->dev) + goto out; + + if (atsru->include_all) + goto out; + } + ret = 0; +out: + rcu_read_unlock(); + + return ret; } -int __init dmar_parse_rmrr_atsr_dev(void) +int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { - struct dmar_rmrr_unit *rmrr, *rmrr_n; - struct dmar_atsr_unit *atsr, *atsr_n; int ret = 0; + struct dmar_rmrr_unit *rmrru; + struct dmar_atsr_unit *atsru; + struct acpi_dmar_atsr *atsr; + struct acpi_dmar_reserved_memory *rmrr; - list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) { - ret = rmrr_parse_dev(rmrr); - if (ret) - return ret; + if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING) + return 0; + + list_for_each_entry(rmrru, &dmar_rmrr_units, list) { + rmrr = container_of(rmrru->hdr, + struct acpi_dmar_reserved_memory, header); + if (info->event == BUS_NOTIFY_ADD_DEVICE) { + ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1), + ((void *)rmrr) + rmrr->header.length, + rmrr->segment, rmrru->devices, + rmrru->devices_cnt); + if(ret < 0) + return ret; + } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { + dmar_remove_dev_scope(info, rmrr->segment, + rmrru->devices, rmrru->devices_cnt); + } } - list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) { - ret = atsr_parse_dev(atsr); - if (ret) - return ret; + list_for_each_entry(atsru, &dmar_atsr_units, list) { + if (atsru->include_all) + continue; + + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + if (info->event == BUS_NOTIFY_ADD_DEVICE) { + ret = dmar_insert_dev_scope(info, (void *)(atsr + 1), + (void *)atsr + atsr->header.length, + atsr->segment, atsru->devices, + atsru->devices_cnt); + if (ret > 0) + break; + else if(ret < 0) + return ret; + } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { + if (dmar_remove_dev_scope(info, atsr->segment, + atsru->devices, atsru->devices_cnt)) + break; + } } - return ret; + return 0; } /* @@ -3571,24 +3858,26 @@ static int device_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; - if (iommu_no_mapping(dev)) + if (iommu_dummy(dev)) return 0; - domain = find_domain(pdev); - if (!domain) + if (action != BUS_NOTIFY_UNBOUND_DRIVER && + action != BUS_NOTIFY_DEL_DEVICE) return 0; - if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) { - domain_remove_one_dev_info(domain, pdev); + domain = find_domain(dev); + if (!domain) + return 0; - if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && - !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && - list_empty(&domain->devices)) - domain_exit(domain); - } + down_read(&dmar_global_lock); + domain_remove_one_dev_info(domain, dev); + if (!(domain->flags & 
DOMAIN_FLAG_VIRTUAL_MACHINE) && + !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && + list_empty(&domain->devices)) + domain_exit(domain); + up_read(&dmar_global_lock); return 0; } @@ -3597,33 +3886,112 @@ static struct notifier_block device_nb = { .notifier_call = device_notifier, }; +static int intel_iommu_memory_notifier(struct notifier_block *nb, + unsigned long val, void *v) +{ + struct memory_notify *mhp = v; + unsigned long long start, end; + unsigned long start_vpfn, last_vpfn; + + switch (val) { + case MEM_GOING_ONLINE: + start = mhp->start_pfn << PAGE_SHIFT; + end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; + if (iommu_domain_identity_map(si_domain, start, end)) { + pr_warn("dmar: failed to build identity map for [%llx-%llx]\n", + start, end); + return NOTIFY_BAD; + } + break; + + case MEM_OFFLINE: + case MEM_CANCEL_ONLINE: + start_vpfn = mm_to_dma_pfn(mhp->start_pfn); + last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); + while (start_vpfn <= last_vpfn) { + struct iova *iova; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + struct page *freelist; + + iova = find_iova(&si_domain->iovad, start_vpfn); + if (iova == NULL) { + pr_debug("dmar: failed get IOVA for PFN %lx\n", + start_vpfn); + break; + } + + iova = split_and_remove_iova(&si_domain->iovad, iova, + start_vpfn, last_vpfn); + if (iova == NULL) { + pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n", + start_vpfn, last_vpfn); + return NOTIFY_BAD; + } + + freelist = domain_unmap(si_domain, iova->pfn_lo, + iova->pfn_hi); + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + iommu_flush_iotlb_psi(iommu, si_domain->id, + iova->pfn_lo, + iova->pfn_hi - iova->pfn_lo + 1, + !freelist, 0); + rcu_read_unlock(); + dma_free_pagelist(freelist); + + start_vpfn = iova->pfn_hi + 1; + free_iova_mem(iova); + } + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block intel_iommu_memory_nb = { + .notifier_call = intel_iommu_memory_notifier, + .priority = 0 +}; + int __init intel_iommu_init(void) { - int ret = 0; + int ret = -ENODEV; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; /* VT-d is required for a TXT/tboot launch, so enforce that */ force_on = tboot_force_iommu(); + if (iommu_init_mempool()) { + if (force_on) + panic("tboot: Failed to initialize iommu memory\n"); + return -ENOMEM; + } + + down_write(&dmar_global_lock); if (dmar_table_init()) { if (force_on) panic("tboot: Failed to initialize DMAR table\n"); - return -ENODEV; + goto out_free_dmar; } + /* + * Disable translation if already enabled prior to OS handover. 
+ */ + for_each_active_iommu(iommu, drhd) + if (iommu->gcmd & DMA_GCMD_TE) + iommu_disable_translation(iommu); + if (dmar_dev_scope_init() < 0) { if (force_on) panic("tboot: Failed to initialize DMAR device scope\n"); - return -ENODEV; + goto out_free_dmar; } if (no_iommu || dmar_disabled) - return -ENODEV; - - if (iommu_init_mempool()) { - if (force_on) - panic("tboot: Failed to initialize iommu memory\n"); - return -ENODEV; - } + goto out_free_dmar; if (list_empty(&dmar_rmrr_units)) printk(KERN_INFO "DMAR: No RMRR found\n"); @@ -3634,7 +4002,7 @@ int __init intel_iommu_init(void) if (dmar_init_reserved_ranges()) { if (force_on) panic("tboot: Failed to reserve iommu ranges\n"); - return -ENODEV; + goto out_free_reserved_range; } init_no_remapping_devices(); @@ -3644,10 +4012,9 @@ int __init intel_iommu_init(void) if (force_on) panic("tboot: Failed to initialize DMARs\n"); printk(KERN_ERR "IOMMU: dmar init failed\n"); - put_iova_domain(&reserved_iova_list); - iommu_exit_mempool(); - return ret; + goto out_free_reserved_range; } + up_write(&dmar_global_lock); printk(KERN_INFO "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); @@ -3660,22 +4027,33 @@ int __init intel_iommu_init(void) init_iommu_pm_ops(); bus_set_iommu(&pci_bus_type, &intel_iommu_ops); - bus_register_notifier(&pci_bus_type, &device_nb); + if (si_domain && !hw_pass_through) + register_memory_notifier(&intel_iommu_memory_nb); intel_iommu_enabled = 1; return 0; + +out_free_reserved_range: + put_iova_domain(&reserved_iova_list); +out_free_dmar: + intel_iommu_free_dmars(); + up_write(&dmar_global_lock); + iommu_exit_mempool(); + return ret; } static void iommu_detach_dependent_devices(struct intel_iommu *iommu, - struct pci_dev *pdev) + struct device *dev) { - struct pci_dev *tmp, *parent; + struct pci_dev *tmp, *parent, *pdev; - if (!iommu || !pdev) + if (!iommu || !dev || !dev_is_pci(dev)) return; + pdev = to_pci_dev(dev); + /* dependent device detach */ tmp = pci_find_upstream_pcie_bridge(pdev); /* Secondary interface's bus number and devfn 0 */ @@ -3696,34 +4074,28 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu, } static void domain_remove_one_dev_info(struct dmar_domain *domain, - struct pci_dev *pdev) + struct device *dev) { - struct device_domain_info *info; + struct device_domain_info *info, *tmp; struct intel_iommu *iommu; unsigned long flags; int found = 0; - struct list_head *entry, *tmp; + u8 bus, devfn; - iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, - pdev->devfn); + iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return; spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_safe(entry, tmp, &domain->devices) { - info = list_entry(entry, struct device_domain_info, link); - if (info->segment == pci_domain_nr(pdev->bus) && - info->bus == pdev->bus->number && - info->devfn == pdev->devfn) { - list_del(&info->link); - list_del(&info->global); - if (info->dev) - info->dev->dev.archdata.iommu = NULL; + list_for_each_entry_safe(info, tmp, &domain->devices, link) { + if (info->iommu == iommu && info->bus == bus && + info->devfn == devfn) { + unlink_domain_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); iommu_disable_dev_iotlb(info); iommu_detach_dev(iommu, info->bus, info->devfn); - iommu_detach_dependent_devices(iommu, pdev); + iommu_detach_dependent_devices(iommu, dev); free_devinfo_mem(info); spin_lock_irqsave(&device_domain_lock, flags); @@ -3738,8 +4110,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, 
* owned by this domain, clear this iommu in iommu_bmp * update iommu count and coherency */ - if (iommu == device_to_iommu(info->segment, info->bus, - info->devfn)) + if (info->iommu == iommu) found = 1; } @@ -3748,7 +4119,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, if (found == 0) { unsigned long tmp_flags; spin_lock_irqsave(&domain->iommu_lock, tmp_flags); - clear_bit(iommu->seq_id, &domain->iommu_bmp); + clear_bit(iommu->seq_id, domain->iommu_bmp); domain->iommu_count--; domain_update_iommu_cap(domain); spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); @@ -3763,71 +4134,11 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, } } -static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) -{ - struct device_domain_info *info; - struct intel_iommu *iommu; - unsigned long flags1, flags2; - - spin_lock_irqsave(&device_domain_lock, flags1); - while (!list_empty(&domain->devices)) { - info = list_entry(domain->devices.next, - struct device_domain_info, link); - list_del(&info->link); - list_del(&info->global); - if (info->dev) - info->dev->dev.archdata.iommu = NULL; - - spin_unlock_irqrestore(&device_domain_lock, flags1); - - iommu_disable_dev_iotlb(info); - iommu = device_to_iommu(info->segment, info->bus, info->devfn); - iommu_detach_dev(iommu, info->bus, info->devfn); - iommu_detach_dependent_devices(iommu, info->dev); - - /* clear this iommu in iommu_bmp, update iommu count - * and capabilities - */ - spin_lock_irqsave(&domain->iommu_lock, flags2); - if (test_and_clear_bit(iommu->seq_id, - &domain->iommu_bmp)) { - domain->iommu_count--; - domain_update_iommu_cap(domain); - } - spin_unlock_irqrestore(&domain->iommu_lock, flags2); - - free_devinfo_mem(info); - spin_lock_irqsave(&device_domain_lock, flags1); - } - spin_unlock_irqrestore(&device_domain_lock, flags1); -} - -/* domain id for virtual machine, it won't be set in context */ -static unsigned long vm_domid; - -static struct dmar_domain *iommu_alloc_vm_domain(void) -{ - struct dmar_domain *domain; - - domain = alloc_domain_mem(); - if (!domain) - return NULL; - - domain->id = vm_domid++; - domain->nid = -1; - memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); - domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; - - return domain; -} - static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; init_iova_domain(&domain->iovad, DMA_32BIT_PFN); - spin_lock_init(&domain->iommu_lock); - domain_reserve_special_ranges(domain); /* calculate AGAW */ @@ -3835,9 +4146,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) adjust_width = guestwidth_to_adjustwidth(guest_width); domain->agaw = width_to_agaw(adjust_width); - INIT_LIST_HEAD(&domain->devices); - - domain->iommu_count = 0; domain->iommu_coherency = 0; domain->iommu_snooping = 0; domain->iommu_superpage = 0; @@ -3852,57 +4160,11 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) return 0; } -static void iommu_free_vm_domain(struct dmar_domain *domain) -{ - unsigned long flags; - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - unsigned long i; - unsigned long ndomains; - - for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; - iommu = drhd->iommu; - - ndomains = cap_ndoms(iommu->cap); - for_each_set_bit(i, iommu->domain_ids, ndomains) { - if (iommu->domains[i] == domain) { - spin_lock_irqsave(&iommu->lock, flags); - clear_bit(i, iommu->domain_ids); - iommu->domains[i] = NULL; - spin_unlock_irqrestore(&iommu->lock, flags); - break; - } - 
} - } -} - -static void vm_domain_exit(struct dmar_domain *domain) -{ - /* Domain 0 is reserved, so dont process it */ - if (!domain) - return; - - vm_domain_remove_all_dev_info(domain); - /* destroy iovas */ - put_iova_domain(&domain->iovad); - - /* clear ptes */ - dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - - /* free page tables */ - dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - - iommu_free_vm_domain(domain); - free_domain_mem(domain); -} - static int intel_iommu_domain_init(struct iommu_domain *domain) { struct dmar_domain *dmar_domain; - dmar_domain = iommu_alloc_vm_domain(); + dmar_domain = alloc_domain(true); if (!dmar_domain) { printk(KERN_ERR "intel_iommu_domain_init: dmar_domain == NULL\n"); @@ -3911,12 +4173,16 @@ static int intel_iommu_domain_init(struct iommu_domain *domain) if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { printk(KERN_ERR "intel_iommu_domain_init() failed\n"); - vm_domain_exit(dmar_domain); + domain_exit(dmar_domain); return -ENOMEM; } domain_update_iommu_cap(dmar_domain); domain->priv = dmar_domain; + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw); + domain->geometry.force_aperture = true; + return 0; } @@ -3925,33 +4191,32 @@ static void intel_iommu_domain_destroy(struct iommu_domain *domain) struct dmar_domain *dmar_domain = domain->priv; domain->priv = NULL; - vm_domain_exit(dmar_domain); + domain_exit(dmar_domain); } static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { struct dmar_domain *dmar_domain = domain->priv; - struct pci_dev *pdev = to_pci_dev(dev); struct intel_iommu *iommu; int addr_width; + u8 bus, devfn; - /* normally pdev is not mapped */ - if (unlikely(domain_context_mapped(pdev))) { + /* normally dev is not mapped */ + if (unlikely(domain_context_mapped(dev))) { struct dmar_domain *old_domain; - old_domain = find_domain(pdev); + old_domain = find_domain(dev); if (old_domain) { if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) - domain_remove_one_dev_info(old_domain, pdev); + domain_remove_one_dev_info(old_domain, dev); else domain_remove_dev_info(old_domain); } } - iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, - pdev->devfn); + iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; @@ -3983,16 +4248,15 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, dmar_domain->agaw--; } - return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); + return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL); } static void intel_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { struct dmar_domain *dmar_domain = domain->priv; - struct pci_dev *pdev = to_pci_dev(dev); - domain_remove_one_dev_info(dmar_domain, pdev); + domain_remove_one_dev_info(dmar_domain, dev); } static int intel_iommu_map(struct iommu_domain *domain, @@ -4034,28 +4298,62 @@ static int intel_iommu_map(struct iommu_domain *domain, } static size_t intel_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size) + unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = domain->priv; - int order; + struct page *freelist = NULL; + struct intel_iommu *iommu; + unsigned long start_pfn, last_pfn; + unsigned int npages; + int iommu_id, num, ndomains, level = 0; + + /* Cope with horrid API which requires us to unmap more than the + size argument if it happens to be a 
large-page mapping. */ + if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)) + BUG(); + + if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) + size = VTD_PAGE_SIZE << level_to_offset_bits(level); + + start_pfn = iova >> VTD_PAGE_SHIFT; + last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT; + + freelist = domain_unmap(dmar_domain, start_pfn, last_pfn); + + npages = last_pfn - start_pfn + 1; + + for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) { + iommu = g_iommus[iommu_id]; + + /* + * find bit position of dmar_domain + */ + ndomains = cap_ndoms(iommu->cap); + for_each_set_bit(num, iommu->domain_ids, ndomains) { + if (iommu->domains[num] == dmar_domain) + iommu_flush_iotlb_psi(iommu, num, start_pfn, + npages, !freelist, 0); + } + + } - order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, - (iova + size - 1) >> VTD_PAGE_SHIFT); + dma_free_pagelist(freelist); if (dmar_domain->max_addr == iova + size) dmar_domain->max_addr = iova; - return PAGE_SIZE << order; + return size; } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct dmar_domain *dmar_domain = domain->priv; struct dma_pte *pte; + int level = 0; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); if (pte) phys = dma_pte_addr(pte); @@ -4070,57 +4368,101 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain, if (cap == IOMMU_CAP_CACHE_COHERENCY) return dmar_domain->iommu_snooping; if (cap == IOMMU_CAP_INTR_REMAP) - return intr_remapping_enabled; + return irq_remapping_enabled; return 0; } -/* - * Group numbers are arbitrary. Device with the same group number - * indicate the iommu cannot differentiate between them. To avoid - * tracking used groups we just use the seg|bus|devfn of the lowest - * level we're able to differentiate devices - */ -static int intel_iommu_device_group(struct device *dev, unsigned int *groupid) +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) + +static int intel_iommu_add_device(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); - struct pci_dev *bridge; - union { - struct { - u8 devfn; - u8 bus; - u16 segment; - } pci; - u32 group; - } id; - - if (iommu_no_mapping(dev)) - return -ENODEV; - - id.pci.segment = pci_domain_nr(pdev->bus); - id.pci.bus = pdev->bus->number; - id.pci.devfn = pdev->devfn; + struct pci_dev *bridge, *dma_pdev = NULL; + struct iommu_group *group; + int ret; + u8 bus, devfn; - if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn)) + if (!device_to_iommu(dev, &bus, &devfn)) return -ENODEV; bridge = pci_find_upstream_pcie_bridge(pdev); if (bridge) { - if (pci_is_pcie(bridge)) { - id.pci.bus = bridge->subordinate->number; - id.pci.devfn = 0; - } else { - id.pci.bus = bridge->bus->number; - id.pci.devfn = bridge->devfn; + if (pci_is_pcie(bridge)) + dma_pdev = pci_get_domain_bus_and_slot( + pci_domain_nr(pdev->bus), + bridge->subordinate->number, 0); + if (!dma_pdev) + dma_pdev = pci_dev_get(bridge); + } else + dma_pdev = pci_dev_get(pdev); + + /* Account for quirked devices */ + swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); + + /* + * If it's a multifunction device that does not support our + * required ACS flags, add to the same group as lowest numbered + * function that also does not suport the required ACS flags. 
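
Aside (illustrative): that loop probes the up-to-eight functions sharing a slot by rebuilding devfn values. The devfn packing is plain bit arithmetic, shown standalone below with a hypothetical 00:02.3 device.

#include <stdio.h>

#define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)         ((devfn) & 0x07)

int main(void)
{
        unsigned int devfn = PCI_DEVFN(2, 3);   /* hypothetical 00:02.3 */

        printf("devfn 0x%02x -> slot %u func %u\n",
               devfn, PCI_SLOT(devfn), PCI_FUNC(devfn));

        /* every function number in the slot, as the loop above walks them */
        for (unsigned int fn = 0; fn < 8; fn++)
                printf("sibling devfn 0x%02x\n", PCI_DEVFN(PCI_SLOT(devfn), fn));
        return 0;
}
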
+ */ + if (dma_pdev->multifunction && + !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { + u8 i, slot = PCI_SLOT(dma_pdev->devfn); + + for (i = 0; i < 8; i++) { + struct pci_dev *tmp; + + tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); + if (!tmp) + continue; + + if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { + swap_pci_ref(&dma_pdev, tmp); + break; + } + pci_dev_put(tmp); + } + } + + /* + * Devices on the root bus go through the iommu. If that's not us, + * find the next upstream device and test ACS up to the root bus. + * Finding the next device may require skipping virtual buses. + */ + while (!pci_is_root_bus(dma_pdev->bus)) { + struct pci_bus *bus = dma_pdev->bus; + + while (!bus->self) { + if (!pci_is_root_bus(bus)) + bus = bus->parent; + else + goto root_bus; } + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) + break; + + swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); } - if (!pdev->is_virtfn && iommu_group_mf) - id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0); +root_bus: + group = iommu_group_get(&dma_pdev->dev); + pci_dev_put(dma_pdev); + if (!group) { + group = iommu_group_alloc(); + if (IS_ERR(group)) + return PTR_ERR(group); + } - *groupid = id.group; + ret = iommu_group_add_device(group, dev); - return 0; + iommu_group_put(group); + return ret; +} + +static void intel_iommu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); } static struct iommu_ops intel_iommu_ops = { @@ -4132,27 +4474,43 @@ static struct iommu_ops intel_iommu_ops = { .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, .domain_has_cap = intel_iommu_domain_has_cap, - .device_group = intel_iommu_device_group, + .add_device = intel_iommu_add_device, + .remove_device = intel_iommu_remove_device, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; -static void __devinit quirk_iommu_rwbf(struct pci_dev *dev) +static void quirk_iommu_g4x_gfx(struct pci_dev *dev) +{ + /* G4x/GM45 integrated gfx dmar support is totally busted. */ + printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n"); + dmar_map_gfx = 0; +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx); + +static void quirk_iommu_rwbf(struct pci_dev *dev) { /* * Mobile 4 Series Chipset neglects to set RWBF capability, - * but needs it: + * but needs it. Same seems to hold for the desktop versions. 
*/ printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n"); rwbf_quirk = 1; - - /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */ - if (dev->revision == 0x07) { - printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n"); - dmar_map_gfx = 0; - } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf); #define GGC 0x52 #define GGC_MEMORY_SIZE_MASK (0xf << 8) @@ -4164,7 +4522,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf); #define GGC_MEMORY_SIZE_3M_VT (0xa << 8) #define GGC_MEMORY_SIZE_4M_VT (0xb << 8) -static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev) +static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) { unsigned short ggc; diff --git a/drivers/iommu/intr_remapping.c b/drivers/iommu/intel_irq_remapping.c index 6777ca04947..9b174893f0f 100644 --- a/drivers/iommu/intr_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -6,63 +6,60 @@ #include <linux/hpet.h> #include <linux/pci.h> #include <linux/irq.h> +#include <linux/intel-iommu.h> +#include <linux/acpi.h> #include <asm/io_apic.h> #include <asm/smp.h> #include <asm/cpu.h> -#include <linux/intel-iommu.h> -#include "intr_remapping.h" -#include <acpi/acpi.h> +#include <asm/irq_remapping.h> #include <asm/pci-direct.h> +#include <asm/msidef.h> -static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; -static struct hpet_scope ir_hpet[MAX_HPET_TBS]; -static int ir_ioapic_num, ir_hpet_num; -int intr_remapping_enabled; - -static int disable_intremap; -static int disable_sourceid_checking; -static int no_x2apic_optout; +#include "irq_remapping.h" -static __init int setup_nointremap(char *str) -{ - disable_intremap = 1; - return 0; -} -early_param("nointremap", setup_nointremap); +struct ioapic_scope { + struct intel_iommu *iommu; + unsigned int id; + unsigned int bus; /* PCI bus number */ + unsigned int devfn; /* PCI devfn number */ +}; -static __init int setup_intremap(char *str) -{ - if (!str) - return -EINVAL; +struct hpet_scope { + struct intel_iommu *iommu; + u8 id; + unsigned int bus; + unsigned int devfn; +}; - while (*str) { - if (!strncmp(str, "on", 2)) - disable_intremap = 0; - else if (!strncmp(str, "off", 3)) - disable_intremap = 1; - else if (!strncmp(str, "nosid", 5)) - disable_sourceid_checking = 1; - else if (!strncmp(str, "no_x2apic_optout", 16)) - no_x2apic_optout = 1; - - str += strcspn(str, ","); - while (*str == ',') - str++; - } +#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) +#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) - return 0; -} -early_param("intremap", setup_intremap); +static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; +static struct hpet_scope ir_hpet[MAX_HPET_TBS]; +static int ir_ioapic_num, ir_hpet_num; +/* + * Lock ordering: + * ->dmar_global_lock + * ->irq_2_ir_lock + * ->qi->q_lock + * ->iommu->register_lock + * Note: + * intel_irq_remap_ops.{supported,prepare,enable,disable,reenable} are called + * in single-threaded environment with interrupt disabled, so no need to tabke + * the dmar_global_lock. 
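+ * + * A minimal sketch of that nesting as it occurs in this file (illustrative only, unrelated work elided): + * down_read(&dmar_global_lock); + * raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + * ... + * raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); + * up_read(&dmar_global_lock);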
+ */ static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); +static int __init parse_ioapics_under_ir(void); + static struct irq_2_iommu *irq_2_iommu(unsigned int irq) { struct irq_cfg *cfg = irq_get_chip_data(irq); return cfg ? &cfg->irq_2_iommu : NULL; } -int get_irte(int irq, struct irte *entry) +static int get_irte(int irq, struct irte *entry) { struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); unsigned long flags; @@ -80,23 +77,18 @@ int get_irte(int irq, struct irte *entry) return 0; } -int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) +static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) { struct ir_table *table = iommu->ir_table; struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); - u16 index, start_index; + struct irq_cfg *cfg = irq_get_chip_data(irq); unsigned int mask = 0; unsigned long flags; - int i; + int index; if (!count || !irq_iommu) return -1; - /* - * start the IRTE search from index 0. - */ - index = start_index = 0; - if (count > 1) { count = __roundup_pow_of_two(count); mask = ilog2(count); @@ -111,31 +103,17 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) } raw_spin_lock_irqsave(&irq_2_ir_lock, flags); - do { - for (i = index; i < index + count; i++) - if (table->base[i].present) - break; - /* empty index found */ - if (i == index + count) - break; - - index = (index + count) % INTR_REMAP_TABLE_ENTRIES; - - if (index == start_index) { - raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); - printk(KERN_ERR "can't allocate an IRTE\n"); - return -1; - } - } while (1); - - for (i = index; i < index + count; i++) - table->base[i].present = 1; - - irq_iommu->iommu = iommu; - irq_iommu->irte_index = index; - irq_iommu->sub_handle = 0; - irq_iommu->irte_mask = mask; - + index = bitmap_find_free_region(table->bitmap, + INTR_REMAP_TABLE_ENTRIES, mask); + if (index < 0) { + pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id); + } else { + cfg->remapped = 1; + irq_iommu->iommu = iommu; + irq_iommu->irte_index = index; + irq_iommu->sub_handle = 0; + irq_iommu->irte_mask = mask; + } raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return index; @@ -152,7 +130,7 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) return qi_submit_sync(&desc, iommu); } -int map_irq_to_irte_handle(int irq, u16 *sub_handle) +static int map_irq_to_irte_handle(int irq, u16 *sub_handle) { struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); unsigned long flags; @@ -168,9 +146,10 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle) return index; } -int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) +static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) { struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); unsigned long flags; if (!irq_iommu) @@ -178,6 +157,7 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + cfg->remapped = 1; irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = subhandle; @@ -188,7 +168,7 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) return 0; } -int modify_irte(int irq, struct irte *irte_modified) +static int modify_irte(int irq, struct irte *irte_modified) { struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); struct intel_iommu *iommu; @@ -216,7 +196,7 @@ int modify_irte(int irq, struct irte *irte_modified) return rc; } -struct intel_iommu *map_hpet_to_ir(u8 hpet_id) +static struct 
intel_iommu *map_hpet_to_ir(u8 hpet_id) { int i; @@ -226,7 +206,7 @@ struct intel_iommu *map_hpet_to_ir(u8 hpet_id) return NULL; } -struct intel_iommu *map_ioapic_to_ir(int apic) +static struct intel_iommu *map_ioapic_to_ir(int apic) { int i; @@ -236,7 +216,7 @@ struct intel_iommu *map_ioapic_to_ir(int apic) return NULL; } -struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) +static struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) { struct dmar_drhd_unit *drhd; @@ -266,11 +246,13 @@ static int clear_entries(struct irq_2_iommu *irq_iommu) set_64bit(&entry->low, 0); set_64bit(&entry->high, 0); } + bitmap_release_region(iommu->ir_table->bitmap, index, + irq_iommu->irte_mask); return qi_flush_iec(iommu, index, irq_iommu->irte_mask); } -int free_irte(int irq) +static int free_irte(int irq) { struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); unsigned long flags; @@ -328,7 +310,7 @@ static void set_irte_sid(struct irte *irte, unsigned int svt, irte->sid = sid; } -int set_ioapic_sid(struct irte *irte, int apic) +static int set_ioapic_sid(struct irte *irte, int apic) { int i; u16 sid = 0; @@ -336,24 +318,26 @@ int set_ioapic_sid(struct irte *irte, int apic) if (!irte) return -1; + down_read(&dmar_global_lock); for (i = 0; i < MAX_IO_APICS; i++) { if (ir_ioapic[i].id == apic) { sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn; break; } } + up_read(&dmar_global_lock); if (sid == 0) { pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic); return -1; } - set_irte_sid(irte, 1, 0, sid); + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, sid); return 0; } -int set_hpet_sid(struct irte *irte, u8 id) +static int set_hpet_sid(struct irte *irte, u8 id) { int i; u16 sid = 0; @@ -361,12 +345,14 @@ int set_hpet_sid(struct irte *irte, u8 id) if (!irte) return -1; + down_read(&dmar_global_lock); for (i = 0; i < MAX_HPET_TBS; i++) { if (ir_hpet[i].id == id) { sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn; break; } } + up_read(&dmar_global_lock); if (sid == 0) { pr_warning("Failed to set source-id of HPET block (%d)\n", id); @@ -383,7 +369,7 @@ int set_hpet_sid(struct irte *irte, u8 id) return 0; } -int set_msi_sid(struct irte *irte, struct pci_dev *dev) +static int set_msi_sid(struct irte *irte, struct pci_dev *dev) { struct pci_dev *bridge; @@ -410,7 +396,7 @@ int set_msi_sid(struct irte *irte, struct pci_dev *dev) return 0; } -static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) +static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode) { u64 addr; u32 sts; @@ -441,19 +427,31 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) /* Enable interrupt-remapping */ iommu->gcmd |= DMA_GCMD_IRE; + iommu->gcmd &= ~DMA_GCMD_CFI; /* Block compatibility-format MSIs */ writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRES), sts); + /* + * With CFI clear in the Global Command register, we should be + * protected from dangerous (i.e. compatibility) interrupts + * regardless of x2apic status. Check just to be sure. 
+ */ + if (sts & DMA_GSTS_CFIS) + WARN(1, KERN_WARNING + "Compatibility-format IRQs enabled despite intr remapping;\n" + "you are vulnerable to IRQ injection.\n"); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); } -static int setup_intr_remapping(struct intel_iommu *iommu, int mode) +static int intel_setup_irq_remapping(struct intel_iommu *iommu, int mode) { struct ir_table *ir_table; struct page *pages; + unsigned long *bitmap; ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table), GFP_ATOMIC); @@ -465,22 +463,32 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode) INTR_REMAP_PAGE_ORDER); if (!pages) { - printk(KERN_ERR "failed to allocate pages of order %d\n", - INTR_REMAP_PAGE_ORDER); + pr_err("IR%d: failed to allocate pages of order %d\n", + iommu->seq_id, INTR_REMAP_PAGE_ORDER); kfree(iommu->ir_table); return -ENOMEM; } + bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES), + sizeof(long), GFP_ATOMIC); + if (bitmap == NULL) { + pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id); + __free_pages(pages, INTR_REMAP_PAGE_ORDER); + kfree(ir_table); + return -ENOMEM; + } + ir_table->base = page_address(pages); + ir_table->bitmap = bitmap; - iommu_set_intr_remapping(iommu, mode); + iommu_set_irq_remapping(iommu, mode); return 0; } /* * Disable Interrupt Remapping. */ -static void iommu_disable_intr_remapping(struct intel_iommu *iommu) +static void iommu_disable_irq_remapping(struct intel_iommu *iommu) { unsigned long flags; u32 sts; @@ -519,48 +527,62 @@ static int __init dmar_x2apic_optout(void) return dmar->flags & DMAR_X2APIC_OPT_OUT; } -int __init intr_remapping_supported(void) +static int __init intel_irq_remapping_supported(void) { struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; - if (disable_intremap) + if (disable_irq_remap) return 0; + if (irq_remap_broken) { + printk(KERN_WARNING + "This system BIOS has enabled interrupt remapping\n" + "on a chipset that contains an erratum making that\n" + "feature unstable. To maintain system stability\n" + "interrupt remapping is being disabled. 
Please\n" + "contact your BIOS vendor for an update\n"); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + disable_irq_remap = 1; + return 0; + } if (!dmar_ir_support()) return 0; - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - + for_each_iommu(iommu, drhd) if (!ecap_ir_support(iommu->ecap)) return 0; - } return 1; } -int __init enable_intr_remapping(void) +static int __init intel_enable_irq_remapping(void) { struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool x2apic_present; int setup = 0; int eim = 0; + x2apic_present = x2apic_supported(); + if (parse_ioapics_under_ir() != 1) { printk(KERN_INFO "Not enable interrupt remapping\n"); - return -1; + goto error; } - if (x2apic_supported()) { + if (x2apic_present) { + pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); + eim = !dmar_x2apic_optout(); - WARN(!eim, KERN_WARNING - "Your BIOS is broken and requested that x2apic be disabled\n" - "This will leave your machine vulnerable to irq-injection attacks\n" - "Use 'intremap=no_x2apic_optout' to override BIOS request\n"); + if (!eim) + printk(KERN_WARNING + "Your BIOS is broken and requested that x2apic be disabled.\n" + "This will slightly decrease performance.\n" + "Use 'intremap=no_x2apic_optout' to override BIOS request.\n"); } - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - + for_each_iommu(iommu, drhd) { /* * If the queued invalidation is already initialized, * shouldn't disable it. @@ -577,7 +599,7 @@ int __init enable_intr_remapping(void) * Disable intr remapping and queued invalidation, if already * enabled prior to OS handover. */ - iommu_disable_intr_remapping(iommu); + iommu_disable_irq_remapping(iommu); dmar_disable_qi(iommu); } @@ -585,45 +607,39 @@ int __init enable_intr_remapping(void) /* * check for the Interrupt-remapping support */ - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - + for_each_iommu(iommu, drhd) { if (!ecap_ir_support(iommu->ecap)) continue; if (eim && !ecap_eim_support(iommu->ecap)) { printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); - return -1; + goto error; } } /* * Enable queued invalidation for all the DRHD's. */ - for_each_drhd_unit(drhd) { - int ret; - struct intel_iommu *iommu = drhd->iommu; - ret = dmar_enable_qi(iommu); + for_each_iommu(iommu, drhd) { + int ret = dmar_enable_qi(iommu); if (ret) { printk(KERN_ERR "DRHD %Lx: failed to enable queued, " " invalidation, ecap %Lx, ret %d\n", drhd->reg_base_addr, iommu->ecap, ret); - return -1; + goto error; } } /* * Setup Interrupt-remapping for all the DRHD's now. */ - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - + for_each_iommu(iommu, drhd) { if (!ecap_ir_support(iommu->ecap)) continue; - if (setup_intr_remapping(iommu, eim)) + if (intel_setup_irq_remapping(iommu, eim)) goto error; setup = 1; @@ -632,7 +648,15 @@ int __init enable_intr_remapping(void) if (!setup) goto error; - intr_remapping_enabled = 1; + irq_remapping_enabled = 1; + + /* + * VT-d has a different layout for IO-APIC entries when + * interrupt remapping is enabled. So it needs a special routine + * to print IO-APIC entries for debugging purposes too. + */ + x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries; + pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic"); return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; @@ -641,6 +665,10 @@ error: /* * handle error condition gracefully here! 
*/ + + if (x2apic_present) + pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n"); + return -1; } @@ -661,12 +689,12 @@ static void ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope, * Access PCI directly due to the PCI * subsystem isn't initialized yet. */ - bus = read_pci_config_byte(bus, path->dev, path->fn, + bus = read_pci_config_byte(bus, path->device, path->function, PCI_SECONDARY_BUS); path++; } ir_hpet[ir_hpet_num].bus = bus; - ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->dev, path->fn); + ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->device, path->function); ir_hpet[ir_hpet_num].iommu = iommu; ir_hpet[ir_hpet_num].id = scope->enumeration_id; ir_hpet_num++; @@ -689,13 +717,13 @@ static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope, * Access PCI directly due to the PCI * subsystem isn't initialized yet. */ - bus = read_pci_config_byte(bus, path->dev, path->fn, + bus = read_pci_config_byte(bus, path->device, path->function, PCI_SECONDARY_BUS); path++; } ir_ioapic[ir_ioapic_num].bus = bus; - ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->dev, path->fn); + ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->device, path->function); ir_ioapic[ir_ioapic_num].iommu = iommu; ir_ioapic[ir_ioapic_num].id = scope->enumeration_id; ir_ioapic_num++; @@ -748,41 +776,53 @@ static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header, * Finds the assocaition between IOAPIC's and its Interrupt-remapping * hardware unit. */ -int __init parse_ioapics_under_ir(void) +static int __init parse_ioapics_under_ir(void) { struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; int ir_supported = 0; + int ioapic_idx; - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu = drhd->iommu; - + for_each_iommu(iommu, drhd) if (ecap_ir_support(iommu->ecap)) { if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu)) return -1; ir_supported = 1; } - } - if (ir_supported && ir_ioapic_num != nr_ioapics) { - printk(KERN_WARNING - "Not all IO-APIC's listed under remapping hardware\n"); - return -1; + if (!ir_supported) + return 0; + + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { + int ioapic_id = mpc_ioapic_id(ioapic_idx); + if (!map_ioapic_to_ir(ioapic_id)) { + pr_err(FW_BUG "ioapic %d has no mapping iommu, " + "interrupt remapping will be disabled\n", + ioapic_id); + return -1; + } } - return ir_supported; + return 1; } -int __init ir_dev_scope_init(void) +static int __init ir_dev_scope_init(void) { - if (!intr_remapping_enabled) + int ret; + + if (!irq_remapping_enabled) return 0; - return dmar_dev_scope_init(); + down_write(&dmar_global_lock); + ret = dmar_dev_scope_init(); + up_write(&dmar_global_lock); + + return ret; } rootfs_initcall(ir_dev_scope_init); -void disable_intr_remapping(void) +static void disable_irq_remapping(void) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu = NULL; @@ -794,11 +834,11 @@ void disable_intr_remapping(void) if (!ecap_ir_support(iommu->ecap)) continue; - iommu_disable_intr_remapping(iommu); + iommu_disable_irq_remapping(iommu); } } -int reenable_intr_remapping(int eim) +static int reenable_irq_remapping(int eim) { struct dmar_drhd_unit *drhd; int setup = 0; @@ -816,7 +856,7 @@ int reenable_intr_remapping(int eim) continue; /* Set up interrupt remapping for iommu.*/ - iommu_set_intr_remapping(iommu, eim); + iommu_set_irq_remapping(iommu, eim); setup = 1; } @@ -832,3 +872,275 @@ error: return -1; } +static void prepare_irte(struct irte *irte, int vector, + unsigned int dest) +{ + 
memset(irte, 0, sizeof(*irte)); + + irte->present = 1; + irte->dst_mode = apic->irq_dest_mode; + /* + * Trigger mode in the IRTE will always be edge, and for IO-APIC, the + * actual level or edge trigger will be setup in the IO-APIC + * RTE. This will help simplify level triggered irq migration. + * For more details, see the comments (in io_apic.c) explainig IO-APIC + * irq migration in the presence of interrupt-remapping. + */ + irte->trigger_mode = 0; + irte->dlvry_mode = apic->irq_delivery_mode; + irte->vector = vector; + irte->dest_id = IRTE_DEST(dest); + irte->redir_hint = 1; +} + +static int intel_setup_ioapic_entry(int irq, + struct IO_APIC_route_entry *route_entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) +{ + int ioapic_id = mpc_ioapic_id(attr->ioapic); + struct intel_iommu *iommu; + struct IR_IO_APIC_route_entry *entry; + struct irte irte; + int index; + + down_read(&dmar_global_lock); + iommu = map_ioapic_to_ir(ioapic_id); + if (!iommu) { + pr_warn("No mapping iommu for ioapic %d\n", ioapic_id); + index = -ENODEV; + } else { + index = alloc_irte(iommu, irq, 1); + if (index < 0) { + pr_warn("Failed to allocate IRTE for ioapic %d\n", + ioapic_id); + index = -ENOMEM; + } + } + up_read(&dmar_global_lock); + if (index < 0) + return index; + + prepare_irte(&irte, vector, destination); + + /* Set source-id of interrupt request */ + set_ioapic_sid(&irte, ioapic_id); + + modify_irte(irq, &irte); + + apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " + "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " + "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " + "Avail:%X Vector:%02X Dest:%08X " + "SID:%04X SQ:%X SVT:%X)\n", + attr->ioapic, irte.present, irte.fpd, irte.dst_mode, + irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, + irte.avail, irte.vector, irte.dest_id, + irte.sid, irte.sq, irte.svt); + + entry = (struct IR_IO_APIC_route_entry *)route_entry; + memset(entry, 0, sizeof(*entry)); + + entry->index2 = (index >> 15) & 0x1; + entry->zero = 0; + entry->format = 1; + entry->index = (index & 0x7fff); + /* + * IO-APIC RTE will be configured with virtual vector. + * irq handler will do the explicit EOI to the io-apic. + */ + entry->vector = attr->ioapic_pin; + entry->mask = 0; /* enable IRQ */ + entry->trigger = attr->trigger; + entry->polarity = attr->polarity; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (attr->trigger) + entry->mask = 1; + + return 0; +} + +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For both level and edge triggered, irq migration is a simple atomic + * update(of vector and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we eliminate the io-apic RTE modification (with the + * updated vector information), by using a virtual vector (io-apic pin number). + * Real vector that is used for interrupting cpu will be coming from + * the interrupt-remapping table entry. + * + * As the migration is a simple atomic update of IRTE, the same mechanism + * is used to migrate MSI irq's in the presence of interrupt-remapping. 
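+ * + * In code form, the affinity update below boils down to (minimal sketch, reusing the variables of intel_ioapic_set_affinity): + * irte.vector = cfg->vector; + * irte.dest_id = IRTE_DEST(dest); + * modify_irte(irq, &irte); + * after which the interrupt entry cache has been flushed and new interrupts arrive at the updated destination.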
+ */ +static int +intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_cfg *cfg = data->chip_data; + unsigned int dest, irq = data->irq; + struct irte irte; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -EINVAL; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + if (get_irte(irq, &irte)) + return -EBUSY; + + err = assign_irq_vector(irq, cfg, mask); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Atomically updates the IRTE with the new destination, vector + * and flushes the interrupt entry cache. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + cpumask_copy(data->affinity, mask); + return 0; +} + +static void intel_compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg; + struct irte irte; + u16 sub_handle = 0; + int ir_index; + + cfg = irq_get_chip_data(irq); + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + prepare_irte(&irte, cfg->vector, dest); + + /* Set source-id of interrupt request */ + if (pdev) + set_msi_sid(&irte, pdev); + else + set_hpet_sid(&irte, hpet_id); + + modify_irte(irq, &irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); +} + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int intel_msi_alloc_irq(struct pci_dev *dev, int irq, int nvec) +{ + struct intel_iommu *iommu; + int index; + + down_read(&dmar_global_lock); + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + index = -ENOENT; + } else { + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", + nvec, pci_name(dev)); + index = -ENOSPC; + } + } + up_read(&dmar_global_lock); + + return index; +} + +static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle) +{ + struct intel_iommu *iommu; + int ret = -ENOENT; + + down_read(&dmar_global_lock); + iommu = map_dev_to_ir(pdev); + if (iommu) { + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. 
+ */ + set_irte_irq(irq, iommu, index, sub_handle); + ret = 0; + } + up_read(&dmar_global_lock); + + return ret; +} + +static int intel_setup_hpet_msi(unsigned int irq, unsigned int id) +{ + int ret = -1; + struct intel_iommu *iommu; + int index; + + down_read(&dmar_global_lock); + iommu = map_hpet_to_ir(id); + if (iommu) { + index = alloc_irte(iommu, irq, 1); + if (index >= 0) + ret = 0; + } + up_read(&dmar_global_lock); + + return ret; +} + +struct irq_remap_ops intel_irq_remap_ops = { + .supported = intel_irq_remapping_supported, + .prepare = dmar_table_init, + .enable = intel_enable_irq_remapping, + .disable = disable_irq_remapping, + .reenable = reenable_irq_remapping, + .enable_faulting = enable_drhd_fault_handling, + .setup_ioapic_entry = intel_setup_ioapic_entry, + .set_affinity = intel_ioapic_set_affinity, + .free_irq = free_irte, + .compose_msi_msg = intel_compose_msi_msg, + .msi_alloc_irq = intel_msi_alloc_irq, + .msi_setup_irq = intel_msi_setup_irq, + .setup_hpet_msi = intel_setup_hpet_msi, +}; diff --git a/drivers/iommu/intr_remapping.h b/drivers/iommu/intr_remapping.h deleted file mode 100644 index 5662fecfee6..00000000000 --- a/drivers/iommu/intr_remapping.h +++ /dev/null @@ -1,17 +0,0 @@ -#include <linux/intel-iommu.h> - -struct ioapic_scope { - struct intel_iommu *iommu; - unsigned int id; - unsigned int bus; /* PCI bus number */ - unsigned int devfn; /* PCI devfn number */ -}; - -struct hpet_scope { - struct intel_iommu *iommu; - u8 id; - unsigned int bus; - unsigned int devfn; -}; - -#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) diff --git a/drivers/iommu/iommu-traces.c b/drivers/iommu/iommu-traces.c new file mode 100644 index 00000000000..bf3b317ff0c --- /dev/null +++ b/drivers/iommu/iommu-traces.c @@ -0,0 +1,27 @@ +/* + * iommu trace points + * + * Copyright (C) 2013 Shuah Khan <shuah.kh@samsung.com> + * + */ + +#include <linux/string.h> +#include <linux/types.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/iommu.h> + +/* iommu_group_event */ +EXPORT_TRACEPOINT_SYMBOL_GPL(add_device_to_group); +EXPORT_TRACEPOINT_SYMBOL_GPL(remove_device_from_group); + +/* iommu_device_event */ +EXPORT_TRACEPOINT_SYMBOL_GPL(attach_device_to_domain); +EXPORT_TRACEPOINT_SYMBOL_GPL(detach_device_from_domain); + +/* iommu_map_unmap */ +EXPORT_TRACEPOINT_SYMBOL_GPL(map); +EXPORT_TRACEPOINT_SYMBOL_GPL(unmap); + +/* iommu_error */ +EXPORT_TRACEPOINT_SYMBOL_GPL(io_page_fault); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 2198b2dbbcd..e5555fcfe70 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -26,60 +26,569 @@ #include <linux/slab.h> #include <linux/errno.h> #include <linux/iommu.h> +#include <linux/idr.h> +#include <linux/notifier.h> +#include <linux/err.h> +#include <trace/events/iommu.h> + +static struct kset *iommu_group_kset; +static struct ida iommu_group_ida; +static struct mutex iommu_group_mutex; + +struct iommu_group { + struct kobject kobj; + struct kobject *devices_kobj; + struct list_head devices; + struct mutex mutex; + struct blocking_notifier_head notifier; + void *iommu_data; + void (*iommu_data_release)(void *iommu_data); + char *name; + int id; +}; + +struct iommu_device { + struct list_head list; + struct device *dev; + char *name; +}; + +struct iommu_group_attribute { + struct attribute attr; + ssize_t (*show)(struct iommu_group *group, char *buf); + ssize_t (*store)(struct iommu_group *group, + const char *buf, size_t count); +}; + +#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ +struct iommu_group_attribute 
iommu_group_attr_##_name = \ + __ATTR(_name, _mode, _show, _store) -static ssize_t show_iommu_group(struct device *dev, - struct device_attribute *attr, char *buf) +#define to_iommu_group_attr(_attr) \ + container_of(_attr, struct iommu_group_attribute, attr) +#define to_iommu_group(_kobj) \ + container_of(_kobj, struct iommu_group, kobj) + +static ssize_t iommu_group_attr_show(struct kobject *kobj, + struct attribute *__attr, char *buf) { - unsigned int groupid; + struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); + struct iommu_group *group = to_iommu_group(kobj); + ssize_t ret = -EIO; - if (iommu_device_group(dev, &groupid)) - return 0; + if (attr->show) + ret = attr->show(group, buf); + return ret; +} + +static ssize_t iommu_group_attr_store(struct kobject *kobj, + struct attribute *__attr, + const char *buf, size_t count) +{ + struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); + struct iommu_group *group = to_iommu_group(kobj); + ssize_t ret = -EIO; - return sprintf(buf, "%u", groupid); + if (attr->store) + ret = attr->store(group, buf, count); + return ret; } -static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL); -static int add_iommu_group(struct device *dev, void *data) +static const struct sysfs_ops iommu_group_sysfs_ops = { + .show = iommu_group_attr_show, + .store = iommu_group_attr_store, +}; + +static int iommu_group_create_file(struct iommu_group *group, + struct iommu_group_attribute *attr) +{ + return sysfs_create_file(&group->kobj, &attr->attr); +} + +static void iommu_group_remove_file(struct iommu_group *group, + struct iommu_group_attribute *attr) +{ + sysfs_remove_file(&group->kobj, &attr->attr); +} + +static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) +{ + return sprintf(buf, "%s\n", group->name); +} + +static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); + +static void iommu_group_release(struct kobject *kobj) +{ + struct iommu_group *group = to_iommu_group(kobj); + + if (group->iommu_data_release) + group->iommu_data_release(group->iommu_data); + + mutex_lock(&iommu_group_mutex); + ida_remove(&iommu_group_ida, group->id); + mutex_unlock(&iommu_group_mutex); + + kfree(group->name); + kfree(group); +} + +static struct kobj_type iommu_group_ktype = { + .sysfs_ops = &iommu_group_sysfs_ops, + .release = iommu_group_release, +}; + +/** + * iommu_group_alloc - Allocate a new group + * @name: Optional name to associate with group, visible in sysfs + * + * This function is called by an iommu driver to allocate a new iommu + * group. The iommu group represents the minimum granularity of the iommu. + * Upon successful return, the caller holds a reference to the supplied + * group in order to hold the group until devices are added. Use + * iommu_group_put() to release this extra reference count, allowing the + * group to be automatically reclaimed once it has no devices or external + * references. 
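+ * + * Typical use by an iommu driver (minimal sketch, error handling omitted; "dev" is whatever device the driver is adding): + * group = iommu_group_alloc(); + * iommu_group_add_device(group, dev); + * iommu_group_put(group);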
+ */ +struct iommu_group *iommu_group_alloc(void) +{ + struct iommu_group *group; + int ret; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + group->kobj.kset = iommu_group_kset; + mutex_init(&group->mutex); + INIT_LIST_HEAD(&group->devices); + BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); + + mutex_lock(&iommu_group_mutex); + +again: + if (unlikely(0 == ida_pre_get(&iommu_group_ida, GFP_KERNEL))) { + kfree(group); + mutex_unlock(&iommu_group_mutex); + return ERR_PTR(-ENOMEM); + } + + if (-EAGAIN == ida_get_new(&iommu_group_ida, &group->id)) + goto again; + + mutex_unlock(&iommu_group_mutex); + + ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, + NULL, "%d", group->id); + if (ret) { + mutex_lock(&iommu_group_mutex); + ida_remove(&iommu_group_ida, group->id); + mutex_unlock(&iommu_group_mutex); + kfree(group); + return ERR_PTR(ret); + } + + group->devices_kobj = kobject_create_and_add("devices", &group->kobj); + if (!group->devices_kobj) { + kobject_put(&group->kobj); /* triggers .release & free */ + return ERR_PTR(-ENOMEM); + } + + /* + * The devices_kobj holds a reference on the group kobject, so + * as long as that exists so will the group. We can therefore + * use the devices_kobj for reference counting. + */ + kobject_put(&group->kobj); + + return group; +} +EXPORT_SYMBOL_GPL(iommu_group_alloc); + +struct iommu_group *iommu_group_get_by_id(int id) { - unsigned int groupid; + struct kobject *group_kobj; + struct iommu_group *group; + const char *name; + + if (!iommu_group_kset) + return NULL; + + name = kasprintf(GFP_KERNEL, "%d", id); + if (!name) + return NULL; + + group_kobj = kset_find_obj(iommu_group_kset, name); + kfree(name); + + if (!group_kobj) + return NULL; + + group = container_of(group_kobj, struct iommu_group, kobj); + BUG_ON(group->id != id); - if (iommu_device_group(dev, &groupid) == 0) - return device_create_file(dev, &dev_attr_iommu_group); + kobject_get(group->devices_kobj); + kobject_put(&group->kobj); + + return group; +} +EXPORT_SYMBOL_GPL(iommu_group_get_by_id); + +/** + * iommu_group_get_iommudata - retrieve iommu_data registered for a group + * @group: the group + * + * iommu drivers can store data in the group for use when doing iommu + * operations. This function provides a way to retrieve it. Caller + * should hold a group reference. + */ +void *iommu_group_get_iommudata(struct iommu_group *group) +{ + return group->iommu_data; +} +EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); + +/** + * iommu_group_set_iommudata - set iommu_data for a group + * @group: the group + * @iommu_data: new data + * @release: release function for iommu_data + * + * iommu drivers can store data in the group for use when doing iommu + * operations. This function provides a way to set the data after + * the group has been allocated. Caller should hold a group reference. + */ +void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, + void (*release)(void *iommu_data)) +{ + group->iommu_data = iommu_data; + group->iommu_data_release = release; +} +EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); + +/** + * iommu_group_set_name - set name for a group + * @group: the group + * @name: name + * + * Allow iommu driver to set a name for a group. When set it will + * appear in a name attribute file under the group in sysfs. 
+ */ +int iommu_group_set_name(struct iommu_group *group, const char *name) +{ + int ret; + + if (group->name) { + iommu_group_remove_file(group, &iommu_group_attr_name); + kfree(group->name); + group->name = NULL; + if (!name) + return 0; + } + + group->name = kstrdup(name, GFP_KERNEL); + if (!group->name) + return -ENOMEM; + + ret = iommu_group_create_file(group, &iommu_group_attr_name); + if (ret) { + kfree(group->name); + group->name = NULL; + return ret; + } return 0; } +EXPORT_SYMBOL_GPL(iommu_group_set_name); + +/** + * iommu_group_add_device - add a device to an iommu group + * @group: the group into which to add the device (reference should be held) + * @dev: the device + * + * This function is called by an iommu driver to add a device into a + * group. Adding a device increments the group reference count. + */ +int iommu_group_add_device(struct iommu_group *group, struct device *dev) +{ + int ret, i = 0; + struct iommu_device *device; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + device->dev = dev; + + ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); + if (ret) { + kfree(device); + return ret; + } + + device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); +rename: + if (!device->name) { + sysfs_remove_link(&dev->kobj, "iommu_group"); + kfree(device); + return -ENOMEM; + } + + ret = sysfs_create_link_nowarn(group->devices_kobj, + &dev->kobj, device->name); + if (ret) { + kfree(device->name); + if (ret == -EEXIST && i >= 0) { + /* + * Account for the slim chance of collision + * and append an instance to the name. + */ + device->name = kasprintf(GFP_KERNEL, "%s.%d", + kobject_name(&dev->kobj), i++); + goto rename; + } + + sysfs_remove_link(&dev->kobj, "iommu_group"); + kfree(device); + return ret; + } + + kobject_get(group->devices_kobj); + + dev->iommu_group = group; + + mutex_lock(&group->mutex); + list_add_tail(&device->list, &group->devices); + mutex_unlock(&group->mutex); + + /* Notify any listeners about change to group. */ + blocking_notifier_call_chain(&group->notifier, + IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev); + + trace_add_device_to_group(group->id, dev); + return 0; +} +EXPORT_SYMBOL_GPL(iommu_group_add_device); + +/** + * iommu_group_remove_device - remove a device from its current group + * @dev: device to be removed + * + * This function is called by an iommu driver to remove the device from + * its current group. This decrements the iommu group reference count. + */ +void iommu_group_remove_device(struct device *dev) +{ + struct iommu_group *group = dev->iommu_group; + struct iommu_device *tmp_device, *device = NULL; + + /* Pre-notify listeners that a device is being removed. 
*/ + blocking_notifier_call_chain(&group->notifier, + IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev); + + mutex_lock(&group->mutex); + list_for_each_entry(tmp_device, &group->devices, list) { + if (tmp_device->dev == dev) { + device = tmp_device; + list_del(&device->list); + break; + } + } + mutex_unlock(&group->mutex); + + if (!device) + return; + + sysfs_remove_link(group->devices_kobj, device->name); + sysfs_remove_link(&dev->kobj, "iommu_group"); + + trace_remove_device_from_group(group->id, dev); + + kfree(device->name); + kfree(device); + dev->iommu_group = NULL; + kobject_put(group->devices_kobj); +} +EXPORT_SYMBOL_GPL(iommu_group_remove_device); + +/** + * iommu_group_for_each_dev - iterate over each device in the group + * @group: the group + * @data: caller opaque data to be passed to callback function + * @fn: caller supplied callback function + * + * This function is called by group users to iterate over group devices. + * Callers should hold a reference count to the group during callback. + * The group->mutex is held across callbacks, which will block calls to + * iommu_group_add/remove_device. + */ +int iommu_group_for_each_dev(struct iommu_group *group, void *data, + int (*fn)(struct device *, void *)) +{ + struct iommu_device *device; + int ret = 0; + + mutex_lock(&group->mutex); + list_for_each_entry(device, &group->devices, list) { + ret = fn(device->dev, data); + if (ret) + break; + } + mutex_unlock(&group->mutex); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); + +/** + * iommu_group_get - Return the group for a device and increment reference + * @dev: get the group that this device belongs to + * + * This function is called by iommu drivers and users to get the group + * for the specified device. If found, the group is returned and the group + * reference is incremented, else NULL. + */ +struct iommu_group *iommu_group_get(struct device *dev) +{ + struct iommu_group *group = dev->iommu_group; + + if (group) + kobject_get(group->devices_kobj); + + return group; +} +EXPORT_SYMBOL_GPL(iommu_group_get); + +/** + * iommu_group_put - Decrement group reference + * @group: the group to use + * + * This function is called by iommu drivers and users to release the + * iommu group. Once the reference count is zero, the group is released. + */ +void iommu_group_put(struct iommu_group *group) +{ + if (group) + kobject_put(group->devices_kobj); +} +EXPORT_SYMBOL_GPL(iommu_group_put); + +/** + * iommu_group_register_notifier - Register a notifier for group changes + * @group: the group to watch + * @nb: notifier block to signal + * + * This function allows iommu group users to track changes in a group. + * See include/linux/iommu.h for actions sent via this notifier. Caller + * should hold a reference to the group throughout notifier registration. + */ +int iommu_group_register_notifier(struct iommu_group *group, + struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&group->notifier, nb); +} +EXPORT_SYMBOL_GPL(iommu_group_register_notifier); + +/** + * iommu_group_unregister_notifier - Unregister a notifier + * @group: the group to watch + * @nb: notifier block to signal + * + * Unregister a previously registered group notifier block. 
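+ * + * A minimal usage sketch, assuming a caller-defined struct notifier_block nb: + * group = iommu_group_get(dev); + * iommu_group_register_notifier(group, &nb); + * ... + * iommu_group_unregister_notifier(group, &nb); + * iommu_group_put(group);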
+ */ +int iommu_group_unregister_notifier(struct iommu_group *group, + struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&group->notifier, nb); +} +EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); + +/** + * iommu_group_id - Return ID for a group + * @group: the group to ID + * + * Return the unique ID for the group matching the sysfs group number. + */ +int iommu_group_id(struct iommu_group *group) +{ + return group->id; +} +EXPORT_SYMBOL_GPL(iommu_group_id); -static int remove_iommu_group(struct device *dev) +static int add_iommu_group(struct device *dev, void *data) { - unsigned int groupid; + struct iommu_ops *ops = data; + + if (!ops->add_device) + return -ENODEV; + + WARN_ON(dev->iommu_group); - if (iommu_device_group(dev, &groupid) == 0) - device_remove_file(dev, &dev_attr_iommu_group); + ops->add_device(dev); return 0; } -static int iommu_device_notifier(struct notifier_block *nb, - unsigned long action, void *data) +static int iommu_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data) { struct device *dev = data; + struct iommu_ops *ops = dev->bus->iommu_ops; + struct iommu_group *group; + unsigned long group_action = 0; + + /* + * ADD/DEL call into iommu driver ops if provided, which may + * result in ADD/DEL notifiers to group->notifier + */ + if (action == BUS_NOTIFY_ADD_DEVICE) { + if (ops->add_device) + return ops->add_device(dev); + } else if (action == BUS_NOTIFY_DEL_DEVICE) { + if (ops->remove_device && dev->iommu_group) { + ops->remove_device(dev); + return 0; + } + } + + /* + * Remaining BUS_NOTIFYs get filtered and republished to the + * group, if anyone is listening + */ + group = iommu_group_get(dev); + if (!group) + return 0; + + switch (action) { + case BUS_NOTIFY_BIND_DRIVER: + group_action = IOMMU_GROUP_NOTIFY_BIND_DRIVER; + break; + case BUS_NOTIFY_BOUND_DRIVER: + group_action = IOMMU_GROUP_NOTIFY_BOUND_DRIVER; + break; + case BUS_NOTIFY_UNBIND_DRIVER: + group_action = IOMMU_GROUP_NOTIFY_UNBIND_DRIVER; + break; + case BUS_NOTIFY_UNBOUND_DRIVER: + group_action = IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER; + break; + } - if (action == BUS_NOTIFY_ADD_DEVICE) - return add_iommu_group(dev, NULL); - else if (action == BUS_NOTIFY_DEL_DEVICE) - return remove_iommu_group(dev); + if (group_action) + blocking_notifier_call_chain(&group->notifier, + group_action, dev); + iommu_group_put(group); return 0; } -static struct notifier_block iommu_device_nb = { - .notifier_call = iommu_device_notifier, +static struct notifier_block iommu_bus_nb = { + .notifier_call = iommu_bus_notifier, }; static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops) { - bus_register_notifier(bus, &iommu_device_nb); - bus_for_each_dev(bus, NULL, NULL, add_iommu_group); + bus_register_notifier(bus, &iommu_bus_nb); + bus_for_each_dev(bus, NULL, ops, add_iommu_group); } /** @@ -119,6 +628,7 @@ EXPORT_SYMBOL_GPL(iommu_present); * iommu_set_fault_handler() - set a fault handler for an iommu domain * @domain: iommu domain * @handler: fault handler + * @token: user data, will be passed back to the fault handler * * This function should be used by IOMMU users which want to be notified * whenever an IOMMU fault happens. @@ -127,11 +637,13 @@ EXPORT_SYMBOL_GPL(iommu_present); * error code otherwise. 
*/ void iommu_set_fault_handler(struct iommu_domain *domain, - iommu_fault_handler_t handler) + iommu_fault_handler_t handler, + void *token) { BUG_ON(!domain); domain->handler = handler; + domain->handler_token = token; } EXPORT_SYMBOL_GPL(iommu_set_fault_handler); @@ -173,10 +685,14 @@ EXPORT_SYMBOL_GPL(iommu_domain_free); int iommu_attach_device(struct iommu_domain *domain, struct device *dev) { + int ret; if (unlikely(domain->ops->attach_dev == NULL)) return -ENODEV; - return domain->ops->attach_dev(domain, dev); + ret = domain->ops->attach_dev(domain, dev); + if (!ret) + trace_attach_device_to_domain(dev); + return ret; } EXPORT_SYMBOL_GPL(iommu_attach_device); @@ -186,11 +702,50 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev) return; domain->ops->detach_dev(domain, dev); + trace_detach_device_from_domain(dev); } EXPORT_SYMBOL_GPL(iommu_detach_device); -phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) +/* + * IOMMU groups are really the natural working unit of the IOMMU, but + * the IOMMU API works on domains and devices. Bridge that gap by + * iterating over the devices in a group. Ideally we'd have a single + * device which represents the requestor ID of the group, but we also + * allow IOMMU drivers to create policy defined minimum sets, where + * the physical hardware may be able to distinguish members, but we + * wish to group them at a higher level (ex. untrusted multi-function + * PCI devices). Thus we attach each device. + */ +static int iommu_group_do_attach_device(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + + return iommu_attach_device(domain, dev); +} + +int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) +{ + return iommu_group_for_each_dev(group, domain, + iommu_group_do_attach_device); +} +EXPORT_SYMBOL_GPL(iommu_attach_group); + +static int iommu_group_do_detach_device(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + + iommu_detach_device(domain, dev); + + return 0; +} + +void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) +{ + iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device); +} +EXPORT_SYMBOL_GPL(iommu_detach_group); + +phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { if (unlikely(domain->ops->iova_to_phys == NULL)) return 0; @@ -209,6 +764,38 @@ int iommu_domain_has_cap(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_domain_has_cap); +static size_t iommu_pgsize(struct iommu_domain *domain, + unsigned long addr_merge, size_t size) +{ + unsigned int pgsize_idx; + size_t pgsize; + + /* Max page size that still fits into 'size' */ + pgsize_idx = __fls(size); + + /* need to consider alignment requirements ? 
*/ + if (likely(addr_merge)) { + /* Max page size allowed by address */ + unsigned int align_pgsize_idx = __ffs(addr_merge); + pgsize_idx = min(pgsize_idx, align_pgsize_idx); + } + + /* build a mask of acceptable page sizes */ + pgsize = (1UL << (pgsize_idx + 1)) - 1; + + /* throw away page sizes not supported by the hardware */ + pgsize &= domain->ops->pgsize_bitmap; + + /* make sure we're still sane */ + BUG_ON(!pgsize); + + /* pick the biggest page */ + pgsize_idx = __fls(pgsize); + pgsize = 1UL << pgsize_idx; + + return pgsize; +} + int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { @@ -217,7 +804,8 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, size_t orig_size = size; int ret = 0; - if (unlikely(domain->ops->map == NULL)) + if (unlikely(domain->ops->unmap == NULL || + domain->ops->pgsize_bitmap == 0UL)) return -ENODEV; /* find out the minimum page size supported */ @@ -229,45 +817,18 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, * size of the smallest page supported by the hardware */ if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { - pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz " - "0x%x\n", iova, (unsigned long)paddr, - (unsigned long)size, min_pagesz); + pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", + iova, &paddr, size, min_pagesz); return -EINVAL; } - pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova, - (unsigned long)paddr, (unsigned long)size); + pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); while (size) { - unsigned long pgsize, addr_merge = iova | paddr; - unsigned int pgsize_idx; - - /* Max page size that still fits into 'size' */ - pgsize_idx = __fls(size); - - /* need to consider alignment requirements ? 
*/ - if (likely(addr_merge)) { - /* Max page size allowed by both iova and paddr */ - unsigned int align_pgsize_idx = __ffs(addr_merge); - - pgsize_idx = min(pgsize_idx, align_pgsize_idx); - } - - /* build a mask of acceptable page sizes */ - pgsize = (1UL << (pgsize_idx + 1)) - 1; - - /* throw away page sizes not supported by the hardware */ - pgsize &= domain->ops->pgsize_bitmap; - - /* make sure we're still sane */ - BUG_ON(!pgsize); - - /* pick the biggest page */ - pgsize_idx = __fls(pgsize); - pgsize = 1UL << pgsize_idx; + size_t pgsize = iommu_pgsize(domain, iova | paddr, size); - pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova, - (unsigned long)paddr, pgsize); + pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n", + iova, &paddr, pgsize); ret = domain->ops->map(domain, iova, paddr, pgsize, prot); if (ret) @@ -281,6 +842,8 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, /* unroll mapping in case something went wrong */ if (ret) iommu_unmap(domain, orig_iova, orig_size - size); + else + trace_map(iova, paddr, size); return ret; } @@ -291,7 +854,8 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) size_t unmapped_page, unmapped = 0; unsigned int min_pagesz; - if (unlikely(domain->ops->unmap == NULL)) + if (unlikely(domain->ops->unmap == NULL || + domain->ops->pgsize_bitmap == 0UL)) return -ENODEV; /* find out the minimum page size supported */ @@ -303,41 +867,131 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) * by the hardware */ if (!IS_ALIGNED(iova | size, min_pagesz)) { - pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n", - iova, (unsigned long)size, min_pagesz); + pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", + iova, size, min_pagesz); return -EINVAL; } - pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova, - (unsigned long)size); + pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); /* * Keep iterating until we either unmap 'size' bytes (or more) * or we hit an area that isn't mapped. 
*/ while (unmapped < size) { - size_t left = size - unmapped; + size_t pgsize = iommu_pgsize(domain, iova, size - unmapped); - unmapped_page = domain->ops->unmap(domain, iova, left); + unmapped_page = domain->ops->unmap(domain, iova, pgsize); if (!unmapped_page) break; - pr_debug("unmapped: iova 0x%lx size %lx\n", iova, - (unsigned long)unmapped_page); + pr_debug("unmapped: iova 0x%lx size 0x%zx\n", + iova, unmapped_page); iova += unmapped_page; unmapped += unmapped_page; } + trace_unmap(iova, 0, size); return unmapped; } EXPORT_SYMBOL_GPL(iommu_unmap); -int iommu_device_group(struct device *dev, unsigned int *groupid) + +int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, + phys_addr_t paddr, u64 size, int prot) +{ + if (unlikely(domain->ops->domain_window_enable == NULL)) + return -ENODEV; + + return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size, + prot); +} +EXPORT_SYMBOL_GPL(iommu_domain_window_enable); + +void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr) +{ + if (unlikely(domain->ops->domain_window_disable == NULL)) + return; + + return domain->ops->domain_window_disable(domain, wnd_nr); +} +EXPORT_SYMBOL_GPL(iommu_domain_window_disable); + +static int __init iommu_init(void) { - if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group) - return dev->bus->iommu_ops->device_group(dev, groupid); + iommu_group_kset = kset_create_and_add("iommu_groups", + NULL, kernel_kobj); + ida_init(&iommu_group_ida); + mutex_init(&iommu_group_mutex); + + BUG_ON(!iommu_group_kset); - return -ENODEV; + return 0; +} +arch_initcall(iommu_init); + +int iommu_domain_get_attr(struct iommu_domain *domain, + enum iommu_attr attr, void *data) +{ + struct iommu_domain_geometry *geometry; + bool *paging; + int ret = 0; + u32 *count; + + switch (attr) { + case DOMAIN_ATTR_GEOMETRY: + geometry = data; + *geometry = domain->geometry; + + break; + case DOMAIN_ATTR_PAGING: + paging = data; + *paging = (domain->ops->pgsize_bitmap != 0UL); + break; + case DOMAIN_ATTR_WINDOWS: + count = data; + + if (domain->ops->domain_get_windows != NULL) + *count = domain->ops->domain_get_windows(domain); + else + ret = -ENODEV; + + break; + default: + if (!domain->ops->domain_get_attr) + return -EINVAL; + + ret = domain->ops->domain_get_attr(domain, attr, data); + } + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_domain_get_attr); + +int iommu_domain_set_attr(struct iommu_domain *domain, + enum iommu_attr attr, void *data) +{ + int ret = 0; + u32 *count; + + switch (attr) { + case DOMAIN_ATTR_WINDOWS: + count = data; + + if (domain->ops->domain_set_windows != NULL) + ret = domain->ops->domain_set_windows(domain, *count); + else + ret = -ENODEV; + + break; + default: + if (domain->ops->domain_set_attr == NULL) + return -EINVAL; + + ret = domain->ops->domain_set_attr(domain, attr, data); + } + + return ret; } -EXPORT_SYMBOL_GPL(iommu_device_group); +EXPORT_SYMBOL_GPL(iommu_domain_set_attr); diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index c5c274ab5c5..f6b17e6af2f 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -198,10 +198,10 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova) /** * alloc_iova - allocates an iova - * @iovad - iova domain in question - * @size - size of page frames to allocate - * @limit_pfn - max limit address - * @size_aligned - set if size_aligned address range is required + * @iovad: - iova domain in question + * @size: - size of page frames to allocate + * @limit_pfn: - max limit address + * @size_aligned: - set 
if size_aligned address range is required * This function allocates an iova in the range limit_pfn to IOVA_START_PFN * looking from limit_pfn instead from IOVA_START_PFN. If the size_aligned * flag is set then the allocated address iova->pfn_lo will be naturally @@ -238,8 +238,8 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, /** * find_iova - find's an iova for a given pfn - * @iovad - iova domain in question. - * pfn - page frame number + * @iovad: - iova domain in question. + * @pfn: - page frame number * This function finds and returns an iova belonging to the * given doamin which matches the given pfn. */ @@ -260,7 +260,7 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) /* We are not holding the lock while this iova * is referenced by the caller as the same thread * which called this function also calls __free_iova() - * and it is by desing that only one thread can possibly + * and it is by design that only one thread can possibly * reference a particular iova and hence no conflict. */ return iova; @@ -342,19 +342,30 @@ __is_range_overlap(struct rb_node *node, return 0; } +static inline struct iova * +alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi) +{ + struct iova *iova; + + iova = alloc_iova_mem(); + if (iova) { + iova->pfn_lo = pfn_lo; + iova->pfn_hi = pfn_hi; + } + + return iova; +} + static struct iova * __insert_new_range(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi) { struct iova *iova; - iova = alloc_iova_mem(); - if (!iova) - return iova; + iova = alloc_and_init_iova(pfn_lo, pfn_hi); + if (iova) + iova_insert_rbtree(&iovad->rbroot, iova); - iova->pfn_hi = pfn_hi; - iova->pfn_lo = pfn_lo; - iova_insert_rbtree(&iovad->rbroot, iova); return iova; } @@ -433,3 +444,44 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) } spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); } + +struct iova * +split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, + unsigned long pfn_lo, unsigned long pfn_hi) +{ + unsigned long flags; + struct iova *prev = NULL, *next = NULL; + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + if (iova->pfn_lo < pfn_lo) { + prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1); + if (prev == NULL) + goto error; + } + if (iova->pfn_hi > pfn_hi) { + next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi); + if (next == NULL) + goto error; + } + + __cached_rbnode_delete_update(iovad, iova); + rb_erase(&iova->node, &iovad->rbroot); + + if (prev) { + iova_insert_rbtree(&iovad->rbroot, prev); + iova->pfn_lo = pfn_lo; + } + if (next) { + iova_insert_rbtree(&iovad->rbroot, next); + iova->pfn_hi = pfn_hi; + } + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + + return iova; + +error: + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + if (prev) + free_iova_mem(prev); + return NULL; +} diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c new file mode 100644 index 00000000000..53cde086e83 --- /dev/null +++ b/drivers/iommu/ipmmu-vmsa.c @@ -0,0 +1,1255 @@ +/* + * IPMMU VMSA + * + * Copyright (C) 2014 Renesas Electronics Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ */ + +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/platform_data/ipmmu-vmsa.h> +#include <linux/platform_device.h> +#include <linux/sizes.h> +#include <linux/slab.h> + +#include <asm/dma-iommu.h> +#include <asm/pgalloc.h> + +struct ipmmu_vmsa_device { + struct device *dev; + void __iomem *base; + struct list_head list; + + const struct ipmmu_vmsa_platform_data *pdata; + unsigned int num_utlbs; + + struct dma_iommu_mapping *mapping; +}; + +struct ipmmu_vmsa_domain { + struct ipmmu_vmsa_device *mmu; + struct iommu_domain *io_domain; + + unsigned int context_id; + spinlock_t lock; /* Protects mappings */ + pgd_t *pgd; +}; + +struct ipmmu_vmsa_archdata { + struct ipmmu_vmsa_device *mmu; + unsigned int utlb; +}; + +static DEFINE_SPINLOCK(ipmmu_devices_lock); +static LIST_HEAD(ipmmu_devices); + +#define TLB_LOOP_TIMEOUT 100 /* 100us */ + +/* ----------------------------------------------------------------------------- + * Registers Definition + */ + +#define IM_CTX_SIZE 0x40 + +#define IMCTR 0x0000 +#define IMCTR_TRE (1 << 17) +#define IMCTR_AFE (1 << 16) +#define IMCTR_RTSEL_MASK (3 << 4) +#define IMCTR_RTSEL_SHIFT 4 +#define IMCTR_TREN (1 << 3) +#define IMCTR_INTEN (1 << 2) +#define IMCTR_FLUSH (1 << 1) +#define IMCTR_MMUEN (1 << 0) + +#define IMCAAR 0x0004 + +#define IMTTBCR 0x0008 +#define IMTTBCR_EAE (1 << 31) +#define IMTTBCR_PMB (1 << 30) +#define IMTTBCR_SH1_NON_SHAREABLE (0 << 28) +#define IMTTBCR_SH1_OUTER_SHAREABLE (2 << 28) +#define IMTTBCR_SH1_INNER_SHAREABLE (3 << 28) +#define IMTTBCR_SH1_MASK (3 << 28) +#define IMTTBCR_ORGN1_NC (0 << 26) +#define IMTTBCR_ORGN1_WB_WA (1 << 26) +#define IMTTBCR_ORGN1_WT (2 << 26) +#define IMTTBCR_ORGN1_WB (3 << 26) +#define IMTTBCR_ORGN1_MASK (3 << 26) +#define IMTTBCR_IRGN1_NC (0 << 24) +#define IMTTBCR_IRGN1_WB_WA (1 << 24) +#define IMTTBCR_IRGN1_WT (2 << 24) +#define IMTTBCR_IRGN1_WB (3 << 24) +#define IMTTBCR_IRGN1_MASK (3 << 24) +#define IMTTBCR_TSZ1_MASK (7 << 16) +#define IMTTBCR_TSZ1_SHIFT 16 +#define IMTTBCR_SH0_NON_SHAREABLE (0 << 12) +#define IMTTBCR_SH0_OUTER_SHAREABLE (2 << 12) +#define IMTTBCR_SH0_INNER_SHAREABLE (3 << 12) +#define IMTTBCR_SH0_MASK (3 << 12) +#define IMTTBCR_ORGN0_NC (0 << 10) +#define IMTTBCR_ORGN0_WB_WA (1 << 10) +#define IMTTBCR_ORGN0_WT (2 << 10) +#define IMTTBCR_ORGN0_WB (3 << 10) +#define IMTTBCR_ORGN0_MASK (3 << 10) +#define IMTTBCR_IRGN0_NC (0 << 8) +#define IMTTBCR_IRGN0_WB_WA (1 << 8) +#define IMTTBCR_IRGN0_WT (2 << 8) +#define IMTTBCR_IRGN0_WB (3 << 8) +#define IMTTBCR_IRGN0_MASK (3 << 8) +#define IMTTBCR_SL0_LVL_2 (0 << 4) +#define IMTTBCR_SL0_LVL_1 (1 << 4) +#define IMTTBCR_TSZ0_MASK (7 << 0) +#define IMTTBCR_TSZ0_SHIFT 0 + +#define IMBUSCR 0x000c +#define IMBUSCR_DVM (1 << 2) +#define IMBUSCR_BUSSEL_SYS (0 << 0) +#define IMBUSCR_BUSSEL_CCI (1 << 0) +#define IMBUSCR_BUSSEL_IMCAAR (2 << 0) +#define IMBUSCR_BUSSEL_CCI_IMCAAR (3 << 0) +#define IMBUSCR_BUSSEL_MASK (3 << 0) + +#define IMTTLBR0 0x0010 +#define IMTTUBR0 0x0014 +#define IMTTLBR1 0x0018 +#define IMTTUBR1 0x001c + +#define IMSTR 0x0020 +#define IMSTR_ERRLVL_MASK (3 << 12) +#define IMSTR_ERRLVL_SHIFT 12 +#define IMSTR_ERRCODE_TLB_FORMAT (1 << 8) +#define IMSTR_ERRCODE_ACCESS_PERM (4 << 8) +#define IMSTR_ERRCODE_SECURE_ACCESS (5 << 8) +#define IMSTR_ERRCODE_MASK (7 << 8) +#define IMSTR_MHIT (1 << 4) +#define IMSTR_ABORT (1 << 2) +#define IMSTR_PF (1 << 1) +#define 
IMSTR_TF (1 << 0) + +#define IMMAIR0 0x0028 +#define IMMAIR1 0x002c +#define IMMAIR_ATTR_MASK 0xff +#define IMMAIR_ATTR_DEVICE 0x04 +#define IMMAIR_ATTR_NC 0x44 +#define IMMAIR_ATTR_WBRWA 0xff +#define IMMAIR_ATTR_SHIFT(n) ((n) << 3) +#define IMMAIR_ATTR_IDX_NC 0 +#define IMMAIR_ATTR_IDX_WBRWA 1 +#define IMMAIR_ATTR_IDX_DEV 2 + +#define IMEAR 0x0030 + +#define IMPCTR 0x0200 +#define IMPSTR 0x0208 +#define IMPEAR 0x020c +#define IMPMBA(n) (0x0280 + ((n) * 4)) +#define IMPMBD(n) (0x02c0 + ((n) * 4)) + +#define IMUCTR(n) (0x0300 + ((n) * 16)) +#define IMUCTR_FIXADDEN (1 << 31) +#define IMUCTR_FIXADD_MASK (0xff << 16) +#define IMUCTR_FIXADD_SHIFT 16 +#define IMUCTR_TTSEL_MMU(n) ((n) << 4) +#define IMUCTR_TTSEL_PMB (8 << 4) +#define IMUCTR_TTSEL_MASK (15 << 4) +#define IMUCTR_FLUSH (1 << 1) +#define IMUCTR_MMUEN (1 << 0) + +#define IMUASID(n) (0x0308 + ((n) * 16)) +#define IMUASID_ASID8_MASK (0xff << 8) +#define IMUASID_ASID8_SHIFT 8 +#define IMUASID_ASID0_MASK (0xff << 0) +#define IMUASID_ASID0_SHIFT 0 + +/* ----------------------------------------------------------------------------- + * Page Table Bits + */ + +/* + * VMSA states in section B3.6.3 "Control of Secure or Non-secure memory access, + * Long-descriptor format" that the NStable bit being set in a table descriptor + * will result in the NStable and NS bits of all child entries being ignored and + * considered as being set. The IPMMU seems not to comply with this, as it + * generates a secure access page fault if any of the NStable and NS bits isn't + * set when running in non-secure mode. + */ +#ifndef PMD_NSTABLE +#define PMD_NSTABLE (_AT(pmdval_t, 1) << 63) +#endif + +#define ARM_VMSA_PTE_XN (((pteval_t)3) << 53) +#define ARM_VMSA_PTE_CONT (((pteval_t)1) << 52) +#define ARM_VMSA_PTE_AF (((pteval_t)1) << 10) +#define ARM_VMSA_PTE_SH_NS (((pteval_t)0) << 8) +#define ARM_VMSA_PTE_SH_OS (((pteval_t)2) << 8) +#define ARM_VMSA_PTE_SH_IS (((pteval_t)3) << 8) +#define ARM_VMSA_PTE_SH_MASK (((pteval_t)3) << 8) +#define ARM_VMSA_PTE_NS (((pteval_t)1) << 5) +#define ARM_VMSA_PTE_PAGE (((pteval_t)3) << 0) + +/* Stage-1 PTE */ +#define ARM_VMSA_PTE_nG (((pteval_t)1) << 11) +#define ARM_VMSA_PTE_AP_UNPRIV (((pteval_t)1) << 6) +#define ARM_VMSA_PTE_AP_RDONLY (((pteval_t)2) << 6) +#define ARM_VMSA_PTE_AP_MASK (((pteval_t)3) << 6) +#define ARM_VMSA_PTE_ATTRINDX_MASK (((pteval_t)3) << 2) +#define ARM_VMSA_PTE_ATTRINDX_SHIFT 2 + +#define ARM_VMSA_PTE_ATTRS_MASK \ + (ARM_VMSA_PTE_XN | ARM_VMSA_PTE_CONT | ARM_VMSA_PTE_nG | \ + ARM_VMSA_PTE_AF | ARM_VMSA_PTE_SH_MASK | ARM_VMSA_PTE_AP_MASK | \ + ARM_VMSA_PTE_NS | ARM_VMSA_PTE_ATTRINDX_MASK) + +#define ARM_VMSA_PTE_CONT_ENTRIES 16 +#define ARM_VMSA_PTE_CONT_SIZE (PAGE_SIZE * ARM_VMSA_PTE_CONT_ENTRIES) + +#define IPMMU_PTRS_PER_PTE 512 +#define IPMMU_PTRS_PER_PMD 512 +#define IPMMU_PTRS_PER_PGD 4 + +/* ----------------------------------------------------------------------------- + * Read/Write Access + */ + +static u32 ipmmu_read(struct ipmmu_vmsa_device *mmu, unsigned int offset) +{ + return ioread32(mmu->base + offset); +} + +static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset, + u32 data) +{ + iowrite32(data, mmu->base + offset); +} + +static u32 ipmmu_ctx_read(struct ipmmu_vmsa_domain *domain, unsigned int reg) +{ + return ipmmu_read(domain->mmu, domain->context_id * IM_CTX_SIZE + reg); +} + +static void ipmmu_ctx_write(struct ipmmu_vmsa_domain *domain, unsigned int reg, + u32 data) +{ + ipmmu_write(domain->mmu, domain->context_id * IM_CTX_SIZE + reg, data); +} + +/* 
----------------------------------------------------------------------------- + * TLB and microTLB Management + */ + +/* Wait for any pending TLB invalidations to complete */ +static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain) +{ + unsigned int count = 0; + + while (ipmmu_ctx_read(domain, IMCTR) & IMCTR_FLUSH) { + cpu_relax(); + if (++count == TLB_LOOP_TIMEOUT) { + dev_err_ratelimited(domain->mmu->dev, + "TLB sync timed out -- MMU may be deadlocked\n"); + return; + } + udelay(1); + } +} + +static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain) +{ + u32 reg; + + reg = ipmmu_ctx_read(domain, IMCTR); + reg |= IMCTR_FLUSH; + ipmmu_ctx_write(domain, IMCTR, reg); + + ipmmu_tlb_sync(domain); +} + +/* + * Enable MMU translation for the microTLB. + */ +static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain, + unsigned int utlb) +{ + struct ipmmu_vmsa_device *mmu = domain->mmu; + + /* + * TODO: Reference-count the microTLB as several bus masters can be + * connected to the same microTLB. + */ + + /* TODO: What should we set the ASID to ? */ + ipmmu_write(mmu, IMUASID(utlb), 0); + /* TODO: Do we need to flush the microTLB ? */ + ipmmu_write(mmu, IMUCTR(utlb), + IMUCTR_TTSEL_MMU(domain->context_id) | IMUCTR_FLUSH | + IMUCTR_MMUEN); +} + +/* + * Disable MMU translation for the microTLB. + */ +static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, + unsigned int utlb) +{ + struct ipmmu_vmsa_device *mmu = domain->mmu; + + ipmmu_write(mmu, IMUCTR(utlb), 0); +} + +static void ipmmu_flush_pgtable(struct ipmmu_vmsa_device *mmu, void *addr, + size_t size) +{ + unsigned long offset = (unsigned long)addr & ~PAGE_MASK; + + /* + * TODO: Add support for coherent walk through CCI with DVM and remove + * cache handling. + */ + dma_map_page(mmu->dev, virt_to_page(addr), offset, size, DMA_TO_DEVICE); +} + +/* ----------------------------------------------------------------------------- + * Domain/Context Management + */ + +static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) +{ + phys_addr_t ttbr; + u32 reg; + + /* + * TODO: When adding support for multiple contexts, find an unused + * context. + */ + domain->context_id = 0; + + /* TTBR0 */ + ipmmu_flush_pgtable(domain->mmu, domain->pgd, + IPMMU_PTRS_PER_PGD * sizeof(*domain->pgd)); + ttbr = __pa(domain->pgd); + ipmmu_ctx_write(domain, IMTTLBR0, ttbr); + ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32); + + /* + * TTBCR + * We use long descriptors with inner-shareable WBWA tables and allocate + * the whole 32-bit VA space to TTBR0. + */ + ipmmu_ctx_write(domain, IMTTBCR, IMTTBCR_EAE | + IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | + IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1); + + /* + * MAIR0 + * We need three attributes only, non-cacheable, write-back read/write + * allocate and device memory. + */ + reg = (IMMAIR_ATTR_NC << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_NC)) + | (IMMAIR_ATTR_WBRWA << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_WBRWA)) + | (IMMAIR_ATTR_DEVICE << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_DEV)); + ipmmu_ctx_write(domain, IMMAIR0, reg); + + /* IMBUSCR */ + ipmmu_ctx_write(domain, IMBUSCR, + ipmmu_ctx_read(domain, IMBUSCR) & + ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); + + /* + * IMSTR + * Clear all interrupt flags. + */ + ipmmu_ctx_write(domain, IMSTR, ipmmu_ctx_read(domain, IMSTR)); + + /* + * IMCTR + * Enable the MMU and interrupt generation. The long-descriptor + * translation table format doesn't use TEX remapping. Don't enable AF + * software management as we have no use for it. 
Flush the TLB as + * required when modifying the context registers. + */ + ipmmu_ctx_write(domain, IMCTR, IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); + + return 0; +} + +static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) +{ + /* + * Disable the context. Flush the TLB as required when modifying the + * context registers. + * + * TODO: Is TLB flush really needed ? + */ + ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); + ipmmu_tlb_sync(domain); +} + +/* ----------------------------------------------------------------------------- + * Fault Handling + */ + +static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) +{ + const u32 err_mask = IMSTR_MHIT | IMSTR_ABORT | IMSTR_PF | IMSTR_TF; + struct ipmmu_vmsa_device *mmu = domain->mmu; + u32 status; + u32 iova; + + status = ipmmu_ctx_read(domain, IMSTR); + if (!(status & err_mask)) + return IRQ_NONE; + + iova = ipmmu_ctx_read(domain, IMEAR); + + /* + * Clear the error status flags. Unlike traditional interrupt flag + * registers that must be cleared by writing 1, this status register + * seems to require 0. The error address register must be read before, + * otherwise its value will be 0. + */ + ipmmu_ctx_write(domain, IMSTR, 0); + + /* Log fatal errors. */ + if (status & IMSTR_MHIT) + dev_err_ratelimited(mmu->dev, "Multiple TLB hits @0x%08x\n", + iova); + if (status & IMSTR_ABORT) + dev_err_ratelimited(mmu->dev, "Page Table Walk Abort @0x%08x\n", + iova); + + if (!(status & (IMSTR_PF | IMSTR_TF))) + return IRQ_NONE; + + /* + * Try to handle page faults and translation faults. + * + * TODO: We need to look up the faulty device based on the I/O VA. Use + * the IOMMU device for now. + */ + if (!report_iommu_fault(domain->io_domain, mmu->dev, iova, 0)) + return IRQ_HANDLED; + + dev_err_ratelimited(mmu->dev, + "Unhandled fault: status 0x%08x iova 0x%08x\n", + status, iova); + + return IRQ_HANDLED; +} + +static irqreturn_t ipmmu_irq(int irq, void *dev) +{ + struct ipmmu_vmsa_device *mmu = dev; + struct iommu_domain *io_domain; + struct ipmmu_vmsa_domain *domain; + + if (!mmu->mapping) + return IRQ_NONE; + + io_domain = mmu->mapping->domain; + domain = io_domain->priv; + + return ipmmu_domain_irq(domain); +} + +/* ----------------------------------------------------------------------------- + * Page Table Management + */ + +#define pud_pgtable(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) + +static void ipmmu_free_ptes(pmd_t *pmd) +{ + pgtable_t table = pmd_pgtable(*pmd); + __free_page(table); +} + +static void ipmmu_free_pmds(pud_t *pud) +{ + pmd_t *pmd = pmd_offset(pud, 0); + pgtable_t table; + unsigned int i; + + for (i = 0; i < IPMMU_PTRS_PER_PMD; ++i) { + if (!pmd_table(*pmd)) + continue; + + ipmmu_free_ptes(pmd); + pmd++; + } + + table = pud_pgtable(*pud); + __free_page(table); +} + +static void ipmmu_free_pgtables(struct ipmmu_vmsa_domain *domain) +{ + pgd_t *pgd, *pgd_base = domain->pgd; + unsigned int i; + + /* + * Recursively free the page tables for this domain. We don't care about + * speculative TLB filling, because the TLB will be nuked next time this + * context bank is re-allocated and no devices currently map to these + * tables. + */ + pgd = pgd_base; + for (i = 0; i < IPMMU_PTRS_PER_PGD; ++i) { + if (pgd_none(*pgd)) + continue; + ipmmu_free_pmds((pud_t *)pgd); + pgd++; + } + + kfree(pgd_base); +} + +/* + * We can't use the (pgd|pud|pmd|pte)_populate or the set_(pgd|pud|pmd|pte) + * functions as they would flush the CPU TLB. 
+ */ + +static pte_t *ipmmu_alloc_pte(struct ipmmu_vmsa_device *mmu, pmd_t *pmd, + unsigned long iova) +{ + pte_t *pte; + + if (!pmd_none(*pmd)) + return pte_offset_kernel(pmd, iova); + + pte = (pte_t *)get_zeroed_page(GFP_ATOMIC); + if (!pte) + return NULL; + + ipmmu_flush_pgtable(mmu, pte, PAGE_SIZE); + *pmd = __pmd(__pa(pte) | PMD_NSTABLE | PMD_TYPE_TABLE); + ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); + + return pte + pte_index(iova); +} + +static pmd_t *ipmmu_alloc_pmd(struct ipmmu_vmsa_device *mmu, pgd_t *pgd, + unsigned long iova) +{ + pud_t *pud = (pud_t *)pgd; + pmd_t *pmd; + + if (!pud_none(*pud)) + return pmd_offset(pud, iova); + + pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC); + if (!pmd) + return NULL; + + ipmmu_flush_pgtable(mmu, pmd, PAGE_SIZE); + *pud = __pud(__pa(pmd) | PMD_NSTABLE | PMD_TYPE_TABLE); + ipmmu_flush_pgtable(mmu, pud, sizeof(*pud)); + + return pmd + pmd_index(iova); +} + +static u64 ipmmu_page_prot(unsigned int prot, u64 type) +{ + u64 pgprot = ARM_VMSA_PTE_XN | ARM_VMSA_PTE_nG | ARM_VMSA_PTE_AF + | ARM_VMSA_PTE_SH_IS | ARM_VMSA_PTE_AP_UNPRIV + | ARM_VMSA_PTE_NS | type; + + if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) + pgprot |= ARM_VMSA_PTE_AP_RDONLY; + + if (prot & IOMMU_CACHE) + pgprot |= IMMAIR_ATTR_IDX_WBRWA << ARM_VMSA_PTE_ATTRINDX_SHIFT; + + if (prot & IOMMU_EXEC) + pgprot &= ~ARM_VMSA_PTE_XN; + else if (!(prot & (IOMMU_READ | IOMMU_WRITE))) + /* If no access create a faulting entry to avoid TLB fills. */ + pgprot &= ~ARM_VMSA_PTE_PAGE; + + return pgprot; +} + +static int ipmmu_alloc_init_pte(struct ipmmu_vmsa_device *mmu, pmd_t *pmd, + unsigned long iova, unsigned long pfn, + size_t size, int prot) +{ + pteval_t pteval = ipmmu_page_prot(prot, ARM_VMSA_PTE_PAGE); + unsigned int num_ptes = 1; + pte_t *pte, *start; + unsigned int i; + + pte = ipmmu_alloc_pte(mmu, pmd, iova); + if (!pte) + return -ENOMEM; + + start = pte; + + /* + * Install the page table entries. We can be called both for a single + * page or for a block of 16 physically contiguous pages. In the latter + * case set the PTE contiguous hint. + */ + if (size == SZ_64K) { + pteval |= ARM_VMSA_PTE_CONT; + num_ptes = ARM_VMSA_PTE_CONT_ENTRIES; + } + + for (i = num_ptes; i; --i) + *pte++ = pfn_pte(pfn++, __pgprot(pteval)); + + ipmmu_flush_pgtable(mmu, start, sizeof(*pte) * num_ptes); + + return 0; +} + +static int ipmmu_alloc_init_pmd(struct ipmmu_vmsa_device *mmu, pmd_t *pmd, + unsigned long iova, unsigned long pfn, + int prot) +{ + pmdval_t pmdval = ipmmu_page_prot(prot, PMD_TYPE_SECT); + + *pmd = pfn_pmd(pfn, __pgprot(pmdval)); + ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); + + return 0; +} + +static int ipmmu_create_mapping(struct ipmmu_vmsa_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t size, int prot) +{ + struct ipmmu_vmsa_device *mmu = domain->mmu; + pgd_t *pgd = domain->pgd; + unsigned long flags; + unsigned long pfn; + pmd_t *pmd; + int ret; + + if (!pgd) + return -EINVAL; + + if (size & ~PAGE_MASK) + return -EINVAL; + + if (paddr & ~((1ULL << 40) - 1)) + return -ERANGE; + + pfn = __phys_to_pfn(paddr); + pgd += pgd_index(iova); + + /* Update the page tables. 
*/ + spin_lock_irqsave(&domain->lock, flags); + + pmd = ipmmu_alloc_pmd(mmu, pgd, iova); + if (!pmd) { + ret = -ENOMEM; + goto done; + } + + switch (size) { + case SZ_2M: + ret = ipmmu_alloc_init_pmd(mmu, pmd, iova, pfn, prot); + break; + case SZ_64K: + case SZ_4K: + ret = ipmmu_alloc_init_pte(mmu, pmd, iova, pfn, size, prot); + break; + default: + ret = -EINVAL; + break; + } + +done: + spin_unlock_irqrestore(&domain->lock, flags); + + if (!ret) + ipmmu_tlb_invalidate(domain); + + return ret; +} + +static void ipmmu_clear_pud(struct ipmmu_vmsa_device *mmu, pud_t *pud) +{ + /* Free the page table. */ + pgtable_t table = pud_pgtable(*pud); + __free_page(table); + + /* Clear the PUD. */ + *pud = __pud(0); + ipmmu_flush_pgtable(mmu, pud, sizeof(*pud)); +} + +static void ipmmu_clear_pmd(struct ipmmu_vmsa_device *mmu, pud_t *pud, + pmd_t *pmd) +{ + unsigned int i; + + /* Free the page table. */ + if (pmd_table(*pmd)) { + pgtable_t table = pmd_pgtable(*pmd); + __free_page(table); + } + + /* Clear the PMD. */ + *pmd = __pmd(0); + ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); + + /* Check whether the PUD is still needed. */ + pmd = pmd_offset(pud, 0); + for (i = 0; i < IPMMU_PTRS_PER_PMD; ++i) { + if (!pmd_none(pmd[i])) + return; + } + + /* Clear the parent PUD. */ + ipmmu_clear_pud(mmu, pud); +} + +static void ipmmu_clear_pte(struct ipmmu_vmsa_device *mmu, pud_t *pud, + pmd_t *pmd, pte_t *pte, unsigned int num_ptes) +{ + unsigned int i; + + /* Clear the PTE. */ + for (i = num_ptes; i; --i) + pte[i-1] = __pte(0); + + ipmmu_flush_pgtable(mmu, pte, sizeof(*pte) * num_ptes); + + /* Check whether the PMD is still needed. */ + pte = pte_offset_kernel(pmd, 0); + for (i = 0; i < IPMMU_PTRS_PER_PTE; ++i) { + if (!pte_none(pte[i])) + return; + } + + /* Clear the parent PMD. */ + ipmmu_clear_pmd(mmu, pud, pmd); +} + +static int ipmmu_split_pmd(struct ipmmu_vmsa_device *mmu, pmd_t *pmd) +{ + pte_t *pte, *start; + pteval_t pteval; + unsigned long pfn; + unsigned int i; + + pte = (pte_t *)get_zeroed_page(GFP_ATOMIC); + if (!pte) + return -ENOMEM; + + /* Copy the PMD attributes. */ + pteval = (pmd_val(*pmd) & ARM_VMSA_PTE_ATTRS_MASK) + | ARM_VMSA_PTE_CONT | ARM_VMSA_PTE_PAGE; + + pfn = pmd_pfn(*pmd); + start = pte; + + for (i = IPMMU_PTRS_PER_PTE; i; --i) + *pte++ = pfn_pte(pfn++, __pgprot(pteval)); + + ipmmu_flush_pgtable(mmu, start, PAGE_SIZE); + *pmd = __pmd(__pa(start) | PMD_NSTABLE | PMD_TYPE_TABLE); + ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); + + return 0; +} + +static void ipmmu_split_pte(struct ipmmu_vmsa_device *mmu, pte_t *pte) +{ + unsigned int i; + + for (i = ARM_VMSA_PTE_CONT_ENTRIES; i; --i) + pte[i-1] = __pte(pte_val(*pte) & ~ARM_VMSA_PTE_CONT); + + ipmmu_flush_pgtable(mmu, pte, sizeof(*pte) * ARM_VMSA_PTE_CONT_ENTRIES); +} + +static int ipmmu_clear_mapping(struct ipmmu_vmsa_domain *domain, + unsigned long iova, size_t size) +{ + struct ipmmu_vmsa_device *mmu = domain->mmu; + unsigned long flags; + pgd_t *pgd = domain->pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int ret = 0; + + if (!pgd) + return -EINVAL; + + if (size & ~PAGE_MASK) + return -EINVAL; + + pgd += pgd_index(iova); + pud = (pud_t *)pgd; + + spin_lock_irqsave(&domain->lock, flags); + + /* If there's no PUD or PMD we're done. */ + if (pud_none(*pud)) + goto done; + + pmd = pmd_offset(pud, iova); + if (pmd_none(*pmd)) + goto done; + + /* + * When freeing a 2MB block just clear the PMD. In the unlikely case the + * block is mapped as individual pages this will free the corresponding + * PTE page table. 
+ */ + if (size == SZ_2M) { + ipmmu_clear_pmd(mmu, pud, pmd); + goto done; + } + + /* + * If the PMD has been mapped as a section remap it as pages to allow + * freeing individual pages. + */ + if (pmd_sect(*pmd)) + ipmmu_split_pmd(mmu, pmd); + + pte = pte_offset_kernel(pmd, iova); + + /* + * When freeing a 64kB block just clear the PTE entries. We don't have + * to care about the contiguous hint of the surrounding entries. + */ + if (size == SZ_64K) { + ipmmu_clear_pte(mmu, pud, pmd, pte, ARM_VMSA_PTE_CONT_ENTRIES); + goto done; + } + + /* + * If the PTE has been mapped with the contiguous hint set remap it and + * its surrounding PTEs to allow unmapping a single page. + */ + if (pte_val(*pte) & ARM_VMSA_PTE_CONT) + ipmmu_split_pte(mmu, pte); + + /* Clear the PTE. */ + ipmmu_clear_pte(mmu, pud, pmd, pte, 1); + +done: + spin_unlock_irqrestore(&domain->lock, flags); + + if (ret) + ipmmu_tlb_invalidate(domain); + + return 0; +} + +/* ----------------------------------------------------------------------------- + * IOMMU Operations + */ + +static int ipmmu_domain_init(struct iommu_domain *io_domain) +{ + struct ipmmu_vmsa_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return -ENOMEM; + + spin_lock_init(&domain->lock); + + domain->pgd = kzalloc(IPMMU_PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + if (!domain->pgd) { + kfree(domain); + return -ENOMEM; + } + + io_domain->priv = domain; + domain->io_domain = io_domain; + + return 0; +} + +static void ipmmu_domain_destroy(struct iommu_domain *io_domain) +{ + struct ipmmu_vmsa_domain *domain = io_domain->priv; + + /* + * Free the domain resources. We assume that all devices have already + * been detached. + */ + ipmmu_domain_destroy_context(domain); + ipmmu_free_pgtables(domain); + kfree(domain); +} + +static int ipmmu_attach_device(struct iommu_domain *io_domain, + struct device *dev) +{ + struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + struct ipmmu_vmsa_device *mmu = archdata->mmu; + struct ipmmu_vmsa_domain *domain = io_domain->priv; + unsigned long flags; + int ret = 0; + + if (!mmu) { + dev_err(dev, "Cannot attach to IPMMU\n"); + return -ENXIO; + } + + spin_lock_irqsave(&domain->lock, flags); + + if (!domain->mmu) { + /* The domain hasn't been used yet, initialize it. */ + domain->mmu = mmu; + ret = ipmmu_domain_init_context(domain); + } else if (domain->mmu != mmu) { + /* + * Something is wrong, we can't attach two devices using + * different IOMMUs to the same domain. + */ + dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n", + dev_name(mmu->dev), dev_name(domain->mmu->dev)); + ret = -EINVAL; + } + + spin_unlock_irqrestore(&domain->lock, flags); + + if (ret < 0) + return ret; + + ipmmu_utlb_enable(domain, archdata->utlb); + + return 0; +} + +static void ipmmu_detach_device(struct iommu_domain *io_domain, + struct device *dev) +{ + struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + struct ipmmu_vmsa_domain *domain = io_domain->priv; + + ipmmu_utlb_disable(domain, archdata->utlb); + + /* + * TODO: Optimize by disabling the context when no device is attached. 
+ */ +} + +static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct ipmmu_vmsa_domain *domain = io_domain->priv; + + if (!domain) + return -ENODEV; + + return ipmmu_create_mapping(domain, iova, paddr, size, prot); +} + +static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, + size_t size) +{ + struct ipmmu_vmsa_domain *domain = io_domain->priv; + int ret; + + ret = ipmmu_clear_mapping(domain, iova, size); + return ret ? 0 : size; +} + +static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, + dma_addr_t iova) +{ + struct ipmmu_vmsa_domain *domain = io_domain->priv; + pgd_t pgd; + pud_t pud; + pmd_t pmd; + pte_t pte; + + /* TODO: Is locking needed ? */ + + if (!domain->pgd) + return 0; + + pgd = *(domain->pgd + pgd_index(iova)); + if (pgd_none(pgd)) + return 0; + + pud = *pud_offset(&pgd, iova); + if (pud_none(pud)) + return 0; + + pmd = *pmd_offset(&pud, iova); + if (pmd_none(pmd)) + return 0; + + if (pmd_sect(pmd)) + return __pfn_to_phys(pmd_pfn(pmd)) | (iova & ~PMD_MASK); + + pte = *(pmd_page_vaddr(pmd) + pte_index(iova)); + if (pte_none(pte)) + return 0; + + return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK); +} + +static int ipmmu_find_utlb(struct ipmmu_vmsa_device *mmu, struct device *dev) +{ + const struct ipmmu_vmsa_master *master = mmu->pdata->masters; + const char *devname = dev_name(dev); + unsigned int i; + + for (i = 0; i < mmu->pdata->num_masters; ++i, ++master) { + if (strcmp(master->name, devname) == 0) + return master->utlb; + } + + return -1; +} + +static int ipmmu_add_device(struct device *dev) +{ + struct ipmmu_vmsa_archdata *archdata; + struct ipmmu_vmsa_device *mmu; + struct iommu_group *group; + int utlb = -1; + int ret; + + if (dev->archdata.iommu) { + dev_warn(dev, "IOMMU driver already assigned to device %s\n", + dev_name(dev)); + return -EINVAL; + } + + /* Find the master corresponding to the device. */ + spin_lock(&ipmmu_devices_lock); + + list_for_each_entry(mmu, &ipmmu_devices, list) { + utlb = ipmmu_find_utlb(mmu, dev); + if (utlb >= 0) { + /* + * TODO Take a reference to the MMU to protect + * against device removal. + */ + break; + } + } + + spin_unlock(&ipmmu_devices_lock); + + if (utlb < 0) + return -ENODEV; + + if (utlb >= mmu->num_utlbs) + return -EINVAL; + + /* Create a device group and add the device to it. */ + group = iommu_group_alloc(); + if (IS_ERR(group)) { + dev_err(dev, "Failed to allocate IOMMU group\n"); + return PTR_ERR(group); + } + + ret = iommu_group_add_device(group, dev); + iommu_group_put(group); + + if (ret < 0) { + dev_err(dev, "Failed to add device to IPMMU group\n"); + return ret; + } + + archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); + if (!archdata) { + ret = -ENOMEM; + goto error; + } + + archdata->mmu = mmu; + archdata->utlb = utlb; + dev->archdata.iommu = archdata; + + /* + * Create the ARM mapping, used by the ARM DMA mapping core to allocate + * VAs. This will allocate a corresponding IOMMU domain. + * + * TODO: + * - Create one mapping per context (TLB). + * - Make the mapping size configurable ? We currently use a 2GB mapping + * at a 1GB offset to ensure that NULL VAs will fault. 
+ */ + if (!mmu->mapping) { + struct dma_iommu_mapping *mapping; + + mapping = arm_iommu_create_mapping(&platform_bus_type, + SZ_1G, SZ_2G); + if (IS_ERR(mapping)) { + dev_err(mmu->dev, "failed to create ARM IOMMU mapping\n"); + return PTR_ERR(mapping); + } + + mmu->mapping = mapping; + } + + /* Attach the ARM VA mapping to the device. */ + ret = arm_iommu_attach_device(dev, mmu->mapping); + if (ret < 0) { + dev_err(dev, "Failed to attach device to VA mapping\n"); + goto error; + } + + return 0; + +error: + kfree(dev->archdata.iommu); + dev->archdata.iommu = NULL; + iommu_group_remove_device(dev); + return ret; +} + +static void ipmmu_remove_device(struct device *dev) +{ + arm_iommu_detach_device(dev); + iommu_group_remove_device(dev); + kfree(dev->archdata.iommu); + dev->archdata.iommu = NULL; +} + +static struct iommu_ops ipmmu_ops = { + .domain_init = ipmmu_domain_init, + .domain_destroy = ipmmu_domain_destroy, + .attach_dev = ipmmu_attach_device, + .detach_dev = ipmmu_detach_device, + .map = ipmmu_map, + .unmap = ipmmu_unmap, + .iova_to_phys = ipmmu_iova_to_phys, + .add_device = ipmmu_add_device, + .remove_device = ipmmu_remove_device, + .pgsize_bitmap = SZ_2M | SZ_64K | SZ_4K, +}; + +/* ----------------------------------------------------------------------------- + * Probe/remove and init + */ + +static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu) +{ + unsigned int i; + + /* Disable all contexts. */ + for (i = 0; i < 4; ++i) + ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0); +} + +static int ipmmu_probe(struct platform_device *pdev) +{ + struct ipmmu_vmsa_device *mmu; + struct resource *res; + int irq; + int ret; + + if (!pdev->dev.platform_data) { + dev_err(&pdev->dev, "missing platform data\n"); + return -EINVAL; + } + + mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL); + if (!mmu) { + dev_err(&pdev->dev, "cannot allocate device data\n"); + return -ENOMEM; + } + + mmu->dev = &pdev->dev; + mmu->pdata = pdev->dev.platform_data; + mmu->num_utlbs = 32; + + /* Map I/O memory and request IRQ. */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + mmu->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(mmu->base)) + return PTR_ERR(mmu->base); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "no IRQ found\n"); + return irq; + } + + ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, + dev_name(&pdev->dev), mmu); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request IRQ %d\n", irq); + return ret; + } + + ipmmu_device_reset(mmu); + + /* + * We can't create the ARM mapping here as it requires the bus to have + * an IOMMU, which only happens when bus_set_iommu() is called in + * ipmmu_init() after the probe function returns. 
+ */ + + spin_lock(&ipmmu_devices_lock); + list_add(&mmu->list, &ipmmu_devices); + spin_unlock(&ipmmu_devices_lock); + + platform_set_drvdata(pdev, mmu); + + return 0; +} + +static int ipmmu_remove(struct platform_device *pdev) +{ + struct ipmmu_vmsa_device *mmu = platform_get_drvdata(pdev); + + spin_lock(&ipmmu_devices_lock); + list_del(&mmu->list); + spin_unlock(&ipmmu_devices_lock); + + arm_iommu_release_mapping(mmu->mapping); + + ipmmu_device_reset(mmu); + + return 0; +} + +static struct platform_driver ipmmu_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "ipmmu-vmsa", + }, + .probe = ipmmu_probe, + .remove = ipmmu_remove, +}; + +static int __init ipmmu_init(void) +{ + int ret; + + ret = platform_driver_register(&ipmmu_driver); + if (ret < 0) + return ret; + + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &ipmmu_ops); + + return 0; +} + +static void __exit ipmmu_exit(void) +{ + return platform_driver_unregister(&ipmmu_driver); +} + +subsys_initcall(ipmmu_init); +module_exit(ipmmu_exit); + +MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); +MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c new file mode 100644 index 00000000000..33c43952408 --- /dev/null +++ b/drivers/iommu/irq_remapping.c @@ -0,0 +1,391 @@ +#include <linux/seq_file.h> +#include <linux/cpumask.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/msi.h> +#include <linux/irq.h> +#include <linux/pci.h> + +#include <asm/hw_irq.h> +#include <asm/irq_remapping.h> +#include <asm/processor.h> +#include <asm/x86_init.h> +#include <asm/apic.h> + +#include "irq_remapping.h" + +int irq_remapping_enabled; + +int disable_irq_remap; +int irq_remap_broken; +int disable_sourceid_checking; +int no_x2apic_optout; + +static struct irq_remap_ops *remap_ops; + +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle); +static int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force); + +static bool irq_remapped(struct irq_cfg *cfg) +{ + return (cfg->remapped == 1); +} + +static void irq_remapping_disable_io_apic(void) +{ + /* + * With interrupt-remapping, for now we will use virtual wire A + * mode, as virtual wire B is little complex (need to configure + * both IOAPIC RTE as well as interrupt-remapping table entry). + * As this gets called during crash dump, keep this simple for + * now. 
+ */ + if (cpu_has_apic || apic_from_smp_config()) + disconnect_bsp_APIC(0); +} + +static int do_setup_msi_irqs(struct pci_dev *dev, int nvec) +{ + int ret, sub_handle, nvec_pow2, index = 0; + unsigned int irq; + struct msi_desc *msidesc; + + WARN_ON(!list_is_singular(&dev->msi_list)); + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + WARN_ON(msidesc->irq); + WARN_ON(msidesc->msi_attrib.multiple); + WARN_ON(msidesc->nvec_used); + + irq = irq_alloc_hwirqs(nvec, dev_to_node(&dev->dev)); + if (irq == 0) + return -ENOSPC; + + nvec_pow2 = __roundup_pow_of_two(nvec); + msidesc->nvec_used = nvec; + msidesc->msi_attrib.multiple = ilog2(nvec_pow2); + for (sub_handle = 0; sub_handle < nvec; sub_handle++) { + if (!sub_handle) { + index = msi_alloc_remapped_irq(dev, irq, nvec_pow2); + if (index < 0) { + ret = index; + goto error; + } + } else { + ret = msi_setup_remapped_irq(dev, irq + sub_handle, + index, sub_handle); + if (ret < 0) + goto error; + } + ret = setup_msi_irq(dev, msidesc, irq, sub_handle); + if (ret < 0) + goto error; + } + return 0; + +error: + irq_free_hwirqs(irq, nvec); + + /* + * Restore altered MSI descriptor fields and prevent just destroyed + * IRQs from tearing down again in default_teardown_msi_irqs() + */ + msidesc->irq = 0; + msidesc->nvec_used = 0; + msidesc->msi_attrib.multiple = 0; + + return ret; +} + +static int do_setup_msix_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + struct msi_desc *msidesc; + unsigned int irq; + + node = dev_to_node(&dev->dev); + sub_handle = 0; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + + irq = irq_alloc_hwirq(node); + if (irq == 0) + return -1; + + if (sub_handle == 0) + ret = index = msi_alloc_remapped_irq(dev, irq, nvec); + else + ret = msi_setup_remapped_irq(dev, irq, index, sub_handle); + + if (ret < 0) + goto error; + + ret = setup_msi_irq(dev, msidesc, irq, 0); + if (ret < 0) + goto error; + + sub_handle += 1; + irq += 1; + } + + return 0; + +error: + irq_free_hwirq(irq); + return ret; +} + +static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, + int nvec, int type) +{ + if (type == PCI_CAP_ID_MSI) + return do_setup_msi_irqs(dev, nvec); + else + return do_setup_msix_irqs(dev, nvec); +} + +static void eoi_ioapic_pin_remapped(int apic, int pin, int vector) +{ + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. 
+ */ + io_apic_eoi(apic, pin); +} + +static void __init irq_remapping_modify_x86_ops(void) +{ + x86_io_apic_ops.disable = irq_remapping_disable_io_apic; + x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; + x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; + x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; + x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; + x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; + x86_msi.compose_msi_msg = compose_remapped_msi_msg; +} + +static __init int setup_nointremap(char *str) +{ + disable_irq_remap = 1; + return 0; +} +early_param("nointremap", setup_nointremap); + +static __init int setup_irqremap(char *str) +{ + if (!str) + return -EINVAL; + + while (*str) { + if (!strncmp(str, "on", 2)) + disable_irq_remap = 0; + else if (!strncmp(str, "off", 3)) + disable_irq_remap = 1; + else if (!strncmp(str, "nosid", 5)) + disable_sourceid_checking = 1; + else if (!strncmp(str, "no_x2apic_optout", 16)) + no_x2apic_optout = 1; + + str += strcspn(str, ","); + while (*str == ',') + str++; + } + + return 0; +} +early_param("intremap", setup_irqremap); + +void __init setup_irq_remapping_ops(void) +{ + remap_ops = &intel_irq_remap_ops; + +#ifdef CONFIG_AMD_IOMMU + if (amd_iommu_irq_ops.prepare() == 0) + remap_ops = &amd_iommu_irq_ops; +#endif +} + +void set_irq_remapping_broken(void) +{ + irq_remap_broken = 1; +} + +int irq_remapping_supported(void) +{ + if (disable_irq_remap) + return 0; + + if (!remap_ops || !remap_ops->supported) + return 0; + + return remap_ops->supported(); +} + +int __init irq_remapping_prepare(void) +{ + if (!remap_ops || !remap_ops->prepare) + return -ENODEV; + + return remap_ops->prepare(); +} + +int __init irq_remapping_enable(void) +{ + int ret; + + if (!remap_ops || !remap_ops->enable) + return -ENODEV; + + ret = remap_ops->enable(); + + if (irq_remapping_enabled) + irq_remapping_modify_x86_ops(); + + return ret; +} + +void irq_remapping_disable(void) +{ + if (!irq_remapping_enabled || + !remap_ops || + !remap_ops->disable) + return; + + remap_ops->disable(); +} + +int irq_remapping_reenable(int mode) +{ + if (!irq_remapping_enabled || + !remap_ops || + !remap_ops->reenable) + return 0; + + return remap_ops->reenable(mode); +} + +int __init irq_remap_enable_fault_handling(void) +{ + if (!irq_remapping_enabled) + return 0; + + if (!remap_ops || !remap_ops->enable_faulting) + return -ENODEV; + + return remap_ops->enable_faulting(); +} + +int setup_ioapic_remapped_entry(int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) +{ + if (!remap_ops || !remap_ops->setup_ioapic_entry) + return -ENODEV; + + return remap_ops->setup_ioapic_entry(irq, entry, destination, + vector, attr); +} + +static int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + if (!config_enabled(CONFIG_SMP) || !remap_ops || + !remap_ops->set_affinity) + return 0; + + return remap_ops->set_affinity(data, mask, force); +} + +void free_remapped_irq(int irq) +{ + struct irq_cfg *cfg = irq_get_chip_data(irq); + + if (!remap_ops || !remap_ops->free_irq) + return; + + if (irq_remapped(cfg)) + remap_ops->free_irq(irq); +} + +void compose_remapped_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg = irq_get_chip_data(irq); + + if (!irq_remapped(cfg)) + native_compose_msi_msg(pdev, irq, dest, msg, hpet_id); + else if (remap_ops && remap_ops->compose_msi_msg) + 
remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); +} + +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) +{ + if (!remap_ops || !remap_ops->msi_alloc_irq) + return -ENODEV; + + return remap_ops->msi_alloc_irq(pdev, irq, nvec); +} + +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle) +{ + if (!remap_ops || !remap_ops->msi_setup_irq) + return -ENODEV; + + return remap_ops->msi_setup_irq(pdev, irq, index, sub_handle); +} + +int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) +{ + if (!remap_ops || !remap_ops->setup_hpet_msi) + return -ENODEV; + + return remap_ops->setup_hpet_msi(irq, id); +} + +void panic_if_irq_remap(const char *msg) +{ + if (irq_remapping_enabled) + panic(msg); +} + +static void ir_ack_apic_edge(struct irq_data *data) +{ + ack_APIC_irq(); +} + +static void ir_ack_apic_level(struct irq_data *data) +{ + ack_APIC_irq(); + eoi_ioapic_irq(data->irq, data->chip_data); +} + +static void ir_print_prefix(struct irq_data *data, struct seq_file *p) +{ + seq_printf(p, " IR-%s", data->chip->name); +} + +void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ + chip->irq_print_chip = ir_print_prefix; + chip->irq_ack = ir_ack_apic_edge; + chip->irq_eoi = ir_ack_apic_level; + chip->irq_set_affinity = x86_io_apic_ops.set_affinity; +} + +bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip) +{ + if (!irq_remapped(cfg)) + return false; + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); + irq_remap_modify_chip_defaults(chip); + return true; +} diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h new file mode 100644 index 00000000000..90c4dae5a46 --- /dev/null +++ b/drivers/iommu/irq_remapping.h @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2012 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <joerg.roedel@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This header file contains stuff that is shared between different interrupt + * remapping drivers but with no need to be visible outside of the IOMMU layer. 
+ */ + +#ifndef __IRQ_REMAPPING_H +#define __IRQ_REMAPPING_H + +#ifdef CONFIG_IRQ_REMAP + +struct IO_APIC_route_entry; +struct io_apic_irq_attr; +struct irq_data; +struct cpumask; +struct pci_dev; +struct msi_msg; + +extern int disable_irq_remap; +extern int irq_remap_broken; +extern int disable_sourceid_checking; +extern int no_x2apic_optout; +extern int irq_remapping_enabled; + +struct irq_remap_ops { + /* Check whether Interrupt Remapping is supported */ + int (*supported)(void); + + /* Initializes hardware and makes it ready for remapping interrupts */ + int (*prepare)(void); + + /* Enables the remapping hardware */ + int (*enable)(void); + + /* Disables the remapping hardware */ + void (*disable)(void); + + /* Reenables the remapping hardware */ + int (*reenable)(int); + + /* Enable fault handling */ + int (*enable_faulting)(void); + + /* IO-APIC setup routine */ + int (*setup_ioapic_entry)(int irq, struct IO_APIC_route_entry *, + unsigned int, int, + struct io_apic_irq_attr *); + + /* Set the CPU affinity of a remapped interrupt */ + int (*set_affinity)(struct irq_data *data, const struct cpumask *mask, + bool force); + + /* Free an IRQ */ + int (*free_irq)(int); + + /* Create MSI msg to use for interrupt remapping */ + void (*compose_msi_msg)(struct pci_dev *, + unsigned int, unsigned int, + struct msi_msg *, u8); + + /* Allocate remapping resources for MSI */ + int (*msi_alloc_irq)(struct pci_dev *, int, int); + + /* Setup the remapped MSI irq */ + int (*msi_setup_irq)(struct pci_dev *, unsigned int, int, int); + + /* Setup interrupt remapping for an HPET MSI */ + int (*setup_hpet_msi)(unsigned int, unsigned int); +}; + +extern struct irq_remap_ops intel_irq_remap_ops; +extern struct irq_remap_ops amd_iommu_irq_ops; + +#else /* CONFIG_IRQ_REMAP */ + +#define irq_remapping_enabled 0 +#define disable_irq_remap 1 +#define irq_remap_broken 0 + +#endif /* CONFIG_IRQ_REMAP */ + +#endif /* __IRQ_REMAPPING_H */ diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index cee307e8660..f5ff657f49f 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -31,8 +31,8 @@ #include <asm/cacheflush.h> #include <asm/sizes.h> -#include <mach/iommu_hw-8xxx.h> -#include <mach/iommu.h> +#include "msm_iommu_hw-8xxx.h" +#include "msm_iommu.h" #define MRC(reg, processor, op1, crn, crm, op2) \ __asm__ __volatile__ ( \ @@ -226,6 +226,11 @@ static int msm_iommu_domain_init(struct iommu_domain *domain) memset(priv->pgtable, 0, SZ_16K); domain->priv = priv; + + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = (1ULL << 32) - 1; + domain->geometry.force_aperture = true; + return 0; fail_nomem: @@ -549,7 +554,7 @@ fail: } static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long va) + dma_addr_t va) { struct msm_priv *priv; struct msm_iommu_drvdata *iommu_drvdata; diff --git a/drivers/iommu/msm_iommu.h b/drivers/iommu/msm_iommu.h new file mode 100644 index 00000000000..5c7c955e6d2 --- /dev/null +++ b/drivers/iommu/msm_iommu.h @@ -0,0 +1,120 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef MSM_IOMMU_H +#define MSM_IOMMU_H + +#include <linux/interrupt.h> +#include <linux/clk.h> + +/* Sharability attributes of MSM IOMMU mappings */ +#define MSM_IOMMU_ATTR_NON_SH 0x0 +#define MSM_IOMMU_ATTR_SH 0x4 + +/* Cacheability attributes of MSM IOMMU mappings */ +#define MSM_IOMMU_ATTR_NONCACHED 0x0 +#define MSM_IOMMU_ATTR_CACHED_WB_WA 0x1 +#define MSM_IOMMU_ATTR_CACHED_WB_NWA 0x2 +#define MSM_IOMMU_ATTR_CACHED_WT 0x3 + +/* Mask for the cache policy attribute */ +#define MSM_IOMMU_CP_MASK 0x03 + +/* Maximum number of Machine IDs that we are allowing to be mapped to the same + * context bank. The number of MIDs mapped to the same CB does not affect + * performance, but there is a practical limit on how many distinct MIDs may + * be present. These mappings are typically determined at design time and are + * not expected to change at run time. + */ +#define MAX_NUM_MIDS 32 + +/** + * struct msm_iommu_dev - a single IOMMU hardware instance + * name Human-readable name given to this IOMMU HW instance + * ncb Number of context banks present on this IOMMU HW instance + */ +struct msm_iommu_dev { + const char *name; + int ncb; +}; + +/** + * struct msm_iommu_ctx_dev - an IOMMU context bank instance + * name Human-readable name given to this context bank + * num Index of this context bank within the hardware + * mids List of Machine IDs that are to be mapped into this context + * bank, terminated by -1. The MID is a set of signals on the + * AXI bus that identifies the function associated with a specific + * memory request. (See ARM spec). + */ +struct msm_iommu_ctx_dev { + const char *name; + int num; + int mids[MAX_NUM_MIDS]; +}; + + +/** + * struct msm_iommu_drvdata - A single IOMMU hardware instance + * @base: IOMMU config port base address (VA) + * @ncb The number of contexts on this IOMMU + * @irq: Interrupt number + * @clk: The bus clock for this IOMMU hardware instance + * @pclk: The clock for the IOMMU bus interconnect + * + * A msm_iommu_drvdata holds the global driver data about a single piece + * of an IOMMU hardware instance. + */ +struct msm_iommu_drvdata { + void __iomem *base; + int irq; + int ncb; + struct clk *clk; + struct clk *pclk; +}; + +/** + * struct msm_iommu_ctx_drvdata - an IOMMU context bank instance + * @num: Hardware context number of this context + * @pdev: Platform device associated wit this HW instance + * @attached_elm: List element for domains to track which devices are + * attached to them + * + * A msm_iommu_ctx_drvdata holds the driver data for a single context bank + * within each IOMMU hardware instance + */ +struct msm_iommu_ctx_drvdata { + int num; + struct platform_device *pdev; + struct list_head attached_elm; +}; + +/* + * Look up an IOMMU context device by its context name. NULL if none found. + * Useful for testing and drivers that do not yet fully have IOMMU stuff in + * their platform devices. + */ +struct device *msm_iommu_get_ctx(const char *ctx_name); + +/* + * Interrupt handler for the IOMMU context fault interrupt. Hooking the + * interrupt is not supported in the API yet, but this will print an error + * message and dump useful IOMMU registers. 
+ */ +irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id); + +#endif diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c index 8e8fb079852..61def7cb526 100644 --- a/drivers/iommu/msm_iommu_dev.c +++ b/drivers/iommu/msm_iommu_dev.c @@ -27,9 +27,8 @@ #include <linux/err.h> #include <linux/slab.h> -#include <mach/iommu_hw-8xxx.h> -#include <mach/iommu.h> -#include <mach/clk.h> +#include "msm_iommu_hw-8xxx.h" +#include "msm_iommu.h" struct iommu_ctx_iter_data { /* input */ @@ -128,13 +127,12 @@ static void msm_iommu_reset(void __iomem *base, int ncb) static int msm_iommu_probe(struct platform_device *pdev) { - struct resource *r, *r2; + struct resource *r; struct clk *iommu_clk; struct clk *iommu_pclk; struct msm_iommu_drvdata *drvdata; struct msm_iommu_dev *iommu_dev = pdev->dev.platform_data; void __iomem *regs_base; - resource_size_t len; int ret, irq, par; if (pdev->id == -1) { @@ -160,7 +158,7 @@ static int msm_iommu_probe(struct platform_device *pdev) goto fail; } - ret = clk_enable(iommu_pclk); + ret = clk_prepare_enable(iommu_pclk); if (ret) goto fail_enable; @@ -168,9 +166,9 @@ static int msm_iommu_probe(struct platform_device *pdev) if (!IS_ERR(iommu_clk)) { if (clk_get_rate(iommu_clk) == 0) - clk_set_min_rate(iommu_clk, 1); + clk_set_rate(iommu_clk, 1); - ret = clk_enable(iommu_clk); + ret = clk_prepare_enable(iommu_clk); if (ret) { clk_put(iommu_clk); goto fail_pclk; @@ -179,35 +177,16 @@ static int msm_iommu_probe(struct platform_device *pdev) iommu_clk = NULL; r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "physbase"); - - if (!r) { - ret = -ENODEV; + regs_base = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(regs_base)) { + ret = PTR_ERR(regs_base); goto fail_clk; } - len = resource_size(r); - - r2 = request_mem_region(r->start, len, r->name); - if (!r2) { - pr_err("Could not request memory region: start=%p, len=%d\n", - (void *) r->start, len); - ret = -EBUSY; - goto fail_clk; - } - - regs_base = ioremap(r2->start, len); - - if (!regs_base) { - pr_err("Could not ioremap: start=%p, len=%d\n", - (void *) r2->start, len); - ret = -EBUSY; - goto fail_mem; - } - irq = platform_get_irq_byname(pdev, "secure_irq"); if (irq < 0) { ret = -ENODEV; - goto fail_io; + goto fail_clk; } msm_iommu_reset(regs_base, iommu_dev->ncb); @@ -223,14 +202,14 @@ static int msm_iommu_probe(struct platform_device *pdev) if (!par) { pr_err("%s: Invalid PAR value detected\n", iommu_dev->name); ret = -ENODEV; - goto fail_io; + goto fail_clk; } ret = request_irq(irq, msm_iommu_fault_handler, 0, "msm_iommu_secure_irpt_handler", drvdata); if (ret) { pr_err("Request IRQ %d failed with ret=%d\n", irq, ret); - goto fail_io; + goto fail_clk; } @@ -251,17 +230,13 @@ static int msm_iommu_probe(struct platform_device *pdev) clk_disable(iommu_pclk); return 0; -fail_io: - iounmap(regs_base); -fail_mem: - release_mem_region(r->start, len); fail_clk: if (iommu_clk) { clk_disable(iommu_clk); clk_put(iommu_clk); } fail_pclk: - clk_disable(iommu_pclk); + clk_disable_unprepare(iommu_pclk); fail_enable: clk_put(iommu_pclk); fail: @@ -275,12 +250,14 @@ static int msm_iommu_remove(struct platform_device *pdev) drv = platform_get_drvdata(pdev); if (drv) { - if (drv->clk) + if (drv->clk) { + clk_unprepare(drv->clk); clk_put(drv->clk); + } + clk_unprepare(drv->pclk); clk_put(drv->pclk); memset(drv, 0, sizeof(*drv)); kfree(drv); - platform_set_drvdata(pdev, NULL); } return 0; } @@ -289,39 +266,34 @@ static int msm_iommu_ctx_probe(struct platform_device *pdev) { struct msm_iommu_ctx_dev *c = 
pdev->dev.platform_data; struct msm_iommu_drvdata *drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata = NULL; + struct msm_iommu_ctx_drvdata *ctx_drvdata; int i, ret; - if (!c || !pdev->dev.parent) { - ret = -EINVAL; - goto fail; - } - drvdata = dev_get_drvdata(pdev->dev.parent); + if (!c || !pdev->dev.parent) + return -EINVAL; - if (!drvdata) { - ret = -ENODEV; - goto fail; - } + drvdata = dev_get_drvdata(pdev->dev.parent); + if (!drvdata) + return -ENODEV; ctx_drvdata = kzalloc(sizeof(*ctx_drvdata), GFP_KERNEL); - if (!ctx_drvdata) { - ret = -ENOMEM; - goto fail; - } + if (!ctx_drvdata) + return -ENOMEM; + ctx_drvdata->num = c->num; ctx_drvdata->pdev = pdev; INIT_LIST_HEAD(&ctx_drvdata->attached_elm); platform_set_drvdata(pdev, ctx_drvdata); - ret = clk_enable(drvdata->pclk); + ret = clk_prepare_enable(drvdata->pclk); if (ret) goto fail; if (drvdata->clk) { - ret = clk_enable(drvdata->clk); + ret = clk_prepare_enable(drvdata->clk); if (ret) { - clk_disable(drvdata->pclk); + clk_disable_unprepare(drvdata->pclk); goto fail; } } @@ -369,7 +341,6 @@ static int msm_iommu_ctx_remove(struct platform_device *pdev) if (drv) { memset(drv, 0, sizeof(struct msm_iommu_ctx_drvdata)); kfree(drv); - platform_set_drvdata(pdev, NULL); } return 0; } @@ -401,6 +372,7 @@ static int __init msm_iommu_driver_init(void) ret = platform_driver_register(&msm_iommu_ctx_driver); if (ret != 0) { + platform_driver_unregister(&msm_iommu_driver); pr_err("Failed to register IOMMU context driver\n"); goto error; } diff --git a/drivers/iommu/msm_iommu_hw-8xxx.h b/drivers/iommu/msm_iommu_hw-8xxx.h new file mode 100644 index 00000000000..fc160101dea --- /dev/null +++ b/drivers/iommu/msm_iommu_hw-8xxx.h @@ -0,0 +1,1865 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +#ifndef __ARCH_ARM_MACH_MSM_IOMMU_HW_8XXX_H +#define __ARCH_ARM_MACH_MSM_IOMMU_HW_8XXX_H + +#define CTX_SHIFT 12 + +#define GET_GLOBAL_REG(reg, base) (readl((base) + (reg))) +#define GET_CTX_REG(reg, base, ctx) \ + (readl((base) + (reg) + ((ctx) << CTX_SHIFT))) + +#define SET_GLOBAL_REG(reg, base, val) writel((val), ((base) + (reg))) + +#define SET_CTX_REG(reg, base, ctx, val) \ + writel((val), ((base) + (reg) + ((ctx) << CTX_SHIFT))) + +/* Wrappers for numbered registers */ +#define SET_GLOBAL_REG_N(b, n, r, v) SET_GLOBAL_REG(b, ((r) + (n << 2)), (v)) +#define GET_GLOBAL_REG_N(b, n, r) GET_GLOBAL_REG(b, ((r) + (n << 2))) + +/* Field wrappers */ +#define GET_GLOBAL_FIELD(b, r, F) GET_FIELD(((b) + (r)), F##_MASK, F##_SHIFT) +#define GET_CONTEXT_FIELD(b, c, r, F) \ + GET_FIELD(((b) + (r) + ((c) << CTX_SHIFT)), F##_MASK, F##_SHIFT) + +#define SET_GLOBAL_FIELD(b, r, F, v) \ + SET_FIELD(((b) + (r)), F##_MASK, F##_SHIFT, (v)) +#define SET_CONTEXT_FIELD(b, c, r, F, v) \ + SET_FIELD(((b) + (r) + ((c) << CTX_SHIFT)), F##_MASK, F##_SHIFT, (v)) + +#define GET_FIELD(addr, mask, shift) ((readl(addr) >> (shift)) & (mask)) + +#define SET_FIELD(addr, mask, shift, v) \ +do { \ + int t = readl(addr); \ + writel((t & ~((mask) << (shift))) + (((v) & (mask)) << (shift)), addr);\ +} while (0) + + +#define NUM_FL_PTE 4096 +#define NUM_SL_PTE 256 +#define NUM_TEX_CLASS 8 + +/* First-level page table bits */ +#define FL_BASE_MASK 0xFFFFFC00 +#define FL_TYPE_TABLE (1 << 0) +#define FL_TYPE_SECT (2 << 0) +#define FL_SUPERSECTION (1 << 18) +#define FL_AP_WRITE (1 << 10) +#define FL_AP_READ (1 << 11) +#define FL_SHARED (1 << 16) +#define FL_BUFFERABLE (1 << 2) +#define FL_CACHEABLE (1 << 3) +#define FL_TEX0 (1 << 12) +#define FL_OFFSET(va) (((va) & 0xFFF00000) >> 20) +#define FL_NG (1 << 17) + +/* Second-level page table bits */ +#define SL_BASE_MASK_LARGE 0xFFFF0000 +#define SL_BASE_MASK_SMALL 0xFFFFF000 +#define SL_TYPE_LARGE (1 << 0) +#define SL_TYPE_SMALL (2 << 0) +#define SL_AP0 (1 << 4) +#define SL_AP1 (2 << 4) +#define SL_SHARED (1 << 10) +#define SL_BUFFERABLE (1 << 2) +#define SL_CACHEABLE (1 << 3) +#define SL_TEX0 (1 << 6) +#define SL_OFFSET(va) (((va) & 0xFF000) >> 12) +#define SL_NG (1 << 11) + +/* Memory type and cache policy attributes */ +#define MT_SO 0 +#define MT_DEV 1 +#define MT_NORMAL 2 +#define CP_NONCACHED 0 +#define CP_WB_WA 1 +#define CP_WT 2 +#define CP_WB_NWA 3 + +/* Global register setters / getters */ +#define SET_M2VCBR_N(b, N, v) SET_GLOBAL_REG_N(M2VCBR_N, N, (b), (v)) +#define SET_CBACR_N(b, N, v) SET_GLOBAL_REG_N(CBACR_N, N, (b), (v)) +#define SET_TLBRSW(b, v) SET_GLOBAL_REG(TLBRSW, (b), (v)) +#define SET_TLBTR0(b, v) SET_GLOBAL_REG(TLBTR0, (b), (v)) +#define SET_TLBTR1(b, v) SET_GLOBAL_REG(TLBTR1, (b), (v)) +#define SET_TLBTR2(b, v) SET_GLOBAL_REG(TLBTR2, (b), (v)) +#define SET_TESTBUSCR(b, v) SET_GLOBAL_REG(TESTBUSCR, (b), (v)) +#define SET_GLOBAL_TLBIALL(b, v) SET_GLOBAL_REG(GLOBAL_TLBIALL, (b), (v)) +#define SET_TLBIVMID(b, v) SET_GLOBAL_REG(TLBIVMID, (b), (v)) +#define SET_CR(b, v) SET_GLOBAL_REG(CR, (b), (v)) +#define SET_EAR(b, v) SET_GLOBAL_REG(EAR, (b), (v)) +#define SET_ESR(b, v) SET_GLOBAL_REG(ESR, (b), (v)) +#define SET_ESRRESTORE(b, v) SET_GLOBAL_REG(ESRRESTORE, (b), (v)) +#define SET_ESYNR0(b, v) SET_GLOBAL_REG(ESYNR0, (b), (v)) +#define SET_ESYNR1(b, v) SET_GLOBAL_REG(ESYNR1, (b), (v)) +#define SET_RPU_ACR(b, v) SET_GLOBAL_REG(RPU_ACR, (b), (v)) + +#define GET_M2VCBR_N(b, N) GET_GLOBAL_REG_N(M2VCBR_N, N, (b)) +#define GET_CBACR_N(b, N) GET_GLOBAL_REG_N(CBACR_N, 
N, (b)) +#define GET_TLBTR0(b) GET_GLOBAL_REG(TLBTR0, (b)) +#define GET_TLBTR1(b) GET_GLOBAL_REG(TLBTR1, (b)) +#define GET_TLBTR2(b) GET_GLOBAL_REG(TLBTR2, (b)) +#define GET_TESTBUSCR(b) GET_GLOBAL_REG(TESTBUSCR, (b)) +#define GET_GLOBAL_TLBIALL(b) GET_GLOBAL_REG(GLOBAL_TLBIALL, (b)) +#define GET_TLBIVMID(b) GET_GLOBAL_REG(TLBIVMID, (b)) +#define GET_CR(b) GET_GLOBAL_REG(CR, (b)) +#define GET_EAR(b) GET_GLOBAL_REG(EAR, (b)) +#define GET_ESR(b) GET_GLOBAL_REG(ESR, (b)) +#define GET_ESRRESTORE(b) GET_GLOBAL_REG(ESRRESTORE, (b)) +#define GET_ESYNR0(b) GET_GLOBAL_REG(ESYNR0, (b)) +#define GET_ESYNR1(b) GET_GLOBAL_REG(ESYNR1, (b)) +#define GET_REV(b) GET_GLOBAL_REG(REV, (b)) +#define GET_IDR(b) GET_GLOBAL_REG(IDR, (b)) +#define GET_RPU_ACR(b) GET_GLOBAL_REG(RPU_ACR, (b)) + + +/* Context register setters/getters */ +#define SET_SCTLR(b, c, v) SET_CTX_REG(SCTLR, (b), (c), (v)) +#define SET_ACTLR(b, c, v) SET_CTX_REG(ACTLR, (b), (c), (v)) +#define SET_CONTEXTIDR(b, c, v) SET_CTX_REG(CONTEXTIDR, (b), (c), (v)) +#define SET_TTBR0(b, c, v) SET_CTX_REG(TTBR0, (b), (c), (v)) +#define SET_TTBR1(b, c, v) SET_CTX_REG(TTBR1, (b), (c), (v)) +#define SET_TTBCR(b, c, v) SET_CTX_REG(TTBCR, (b), (c), (v)) +#define SET_PAR(b, c, v) SET_CTX_REG(PAR, (b), (c), (v)) +#define SET_FSR(b, c, v) SET_CTX_REG(FSR, (b), (c), (v)) +#define SET_FSRRESTORE(b, c, v) SET_CTX_REG(FSRRESTORE, (b), (c), (v)) +#define SET_FAR(b, c, v) SET_CTX_REG(FAR, (b), (c), (v)) +#define SET_FSYNR0(b, c, v) SET_CTX_REG(FSYNR0, (b), (c), (v)) +#define SET_FSYNR1(b, c, v) SET_CTX_REG(FSYNR1, (b), (c), (v)) +#define SET_PRRR(b, c, v) SET_CTX_REG(PRRR, (b), (c), (v)) +#define SET_NMRR(b, c, v) SET_CTX_REG(NMRR, (b), (c), (v)) +#define SET_TLBLKCR(b, c, v) SET_CTX_REG(TLBLCKR, (b), (c), (v)) +#define SET_V2PSR(b, c, v) SET_CTX_REG(V2PSR, (b), (c), (v)) +#define SET_TLBFLPTER(b, c, v) SET_CTX_REG(TLBFLPTER, (b), (c), (v)) +#define SET_TLBSLPTER(b, c, v) SET_CTX_REG(TLBSLPTER, (b), (c), (v)) +#define SET_BFBCR(b, c, v) SET_CTX_REG(BFBCR, (b), (c), (v)) +#define SET_CTX_TLBIALL(b, c, v) SET_CTX_REG(CTX_TLBIALL, (b), (c), (v)) +#define SET_TLBIASID(b, c, v) SET_CTX_REG(TLBIASID, (b), (c), (v)) +#define SET_TLBIVA(b, c, v) SET_CTX_REG(TLBIVA, (b), (c), (v)) +#define SET_TLBIVAA(b, c, v) SET_CTX_REG(TLBIVAA, (b), (c), (v)) +#define SET_V2PPR(b, c, v) SET_CTX_REG(V2PPR, (b), (c), (v)) +#define SET_V2PPW(b, c, v) SET_CTX_REG(V2PPW, (b), (c), (v)) +#define SET_V2PUR(b, c, v) SET_CTX_REG(V2PUR, (b), (c), (v)) +#define SET_V2PUW(b, c, v) SET_CTX_REG(V2PUW, (b), (c), (v)) +#define SET_RESUME(b, c, v) SET_CTX_REG(RESUME, (b), (c), (v)) + +#define GET_SCTLR(b, c) GET_CTX_REG(SCTLR, (b), (c)) +#define GET_ACTLR(b, c) GET_CTX_REG(ACTLR, (b), (c)) +#define GET_CONTEXTIDR(b, c) GET_CTX_REG(CONTEXTIDR, (b), (c)) +#define GET_TTBR0(b, c) GET_CTX_REG(TTBR0, (b), (c)) +#define GET_TTBR1(b, c) GET_CTX_REG(TTBR1, (b), (c)) +#define GET_TTBCR(b, c) GET_CTX_REG(TTBCR, (b), (c)) +#define GET_PAR(b, c) GET_CTX_REG(PAR, (b), (c)) +#define GET_FSR(b, c) GET_CTX_REG(FSR, (b), (c)) +#define GET_FSRRESTORE(b, c) GET_CTX_REG(FSRRESTORE, (b), (c)) +#define GET_FAR(b, c) GET_CTX_REG(FAR, (b), (c)) +#define GET_FSYNR0(b, c) GET_CTX_REG(FSYNR0, (b), (c)) +#define GET_FSYNR1(b, c) GET_CTX_REG(FSYNR1, (b), (c)) +#define GET_PRRR(b, c) GET_CTX_REG(PRRR, (b), (c)) +#define GET_NMRR(b, c) GET_CTX_REG(NMRR, (b), (c)) +#define GET_TLBLCKR(b, c) GET_CTX_REG(TLBLCKR, (b), (c)) +#define GET_V2PSR(b, c) GET_CTX_REG(V2PSR, (b), (c)) +#define GET_TLBFLPTER(b, c) GET_CTX_REG(TLBFLPTER, (b), (c)) 
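All of the SET_*/GET_* register and field wrappers in this header ultimately expand to the GET_FIELD()/SET_FIELD() read-modify-write helpers defined near the top of the file. A minimal standalone sketch of that pattern, using a plain pointer instead of the kernel's readl()/writel() so it compiles on its own (illustrative only; get_field/set_field are hypothetical names, not part of this driver):

#include <stdint.h>

/* Read a field: shift the register value down, then apply the mask. */
static inline uint32_t get_field(const volatile uint32_t *addr,
				 uint32_t mask, uint32_t shift)
{
	return (*addr >> shift) & mask;
}

/* Write a field: clear the old bits, then OR in the masked new value. */
static inline void set_field(volatile uint32_t *addr,
			     uint32_t mask, uint32_t shift, uint32_t v)
{
	uint32_t t = *addr;

	*addr = (t & ~(mask << shift)) | ((v & mask) << shift);
}

The per-context variants differ only in that they add (ctx << CTX_SHIFT) to the register offset before performing the access.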
+#define GET_TLBSLPTER(b, c) GET_CTX_REG(TLBSLPTER, (b), (c)) +#define GET_BFBCR(b, c) GET_CTX_REG(BFBCR, (b), (c)) +#define GET_CTX_TLBIALL(b, c) GET_CTX_REG(CTX_TLBIALL, (b), (c)) +#define GET_TLBIASID(b, c) GET_CTX_REG(TLBIASID, (b), (c)) +#define GET_TLBIVA(b, c) GET_CTX_REG(TLBIVA, (b), (c)) +#define GET_TLBIVAA(b, c) GET_CTX_REG(TLBIVAA, (b), (c)) +#define GET_V2PPR(b, c) GET_CTX_REG(V2PPR, (b), (c)) +#define GET_V2PPW(b, c) GET_CTX_REG(V2PPW, (b), (c)) +#define GET_V2PUR(b, c) GET_CTX_REG(V2PUR, (b), (c)) +#define GET_V2PUW(b, c) GET_CTX_REG(V2PUW, (b), (c)) +#define GET_RESUME(b, c) GET_CTX_REG(RESUME, (b), (c)) + + +/* Global field setters / getters */ +/* Global Field Setters: */ +/* CBACR_N */ +#define SET_RWVMID(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWVMID, v) +#define SET_RWE(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWE, v) +#define SET_RWGE(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWGE, v) +#define SET_CBVMID(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), CBVMID, v) +#define SET_IRPTNDX(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), IRPTNDX, v) + + +/* M2VCBR_N */ +#define SET_VMID(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), VMID, v) +#define SET_CBNDX(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), CBNDX, v) +#define SET_BYPASSD(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BYPASSD, v) +#define SET_BPRCOSH(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCOSH, v) +#define SET_BPRCISH(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCISH, v) +#define SET_BPRCNSH(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCNSH, v) +#define SET_BPSHCFG(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPSHCFG, v) +#define SET_NSCFG(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), NSCFG, v) +#define SET_BPMTCFG(b, n, v) SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPMTCFG, v) +#define SET_BPMEMTYPE(b, n, v) \ + SET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPMEMTYPE, v) + + +/* CR */ +#define SET_RPUE(b, v) SET_GLOBAL_FIELD(b, CR, RPUE, v) +#define SET_RPUERE(b, v) SET_GLOBAL_FIELD(b, CR, RPUERE, v) +#define SET_RPUEIE(b, v) SET_GLOBAL_FIELD(b, CR, RPUEIE, v) +#define SET_DCDEE(b, v) SET_GLOBAL_FIELD(b, CR, DCDEE, v) +#define SET_CLIENTPD(b, v) SET_GLOBAL_FIELD(b, CR, CLIENTPD, v) +#define SET_STALLD(b, v) SET_GLOBAL_FIELD(b, CR, STALLD, v) +#define SET_TLBLKCRWE(b, v) SET_GLOBAL_FIELD(b, CR, TLBLKCRWE, v) +#define SET_CR_TLBIALLCFG(b, v) SET_GLOBAL_FIELD(b, CR, CR_TLBIALLCFG, v) +#define SET_TLBIVMIDCFG(b, v) SET_GLOBAL_FIELD(b, CR, TLBIVMIDCFG, v) +#define SET_CR_HUME(b, v) SET_GLOBAL_FIELD(b, CR, CR_HUME, v) + + +/* ESR */ +#define SET_CFG(b, v) SET_GLOBAL_FIELD(b, ESR, CFG, v) +#define SET_BYPASS(b, v) SET_GLOBAL_FIELD(b, ESR, BYPASS, v) +#define SET_ESR_MULTI(b, v) SET_GLOBAL_FIELD(b, ESR, ESR_MULTI, v) + + +/* ESYNR0 */ +#define SET_ESYNR0_AMID(b, v) SET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_AMID, v) +#define SET_ESYNR0_APID(b, v) SET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_APID, v) +#define SET_ESYNR0_ABID(b, v) SET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_ABID, v) +#define SET_ESYNR0_AVMID(b, v) SET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_AVMID, v) +#define SET_ESYNR0_ATID(b, v) SET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_ATID, v) + + +/* ESYNR1 */ +#define SET_ESYNR1_AMEMTYPE(b, v) \ + SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AMEMTYPE, v) +#define SET_ESYNR1_ASHARED(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ASHARED, v) +#define SET_ESYNR1_AINNERSHARED(b, v) \ + SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AINNERSHARED, v) +#define SET_ESYNR1_APRIV(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_APRIV, v) +#define 
SET_ESYNR1_APROTNS(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_APROTNS, v) +#define SET_ESYNR1_AINST(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AINST, v) +#define SET_ESYNR1_AWRITE(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AWRITE, v) +#define SET_ESYNR1_ABURST(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ABURST, v) +#define SET_ESYNR1_ALEN(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ALEN, v) +#define SET_ESYNR1_ASIZE(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ASIZE, v) +#define SET_ESYNR1_ALOCK(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ALOCK, v) +#define SET_ESYNR1_AOOO(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AOOO, v) +#define SET_ESYNR1_AFULL(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AFULL, v) +#define SET_ESYNR1_AC(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AC, v) +#define SET_ESYNR1_DCD(b, v) SET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_DCD, v) + + +/* TESTBUSCR */ +#define SET_TBE(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, TBE, v) +#define SET_SPDMBE(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, SPDMBE, v) +#define SET_WGSEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, WGSEL, v) +#define SET_TBLSEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, TBLSEL, v) +#define SET_TBHSEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, TBHSEL, v) +#define SET_SPDM0SEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, SPDM0SEL, v) +#define SET_SPDM1SEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, SPDM1SEL, v) +#define SET_SPDM2SEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, SPDM2SEL, v) +#define SET_SPDM3SEL(b, v) SET_GLOBAL_FIELD(b, TESTBUSCR, SPDM3SEL, v) + + +/* TLBIVMID */ +#define SET_TLBIVMID_VMID(b, v) SET_GLOBAL_FIELD(b, TLBIVMID, TLBIVMID_VMID, v) + + +/* TLBRSW */ +#define SET_TLBRSW_INDEX(b, v) SET_GLOBAL_FIELD(b, TLBRSW, TLBRSW_INDEX, v) +#define SET_TLBBFBS(b, v) SET_GLOBAL_FIELD(b, TLBRSW, TLBBFBS, v) + + +/* TLBTR0 */ +#define SET_PR(b, v) SET_GLOBAL_FIELD(b, TLBTR0, PR, v) +#define SET_PW(b, v) SET_GLOBAL_FIELD(b, TLBTR0, PW, v) +#define SET_UR(b, v) SET_GLOBAL_FIELD(b, TLBTR0, UR, v) +#define SET_UW(b, v) SET_GLOBAL_FIELD(b, TLBTR0, UW, v) +#define SET_XN(b, v) SET_GLOBAL_FIELD(b, TLBTR0, XN, v) +#define SET_NSDESC(b, v) SET_GLOBAL_FIELD(b, TLBTR0, NSDESC, v) +#define SET_ISH(b, v) SET_GLOBAL_FIELD(b, TLBTR0, ISH, v) +#define SET_SH(b, v) SET_GLOBAL_FIELD(b, TLBTR0, SH, v) +#define SET_MT(b, v) SET_GLOBAL_FIELD(b, TLBTR0, MT, v) +#define SET_DPSIZR(b, v) SET_GLOBAL_FIELD(b, TLBTR0, DPSIZR, v) +#define SET_DPSIZC(b, v) SET_GLOBAL_FIELD(b, TLBTR0, DPSIZC, v) + + +/* TLBTR1 */ +#define SET_TLBTR1_VMID(b, v) SET_GLOBAL_FIELD(b, TLBTR1, TLBTR1_VMID, v) +#define SET_TLBTR1_PA(b, v) SET_GLOBAL_FIELD(b, TLBTR1, TLBTR1_PA, v) + + +/* TLBTR2 */ +#define SET_TLBTR2_ASID(b, v) SET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_ASID, v) +#define SET_TLBTR2_V(b, v) SET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_V, v) +#define SET_TLBTR2_NSTID(b, v) SET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_NSTID, v) +#define SET_TLBTR2_NV(b, v) SET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_NV, v) +#define SET_TLBTR2_VA(b, v) SET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_VA, v) + + +/* Global Field Getters */ +/* CBACR_N */ +#define GET_RWVMID(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWVMID) +#define GET_RWE(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWE) +#define GET_RWGE(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), RWGE) +#define GET_CBVMID(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), CBVMID) +#define GET_IRPTNDX(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(CBACR_N), IRPTNDX) + + +/* M2VCBR_N */ +#define GET_VMID(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), VMID) +#define GET_CBNDX(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), CBNDX) +#define GET_BYPASSD(b, n) 
GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BYPASSD) +#define GET_BPRCOSH(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCOSH) +#define GET_BPRCISH(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCISH) +#define GET_BPRCNSH(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPRCNSH) +#define GET_BPSHCFG(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPSHCFG) +#define GET_NSCFG(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), NSCFG) +#define GET_BPMTCFG(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPMTCFG) +#define GET_BPMEMTYPE(b, n) GET_GLOBAL_FIELD(b, (n<<2)|(M2VCBR_N), BPMEMTYPE) + + +/* CR */ +#define GET_RPUE(b) GET_GLOBAL_FIELD(b, CR, RPUE) +#define GET_RPUERE(b) GET_GLOBAL_FIELD(b, CR, RPUERE) +#define GET_RPUEIE(b) GET_GLOBAL_FIELD(b, CR, RPUEIE) +#define GET_DCDEE(b) GET_GLOBAL_FIELD(b, CR, DCDEE) +#define GET_CLIENTPD(b) GET_GLOBAL_FIELD(b, CR, CLIENTPD) +#define GET_STALLD(b) GET_GLOBAL_FIELD(b, CR, STALLD) +#define GET_TLBLKCRWE(b) GET_GLOBAL_FIELD(b, CR, TLBLKCRWE) +#define GET_CR_TLBIALLCFG(b) GET_GLOBAL_FIELD(b, CR, CR_TLBIALLCFG) +#define GET_TLBIVMIDCFG(b) GET_GLOBAL_FIELD(b, CR, TLBIVMIDCFG) +#define GET_CR_HUME(b) GET_GLOBAL_FIELD(b, CR, CR_HUME) + + +/* ESR */ +#define GET_CFG(b) GET_GLOBAL_FIELD(b, ESR, CFG) +#define GET_BYPASS(b) GET_GLOBAL_FIELD(b, ESR, BYPASS) +#define GET_ESR_MULTI(b) GET_GLOBAL_FIELD(b, ESR, ESR_MULTI) + + +/* ESYNR0 */ +#define GET_ESYNR0_AMID(b) GET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_AMID) +#define GET_ESYNR0_APID(b) GET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_APID) +#define GET_ESYNR0_ABID(b) GET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_ABID) +#define GET_ESYNR0_AVMID(b) GET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_AVMID) +#define GET_ESYNR0_ATID(b) GET_GLOBAL_FIELD(b, ESYNR0, ESYNR0_ATID) + + +/* ESYNR1 */ +#define GET_ESYNR1_AMEMTYPE(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AMEMTYPE) +#define GET_ESYNR1_ASHARED(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ASHARED) +#define GET_ESYNR1_AINNERSHARED(b) \ + GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AINNERSHARED) +#define GET_ESYNR1_APRIV(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_APRIV) +#define GET_ESYNR1_APROTNS(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_APROTNS) +#define GET_ESYNR1_AINST(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AINST) +#define GET_ESYNR1_AWRITE(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AWRITE) +#define GET_ESYNR1_ABURST(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ABURST) +#define GET_ESYNR1_ALEN(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ALEN) +#define GET_ESYNR1_ASIZE(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ASIZE) +#define GET_ESYNR1_ALOCK(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_ALOCK) +#define GET_ESYNR1_AOOO(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AOOO) +#define GET_ESYNR1_AFULL(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AFULL) +#define GET_ESYNR1_AC(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_AC) +#define GET_ESYNR1_DCD(b) GET_GLOBAL_FIELD(b, ESYNR1, ESYNR1_DCD) + + +/* IDR */ +#define GET_NM2VCBMT(b) GET_GLOBAL_FIELD(b, IDR, NM2VCBMT) +#define GET_HTW(b) GET_GLOBAL_FIELD(b, IDR, HTW) +#define GET_HUM(b) GET_GLOBAL_FIELD(b, IDR, HUM) +#define GET_TLBSIZE(b) GET_GLOBAL_FIELD(b, IDR, TLBSIZE) +#define GET_NCB(b) GET_GLOBAL_FIELD(b, IDR, NCB) +#define GET_NIRPT(b) GET_GLOBAL_FIELD(b, IDR, NIRPT) + + +/* REV */ +#define GET_MAJOR(b) GET_GLOBAL_FIELD(b, REV, MAJOR) +#define GET_MINOR(b) GET_GLOBAL_FIELD(b, REV, MINOR) + + +/* TESTBUSCR */ +#define GET_TBE(b) GET_GLOBAL_FIELD(b, TESTBUSCR, TBE) +#define GET_SPDMBE(b) GET_GLOBAL_FIELD(b, TESTBUSCR, SPDMBE) +#define GET_WGSEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, WGSEL) +#define GET_TBLSEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, TBLSEL) +#define 
GET_TBHSEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, TBHSEL) +#define GET_SPDM0SEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, SPDM0SEL) +#define GET_SPDM1SEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, SPDM1SEL) +#define GET_SPDM2SEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, SPDM2SEL) +#define GET_SPDM3SEL(b) GET_GLOBAL_FIELD(b, TESTBUSCR, SPDM3SEL) + + +/* TLBIVMID */ +#define GET_TLBIVMID_VMID(b) GET_GLOBAL_FIELD(b, TLBIVMID, TLBIVMID_VMID) + + +/* TLBTR0 */ +#define GET_PR(b) GET_GLOBAL_FIELD(b, TLBTR0, PR) +#define GET_PW(b) GET_GLOBAL_FIELD(b, TLBTR0, PW) +#define GET_UR(b) GET_GLOBAL_FIELD(b, TLBTR0, UR) +#define GET_UW(b) GET_GLOBAL_FIELD(b, TLBTR0, UW) +#define GET_XN(b) GET_GLOBAL_FIELD(b, TLBTR0, XN) +#define GET_NSDESC(b) GET_GLOBAL_FIELD(b, TLBTR0, NSDESC) +#define GET_ISH(b) GET_GLOBAL_FIELD(b, TLBTR0, ISH) +#define GET_SH(b) GET_GLOBAL_FIELD(b, TLBTR0, SH) +#define GET_MT(b) GET_GLOBAL_FIELD(b, TLBTR0, MT) +#define GET_DPSIZR(b) GET_GLOBAL_FIELD(b, TLBTR0, DPSIZR) +#define GET_DPSIZC(b) GET_GLOBAL_FIELD(b, TLBTR0, DPSIZC) + + +/* TLBTR1 */ +#define GET_TLBTR1_VMID(b) GET_GLOBAL_FIELD(b, TLBTR1, TLBTR1_VMID) +#define GET_TLBTR1_PA(b) GET_GLOBAL_FIELD(b, TLBTR1, TLBTR1_PA) + + +/* TLBTR2 */ +#define GET_TLBTR2_ASID(b) GET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_ASID) +#define GET_TLBTR2_V(b) GET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_V) +#define GET_TLBTR2_NSTID(b) GET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_NSTID) +#define GET_TLBTR2_NV(b) GET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_NV) +#define GET_TLBTR2_VA(b) GET_GLOBAL_FIELD(b, TLBTR2, TLBTR2_VA) + + +/* Context Register setters / getters */ +/* Context Register setters */ +/* ACTLR */ +#define SET_CFERE(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, CFERE, v) +#define SET_CFEIE(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, CFEIE, v) +#define SET_PTSHCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, PTSHCFG, v) +#define SET_RCOSH(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, RCOSH, v) +#define SET_RCISH(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, RCISH, v) +#define SET_RCNSH(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, RCNSH, v) +#define SET_PRIVCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, PRIVCFG, v) +#define SET_DNA(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, DNA, v) +#define SET_DNLV2PA(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, DNLV2PA, v) +#define SET_TLBMCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, TLBMCFG, v) +#define SET_CFCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, CFCFG, v) +#define SET_TIPCF(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, TIPCF, v) +#define SET_V2PCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, V2PCFG, v) +#define SET_HUME(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, HUME, v) +#define SET_PTMTCFG(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, PTMTCFG, v) +#define SET_PTMEMTYPE(b, c, v) SET_CONTEXT_FIELD(b, c, ACTLR, PTMEMTYPE, v) + + +/* BFBCR */ +#define SET_BFBDFE(b, c, v) SET_CONTEXT_FIELD(b, c, BFBCR, BFBDFE, v) +#define SET_BFBSFE(b, c, v) SET_CONTEXT_FIELD(b, c, BFBCR, BFBSFE, v) +#define SET_SFVS(b, c, v) SET_CONTEXT_FIELD(b, c, BFBCR, SFVS, v) +#define SET_FLVIC(b, c, v) SET_CONTEXT_FIELD(b, c, BFBCR, FLVIC, v) +#define SET_SLVIC(b, c, v) SET_CONTEXT_FIELD(b, c, BFBCR, SLVIC, v) + + +/* CONTEXTIDR */ +#define SET_CONTEXTIDR_ASID(b, c, v) \ + SET_CONTEXT_FIELD(b, c, CONTEXTIDR, CONTEXTIDR_ASID, v) +#define SET_CONTEXTIDR_PROCID(b, c, v) \ + SET_CONTEXT_FIELD(b, c, CONTEXTIDR, PROCID, v) + + +/* FSR */ +#define SET_TF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, TF, v) +#define SET_AFF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, AFF, v) +#define SET_APF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, APF, v) +#define SET_TLBMF(b, c, v) 
SET_CONTEXT_FIELD(b, c, FSR, TLBMF, v) +#define SET_HTWDEEF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, HTWDEEF, v) +#define SET_HTWSEEF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, HTWSEEF, v) +#define SET_MHF(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, MHF, v) +#define SET_SL(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, SL, v) +#define SET_SS(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, SS, v) +#define SET_MULTI(b, c, v) SET_CONTEXT_FIELD(b, c, FSR, MULTI, v) + + +/* FSYNR0 */ +#define SET_AMID(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR0, AMID, v) +#define SET_APID(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR0, APID, v) +#define SET_ABID(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR0, ABID, v) +#define SET_ATID(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR0, ATID, v) + + +/* FSYNR1 */ +#define SET_AMEMTYPE(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, AMEMTYPE, v) +#define SET_ASHARED(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, ASHARED, v) +#define SET_AINNERSHARED(b, c, v) \ + SET_CONTEXT_FIELD(b, c, FSYNR1, AINNERSHARED, v) +#define SET_APRIV(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, APRIV, v) +#define SET_APROTNS(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, APROTNS, v) +#define SET_AINST(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, AINST, v) +#define SET_AWRITE(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, AWRITE, v) +#define SET_ABURST(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, ABURST, v) +#define SET_ALEN(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, ALEN, v) +#define SET_FSYNR1_ASIZE(b, c, v) \ + SET_CONTEXT_FIELD(b, c, FSYNR1, FSYNR1_ASIZE, v) +#define SET_ALOCK(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, ALOCK, v) +#define SET_AFULL(b, c, v) SET_CONTEXT_FIELD(b, c, FSYNR1, AFULL, v) + + +/* NMRR */ +#define SET_ICPC0(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC0, v) +#define SET_ICPC1(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC1, v) +#define SET_ICPC2(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC2, v) +#define SET_ICPC3(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC3, v) +#define SET_ICPC4(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC4, v) +#define SET_ICPC5(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC5, v) +#define SET_ICPC6(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC6, v) +#define SET_ICPC7(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, ICPC7, v) +#define SET_OCPC0(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC0, v) +#define SET_OCPC1(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC1, v) +#define SET_OCPC2(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC2, v) +#define SET_OCPC3(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC3, v) +#define SET_OCPC4(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC4, v) +#define SET_OCPC5(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC5, v) +#define SET_OCPC6(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC6, v) +#define SET_OCPC7(b, c, v) SET_CONTEXT_FIELD(b, c, NMRR, OCPC7, v) + + +/* PAR */ +#define SET_FAULT(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT, v) + +#define SET_FAULT_TF(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_TF, v) +#define SET_FAULT_AFF(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_AFF, v) +#define SET_FAULT_APF(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_APF, v) +#define SET_FAULT_TLBMF(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_TLBMF, v) +#define SET_FAULT_HTWDEEF(b, c, v) \ + SET_CONTEXT_FIELD(b, c, PAR, FAULT_HTWDEEF, v) +#define SET_FAULT_HTWSEEF(b, c, v) \ + SET_CONTEXT_FIELD(b, c, PAR, FAULT_HTWSEEF, v) +#define SET_FAULT_MHF(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_MHF, v) +#define SET_FAULT_SL(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_SL, v) +#define SET_FAULT_SS(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, FAULT_SS, v) + +#define SET_NOFAULT_SS(b, c, v) 
SET_CONTEXT_FIELD(b, c, PAR, NOFAULT_SS, v) +#define SET_NOFAULT_MT(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, NOFAULT_MT, v) +#define SET_NOFAULT_SH(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, NOFAULT_SH, v) +#define SET_NOFAULT_NS(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, NOFAULT_NS, v) +#define SET_NOFAULT_NOS(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, NOFAULT_NOS, v) +#define SET_NPFAULT_PA(b, c, v) SET_CONTEXT_FIELD(b, c, PAR, NPFAULT_PA, v) + + +/* PRRR */ +#define SET_MTC0(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC0, v) +#define SET_MTC1(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC1, v) +#define SET_MTC2(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC2, v) +#define SET_MTC3(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC3, v) +#define SET_MTC4(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC4, v) +#define SET_MTC5(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC5, v) +#define SET_MTC6(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC6, v) +#define SET_MTC7(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, MTC7, v) +#define SET_SHDSH0(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, SHDSH0, v) +#define SET_SHDSH1(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, SHDSH1, v) +#define SET_SHNMSH0(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, SHNMSH0, v) +#define SET_SHNMSH1(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, SHNMSH1, v) +#define SET_NOS0(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS0, v) +#define SET_NOS1(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS1, v) +#define SET_NOS2(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS2, v) +#define SET_NOS3(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS3, v) +#define SET_NOS4(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS4, v) +#define SET_NOS5(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS5, v) +#define SET_NOS6(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS6, v) +#define SET_NOS7(b, c, v) SET_CONTEXT_FIELD(b, c, PRRR, NOS7, v) + + +/* RESUME */ +#define SET_TNR(b, c, v) SET_CONTEXT_FIELD(b, c, RESUME, TNR, v) + + +/* SCTLR */ +#define SET_M(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, M, v) +#define SET_TRE(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, TRE, v) +#define SET_AFE(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, AFE, v) +#define SET_HAF(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, HAF, v) +#define SET_BE(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, BE, v) +#define SET_AFFD(b, c, v) SET_CONTEXT_FIELD(b, c, SCTLR, AFFD, v) + + +/* TLBLKCR */ +#define SET_LKE(b, c, v) SET_CONTEXT_FIELD(b, c, TLBLKCR, LKE, v) +#define SET_TLBLKCR_TLBIALLCFG(b, c, v) \ + SET_CONTEXT_FIELD(b, c, TLBLKCR, TLBLCKR_TLBIALLCFG, v) +#define SET_TLBIASIDCFG(b, c, v) \ + SET_CONTEXT_FIELD(b, c, TLBLKCR, TLBIASIDCFG, v) +#define SET_TLBIVAACFG(b, c, v) SET_CONTEXT_FIELD(b, c, TLBLKCR, TLBIVAACFG, v) +#define SET_FLOOR(b, c, v) SET_CONTEXT_FIELD(b, c, TLBLKCR, FLOOR, v) +#define SET_VICTIM(b, c, v) SET_CONTEXT_FIELD(b, c, TLBLKCR, VICTIM, v) + + +/* TTBCR */ +#define SET_N(b, c, v) SET_CONTEXT_FIELD(b, c, TTBCR, N, v) +#define SET_PD0(b, c, v) SET_CONTEXT_FIELD(b, c, TTBCR, PD0, v) +#define SET_PD1(b, c, v) SET_CONTEXT_FIELD(b, c, TTBCR, PD1, v) + + +/* TTBR0 */ +#define SET_TTBR0_IRGNH(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_IRGNH, v) +#define SET_TTBR0_SH(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_SH, v) +#define SET_TTBR0_ORGN(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_ORGN, v) +#define SET_TTBR0_NOS(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_NOS, v) +#define SET_TTBR0_IRGNL(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_IRGNL, v) +#define SET_TTBR0_PA(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_PA, v) + + +/* TTBR1 */ +#define SET_TTBR1_IRGNH(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, 
TTBR1_IRGNH, v) +#define SET_TTBR1_SH(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_SH, v) +#define SET_TTBR1_ORGN(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_ORGN, v) +#define SET_TTBR1_NOS(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_NOS, v) +#define SET_TTBR1_IRGNL(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_IRGNL, v) +#define SET_TTBR1_PA(b, c, v) SET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_PA, v) + + +/* V2PSR */ +#define SET_HIT(b, c, v) SET_CONTEXT_FIELD(b, c, V2PSR, HIT, v) +#define SET_INDEX(b, c, v) SET_CONTEXT_FIELD(b, c, V2PSR, INDEX, v) + + +/* Context Register getters */ +/* ACTLR */ +#define GET_CFERE(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, CFERE) +#define GET_CFEIE(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, CFEIE) +#define GET_PTSHCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, PTSHCFG) +#define GET_RCOSH(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, RCOSH) +#define GET_RCISH(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, RCISH) +#define GET_RCNSH(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, RCNSH) +#define GET_PRIVCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, PRIVCFG) +#define GET_DNA(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, DNA) +#define GET_DNLV2PA(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, DNLV2PA) +#define GET_TLBMCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, TLBMCFG) +#define GET_CFCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, CFCFG) +#define GET_TIPCF(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, TIPCF) +#define GET_V2PCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, V2PCFG) +#define GET_HUME(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, HUME) +#define GET_PTMTCFG(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, PTMTCFG) +#define GET_PTMEMTYPE(b, c) GET_CONTEXT_FIELD(b, c, ACTLR, PTMEMTYPE) + +/* BFBCR */ +#define GET_BFBDFE(b, c) GET_CONTEXT_FIELD(b, c, BFBCR, BFBDFE) +#define GET_BFBSFE(b, c) GET_CONTEXT_FIELD(b, c, BFBCR, BFBSFE) +#define GET_SFVS(b, c) GET_CONTEXT_FIELD(b, c, BFBCR, SFVS) +#define GET_FLVIC(b, c) GET_CONTEXT_FIELD(b, c, BFBCR, FLVIC) +#define GET_SLVIC(b, c) GET_CONTEXT_FIELD(b, c, BFBCR, SLVIC) + + +/* CONTEXTIDR */ +#define GET_CONTEXTIDR_ASID(b, c) \ + GET_CONTEXT_FIELD(b, c, CONTEXTIDR, CONTEXTIDR_ASID) +#define GET_CONTEXTIDR_PROCID(b, c) GET_CONTEXT_FIELD(b, c, CONTEXTIDR, PROCID) + + +/* FSR */ +#define GET_TF(b, c) GET_CONTEXT_FIELD(b, c, FSR, TF) +#define GET_AFF(b, c) GET_CONTEXT_FIELD(b, c, FSR, AFF) +#define GET_APF(b, c) GET_CONTEXT_FIELD(b, c, FSR, APF) +#define GET_TLBMF(b, c) GET_CONTEXT_FIELD(b, c, FSR, TLBMF) +#define GET_HTWDEEF(b, c) GET_CONTEXT_FIELD(b, c, FSR, HTWDEEF) +#define GET_HTWSEEF(b, c) GET_CONTEXT_FIELD(b, c, FSR, HTWSEEF) +#define GET_MHF(b, c) GET_CONTEXT_FIELD(b, c, FSR, MHF) +#define GET_SL(b, c) GET_CONTEXT_FIELD(b, c, FSR, SL) +#define GET_SS(b, c) GET_CONTEXT_FIELD(b, c, FSR, SS) +#define GET_MULTI(b, c) GET_CONTEXT_FIELD(b, c, FSR, MULTI) + + +/* FSYNR0 */ +#define GET_AMID(b, c) GET_CONTEXT_FIELD(b, c, FSYNR0, AMID) +#define GET_APID(b, c) GET_CONTEXT_FIELD(b, c, FSYNR0, APID) +#define GET_ABID(b, c) GET_CONTEXT_FIELD(b, c, FSYNR0, ABID) +#define GET_ATID(b, c) GET_CONTEXT_FIELD(b, c, FSYNR0, ATID) + + +/* FSYNR1 */ +#define GET_AMEMTYPE(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, AMEMTYPE) +#define GET_ASHARED(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, ASHARED) +#define GET_AINNERSHARED(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, AINNERSHARED) +#define GET_APRIV(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, APRIV) +#define GET_APROTNS(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, APROTNS) +#define GET_AINST(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, AINST) +#define GET_AWRITE(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, AWRITE) +#define GET_ABURST(b, c) 
GET_CONTEXT_FIELD(b, c, FSYNR1, ABURST) +#define GET_ALEN(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, ALEN) +#define GET_FSYNR1_ASIZE(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, FSYNR1_ASIZE) +#define GET_ALOCK(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, ALOCK) +#define GET_AFULL(b, c) GET_CONTEXT_FIELD(b, c, FSYNR1, AFULL) + + +/* NMRR */ +#define GET_ICPC0(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC0) +#define GET_ICPC1(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC1) +#define GET_ICPC2(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC2) +#define GET_ICPC3(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC3) +#define GET_ICPC4(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC4) +#define GET_ICPC5(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC5) +#define GET_ICPC6(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC6) +#define GET_ICPC7(b, c) GET_CONTEXT_FIELD(b, c, NMRR, ICPC7) +#define GET_OCPC0(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC0) +#define GET_OCPC1(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC1) +#define GET_OCPC2(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC2) +#define GET_OCPC3(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC3) +#define GET_OCPC4(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC4) +#define GET_OCPC5(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC5) +#define GET_OCPC6(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC6) +#define GET_OCPC7(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC7) +#define NMRR_ICP(nmrr, n) (((nmrr) & (3 << ((n) * 2))) >> ((n) * 2)) +#define NMRR_OCP(nmrr, n) (((nmrr) & (3 << ((n) * 2 + 16))) >> \ + ((n) * 2 + 16)) + +/* PAR */ +#define GET_FAULT(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT) + +#define GET_FAULT_TF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_TF) +#define GET_FAULT_AFF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_AFF) +#define GET_FAULT_APF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_APF) +#define GET_FAULT_TLBMF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_TLBMF) +#define GET_FAULT_HTWDEEF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_HTWDEEF) +#define GET_FAULT_HTWSEEF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_HTWSEEF) +#define GET_FAULT_MHF(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_MHF) +#define GET_FAULT_SL(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_SL) +#define GET_FAULT_SS(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT_SS) + +#define GET_NOFAULT_SS(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NOFAULT_SS) +#define GET_NOFAULT_MT(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NOFAULT_MT) +#define GET_NOFAULT_SH(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NOFAULT_SH) +#define GET_NOFAULT_NS(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NOFAULT_NS) +#define GET_NOFAULT_NOS(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NOFAULT_NOS) +#define GET_NPFAULT_PA(b, c) GET_CONTEXT_FIELD(b, c, PAR, PAR_NPFAULT_PA) + + +/* PRRR */ +#define GET_MTC0(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC0) +#define GET_MTC1(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC1) +#define GET_MTC2(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC2) +#define GET_MTC3(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC3) +#define GET_MTC4(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC4) +#define GET_MTC5(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC5) +#define GET_MTC6(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC6) +#define GET_MTC7(b, c) GET_CONTEXT_FIELD(b, c, PRRR, MTC7) +#define GET_SHDSH0(b, c) GET_CONTEXT_FIELD(b, c, PRRR, SHDSH0) +#define GET_SHDSH1(b, c) GET_CONTEXT_FIELD(b, c, PRRR, SHDSH1) +#define GET_SHNMSH0(b, c) GET_CONTEXT_FIELD(b, c, PRRR, SHNMSH0) +#define GET_SHNMSH1(b, c) GET_CONTEXT_FIELD(b, c, PRRR, SHNMSH1) +#define GET_NOS0(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS0) +#define GET_NOS1(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS1) +#define GET_NOS2(b, c) 
GET_CONTEXT_FIELD(b, c, PRRR, NOS2) +#define GET_NOS3(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS3) +#define GET_NOS4(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS4) +#define GET_NOS5(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS5) +#define GET_NOS6(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS6) +#define GET_NOS7(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS7) +#define PRRR_NOS(prrr, n) ((prrr) & (1 << ((n) + 24)) ? 1 : 0) +#define PRRR_MT(prrr, n) ((((prrr) & (3 << ((n) * 2))) >> ((n) * 2))) + + +/* RESUME */ +#define GET_TNR(b, c) GET_CONTEXT_FIELD(b, c, RESUME, TNR) + + +/* SCTLR */ +#define GET_M(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, M) +#define GET_TRE(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, TRE) +#define GET_AFE(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, AFE) +#define GET_HAF(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, HAF) +#define GET_BE(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, BE) +#define GET_AFFD(b, c) GET_CONTEXT_FIELD(b, c, SCTLR, AFFD) + + +/* TLBLKCR */ +#define GET_LKE(b, c) GET_CONTEXT_FIELD(b, c, TLBLKCR, LKE) +#define GET_TLBLCKR_TLBIALLCFG(b, c) \ + GET_CONTEXT_FIELD(b, c, TLBLKCR, TLBLCKR_TLBIALLCFG) +#define GET_TLBIASIDCFG(b, c) GET_CONTEXT_FIELD(b, c, TLBLKCR, TLBIASIDCFG) +#define GET_TLBIVAACFG(b, c) GET_CONTEXT_FIELD(b, c, TLBLKCR, TLBIVAACFG) +#define GET_FLOOR(b, c) GET_CONTEXT_FIELD(b, c, TLBLKCR, FLOOR) +#define GET_VICTIM(b, c) GET_CONTEXT_FIELD(b, c, TLBLKCR, VICTIM) + + +/* TTBCR */ +#define GET_N(b, c) GET_CONTEXT_FIELD(b, c, TTBCR, N) +#define GET_PD0(b, c) GET_CONTEXT_FIELD(b, c, TTBCR, PD0) +#define GET_PD1(b, c) GET_CONTEXT_FIELD(b, c, TTBCR, PD1) + + +/* TTBR0 */ +#define GET_TTBR0_IRGNH(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_IRGNH) +#define GET_TTBR0_SH(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_SH) +#define GET_TTBR0_ORGN(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_ORGN) +#define GET_TTBR0_NOS(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_NOS) +#define GET_TTBR0_IRGNL(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_IRGNL) +#define GET_TTBR0_PA(b, c) GET_CONTEXT_FIELD(b, c, TTBR0, TTBR0_PA) + + +/* TTBR1 */ +#define GET_TTBR1_IRGNH(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_IRGNH) +#define GET_TTBR1_SH(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_SH) +#define GET_TTBR1_ORGN(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_ORGN) +#define GET_TTBR1_NOS(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_NOS) +#define GET_TTBR1_IRGNL(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_IRGNL) +#define GET_TTBR1_PA(b, c) GET_CONTEXT_FIELD(b, c, TTBR1, TTBR1_PA) + + +/* V2PSR */ +#define GET_HIT(b, c) GET_CONTEXT_FIELD(b, c, V2PSR, HIT) +#define GET_INDEX(b, c) GET_CONTEXT_FIELD(b, c, V2PSR, INDEX) + + +/* Global Registers */ +#define M2VCBR_N (0xFF000) +#define CBACR_N (0xFF800) +#define TLBRSW (0xFFE00) +#define TLBTR0 (0xFFE80) +#define TLBTR1 (0xFFE84) +#define TLBTR2 (0xFFE88) +#define TESTBUSCR (0xFFE8C) +#define GLOBAL_TLBIALL (0xFFF00) +#define TLBIVMID (0xFFF04) +#define CR (0xFFF80) +#define EAR (0xFFF84) +#define ESR (0xFFF88) +#define ESRRESTORE (0xFFF8C) +#define ESYNR0 (0xFFF90) +#define ESYNR1 (0xFFF94) +#define REV (0xFFFF4) +#define IDR (0xFFFF8) +#define RPU_ACR (0xFFFFC) + + +/* Context Bank Registers */ +#define SCTLR (0x000) +#define ACTLR (0x004) +#define CONTEXTIDR (0x008) +#define TTBR0 (0x010) +#define TTBR1 (0x014) +#define TTBCR (0x018) +#define PAR (0x01C) +#define FSR (0x020) +#define FSRRESTORE (0x024) +#define FAR (0x028) +#define FSYNR0 (0x02C) +#define FSYNR1 (0x030) +#define PRRR (0x034) +#define NMRR (0x038) +#define TLBLCKR (0x03C) +#define V2PSR (0x040) +#define TLBFLPTER (0x044) +#define 
TLBSLPTER (0x048) +#define BFBCR (0x04C) +#define CTX_TLBIALL (0x800) +#define TLBIASID (0x804) +#define TLBIVA (0x808) +#define TLBIVAA (0x80C) +#define V2PPR (0x810) +#define V2PPW (0x814) +#define V2PUR (0x818) +#define V2PUW (0x81C) +#define RESUME (0x820) + + +/* Global Register Fields */ +/* CBACRn */ +#define RWVMID (RWVMID_MASK << RWVMID_SHIFT) +#define RWE (RWE_MASK << RWE_SHIFT) +#define RWGE (RWGE_MASK << RWGE_SHIFT) +#define CBVMID (CBVMID_MASK << CBVMID_SHIFT) +#define IRPTNDX (IRPTNDX_MASK << IRPTNDX_SHIFT) + + +/* CR */ +#define RPUE (RPUE_MASK << RPUE_SHIFT) +#define RPUERE (RPUERE_MASK << RPUERE_SHIFT) +#define RPUEIE (RPUEIE_MASK << RPUEIE_SHIFT) +#define DCDEE (DCDEE_MASK << DCDEE_SHIFT) +#define CLIENTPD (CLIENTPD_MASK << CLIENTPD_SHIFT) +#define STALLD (STALLD_MASK << STALLD_SHIFT) +#define TLBLKCRWE (TLBLKCRWE_MASK << TLBLKCRWE_SHIFT) +#define CR_TLBIALLCFG (CR_TLBIALLCFG_MASK << CR_TLBIALLCFG_SHIFT) +#define TLBIVMIDCFG (TLBIVMIDCFG_MASK << TLBIVMIDCFG_SHIFT) +#define CR_HUME (CR_HUME_MASK << CR_HUME_SHIFT) + + +/* ESR */ +#define CFG (CFG_MASK << CFG_SHIFT) +#define BYPASS (BYPASS_MASK << BYPASS_SHIFT) +#define ESR_MULTI (ESR_MULTI_MASK << ESR_MULTI_SHIFT) + + +/* ESYNR0 */ +#define ESYNR0_AMID (ESYNR0_AMID_MASK << ESYNR0_AMID_SHIFT) +#define ESYNR0_APID (ESYNR0_APID_MASK << ESYNR0_APID_SHIFT) +#define ESYNR0_ABID (ESYNR0_ABID_MASK << ESYNR0_ABID_SHIFT) +#define ESYNR0_AVMID (ESYNR0_AVMID_MASK << ESYNR0_AVMID_SHIFT) +#define ESYNR0_ATID (ESYNR0_ATID_MASK << ESYNR0_ATID_SHIFT) + + +/* ESYNR1 */ +#define ESYNR1_AMEMTYPE (ESYNR1_AMEMTYPE_MASK << ESYNR1_AMEMTYPE_SHIFT) +#define ESYNR1_ASHARED (ESYNR1_ASHARED_MASK << ESYNR1_ASHARED_SHIFT) +#define ESYNR1_AINNERSHARED (ESYNR1_AINNERSHARED_MASK<< \ + ESYNR1_AINNERSHARED_SHIFT) +#define ESYNR1_APRIV (ESYNR1_APRIV_MASK << ESYNR1_APRIV_SHIFT) +#define ESYNR1_APROTNS (ESYNR1_APROTNS_MASK << ESYNR1_APROTNS_SHIFT) +#define ESYNR1_AINST (ESYNR1_AINST_MASK << ESYNR1_AINST_SHIFT) +#define ESYNR1_AWRITE (ESYNR1_AWRITE_MASK << ESYNR1_AWRITE_SHIFT) +#define ESYNR1_ABURST (ESYNR1_ABURST_MASK << ESYNR1_ABURST_SHIFT) +#define ESYNR1_ALEN (ESYNR1_ALEN_MASK << ESYNR1_ALEN_SHIFT) +#define ESYNR1_ASIZE (ESYNR1_ASIZE_MASK << ESYNR1_ASIZE_SHIFT) +#define ESYNR1_ALOCK (ESYNR1_ALOCK_MASK << ESYNR1_ALOCK_SHIFT) +#define ESYNR1_AOOO (ESYNR1_AOOO_MASK << ESYNR1_AOOO_SHIFT) +#define ESYNR1_AFULL (ESYNR1_AFULL_MASK << ESYNR1_AFULL_SHIFT) +#define ESYNR1_AC (ESYNR1_AC_MASK << ESYNR1_AC_SHIFT) +#define ESYNR1_DCD (ESYNR1_DCD_MASK << ESYNR1_DCD_SHIFT) + + +/* IDR */ +#define NM2VCBMT (NM2VCBMT_MASK << NM2VCBMT_SHIFT) +#define HTW (HTW_MASK << HTW_SHIFT) +#define HUM (HUM_MASK << HUM_SHIFT) +#define TLBSIZE (TLBSIZE_MASK << TLBSIZE_SHIFT) +#define NCB (NCB_MASK << NCB_SHIFT) +#define NIRPT (NIRPT_MASK << NIRPT_SHIFT) + + +/* M2VCBRn */ +#define VMID (VMID_MASK << VMID_SHIFT) +#define CBNDX (CBNDX_MASK << CBNDX_SHIFT) +#define BYPASSD (BYPASSD_MASK << BYPASSD_SHIFT) +#define BPRCOSH (BPRCOSH_MASK << BPRCOSH_SHIFT) +#define BPRCISH (BPRCISH_MASK << BPRCISH_SHIFT) +#define BPRCNSH (BPRCNSH_MASK << BPRCNSH_SHIFT) +#define BPSHCFG (BPSHCFG_MASK << BPSHCFG_SHIFT) +#define NSCFG (NSCFG_MASK << NSCFG_SHIFT) +#define BPMTCFG (BPMTCFG_MASK << BPMTCFG_SHIFT) +#define BPMEMTYPE (BPMEMTYPE_MASK << BPMEMTYPE_SHIFT) + + +/* REV */ +#define IDR_MINOR (MINOR_MASK << MINOR_SHIFT) +#define IDR_MAJOR (MAJOR_MASK << MAJOR_SHIFT) + + +/* TESTBUSCR */ +#define TBE (TBE_MASK << TBE_SHIFT) +#define SPDMBE (SPDMBE_MASK << SPDMBE_SHIFT) +#define WGSEL (WGSEL_MASK << WGSEL_SHIFT) 
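The context-bank register offsets listed above (SCTLR at 0x000 through RESUME at 0x820) are small because each context bank occupies its own 4 KiB window: CTX_SHIFT is 12, so SET_CTX_REG()/GET_CTX_REG() add ctx << 12 to the bank-relative offset, while the global registers sit in the top pages of the block (0xFF000 and up). A hedged sketch of that address arithmetic, assuming a flat MMIO mapping (ctx_reg_addr and FSR_OFFSET are illustrative names):

#include <stdint.h>

#define CTX_SHIFT	12	/* each context bank spans 1 << 12 = 4 KiB */
#define FSR_OFFSET	0x020	/* per-context fault status register */

/* Address of a per-context register: base + offset + ctx * 4 KiB. */
static inline volatile uint32_t *ctx_reg_addr(uintptr_t base,
					      unsigned int ctx,
					      uint32_t reg)
{
	return (volatile uint32_t *)(base + reg + ((uintptr_t)ctx << CTX_SHIFT));
}

For example, the FSR of context bank 2 would be read at base + 0x020 + 0x2000.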
+#define TBLSEL (TBLSEL_MASK << TBLSEL_SHIFT) +#define TBHSEL (TBHSEL_MASK << TBHSEL_SHIFT) +#define SPDM0SEL (SPDM0SEL_MASK << SPDM0SEL_SHIFT) +#define SPDM1SEL (SPDM1SEL_MASK << SPDM1SEL_SHIFT) +#define SPDM2SEL (SPDM2SEL_MASK << SPDM2SEL_SHIFT) +#define SPDM3SEL (SPDM3SEL_MASK << SPDM3SEL_SHIFT) + + +/* TLBIVMID */ +#define TLBIVMID_VMID (TLBIVMID_VMID_MASK << TLBIVMID_VMID_SHIFT) + + +/* TLBRSW */ +#define TLBRSW_INDEX (TLBRSW_INDEX_MASK << TLBRSW_INDEX_SHIFT) +#define TLBBFBS (TLBBFBS_MASK << TLBBFBS_SHIFT) + + +/* TLBTR0 */ +#define PR (PR_MASK << PR_SHIFT) +#define PW (PW_MASK << PW_SHIFT) +#define UR (UR_MASK << UR_SHIFT) +#define UW (UW_MASK << UW_SHIFT) +#define XN (XN_MASK << XN_SHIFT) +#define NSDESC (NSDESC_MASK << NSDESC_SHIFT) +#define ISH (ISH_MASK << ISH_SHIFT) +#define SH (SH_MASK << SH_SHIFT) +#define MT (MT_MASK << MT_SHIFT) +#define DPSIZR (DPSIZR_MASK << DPSIZR_SHIFT) +#define DPSIZC (DPSIZC_MASK << DPSIZC_SHIFT) + + +/* TLBTR1 */ +#define TLBTR1_VMID (TLBTR1_VMID_MASK << TLBTR1_VMID_SHIFT) +#define TLBTR1_PA (TLBTR1_PA_MASK << TLBTR1_PA_SHIFT) + + +/* TLBTR2 */ +#define TLBTR2_ASID (TLBTR2_ASID_MASK << TLBTR2_ASID_SHIFT) +#define TLBTR2_V (TLBTR2_V_MASK << TLBTR2_V_SHIFT) +#define TLBTR2_NSTID (TLBTR2_NSTID_MASK << TLBTR2_NSTID_SHIFT) +#define TLBTR2_NV (TLBTR2_NV_MASK << TLBTR2_NV_SHIFT) +#define TLBTR2_VA (TLBTR2_VA_MASK << TLBTR2_VA_SHIFT) + + +/* Context Register Fields */ +/* ACTLR */ +#define CFERE (CFERE_MASK << CFERE_SHIFT) +#define CFEIE (CFEIE_MASK << CFEIE_SHIFT) +#define PTSHCFG (PTSHCFG_MASK << PTSHCFG_SHIFT) +#define RCOSH (RCOSH_MASK << RCOSH_SHIFT) +#define RCISH (RCISH_MASK << RCISH_SHIFT) +#define RCNSH (RCNSH_MASK << RCNSH_SHIFT) +#define PRIVCFG (PRIVCFG_MASK << PRIVCFG_SHIFT) +#define DNA (DNA_MASK << DNA_SHIFT) +#define DNLV2PA (DNLV2PA_MASK << DNLV2PA_SHIFT) +#define TLBMCFG (TLBMCFG_MASK << TLBMCFG_SHIFT) +#define CFCFG (CFCFG_MASK << CFCFG_SHIFT) +#define TIPCF (TIPCF_MASK << TIPCF_SHIFT) +#define V2PCFG (V2PCFG_MASK << V2PCFG_SHIFT) +#define HUME (HUME_MASK << HUME_SHIFT) +#define PTMTCFG (PTMTCFG_MASK << PTMTCFG_SHIFT) +#define PTMEMTYPE (PTMEMTYPE_MASK << PTMEMTYPE_SHIFT) + + +/* BFBCR */ +#define BFBDFE (BFBDFE_MASK << BFBDFE_SHIFT) +#define BFBSFE (BFBSFE_MASK << BFBSFE_SHIFT) +#define SFVS (SFVS_MASK << SFVS_SHIFT) +#define FLVIC (FLVIC_MASK << FLVIC_SHIFT) +#define SLVIC (SLVIC_MASK << SLVIC_SHIFT) + + +/* CONTEXTIDR */ +#define CONTEXTIDR_ASID (CONTEXTIDR_ASID_MASK << CONTEXTIDR_ASID_SHIFT) +#define PROCID (PROCID_MASK << PROCID_SHIFT) + + +/* FSR */ +#define TF (TF_MASK << TF_SHIFT) +#define AFF (AFF_MASK << AFF_SHIFT) +#define APF (APF_MASK << APF_SHIFT) +#define TLBMF (TLBMF_MASK << TLBMF_SHIFT) +#define HTWDEEF (HTWDEEF_MASK << HTWDEEF_SHIFT) +#define HTWSEEF (HTWSEEF_MASK << HTWSEEF_SHIFT) +#define MHF (MHF_MASK << MHF_SHIFT) +#define SL (SL_MASK << SL_SHIFT) +#define SS (SS_MASK << SS_SHIFT) +#define MULTI (MULTI_MASK << MULTI_SHIFT) + + +/* FSYNR0 */ +#define AMID (AMID_MASK << AMID_SHIFT) +#define APID (APID_MASK << APID_SHIFT) +#define ABID (ABID_MASK << ABID_SHIFT) +#define ATID (ATID_MASK << ATID_SHIFT) + + +/* FSYNR1 */ +#define AMEMTYPE (AMEMTYPE_MASK << AMEMTYPE_SHIFT) +#define ASHARED (ASHARED_MASK << ASHARED_SHIFT) +#define AINNERSHARED (AINNERSHARED_MASK << AINNERSHARED_SHIFT) +#define APRIV (APRIV_MASK << APRIV_SHIFT) +#define APROTNS (APROTNS_MASK << APROTNS_SHIFT) +#define AINST (AINST_MASK << AINST_SHIFT) +#define AWRITE (AWRITE_MASK << AWRITE_SHIFT) +#define ABURST (ABURST_MASK << ABURST_SHIFT) +#define ALEN 
(ALEN_MASK << ALEN_SHIFT) +#define FSYNR1_ASIZE (FSYNR1_ASIZE_MASK << FSYNR1_ASIZE_SHIFT) +#define ALOCK (ALOCK_MASK << ALOCK_SHIFT) +#define AFULL (AFULL_MASK << AFULL_SHIFT) + + +/* NMRR */ +#define ICPC0 (ICPC0_MASK << ICPC0_SHIFT) +#define ICPC1 (ICPC1_MASK << ICPC1_SHIFT) +#define ICPC2 (ICPC2_MASK << ICPC2_SHIFT) +#define ICPC3 (ICPC3_MASK << ICPC3_SHIFT) +#define ICPC4 (ICPC4_MASK << ICPC4_SHIFT) +#define ICPC5 (ICPC5_MASK << ICPC5_SHIFT) +#define ICPC6 (ICPC6_MASK << ICPC6_SHIFT) +#define ICPC7 (ICPC7_MASK << ICPC7_SHIFT) +#define OCPC0 (OCPC0_MASK << OCPC0_SHIFT) +#define OCPC1 (OCPC1_MASK << OCPC1_SHIFT) +#define OCPC2 (OCPC2_MASK << OCPC2_SHIFT) +#define OCPC3 (OCPC3_MASK << OCPC3_SHIFT) +#define OCPC4 (OCPC4_MASK << OCPC4_SHIFT) +#define OCPC5 (OCPC5_MASK << OCPC5_SHIFT) +#define OCPC6 (OCPC6_MASK << OCPC6_SHIFT) +#define OCPC7 (OCPC7_MASK << OCPC7_SHIFT) + + +/* PAR */ +#define FAULT (FAULT_MASK << FAULT_SHIFT) +/* If a fault is present, these are the +same as the fault fields in the FAR */ +#define FAULT_TF (FAULT_TF_MASK << FAULT_TF_SHIFT) +#define FAULT_AFF (FAULT_AFF_MASK << FAULT_AFF_SHIFT) +#define FAULT_APF (FAULT_APF_MASK << FAULT_APF_SHIFT) +#define FAULT_TLBMF (FAULT_TLBMF_MASK << FAULT_TLBMF_SHIFT) +#define FAULT_HTWDEEF (FAULT_HTWDEEF_MASK << FAULT_HTWDEEF_SHIFT) +#define FAULT_HTWSEEF (FAULT_HTWSEEF_MASK << FAULT_HTWSEEF_SHIFT) +#define FAULT_MHF (FAULT_MHF_MASK << FAULT_MHF_SHIFT) +#define FAULT_SL (FAULT_SL_MASK << FAULT_SL_SHIFT) +#define FAULT_SS (FAULT_SS_MASK << FAULT_SS_SHIFT) + +/* If NO fault is present, the following fields are in effect */ +/* (FAULT remains as before) */ +#define PAR_NOFAULT_SS (PAR_NOFAULT_SS_MASK << PAR_NOFAULT_SS_SHIFT) +#define PAR_NOFAULT_MT (PAR_NOFAULT_MT_MASK << PAR_NOFAULT_MT_SHIFT) +#define PAR_NOFAULT_SH (PAR_NOFAULT_SH_MASK << PAR_NOFAULT_SH_SHIFT) +#define PAR_NOFAULT_NS (PAR_NOFAULT_NS_MASK << PAR_NOFAULT_NS_SHIFT) +#define PAR_NOFAULT_NOS (PAR_NOFAULT_NOS_MASK << PAR_NOFAULT_NOS_SHIFT) +#define PAR_NPFAULT_PA (PAR_NPFAULT_PA_MASK << PAR_NPFAULT_PA_SHIFT) + + +/* PRRR */ +#define MTC0 (MTC0_MASK << MTC0_SHIFT) +#define MTC1 (MTC1_MASK << MTC1_SHIFT) +#define MTC2 (MTC2_MASK << MTC2_SHIFT) +#define MTC3 (MTC3_MASK << MTC3_SHIFT) +#define MTC4 (MTC4_MASK << MTC4_SHIFT) +#define MTC5 (MTC5_MASK << MTC5_SHIFT) +#define MTC6 (MTC6_MASK << MTC6_SHIFT) +#define MTC7 (MTC7_MASK << MTC7_SHIFT) +#define SHDSH0 (SHDSH0_MASK << SHDSH0_SHIFT) +#define SHDSH1 (SHDSH1_MASK << SHDSH1_SHIFT) +#define SHNMSH0 (SHNMSH0_MASK << SHNMSH0_SHIFT) +#define SHNMSH1 (SHNMSH1_MASK << SHNMSH1_SHIFT) +#define NOS0 (NOS0_MASK << NOS0_SHIFT) +#define NOS1 (NOS1_MASK << NOS1_SHIFT) +#define NOS2 (NOS2_MASK << NOS2_SHIFT) +#define NOS3 (NOS3_MASK << NOS3_SHIFT) +#define NOS4 (NOS4_MASK << NOS4_SHIFT) +#define NOS5 (NOS5_MASK << NOS5_SHIFT) +#define NOS6 (NOS6_MASK << NOS6_SHIFT) +#define NOS7 (NOS7_MASK << NOS7_SHIFT) + + +/* RESUME */ +#define TNR (TNR_MASK << TNR_SHIFT) + + +/* SCTLR */ +#define M (M_MASK << M_SHIFT) +#define TRE (TRE_MASK << TRE_SHIFT) +#define AFE (AFE_MASK << AFE_SHIFT) +#define HAF (HAF_MASK << HAF_SHIFT) +#define BE (BE_MASK << BE_SHIFT) +#define AFFD (AFFD_MASK << AFFD_SHIFT) + + +/* TLBIASID */ +#define TLBIASID_ASID (TLBIASID_ASID_MASK << TLBIASID_ASID_SHIFT) + + +/* TLBIVA */ +#define TLBIVA_ASID (TLBIVA_ASID_MASK << TLBIVA_ASID_SHIFT) +#define TLBIVA_VA (TLBIVA_VA_MASK << TLBIVA_VA_SHIFT) + + +/* TLBIVAA */ +#define TLBIVAA_VA (TLBIVAA_VA_MASK << TLBIVAA_VA_SHIFT) + + +/* TLBLCKR */ +#define LKE (LKE_MASK << LKE_SHIFT) 
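As the PAR comments above note, the register is effectively a union keyed on the FAULT bit: after a virtual-to-physical probe the same word either carries a fault syndrome (mirroring the fault-status fields) or the translated output address. A sketch of how a caller might decode it with the accessors defined earlier in this header (kernel-style fragment; the granule handling shown here is an assumption and may differ from the driver's actual iova_to_phys logic):

static phys_addr_t decode_par(void __iomem *base, int ctx, unsigned long va)
{
	unsigned int par = GET_PAR(base, ctx);

	if (GET_FAULT(base, ctx))		/* FAULT set: PAR holds a syndrome */
		return 0;

	if (GET_NOFAULT_SS(base, ctx))		/* supersection: 16 MiB granule */
		return (par & 0xFF000000) | (va & 0x00FFFFFF);

	/* Small/large page: upper 20 bits from PAR, page offset from the VA. */
	return (par & 0xFFFFF000) | (va & 0x00000FFF);
}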
+#define TLBLCKR_TLBIALLCFG (TLBLCKR_TLBIALLCFG_MASK<<TLBLCKR_TLBIALLCFG_SHIFT) +#define TLBIASIDCFG (TLBIASIDCFG_MASK << TLBIASIDCFG_SHIFT) +#define TLBIVAACFG (TLBIVAACFG_MASK << TLBIVAACFG_SHIFT) +#define FLOOR (FLOOR_MASK << FLOOR_SHIFT) +#define VICTIM (VICTIM_MASK << VICTIM_SHIFT) + + +/* TTBCR */ +#define N (N_MASK << N_SHIFT) +#define PD0 (PD0_MASK << PD0_SHIFT) +#define PD1 (PD1_MASK << PD1_SHIFT) + + +/* TTBR0 */ +#define TTBR0_IRGNH (TTBR0_IRGNH_MASK << TTBR0_IRGNH_SHIFT) +#define TTBR0_SH (TTBR0_SH_MASK << TTBR0_SH_SHIFT) +#define TTBR0_ORGN (TTBR0_ORGN_MASK << TTBR0_ORGN_SHIFT) +#define TTBR0_NOS (TTBR0_NOS_MASK << TTBR0_NOS_SHIFT) +#define TTBR0_IRGNL (TTBR0_IRGNL_MASK << TTBR0_IRGNL_SHIFT) +#define TTBR0_PA (TTBR0_PA_MASK << TTBR0_PA_SHIFT) + + +/* TTBR1 */ +#define TTBR1_IRGNH (TTBR1_IRGNH_MASK << TTBR1_IRGNH_SHIFT) +#define TTBR1_SH (TTBR1_SH_MASK << TTBR1_SH_SHIFT) +#define TTBR1_ORGN (TTBR1_ORGN_MASK << TTBR1_ORGN_SHIFT) +#define TTBR1_NOS (TTBR1_NOS_MASK << TTBR1_NOS_SHIFT) +#define TTBR1_IRGNL (TTBR1_IRGNL_MASK << TTBR1_IRGNL_SHIFT) +#define TTBR1_PA (TTBR1_PA_MASK << TTBR1_PA_SHIFT) + + +/* V2PSR */ +#define HIT (HIT_MASK << HIT_SHIFT) +#define INDEX (INDEX_MASK << INDEX_SHIFT) + + +/* V2Pxx */ +#define V2Pxx_INDEX (V2Pxx_INDEX_MASK << V2Pxx_INDEX_SHIFT) +#define V2Pxx_VA (V2Pxx_VA_MASK << V2Pxx_VA_SHIFT) + + +/* Global Register Masks */ +/* CBACRn */ +#define RWVMID_MASK 0x1F +#define RWE_MASK 0x01 +#define RWGE_MASK 0x01 +#define CBVMID_MASK 0x1F +#define IRPTNDX_MASK 0xFF + + +/* CR */ +#define RPUE_MASK 0x01 +#define RPUERE_MASK 0x01 +#define RPUEIE_MASK 0x01 +#define DCDEE_MASK 0x01 +#define CLIENTPD_MASK 0x01 +#define STALLD_MASK 0x01 +#define TLBLKCRWE_MASK 0x01 +#define CR_TLBIALLCFG_MASK 0x01 +#define TLBIVMIDCFG_MASK 0x01 +#define CR_HUME_MASK 0x01 + + +/* ESR */ +#define CFG_MASK 0x01 +#define BYPASS_MASK 0x01 +#define ESR_MULTI_MASK 0x01 + + +/* ESYNR0 */ +#define ESYNR0_AMID_MASK 0xFF +#define ESYNR0_APID_MASK 0x1F +#define ESYNR0_ABID_MASK 0x07 +#define ESYNR0_AVMID_MASK 0x1F +#define ESYNR0_ATID_MASK 0xFF + + +/* ESYNR1 */ +#define ESYNR1_AMEMTYPE_MASK 0x07 +#define ESYNR1_ASHARED_MASK 0x01 +#define ESYNR1_AINNERSHARED_MASK 0x01 +#define ESYNR1_APRIV_MASK 0x01 +#define ESYNR1_APROTNS_MASK 0x01 +#define ESYNR1_AINST_MASK 0x01 +#define ESYNR1_AWRITE_MASK 0x01 +#define ESYNR1_ABURST_MASK 0x01 +#define ESYNR1_ALEN_MASK 0x0F +#define ESYNR1_ASIZE_MASK 0x01 +#define ESYNR1_ALOCK_MASK 0x03 +#define ESYNR1_AOOO_MASK 0x01 +#define ESYNR1_AFULL_MASK 0x01 +#define ESYNR1_AC_MASK 0x01 +#define ESYNR1_DCD_MASK 0x01 + + +/* IDR */ +#define NM2VCBMT_MASK 0x1FF +#define HTW_MASK 0x01 +#define HUM_MASK 0x01 +#define TLBSIZE_MASK 0x0F +#define NCB_MASK 0xFF +#define NIRPT_MASK 0xFF + + +/* M2VCBRn */ +#define VMID_MASK 0x1F +#define CBNDX_MASK 0xFF +#define BYPASSD_MASK 0x01 +#define BPRCOSH_MASK 0x01 +#define BPRCISH_MASK 0x01 +#define BPRCNSH_MASK 0x01 +#define BPSHCFG_MASK 0x03 +#define NSCFG_MASK 0x03 +#define BPMTCFG_MASK 0x01 +#define BPMEMTYPE_MASK 0x07 + + +/* REV */ +#define MINOR_MASK 0x0F +#define MAJOR_MASK 0x0F + + +/* TESTBUSCR */ +#define TBE_MASK 0x01 +#define SPDMBE_MASK 0x01 +#define WGSEL_MASK 0x03 +#define TBLSEL_MASK 0x03 +#define TBHSEL_MASK 0x03 +#define SPDM0SEL_MASK 0x0F +#define SPDM1SEL_MASK 0x0F +#define SPDM2SEL_MASK 0x0F +#define SPDM3SEL_MASK 0x0F + + +/* TLBIMID */ +#define TLBIVMID_VMID_MASK 0x1F + + +/* TLBRSW */ +#define TLBRSW_INDEX_MASK 0xFF +#define TLBBFBS_MASK 0x03 + + +/* TLBTR0 */ +#define PR_MASK 0x01 +#define PW_MASK 0x01 
+#define UR_MASK 0x01 +#define UW_MASK 0x01 +#define XN_MASK 0x01 +#define NSDESC_MASK 0x01 +#define ISH_MASK 0x01 +#define SH_MASK 0x01 +#define MT_MASK 0x07 +#define DPSIZR_MASK 0x07 +#define DPSIZC_MASK 0x07 + + +/* TLBTR1 */ +#define TLBTR1_VMID_MASK 0x1F +#define TLBTR1_PA_MASK 0x000FFFFF + + +/* TLBTR2 */ +#define TLBTR2_ASID_MASK 0xFF +#define TLBTR2_V_MASK 0x01 +#define TLBTR2_NSTID_MASK 0x01 +#define TLBTR2_NV_MASK 0x01 +#define TLBTR2_VA_MASK 0x000FFFFF + + +/* Global Register Shifts */ +/* CBACRn */ +#define RWVMID_SHIFT 0 +#define RWE_SHIFT 8 +#define RWGE_SHIFT 9 +#define CBVMID_SHIFT 16 +#define IRPTNDX_SHIFT 24 + + +/* CR */ +#define RPUE_SHIFT 0 +#define RPUERE_SHIFT 1 +#define RPUEIE_SHIFT 2 +#define DCDEE_SHIFT 3 +#define CLIENTPD_SHIFT 4 +#define STALLD_SHIFT 5 +#define TLBLKCRWE_SHIFT 6 +#define CR_TLBIALLCFG_SHIFT 7 +#define TLBIVMIDCFG_SHIFT 8 +#define CR_HUME_SHIFT 9 + + +/* ESR */ +#define CFG_SHIFT 0 +#define BYPASS_SHIFT 1 +#define ESR_MULTI_SHIFT 31 + + +/* ESYNR0 */ +#define ESYNR0_AMID_SHIFT 0 +#define ESYNR0_APID_SHIFT 8 +#define ESYNR0_ABID_SHIFT 13 +#define ESYNR0_AVMID_SHIFT 16 +#define ESYNR0_ATID_SHIFT 24 + + +/* ESYNR1 */ +#define ESYNR1_AMEMTYPE_SHIFT 0 +#define ESYNR1_ASHARED_SHIFT 3 +#define ESYNR1_AINNERSHARED_SHIFT 4 +#define ESYNR1_APRIV_SHIFT 5 +#define ESYNR1_APROTNS_SHIFT 6 +#define ESYNR1_AINST_SHIFT 7 +#define ESYNR1_AWRITE_SHIFT 8 +#define ESYNR1_ABURST_SHIFT 10 +#define ESYNR1_ALEN_SHIFT 12 +#define ESYNR1_ASIZE_SHIFT 16 +#define ESYNR1_ALOCK_SHIFT 20 +#define ESYNR1_AOOO_SHIFT 22 +#define ESYNR1_AFULL_SHIFT 24 +#define ESYNR1_AC_SHIFT 30 +#define ESYNR1_DCD_SHIFT 31 + + +/* IDR */ +#define NM2VCBMT_SHIFT 0 +#define HTW_SHIFT 9 +#define HUM_SHIFT 10 +#define TLBSIZE_SHIFT 12 +#define NCB_SHIFT 16 +#define NIRPT_SHIFT 24 + + +/* M2VCBRn */ +#define VMID_SHIFT 0 +#define CBNDX_SHIFT 8 +#define BYPASSD_SHIFT 16 +#define BPRCOSH_SHIFT 17 +#define BPRCISH_SHIFT 18 +#define BPRCNSH_SHIFT 19 +#define BPSHCFG_SHIFT 20 +#define NSCFG_SHIFT 22 +#define BPMTCFG_SHIFT 24 +#define BPMEMTYPE_SHIFT 25 + + +/* REV */ +#define MINOR_SHIFT 0 +#define MAJOR_SHIFT 4 + + +/* TESTBUSCR */ +#define TBE_SHIFT 0 +#define SPDMBE_SHIFT 1 +#define WGSEL_SHIFT 8 +#define TBLSEL_SHIFT 12 +#define TBHSEL_SHIFT 14 +#define SPDM0SEL_SHIFT 16 +#define SPDM1SEL_SHIFT 20 +#define SPDM2SEL_SHIFT 24 +#define SPDM3SEL_SHIFT 28 + + +/* TLBIMID */ +#define TLBIVMID_VMID_SHIFT 0 + + +/* TLBRSW */ +#define TLBRSW_INDEX_SHIFT 0 +#define TLBBFBS_SHIFT 8 + + +/* TLBTR0 */ +#define PR_SHIFT 0 +#define PW_SHIFT 1 +#define UR_SHIFT 2 +#define UW_SHIFT 3 +#define XN_SHIFT 4 +#define NSDESC_SHIFT 6 +#define ISH_SHIFT 7 +#define SH_SHIFT 8 +#define MT_SHIFT 9 +#define DPSIZR_SHIFT 16 +#define DPSIZC_SHIFT 20 + + +/* TLBTR1 */ +#define TLBTR1_VMID_SHIFT 0 +#define TLBTR1_PA_SHIFT 12 + + +/* TLBTR2 */ +#define TLBTR2_ASID_SHIFT 0 +#define TLBTR2_V_SHIFT 8 +#define TLBTR2_NSTID_SHIFT 9 +#define TLBTR2_NV_SHIFT 10 +#define TLBTR2_VA_SHIFT 12 + + +/* Context Register Masks */ +/* ACTLR */ +#define CFERE_MASK 0x01 +#define CFEIE_MASK 0x01 +#define PTSHCFG_MASK 0x03 +#define RCOSH_MASK 0x01 +#define RCISH_MASK 0x01 +#define RCNSH_MASK 0x01 +#define PRIVCFG_MASK 0x03 +#define DNA_MASK 0x01 +#define DNLV2PA_MASK 0x01 +#define TLBMCFG_MASK 0x03 +#define CFCFG_MASK 0x01 +#define TIPCF_MASK 0x01 +#define V2PCFG_MASK 0x03 +#define HUME_MASK 0x01 +#define PTMTCFG_MASK 0x01 +#define PTMEMTYPE_MASK 0x07 + + +/* BFBCR */ +#define BFBDFE_MASK 0x01 +#define BFBSFE_MASK 0x01 +#define SFVS_MASK 0x01 +#define 
FLVIC_MASK 0x0F +#define SLVIC_MASK 0x0F + + +/* CONTEXTIDR */ +#define CONTEXTIDR_ASID_MASK 0xFF +#define PROCID_MASK 0x00FFFFFF + + +/* FSR */ +#define TF_MASK 0x01 +#define AFF_MASK 0x01 +#define APF_MASK 0x01 +#define TLBMF_MASK 0x01 +#define HTWDEEF_MASK 0x01 +#define HTWSEEF_MASK 0x01 +#define MHF_MASK 0x01 +#define SL_MASK 0x01 +#define SS_MASK 0x01 +#define MULTI_MASK 0x01 + + +/* FSYNR0 */ +#define AMID_MASK 0xFF +#define APID_MASK 0x1F +#define ABID_MASK 0x07 +#define ATID_MASK 0xFF + + +/* FSYNR1 */ +#define AMEMTYPE_MASK 0x07 +#define ASHARED_MASK 0x01 +#define AINNERSHARED_MASK 0x01 +#define APRIV_MASK 0x01 +#define APROTNS_MASK 0x01 +#define AINST_MASK 0x01 +#define AWRITE_MASK 0x01 +#define ABURST_MASK 0x01 +#define ALEN_MASK 0x0F +#define FSYNR1_ASIZE_MASK 0x07 +#define ALOCK_MASK 0x03 +#define AFULL_MASK 0x01 + + +/* NMRR */ +#define ICPC0_MASK 0x03 +#define ICPC1_MASK 0x03 +#define ICPC2_MASK 0x03 +#define ICPC3_MASK 0x03 +#define ICPC4_MASK 0x03 +#define ICPC5_MASK 0x03 +#define ICPC6_MASK 0x03 +#define ICPC7_MASK 0x03 +#define OCPC0_MASK 0x03 +#define OCPC1_MASK 0x03 +#define OCPC2_MASK 0x03 +#define OCPC3_MASK 0x03 +#define OCPC4_MASK 0x03 +#define OCPC5_MASK 0x03 +#define OCPC6_MASK 0x03 +#define OCPC7_MASK 0x03 + + +/* PAR */ +#define FAULT_MASK 0x01 +/* If a fault is present, these are the +same as the fault fields in the FAR */ +#define FAULT_TF_MASK 0x01 +#define FAULT_AFF_MASK 0x01 +#define FAULT_APF_MASK 0x01 +#define FAULT_TLBMF_MASK 0x01 +#define FAULT_HTWDEEF_MASK 0x01 +#define FAULT_HTWSEEF_MASK 0x01 +#define FAULT_MHF_MASK 0x01 +#define FAULT_SL_MASK 0x01 +#define FAULT_SS_MASK 0x01 + +/* If NO fault is present, the following + * fields are in effect + * (FAULT remains as before) */ +#define PAR_NOFAULT_SS_MASK 0x01 +#define PAR_NOFAULT_MT_MASK 0x07 +#define PAR_NOFAULT_SH_MASK 0x01 +#define PAR_NOFAULT_NS_MASK 0x01 +#define PAR_NOFAULT_NOS_MASK 0x01 +#define PAR_NPFAULT_PA_MASK 0x000FFFFF + + +/* PRRR */ +#define MTC0_MASK 0x03 +#define MTC1_MASK 0x03 +#define MTC2_MASK 0x03 +#define MTC3_MASK 0x03 +#define MTC4_MASK 0x03 +#define MTC5_MASK 0x03 +#define MTC6_MASK 0x03 +#define MTC7_MASK 0x03 +#define SHDSH0_MASK 0x01 +#define SHDSH1_MASK 0x01 +#define SHNMSH0_MASK 0x01 +#define SHNMSH1_MASK 0x01 +#define NOS0_MASK 0x01 +#define NOS1_MASK 0x01 +#define NOS2_MASK 0x01 +#define NOS3_MASK 0x01 +#define NOS4_MASK 0x01 +#define NOS5_MASK 0x01 +#define NOS6_MASK 0x01 +#define NOS7_MASK 0x01 + + +/* RESUME */ +#define TNR_MASK 0x01 + + +/* SCTLR */ +#define M_MASK 0x01 +#define TRE_MASK 0x01 +#define AFE_MASK 0x01 +#define HAF_MASK 0x01 +#define BE_MASK 0x01 +#define AFFD_MASK 0x01 + + +/* TLBIASID */ +#define TLBIASID_ASID_MASK 0xFF + + +/* TLBIVA */ +#define TLBIVA_ASID_MASK 0xFF +#define TLBIVA_VA_MASK 0x000FFFFF + + +/* TLBIVAA */ +#define TLBIVAA_VA_MASK 0x000FFFFF + + +/* TLBLCKR */ +#define LKE_MASK 0x01 +#define TLBLCKR_TLBIALLCFG_MASK 0x01 +#define TLBIASIDCFG_MASK 0x01 +#define TLBIVAACFG_MASK 0x01 +#define FLOOR_MASK 0xFF +#define VICTIM_MASK 0xFF + + +/* TTBCR */ +#define N_MASK 0x07 +#define PD0_MASK 0x01 +#define PD1_MASK 0x01 + + +/* TTBR0 */ +#define TTBR0_IRGNH_MASK 0x01 +#define TTBR0_SH_MASK 0x01 +#define TTBR0_ORGN_MASK 0x03 +#define TTBR0_NOS_MASK 0x01 +#define TTBR0_IRGNL_MASK 0x01 +#define TTBR0_PA_MASK 0x0003FFFF + + +/* TTBR1 */ +#define TTBR1_IRGNH_MASK 0x01 +#define TTBR1_SH_MASK 0x01 +#define TTBR1_ORGN_MASK 0x03 +#define TTBR1_NOS_MASK 0x01 +#define TTBR1_IRGNL_MASK 0x01 +#define TTBR1_PA_MASK 0x0003FFFF + + +/* V2PSR */ +#define 
HIT_MASK 0x01 +#define INDEX_MASK 0xFF + + +/* V2Pxx */ +#define V2Pxx_INDEX_MASK 0xFF +#define V2Pxx_VA_MASK 0x000FFFFF + + +/* Context Register Shifts */ +/* ACTLR */ +#define CFERE_SHIFT 0 +#define CFEIE_SHIFT 1 +#define PTSHCFG_SHIFT 2 +#define RCOSH_SHIFT 4 +#define RCISH_SHIFT 5 +#define RCNSH_SHIFT 6 +#define PRIVCFG_SHIFT 8 +#define DNA_SHIFT 10 +#define DNLV2PA_SHIFT 11 +#define TLBMCFG_SHIFT 12 +#define CFCFG_SHIFT 14 +#define TIPCF_SHIFT 15 +#define V2PCFG_SHIFT 16 +#define HUME_SHIFT 18 +#define PTMTCFG_SHIFT 20 +#define PTMEMTYPE_SHIFT 21 + + +/* BFBCR */ +#define BFBDFE_SHIFT 0 +#define BFBSFE_SHIFT 1 +#define SFVS_SHIFT 2 +#define FLVIC_SHIFT 4 +#define SLVIC_SHIFT 8 + + +/* CONTEXTIDR */ +#define CONTEXTIDR_ASID_SHIFT 0 +#define PROCID_SHIFT 8 + + +/* FSR */ +#define TF_SHIFT 1 +#define AFF_SHIFT 2 +#define APF_SHIFT 3 +#define TLBMF_SHIFT 4 +#define HTWDEEF_SHIFT 5 +#define HTWSEEF_SHIFT 6 +#define MHF_SHIFT 7 +#define SL_SHIFT 16 +#define SS_SHIFT 30 +#define MULTI_SHIFT 31 + + +/* FSYNR0 */ +#define AMID_SHIFT 0 +#define APID_SHIFT 8 +#define ABID_SHIFT 13 +#define ATID_SHIFT 24 + + +/* FSYNR1 */ +#define AMEMTYPE_SHIFT 0 +#define ASHARED_SHIFT 3 +#define AINNERSHARED_SHIFT 4 +#define APRIV_SHIFT 5 +#define APROTNS_SHIFT 6 +#define AINST_SHIFT 7 +#define AWRITE_SHIFT 8 +#define ABURST_SHIFT 10 +#define ALEN_SHIFT 12 +#define FSYNR1_ASIZE_SHIFT 16 +#define ALOCK_SHIFT 20 +#define AFULL_SHIFT 24 + + +/* NMRR */ +#define ICPC0_SHIFT 0 +#define ICPC1_SHIFT 2 +#define ICPC2_SHIFT 4 +#define ICPC3_SHIFT 6 +#define ICPC4_SHIFT 8 +#define ICPC5_SHIFT 10 +#define ICPC6_SHIFT 12 +#define ICPC7_SHIFT 14 +#define OCPC0_SHIFT 16 +#define OCPC1_SHIFT 18 +#define OCPC2_SHIFT 20 +#define OCPC3_SHIFT 22 +#define OCPC4_SHIFT 24 +#define OCPC5_SHIFT 26 +#define OCPC6_SHIFT 28 +#define OCPC7_SHIFT 30 + + +/* PAR */ +#define FAULT_SHIFT 0 +/* If a fault is present, these are the +same as the fault fields in the FAR */ +#define FAULT_TF_SHIFT 1 +#define FAULT_AFF_SHIFT 2 +#define FAULT_APF_SHIFT 3 +#define FAULT_TLBMF_SHIFT 4 +#define FAULT_HTWDEEF_SHIFT 5 +#define FAULT_HTWSEEF_SHIFT 6 +#define FAULT_MHF_SHIFT 7 +#define FAULT_SL_SHIFT 16 +#define FAULT_SS_SHIFT 30 + +/* If NO fault is present, the following + * fields are in effect + * (FAULT remains as before) */ +#define PAR_NOFAULT_SS_SHIFT 1 +#define PAR_NOFAULT_MT_SHIFT 4 +#define PAR_NOFAULT_SH_SHIFT 7 +#define PAR_NOFAULT_NS_SHIFT 9 +#define PAR_NOFAULT_NOS_SHIFT 10 +#define PAR_NPFAULT_PA_SHIFT 12 + + +/* PRRR */ +#define MTC0_SHIFT 0 +#define MTC1_SHIFT 2 +#define MTC2_SHIFT 4 +#define MTC3_SHIFT 6 +#define MTC4_SHIFT 8 +#define MTC5_SHIFT 10 +#define MTC6_SHIFT 12 +#define MTC7_SHIFT 14 +#define SHDSH0_SHIFT 16 +#define SHDSH1_SHIFT 17 +#define SHNMSH0_SHIFT 18 +#define SHNMSH1_SHIFT 19 +#define NOS0_SHIFT 24 +#define NOS1_SHIFT 25 +#define NOS2_SHIFT 26 +#define NOS3_SHIFT 27 +#define NOS4_SHIFT 28 +#define NOS5_SHIFT 29 +#define NOS6_SHIFT 30 +#define NOS7_SHIFT 31 + + +/* RESUME */ +#define TNR_SHIFT 0 + + +/* SCTLR */ +#define M_SHIFT 0 +#define TRE_SHIFT 1 +#define AFE_SHIFT 2 +#define HAF_SHIFT 3 +#define BE_SHIFT 4 +#define AFFD_SHIFT 5 + + +/* TLBIASID */ +#define TLBIASID_ASID_SHIFT 0 + + +/* TLBIVA */ +#define TLBIVA_ASID_SHIFT 0 +#define TLBIVA_VA_SHIFT 12 + + +/* TLBIVAA */ +#define TLBIVAA_VA_SHIFT 12 + + +/* TLBLCKR */ +#define LKE_SHIFT 0 +#define TLBLCKR_TLBIALLCFG_SHIFT 1 +#define TLBIASIDCFG_SHIFT 2 +#define TLBIVAACFG_SHIFT 3 +#define FLOOR_SHIFT 8 +#define VICTIM_SHIFT 8 + + +/* TTBCR */ +#define N_SHIFT 3 
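Throughout these register tables each *_MASK is a field-width mask defined before shifting, and the matching *_SHIFT gives the field's bit position. As an illustration only (GET_FIELD, SET_FIELD and tlbtr2_asid are our names, not taken from this header), such pairs are normally combined like this:

#define GET_FIELD(reg, name) \
	(((reg) >> name##_SHIFT) & name##_MASK)
#define SET_FIELD(reg, name, val) \
	(((reg) & ~(name##_MASK << name##_SHIFT)) | \
	 (((val) & name##_MASK) << name##_SHIFT))

/* e.g. pull the 8-bit ASID out of a TLBTR2 value read earlier */
static inline u32 tlbtr2_asid(u32 tlbtr2)
{
	return GET_FIELD(tlbtr2, TLBTR2_ASID);
}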
+#define PD0_SHIFT 4 +#define PD1_SHIFT 5 + + +/* TTBR0 */ +#define TTBR0_IRGNH_SHIFT 0 +#define TTBR0_SH_SHIFT 1 +#define TTBR0_ORGN_SHIFT 3 +#define TTBR0_NOS_SHIFT 5 +#define TTBR0_IRGNL_SHIFT 6 +#define TTBR0_PA_SHIFT 14 + + +/* TTBR1 */ +#define TTBR1_IRGNH_SHIFT 0 +#define TTBR1_SH_SHIFT 1 +#define TTBR1_ORGN_SHIFT 3 +#define TTBR1_NOS_SHIFT 5 +#define TTBR1_IRGNL_SHIFT 6 +#define TTBR1_PA_SHIFT 14 + + +/* V2PSR */ +#define HIT_SHIFT 0 +#define INDEX_SHIFT 8 + + +/* V2Pxx */ +#define V2Pxx_INDEX_SHIFT 0 +#define V2Pxx_VA_SHIFT 12 + +#endif diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c new file mode 100644 index 00000000000..e550ccb7634 --- /dev/null +++ b/drivers/iommu/of_iommu.c @@ -0,0 +1,91 @@ +/* + * OF helpers for IOMMU + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/export.h> +#include <linux/limits.h> +#include <linux/of.h> +#include <linux/of_iommu.h> + +/** + * of_get_dma_window - Parse *dma-window property and returns 0 if found. + * + * @dn: device node + * @prefix: prefix for property name if any + * @index: index to start to parse + * @busno: Returns busno if supported. Otherwise pass NULL + * @addr: Returns address that DMA starts + * @size: Returns the range that DMA can handle + * + * This supports different formats flexibly. "prefix" can be + * configured if any. "busno" and "index" are optionally + * specified. Set 0(or NULL) if not used. + */ +int of_get_dma_window(struct device_node *dn, const char *prefix, int index, + unsigned long *busno, dma_addr_t *addr, size_t *size) +{ + const __be32 *dma_window, *end; + int bytes, cur_index = 0; + char propname[NAME_MAX], addrname[NAME_MAX], sizename[NAME_MAX]; + + if (!dn || !addr || !size) + return -EINVAL; + + if (!prefix) + prefix = ""; + + snprintf(propname, sizeof(propname), "%sdma-window", prefix); + snprintf(addrname, sizeof(addrname), "%s#dma-address-cells", prefix); + snprintf(sizename, sizeof(sizename), "%s#dma-size-cells", prefix); + + dma_window = of_get_property(dn, propname, &bytes); + if (!dma_window) + return -ENODEV; + end = dma_window + bytes / sizeof(*dma_window); + + while (dma_window < end) { + u32 cells; + const void *prop; + + /* busno is one cell if supported */ + if (busno) + *busno = be32_to_cpup(dma_window++); + + prop = of_get_property(dn, addrname, NULL); + if (!prop) + prop = of_get_property(dn, "#address-cells", NULL); + + cells = prop ? be32_to_cpup(prop) : of_n_addr_cells(dn); + if (!cells) + return -EINVAL; + *addr = of_read_number(dma_window, cells); + dma_window += cells; + + prop = of_get_property(dn, sizename, NULL); + cells = prop ? 
be32_to_cpup(prop) : of_n_size_cells(dn); + if (!cells) + return -EINVAL; + *size = of_read_number(dma_window, cells); + dma_window += cells; + + if (cur_index++ == index) + break; + } + return 0; +} +EXPORT_SYMBOL_GPL(of_get_dma_window); diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index 103dbd92e25..80fffba7f12 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -18,11 +18,11 @@ #include <linux/uaccess.h> #include <linux/platform_device.h> #include <linux/debugfs.h> +#include <linux/omap-iommu.h> +#include <linux/platform_data/iommu-omap.h> -#include <plat/iommu.h> -#include <plat/iovmm.h> - -#include <plat/iopgtable.h> +#include "omap-iopgtable.h" +#include "omap-iommu.h" #define MAXCOLUMN 100 /* for short messages */ @@ -323,15 +323,9 @@ err_out: return count; } -static int debug_open_generic(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - #define DEBUG_FOPS(name) \ static const struct file_operations debug_##name##_fops = { \ - .open = debug_open_generic, \ + .open = simple_open, \ .read = debug_read_##name, \ .write = debug_write_##name, \ .llseek = generic_file_llseek, \ @@ -339,7 +333,7 @@ static int debug_open_generic(struct inode *inode, struct file *file) #define DEBUG_FOPS_RO(name) \ static const struct file_operations debug_##name##_fops = { \ - .open = debug_open_generic, \ + .open = simple_open, \ .read = debug_read_##name, \ .llseek = generic_file_llseek, \ }; @@ -360,8 +354,8 @@ DEBUG_FOPS(mem); return -ENOMEM; \ } -#define DEBUG_ADD_FILE(name) __DEBUG_ADD_FILE(name, 600) -#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 400) +#define DEBUG_ADD_FILE(name) __DEBUG_ADD_FILE(name, 0600) +#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 0400) static int iommu_debug_register(struct device *dev, void *data) { diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 6899dcd02df..895af06a667 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -16,17 +16,26 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/ioport.h> -#include <linux/clk.h> #include <linux/platform_device.h> #include <linux/iommu.h> +#include <linux/omap-iommu.h> #include <linux/mutex.h> #include <linux/spinlock.h> +#include <linux/io.h> +#include <linux/pm_runtime.h> +#include <linux/of.h> +#include <linux/of_iommu.h> +#include <linux/of_irq.h> #include <asm/cacheflush.h> -#include <plat/iommu.h> +#include <linux/platform_data/iommu-omap.h> -#include <plat/iopgtable.h> +#include "omap-iopgtable.h" +#include "omap-iommu.h" + +#define to_iommu(dev) \ + ((struct omap_iommu *)platform_get_drvdata(to_platform_device(dev))) #define for_each_iotlb_cr(obj, n, __i, cr) \ for (__i = 0; \ @@ -41,14 +50,31 @@ * @pgtable: the page table * @iommu_dev: an omap iommu device attached to this domain. only a single * iommu device can be attached for now. + * @dev: Device using this domain. 
* @lock: domain lock, should be taken when attaching/detaching */ struct omap_iommu_domain { u32 *pgtable; struct omap_iommu *iommu_dev; + struct device *dev; spinlock_t lock; }; +#define MMU_LOCK_BASE_SHIFT 10 +#define MMU_LOCK_BASE_MASK (0x1f << MMU_LOCK_BASE_SHIFT) +#define MMU_LOCK_BASE(x) \ + ((x & MMU_LOCK_BASE_MASK) >> MMU_LOCK_BASE_SHIFT) + +#define MMU_LOCK_VICT_SHIFT 4 +#define MMU_LOCK_VICT_MASK (0x1f << MMU_LOCK_VICT_SHIFT) +#define MMU_LOCK_VICT(x) \ + ((x & MMU_LOCK_VICT_MASK) >> MMU_LOCK_VICT_SHIFT) + +struct iotlb_lock { + short base; + short vict; +}; + /* accommodate the difference between omap1 and omap2/3 */ static const struct iommu_functions *arch_iommu; @@ -123,31 +149,38 @@ EXPORT_SYMBOL_GPL(omap_iommu_arch_version); static int iommu_enable(struct omap_iommu *obj) { int err; - - if (!obj) - return -EINVAL; + struct platform_device *pdev = to_platform_device(obj->dev); + struct iommu_platform_data *pdata = pdev->dev.platform_data; if (!arch_iommu) return -ENODEV; - clk_enable(obj->clk); + if (pdata && pdata->deassert_reset) { + err = pdata->deassert_reset(pdev, pdata->reset_name); + if (err) { + dev_err(obj->dev, "deassert_reset failed: %d\n", err); + return err; + } + } + + pm_runtime_get_sync(obj->dev); err = arch_iommu->enable(obj); - clk_disable(obj->clk); return err; } static void iommu_disable(struct omap_iommu *obj) { - if (!obj) - return; - - clk_enable(obj->clk); + struct platform_device *pdev = to_platform_device(obj->dev); + struct iommu_platform_data *pdata = pdev->dev.platform_data; arch_iommu->disable(obj); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); + + if (pdata && pdata->assert_reset) + pdata->assert_reset(pdev, pdata->reset_name); } /* @@ -270,7 +303,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) if (!obj || !obj->nr_tlb_entries || !e) return -EINVAL; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); iotlb_lock_get(obj, &l); if (l.base == obj->nr_tlb_entries) { @@ -300,7 +333,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) cr = iotlb_alloc_cr(obj, e); if (IS_ERR(cr)) { - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return PTR_ERR(cr); } @@ -314,7 +347,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) l.vict = l.base; iotlb_lock_set(obj, &l); out: - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return err; } @@ -344,7 +377,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da) int i; struct cr_regs cr; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, cr) { u32 start; @@ -361,9 +394,10 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da) __func__, start, da, bytes); iotlb_load_cr(obj, &cr); iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY); + break; } } - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); if (i == obj->nr_tlb_entries) dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da); @@ -377,7 +411,7 @@ static void flush_iotlb_all(struct omap_iommu *obj) { struct iotlb_lock l; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); l.base = 0; l.vict = 0; @@ -385,7 +419,7 @@ static void flush_iotlb_all(struct omap_iommu *obj) iommu_write_reg(obj, 1, MMU_GFLUSH); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); } #if defined(CONFIG_OMAP_IOMMU_DEBUG) || defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE) @@ -395,11 +429,11 @@ ssize_t omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t bytes) if (!obj || !buf) return -EINVAL; - 
clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); bytes = arch_iommu->dump_ctx(obj, buf, bytes); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return bytes; } @@ -413,7 +447,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num) struct cr_regs tmp; struct cr_regs *p = crs; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); iotlb_lock_get(obj, &saved); for_each_iotlb_cr(obj, num, i, tmp) { @@ -423,7 +457,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num) } iotlb_lock_set(obj, &saved); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return p - crs; } @@ -490,7 +524,8 @@ static void flush_iopte_range(u32 *first, u32 *last) static void iopte_free(u32 *iopte) { /* Note: freed iopte's must be clean ready for re-use */ - kmem_cache_free(iopte_cachep, iopte); + if (iopte) + kmem_cache_free(iopte_cachep, iopte); } static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) @@ -787,9 +822,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) if (!obj->refcount) return IRQ_NONE; - clk_enable(obj->clk); errs = iommu_report_fault(obj, &da); - clk_disable(obj->clk); if (errs == 0) return IRQ_HANDLED; @@ -802,16 +835,15 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) iopgd = iopgd_offset(obj, da); if (!iopgd_is_table(*iopgd)) { - dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p " - "*pgd:px%08x\n", obj->name, errs, da, iopgd, *iopgd); + dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p *pgd:px%08x\n", + obj->name, errs, da, iopgd, *iopgd); return IRQ_NONE; } iopte = iopte_offset(iopgd, da); - dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p *pgd:0x%08x " - "pte:0x%p *pte:0x%08x\n", obj->name, errs, da, iopgd, *iopgd, - iopte, *iopte); + dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p *pgd:0x%08x pte:0x%p *pte:0x%08x\n", + obj->name, errs, da, iopgd, *iopgd, iopte, *iopte); return IRQ_NONE; } @@ -833,7 +865,7 @@ static int device_match_by_alias(struct device *dev, void *data) **/ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) { - int err = -ENOMEM; + int err; struct device *dev; struct omap_iommu *obj; @@ -841,7 +873,7 @@ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) (void *)name, device_match_by_alias); if (!dev) - return NULL; + return ERR_PTR(-ENODEV); obj = to_iommu(dev); @@ -860,8 +892,10 @@ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) goto err_enable; flush_iotlb_all(obj); - if (!try_module_get(obj->owner)) + if (!try_module_get(obj->owner)) { + err = -ENODEV; goto err_module; + } spin_unlock(&obj->iommu_lock); @@ -903,31 +937,48 @@ static void omap_iommu_detach(struct omap_iommu *obj) /* * OMAP Device MMU(IOMMU) detection */ -static int __devinit omap_iommu_probe(struct platform_device *pdev) +static int omap_iommu_probe(struct platform_device *pdev) { int err = -ENODEV; int irq; struct omap_iommu *obj; struct resource *res; struct iommu_platform_data *pdata = pdev->dev.platform_data; + struct device_node *of = pdev->dev.of_node; - if (pdev->num_resources != 2) - return -EINVAL; - - obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); + obj = devm_kzalloc(&pdev->dev, sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); if (!obj) return -ENOMEM; - obj->clk = clk_get(&pdev->dev, pdata->clk_name); - if (IS_ERR(obj->clk)) - goto err_clk; + if (of) { + obj->name = dev_name(&pdev->dev); + obj->nr_tlb_entries = 32; + err = of_property_read_u32(of, "ti,#tlb-entries", + &obj->nr_tlb_entries); + if 
(err && err != -EINVAL) + return err; + if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) + return -EINVAL; + /* + * da_start and da_end are needed for omap-iovmm, so hardcode + * these values as used by OMAP3 ISP - the only user for + * omap-iovmm + */ + obj->da_start = 0; + obj->da_end = 0xfffff000; + if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) + obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; + } else { + obj->nr_tlb_entries = pdata->nr_tlb_entries; + obj->name = pdata->name; + obj->da_start = pdata->da_start; + obj->da_end = pdata->da_end; + } + if (obj->da_end <= obj->da_start) + return -EINVAL; - obj->nr_tlb_entries = pdata->nr_tlb_entries; - obj->name = pdata->name; obj->dev = &pdev->dev; obj->ctx = (void *)obj + sizeof(*obj); - obj->da_start = pdata->da_start; - obj->da_end = pdata->da_end; spin_lock_init(&obj->iommu_lock); mutex_init(&obj->mmap_lock); @@ -935,76 +986,53 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev) INIT_LIST_HEAD(&obj->mmap); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - err = -ENODEV; - goto err_mem; - } - - res = request_mem_region(res->start, resource_size(res), - dev_name(&pdev->dev)); - if (!res) { - err = -EIO; - goto err_mem; - } - - obj->regbase = ioremap(res->start, resource_size(res)); - if (!obj->regbase) { - err = -ENOMEM; - goto err_ioremap; - } + obj->regbase = devm_ioremap_resource(obj->dev, res); + if (IS_ERR(obj->regbase)) + return PTR_ERR(obj->regbase); irq = platform_get_irq(pdev, 0); - if (irq < 0) { - err = -ENODEV; - goto err_irq; - } - err = request_irq(irq, iommu_fault_handler, IRQF_SHARED, - dev_name(&pdev->dev), obj); + if (irq < 0) + return -ENODEV; + + err = devm_request_irq(obj->dev, irq, iommu_fault_handler, IRQF_SHARED, + dev_name(obj->dev), obj); if (err < 0) - goto err_irq; + return err; platform_set_drvdata(pdev, obj); + pm_runtime_irq_safe(obj->dev); + pm_runtime_enable(obj->dev); + dev_info(&pdev->dev, "%s registered\n", obj->name); return 0; - -err_irq: - iounmap(obj->regbase); -err_ioremap: - release_mem_region(res->start, resource_size(res)); -err_mem: - clk_put(obj->clk); -err_clk: - kfree(obj); - return err; } -static int __devexit omap_iommu_remove(struct platform_device *pdev) +static int omap_iommu_remove(struct platform_device *pdev) { - int irq; - struct resource *res; struct omap_iommu *obj = platform_get_drvdata(pdev); - platform_set_drvdata(pdev, NULL); - iopgtable_clear_entry_all(obj); - irq = platform_get_irq(pdev, 0); - free_irq(irq, obj); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - iounmap(obj->regbase); + pm_runtime_disable(obj->dev); - clk_put(obj->clk); dev_info(&pdev->dev, "%s removed\n", obj->name); - kfree(obj); return 0; } +static struct of_device_id omap_iommu_of_match[] = { + { .compatible = "ti,omap2-iommu" }, + { .compatible = "ti,omap4-iommu" }, + { .compatible = "ti,dra7-iommu" }, + {}, +}; +MODULE_DEVICE_TABLE(of, omap_iommu_of_match); + static struct platform_driver omap_iommu_driver = { .probe = omap_iommu_probe, - .remove = __devexit_p(omap_iommu_remove), + .remove = omap_iommu_remove, .driver = { .name = "omap-iommu", + .of_match_table = of_match_ptr(omap_iommu_of_match), }, }; @@ -1013,6 +1041,22 @@ static void iopte_cachep_ctor(void *iopte) clean_dcache_area(iopte, IOPTE_TABLE_SIZE); } +static u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, int pgsz) +{ + memset(e, 0, sizeof(*e)); + + e->da = da; + e->pa = pa; + e->valid = MMU_CAM_V; + /* 
FIXME: add OMAP1 support */ + e->pgsz = pgsz; + e->endian = MMU_RAM_ENDIAN_LITTLE; + e->elsz = MMU_RAM_ELSZ_8; + e->mixed = 0; + + return iopgsz_to_bytes(e->pgsz); +} + static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, phys_addr_t pa, size_t bytes, int prot) { @@ -1021,9 +1065,8 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, struct device *dev = oiommu->dev; struct iotlb_entry e; int omap_pgsz; - u32 ret, flags; + u32 ret; - /* we only support mapping a single iommu page for now */ omap_pgsz = bytes_to_iopgsz(bytes); if (omap_pgsz < 0) { dev_err(dev, "invalid size to map: %d\n", bytes); @@ -1032,9 +1075,7 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, dev_dbg(dev, "mapping da 0x%lx to pa 0x%x size 0x%x\n", da, pa, bytes); - flags = omap_pgsz | prot; - - iotlb_init_entry(&e, da, pa, flags); + iotlb_init_entry(&e, da, pa, omap_pgsz); ret = omap_iopgtable_store_entry(oiommu, &e); if (ret) @@ -1081,6 +1122,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) } omap_domain->iommu_dev = arch_data->iommu_dev = oiommu; + omap_domain->dev = dev; oiommu->domain = domain; out: @@ -1088,19 +1130,16 @@ out: return ret; } -static void omap_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) +static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, + struct device *dev) { - struct omap_iommu_domain *omap_domain = domain->priv; - struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; struct omap_iommu *oiommu = dev_to_omap_iommu(dev); - - spin_lock(&omap_domain->lock); + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; /* only a single device is supported per domain for now */ if (omap_domain->iommu_dev != oiommu) { dev_err(dev, "invalid iommu device\n"); - goto out; + return; } iopgtable_clear_entry_all(oiommu); @@ -1108,8 +1147,16 @@ static void omap_iommu_detach_dev(struct iommu_domain *domain, omap_iommu_detach(oiommu); omap_domain->iommu_dev = arch_data->iommu_dev = NULL; + omap_domain->dev = NULL; +} -out: +static void omap_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct omap_iommu_domain *omap_domain = domain->priv; + + spin_lock(&omap_domain->lock); + _omap_iommu_detach_dev(omap_domain, dev); spin_unlock(&omap_domain->lock); } @@ -1140,6 +1187,10 @@ static int omap_iommu_domain_init(struct iommu_domain *domain) domain->priv = omap_domain; + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = (1ULL << 32) - 1; + domain->geometry.force_aperture = true; + return 0; fail_nomem: @@ -1148,19 +1199,25 @@ out: return -ENOMEM; } -/* assume device was already detached */ static void omap_iommu_domain_destroy(struct iommu_domain *domain) { struct omap_iommu_domain *omap_domain = domain->priv; domain->priv = NULL; + /* + * An iommu device is still attached + * (currently, only one device can be attached) ? 
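 * Editor's note, not part of the original comment: if a device is still
 * attached, the code below detaches it via _omap_iommu_detach_dev(), so
 * destroying the domain behaves as if omap_iommu_detach_dev() had been
 * called first.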
+ */ + if (omap_domain->iommu_dev) + _omap_iommu_detach_dev(omap_domain, omap_domain->dev); + kfree(omap_domain->pgtable); kfree(omap_domain); } static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long da) + dma_addr_t da) { struct omap_iommu_domain *omap_domain = domain->priv; struct omap_iommu *oiommu = omap_domain->iommu_dev; @@ -1176,25 +1233,64 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, else if (iopte_is_large(*pte)) ret = omap_iommu_translate(*pte, da, IOLARGE_MASK); else - dev_err(dev, "bogus pte 0x%x, da 0x%lx", *pte, da); + dev_err(dev, "bogus pte 0x%x, da 0x%llx", *pte, + (unsigned long long)da); } else { if (iopgd_is_section(*pgd)) ret = omap_iommu_translate(*pgd, da, IOSECTION_MASK); else if (iopgd_is_super(*pgd)) ret = omap_iommu_translate(*pgd, da, IOSUPER_MASK); else - dev_err(dev, "bogus pgd 0x%x, da 0x%lx", *pgd, da); + dev_err(dev, "bogus pgd 0x%x, da 0x%llx", *pgd, + (unsigned long long)da); } return ret; } -static int omap_iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) +static int omap_iommu_add_device(struct device *dev) { + struct omap_iommu_arch_data *arch_data; + struct device_node *np; + + /* + * Allocate the archdata iommu structure for DT-based devices. + * + * TODO: Simplify this when removing non-DT support completely from the + * IOMMU users. + */ + if (!dev->of_node) + return 0; + + np = of_parse_phandle(dev->of_node, "iommus", 0); + if (!np) + return 0; + + arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL); + if (!arch_data) { + of_node_put(np); + return -ENOMEM; + } + + arch_data->name = kstrdup(dev_name(dev), GFP_KERNEL); + dev->archdata.iommu = arch_data; + + of_node_put(np); + return 0; } +static void omap_iommu_remove_device(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + + if (!dev->of_node || !arch_data) + return; + + kfree(arch_data->name); + kfree(arch_data); +} + static struct iommu_ops omap_iommu_ops = { .domain_init = omap_iommu_domain_init, .domain_destroy = omap_iommu_domain_destroy, @@ -1203,7 +1299,8 @@ static struct iommu_ops omap_iommu_ops = { .map = omap_iommu_map, .unmap = omap_iommu_unmap, .iova_to_phys = omap_iommu_iova_to_phys, - .domain_has_cap = omap_iommu_domain_has_cap, + .add_device = omap_iommu_add_device, + .remove_device = omap_iommu_remove_device, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, }; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h new file mode 100644 index 00000000000..ea920c3e94f --- /dev/null +++ b/drivers/iommu/omap-iommu.h @@ -0,0 +1,230 @@ +/* + * omap iommu: main structures + * + * Copyright (C) 2008-2009 Nokia Corporation + * + * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#if defined(CONFIG_ARCH_OMAP1) +#error "iommu for this processor not implemented yet" +#endif + +struct iotlb_entry { + u32 da; + u32 pa; + u32 pgsz, prsvd, valid; + union { + u16 ap; + struct { + u32 endian, elsz, mixed; + }; + }; +}; + +struct omap_iommu { + const char *name; + struct module *owner; + void __iomem *regbase; + struct device *dev; + void *isr_priv; + struct iommu_domain *domain; + + unsigned int refcount; + spinlock_t iommu_lock; /* global for this whole object */ + + /* + * We don't change iopgd for a situation like pgd for a task, + * but share it globally for each iommu. 
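 * Editor's note, added for clarity: in practice this is a single 16 KiB
 * first-level table per IOMMU instance (PTRS_PER_IOPGD == 4096 entries of
 * 4 bytes, see omap-iopgtable.h), shared by every mapping on that IOMMU
 * rather than being swapped the way a CPU switches a task's pgd.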
+ */ + u32 *iopgd; + spinlock_t page_table_lock; /* protect iopgd */ + + int nr_tlb_entries; + + struct list_head mmap; + struct mutex mmap_lock; /* protect mmap */ + + void *ctx; /* iommu context: registres saved area */ + u32 da_start; + u32 da_end; + + int has_bus_err_back; +}; + +struct cr_regs { + union { + struct { + u16 cam_l; + u16 cam_h; + }; + u32 cam; + }; + union { + struct { + u16 ram_l; + u16 ram_h; + }; + u32 ram; + }; +}; + +/* architecture specific functions */ +struct iommu_functions { + unsigned long version; + + int (*enable)(struct omap_iommu *obj); + void (*disable)(struct omap_iommu *obj); + void (*set_twl)(struct omap_iommu *obj, bool on); + u32 (*fault_isr)(struct omap_iommu *obj, u32 *ra); + + void (*tlb_read_cr)(struct omap_iommu *obj, struct cr_regs *cr); + void (*tlb_load_cr)(struct omap_iommu *obj, struct cr_regs *cr); + + struct cr_regs *(*alloc_cr)(struct omap_iommu *obj, + struct iotlb_entry *e); + int (*cr_valid)(struct cr_regs *cr); + u32 (*cr_to_virt)(struct cr_regs *cr); + void (*cr_to_e)(struct cr_regs *cr, struct iotlb_entry *e); + ssize_t (*dump_cr)(struct omap_iommu *obj, struct cr_regs *cr, + char *buf); + + u32 (*get_pte_attr)(struct iotlb_entry *e); + + void (*save_ctx)(struct omap_iommu *obj); + void (*restore_ctx)(struct omap_iommu *obj); + ssize_t (*dump_ctx)(struct omap_iommu *obj, char *buf, ssize_t len); +}; + +#ifdef CONFIG_IOMMU_API +/** + * dev_to_omap_iommu() - retrieves an omap iommu object from a user device + * @dev: iommu client device + */ +static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + + return arch_data->iommu_dev; +} +#endif + +/* + * MMU Register offsets + */ +#define MMU_REVISION 0x00 +#define MMU_IRQSTATUS 0x18 +#define MMU_IRQENABLE 0x1c +#define MMU_WALKING_ST 0x40 +#define MMU_CNTL 0x44 +#define MMU_FAULT_AD 0x48 +#define MMU_TTB 0x4c +#define MMU_LOCK 0x50 +#define MMU_LD_TLB 0x54 +#define MMU_CAM 0x58 +#define MMU_RAM 0x5c +#define MMU_GFLUSH 0x60 +#define MMU_FLUSH_ENTRY 0x64 +#define MMU_READ_CAM 0x68 +#define MMU_READ_RAM 0x6c +#define MMU_EMU_FAULT_AD 0x70 +#define MMU_GP_REG 0x88 + +#define MMU_REG_SIZE 256 + +/* + * MMU Register bit definitions + */ +#define MMU_CAM_VATAG_SHIFT 12 +#define MMU_CAM_VATAG_MASK \ + ((~0UL >> MMU_CAM_VATAG_SHIFT) << MMU_CAM_VATAG_SHIFT) +#define MMU_CAM_P (1 << 3) +#define MMU_CAM_V (1 << 2) +#define MMU_CAM_PGSZ_MASK 3 +#define MMU_CAM_PGSZ_1M (0 << 0) +#define MMU_CAM_PGSZ_64K (1 << 0) +#define MMU_CAM_PGSZ_4K (2 << 0) +#define MMU_CAM_PGSZ_16M (3 << 0) + +#define MMU_RAM_PADDR_SHIFT 12 +#define MMU_RAM_PADDR_MASK \ + ((~0UL >> MMU_RAM_PADDR_SHIFT) << MMU_RAM_PADDR_SHIFT) + +#define MMU_RAM_ENDIAN_MASK (1 << MMU_RAM_ENDIAN_SHIFT) +#define MMU_RAM_ENDIAN_BIG (1 << MMU_RAM_ENDIAN_SHIFT) + +#define MMU_RAM_ELSZ_MASK (3 << MMU_RAM_ELSZ_SHIFT) +#define MMU_RAM_ELSZ_8 (0 << MMU_RAM_ELSZ_SHIFT) +#define MMU_RAM_ELSZ_16 (1 << MMU_RAM_ELSZ_SHIFT) +#define MMU_RAM_ELSZ_32 (2 << MMU_RAM_ELSZ_SHIFT) +#define MMU_RAM_ELSZ_NONE (3 << MMU_RAM_ELSZ_SHIFT) +#define MMU_RAM_MIXED_SHIFT 6 +#define MMU_RAM_MIXED_MASK (1 << MMU_RAM_MIXED_SHIFT) +#define MMU_RAM_MIXED MMU_RAM_MIXED_MASK + +#define MMU_GP_REG_BUS_ERR_BACK_EN 0x1 + +/* + * utilities for super page(16MB, 1MB, 64KB and 4KB) + */ + +#define iopgsz_max(bytes) \ + (((bytes) >= SZ_16M) ? SZ_16M : \ + ((bytes) >= SZ_1M) ? SZ_1M : \ + ((bytes) >= SZ_64K) ? SZ_64K : \ + ((bytes) >= SZ_4K) ? 
SZ_4K : 0) + +#define bytes_to_iopgsz(bytes) \ + (((bytes) == SZ_16M) ? MMU_CAM_PGSZ_16M : \ + ((bytes) == SZ_1M) ? MMU_CAM_PGSZ_1M : \ + ((bytes) == SZ_64K) ? MMU_CAM_PGSZ_64K : \ + ((bytes) == SZ_4K) ? MMU_CAM_PGSZ_4K : -1) + +#define iopgsz_to_bytes(iopgsz) \ + (((iopgsz) == MMU_CAM_PGSZ_16M) ? SZ_16M : \ + ((iopgsz) == MMU_CAM_PGSZ_1M) ? SZ_1M : \ + ((iopgsz) == MMU_CAM_PGSZ_64K) ? SZ_64K : \ + ((iopgsz) == MMU_CAM_PGSZ_4K) ? SZ_4K : 0) + +#define iopgsz_ok(bytes) (bytes_to_iopgsz(bytes) >= 0) + +/* + * global functions + */ +extern u32 omap_iommu_arch_version(void); + +extern void omap_iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e); + +extern int +omap_iopgtable_store_entry(struct omap_iommu *obj, struct iotlb_entry *e); + +extern void omap_iommu_save_ctx(struct device *dev); +extern void omap_iommu_restore_ctx(struct device *dev); + +extern int omap_foreach_iommu_device(void *data, + int (*fn)(struct device *, void *)); + +extern int omap_install_iommu_arch(const struct iommu_functions *ops); +extern void omap_uninstall_iommu_arch(const struct iommu_functions *ops); + +extern ssize_t +omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len); +extern size_t +omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t len); + +/* + * register accessors + */ +static inline u32 iommu_read_reg(struct omap_iommu *obj, size_t offs) +{ + return __raw_readl(obj->regbase + offs); +} + +static inline void iommu_write_reg(struct omap_iommu *obj, u32 val, size_t offs) +{ + __raw_writel(val, obj->regbase + offs); +} diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c new file mode 100644 index 00000000000..5e1ea3b0bf1 --- /dev/null +++ b/drivers/iommu/omap-iommu2.c @@ -0,0 +1,337 @@ +/* + * omap iommu: omap2/3 architecture specific functions + * + * Copyright (C) 2008-2009 Nokia Corporation + * + * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>, + * Paul Mundt and Toshihiro Kobayashi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/jiffies.h> +#include <linux/module.h> +#include <linux/omap-iommu.h> +#include <linux/slab.h> +#include <linux/stringify.h> +#include <linux/platform_data/iommu-omap.h> + +#include "omap-iommu.h" + +/* + * omap2 architecture specific register bit definitions + */ +#define IOMMU_ARCH_VERSION 0x00000011 + +/* IRQSTATUS & IRQENABLE */ +#define MMU_IRQ_MULTIHITFAULT (1 << 4) +#define MMU_IRQ_TABLEWALKFAULT (1 << 3) +#define MMU_IRQ_EMUMISS (1 << 2) +#define MMU_IRQ_TRANSLATIONFAULT (1 << 1) +#define MMU_IRQ_TLBMISS (1 << 0) + +#define __MMU_IRQ_FAULT \ + (MMU_IRQ_MULTIHITFAULT | MMU_IRQ_EMUMISS | MMU_IRQ_TRANSLATIONFAULT) +#define MMU_IRQ_MASK \ + (__MMU_IRQ_FAULT | MMU_IRQ_TABLEWALKFAULT | MMU_IRQ_TLBMISS) +#define MMU_IRQ_TWL_MASK (__MMU_IRQ_FAULT | MMU_IRQ_TABLEWALKFAULT) +#define MMU_IRQ_TLB_MISS_MASK (__MMU_IRQ_FAULT | MMU_IRQ_TLBMISS) + +/* MMU_CNTL */ +#define MMU_CNTL_SHIFT 1 +#define MMU_CNTL_MASK (7 << MMU_CNTL_SHIFT) +#define MMU_CNTL_EML_TLB (1 << 3) +#define MMU_CNTL_TWL_EN (1 << 2) +#define MMU_CNTL_MMU_EN (1 << 1) + +#define get_cam_va_mask(pgsz) \ + (((pgsz) == MMU_CAM_PGSZ_16M) ? 0xff000000 : \ + ((pgsz) == MMU_CAM_PGSZ_1M) ? 0xfff00000 : \ + ((pgsz) == MMU_CAM_PGSZ_64K) ? 0xffff0000 : \ + ((pgsz) == MMU_CAM_PGSZ_4K) ? 
0xfffff000 : 0) + +/* IOMMU errors */ +#define OMAP_IOMMU_ERR_TLB_MISS (1 << 0) +#define OMAP_IOMMU_ERR_TRANS_FAULT (1 << 1) +#define OMAP_IOMMU_ERR_EMU_MISS (1 << 2) +#define OMAP_IOMMU_ERR_TBLWALK_FAULT (1 << 3) +#define OMAP_IOMMU_ERR_MULTIHIT_FAULT (1 << 4) + +static void __iommu_set_twl(struct omap_iommu *obj, bool on) +{ + u32 l = iommu_read_reg(obj, MMU_CNTL); + + if (on) + iommu_write_reg(obj, MMU_IRQ_TWL_MASK, MMU_IRQENABLE); + else + iommu_write_reg(obj, MMU_IRQ_TLB_MISS_MASK, MMU_IRQENABLE); + + l &= ~MMU_CNTL_MASK; + if (on) + l |= (MMU_CNTL_MMU_EN | MMU_CNTL_TWL_EN); + else + l |= (MMU_CNTL_MMU_EN); + + iommu_write_reg(obj, l, MMU_CNTL); +} + + +static int omap2_iommu_enable(struct omap_iommu *obj) +{ + u32 l, pa; + + if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd, SZ_16K)) + return -EINVAL; + + pa = virt_to_phys(obj->iopgd); + if (!IS_ALIGNED(pa, SZ_16K)) + return -EINVAL; + + l = iommu_read_reg(obj, MMU_REVISION); + dev_info(obj->dev, "%s: version %d.%d\n", obj->name, + (l >> 4) & 0xf, l & 0xf); + + iommu_write_reg(obj, pa, MMU_TTB); + + if (obj->has_bus_err_back) + iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG); + + __iommu_set_twl(obj, true); + + return 0; +} + +static void omap2_iommu_disable(struct omap_iommu *obj) +{ + u32 l = iommu_read_reg(obj, MMU_CNTL); + + l &= ~MMU_CNTL_MASK; + iommu_write_reg(obj, l, MMU_CNTL); + + dev_dbg(obj->dev, "%s is shutting down\n", obj->name); +} + +static void omap2_iommu_set_twl(struct omap_iommu *obj, bool on) +{ + __iommu_set_twl(obj, false); +} + +static u32 omap2_iommu_fault_isr(struct omap_iommu *obj, u32 *ra) +{ + u32 stat, da; + u32 errs = 0; + + stat = iommu_read_reg(obj, MMU_IRQSTATUS); + stat &= MMU_IRQ_MASK; + if (!stat) { + *ra = 0; + return 0; + } + + da = iommu_read_reg(obj, MMU_FAULT_AD); + *ra = da; + + if (stat & MMU_IRQ_TLBMISS) + errs |= OMAP_IOMMU_ERR_TLB_MISS; + if (stat & MMU_IRQ_TRANSLATIONFAULT) + errs |= OMAP_IOMMU_ERR_TRANS_FAULT; + if (stat & MMU_IRQ_EMUMISS) + errs |= OMAP_IOMMU_ERR_EMU_MISS; + if (stat & MMU_IRQ_TABLEWALKFAULT) + errs |= OMAP_IOMMU_ERR_TBLWALK_FAULT; + if (stat & MMU_IRQ_MULTIHITFAULT) + errs |= OMAP_IOMMU_ERR_MULTIHIT_FAULT; + iommu_write_reg(obj, stat, MMU_IRQSTATUS); + + return errs; +} + +static void omap2_tlb_read_cr(struct omap_iommu *obj, struct cr_regs *cr) +{ + cr->cam = iommu_read_reg(obj, MMU_READ_CAM); + cr->ram = iommu_read_reg(obj, MMU_READ_RAM); +} + +static void omap2_tlb_load_cr(struct omap_iommu *obj, struct cr_regs *cr) +{ + iommu_write_reg(obj, cr->cam | MMU_CAM_V, MMU_CAM); + iommu_write_reg(obj, cr->ram, MMU_RAM); +} + +static u32 omap2_cr_to_virt(struct cr_regs *cr) +{ + u32 page_size = cr->cam & MMU_CAM_PGSZ_MASK; + u32 mask = get_cam_va_mask(cr->cam & page_size); + + return cr->cam & mask; +} + +static struct cr_regs *omap2_alloc_cr(struct omap_iommu *obj, + struct iotlb_entry *e) +{ + struct cr_regs *cr; + + if (e->da & ~(get_cam_va_mask(e->pgsz))) { + dev_err(obj->dev, "%s:\twrong alignment: %08x\n", __func__, + e->da); + return ERR_PTR(-EINVAL); + } + + cr = kmalloc(sizeof(*cr), GFP_KERNEL); + if (!cr) + return ERR_PTR(-ENOMEM); + + cr->cam = (e->da & MMU_CAM_VATAG_MASK) | e->prsvd | e->pgsz | e->valid; + cr->ram = e->pa | e->endian | e->elsz | e->mixed; + + return cr; +} + +static inline int omap2_cr_valid(struct cr_regs *cr) +{ + return cr->cam & MMU_CAM_V; +} + +static u32 omap2_get_pte_attr(struct iotlb_entry *e) +{ + u32 attr; + + attr = e->mixed << 5; + attr |= e->endian; + attr |= e->elsz >> 3; + attr <<= (((e->pgsz == MMU_CAM_PGSZ_4K) || + 
(e->pgsz == MMU_CAM_PGSZ_64K)) ? 0 : 6); + return attr; +} + +static ssize_t +omap2_dump_cr(struct omap_iommu *obj, struct cr_regs *cr, char *buf) +{ + char *p = buf; + + /* FIXME: Need more detail analysis of cam/ram */ + p += sprintf(p, "%08x %08x %01x\n", cr->cam, cr->ram, + (cr->cam & MMU_CAM_P) ? 1 : 0); + + return p - buf; +} + +#define pr_reg(name) \ + do { \ + ssize_t bytes; \ + const char *str = "%20s: %08x\n"; \ + const int maxcol = 32; \ + bytes = snprintf(p, maxcol, str, __stringify(name), \ + iommu_read_reg(obj, MMU_##name)); \ + p += bytes; \ + len -= bytes; \ + if (len < maxcol) \ + goto out; \ + } while (0) + +static ssize_t +omap2_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len) +{ + char *p = buf; + + pr_reg(REVISION); + pr_reg(IRQSTATUS); + pr_reg(IRQENABLE); + pr_reg(WALKING_ST); + pr_reg(CNTL); + pr_reg(FAULT_AD); + pr_reg(TTB); + pr_reg(LOCK); + pr_reg(LD_TLB); + pr_reg(CAM); + pr_reg(RAM); + pr_reg(GFLUSH); + pr_reg(FLUSH_ENTRY); + pr_reg(READ_CAM); + pr_reg(READ_RAM); + pr_reg(EMU_FAULT_AD); +out: + return p - buf; +} + +static void omap2_iommu_save_ctx(struct omap_iommu *obj) +{ + int i; + u32 *p = obj->ctx; + + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + p[i] = iommu_read_reg(obj, i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + } + + BUG_ON(p[0] != IOMMU_ARCH_VERSION); +} + +static void omap2_iommu_restore_ctx(struct omap_iommu *obj) +{ + int i; + u32 *p = obj->ctx; + + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + iommu_write_reg(obj, p[i], i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + } + + BUG_ON(p[0] != IOMMU_ARCH_VERSION); +} + +static void omap2_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e) +{ + e->da = cr->cam & MMU_CAM_VATAG_MASK; + e->pa = cr->ram & MMU_RAM_PADDR_MASK; + e->valid = cr->cam & MMU_CAM_V; + e->pgsz = cr->cam & MMU_CAM_PGSZ_MASK; + e->endian = cr->ram & MMU_RAM_ENDIAN_MASK; + e->elsz = cr->ram & MMU_RAM_ELSZ_MASK; + e->mixed = cr->ram & MMU_RAM_MIXED; +} + +static const struct iommu_functions omap2_iommu_ops = { + .version = IOMMU_ARCH_VERSION, + + .enable = omap2_iommu_enable, + .disable = omap2_iommu_disable, + .set_twl = omap2_iommu_set_twl, + .fault_isr = omap2_iommu_fault_isr, + + .tlb_read_cr = omap2_tlb_read_cr, + .tlb_load_cr = omap2_tlb_load_cr, + + .cr_to_e = omap2_cr_to_e, + .cr_to_virt = omap2_cr_to_virt, + .alloc_cr = omap2_alloc_cr, + .cr_valid = omap2_cr_valid, + .dump_cr = omap2_dump_cr, + + .get_pte_attr = omap2_get_pte_attr, + + .save_ctx = omap2_iommu_save_ctx, + .restore_ctx = omap2_iommu_restore_ctx, + .dump_ctx = omap2_iommu_dump_ctx, +}; + +static int __init omap2_iommu_init(void) +{ + return omap_install_iommu_arch(&omap2_iommu_ops); +} +module_init(omap2_iommu_init); + +static void __exit omap2_iommu_exit(void) +{ + omap_uninstall_iommu_arch(&omap2_iommu_ops); +} +module_exit(omap2_iommu_exit); + +MODULE_AUTHOR("Hiroshi DOYU, Paul Mundt and Toshihiro Kobayashi"); +MODULE_DESCRIPTION("omap iommu: omap2/3 architecture specific functions"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/omap-iopgtable.h b/drivers/iommu/omap-iopgtable.h new file mode 100644 index 00000000000..f891683e3f0 --- /dev/null +++ b/drivers/iommu/omap-iopgtable.h @@ -0,0 +1,95 @@ +/* + * omap iommu: pagetable definitions + * + * Copyright (C) 2008-2010 Nokia Corporation + * + * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU 
General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * "L2 table" address mask and size definitions. + */ +#define IOPGD_SHIFT 20 +#define IOPGD_SIZE (1UL << IOPGD_SHIFT) +#define IOPGD_MASK (~(IOPGD_SIZE - 1)) + +/* + * "section" address mask and size definitions. + */ +#define IOSECTION_SHIFT 20 +#define IOSECTION_SIZE (1UL << IOSECTION_SHIFT) +#define IOSECTION_MASK (~(IOSECTION_SIZE - 1)) + +/* + * "supersection" address mask and size definitions. + */ +#define IOSUPER_SHIFT 24 +#define IOSUPER_SIZE (1UL << IOSUPER_SHIFT) +#define IOSUPER_MASK (~(IOSUPER_SIZE - 1)) + +#define PTRS_PER_IOPGD (1UL << (32 - IOPGD_SHIFT)) +#define IOPGD_TABLE_SIZE (PTRS_PER_IOPGD * sizeof(u32)) + +/* + * "small page" address mask and size definitions. + */ +#define IOPTE_SHIFT 12 +#define IOPTE_SIZE (1UL << IOPTE_SHIFT) +#define IOPTE_MASK (~(IOPTE_SIZE - 1)) + +/* + * "large page" address mask and size definitions. + */ +#define IOLARGE_SHIFT 16 +#define IOLARGE_SIZE (1UL << IOLARGE_SHIFT) +#define IOLARGE_MASK (~(IOLARGE_SIZE - 1)) + +#define PTRS_PER_IOPTE (1UL << (IOPGD_SHIFT - IOPTE_SHIFT)) +#define IOPTE_TABLE_SIZE (PTRS_PER_IOPTE * sizeof(u32)) + +#define IOPAGE_MASK IOPTE_MASK + +/** + * omap_iommu_translate() - va to pa translation + * @d: omap iommu descriptor + * @va: virtual address + * @mask: omap iommu descriptor mask + * + * va to pa translation + */ +static inline phys_addr_t omap_iommu_translate(u32 d, u32 va, u32 mask) +{ + return (d & mask) | (va & (~mask)); +} + +/* + * some descriptor attributes. + */ +#define IOPGD_TABLE (1 << 0) +#define IOPGD_SECTION (2 << 0) +#define IOPGD_SUPER (1 << 18 | 2 << 0) + +#define iopgd_is_table(x) (((x) & 3) == IOPGD_TABLE) +#define iopgd_is_section(x) (((x) & (1 << 18 | 3)) == IOPGD_SECTION) +#define iopgd_is_super(x) (((x) & (1 << 18 | 3)) == IOPGD_SUPER) + +#define IOPTE_SMALL (2 << 0) +#define IOPTE_LARGE (1 << 0) + +#define iopte_is_small(x) (((x) & 2) == IOPTE_SMALL) +#define iopte_is_large(x) (((x) & 3) == IOPTE_LARGE) + +/* to find an entry in a page-table-directory */ +#define iopgd_index(da) (((da) >> IOPGD_SHIFT) & (PTRS_PER_IOPGD - 1)) +#define iopgd_offset(obj, da) ((obj)->iopgd + iopgd_index(da)) + +#define iopgd_page_paddr(iopgd) (*iopgd & ~((1 << 10) - 1)) +#define iopgd_page_vaddr(iopgd) ((u32 *)phys_to_virt(iopgd_page_paddr(iopgd))) + +/* to find an entry in the second-level page table. */ +#define iopte_index(da) (((da) >> IOPTE_SHIFT) & (PTRS_PER_IOPTE - 1)) +#define iopte_offset(iopgd, da) (iopgd_page_vaddr(iopgd) + iopte_index(da)) diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c index 2e10c3e0a7a..d1472598415 100644 --- a/drivers/iommu/omap-iovmm.c +++ b/drivers/iommu/omap-iovmm.c @@ -17,14 +17,58 @@ #include <linux/device.h> #include <linux/scatterlist.h> #include <linux/iommu.h> +#include <linux/omap-iommu.h> +#include <linux/platform_data/iommu-omap.h> #include <asm/cacheflush.h> #include <asm/mach/map.h> -#include <plat/iommu.h> -#include <plat/iovmm.h> +#include "omap-iopgtable.h" +#include "omap-iommu.h" -#include <plat/iopgtable.h> +/* + * IOVMF_FLAGS: attribute for iommu virtual memory area(iovma) + * + * lower 16 bit is used for h/w and upper 16 bit is for s/w. 
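 * Editor's note, added for clarity: the h/w half below reuses the
 * MMU_CAM_PGSZ_*, MMU_RAM_ENDIAN_* and MMU_RAM_ELSZ_* encodings, so it
 * lines up with the struct iotlb_entry fields, while the s/w half
 * (IOVMF_MMIO/IOVMF_ALLOC, IOVMF_DISCONT/IOVMF_LINEAR, IOVMF_DA_FIXED)
 * only records how the area was allocated and mapped.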
+ */ +#define IOVMF_SW_SHIFT 16 + +/* + * iovma: h/w flags derived from cam and ram attribute + */ +#define IOVMF_CAM_MASK (~((1 << 10) - 1)) +#define IOVMF_RAM_MASK (~IOVMF_CAM_MASK) + +#define IOVMF_PGSZ_MASK (3 << 0) +#define IOVMF_PGSZ_1M MMU_CAM_PGSZ_1M +#define IOVMF_PGSZ_64K MMU_CAM_PGSZ_64K +#define IOVMF_PGSZ_4K MMU_CAM_PGSZ_4K +#define IOVMF_PGSZ_16M MMU_CAM_PGSZ_16M + +#define IOVMF_ENDIAN_MASK (1 << 9) +#define IOVMF_ENDIAN_BIG MMU_RAM_ENDIAN_BIG + +#define IOVMF_ELSZ_MASK (3 << 7) +#define IOVMF_ELSZ_16 MMU_RAM_ELSZ_16 +#define IOVMF_ELSZ_32 MMU_RAM_ELSZ_32 +#define IOVMF_ELSZ_NONE MMU_RAM_ELSZ_NONE + +#define IOVMF_MIXED_MASK (1 << 6) +#define IOVMF_MIXED MMU_RAM_MIXED + +/* + * iovma: s/w flags, used for mapping and umapping internally. + */ +#define IOVMF_MMIO (1 << IOVMF_SW_SHIFT) +#define IOVMF_ALLOC (2 << IOVMF_SW_SHIFT) +#define IOVMF_ALLOC_MASK (3 << IOVMF_SW_SHIFT) + +/* "superpages" is supported just with physically linear pages */ +#define IOVMF_DISCONT (1 << (2 + IOVMF_SW_SHIFT)) +#define IOVMF_LINEAR (2 << (2 + IOVMF_SW_SHIFT)) +#define IOVMF_LINEAR_MASK (3 << (2 + IOVMF_SW_SHIFT)) + +#define IOVMF_DA_FIXED (1 << (4 + IOVMF_SW_SHIFT)) static struct kmem_cache *iovm_area_cachep; @@ -58,8 +102,8 @@ static size_t sgtable_len(const struct sg_table *sgt) } if (i && sg->offset) { - pr_err("%s: sg[%d] offset not allowed in internal " - "entries\n", __func__, i); + pr_err("%s: sg[%d] offset not allowed in internal entries\n", + __func__, i); return 0; } diff --git a/drivers/iommu/pci.h b/drivers/iommu/pci.h new file mode 100644 index 00000000000..352d80ae744 --- /dev/null +++ b/drivers/iommu/pci.h @@ -0,0 +1,29 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Red Hat, Inc. + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + */ +#ifndef __IOMMU_PCI_H +#define __IOMMU_PCI_H + +/* Helper function for swapping pci device reference */ +static inline void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) +{ + pci_dev_put(*from); + *from = to; +} + +#endif /* __IOMMU_PCI_H */ diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c new file mode 100644 index 00000000000..464acda0bbc --- /dev/null +++ b/drivers/iommu/shmobile-iommu.c @@ -0,0 +1,394 @@ +/* + * IOMMU for IPMMU/IPMMUI + * Copyright (C) 2012 Hideki EIRAKU + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ */ + +#include <linux/dma-mapping.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/platform_device.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <asm/dma-iommu.h> +#include "shmobile-ipmmu.h" + +#define L1_SIZE CONFIG_SHMOBILE_IOMMU_L1SIZE +#define L1_LEN (L1_SIZE / 4) +#define L1_ALIGN L1_SIZE +#define L2_SIZE SZ_1K +#define L2_LEN (L2_SIZE / 4) +#define L2_ALIGN L2_SIZE + +struct shmobile_iommu_domain_pgtable { + uint32_t *pgtable; + dma_addr_t handle; +}; + +struct shmobile_iommu_archdata { + struct list_head attached_list; + struct dma_iommu_mapping *iommu_mapping; + spinlock_t attach_lock; + struct shmobile_iommu_domain *attached; + int num_attached_devices; + struct shmobile_ipmmu *ipmmu; +}; + +struct shmobile_iommu_domain { + struct shmobile_iommu_domain_pgtable l1, l2[L1_LEN]; + spinlock_t map_lock; + spinlock_t attached_list_lock; + struct list_head attached_list; +}; + +static struct shmobile_iommu_archdata *ipmmu_archdata; +static struct kmem_cache *l1cache, *l2cache; + +static int pgtable_alloc(struct shmobile_iommu_domain_pgtable *pgtable, + struct kmem_cache *cache, size_t size) +{ + pgtable->pgtable = kmem_cache_zalloc(cache, GFP_ATOMIC); + if (!pgtable->pgtable) + return -ENOMEM; + pgtable->handle = dma_map_single(NULL, pgtable->pgtable, size, + DMA_TO_DEVICE); + return 0; +} + +static void pgtable_free(struct shmobile_iommu_domain_pgtable *pgtable, + struct kmem_cache *cache, size_t size) +{ + dma_unmap_single(NULL, pgtable->handle, size, DMA_TO_DEVICE); + kmem_cache_free(cache, pgtable->pgtable); +} + +static uint32_t pgtable_read(struct shmobile_iommu_domain_pgtable *pgtable, + unsigned int index) +{ + return pgtable->pgtable[index]; +} + +static void pgtable_write(struct shmobile_iommu_domain_pgtable *pgtable, + unsigned int index, unsigned int count, uint32_t val) +{ + unsigned int i; + + for (i = 0; i < count; i++) + pgtable->pgtable[index + i] = val; + dma_sync_single_for_device(NULL, pgtable->handle + index * sizeof(val), + sizeof(val) * count, DMA_TO_DEVICE); +} + +static int shmobile_iommu_domain_init(struct iommu_domain *domain) +{ + struct shmobile_iommu_domain *sh_domain; + int i, ret; + + sh_domain = kmalloc(sizeof(*sh_domain), GFP_KERNEL); + if (!sh_domain) + return -ENOMEM; + ret = pgtable_alloc(&sh_domain->l1, l1cache, L1_SIZE); + if (ret < 0) { + kfree(sh_domain); + return ret; + } + for (i = 0; i < L1_LEN; i++) + sh_domain->l2[i].pgtable = NULL; + spin_lock_init(&sh_domain->map_lock); + spin_lock_init(&sh_domain->attached_list_lock); + INIT_LIST_HEAD(&sh_domain->attached_list); + domain->priv = sh_domain; + return 0; +} + +static void shmobile_iommu_domain_destroy(struct iommu_domain *domain) +{ + struct shmobile_iommu_domain *sh_domain = domain->priv; + int i; + + for (i = 0; i < L1_LEN; i++) { + if (sh_domain->l2[i].pgtable) + pgtable_free(&sh_domain->l2[i], l2cache, L2_SIZE); + } + pgtable_free(&sh_domain->l1, l1cache, L1_SIZE); + kfree(sh_domain); + domain->priv = NULL; +} + +static int shmobile_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct shmobile_iommu_archdata *archdata = dev->archdata.iommu; + struct shmobile_iommu_domain *sh_domain = domain->priv; + int ret = -EBUSY; + + if (!archdata) + return -ENODEV; + spin_lock(&sh_domain->attached_list_lock); + spin_lock(&archdata->attach_lock); + if (archdata->attached != sh_domain) { + if (archdata->attached) + goto err; + ipmmu_tlb_set(archdata->ipmmu, sh_domain->l1.handle, L1_SIZE, + 0); + ipmmu_tlb_flush(archdata->ipmmu); + 
archdata->attached = sh_domain; + archdata->num_attached_devices = 0; + list_add(&archdata->attached_list, &sh_domain->attached_list); + } + archdata->num_attached_devices++; + ret = 0; +err: + spin_unlock(&archdata->attach_lock); + spin_unlock(&sh_domain->attached_list_lock); + return ret; +} + +static void shmobile_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct shmobile_iommu_archdata *archdata = dev->archdata.iommu; + struct shmobile_iommu_domain *sh_domain = domain->priv; + + if (!archdata) + return; + spin_lock(&sh_domain->attached_list_lock); + spin_lock(&archdata->attach_lock); + archdata->num_attached_devices--; + if (!archdata->num_attached_devices) { + ipmmu_tlb_set(archdata->ipmmu, 0, 0, 0); + ipmmu_tlb_flush(archdata->ipmmu); + archdata->attached = NULL; + list_del(&archdata->attached_list); + } + spin_unlock(&archdata->attach_lock); + spin_unlock(&sh_domain->attached_list_lock); +} + +static void domain_tlb_flush(struct shmobile_iommu_domain *sh_domain) +{ + struct shmobile_iommu_archdata *archdata; + + spin_lock(&sh_domain->attached_list_lock); + list_for_each_entry(archdata, &sh_domain->attached_list, attached_list) + ipmmu_tlb_flush(archdata->ipmmu); + spin_unlock(&sh_domain->attached_list_lock); +} + +static int l2alloc(struct shmobile_iommu_domain *sh_domain, + unsigned int l1index) +{ + int ret; + + if (!sh_domain->l2[l1index].pgtable) { + ret = pgtable_alloc(&sh_domain->l2[l1index], l2cache, L2_SIZE); + if (ret < 0) + return ret; + } + pgtable_write(&sh_domain->l1, l1index, 1, + sh_domain->l2[l1index].handle | 0x1); + return 0; +} + +static void l2realfree(struct shmobile_iommu_domain_pgtable *l2) +{ + if (l2->pgtable) + pgtable_free(l2, l2cache, L2_SIZE); +} + +static void l2free(struct shmobile_iommu_domain *sh_domain, + unsigned int l1index, + struct shmobile_iommu_domain_pgtable *l2) +{ + pgtable_write(&sh_domain->l1, l1index, 1, 0); + if (sh_domain->l2[l1index].pgtable) { + *l2 = sh_domain->l2[l1index]; + sh_domain->l2[l1index].pgtable = NULL; + } +} + +static int shmobile_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL }; + struct shmobile_iommu_domain *sh_domain = domain->priv; + unsigned int l1index, l2index; + int ret; + + l1index = iova >> 20; + switch (size) { + case SZ_4K: + l2index = (iova >> 12) & 0xff; + spin_lock(&sh_domain->map_lock); + ret = l2alloc(sh_domain, l1index); + if (!ret) + pgtable_write(&sh_domain->l2[l1index], l2index, 1, + paddr | 0xff2); + spin_unlock(&sh_domain->map_lock); + break; + case SZ_64K: + l2index = (iova >> 12) & 0xf0; + spin_lock(&sh_domain->map_lock); + ret = l2alloc(sh_domain, l1index); + if (!ret) + pgtable_write(&sh_domain->l2[l1index], l2index, 0x10, + paddr | 0xff1); + spin_unlock(&sh_domain->map_lock); + break; + case SZ_1M: + spin_lock(&sh_domain->map_lock); + l2free(sh_domain, l1index, &l2); + pgtable_write(&sh_domain->l1, l1index, 1, paddr | 0xc02); + spin_unlock(&sh_domain->map_lock); + ret = 0; + break; + default: + ret = -EINVAL; + } + if (!ret) + domain_tlb_flush(sh_domain); + l2realfree(&l2); + return ret; +} + +static size_t shmobile_iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL }; + struct shmobile_iommu_domain *sh_domain = domain->priv; + unsigned int l1index, l2index; + uint32_t l2entry = 0; + size_t ret = 0; + + l1index = iova >> 20; + if (!(iova & 0xfffff) && 
size >= SZ_1M) { + spin_lock(&sh_domain->map_lock); + l2free(sh_domain, l1index, &l2); + spin_unlock(&sh_domain->map_lock); + ret = SZ_1M; + goto done; + } + l2index = (iova >> 12) & 0xff; + spin_lock(&sh_domain->map_lock); + if (sh_domain->l2[l1index].pgtable) + l2entry = pgtable_read(&sh_domain->l2[l1index], l2index); + switch (l2entry & 3) { + case 1: + if (l2index & 0xf) + break; + pgtable_write(&sh_domain->l2[l1index], l2index, 0x10, 0); + ret = SZ_64K; + break; + case 2: + pgtable_write(&sh_domain->l2[l1index], l2index, 1, 0); + ret = SZ_4K; + break; + } + spin_unlock(&sh_domain->map_lock); +done: + if (ret) + domain_tlb_flush(sh_domain); + l2realfree(&l2); + return ret; +} + +static phys_addr_t shmobile_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct shmobile_iommu_domain *sh_domain = domain->priv; + uint32_t l1entry = 0, l2entry = 0; + unsigned int l1index, l2index; + + l1index = iova >> 20; + l2index = (iova >> 12) & 0xff; + spin_lock(&sh_domain->map_lock); + if (sh_domain->l2[l1index].pgtable) + l2entry = pgtable_read(&sh_domain->l2[l1index], l2index); + else + l1entry = pgtable_read(&sh_domain->l1, l1index); + spin_unlock(&sh_domain->map_lock); + switch (l2entry & 3) { + case 1: + return (l2entry & ~0xffff) | (iova & 0xffff); + case 2: + return (l2entry & ~0xfff) | (iova & 0xfff); + default: + if ((l1entry & 3) == 2) + return (l1entry & ~0xfffff) | (iova & 0xfffff); + return 0; + } +} + +static int find_dev_name(struct shmobile_ipmmu *ipmmu, const char *dev_name) +{ + unsigned int i, n = ipmmu->num_dev_names; + + for (i = 0; i < n; i++) { + if (strcmp(ipmmu->dev_names[i], dev_name) == 0) + return 1; + } + return 0; +} + +static int shmobile_iommu_add_device(struct device *dev) +{ + struct shmobile_iommu_archdata *archdata = ipmmu_archdata; + struct dma_iommu_mapping *mapping; + + if (!find_dev_name(archdata->ipmmu, dev_name(dev))) + return 0; + mapping = archdata->iommu_mapping; + if (!mapping) { + mapping = arm_iommu_create_mapping(&platform_bus_type, 0, + L1_LEN << 20); + if (IS_ERR(mapping)) + return PTR_ERR(mapping); + archdata->iommu_mapping = mapping; + } + dev->archdata.iommu = archdata; + if (arm_iommu_attach_device(dev, mapping)) + pr_err("arm_iommu_attach_device failed\n"); + return 0; +} + +static struct iommu_ops shmobile_iommu_ops = { + .domain_init = shmobile_iommu_domain_init, + .domain_destroy = shmobile_iommu_domain_destroy, + .attach_dev = shmobile_iommu_attach_device, + .detach_dev = shmobile_iommu_detach_device, + .map = shmobile_iommu_map, + .unmap = shmobile_iommu_unmap, + .iova_to_phys = shmobile_iommu_iova_to_phys, + .add_device = shmobile_iommu_add_device, + .pgsize_bitmap = SZ_1M | SZ_64K | SZ_4K, +}; + +int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu) +{ + static struct shmobile_iommu_archdata *archdata; + + l1cache = kmem_cache_create("shmobile-iommu-pgtable1", L1_SIZE, + L1_ALIGN, SLAB_HWCACHE_ALIGN, NULL); + if (!l1cache) + return -ENOMEM; + l2cache = kmem_cache_create("shmobile-iommu-pgtable2", L2_SIZE, + L2_ALIGN, SLAB_HWCACHE_ALIGN, NULL); + if (!l2cache) { + kmem_cache_destroy(l1cache); + return -ENOMEM; + } + archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); + if (!archdata) { + kmem_cache_destroy(l1cache); + kmem_cache_destroy(l2cache); + return -ENOMEM; + } + spin_lock_init(&archdata->attach_lock); + archdata->ipmmu = ipmmu; + ipmmu_archdata = archdata; + bus_set_iommu(&platform_bus_type, &shmobile_iommu_ops); + return 0; +} diff --git a/drivers/iommu/shmobile-ipmmu.c b/drivers/iommu/shmobile-ipmmu.c new file 
mode 100644 index 00000000000..bd97adecb1f --- /dev/null +++ b/drivers/iommu/shmobile-ipmmu.c @@ -0,0 +1,130 @@ +/* + * IPMMU/IPMMUI + * Copyright (C) 2012 Hideki EIRAKU + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#include <linux/err.h> +#include <linux/export.h> +#include <linux/io.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/platform_data/sh_ipmmu.h> +#include "shmobile-ipmmu.h" + +#define IMCTR1 0x000 +#define IMCTR2 0x004 +#define IMASID 0x010 +#define IMTTBR 0x014 +#define IMTTBCR 0x018 + +#define IMCTR1_TLBEN (1 << 0) +#define IMCTR1_FLUSH (1 << 1) + +static void ipmmu_reg_write(struct shmobile_ipmmu *ipmmu, unsigned long reg_off, + unsigned long data) +{ + iowrite32(data, ipmmu->ipmmu_base + reg_off); +} + +void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu) +{ + if (!ipmmu) + return; + + spin_lock(&ipmmu->flush_lock); + if (ipmmu->tlb_enabled) + ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH | IMCTR1_TLBEN); + else + ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH); + spin_unlock(&ipmmu->flush_lock); +} + +void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, + int asid) +{ + if (!ipmmu) + return; + + spin_lock(&ipmmu->flush_lock); + switch (size) { + default: + ipmmu->tlb_enabled = 0; + break; + case 0x2000: + ipmmu_reg_write(ipmmu, IMTTBCR, 1); + ipmmu->tlb_enabled = 1; + break; + case 0x1000: + ipmmu_reg_write(ipmmu, IMTTBCR, 2); + ipmmu->tlb_enabled = 1; + break; + case 0x800: + ipmmu_reg_write(ipmmu, IMTTBCR, 3); + ipmmu->tlb_enabled = 1; + break; + case 0x400: + ipmmu_reg_write(ipmmu, IMTTBCR, 4); + ipmmu->tlb_enabled = 1; + break; + case 0x200: + ipmmu_reg_write(ipmmu, IMTTBCR, 5); + ipmmu->tlb_enabled = 1; + break; + case 0x100: + ipmmu_reg_write(ipmmu, IMTTBCR, 6); + ipmmu->tlb_enabled = 1; + break; + case 0x80: + ipmmu_reg_write(ipmmu, IMTTBCR, 7); + ipmmu->tlb_enabled = 1; + break; + } + ipmmu_reg_write(ipmmu, IMTTBR, phys); + ipmmu_reg_write(ipmmu, IMASID, asid); + spin_unlock(&ipmmu->flush_lock); +} + +static int ipmmu_probe(struct platform_device *pdev) +{ + struct shmobile_ipmmu *ipmmu; + struct resource *res; + struct shmobile_ipmmu_platform_data *pdata = pdev->dev.platform_data; + + ipmmu = devm_kzalloc(&pdev->dev, sizeof(*ipmmu), GFP_KERNEL); + if (!ipmmu) { + dev_err(&pdev->dev, "cannot allocate device data\n"); + return -ENOMEM; + } + spin_lock_init(&ipmmu->flush_lock); + ipmmu->dev = &pdev->dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ipmmu->ipmmu_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ipmmu->ipmmu_base)) + return PTR_ERR(ipmmu->ipmmu_base); + + ipmmu->dev_names = pdata->dev_names; + ipmmu->num_dev_names = pdata->num_dev_names; + platform_set_drvdata(pdev, ipmmu); + ipmmu_reg_write(ipmmu, IMCTR1, 0x0); /* disable TLB */ + ipmmu_reg_write(ipmmu, IMCTR2, 0x0); /* disable PMB */ + return ipmmu_iommu_init(ipmmu); +} + +static struct platform_driver ipmmu_driver = { + .probe = ipmmu_probe, + .driver = { + .owner = THIS_MODULE, + .name = "ipmmu", + }, +}; + +static int __init ipmmu_init(void) +{ + return platform_driver_register(&ipmmu_driver); +} +subsys_initcall(ipmmu_init); diff --git a/drivers/iommu/shmobile-ipmmu.h b/drivers/iommu/shmobile-ipmmu.h new file mode 100644 index 00000000000..9524743ca1f --- /dev/null +++ b/drivers/iommu/shmobile-ipmmu.h @@ -0,0 +1,34 @@ +/* shmobile-ipmmu.h + * + * 
Copyright (C) 2012 Hideki EIRAKU + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#ifndef __SHMOBILE_IPMMU_H__ +#define __SHMOBILE_IPMMU_H__ + +struct shmobile_ipmmu { + struct device *dev; + void __iomem *ipmmu_base; + int tlb_enabled; + spinlock_t flush_lock; + const char * const *dev_names; + unsigned int num_dev_names; +}; + +#ifdef CONFIG_SHMOBILE_IPMMU_TLB +void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu); +void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, + int asid); +int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu); +#else +static inline int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu) +{ + return -EINVAL; +} +#endif + +#endif /* __SHMOBILE_IPMMU_H__ */ diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c new file mode 100644 index 00000000000..dba1a9fd507 --- /dev/null +++ b/drivers/iommu/tegra-gart.c @@ -0,0 +1,452 @@ +/* + * IOMMU API for GART in Tegra20 + * + * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#define pr_fmt(fmt) "%s(): " fmt, __func__ + +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/list.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/of.h> + +#include <asm/cacheflush.h> + +/* bitmap of the page sizes currently supported */ +#define GART_IOMMU_PGSIZES (SZ_4K) + +#define GART_REG_BASE 0x24 +#define GART_CONFIG (0x24 - GART_REG_BASE) +#define GART_ENTRY_ADDR (0x28 - GART_REG_BASE) +#define GART_ENTRY_DATA (0x2c - GART_REG_BASE) +#define GART_ENTRY_PHYS_ADDR_VALID (1 << 31) + +#define GART_PAGE_SHIFT 12 +#define GART_PAGE_SIZE (1 << GART_PAGE_SHIFT) +#define GART_PAGE_MASK \ + (~(GART_PAGE_SIZE - 1) & ~GART_ENTRY_PHYS_ADDR_VALID) + +struct gart_client { + struct device *dev; + struct list_head list; +}; + +struct gart_device { + void __iomem *regs; + u32 *savedata; + u32 page_count; /* total remappable size */ + dma_addr_t iovmm_base; /* offset to vmm_area */ + spinlock_t pte_lock; /* for pagetable */ + struct list_head client; + spinlock_t client_lock; /* for client list */ + struct device *dev; +}; + +static struct gart_device *gart_handle; /* unique for a system */ + +#define GART_PTE(_pfn) \ + (GART_ENTRY_PHYS_ADDR_VALID | ((_pfn) << PAGE_SHIFT)) + +/* + * Any interaction between any block on PPSB and a block on APB or AHB + * must have these read-back to ensure the APB/AHB bus transaction is + * complete before initiating activity on the PPSB block. 
+ */ +#define FLUSH_GART_REGS(gart) ((void)readl((gart)->regs + GART_CONFIG)) + +#define for_each_gart_pte(gart, iova) \ + for (iova = gart->iovmm_base; \ + iova < gart->iovmm_base + GART_PAGE_SIZE * gart->page_count; \ + iova += GART_PAGE_SIZE) + +static inline void gart_set_pte(struct gart_device *gart, + unsigned long offs, u32 pte) +{ + writel(offs, gart->regs + GART_ENTRY_ADDR); + writel(pte, gart->regs + GART_ENTRY_DATA); + + dev_dbg(gart->dev, "%s %08lx:%08x\n", + pte ? "map" : "unmap", offs, pte & GART_PAGE_MASK); +} + +static inline unsigned long gart_read_pte(struct gart_device *gart, + unsigned long offs) +{ + unsigned long pte; + + writel(offs, gart->regs + GART_ENTRY_ADDR); + pte = readl(gart->regs + GART_ENTRY_DATA); + + return pte; +} + +static void do_gart_setup(struct gart_device *gart, const u32 *data) +{ + unsigned long iova; + + for_each_gart_pte(gart, iova) + gart_set_pte(gart, iova, data ? *(data++) : 0); + + writel(1, gart->regs + GART_CONFIG); + FLUSH_GART_REGS(gart); +} + +#ifdef DEBUG +static void gart_dump_table(struct gart_device *gart) +{ + unsigned long iova; + unsigned long flags; + + spin_lock_irqsave(&gart->pte_lock, flags); + for_each_gart_pte(gart, iova) { + unsigned long pte; + + pte = gart_read_pte(gart, iova); + + dev_dbg(gart->dev, "%s %08lx:%08lx\n", + (GART_ENTRY_PHYS_ADDR_VALID & pte) ? "v" : " ", + iova, pte & GART_PAGE_MASK); + } + spin_unlock_irqrestore(&gart->pte_lock, flags); +} +#else +static inline void gart_dump_table(struct gart_device *gart) +{ +} +#endif + +static inline bool gart_iova_range_valid(struct gart_device *gart, + unsigned long iova, size_t bytes) +{ + unsigned long iova_start, iova_end, gart_start, gart_end; + + iova_start = iova; + iova_end = iova_start + bytes - 1; + gart_start = gart->iovmm_base; + gart_end = gart_start + gart->page_count * GART_PAGE_SIZE - 1; + + if (iova_start < gart_start) + return false; + if (iova_end > gart_end) + return false; + return true; +} + +static int gart_iommu_attach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct gart_device *gart; + struct gart_client *client, *c; + int err = 0; + + gart = gart_handle; + if (!gart) + return -EINVAL; + domain->priv = gart; + + domain->geometry.aperture_start = gart->iovmm_base; + domain->geometry.aperture_end = gart->iovmm_base + + gart->page_count * GART_PAGE_SIZE - 1; + domain->geometry.force_aperture = true; + + client = devm_kzalloc(gart->dev, sizeof(*c), GFP_KERNEL); + if (!client) + return -ENOMEM; + client->dev = dev; + + spin_lock(&gart->client_lock); + list_for_each_entry(c, &gart->client, list) { + if (c->dev == dev) { + dev_err(gart->dev, + "%s is already attached\n", dev_name(dev)); + err = -EINVAL; + goto fail; + } + } + list_add(&client->list, &gart->client); + spin_unlock(&gart->client_lock); + dev_dbg(gart->dev, "Attached %s\n", dev_name(dev)); + return 0; + +fail: + devm_kfree(gart->dev, client); + spin_unlock(&gart->client_lock); + return err; +} + +static void gart_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct gart_device *gart = domain->priv; + struct gart_client *c; + + spin_lock(&gart->client_lock); + + list_for_each_entry(c, &gart->client, list) { + if (c->dev == dev) { + list_del(&c->list); + devm_kfree(gart->dev, c); + dev_dbg(gart->dev, "Detached %s\n", dev_name(dev)); + goto out; + } + } + dev_err(gart->dev, "Couldn't find\n"); +out: + spin_unlock(&gart->client_lock); +} + +static int gart_iommu_domain_init(struct iommu_domain *domain) +{ + return 0; +} + +static void 
gart_iommu_domain_destroy(struct iommu_domain *domain) +{ + struct gart_device *gart = domain->priv; + + if (!gart) + return; + + spin_lock(&gart->client_lock); + if (!list_empty(&gart->client)) { + struct gart_client *c; + + list_for_each_entry(c, &gart->client, list) + gart_iommu_detach_dev(domain, c->dev); + } + spin_unlock(&gart->client_lock); + domain->priv = NULL; +} + +static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t pa, size_t bytes, int prot) +{ + struct gart_device *gart = domain->priv; + unsigned long flags; + unsigned long pfn; + + if (!gart_iova_range_valid(gart, iova, bytes)) + return -EINVAL; + + spin_lock_irqsave(&gart->pte_lock, flags); + pfn = __phys_to_pfn(pa); + if (!pfn_valid(pfn)) { + dev_err(gart->dev, "Invalid page: %pa\n", &pa); + spin_unlock_irqrestore(&gart->pte_lock, flags); + return -EINVAL; + } + gart_set_pte(gart, iova, GART_PTE(pfn)); + FLUSH_GART_REGS(gart); + spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; +} + +static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t bytes) +{ + struct gart_device *gart = domain->priv; + unsigned long flags; + + if (!gart_iova_range_valid(gart, iova, bytes)) + return 0; + + spin_lock_irqsave(&gart->pte_lock, flags); + gart_set_pte(gart, iova, 0); + FLUSH_GART_REGS(gart); + spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; +} + +static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct gart_device *gart = domain->priv; + unsigned long pte; + phys_addr_t pa; + unsigned long flags; + + if (!gart_iova_range_valid(gart, iova, 0)) + return -EINVAL; + + spin_lock_irqsave(&gart->pte_lock, flags); + pte = gart_read_pte(gart, iova); + spin_unlock_irqrestore(&gart->pte_lock, flags); + + pa = (pte & GART_PAGE_MASK); + if (!pfn_valid(__phys_to_pfn(pa))) { + dev_err(gart->dev, "No entry for %08llx:%pa\n", + (unsigned long long)iova, &pa); + gart_dump_table(gart); + return -EINVAL; + } + return pa; +} + +static int gart_iommu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + return 0; +} + +static struct iommu_ops gart_iommu_ops = { + .domain_init = gart_iommu_domain_init, + .domain_destroy = gart_iommu_domain_destroy, + .attach_dev = gart_iommu_attach_dev, + .detach_dev = gart_iommu_detach_dev, + .map = gart_iommu_map, + .unmap = gart_iommu_unmap, + .iova_to_phys = gart_iommu_iova_to_phys, + .domain_has_cap = gart_iommu_domain_has_cap, + .pgsize_bitmap = GART_IOMMU_PGSIZES, +}; + +static int tegra_gart_suspend(struct device *dev) +{ + struct gart_device *gart = dev_get_drvdata(dev); + unsigned long iova; + u32 *data = gart->savedata; + unsigned long flags; + + spin_lock_irqsave(&gart->pte_lock, flags); + for_each_gart_pte(gart, iova) + *(data++) = gart_read_pte(gart, iova); + spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; +} + +static int tegra_gart_resume(struct device *dev) +{ + struct gart_device *gart = dev_get_drvdata(dev); + unsigned long flags; + + spin_lock_irqsave(&gart->pte_lock, flags); + do_gart_setup(gart, gart->savedata); + spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; +} + +static int tegra_gart_probe(struct platform_device *pdev) +{ + struct gart_device *gart; + struct resource *res, *res_remap; + void __iomem *gart_regs; + struct device *dev = &pdev->dev; + + if (gart_handle) + return -EIO; + + BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT); + + /* the GART memory aperture is required */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 
0); + res_remap = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res || !res_remap) { + dev_err(dev, "GART memory aperture expected\n"); + return -ENXIO; + } + + gart = devm_kzalloc(dev, sizeof(*gart), GFP_KERNEL); + if (!gart) { + dev_err(dev, "failed to allocate gart_device\n"); + return -ENOMEM; + } + + gart_regs = devm_ioremap(dev, res->start, resource_size(res)); + if (!gart_regs) { + dev_err(dev, "failed to remap GART registers\n"); + return -ENXIO; + } + + gart->dev = &pdev->dev; + spin_lock_init(&gart->pte_lock); + spin_lock_init(&gart->client_lock); + INIT_LIST_HEAD(&gart->client); + gart->regs = gart_regs; + gart->iovmm_base = (dma_addr_t)res_remap->start; + gart->page_count = (resource_size(res_remap) >> GART_PAGE_SHIFT); + + gart->savedata = vmalloc(sizeof(u32) * gart->page_count); + if (!gart->savedata) { + dev_err(dev, "failed to allocate context save area\n"); + return -ENOMEM; + } + + platform_set_drvdata(pdev, gart); + do_gart_setup(gart, NULL); + + gart_handle = gart; + bus_set_iommu(&platform_bus_type, &gart_iommu_ops); + return 0; +} + +static int tegra_gart_remove(struct platform_device *pdev) +{ + struct gart_device *gart = platform_get_drvdata(pdev); + + writel(0, gart->regs + GART_CONFIG); + if (gart->savedata) + vfree(gart->savedata); + gart_handle = NULL; + return 0; +} + +static const struct dev_pm_ops tegra_gart_pm_ops = { + .suspend = tegra_gart_suspend, + .resume = tegra_gart_resume, +}; + +static struct of_device_id tegra_gart_of_match[] = { + { .compatible = "nvidia,tegra20-gart", }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_gart_of_match); + +static struct platform_driver tegra_gart_driver = { + .probe = tegra_gart_probe, + .remove = tegra_gart_remove, + .driver = { + .owner = THIS_MODULE, + .name = "tegra-gart", + .pm = &tegra_gart_pm_ops, + .of_match_table = tegra_gart_of_match, + }, +}; + +static int tegra_gart_init(void) +{ + return platform_driver_register(&tegra_gart_driver); +} + +static void __exit tegra_gart_exit(void) +{ + platform_driver_unregister(&tegra_gart_driver); +} + +subsys_initcall(tegra_gart_init); +module_exit(tegra_gart_exit); + +MODULE_DESCRIPTION("IOMMU API for GART in Tegra20"); +MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>"); +MODULE_ALIAS("platform:tegra-gart"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c new file mode 100644 index 00000000000..605b5b46a90 --- /dev/null +++ b/drivers/iommu/tegra-smmu.c @@ -0,0 +1,1295 @@ +/* + * IOMMU API for SMMU in Tegra30 + * + * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#define pr_fmt(fmt) "%s(): " fmt, __func__ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/device.h> +#include <linux/sched.h> +#include <linux/iommu.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_iommu.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/tegra-ahb.h> + +#include <asm/page.h> +#include <asm/cacheflush.h> + +enum smmu_hwgrp { + HWGRP_AFI, + HWGRP_AVPC, + HWGRP_DC, + HWGRP_DCB, + HWGRP_EPP, + HWGRP_G2, + HWGRP_HC, + HWGRP_HDA, + HWGRP_ISP, + HWGRP_MPE, + HWGRP_NV, + HWGRP_NV2, + HWGRP_PPCS, + HWGRP_SATA, + HWGRP_VDE, + HWGRP_VI, + + HWGRP_COUNT, + + HWGRP_END = ~0, +}; + +#define HWG_AFI (1 << HWGRP_AFI) +#define HWG_AVPC (1 << HWGRP_AVPC) +#define HWG_DC (1 << HWGRP_DC) +#define HWG_DCB (1 << HWGRP_DCB) +#define HWG_EPP (1 << HWGRP_EPP) +#define HWG_G2 (1 << HWGRP_G2) +#define HWG_HC (1 << HWGRP_HC) +#define HWG_HDA (1 << HWGRP_HDA) +#define HWG_ISP (1 << HWGRP_ISP) +#define HWG_MPE (1 << HWGRP_MPE) +#define HWG_NV (1 << HWGRP_NV) +#define HWG_NV2 (1 << HWGRP_NV2) +#define HWG_PPCS (1 << HWGRP_PPCS) +#define HWG_SATA (1 << HWGRP_SATA) +#define HWG_VDE (1 << HWGRP_VDE) +#define HWG_VI (1 << HWGRP_VI) + +/* bitmap of the page sizes currently supported */ +#define SMMU_IOMMU_PGSIZES (SZ_4K) + +#define SMMU_CONFIG 0x10 +#define SMMU_CONFIG_DISABLE 0 +#define SMMU_CONFIG_ENABLE 1 + +/* REVISIT: To support multiple MCs */ +enum { + _MC = 0, +}; + +enum { + _TLB = 0, + _PTC, +}; + +#define SMMU_CACHE_CONFIG_BASE 0x14 +#define __SMMU_CACHE_CONFIG(mc, cache) (SMMU_CACHE_CONFIG_BASE + 4 * cache) +#define SMMU_CACHE_CONFIG(cache) __SMMU_CACHE_CONFIG(_MC, cache) + +#define SMMU_CACHE_CONFIG_STATS_SHIFT 31 +#define SMMU_CACHE_CONFIG_STATS_ENABLE (1 << SMMU_CACHE_CONFIG_STATS_SHIFT) +#define SMMU_CACHE_CONFIG_STATS_TEST_SHIFT 30 +#define SMMU_CACHE_CONFIG_STATS_TEST (1 << SMMU_CACHE_CONFIG_STATS_TEST_SHIFT) + +#define SMMU_TLB_CONFIG_HIT_UNDER_MISS__ENABLE (1 << 29) +#define SMMU_TLB_CONFIG_ACTIVE_LINES__VALUE 0x10 +#define SMMU_TLB_CONFIG_RESET_VAL 0x20000010 + +#define SMMU_PTC_CONFIG_CACHE__ENABLE (1 << 29) +#define SMMU_PTC_CONFIG_INDEX_MAP__PATTERN 0x3f +#define SMMU_PTC_CONFIG_RESET_VAL 0x2000003f + +#define SMMU_PTB_ASID 0x1c +#define SMMU_PTB_ASID_CURRENT_SHIFT 0 + +#define SMMU_PTB_DATA 0x20 +#define SMMU_PTB_DATA_RESET_VAL 0 +#define SMMU_PTB_DATA_ASID_NONSECURE_SHIFT 29 +#define SMMU_PTB_DATA_ASID_WRITABLE_SHIFT 30 +#define SMMU_PTB_DATA_ASID_READABLE_SHIFT 31 + +#define SMMU_TLB_FLUSH 0x30 +#define SMMU_TLB_FLUSH_VA_MATCH_ALL 0 +#define SMMU_TLB_FLUSH_VA_MATCH_SECTION 2 +#define SMMU_TLB_FLUSH_VA_MATCH_GROUP 3 +#define SMMU_TLB_FLUSH_ASID_SHIFT 29 +#define SMMU_TLB_FLUSH_ASID_MATCH_DISABLE 0 +#define SMMU_TLB_FLUSH_ASID_MATCH_ENABLE 1 +#define SMMU_TLB_FLUSH_ASID_MATCH_SHIFT 31 + +#define SMMU_PTC_FLUSH 0x34 +#define SMMU_PTC_FLUSH_TYPE_ALL 0 +#define SMMU_PTC_FLUSH_TYPE_ADR 1 +#define SMMU_PTC_FLUSH_ADR_SHIFT 4 + +#define SMMU_ASID_SECURITY 0x38 + +#define SMMU_STATS_CACHE_COUNT_BASE 0x1f0 + +#define SMMU_STATS_CACHE_COUNT(mc, cache, hitmiss) \ + (SMMU_STATS_CACHE_COUNT_BASE + 8 * cache + 4 * hitmiss) + +#define SMMU_TRANSLATION_ENABLE_0 0x228 +#define SMMU_TRANSLATION_ENABLE_1 0x22c +#define SMMU_TRANSLATION_ENABLE_2 0x230 + +#define SMMU_AFI_ASID 0x238 /* PCIE */ +#define SMMU_AVPC_ASID 0x23c /* AVP */ +#define SMMU_DC_ASID 0x240 /* 
Display controller */ +#define SMMU_DCB_ASID 0x244 /* Display controller B */ +#define SMMU_EPP_ASID 0x248 /* Encoder pre-processor */ +#define SMMU_G2_ASID 0x24c /* 2D engine */ +#define SMMU_HC_ASID 0x250 /* Host1x */ +#define SMMU_HDA_ASID 0x254 /* High-def audio */ +#define SMMU_ISP_ASID 0x258 /* Image signal processor */ +#define SMMU_MPE_ASID 0x264 /* MPEG encoder */ +#define SMMU_NV_ASID 0x268 /* (3D) */ +#define SMMU_NV2_ASID 0x26c /* (3D) */ +#define SMMU_PPCS_ASID 0x270 /* AHB */ +#define SMMU_SATA_ASID 0x278 /* SATA */ +#define SMMU_VDE_ASID 0x27c /* Video decoder */ +#define SMMU_VI_ASID 0x280 /* Video input */ + +#define SMMU_PDE_NEXT_SHIFT 28 + +#define SMMU_TLB_FLUSH_VA_SECTION__MASK 0xffc00000 +#define SMMU_TLB_FLUSH_VA_SECTION__SHIFT 12 /* right shift */ +#define SMMU_TLB_FLUSH_VA_GROUP__MASK 0xffffc000 +#define SMMU_TLB_FLUSH_VA_GROUP__SHIFT 12 /* right shift */ +#define SMMU_TLB_FLUSH_VA(iova, which) \ + ((((iova) & SMMU_TLB_FLUSH_VA_##which##__MASK) >> \ + SMMU_TLB_FLUSH_VA_##which##__SHIFT) | \ + SMMU_TLB_FLUSH_VA_MATCH_##which) +#define SMMU_PTB_ASID_CUR(n) \ + ((n) << SMMU_PTB_ASID_CURRENT_SHIFT) +#define SMMU_TLB_FLUSH_ASID_MATCH_disable \ + (SMMU_TLB_FLUSH_ASID_MATCH_DISABLE << \ + SMMU_TLB_FLUSH_ASID_MATCH_SHIFT) +#define SMMU_TLB_FLUSH_ASID_MATCH__ENABLE \ + (SMMU_TLB_FLUSH_ASID_MATCH_ENABLE << \ + SMMU_TLB_FLUSH_ASID_MATCH_SHIFT) + +#define SMMU_PAGE_SHIFT 12 +#define SMMU_PAGE_SIZE (1 << SMMU_PAGE_SHIFT) +#define SMMU_PAGE_MASK ((1 << SMMU_PAGE_SHIFT) - 1) + +#define SMMU_PDIR_COUNT 1024 +#define SMMU_PDIR_SIZE (sizeof(unsigned long) * SMMU_PDIR_COUNT) +#define SMMU_PTBL_COUNT 1024 +#define SMMU_PTBL_SIZE (sizeof(unsigned long) * SMMU_PTBL_COUNT) +#define SMMU_PDIR_SHIFT 12 +#define SMMU_PDE_SHIFT 12 +#define SMMU_PTE_SHIFT 12 +#define SMMU_PFN_MASK 0x000fffff + +#define SMMU_ADDR_TO_PFN(addr) ((addr) >> 12) +#define SMMU_ADDR_TO_PDN(addr) ((addr) >> 22) +#define SMMU_PDN_TO_ADDR(pdn) ((pdn) << 22) + +#define _READABLE (1 << SMMU_PTB_DATA_ASID_READABLE_SHIFT) +#define _WRITABLE (1 << SMMU_PTB_DATA_ASID_WRITABLE_SHIFT) +#define _NONSECURE (1 << SMMU_PTB_DATA_ASID_NONSECURE_SHIFT) +#define _PDE_NEXT (1 << SMMU_PDE_NEXT_SHIFT) +#define _MASK_ATTR (_READABLE | _WRITABLE | _NONSECURE) + +#define _PDIR_ATTR (_READABLE | _WRITABLE | _NONSECURE) + +#define _PDE_ATTR (_READABLE | _WRITABLE | _NONSECURE) +#define _PDE_ATTR_N (_PDE_ATTR | _PDE_NEXT) +#define _PDE_VACANT(pdn) (((pdn) << 10) | _PDE_ATTR) + +#define _PTE_ATTR (_READABLE | _WRITABLE | _NONSECURE) +#define _PTE_VACANT(addr) (((addr) >> SMMU_PAGE_SHIFT) | _PTE_ATTR) + +#define SMMU_MK_PDIR(page, attr) \ + ((page_to_phys(page) >> SMMU_PDIR_SHIFT) | (attr)) +#define SMMU_MK_PDE(page, attr) \ + (unsigned long)((page_to_phys(page) >> SMMU_PDE_SHIFT) | (attr)) +#define SMMU_EX_PTBL_PAGE(pde) \ + pfn_to_page((unsigned long)(pde) & SMMU_PFN_MASK) +#define SMMU_PFN_TO_PTE(pfn, attr) (unsigned long)((pfn) | (attr)) + +#define SMMU_ASID_ENABLE(asid) ((asid) | (1 << 31)) +#define SMMU_ASID_DISABLE 0 +#define SMMU_ASID_ASID(n) ((n) & ~SMMU_ASID_ENABLE(0)) + +#define NUM_SMMU_REG_BANKS 3 + +#define smmu_client_enable_hwgrp(c, m) smmu_client_set_hwgrp(c, m, 1) +#define smmu_client_disable_hwgrp(c) smmu_client_set_hwgrp(c, 0, 0) +#define __smmu_client_enable_hwgrp(c, m) __smmu_client_set_hwgrp(c, m, 1) +#define __smmu_client_disable_hwgrp(c) __smmu_client_set_hwgrp(c, 0, 0) + +#define HWGRP_INIT(client) [HWGRP_##client] = SMMU_##client##_ASID + +static const u32 smmu_hwgrp_asid_reg[] = { + HWGRP_INIT(AFI), + HWGRP_INIT(AVPC), 
+ HWGRP_INIT(DC), + HWGRP_INIT(DCB), + HWGRP_INIT(EPP), + HWGRP_INIT(G2), + HWGRP_INIT(HC), + HWGRP_INIT(HDA), + HWGRP_INIT(ISP), + HWGRP_INIT(MPE), + HWGRP_INIT(NV), + HWGRP_INIT(NV2), + HWGRP_INIT(PPCS), + HWGRP_INIT(SATA), + HWGRP_INIT(VDE), + HWGRP_INIT(VI), +}; +#define HWGRP_ASID_REG(x) (smmu_hwgrp_asid_reg[x]) + +/* + * Per client for address space + */ +struct smmu_client { + struct device *dev; + struct list_head list; + struct smmu_as *as; + u32 hwgrp; +}; + +/* + * Per address space + */ +struct smmu_as { + struct smmu_device *smmu; /* back pointer to container */ + unsigned int asid; + spinlock_t lock; /* for pagetable */ + struct page *pdir_page; + unsigned long pdir_attr; + unsigned long pde_attr; + unsigned long pte_attr; + unsigned int *pte_count; + + struct list_head client; + spinlock_t client_lock; /* for client list */ +}; + +struct smmu_debugfs_info { + struct smmu_device *smmu; + int mc; + int cache; +}; + +/* + * Per SMMU device - IOMMU device + */ +struct smmu_device { + void __iomem *regbase; /* register offset base */ + void __iomem **regs; /* register block start address array */ + void __iomem **rege; /* register block end address array */ + int nregs; /* number of register blocks */ + + unsigned long iovmm_base; /* remappable base address */ + unsigned long page_count; /* total remappable size */ + spinlock_t lock; + char *name; + struct device *dev; + struct page *avp_vector_page; /* dummy page shared by all AS's */ + + /* + * Register image savers for suspend/resume + */ + unsigned long translation_enable_0; + unsigned long translation_enable_1; + unsigned long translation_enable_2; + unsigned long asid_security; + + struct dentry *debugfs_root; + struct smmu_debugfs_info *debugfs_info; + + struct device_node *ahb; + + int num_as; + struct smmu_as as[0]; /* Run-time allocated array */ +}; + +static struct smmu_device *smmu_handle; /* unique for a system */ + +/* + * SMMU register accessors + */ +static bool inline smmu_valid_reg(struct smmu_device *smmu, + void __iomem *addr) +{ + int i; + + for (i = 0; i < smmu->nregs; i++) { + if (addr < smmu->regs[i]) + break; + if (addr <= smmu->rege[i]) + return true; + } + + return false; +} + +static inline u32 smmu_read(struct smmu_device *smmu, size_t offs) +{ + void __iomem *addr = smmu->regbase + offs; + + BUG_ON(!smmu_valid_reg(smmu, addr)); + + return readl(addr); +} + +static inline void smmu_write(struct smmu_device *smmu, u32 val, size_t offs) +{ + void __iomem *addr = smmu->regbase + offs; + + BUG_ON(!smmu_valid_reg(smmu, addr)); + + writel(val, addr); +} + +#define VA_PAGE_TO_PA(va, page) \ + (page_to_phys(page) + ((unsigned long)(va) & ~PAGE_MASK)) + +#define FLUSH_CPU_DCACHE(va, page, size) \ + do { \ + unsigned long _pa_ = VA_PAGE_TO_PA(va, page); \ + __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \ + outer_flush_range(_pa_, _pa_+(size_t)(size)); \ + } while (0) + +/* + * Any interaction between any block on PPSB and a block on APB or AHB + * must have these read-back barriers to ensure the APB/AHB bus + * transaction is complete before initiating activity on the PPSB + * block. 
+ */ +#define FLUSH_SMMU_REGS(smmu) smmu_read(smmu, SMMU_CONFIG) + +#define smmu_client_hwgrp(c) (u32)((c)->dev->platform_data) + +static int __smmu_client_set_hwgrp(struct smmu_client *c, + unsigned long map, int on) +{ + int i; + struct smmu_as *as = c->as; + u32 val, offs, mask = SMMU_ASID_ENABLE(as->asid); + struct smmu_device *smmu = as->smmu; + + WARN_ON(!on && map); + if (on && !map) + return -EINVAL; + if (!on) + map = smmu_client_hwgrp(c); + + for_each_set_bit(i, &map, HWGRP_COUNT) { + offs = HWGRP_ASID_REG(i); + val = smmu_read(smmu, offs); + if (on) { + if (WARN_ON(val & mask)) + goto err_hw_busy; + val |= mask; + } else { + WARN_ON((val & mask) == mask); + val &= ~mask; + } + smmu_write(smmu, val, offs); + } + FLUSH_SMMU_REGS(smmu); + c->hwgrp = map; + return 0; + +err_hw_busy: + for_each_set_bit(i, &map, HWGRP_COUNT) { + offs = HWGRP_ASID_REG(i); + val = smmu_read(smmu, offs); + val &= ~mask; + smmu_write(smmu, val, offs); + } + return -EBUSY; +} + +static int smmu_client_set_hwgrp(struct smmu_client *c, u32 map, int on) +{ + u32 val; + unsigned long flags; + struct smmu_as *as = c->as; + struct smmu_device *smmu = as->smmu; + + spin_lock_irqsave(&smmu->lock, flags); + val = __smmu_client_set_hwgrp(c, map, on); + spin_unlock_irqrestore(&smmu->lock, flags); + return val; +} + +/* + * Flush all TLB entries and all PTC entries + * Caller must lock smmu + */ +static void smmu_flush_regs(struct smmu_device *smmu, int enable) +{ + u32 val; + + smmu_write(smmu, SMMU_PTC_FLUSH_TYPE_ALL, SMMU_PTC_FLUSH); + FLUSH_SMMU_REGS(smmu); + val = SMMU_TLB_FLUSH_VA_MATCH_ALL | + SMMU_TLB_FLUSH_ASID_MATCH_disable; + smmu_write(smmu, val, SMMU_TLB_FLUSH); + + if (enable) + smmu_write(smmu, SMMU_CONFIG_ENABLE, SMMU_CONFIG); + FLUSH_SMMU_REGS(smmu); +} + +static int smmu_setup_regs(struct smmu_device *smmu) +{ + int i; + u32 val; + + for (i = 0; i < smmu->num_as; i++) { + struct smmu_as *as = &smmu->as[i]; + struct smmu_client *c; + + smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID); + val = as->pdir_page ? + SMMU_MK_PDIR(as->pdir_page, as->pdir_attr) : + SMMU_PTB_DATA_RESET_VAL; + smmu_write(smmu, val, SMMU_PTB_DATA); + + list_for_each_entry(c, &as->client, list) + __smmu_client_set_hwgrp(c, c->hwgrp, 1); + } + + smmu_write(smmu, smmu->translation_enable_0, SMMU_TRANSLATION_ENABLE_0); + smmu_write(smmu, smmu->translation_enable_1, SMMU_TRANSLATION_ENABLE_1); + smmu_write(smmu, smmu->translation_enable_2, SMMU_TRANSLATION_ENABLE_2); + smmu_write(smmu, smmu->asid_security, SMMU_ASID_SECURITY); + smmu_write(smmu, SMMU_TLB_CONFIG_RESET_VAL, SMMU_CACHE_CONFIG(_TLB)); + smmu_write(smmu, SMMU_PTC_CONFIG_RESET_VAL, SMMU_CACHE_CONFIG(_PTC)); + + smmu_flush_regs(smmu, 1); + + return tegra_ahb_enable_smmu(smmu->ahb); +} + +static void flush_ptc_and_tlb(struct smmu_device *smmu, + struct smmu_as *as, dma_addr_t iova, + unsigned long *pte, struct page *page, int is_pde) +{ + u32 val; + unsigned long tlb_flush_va = is_pde + ? 
SMMU_TLB_FLUSH_VA(iova, SECTION) + : SMMU_TLB_FLUSH_VA(iova, GROUP); + + val = SMMU_PTC_FLUSH_TYPE_ADR | VA_PAGE_TO_PA(pte, page); + smmu_write(smmu, val, SMMU_PTC_FLUSH); + FLUSH_SMMU_REGS(smmu); + val = tlb_flush_va | + SMMU_TLB_FLUSH_ASID_MATCH__ENABLE | + (as->asid << SMMU_TLB_FLUSH_ASID_SHIFT); + smmu_write(smmu, val, SMMU_TLB_FLUSH); + FLUSH_SMMU_REGS(smmu); +} + +static void free_ptbl(struct smmu_as *as, dma_addr_t iova) +{ + unsigned long pdn = SMMU_ADDR_TO_PDN(iova); + unsigned long *pdir = (unsigned long *)page_address(as->pdir_page); + + if (pdir[pdn] != _PDE_VACANT(pdn)) { + dev_dbg(as->smmu->dev, "pdn: %lx\n", pdn); + + ClearPageReserved(SMMU_EX_PTBL_PAGE(pdir[pdn])); + __free_page(SMMU_EX_PTBL_PAGE(pdir[pdn])); + pdir[pdn] = _PDE_VACANT(pdn); + FLUSH_CPU_DCACHE(&pdir[pdn], as->pdir_page, sizeof pdir[pdn]); + flush_ptc_and_tlb(as->smmu, as, iova, &pdir[pdn], + as->pdir_page, 1); + } +} + +static void free_pdir(struct smmu_as *as) +{ + unsigned addr; + int count; + struct device *dev = as->smmu->dev; + + if (!as->pdir_page) + return; + + addr = as->smmu->iovmm_base; + count = as->smmu->page_count; + while (count-- > 0) { + free_ptbl(as, addr); + addr += SMMU_PAGE_SIZE * SMMU_PTBL_COUNT; + } + ClearPageReserved(as->pdir_page); + __free_page(as->pdir_page); + as->pdir_page = NULL; + devm_kfree(dev, as->pte_count); + as->pte_count = NULL; +} + +/* + * Maps PTBL for given iova and returns the PTE address + * Caller must unmap the mapped PTBL returned in *ptbl_page_p + */ +static unsigned long *locate_pte(struct smmu_as *as, + dma_addr_t iova, bool allocate, + struct page **ptbl_page_p, + unsigned int **count) +{ + unsigned long ptn = SMMU_ADDR_TO_PFN(iova); + unsigned long pdn = SMMU_ADDR_TO_PDN(iova); + unsigned long *pdir = page_address(as->pdir_page); + unsigned long *ptbl; + + if (pdir[pdn] != _PDE_VACANT(pdn)) { + /* Mapped entry table already exists */ + *ptbl_page_p = SMMU_EX_PTBL_PAGE(pdir[pdn]); + ptbl = page_address(*ptbl_page_p); + } else if (!allocate) { + return NULL; + } else { + int pn; + unsigned long addr = SMMU_PDN_TO_ADDR(pdn); + + /* Vacant - allocate a new page table */ + dev_dbg(as->smmu->dev, "New PTBL pdn: %lx\n", pdn); + + *ptbl_page_p = alloc_page(GFP_ATOMIC); + if (!*ptbl_page_p) { + dev_err(as->smmu->dev, + "failed to allocate smmu_device page table\n"); + return NULL; + } + SetPageReserved(*ptbl_page_p); + ptbl = (unsigned long *)page_address(*ptbl_page_p); + for (pn = 0; pn < SMMU_PTBL_COUNT; + pn++, addr += SMMU_PAGE_SIZE) { + ptbl[pn] = _PTE_VACANT(addr); + } + FLUSH_CPU_DCACHE(ptbl, *ptbl_page_p, SMMU_PTBL_SIZE); + pdir[pdn] = SMMU_MK_PDE(*ptbl_page_p, + as->pde_attr | _PDE_NEXT); + FLUSH_CPU_DCACHE(&pdir[pdn], as->pdir_page, sizeof pdir[pdn]); + flush_ptc_and_tlb(as->smmu, as, iova, &pdir[pdn], + as->pdir_page, 1); + } + *count = &as->pte_count[pdn]; + + return &ptbl[ptn % SMMU_PTBL_COUNT]; +} + +#ifdef CONFIG_SMMU_SIG_DEBUG +static void put_signature(struct smmu_as *as, + dma_addr_t iova, unsigned long pfn) +{ + struct page *page; + unsigned long *vaddr; + + page = pfn_to_page(pfn); + vaddr = page_address(page); + if (!vaddr) + return; + + vaddr[0] = iova; + vaddr[1] = pfn << PAGE_SHIFT; + FLUSH_CPU_DCACHE(vaddr, page, sizeof(vaddr[0]) * 2); +} +#else +static inline void put_signature(struct smmu_as *as, + unsigned long addr, unsigned long pfn) +{ +} +#endif + +/* + * Caller must not hold as->lock + */ +static int alloc_pdir(struct smmu_as *as) +{ + unsigned long *pdir, flags; + int pdn, err = 0; + u32 val; + struct smmu_device *smmu = as->smmu; + 
struct page *page; + unsigned int *cnt; + + /* + * do the allocation, then grab as->lock + */ + cnt = devm_kzalloc(smmu->dev, + sizeof(cnt[0]) * SMMU_PDIR_COUNT, + GFP_KERNEL); + page = alloc_page(GFP_KERNEL | __GFP_DMA); + + spin_lock_irqsave(&as->lock, flags); + + if (as->pdir_page) { + /* We raced, free the redundant */ + err = -EAGAIN; + goto err_out; + } + + if (!page || !cnt) { + dev_err(smmu->dev, "failed to allocate at %s\n", __func__); + err = -ENOMEM; + goto err_out; + } + + as->pdir_page = page; + as->pte_count = cnt; + + SetPageReserved(as->pdir_page); + pdir = page_address(as->pdir_page); + + for (pdn = 0; pdn < SMMU_PDIR_COUNT; pdn++) + pdir[pdn] = _PDE_VACANT(pdn); + FLUSH_CPU_DCACHE(pdir, as->pdir_page, SMMU_PDIR_SIZE); + val = SMMU_PTC_FLUSH_TYPE_ADR | VA_PAGE_TO_PA(pdir, as->pdir_page); + smmu_write(smmu, val, SMMU_PTC_FLUSH); + FLUSH_SMMU_REGS(as->smmu); + val = SMMU_TLB_FLUSH_VA_MATCH_ALL | + SMMU_TLB_FLUSH_ASID_MATCH__ENABLE | + (as->asid << SMMU_TLB_FLUSH_ASID_SHIFT); + smmu_write(smmu, val, SMMU_TLB_FLUSH); + FLUSH_SMMU_REGS(as->smmu); + + spin_unlock_irqrestore(&as->lock, flags); + + return 0; + +err_out: + spin_unlock_irqrestore(&as->lock, flags); + + devm_kfree(smmu->dev, cnt); + if (page) + __free_page(page); + return err; +} + +static void __smmu_iommu_unmap(struct smmu_as *as, dma_addr_t iova) +{ + unsigned long *pte; + struct page *page; + unsigned int *count; + + pte = locate_pte(as, iova, false, &page, &count); + if (WARN_ON(!pte)) + return; + + if (WARN_ON(*pte == _PTE_VACANT(iova))) + return; + + *pte = _PTE_VACANT(iova); + FLUSH_CPU_DCACHE(pte, page, sizeof(*pte)); + flush_ptc_and_tlb(as->smmu, as, iova, pte, page, 0); + if (!--(*count)) + free_ptbl(as, iova); +} + +static void __smmu_iommu_map_pfn(struct smmu_as *as, dma_addr_t iova, + unsigned long pfn) +{ + struct smmu_device *smmu = as->smmu; + unsigned long *pte; + unsigned int *count; + struct page *page; + + pte = locate_pte(as, iova, true, &page, &count); + if (WARN_ON(!pte)) + return; + + if (*pte == _PTE_VACANT(iova)) + (*count)++; + *pte = SMMU_PFN_TO_PTE(pfn, as->pte_attr); + if (unlikely((*pte == _PTE_VACANT(iova)))) + (*count)--; + FLUSH_CPU_DCACHE(pte, page, sizeof(*pte)); + flush_ptc_and_tlb(smmu, as, iova, pte, page, 0); + put_signature(as, iova, pfn); +} + +static int smmu_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t pa, size_t bytes, int prot) +{ + struct smmu_as *as = domain->priv; + unsigned long pfn = __phys_to_pfn(pa); + unsigned long flags; + + dev_dbg(as->smmu->dev, "[%d] %08lx:%pa\n", as->asid, iova, &pa); + + if (!pfn_valid(pfn)) + return -ENOMEM; + + spin_lock_irqsave(&as->lock, flags); + __smmu_iommu_map_pfn(as, iova, pfn); + spin_unlock_irqrestore(&as->lock, flags); + return 0; +} + +static size_t smmu_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t bytes) +{ + struct smmu_as *as = domain->priv; + unsigned long flags; + + dev_dbg(as->smmu->dev, "[%d] %08lx\n", as->asid, iova); + + spin_lock_irqsave(&as->lock, flags); + __smmu_iommu_unmap(as, iova); + spin_unlock_irqrestore(&as->lock, flags); + return SMMU_PAGE_SIZE; +} + +static phys_addr_t smmu_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct smmu_as *as = domain->priv; + unsigned long *pte; + unsigned int *count; + struct page *page; + unsigned long pfn; + unsigned long flags; + + spin_lock_irqsave(&as->lock, flags); + + pte = locate_pte(as, iova, true, &page, &count); + pfn = *pte & SMMU_PFN_MASK; + WARN_ON(!pfn_valid(pfn)); + 
dev_dbg(as->smmu->dev, + "iova:%08llx pfn:%08lx asid:%d\n", (unsigned long long)iova, + pfn, as->asid); + + spin_unlock_irqrestore(&as->lock, flags); + return PFN_PHYS(pfn); +} + +static int smmu_iommu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + return 0; +} + +static int smmu_iommu_attach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct smmu_as *as = domain->priv; + struct smmu_device *smmu = as->smmu; + struct smmu_client *client, *c; + u32 map; + int err; + + client = devm_kzalloc(smmu->dev, sizeof(*c), GFP_KERNEL); + if (!client) + return -ENOMEM; + client->dev = dev; + client->as = as; + map = (unsigned long)dev->platform_data; + if (!map) + return -EINVAL; + + err = smmu_client_enable_hwgrp(client, map); + if (err) + goto err_hwgrp; + + spin_lock(&as->client_lock); + list_for_each_entry(c, &as->client, list) { + if (c->dev == dev) { + dev_err(smmu->dev, + "%s is already attached\n", dev_name(c->dev)); + err = -EINVAL; + goto err_client; + } + } + list_add(&client->list, &as->client); + spin_unlock(&as->client_lock); + + /* + * Reserve "page zero" for AVP vectors using a common dummy + * page. + */ + if (map & HWG_AVPC) { + struct page *page; + + page = as->smmu->avp_vector_page; + __smmu_iommu_map_pfn(as, 0, page_to_pfn(page)); + + pr_info("Reserve \"page zero\" for AVP vectors using a common dummy\n"); + } + + dev_dbg(smmu->dev, "%s is attached\n", dev_name(dev)); + return 0; + +err_client: + smmu_client_disable_hwgrp(client); + spin_unlock(&as->client_lock); +err_hwgrp: + devm_kfree(smmu->dev, client); + return err; +} + +static void smmu_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct smmu_as *as = domain->priv; + struct smmu_device *smmu = as->smmu; + struct smmu_client *c; + + spin_lock(&as->client_lock); + + list_for_each_entry(c, &as->client, list) { + if (c->dev == dev) { + smmu_client_disable_hwgrp(c); + list_del(&c->list); + devm_kfree(smmu->dev, c); + c->as = NULL; + dev_dbg(smmu->dev, + "%s is detached\n", dev_name(c->dev)); + goto out; + } + } + dev_err(smmu->dev, "Couldn't find %s\n", dev_name(dev)); +out: + spin_unlock(&as->client_lock); +} + +static int smmu_iommu_domain_init(struct iommu_domain *domain) +{ + int i, err = -EAGAIN; + unsigned long flags; + struct smmu_as *as; + struct smmu_device *smmu = smmu_handle; + + /* Look for a free AS with lock held */ + for (i = 0; i < smmu->num_as; i++) { + as = &smmu->as[i]; + + if (as->pdir_page) + continue; + + err = alloc_pdir(as); + if (!err) + goto found; + + if (err != -EAGAIN) + break; + } + if (i == smmu->num_as) + dev_err(smmu->dev, "no free AS\n"); + return err; + +found: + spin_lock_irqsave(&smmu->lock, flags); + + /* Update PDIR register */ + smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID); + smmu_write(smmu, + SMMU_MK_PDIR(as->pdir_page, as->pdir_attr), SMMU_PTB_DATA); + FLUSH_SMMU_REGS(smmu); + + spin_unlock_irqrestore(&smmu->lock, flags); + + domain->priv = as; + + domain->geometry.aperture_start = smmu->iovmm_base; + domain->geometry.aperture_end = smmu->iovmm_base + + smmu->page_count * SMMU_PAGE_SIZE - 1; + domain->geometry.force_aperture = true; + + dev_dbg(smmu->dev, "smmu_as@%p\n", as); + + return 0; +} + +static void smmu_iommu_domain_destroy(struct iommu_domain *domain) +{ + struct smmu_as *as = domain->priv; + struct smmu_device *smmu = as->smmu; + unsigned long flags; + + spin_lock_irqsave(&as->lock, flags); + + if (as->pdir_page) { + spin_lock(&smmu->lock); + smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), 
SMMU_PTB_ASID); + smmu_write(smmu, SMMU_PTB_DATA_RESET_VAL, SMMU_PTB_DATA); + FLUSH_SMMU_REGS(smmu); + spin_unlock(&smmu->lock); + + free_pdir(as); + } + + if (!list_empty(&as->client)) { + struct smmu_client *c; + + list_for_each_entry(c, &as->client, list) + smmu_iommu_detach_dev(domain, c->dev); + } + + spin_unlock_irqrestore(&as->lock, flags); + + domain->priv = NULL; + dev_dbg(smmu->dev, "smmu_as@%p\n", as); +} + +static struct iommu_ops smmu_iommu_ops = { + .domain_init = smmu_iommu_domain_init, + .domain_destroy = smmu_iommu_domain_destroy, + .attach_dev = smmu_iommu_attach_dev, + .detach_dev = smmu_iommu_detach_dev, + .map = smmu_iommu_map, + .unmap = smmu_iommu_unmap, + .iova_to_phys = smmu_iommu_iova_to_phys, + .domain_has_cap = smmu_iommu_domain_has_cap, + .pgsize_bitmap = SMMU_IOMMU_PGSIZES, +}; + +/* Should be in the order of enum */ +static const char * const smmu_debugfs_mc[] = { "mc", }; +static const char * const smmu_debugfs_cache[] = { "tlb", "ptc", }; + +static ssize_t smmu_debugfs_stats_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *pos) +{ + struct smmu_debugfs_info *info; + struct smmu_device *smmu; + int i; + enum { + _OFF = 0, + _ON, + _RESET, + }; + const char * const command[] = { + [_OFF] = "off", + [_ON] = "on", + [_RESET] = "reset", + }; + char str[] = "reset"; + u32 val; + size_t offs; + + count = min_t(size_t, count, sizeof(str)); + if (copy_from_user(str, buffer, count)) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(command); i++) + if (strncmp(str, command[i], + strlen(command[i])) == 0) + break; + + if (i == ARRAY_SIZE(command)) + return -EINVAL; + + info = file_inode(file)->i_private; + smmu = info->smmu; + + offs = SMMU_CACHE_CONFIG(info->cache); + val = smmu_read(smmu, offs); + switch (i) { + case _OFF: + val &= ~SMMU_CACHE_CONFIG_STATS_ENABLE; + val &= ~SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + break; + case _ON: + val |= SMMU_CACHE_CONFIG_STATS_ENABLE; + val &= ~SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + break; + case _RESET: + val |= SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + val &= ~SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + break; + default: + BUG(); + break; + } + + dev_dbg(smmu->dev, "%s() %08x, %08x @%08x\n", __func__, + val, smmu_read(smmu, offs), offs); + + return count; +} + +static int smmu_debugfs_stats_show(struct seq_file *s, void *v) +{ + struct smmu_debugfs_info *info = s->private; + struct smmu_device *smmu = info->smmu; + int i; + const char * const stats[] = { "hit", "miss", }; + + + for (i = 0; i < ARRAY_SIZE(stats); i++) { + u32 val; + size_t offs; + + offs = SMMU_STATS_CACHE_COUNT(info->mc, info->cache, i); + val = smmu_read(smmu, offs); + seq_printf(s, "%s:%08x ", stats[i], val); + + dev_dbg(smmu->dev, "%s() %s %08x @%08x\n", __func__, + stats[i], val, offs); + } + seq_printf(s, "\n"); + return 0; +} + +static int smmu_debugfs_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, smmu_debugfs_stats_show, inode->i_private); +} + +static const struct file_operations smmu_debugfs_stats_fops = { + .open = smmu_debugfs_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = smmu_debugfs_stats_write, +}; + +static void smmu_debugfs_delete(struct smmu_device *smmu) +{ + debugfs_remove_recursive(smmu->debugfs_root); + kfree(smmu->debugfs_info); +} + +static void smmu_debugfs_create(struct smmu_device *smmu) +{ + int i; + size_t bytes; + struct dentry *root; + 
+ bytes = ARRAY_SIZE(smmu_debugfs_mc) * ARRAY_SIZE(smmu_debugfs_cache) * + sizeof(*smmu->debugfs_info); + smmu->debugfs_info = kmalloc(bytes, GFP_KERNEL); + if (!smmu->debugfs_info) + return; + + root = debugfs_create_dir(dev_name(smmu->dev), NULL); + if (!root) + goto err_out; + smmu->debugfs_root = root; + + for (i = 0; i < ARRAY_SIZE(smmu_debugfs_mc); i++) { + int j; + struct dentry *mc; + + mc = debugfs_create_dir(smmu_debugfs_mc[i], root); + if (!mc) + goto err_out; + + for (j = 0; j < ARRAY_SIZE(smmu_debugfs_cache); j++) { + struct dentry *cache; + struct smmu_debugfs_info *info; + + info = smmu->debugfs_info; + info += i * ARRAY_SIZE(smmu_debugfs_mc) + j; + info->smmu = smmu; + info->mc = i; + info->cache = j; + + cache = debugfs_create_file(smmu_debugfs_cache[j], + S_IWUGO | S_IRUGO, mc, + (void *)info, + &smmu_debugfs_stats_fops); + if (!cache) + goto err_out; + } + } + + return; + +err_out: + smmu_debugfs_delete(smmu); +} + +static int tegra_smmu_suspend(struct device *dev) +{ + struct smmu_device *smmu = dev_get_drvdata(dev); + + smmu->translation_enable_0 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_0); + smmu->translation_enable_1 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_1); + smmu->translation_enable_2 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_2); + smmu->asid_security = smmu_read(smmu, SMMU_ASID_SECURITY); + return 0; +} + +static int tegra_smmu_resume(struct device *dev) +{ + struct smmu_device *smmu = dev_get_drvdata(dev); + unsigned long flags; + int err; + + spin_lock_irqsave(&smmu->lock, flags); + err = smmu_setup_regs(smmu); + spin_unlock_irqrestore(&smmu->lock, flags); + return err; +} + +static int tegra_smmu_probe(struct platform_device *pdev) +{ + struct smmu_device *smmu; + struct device *dev = &pdev->dev; + int i, asids, err = 0; + dma_addr_t uninitialized_var(base); + size_t bytes, uninitialized_var(size); + + if (smmu_handle) + return -EIO; + + BUILD_BUG_ON(PAGE_SHIFT != SMMU_PAGE_SHIFT); + + if (of_property_read_u32(dev->of_node, "nvidia,#asids", &asids)) + return -ENODEV; + + bytes = sizeof(*smmu) + asids * sizeof(*smmu->as); + smmu = devm_kzalloc(dev, bytes, GFP_KERNEL); + if (!smmu) { + dev_err(dev, "failed to allocate smmu_device\n"); + return -ENOMEM; + } + + smmu->nregs = pdev->num_resources; + smmu->regs = devm_kzalloc(dev, 2 * smmu->nregs * sizeof(*smmu->regs), + GFP_KERNEL); + smmu->rege = smmu->regs + smmu->nregs; + if (!smmu->regs) + return -ENOMEM; + for (i = 0; i < smmu->nregs; i++) { + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, i); + smmu->regs[i] = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(smmu->regs[i])) + return PTR_ERR(smmu->regs[i]); + smmu->rege[i] = smmu->regs[i] + resource_size(res) - 1; + } + /* Same as "mc" 1st register block start address */ + smmu->regbase = (void __iomem *)((u32)smmu->regs[0] & PAGE_MASK); + + err = of_get_dma_window(dev->of_node, NULL, 0, NULL, &base, &size); + if (err) + return -ENODEV; + + if (size & SMMU_PAGE_MASK) + return -EINVAL; + + size >>= SMMU_PAGE_SHIFT; + if (!size) + return -EINVAL; + + smmu->ahb = of_parse_phandle(dev->of_node, "nvidia,ahb", 0); + if (!smmu->ahb) + return -ENODEV; + + smmu->dev = dev; + smmu->num_as = asids; + smmu->iovmm_base = base; + smmu->page_count = size; + + smmu->translation_enable_0 = ~0; + smmu->translation_enable_1 = ~0; + smmu->translation_enable_2 = ~0; + smmu->asid_security = 0; + + for (i = 0; i < smmu->num_as; i++) { + struct smmu_as *as = &smmu->as[i]; + + as->smmu = smmu; + as->asid = i; + as->pdir_attr = _PDIR_ATTR; + 
as->pde_attr = _PDE_ATTR; + as->pte_attr = _PTE_ATTR; + + spin_lock_init(&as->lock); + spin_lock_init(&as->client_lock); + INIT_LIST_HEAD(&as->client); + } + spin_lock_init(&smmu->lock); + err = smmu_setup_regs(smmu); + if (err) + return err; + platform_set_drvdata(pdev, smmu); + + smmu->avp_vector_page = alloc_page(GFP_KERNEL); + if (!smmu->avp_vector_page) + return -ENOMEM; + + smmu_debugfs_create(smmu); + smmu_handle = smmu; + bus_set_iommu(&platform_bus_type, &smmu_iommu_ops); + return 0; +} + +static int tegra_smmu_remove(struct platform_device *pdev) +{ + struct smmu_device *smmu = platform_get_drvdata(pdev); + int i; + + smmu_debugfs_delete(smmu); + + smmu_write(smmu, SMMU_CONFIG_DISABLE, SMMU_CONFIG); + for (i = 0; i < smmu->num_as; i++) + free_pdir(&smmu->as[i]); + __free_page(smmu->avp_vector_page); + smmu_handle = NULL; + return 0; +} + +static const struct dev_pm_ops tegra_smmu_pm_ops = { + .suspend = tegra_smmu_suspend, + .resume = tegra_smmu_resume, +}; + +static struct of_device_id tegra_smmu_of_match[] = { + { .compatible = "nvidia,tegra30-smmu", }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_smmu_of_match); + +static struct platform_driver tegra_smmu_driver = { + .probe = tegra_smmu_probe, + .remove = tegra_smmu_remove, + .driver = { + .owner = THIS_MODULE, + .name = "tegra-smmu", + .pm = &tegra_smmu_pm_ops, + .of_match_table = tegra_smmu_of_match, + }, +}; + +static int tegra_smmu_init(void) +{ + return platform_driver_register(&tegra_smmu_driver); +} + +static void __exit tegra_smmu_exit(void) +{ + platform_driver_unregister(&tegra_smmu_driver); +} + +subsys_initcall(tegra_smmu_init); +module_exit(tegra_smmu_exit); + +MODULE_DESCRIPTION("IOMMU API for SMMU in Tegra30"); +MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>"); +MODULE_ALIAS("platform:tegra-smmu"); +MODULE_LICENSE("GPL v2"); |
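For reference, the tegra-smmu driver above keeps a two-level page table per address space: a 1024-entry page directory (SMMU_PDIR_COUNT) whose PDEs point at 1024-entry page tables (SMMU_PTBL_COUNT) of 4 KiB pages, with readable/writable/non-secure attribute bits in the top bits of each entry. The stand-alone C sketch below is not part of the patch; it only mirrors the SMMU_ADDR_TO_PDN / SMMU_ADDR_TO_PFN / SMMU_PFN_TO_PTE arithmetic to show how a 32-bit IOVA is decomposed, and the EX_/ex_ names and sample values are invented here purely for illustration.

#include <stdio.h>
#include <stdint.h>

/* Mirrors SMMU_PAGE_SHIFT, SMMU_PTBL_COUNT and the _READABLE/_WRITABLE/
 * _NONSECURE attribute shifts from tegra-smmu.c; all EX_* names are
 * invented for this example. */
#define EX_PAGE_SHIFT  12
#define EX_PTBL_COUNT  1024
#define EX_READABLE    (1u << 31)
#define EX_WRITABLE    (1u << 30)
#define EX_NONSECURE   (1u << 29)

static uint32_t ex_addr_to_pdn(uint32_t iova) { return iova >> 22; }            /* as SMMU_ADDR_TO_PDN */
static uint32_t ex_addr_to_pfn(uint32_t iova) { return iova >> EX_PAGE_SHIFT; } /* as SMMU_ADDR_TO_PFN */

int main(void)
{
	uint32_t iova = 0x12345678;                           /* example IOVA, illustrative only */
	uint32_t pdn  = ex_addr_to_pdn(iova);                 /* page-directory index */
	uint32_t ptn  = ex_addr_to_pfn(iova) % EX_PTBL_COUNT; /* page-table index, as in locate_pte() */
	uint32_t off  = iova & ((1u << EX_PAGE_SHIFT) - 1);   /* offset within the 4 KiB page */
	uint32_t pfn  = 0x0007f;                              /* fake target page frame number */
	/* A PTE is the target pfn plus attribute bits, as in SMMU_PFN_TO_PTE(pfn, as->pte_attr). */
	uint32_t pte  = pfn | EX_READABLE | EX_WRITABLE | EX_NONSECURE;

	printf("iova 0x%08x -> pdir[%u] ptbl[%u] offset 0x%03x\n", iova, pdn, ptn, off);
	printf("example pte 0x%08x\n", pte);
	return 0;
}

With this layout a 4 KiB mapping touches exactly one PTE, and the driver keeps a per-PDE use count (as->pte_count[pdn]) so free_ptbl() can release a whole second-level table once its last PTE is cleared in __smmu_iommu_unmap().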
