Diffstat (limited to 'drivers/iommu')
30 files changed, 4118 insertions, 1982 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index fe302e33f72..d260605e6d5 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -52,7 +52,7 @@ config AMD_IOMMU  	select PCI_PRI  	select PCI_PASID  	select IOMMU_API -	depends on X86_64 && PCI && ACPI && X86_IO_APIC +	depends on X86_64 && PCI && ACPI  	---help---  	  With this option you can enable support for AMD IOMMU hardware in  	  your system. An IOMMU is a hardware component which provides @@ -178,13 +178,13 @@ config TEGRA_IOMMU_SMMU  config EXYNOS_IOMMU  	bool "Exynos IOMMU Support" -	depends on ARCH_EXYNOS && EXYNOS_DEV_SYSMMU +	depends on ARCH_EXYNOS  	select IOMMU_API  	help -	  Support for the IOMMU(System MMU) of Samsung Exynos application -	  processor family. This enables H/W multimedia accellerators to see -	  non-linear physical memory chunks as a linear memory in their -	  address spaces +	  Support for the IOMMU (System MMU) of Samsung Exynos application +	  processor family. This enables H/W multimedia accelerators to see +	  non-linear physical memory chunks as linear memory in their +	  address space.  	  If unsure, say N here. @@ -193,9 +193,9 @@ config EXYNOS_IOMMU_DEBUG  	depends on EXYNOS_IOMMU  	help  	  Select this to see the detailed log message that shows what -	  happens in the IOMMU driver +	  happens in the IOMMU driver. -	  Say N unless you need kernel log message for IOMMU debugging +	  Say N unless you need kernel log message for IOMMU debugging.  config SHMOBILE_IPMMU  	bool @@ -206,7 +206,8 @@ config SHMOBILE_IPMMU_TLB  config SHMOBILE_IOMMU  	bool "IOMMU for Renesas IPMMU/IPMMUI"  	default n -	depends on (ARM && ARCH_SHMOBILE) +	depends on ARM +	depends on ARCH_SHMOBILE || COMPILE_TEST  	select IOMMU_API  	select ARM_DMA_USE_IOMMU  	select SHMOBILE_IPMMU @@ -271,6 +272,18 @@ config SHMOBILE_IOMMU_L1SIZE  	default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB  	default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB +config IPMMU_VMSA +	bool "Renesas VMSA-compatible IPMMU" +	depends on ARM_LPAE +	depends on ARCH_SHMOBILE || COMPILE_TEST +	select IOMMU_API +	select ARM_DMA_USE_IOMMU +	help +	  Support for the Renesas VMSA-compatible IPMMU Renesas found in the +	  R-Mobile APE6 and R-Car H2/M2 SoCs. + +	  If unsure, say N. 
+  config SPAPR_TCE_IOMMU  	bool "sPAPR TCE IOMMU Support"  	depends on PPC_POWERNV || PPC_PSERIES diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 14c1f474cf1..8893bad048e 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,4 +1,5 @@  obj-$(CONFIG_IOMMU_API) += iommu.o +obj-$(CONFIG_IOMMU_API) += iommu-traces.o  obj-$(CONFIG_OF_IOMMU)	+= of_iommu.o  obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o  obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o @@ -6,6 +7,7 @@ obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o  obj-$(CONFIG_ARM_SMMU) += arm-smmu.o  obj-$(CONFIG_DMAR_TABLE) += dmar.o  obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o +obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o  obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o  obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o  obj-$(CONFIG_OMAP_IOMMU) += omap-iommu2.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 72531f008a5..4aec6a29e31 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -248,8 +248,8 @@ static bool check_device(struct device *dev)  	if (!dev || !dev->dma_mask)  		return false; -	/* No device or no PCI device */ -	if (dev->bus != &pci_bus_type) +	/* No PCI device */ +	if (!dev_is_pci(dev))  		return false;  	devid = get_device_id(dev); @@ -963,7 +963,7 @@ static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,  	address &= ~(0xfffULL); -	cmd->data[0]  = pasid & PASID_MASK; +	cmd->data[0]  = pasid;  	cmd->data[1]  = domid;  	cmd->data[2]  = lower_32_bits(address);  	cmd->data[3]  = upper_32_bits(address); @@ -982,10 +982,10 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,  	address &= ~(0xfffULL);  	cmd->data[0]  = devid; -	cmd->data[0] |= (pasid & 0xff) << 16; +	cmd->data[0] |= ((pasid >> 8) & 0xff) << 16;  	cmd->data[0] |= (qdep  & 0xff) << 24;  	cmd->data[1]  = devid; -	cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16; +	cmd->data[1] |= (pasid & 0xff) << 16;  	cmd->data[2]  = lower_32_bits(address);  	cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;  	cmd->data[3]  = upper_32_bits(address); @@ -1001,7 +1001,7 @@ static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,  	cmd->data[0]  = devid;  	if (gn) { -		cmd->data[1]  = pasid & PASID_MASK; +		cmd->data[1]  = pasid;  		cmd->data[2]  = CMD_INV_IOMMU_PAGES_GN_MASK;  	}  	cmd->data[3]  = tag & 0x1ff; @@ -3499,8 +3499,6 @@ int __init amd_iommu_init_passthrough(void)  {  	struct iommu_dev_data *dev_data;  	struct pci_dev *dev = NULL; -	struct amd_iommu *iommu; -	u16 devid;  	int ret;  	ret = alloc_passthrough_domain(); @@ -3514,12 +3512,6 @@ int __init amd_iommu_init_passthrough(void)  		dev_data = get_dev_data(&dev->dev);  		dev_data->passthrough = true; -		devid = get_device_id(&dev->dev); - -		iommu = amd_iommu_rlookup_table[devid]; -		if (!iommu) -			continue; -  		attach_device(&dev->dev, pt_domain);  	} @@ -3999,7 +3991,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)  	iommu_flush_dte(iommu, devid);  	if (devid != alias) {  		irq_lookup_table[alias] = table; -		set_dte_irq_entry(devid, table); +		set_dte_irq_entry(alias, table);  		iommu_flush_dte(iommu, alias);  	} diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 8f798be6e39..0e08545d729 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -26,7 +26,6 @@  #include <linux/msi.h>  #include <linux/amd-iommu.h>  #include <linux/export.h> -#include <acpi/acpi.h>  
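
Aside: the two PASID hunks in amd_iommu.c just above and amd_iommu_init.c just below belong together. The command-build fix puts PASID[15:8] into data[0] bits 16-23 and PASID[7:0] into data[1] bits 16-23 (the old code had the halves reversed and used an over-wide 12-bit mask), and the init path now derives the largest representable PASID value, (1 << (pasmax + 1)) - 1, from the capability field instead of a power-of-two count. A minimal standalone sketch of both calculations, assuming a plain C toolchain; struct fake_cmd and the helper names are stand-ins, not the kernel's types:

/*
 * Standalone sketch (plain C, not kernel code) of the PASID handling in
 * the surrounding hunks. "struct fake_cmd" stands in for struct iommu_cmd;
 * the field layout follows the patched code.
 */
#include <stdint.h>
#include <stdio.h>

struct fake_cmd { uint32_t data[4]; };

/* Mirrors build_inv_iotlb_pasid() after the fix: PASID[15:8] goes into
 * data[0] bits 16-23, PASID[7:0] into data[1] bits 16-23. */
static void pack_inv_iotlb_pasid(struct fake_cmd *cmd, uint16_t devid,
                                 int pasid, int qdep)
{
	cmd->data[0]  = devid;
	cmd->data[0] |= ((pasid >> 8) & 0xff) << 16;
	cmd->data[0] |= (qdep & 0xff) << 24;
	cmd->data[1]  = devid;
	cmd->data[1] |= (pasid & 0xff) << 16;
}

/* Mirrors iommu_init_pci(): turn the PASmax capability field into the
 * largest PASID value, e.g. pasmax = 15 -> 0xffff. */
static uint32_t max_pasid_from_cap(unsigned int pasmax_field)
{
	return (1u << (pasmax_field + 1)) - 1;
}

int main(void)
{
	struct fake_cmd cmd = { { 0 } };

	pack_inv_iotlb_pasid(&cmd, 0x1234, 0xbeef, 3);
	printf("data[0]=%08x data[1]=%08x max_pasid(15)=%#x\n",
	       cmd.data[0], cmd.data[1], max_pasid_from_cap(15));
	return 0;
}
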
#include <asm/pci-direct.h>  #include <asm/iommu.h>  #include <asm/gart.h> @@ -151,7 +150,7 @@ int amd_iommus_present;  bool amd_iommu_np_cache __read_mostly;  bool amd_iommu_iotlb_sup __read_mostly = true; -u32 amd_iommu_max_pasids __read_mostly = ~0; +u32 amd_iommu_max_pasid __read_mostly = ~0;  bool amd_iommu_v2_present __read_mostly;  bool amd_iommu_pc_present __read_mostly; @@ -789,7 +788,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)  		 * per device. But we can enable the exclusion range per  		 * device. This is done here  		 */ -		set_dev_entry_bit(m->devid, DEV_ENTRY_EX); +		set_dev_entry_bit(devid, DEV_ENTRY_EX);  		iommu->exclusion_start = m->range_start;  		iommu->exclusion_length = m->range_length;  	} @@ -1232,14 +1231,16 @@ static int iommu_init_pci(struct amd_iommu *iommu)  	if (iommu_feature(iommu, FEATURE_GT)) {  		int glxval; -		u32 pasids; -		u64 shift; +		u32 max_pasid; +		u64 pasmax; -		shift   = iommu->features & FEATURE_PASID_MASK; -		shift >>= FEATURE_PASID_SHIFT; -		pasids  = (1 << shift); +		pasmax = iommu->features & FEATURE_PASID_MASK; +		pasmax >>= FEATURE_PASID_SHIFT; +		max_pasid  = (1 << (pasmax + 1)) - 1; -		amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids); +		amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid); + +		BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);  		glxval   = iommu->features & FEATURE_GLXVAL_MASK;  		glxval >>= FEATURE_GLXVAL_SHIFT; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index e400fbe411d..f1a5abf11ac 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -25,6 +25,7 @@  #include <linux/list.h>  #include <linux/spinlock.h>  #include <linux/pci.h> +#include <linux/irqreturn.h>  /*   * Maximum number of IOMMUs supported @@ -98,7 +99,12 @@  #define FEATURE_GLXVAL_SHIFT	14  #define FEATURE_GLXVAL_MASK	(0x03ULL << FEATURE_GLXVAL_SHIFT) -#define PASID_MASK		0x000fffff +/* Note: + * The current driver only support 16-bit PASID. + * Currently, hardware only implement upto 16-bit PASID + * even though the spec says it could have upto 20 bits. 
+ */ +#define PASID_MASK		0x0000ffff  /* MMIO status bits */  #define MMIO_STATUS_EVT_INT_MASK	(1 << 1) @@ -696,8 +702,8 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap;   */  extern u32 amd_iommu_unmap_flush; -/* Smallest number of PASIDs supported by any IOMMU in the system */ -extern u32 amd_iommu_max_pasids; +/* Smallest max PASID supported by any IOMMU in the system */ +extern u32 amd_iommu_max_pasid;  extern bool amd_iommu_v2_present; diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 5208828792e..499b4366a98 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -45,17 +45,22 @@ struct pri_queue {  struct pasid_state {  	struct list_head list;			/* For global state-list */  	atomic_t count;				/* Reference count */ +	unsigned mmu_notifier_count;		/* Counting nested mmu_notifier +						   calls */  	struct task_struct *task;		/* Task bound to this PASID */  	struct mm_struct *mm;			/* mm_struct for the faults */  	struct mmu_notifier mn;                 /* mmu_otifier handle */  	struct pri_queue pri[PRI_QUEUE_SIZE];	/* PRI tag states */  	struct device_state *device_state;	/* Link to our device_state */  	int pasid;				/* PASID index */ -	spinlock_t lock;			/* Protect pri_queues */ +	spinlock_t lock;			/* Protect pri_queues and +						   mmu_notifer_count */  	wait_queue_head_t wq;			/* To wait for count == 0 */  };  struct device_state { +	struct list_head list; +	u16 devid;  	atomic_t count;  	struct pci_dev *pdev;  	struct pasid_state **states; @@ -81,13 +86,9 @@ struct fault {  	u16 flags;  }; -static struct device_state **state_table; +static LIST_HEAD(state_list);  static spinlock_t state_lock; -/* List and lock for all pasid_states */ -static LIST_HEAD(pasid_state_list); -static DEFINE_SPINLOCK(ps_lock); -  static struct workqueue_struct *iommu_wq;  /* @@ -99,7 +100,6 @@ static u64 *empty_page_table;  static void free_pasid_states(struct device_state *dev_state);  static void unbind_pasid(struct device_state *dev_state, int pasid); -static int task_exit(struct notifier_block *nb, unsigned long e, void *data);  static u16 device_id(struct pci_dev *pdev)  { @@ -111,13 +111,25 @@ static u16 device_id(struct pci_dev *pdev)  	return devid;  } +static struct device_state *__get_device_state(u16 devid) +{ +	struct device_state *dev_state; + +	list_for_each_entry(dev_state, &state_list, list) { +		if (dev_state->devid == devid) +			return dev_state; +	} + +	return NULL; +} +  static struct device_state *get_device_state(u16 devid)  {  	struct device_state *dev_state;  	unsigned long flags;  	spin_lock_irqsave(&state_lock, flags); -	dev_state = state_table[devid]; +	dev_state = __get_device_state(devid);  	if (dev_state != NULL)  		atomic_inc(&dev_state->count);  	spin_unlock_irqrestore(&state_lock, flags); @@ -158,29 +170,6 @@ static void put_device_state_wait(struct device_state *dev_state)  	free_device_state(dev_state);  } -static struct notifier_block profile_nb = { -	.notifier_call = task_exit, -}; - -static void link_pasid_state(struct pasid_state *pasid_state) -{ -	spin_lock(&ps_lock); -	list_add_tail(&pasid_state->list, &pasid_state_list); -	spin_unlock(&ps_lock); -} - -static void __unlink_pasid_state(struct pasid_state *pasid_state) -{ -	list_del(&pasid_state->list); -} - -static void unlink_pasid_state(struct pasid_state *pasid_state) -{ -	spin_lock(&ps_lock); -	__unlink_pasid_state(pasid_state); -	spin_unlock(&ps_lock); -} -  /* Must be called under dev_state->lock */  static struct pasid_state 
**__get_pasid_state_ptr(struct device_state *dev_state,  						  int pasid, bool alloc) @@ -337,7 +326,6 @@ static void unbind_pasid(struct device_state *dev_state, int pasid)  	if (pasid_state == NULL)  		return; -	unlink_pasid_state(pasid_state);  	__unbind_pasid(pasid_state);  	put_pasid_state_wait(pasid_state); /* Reference taken in this function */  } @@ -379,7 +367,12 @@ static void free_pasid_states(struct device_state *dev_state)  			continue;  		put_pasid_state(pasid_state); -		unbind_pasid(dev_state, i); + +		/* +		 * This will call the mn_release function and +		 * unbind the PASID +		 */ +		mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);  	}  	if (dev_state->pasid_levels == 2) @@ -439,12 +432,19 @@ static void mn_invalidate_range_start(struct mmu_notifier *mn,  {  	struct pasid_state *pasid_state;  	struct device_state *dev_state; +	unsigned long flags;  	pasid_state = mn_to_state(mn);  	dev_state   = pasid_state->device_state; -	amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid, -				  __pa(empty_page_table)); +	spin_lock_irqsave(&pasid_state->lock, flags); +	if (pasid_state->mmu_notifier_count == 0) { +		amd_iommu_domain_set_gcr3(dev_state->domain, +					  pasid_state->pasid, +					  __pa(empty_page_table)); +	} +	pasid_state->mmu_notifier_count += 1; +	spin_unlock_irqrestore(&pasid_state->lock, flags);  }  static void mn_invalidate_range_end(struct mmu_notifier *mn, @@ -453,15 +453,39 @@ static void mn_invalidate_range_end(struct mmu_notifier *mn,  {  	struct pasid_state *pasid_state;  	struct device_state *dev_state; +	unsigned long flags;  	pasid_state = mn_to_state(mn);  	dev_state   = pasid_state->device_state; -	amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid, -				  __pa(pasid_state->mm->pgd)); +	spin_lock_irqsave(&pasid_state->lock, flags); +	pasid_state->mmu_notifier_count -= 1; +	if (pasid_state->mmu_notifier_count == 0) { +		amd_iommu_domain_set_gcr3(dev_state->domain, +					  pasid_state->pasid, +					  __pa(pasid_state->mm->pgd)); +	} +	spin_unlock_irqrestore(&pasid_state->lock, flags); +} + +static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ +	struct pasid_state *pasid_state; +	struct device_state *dev_state; + +	might_sleep(); + +	pasid_state = mn_to_state(mn); +	dev_state   = pasid_state->device_state; + +	if (pasid_state->device_state->inv_ctx_cb) +		dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); + +	unbind_pasid(dev_state, pasid_state->pasid);  }  static struct mmu_notifier_ops iommu_mn = { +	.release		= mn_release,  	.clear_flush_young      = mn_clear_flush_young,  	.change_pte             = mn_change_pte,  	.invalidate_page        = mn_invalidate_page, @@ -504,8 +528,10 @@ static void do_fault(struct work_struct *work)  	write = !!(fault->flags & PPR_FAULT_WRITE); +	down_read(&fault->state->mm->mmap_sem);  	npages = get_user_pages(fault->state->task, fault->state->mm,  				fault->address, 1, write, 0, &page, NULL); +	up_read(&fault->state->mm->mmap_sem);  	if (npages == 1) {  		put_page(page); @@ -604,53 +630,6 @@ static struct notifier_block ppr_nb = {  	.notifier_call = ppr_notifier,  }; -static int task_exit(struct notifier_block *nb, unsigned long e, void *data) -{ -	struct pasid_state *pasid_state; -	struct task_struct *task; - -	task = data; - -	/* -	 * Using this notifier is a hack - but there is no other choice -	 * at the moment. What I really want is a sleeping notifier that -	 * is called when an MM goes down. But such a notifier doesn't -	 * exist yet. 
The notifier needs to sleep because it has to make -	 * sure that the device does not use the PASID and the address -	 * space anymore before it is destroyed. This includes waiting -	 * for pending PRI requests to pass the workqueue. The -	 * MMU-Notifiers would be a good fit, but they use RCU and so -	 * they are not allowed to sleep. Lets see how we can solve this -	 * in a more intelligent way in the future. -	 */ -again: -	spin_lock(&ps_lock); -	list_for_each_entry(pasid_state, &pasid_state_list, list) { -		struct device_state *dev_state; -		int pasid; - -		if (pasid_state->task != task) -			continue; - -		/* Drop Lock and unbind */ -		spin_unlock(&ps_lock); - -		dev_state = pasid_state->device_state; -		pasid     = pasid_state->pasid; - -		if (pasid_state->device_state->inv_ctx_cb) -			dev_state->inv_ctx_cb(dev_state->pdev, pasid); - -		unbind_pasid(dev_state, pasid); - -		/* Task may be in the list multiple times */ -		goto again; -	} -	spin_unlock(&ps_lock); - -	return NOTIFY_OK; -} -  int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,  			 struct task_struct *task)  { @@ -703,8 +682,6 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,  	if (ret)  		goto out_clear_state; -	link_pasid_state(pasid_state); -  	return 0;  out_clear_state: @@ -725,6 +702,7 @@ EXPORT_SYMBOL(amd_iommu_bind_pasid);  void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)  { +	struct pasid_state *pasid_state;  	struct device_state *dev_state;  	u16 devid; @@ -741,7 +719,17 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)  	if (pasid < 0 || pasid >= dev_state->max_pasids)  		goto out; -	unbind_pasid(dev_state, pasid); +	pasid_state = get_pasid_state(dev_state, pasid); +	if (pasid_state == NULL) +		goto out; +	/* +	 * Drop reference taken here. We are safe because we still hold +	 * the reference taken in the amd_iommu_bind_pasid function. 
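
Aside: the mmu_notifier_count logic added to mn_invalidate_range_start()/mn_invalidate_range_end() above only swaps the empty page table in on the first nested invalidation and restores the real one when the last invalidation finishes. Below is a minimal standalone sketch of that counting pattern, assuming a plain C build with pthreads; set_gcr3(), the mutex and the table strings are placeholders for the kernel primitives, not the driver's API:

/*
 * Standalone sketch of the nested-invalidation counting used by
 * mn_invalidate_range_start()/mn_invalidate_range_end() above.
 * set_gcr3() and the table names are placeholders, not kernel APIs.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int mmu_notifier_count;

static void set_gcr3(const char *table)	/* stand-in for amd_iommu_domain_set_gcr3() */
{
	printf("GCR3 now points at %s\n", table);
}

static void invalidate_range_start(void)
{
	pthread_mutex_lock(&lock);
	if (mmu_notifier_count == 0)	/* first nested start: block translations */
		set_gcr3("empty_page_table");
	mmu_notifier_count++;
	pthread_mutex_unlock(&lock);
}

static void invalidate_range_end(void)
{
	pthread_mutex_lock(&lock);
	mmu_notifier_count--;
	if (mmu_notifier_count == 0)	/* last nested end: restore the real tables */
		set_gcr3("mm->pgd");
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	invalidate_range_start();
	invalidate_range_start();	/* nested call: no table switch */
	invalidate_range_end();
	invalidate_range_end();		/* count back to 0: restore */
	return 0;
}
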
+	 */ +	put_pasid_state(pasid_state); + +	/* This will call the mn_release function and unbind the PASID */ +	mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);  out:  	put_device_state(dev_state); @@ -771,7 +759,8 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)  	spin_lock_init(&dev_state->lock);  	init_waitqueue_head(&dev_state->wq); -	dev_state->pdev = pdev; +	dev_state->pdev  = pdev; +	dev_state->devid = devid;  	tmp = pasids;  	for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) @@ -801,13 +790,13 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)  	spin_lock_irqsave(&state_lock, flags); -	if (state_table[devid] != NULL) { +	if (__get_device_state(devid) != NULL) {  		spin_unlock_irqrestore(&state_lock, flags);  		ret = -EBUSY;  		goto out_free_domain;  	} -	state_table[devid] = dev_state; +	list_add_tail(&dev_state->list, &state_list);  	spin_unlock_irqrestore(&state_lock, flags); @@ -839,13 +828,13 @@ void amd_iommu_free_device(struct pci_dev *pdev)  	spin_lock_irqsave(&state_lock, flags); -	dev_state = state_table[devid]; +	dev_state = __get_device_state(devid);  	if (dev_state == NULL) {  		spin_unlock_irqrestore(&state_lock, flags);  		return;  	} -	state_table[devid] = NULL; +	list_del(&dev_state->list);  	spin_unlock_irqrestore(&state_lock, flags); @@ -872,7 +861,7 @@ int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,  	spin_lock_irqsave(&state_lock, flags);  	ret = -EINVAL; -	dev_state = state_table[devid]; +	dev_state = __get_device_state(devid);  	if (dev_state == NULL)  		goto out_unlock; @@ -903,7 +892,7 @@ int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,  	spin_lock_irqsave(&state_lock, flags);  	ret = -EINVAL; -	dev_state = state_table[devid]; +	dev_state = __get_device_state(devid);  	if (dev_state == NULL)  		goto out_unlock; @@ -920,7 +909,6 @@ EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);  static int __init amd_iommu_v2_init(void)  { -	size_t state_table_size;  	int ret;  	pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>\n"); @@ -936,16 +924,10 @@ static int __init amd_iommu_v2_init(void)  	spin_lock_init(&state_lock); -	state_table_size = MAX_DEVICES * sizeof(struct device_state *); -	state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, -					       get_order(state_table_size)); -	if (state_table == NULL) -		return -ENOMEM; -  	ret = -ENOMEM;  	iommu_wq = create_workqueue("amd_iommu_v2");  	if (iommu_wq == NULL) -		goto out_free; +		goto out;  	ret = -ENOMEM;  	empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL); @@ -953,29 +935,24 @@ static int __init amd_iommu_v2_init(void)  		goto out_destroy_wq;  	amd_iommu_register_ppr_notifier(&ppr_nb); -	profile_event_register(PROFILE_TASK_EXIT, &profile_nb);  	return 0;  out_destroy_wq:  	destroy_workqueue(iommu_wq); -out_free: -	free_pages((unsigned long)state_table, get_order(state_table_size)); - +out:  	return ret;  }  static void __exit amd_iommu_v2_exit(void)  {  	struct device_state *dev_state; -	size_t state_table_size;  	int i;  	if (!amd_iommu_v2_supported())  		return; -	profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);  	amd_iommu_unregister_ppr_notifier(&ppr_nb);  	flush_workqueue(iommu_wq); @@ -998,9 +975,6 @@ static void __exit amd_iommu_v2_exit(void)  	destroy_workqueue(iommu_wq); -	state_table_size = MAX_DEVICES * sizeof(struct device_state *); -	free_pages((unsigned long)state_table, get_order(state_table_size)); -  	free_page((unsigned long)empty_page_table);  } diff --git a/drivers/iommu/arm-smmu.c 
b/drivers/iommu/arm-smmu.c index f417e89e1e7..1599354e974 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -24,7 +24,7 @@   *	- v7/v8 long-descriptor format   *	- Non-secure access to the SMMU   *	- 4k and 64k pages, with contiguous pte hints. - *	- Up to 39-bit addressing + *	- Up to 42-bit addressing (dependent on VA_BITS)   *	- Context fault reporting   */ @@ -48,7 +48,7 @@  #include <asm/pgalloc.h>  /* Maximum number of stream IDs assigned to a single device */ -#define MAX_MASTER_STREAMIDS		8 +#define MAX_MASTER_STREAMIDS		MAX_PHANDLE_ARGS  /* Maximum number of context banks per SMMU */  #define ARM_SMMU_MAX_CBS		128 @@ -60,13 +60,24 @@  #define ARM_SMMU_GR0(smmu)		((smmu)->base)  #define ARM_SMMU_GR1(smmu)		((smmu)->base + (smmu)->pagesize) +/* + * SMMU global address space with conditional offset to access secure + * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448, + * nsGFSYNR0: 0x450) + */ +#define ARM_SMMU_GR0_NS(smmu)						\ +	((smmu)->base +							\ +		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\ +			? 0x400 : 0)) +  /* Page table bits */ -#define ARM_SMMU_PTE_PAGE		(((pteval_t)3) << 0) +#define ARM_SMMU_PTE_XN			(((pteval_t)3) << 53)  #define ARM_SMMU_PTE_CONT		(((pteval_t)1) << 52)  #define ARM_SMMU_PTE_AF			(((pteval_t)1) << 10)  #define ARM_SMMU_PTE_SH_NS		(((pteval_t)0) << 8)  #define ARM_SMMU_PTE_SH_OS		(((pteval_t)2) << 8)  #define ARM_SMMU_PTE_SH_IS		(((pteval_t)3) << 8) +#define ARM_SMMU_PTE_PAGE		(((pteval_t)3) << 0)  #if PAGE_SIZE == SZ_4K  #define ARM_SMMU_PTE_CONT_ENTRIES	16 @@ -78,7 +89,6 @@  #define ARM_SMMU_PTE_CONT_SIZE		(PAGE_SIZE * ARM_SMMU_PTE_CONT_ENTRIES)  #define ARM_SMMU_PTE_CONT_MASK		(~(ARM_SMMU_PTE_CONT_SIZE - 1)) -#define ARM_SMMU_PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(pte_t))  /* Stage-1 PTE */  #define ARM_SMMU_PTE_AP_UNPRIV		(((pteval_t)1) << 6) @@ -190,6 +200,9 @@  #define ARM_SMMU_GR1_CBAR(n)		(0x0 + ((n) << 2))  #define CBAR_VMID_SHIFT			0  #define CBAR_VMID_MASK			0xff +#define CBAR_S1_BPSHCFG_SHIFT		8 +#define CBAR_S1_BPSHCFG_MASK		3 +#define CBAR_S1_BPSHCFG_NSH		3  #define CBAR_S1_MEMATTR_SHIFT		12  #define CBAR_S1_MEMATTR_MASK		0xf  #define CBAR_S1_MEMATTR_WB		0xf @@ -348,6 +361,9 @@ struct arm_smmu_device {  #define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)  #define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)  	u32				features; + +#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) +	u32				options;  	int				version;  	u32				num_context_banks; @@ -377,6 +393,7 @@ struct arm_smmu_cfg {  	u32				cbar;  	pgd_t				*pgd;  }; +#define INVALID_IRPTNDX			0xff  #define ARM_SMMU_CB_ASID(cfg)		((cfg)->cbndx)  #define ARM_SMMU_CB_VMID(cfg)		((cfg)->cbndx + 1) @@ -397,6 +414,29 @@ struct arm_smmu_domain {  static DEFINE_SPINLOCK(arm_smmu_devices_lock);  static LIST_HEAD(arm_smmu_devices); +struct arm_smmu_option_prop { +	u32 opt; +	const char *prop; +}; + +static struct arm_smmu_option_prop arm_smmu_options [] = { +	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" }, +	{ 0, NULL}, +}; + +static void parse_driver_options(struct arm_smmu_device *smmu) +{ +	int i = 0; +	do { +		if (of_property_read_bool(smmu->dev->of_node, +						arm_smmu_options[i].prop)) { +			smmu->options |= arm_smmu_options[i].opt; +			dev_notice(smmu->dev, "option %s\n", +				arm_smmu_options[i].prop); +		} +	} while (arm_smmu_options[++i].opt); +} +  static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu,  						struct device_node *dev_node)  { @@ -589,6 +629,9 @@ static irqreturn_t arm_smmu_context_fault(int 
irq, void *dev)  		ret = IRQ_HANDLED;  		resume = RESUME_RETRY;  	} else { +		dev_err_ratelimited(smmu->dev, +		    "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n", +		    iova, fsynr, root_cfg->cbndx);  		ret = IRQ_NONE;  		resume = RESUME_TERMINATE;  	} @@ -607,16 +650,16 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)  {  	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;  	struct arm_smmu_device *smmu = dev; -	void __iomem *gr0_base = ARM_SMMU_GR0(smmu); +	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);  	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR); -	if (!gfsr) -		return IRQ_NONE; -  	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);  	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);  	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2); +	if (!gfsr) +		return IRQ_NONE; +  	dev_err_ratelimited(smmu->dev,  		"Unexpected global fault, this could be serious\n");  	dev_err_ratelimited(smmu->dev, @@ -627,6 +670,28 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)  	return IRQ_HANDLED;  } +static void arm_smmu_flush_pgtable(struct arm_smmu_device *smmu, void *addr, +				   size_t size) +{ +	unsigned long offset = (unsigned long)addr & ~PAGE_MASK; + + +	/* Ensure new page tables are visible to the hardware walker */ +	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) { +		dsb(ishst); +	} else { +		/* +		 * If the SMMU can't walk tables in the CPU caches, treat them +		 * like non-coherent DMA since we need to flush the new entries +		 * all the way out to memory. There's no possibility of +		 * recursion here as the SMMU table walker will not be wired +		 * through another SMMU. +		 */ +		dma_map_page(smmu->dev, virt_to_page(addr), offset, size, +				DMA_TO_DEVICE); +	} +} +  static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)  {  	u32 reg; @@ -645,11 +710,16 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)  	if (smmu->version == 1)  	      reg |= root_cfg->irptndx << CBAR_IRPTNDX_SHIFT; -	/* Use the weakest memory type, so it is overridden by the pte */ -	if (stage1) -		reg |= (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT); -	else +	/* +	 * Use the weakest shareability/memory types, so they are +	 * overridden by the ttbcr/pte. 
+	 */ +	if (stage1) { +		reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) | +			(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT); +	} else {  		reg |= ARM_SMMU_CB_VMID(root_cfg) << CBAR_VMID_SHIFT; +	}  	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(root_cfg->cbndx));  	if (smmu->version > 1) { @@ -710,6 +780,8 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)  	}  	/* TTBR0 */ +	arm_smmu_flush_pgtable(smmu, root_cfg->pgd, +			       PTRS_PER_PGD * sizeof(pgd_t));  	reg = __pa(root_cfg->pgd);  	writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);  	reg = (phys_addr_t)__pa(root_cfg->pgd) >> 32; @@ -777,7 +849,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)  #ifdef __BIG_ENDIAN  	reg |= SCTLR_E;  #endif -	writel(reg, cb_base + ARM_SMMU_CB_SCTLR); +	writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);  }  static int arm_smmu_init_domain_context(struct iommu_domain *domain, @@ -840,7 +912,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,  	if (IS_ERR_VALUE(ret)) {  		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",  			root_cfg->irptndx, irq); -		root_cfg->irptndx = -1; +		root_cfg->irptndx = INVALID_IRPTNDX;  		goto out_free_context;  	} @@ -869,7 +941,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)  	writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);  	arm_smmu_tlb_inv_context(root_cfg); -	if (root_cfg->irptndx != -1) { +	if (root_cfg->irptndx != INVALID_IRPTNDX) {  		irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx];  		free_irq(irq, domain);  	} @@ -954,9 +1026,8 @@ static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain)  	/*  	 * Recursively free the page tables for this domain. We don't -	 * care about speculative TLB filling, because the TLB will be -	 * nuked next time this context bank is re-allocated and no devices -	 * currently map to these tables. +	 * care about speculative TLB filling because the tables should +	 * not be active in any context bank at this point (SCTLR.M is 0).  	 */  	pgd = pgd_base;  	for (i = 0; i < PTRS_PER_PGD; ++i) { @@ -1096,7 +1167,7 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,  	for (i = 0; i < master->num_streamids; ++i) {  		u32 idx, s2cr;  		idx = master->smrs ? master->smrs[i].idx : master->streamids[i]; -		s2cr = (S2CR_TYPE_TRANS << S2CR_TYPE_SHIFT) | +		s2cr = S2CR_TYPE_TRANS |  		       (smmu_domain->root_cfg.cbndx << S2CR_CBNDX_SHIFT);  		writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));  	} @@ -1123,6 +1194,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)  	struct arm_smmu_domain *smmu_domain = domain->priv;  	struct arm_smmu_device *device_smmu = dev->archdata.iommu;  	struct arm_smmu_master *master; +	unsigned long flags;  	if (!device_smmu) {  		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n"); @@ -1133,7 +1205,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)  	 * Sanity check the domain. We don't currently support domains  	 * that cross between different SMMU chains.  	 
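
Aside: the CBAR change above programs the weakest bypass shareability (non-shareable) together with write-back memory attributes for stage-1 contexts, so whatever the TTBCR and PTEs specify takes precedence. A minimal standalone sketch of the resulting register value, using the shift and field definitions introduced by this patch; cbar_value() is a stand-in and the remaining CBAR fields (type, IRPTNDX) are left out for brevity:

/*
 * Standalone sketch of the stage-1 CBAR bits composed in
 * arm_smmu_init_context_bank() above. Only the fields touched by the
 * patch are shown.
 */
#include <stdint.h>
#include <stdio.h>

#define CBAR_VMID_SHIFT		0
#define CBAR_S1_BPSHCFG_SHIFT	8
#define CBAR_S1_BPSHCFG_NSH	3	/* non-shareable: weakest, PTE wins */
#define CBAR_S1_MEMATTR_SHIFT	12
#define CBAR_S1_MEMATTR_WB	0xf	/* write-back: overridden by MAIR/PTE */

static uint32_t cbar_value(int stage1, uint32_t vmid)
{
	uint32_t reg = 0;

	if (stage1)
		reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
		       (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
	else
		reg |= vmid << CBAR_VMID_SHIFT;

	return reg;
}

int main(void)
{
	printf("stage-1 CBAR bits: %#x, stage-2 (VMID 5): %#x\n",
	       cbar_value(1, 0), cbar_value(0, 5));
	return 0;
}
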
*/ -	spin_lock(&smmu_domain->lock); +	spin_lock_irqsave(&smmu_domain->lock, flags);  	if (!smmu_domain->leaf_smmu) {  		/* Now that we have a master, we can finalise the domain */  		ret = arm_smmu_init_domain_context(domain, dev); @@ -1148,7 +1220,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)  			dev_name(device_smmu->dev));  		goto err_unlock;  	} -	spin_unlock(&smmu_domain->lock); +	spin_unlock_irqrestore(&smmu_domain->lock, flags);  	/* Looks ok, so add the device to the domain */  	master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node); @@ -1158,7 +1230,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)  	return arm_smmu_domain_add_master(smmu_domain, master);  err_unlock: -	spin_unlock(&smmu_domain->lock); +	spin_unlock_irqrestore(&smmu_domain->lock, flags);  	return ret;  } @@ -1172,23 +1244,6 @@ static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)  		arm_smmu_domain_remove_master(smmu_domain, master);  } -static void arm_smmu_flush_pgtable(struct arm_smmu_device *smmu, void *addr, -				   size_t size) -{ -	unsigned long offset = (unsigned long)addr & ~PAGE_MASK; - -	/* -	 * If the SMMU can't walk tables in the CPU caches, treat them -	 * like non-coherent DMA since we need to flush the new entries -	 * all the way out to memory. There's no possibility of recursion -	 * here as the SMMU table walker will not be wired through another -	 * SMMU. -	 */ -	if (!(smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)) -		dma_map_page(smmu->dev, virt_to_page(addr), offset, size, -			     DMA_TO_DEVICE); -} -  static bool arm_smmu_pte_is_contiguous_range(unsigned long addr,  					     unsigned long end)  { @@ -1198,46 +1253,50 @@ static bool arm_smmu_pte_is_contiguous_range(unsigned long addr,  static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd,  				   unsigned long addr, unsigned long end, -				   unsigned long pfn, int flags, int stage) +				   unsigned long pfn, int prot, int stage)  {  	pte_t *pte, *start; -	pteval_t pteval = ARM_SMMU_PTE_PAGE | ARM_SMMU_PTE_AF; +	pteval_t pteval = ARM_SMMU_PTE_PAGE | ARM_SMMU_PTE_AF | ARM_SMMU_PTE_XN;  	if (pmd_none(*pmd)) {  		/* Allocate a new set of tables */ -		pgtable_t table = alloc_page(PGALLOC_GFP); +		pgtable_t table = alloc_page(GFP_ATOMIC|__GFP_ZERO);  		if (!table)  			return -ENOMEM; -		arm_smmu_flush_pgtable(smmu, page_address(table), -				       ARM_SMMU_PTE_HWTABLE_SIZE); -		pgtable_page_ctor(table); +		arm_smmu_flush_pgtable(smmu, page_address(table), PAGE_SIZE); +		if (!pgtable_page_ctor(table)) { +			__free_page(table); +			return -ENOMEM; +		}  		pmd_populate(NULL, pmd, table);  		arm_smmu_flush_pgtable(smmu, pmd, sizeof(*pmd));  	}  	if (stage == 1) {  		pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG; -		if (!(flags & IOMMU_WRITE) && (flags & IOMMU_READ)) +		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))  			pteval |= ARM_SMMU_PTE_AP_RDONLY; -		if (flags & IOMMU_CACHE) +		if (prot & IOMMU_CACHE)  			pteval |= (MAIR_ATTR_IDX_CACHE <<  				   ARM_SMMU_PTE_ATTRINDX_SHIFT);  	} else {  		pteval |= ARM_SMMU_PTE_HAP_FAULT; -		if (flags & IOMMU_READ) +		if (prot & IOMMU_READ)  			pteval |= ARM_SMMU_PTE_HAP_READ; -		if (flags & IOMMU_WRITE) +		if (prot & IOMMU_WRITE)  			pteval |= ARM_SMMU_PTE_HAP_WRITE; -		if (flags & IOMMU_CACHE) +		if (prot & IOMMU_CACHE)  			pteval |= ARM_SMMU_PTE_MEMATTR_OIWB;  		else  			pteval |= ARM_SMMU_PTE_MEMATTR_NC;  	}  	/* If no access, create a faulting entry to avoid TLB 
fills */ -	if (!(flags & (IOMMU_READ | IOMMU_WRITE))) +	if (prot & IOMMU_EXEC) +		pteval &= ~ARM_SMMU_PTE_XN; +	else if (!(prot & (IOMMU_READ | IOMMU_WRITE)))  		pteval &= ~ARM_SMMU_PTE_PAGE;  	pteval |= ARM_SMMU_PTE_SH_IS; @@ -1299,7 +1358,7 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd,  static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud,  				   unsigned long addr, unsigned long end, -				   phys_addr_t phys, int flags, int stage) +				   phys_addr_t phys, int prot, int stage)  {  	int ret;  	pmd_t *pmd; @@ -1307,19 +1366,23 @@ static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud,  #ifndef __PAGETABLE_PMD_FOLDED  	if (pud_none(*pud)) { -		pmd = pmd_alloc_one(NULL, addr); +		pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC);  		if (!pmd)  			return -ENOMEM; + +		arm_smmu_flush_pgtable(smmu, pmd, PAGE_SIZE); +		pud_populate(NULL, pud, pmd); +		arm_smmu_flush_pgtable(smmu, pud, sizeof(*pud)); + +		pmd += pmd_index(addr);  	} else  #endif  		pmd = pmd_offset(pud, addr);  	do {  		next = pmd_addr_end(addr, end); -		ret = arm_smmu_alloc_init_pte(smmu, pmd, addr, end, pfn, -					      flags, stage); -		pud_populate(NULL, pud, pmd); -		arm_smmu_flush_pgtable(smmu, pud, sizeof(*pud)); +		ret = arm_smmu_alloc_init_pte(smmu, pmd, addr, next, pfn, +					      prot, stage);  		phys += next - addr;  	} while (pmd++, addr = next, addr < end); @@ -1328,7 +1391,7 @@ static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud,  static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd,  				   unsigned long addr, unsigned long end, -				   phys_addr_t phys, int flags, int stage) +				   phys_addr_t phys, int prot, int stage)  {  	int ret = 0;  	pud_t *pud; @@ -1336,9 +1399,15 @@ static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd,  #ifndef __PAGETABLE_PUD_FOLDED  	if (pgd_none(*pgd)) { -		pud = pud_alloc_one(NULL, addr); +		pud = (pud_t *)get_zeroed_page(GFP_ATOMIC);  		if (!pud)  			return -ENOMEM; + +		arm_smmu_flush_pgtable(smmu, pud, PAGE_SIZE); +		pgd_populate(NULL, pgd, pud); +		arm_smmu_flush_pgtable(smmu, pgd, sizeof(*pgd)); + +		pud += pud_index(addr);  	} else  #endif  		pud = pud_offset(pgd, addr); @@ -1346,9 +1415,7 @@ static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd,  	do {  		next = pud_addr_end(addr, end);  		ret = arm_smmu_alloc_init_pmd(smmu, pud, addr, next, phys, -					      flags, stage); -		pgd_populate(NULL, pud, pgd); -		arm_smmu_flush_pgtable(smmu, pgd, sizeof(*pgd)); +					      prot, stage);  		phys += next - addr;  	} while (pud++, addr = next, addr < end); @@ -1357,7 +1424,7 @@ static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd,  static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain,  				   unsigned long iova, phys_addr_t paddr, -				   size_t size, int flags) +				   size_t size, int prot)  {  	int ret, stage;  	unsigned long end; @@ -1365,6 +1432,7 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain,  	struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg;  	pgd_t *pgd = root_cfg->pgd;  	struct arm_smmu_device *smmu = root_cfg->smmu; +	unsigned long flags;  	if (root_cfg->cbar == CBAR_TYPE_S2_TRANS) {  		stage = 2; @@ -1387,14 +1455,14 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain,  	if (paddr & ~output_mask)  		return -ERANGE; -	spin_lock(&smmu_domain->lock); +	spin_lock_irqsave(&smmu_domain->lock, flags);  	pgd += 
pgd_index(iova);  	end = iova + size;  	do {  		unsigned long next = pgd_addr_end(iova, end);  		ret = arm_smmu_alloc_init_pud(smmu, pgd, iova, next, paddr, -					      flags, stage); +					      prot, stage);  		if (ret)  			goto out_unlock; @@ -1403,29 +1471,24 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain,  	} while (pgd++, iova != end);  out_unlock: -	spin_unlock(&smmu_domain->lock); - -	/* Ensure new page tables are visible to the hardware walker */ -	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) -		dsb(); +	spin_unlock_irqrestore(&smmu_domain->lock, flags);  	return ret;  }  static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, -			phys_addr_t paddr, size_t size, int flags) +			phys_addr_t paddr, size_t size, int prot)  {  	struct arm_smmu_domain *smmu_domain = domain->priv; -	struct arm_smmu_device *smmu = smmu_domain->leaf_smmu; -	if (!smmu_domain || !smmu) +	if (!smmu_domain)  		return -ENODEV;  	/* Check for silent address truncation up the SMMU chain. */  	if ((phys_addr_t)iova & ~smmu_domain->output_mask)  		return -ERANGE; -	return arm_smmu_handle_mapping(smmu_domain, iova, paddr, size, flags); +	return arm_smmu_handle_mapping(smmu_domain, iova, paddr, size, prot);  }  static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, @@ -1436,50 +1499,40 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,  	ret = arm_smmu_handle_mapping(smmu_domain, iova, 0, size, 0);  	arm_smmu_tlb_inv_context(&smmu_domain->root_cfg); -	return ret ? ret : size; +	return ret ? 0 : size;  }  static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,  					 dma_addr_t iova)  { -	pgd_t *pgd; -	pud_t *pud; -	pmd_t *pmd; -	pte_t *pte; +	pgd_t *pgdp, pgd; +	pud_t pud; +	pmd_t pmd; +	pte_t pte;  	struct arm_smmu_domain *smmu_domain = domain->priv;  	struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; -	struct arm_smmu_device *smmu = root_cfg->smmu; -	spin_lock(&smmu_domain->lock); -	pgd = root_cfg->pgd; -	if (!pgd) -		goto err_unlock; +	pgdp = root_cfg->pgd; +	if (!pgdp) +		return 0; -	pgd += pgd_index(iova); -	if (pgd_none_or_clear_bad(pgd)) -		goto err_unlock; +	pgd = *(pgdp + pgd_index(iova)); +	if (pgd_none(pgd)) +		return 0; -	pud = pud_offset(pgd, iova); -	if (pud_none_or_clear_bad(pud)) -		goto err_unlock; +	pud = *pud_offset(&pgd, iova); +	if (pud_none(pud)) +		return 0; -	pmd = pmd_offset(pud, iova); -	if (pmd_none_or_clear_bad(pmd)) -		goto err_unlock; +	pmd = *pmd_offset(&pud, iova); +	if (pmd_none(pmd)) +		return 0; -	pte = pmd_page_vaddr(*pmd) + pte_index(iova); +	pte = *(pmd_page_vaddr(pmd) + pte_index(iova));  	if (pte_none(pte)) -		goto err_unlock; - -	spin_unlock(&smmu_domain->lock); -	return __pfn_to_phys(pte_pfn(*pte)) | (iova & ~PAGE_MASK); +		return 0; -err_unlock: -	spin_unlock(&smmu_domain->lock); -	dev_warn(smmu->dev, -		 "invalid (corrupt?) 
page tables detected for iova 0x%llx\n", -		 (unsigned long long)iova); -	return -EINVAL; +	return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);  }  static int arm_smmu_domain_has_cap(struct iommu_domain *domain, @@ -1498,6 +1551,13 @@ static int arm_smmu_add_device(struct device *dev)  {  	struct arm_smmu_device *child, *parent, *smmu;  	struct arm_smmu_master *master = NULL; +	struct iommu_group *group; +	int ret; + +	if (dev->archdata.iommu) { +		dev_warn(dev, "IOMMU driver already assigned to device\n"); +		return -EINVAL; +	}  	spin_lock(&arm_smmu_devices_lock);  	list_for_each_entry(parent, &arm_smmu_devices, list) { @@ -1530,13 +1590,23 @@ static int arm_smmu_add_device(struct device *dev)  	if (!master)  		return -ENODEV; +	group = iommu_group_alloc(); +	if (IS_ERR(group)) { +		dev_err(dev, "Failed to allocate IOMMU group\n"); +		return PTR_ERR(group); +	} + +	ret = iommu_group_add_device(group, dev); +	iommu_group_put(group);  	dev->archdata.iommu = smmu; -	return 0; + +	return ret;  }  static void arm_smmu_remove_device(struct device *dev)  {  	dev->archdata.iommu = NULL; +	iommu_group_remove_device(dev);  }  static struct iommu_ops arm_smmu_ops = { @@ -1558,9 +1628,13 @@ static struct iommu_ops arm_smmu_ops = {  static void arm_smmu_device_reset(struct arm_smmu_device *smmu)  {  	void __iomem *gr0_base = ARM_SMMU_GR0(smmu); -	void __iomem *sctlr_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB_SCTLR; +	void __iomem *cb_base;  	int i = 0; -	u32 scr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sCR0); +	u32 reg; + +	/* clear global FSR */ +	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR); +	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);  	/* Mark all SMRn as invalid and all S2CRn as bypass */  	for (i = 0; i < smmu->num_mapping_groups; ++i) { @@ -1568,33 +1642,38 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)  		writel_relaxed(S2CR_TYPE_BYPASS, gr0_base + ARM_SMMU_GR0_S2CR(i));  	} -	/* Make sure all context banks are disabled */ -	for (i = 0; i < smmu->num_context_banks; ++i) -		writel_relaxed(0, sctlr_base + ARM_SMMU_CB(smmu, i)); +	/* Make sure all context banks are disabled and clear CB_FSR  */ +	for (i = 0; i < smmu->num_context_banks; ++i) { +		cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i); +		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); +		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR); +	}  	/* Invalidate the TLB, just in case */  	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL);  	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);  	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH); +	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); +  	/* Enable fault reporting */ -	scr0 |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE); +	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);  	/* Disable TLB broadcasting. 
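
Aside: the reset path above goes through ARM_SMMU_GR0_NS() so that, when the "calxeda,smmu-secure-config-access" option is set, global registers such as sGFSR and sCR0 are accessed via their non-secure aliases at a fixed 0x400 offset. A tiny standalone sketch of that conditional offset, with a plain integer standing in for the ioremapped base; fake_smmu and gr0_ns_base() are illustrative names, not the driver's:

/*
 * Standalone sketch of the ARM_SMMU_GR0_NS() offset logic used by the
 * reset code above.
 */
#include <stdint.h>
#include <stdio.h>

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS	(1 << 0)

struct fake_smmu {
	uintptr_t base;		/* would be the ioremapped register base */
	uint32_t options;
};

static uintptr_t gr0_ns_base(const struct fake_smmu *smmu)
{
	/* Secure-config-access quirk: use the non-secure alias at +0x400 */
	return smmu->base +
	       ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) ? 0x400 : 0);
}

int main(void)
{
	struct fake_smmu plain = { 0x10000000, 0 };
	struct fake_smmu quirk = { 0x10000000, ARM_SMMU_OPT_SECURE_CFG_ACCESS };

	printf("GR0: %#lx, GR0_NS with quirk: %#lx\n",
	       (unsigned long)gr0_ns_base(&plain),
	       (unsigned long)gr0_ns_base(&quirk));
	return 0;
}
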
*/ -	scr0 |= (sCR0_VMIDPNE | sCR0_PTM); +	reg |= (sCR0_VMIDPNE | sCR0_PTM);  	/* Enable client access, but bypass when no mapping is found */ -	scr0 &= ~(sCR0_CLIENTPD | sCR0_USFCFG); +	reg &= ~(sCR0_CLIENTPD | sCR0_USFCFG);  	/* Disable forced broadcasting */ -	scr0 &= ~sCR0_FB; +	reg &= ~sCR0_FB;  	/* Don't upgrade barriers */ -	scr0 &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT); +	reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);  	/* Push the button */  	arm_smmu_tlb_sync(smmu); -	writel(scr0, gr0_base + ARM_SMMU_GR0_sCR0); +	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);  }  static int arm_smmu_id_size_to_bits(int size) @@ -1699,13 +1778,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)  	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);  	smmu->pagesize = (id & ID1_PAGESIZE) ? SZ_64K : SZ_4K; -	/* Check that we ioremapped enough */ +	/* Check for size mismatch of SMMU address space from mapped region */  	size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);  	size *= (smmu->pagesize << 1); -	if (smmu->size < size) -		dev_warn(smmu->dev, -			 "device is 0x%lx bytes but only mapped 0x%lx!\n", -			 size, smmu->size); +	if (smmu->size != size) +		dev_warn(smmu->dev, "SMMU address space size (0x%lx) differs " +			"from mapped region size (0x%lx)!\n", size, smmu->size);  	smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) &  				      ID1_NUMS2CB_MASK; @@ -1726,8 +1804,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)  	 * allocation (PTRS_PER_PGD).  	 */  #ifdef CONFIG_64BIT -	/* Current maximum output size of 39 bits */ -	smmu->s1_output_size = min(39UL, size); +	smmu->s1_output_size = min((unsigned long)VA_BITS, size);  #else  	smmu->s1_output_size = min(32UL, size);  #endif @@ -1741,7 +1818,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)  	} else {  #ifdef CONFIG_64BIT  		size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK; -		size = min(39, arm_smmu_id_size_to_bits(size)); +		size = min(VA_BITS, arm_smmu_id_size_to_bits(size));  #else  		size = 32;  #endif @@ -1780,15 +1857,10 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)  	smmu->dev = dev;  	res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -	if (!res) { -		dev_err(dev, "missing base address/size\n"); -		return -ENODEV; -	} - +	smmu->base = devm_ioremap_resource(dev, res); +	if (IS_ERR(smmu->base)) +		return PTR_ERR(smmu->base);  	smmu->size = resource_size(res); -	smmu->base = devm_request_and_ioremap(dev, res); -	if (!smmu->base) -		return -EADDRNOTAVAIL;  	if (of_property_read_u32(dev->of_node, "#global-interrupts",  				 &smmu->num_global_irqs)) { @@ -1803,12 +1875,11 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)  			smmu->num_context_irqs++;  	} -	if (num_irqs < smmu->num_global_irqs) { -		dev_warn(dev, "found %d interrupts but expected at least %d\n", -			 num_irqs, smmu->num_global_irqs); -		smmu->num_global_irqs = num_irqs; +	if (!smmu->num_context_irqs) { +		dev_err(dev, "found %d interrupts but expected at least %d\n", +			num_irqs, smmu->num_global_irqs + 1); +		return -ENODEV;  	} -	smmu->num_context_irqs = num_irqs - smmu->num_global_irqs;  	smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,  				  GFP_KERNEL); @@ -1849,16 +1920,17 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)  	if (err)  		goto out_put_parent; +	parse_driver_options(smmu); +  	if (smmu->version > 1 &&  	    smmu->num_context_banks != smmu->num_context_irqs) {  		dev_err(dev,  			"found only 
%d context interrupt(s) but %d required\n",  			smmu->num_context_irqs, smmu->num_context_banks); +		err = -ENODEV;  		goto out_put_parent;  	} -	arm_smmu_device_reset(smmu); -  	for (i = 0; i < smmu->num_global_irqs; ++i) {  		err = request_irq(smmu->irqs[i],  				  arm_smmu_global_fault, @@ -1876,6 +1948,8 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)  	spin_lock(&arm_smmu_devices_lock);  	list_add(&smmu->list, &arm_smmu_devices);  	spin_unlock(&arm_smmu_devices_lock); + +	arm_smmu_device_reset(smmu);  	return 0;  out_free_irqs: @@ -1932,7 +2006,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)  		free_irq(smmu->irqs[i], smmu);  	/* Turn the thing off */ -	writel(sCR0_CLIENTPD, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_sCR0); +	writel(sCR0_CLIENTPD,ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);  	return 0;  } @@ -1966,11 +2040,13 @@ static int __init arm_smmu_init(void)  		return ret;  	/* Oh, for a proper bus abstraction */ -	if (!iommu_present(&platform_bus_type)); +	if (!iommu_present(&platform_bus_type))  		bus_set_iommu(&platform_bus_type, &arm_smmu_ops); -	if (!iommu_present(&amba_bustype)); +#ifdef CONFIG_ARM_AMBA +	if (!iommu_present(&amba_bustype))  		bus_set_iommu(&amba_bustype, &arm_smmu_ops); +#endif  	return 0;  } @@ -1980,7 +2056,7 @@ static void __exit arm_smmu_exit(void)  	return platform_driver_unregister(&arm_smmu_driver);  } -module_init(arm_smmu_init); +subsys_initcall(arm_smmu_init);  module_exit(arm_smmu_exit);  MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations"); diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 785675a56a1..9a4f05e5b23 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -43,14 +43,27 @@  #include "irq_remapping.h" -/* No locks are needed as DMA remapping hardware unit - * list is constructed at boot time and hotplug of - * these units are not supported by the architecture. +/* + * Assumptions: + * 1) The hotplug framework guarentees that DMAR unit will be hot-added + *    before IO devices managed by that unit. + * 2) The hotplug framework guarantees that DMAR unit will be hot-removed + *    after IO devices managed by that unit. + * 3) Hotplug events are rare. + * + * Locking rules for DMA and interrupt remapping related global data structures: + * 1) Use dmar_global_lock in process context + * 2) Use RCU in interrupt context   */ +DECLARE_RWSEM(dmar_global_lock);  LIST_HEAD(dmar_drhd_units);  struct acpi_table_header * __initdata dmar_tbl;  static acpi_size dmar_tbl_size; +static int dmar_dev_scope_status = 1; + +static int alloc_iommu(struct dmar_drhd_unit *drhd); +static void free_iommu(struct intel_iommu *iommu);  static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)  { @@ -59,74 +72,20 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)  	 * the very end.  	 
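
Aside: a little further down in this dmar.c diff, dmar_alloc_pci_notify_info() avoids kzalloc() in the common case by reusing a small static buffer and only falls back to the heap for deep PCI paths, while dmar_free_pci_notify_info() frees only what really came from the heap. A minimal standalone sketch of that fallback pattern, assuming a plain C environment; the names here are illustrative, not the kernel's:

/*
 * Standalone sketch of the "static buffer for small sizes, heap only as
 * a fallback" pattern used by dmar_alloc_pci_notify_info() and
 * dmar_free_pci_notify_info() further down.
 */
#include <stdio.h>
#include <stdlib.h>

static char small_info_buf[64];

static void *alloc_notify_info(size_t size)
{
	if (size <= sizeof(small_info_buf))
		return small_info_buf;	/* common case: reuse the static buffer */
	return calloc(1, size);		/* rare deep PCI paths: fall back to the heap */
}

static void free_notify_info(void *info)
{
	if (info != small_info_buf)	/* only free what really came from the heap */
		free(info);
}

int main(void)
{
	void *a = alloc_notify_info(32);	/* served from the static buffer */
	void *b = alloc_notify_info(256);	/* served from the heap */

	printf("static? a=%d b=%d\n", a == (void *)small_info_buf,
	       b == (void *)small_info_buf);
	free_notify_info(a);
	free_notify_info(b);
	return 0;
}
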
*/  	if (drhd->include_all) -		list_add_tail(&drhd->list, &dmar_drhd_units); +		list_add_tail_rcu(&drhd->list, &dmar_drhd_units);  	else -		list_add(&drhd->list, &dmar_drhd_units); -} - -static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, -					   struct pci_dev **dev, u16 segment) -{ -	struct pci_bus *bus; -	struct pci_dev *pdev = NULL; -	struct acpi_dmar_pci_path *path; -	int count; - -	bus = pci_find_bus(segment, scope->bus); -	path = (struct acpi_dmar_pci_path *)(scope + 1); -	count = (scope->length - sizeof(struct acpi_dmar_device_scope)) -		/ sizeof(struct acpi_dmar_pci_path); - -	while (count) { -		if (pdev) -			pci_dev_put(pdev); -		/* -		 * Some BIOSes list non-exist devices in DMAR table, just -		 * ignore it -		 */ -		if (!bus) { -			pr_warn("Device scope bus [%d] not found\n", scope->bus); -			break; -		} -		pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn)); -		if (!pdev) { -			/* warning will be printed below */ -			break; -		} -		path ++; -		count --; -		bus = pdev->subordinate; -	} -	if (!pdev) { -		pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n", -			segment, scope->bus, path->dev, path->fn); -		*dev = NULL; -		return 0; -	} -	if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \ -			pdev->subordinate) || (scope->entry_type == \ -			ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) { -		pci_dev_put(pdev); -		pr_warn("Device scope type does not match for %s\n", -			pci_name(pdev)); -		return -EINVAL; -	} -	*dev = pdev; -	return 0; +		list_add_rcu(&drhd->list, &dmar_drhd_units);  } -int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, -				struct pci_dev ***devices, u16 segment) +void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)  {  	struct acpi_dmar_device_scope *scope; -	void * tmp = start; -	int index; -	int ret;  	*cnt = 0;  	while (start < end) {  		scope = start; -		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || +		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ACPI || +		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||  		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)  			(*cnt)++;  		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC && @@ -136,32 +95,237 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,  		start += scope->length;  	}  	if (*cnt == 0) -		return 0; +		return NULL; -	*devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL); -	if (!*devices) -		return -ENOMEM; +	return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL); +} -	start = tmp; -	index = 0; -	while (start < end) { +void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt) +{ +	int i; +	struct device *tmp_dev; + +	if (*devices && *cnt) { +		for_each_active_dev_scope(*devices, *cnt, i, tmp_dev) +			put_device(tmp_dev); +		kfree(*devices); +	} + +	*devices = NULL; +	*cnt = 0; +} + +/* Optimize out kzalloc()/kfree() for normal cases */ +static char dmar_pci_notify_info_buf[64]; + +static struct dmar_pci_notify_info * +dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) +{ +	int level = 0; +	size_t size; +	struct pci_dev *tmp; +	struct dmar_pci_notify_info *info; + +	BUG_ON(dev->is_virtfn); + +	/* Only generate path[] for device addition event */ +	if (event == BUS_NOTIFY_ADD_DEVICE) +		for (tmp = dev; tmp; tmp = tmp->bus->self) +			level++; + +	size = sizeof(*info) + level * sizeof(struct acpi_dmar_pci_path); +	if (size <= sizeof(dmar_pci_notify_info_buf)) { +		info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf; +	
} else { +		info = kzalloc(size, GFP_KERNEL); +		if (!info) { +			pr_warn("Out of memory when allocating notify_info " +				"for %s.\n", pci_name(dev)); +			if (dmar_dev_scope_status == 0) +				dmar_dev_scope_status = -ENOMEM; +			return NULL; +		} +	} + +	info->event = event; +	info->dev = dev; +	info->seg = pci_domain_nr(dev->bus); +	info->level = level; +	if (event == BUS_NOTIFY_ADD_DEVICE) { +		for (tmp = dev; tmp; tmp = tmp->bus->self) { +			level--; +			info->path[level].device = PCI_SLOT(tmp->devfn); +			info->path[level].function = PCI_FUNC(tmp->devfn); +			if (pci_is_root_bus(tmp->bus)) +				info->bus = tmp->bus->number; +		} +	} + +	return info; +} + +static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info) +{ +	if ((void *)info != dmar_pci_notify_info_buf) +		kfree(info); +} + +static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus, +				struct acpi_dmar_pci_path *path, int count) +{ +	int i; + +	if (info->bus != bus) +		return false; +	if (info->level != count) +		return false; + +	for (i = 0; i < count; i++) { +		if (path[i].device != info->path[i].device || +		    path[i].function != info->path[i].function) +			return false; +	} + +	return true; +} + +/* Return: > 0 if match found, 0 if no match found, < 0 if error happens */ +int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, +			  void *start, void*end, u16 segment, +			  struct dmar_dev_scope *devices, +			  int devices_cnt) +{ +	int i, level; +	struct device *tmp, *dev = &info->dev->dev; +	struct acpi_dmar_device_scope *scope; +	struct acpi_dmar_pci_path *path; + +	if (segment != info->seg) +		return 0; + +	for (; start < end; start += scope->length) {  		scope = start; -		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || -		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) { -			ret = dmar_parse_one_dev_scope(scope, -				&(*devices)[index], segment); -			if (ret) { -				kfree(*devices); -				return ret; -			} -			index ++; +		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT && +		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE) +			continue; + +		path = (struct acpi_dmar_pci_path *)(scope + 1); +		level = (scope->length - sizeof(*scope)) / sizeof(*path); +		if (!dmar_match_pci_path(info, scope->bus, path, level)) +			continue; + +		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^ +		    (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) { +			pr_warn("Device scope type does not match for %s\n", +				pci_name(info->dev)); +			return -EINVAL;  		} -		start += scope->length; + +		for_each_dev_scope(devices, devices_cnt, i, tmp) +			if (tmp == NULL) { +				devices[i].bus = info->dev->bus->number; +				devices[i].devfn = info->dev->devfn; +				rcu_assign_pointer(devices[i].dev, +						   get_device(dev)); +				return 1; +			} +		BUG_ON(i >= devices_cnt);  	}  	return 0;  } +int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment, +			  struct dmar_dev_scope *devices, int count) +{ +	int index; +	struct device *tmp; + +	if (info->seg != segment) +		return 0; + +	for_each_active_dev_scope(devices, count, index, tmp) +		if (tmp == &info->dev->dev) { +			rcu_assign_pointer(devices[index].dev, NULL); +			synchronize_rcu(); +			put_device(tmp); +			return 1; +		} + +	return 0; +} + +static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info) +{ +	int ret = 0; +	struct dmar_drhd_unit *dmaru; +	struct acpi_dmar_hardware_unit *drhd; + +	for_each_drhd_unit(dmaru) { +		if (dmaru->include_all) +			continue; + +		drhd = 
container_of(dmaru->hdr, +				    struct acpi_dmar_hardware_unit, header); +		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1), +				((void *)drhd) + drhd->header.length, +				dmaru->segment, +				dmaru->devices, dmaru->devices_cnt); +		if (ret != 0) +			break; +	} +	if (ret >= 0) +		ret = dmar_iommu_notify_scope_dev(info); +	if (ret < 0 && dmar_dev_scope_status == 0) +		dmar_dev_scope_status = ret; + +	return ret; +} + +static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info) +{ +	struct dmar_drhd_unit *dmaru; + +	for_each_drhd_unit(dmaru) +		if (dmar_remove_dev_scope(info, dmaru->segment, +			dmaru->devices, dmaru->devices_cnt)) +			break; +	dmar_iommu_notify_scope_dev(info); +} + +static int dmar_pci_bus_notifier(struct notifier_block *nb, +				 unsigned long action, void *data) +{ +	struct pci_dev *pdev = to_pci_dev(data); +	struct dmar_pci_notify_info *info; + +	/* Only care about add/remove events for physical functions */ +	if (pdev->is_virtfn) +		return NOTIFY_DONE; +	if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE) +		return NOTIFY_DONE; + +	info = dmar_alloc_pci_notify_info(pdev, action); +	if (!info) +		return NOTIFY_DONE; + +	down_write(&dmar_global_lock); +	if (action == BUS_NOTIFY_ADD_DEVICE) +		dmar_pci_bus_add_dev(info); +	else if (action == BUS_NOTIFY_DEL_DEVICE) +		dmar_pci_bus_del_dev(info); +	up_write(&dmar_global_lock); + +	dmar_free_pci_notify_info(info); + +	return NOTIFY_OK; +} + +static struct notifier_block dmar_pci_bus_nb = { +	.notifier_call = dmar_pci_bus_notifier, +	.priority = INT_MIN, +}; +  /**   * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition   * structure which uniquely represent one DMA remapping hardware unit @@ -183,9 +347,18 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)  	dmaru->reg_base_addr = drhd->address;  	dmaru->segment = drhd->segment;  	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ +	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1), +					      ((void *)drhd) + drhd->header.length, +					      &dmaru->devices_cnt); +	if (dmaru->devices_cnt && dmaru->devices == NULL) { +		kfree(dmaru); +		return -ENOMEM; +	}  	ret = alloc_iommu(dmaru);  	if (ret) { +		dmar_free_dev_scope(&dmaru->devices, +				    &dmaru->devices_cnt);  		kfree(dmaru);  		return ret;  	} @@ -193,25 +366,33 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)  	return 0;  } -static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru) +static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)  { -	struct acpi_dmar_hardware_unit *drhd; -	int ret = 0; - -	drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr; - -	if (dmaru->include_all) -		return 0; +	if (dmaru->devices && dmaru->devices_cnt) +		dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt); +	if (dmaru->iommu) +		free_iommu(dmaru->iommu); +	kfree(dmaru); +} -	ret = dmar_parse_dev_scope((void *)(drhd + 1), -				((void *)drhd) + drhd->header.length, -				&dmaru->devices_cnt, &dmaru->devices, -				drhd->segment); -	if (ret) { -		list_del(&dmaru->list); -		kfree(dmaru); +static int __init dmar_parse_one_andd(struct acpi_dmar_header *header) +{ +	struct acpi_dmar_andd *andd = (void *)header; + +	/* Check for NUL termination within the designated length */ +	if (strnlen(andd->object_name, header->length - 8) == header->length - 8) { +		WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, +			   "Your BIOS is broken; ANDD object name is not NUL-terminated\n" +			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n", +			   
dmi_get_system_info(DMI_BIOS_VENDOR), +			   dmi_get_system_info(DMI_BIOS_VERSION), +			   dmi_get_system_info(DMI_PRODUCT_VERSION)); +		return -EINVAL;  	} -	return ret; +	pr_info("ANDD device: %x name: %s\n", andd->device_number, +		andd->object_name); + +	return 0;  }  #ifdef CONFIG_ACPI_NUMA @@ -277,6 +458,10 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)  		       (unsigned long long)rhsa->base_address,  		       rhsa->proximity_domain);  		break; +	case ACPI_DMAR_TYPE_ANDD: +		/* We don't print this here because we need to sanity-check +		   it first. So print it in dmar_parse_one_andd() instead. */ +		break;  	}  } @@ -362,6 +547,9 @@ parse_dmar_table(void)  			ret = dmar_parse_one_rhsa(entry_header);  #endif  			break; +		case ACPI_DMAR_TYPE_ANDD: +			ret = dmar_parse_one_andd(entry_header); +			break;  		default:  			pr_warn("Unknown DMAR structure type %d\n",  				entry_header->type); @@ -378,14 +566,15 @@ parse_dmar_table(void)  	return ret;  } -static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, -			  struct pci_dev *dev) +static int dmar_pci_device_match(struct dmar_dev_scope devices[], +				 int cnt, struct pci_dev *dev)  {  	int index; +	struct device *tmp;  	while (dev) { -		for (index = 0; index < cnt; index++) -			if (dev == devices[index]) +		for_each_active_dev_scope(devices, cnt, index, tmp) +			if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))  				return 1;  		/* Check our parent */ @@ -398,56 +587,141 @@ static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,  struct dmar_drhd_unit *  dmar_find_matched_drhd_unit(struct pci_dev *dev)  { -	struct dmar_drhd_unit *dmaru = NULL; +	struct dmar_drhd_unit *dmaru;  	struct acpi_dmar_hardware_unit *drhd;  	dev = pci_physfn(dev); -	list_for_each_entry(dmaru, &dmar_drhd_units, list) { +	rcu_read_lock(); +	for_each_drhd_unit(dmaru) {  		drhd = container_of(dmaru->hdr,  				    struct acpi_dmar_hardware_unit,  				    header);  		if (dmaru->include_all &&  		    drhd->segment == pci_domain_nr(dev->bus)) -			return dmaru; +			goto out;  		if (dmar_pci_device_match(dmaru->devices,  					  dmaru->devices_cnt, dev)) -			return dmaru; +			goto out;  	} +	dmaru = NULL; +out: +	rcu_read_unlock(); -	return NULL; +	return dmaru;  } -int __init dmar_dev_scope_init(void) +static void __init dmar_acpi_insert_dev_scope(u8 device_number, +					      struct acpi_device *adev)  { -	static int dmar_dev_scope_initialized; -	struct dmar_drhd_unit *drhd, *drhd_n; -	int ret = -ENODEV; - -	if (dmar_dev_scope_initialized) -		return dmar_dev_scope_initialized; +	struct dmar_drhd_unit *dmaru; +	struct acpi_dmar_hardware_unit *drhd; +	struct acpi_dmar_device_scope *scope; +	struct device *tmp; +	int i; +	struct acpi_dmar_pci_path *path; -	if (list_empty(&dmar_drhd_units)) -		goto fail; +	for_each_drhd_unit(dmaru) { +		drhd = container_of(dmaru->hdr, +				    struct acpi_dmar_hardware_unit, +				    header); -	list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) { -		ret = dmar_parse_dev(drhd); -		if (ret) -			goto fail; +		for (scope = (void *)(drhd + 1); +		     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length; +		     scope = ((void *)scope) + scope->length) { +			if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ACPI) +				continue; +			if (scope->enumeration_id != device_number) +				continue; + +			path = (void *)(scope + 1); +			pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n", +				dev_name(&adev->dev), dmaru->reg_base_addr, +				scope->bus, path->device, 
path->function); +			for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp) +				if (tmp == NULL) { +					dmaru->devices[i].bus = scope->bus; +					dmaru->devices[i].devfn = PCI_DEVFN(path->device, +									    path->function); +					rcu_assign_pointer(dmaru->devices[i].dev, +							   get_device(&adev->dev)); +					return; +				} +			BUG_ON(i >= dmaru->devices_cnt); +		}  	} +	pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n", +		device_number, dev_name(&adev->dev)); +} -	ret = dmar_parse_rmrr_atsr_dev(); -	if (ret) -		goto fail; +static int __init dmar_acpi_dev_scope_init(void) +{ +	struct acpi_dmar_andd *andd; + +	if (dmar_tbl == NULL) +		return -ENODEV; -	dmar_dev_scope_initialized = 1; +	for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar); +	     ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length; +	     andd = ((void *)andd) + andd->header.length) { +		if (andd->header.type == ACPI_DMAR_TYPE_ANDD) { +			acpi_handle h; +			struct acpi_device *adev; + +			if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT, +							  andd->object_name, +							  &h))) { +				pr_err("Failed to find handle for ACPI object %s\n", +				       andd->object_name); +				continue; +			} +			acpi_bus_get_device(h, &adev); +			if (!adev) { +				pr_err("Failed to get device for ACPI object %s\n", +				       andd->object_name); +				continue; +			} +			dmar_acpi_insert_dev_scope(andd->device_number, adev); +		} +	}  	return 0; +} -fail: -	dmar_dev_scope_initialized = ret; -	return ret; +int __init dmar_dev_scope_init(void) +{ +	struct pci_dev *dev = NULL; +	struct dmar_pci_notify_info *info; + +	if (dmar_dev_scope_status != 1) +		return dmar_dev_scope_status; + +	if (list_empty(&dmar_drhd_units)) { +		dmar_dev_scope_status = -ENODEV; +	} else { +		dmar_dev_scope_status = 0; + +		dmar_acpi_dev_scope_init(); + +		for_each_pci_dev(dev) { +			if (dev->is_virtfn) +				continue; + +			info = dmar_alloc_pci_notify_info(dev, +					BUS_NOTIFY_ADD_DEVICE); +			if (!info) { +				return dmar_dev_scope_status; +			} else { +				dmar_pci_bus_add_dev(info); +				dmar_free_pci_notify_info(info); +			} +		} + +		bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); +	} + +	return dmar_dev_scope_status;  } @@ -456,24 +730,23 @@ int __init dmar_table_init(void)  	static int dmar_table_initialized;  	int ret; -	if (dmar_table_initialized) -		return 0; - -	dmar_table_initialized = 1; - -	ret = parse_dmar_table(); -	if (ret) { -		if (ret != -ENODEV) -			pr_info("parse DMAR table failure.\n"); -		return ret; -	} +	if (dmar_table_initialized == 0) { +		ret = parse_dmar_table(); +		if (ret < 0) { +			if (ret != -ENODEV) +				pr_info("parse DMAR table failure.\n"); +		} else  if (list_empty(&dmar_drhd_units)) { +			pr_info("No DMAR devices found\n"); +			ret = -ENODEV; +		} -	if (list_empty(&dmar_drhd_units)) { -		pr_info("No DMAR devices found\n"); -		return -ENODEV; +		if (ret < 0) +			dmar_table_initialized = ret; +		else +			dmar_table_initialized = 1;  	} -	return 0; +	return dmar_table_initialized < 0 ? 
dmar_table_initialized : 0;  }  static void warn_invalid_dmar(u64 addr, const char *message) @@ -488,7 +761,7 @@ static void warn_invalid_dmar(u64 addr, const char *message)  		dmi_get_system_info(DMI_PRODUCT_VERSION));  } -int __init check_zero_address(void) +static int __init check_zero_address(void)  {  	struct acpi_table_dmar *dmar;  	struct acpi_dmar_header *entry_header; @@ -542,18 +815,11 @@ int __init detect_intel_iommu(void)  {  	int ret; +	down_write(&dmar_global_lock);  	ret = dmar_table_detect();  	if (ret)  		ret = check_zero_address();  	{ -		struct acpi_table_dmar *dmar; - -		dmar = (struct acpi_table_dmar *) dmar_tbl; - -		if (ret && irq_remapping_enabled && cpu_has_x2apic && -		    dmar->flags & 0x1) -			pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); -  		if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {  			iommu_detected = 1;  			/* Make sure ACS will be enabled */ @@ -565,8 +831,9 @@ int __init detect_intel_iommu(void)  			x86_init.iommu.iommu_init = intel_iommu_init;  #endif  	} -	early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); +	early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size);  	dmar_tbl = NULL; +	up_write(&dmar_global_lock);  	return ret ? 1 : -ENODEV;  } @@ -647,7 +914,7 @@ out:  	return err;  } -int alloc_iommu(struct dmar_drhd_unit *drhd) +static int alloc_iommu(struct dmar_drhd_unit *drhd)  {  	struct intel_iommu *iommu;  	u32 ver, sts; @@ -689,6 +956,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)  	}  	iommu->agaw = agaw;  	iommu->msagaw = msagaw; +	iommu->segment = drhd->segment;  	iommu->node = -1; @@ -721,12 +989,19 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)  	return err;  } -void free_iommu(struct intel_iommu *iommu) +static void free_iommu(struct intel_iommu *iommu)  { -	if (!iommu) -		return; +	if (iommu->irq) { +		free_irq(iommu->irq, iommu); +		irq_set_handler_data(iommu->irq, NULL); +		dmar_free_hwirq(iommu->irq); +	} -	free_dmar_iommu(iommu); +	if (iommu->qi) { +		free_page((unsigned long)iommu->qi->desc); +		kfree(iommu->qi->desc_status); +		kfree(iommu->qi); +	}  	if (iommu->reg)  		unmap_iommu(iommu); @@ -1050,7 +1325,7 @@ int dmar_enable_qi(struct intel_iommu *iommu)  	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);  	if (!desc_page) {  		kfree(qi); -		iommu->qi = 0; +		iommu->qi = NULL;  		return -ENOMEM;  	} @@ -1060,7 +1335,7 @@ int dmar_enable_qi(struct intel_iommu *iommu)  	if (!qi->desc_status) {  		free_page((unsigned long) qi->desc);  		kfree(qi); -		iommu->qi = 0; +		iommu->qi = NULL;  		return -ENOMEM;  	} @@ -1111,9 +1386,7 @@ static const char *irq_remap_fault_reasons[] =  	"Blocked an interrupt request due to source-id verification failure",  }; -#define MAX_FAULT_REASON_IDX 	(ARRAY_SIZE(fault_reason_strings) - 1) - -const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) +static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)  {  	if (fault_reason >= 0x20 && (fault_reason - 0x20 <  					ARRAY_SIZE(irq_remap_fault_reasons))) { @@ -1277,8 +1550,8 @@ int dmar_set_interrupt(struct intel_iommu *iommu)  	if (iommu->irq)  		return 0; -	irq = create_irq(); -	if (!irq) { +	irq = dmar_alloc_hwirq(); +	if (irq <= 0) {  		pr_err("IOMMU: no free vectors\n");  		return -EINVAL;  	} @@ -1290,7 +1563,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)  	if (ret) {  		irq_set_handler_data(irq, NULL);  		iommu->irq = 0; -		destroy_irq(irq); +		dmar_free_hwirq(irq);  		return ret;  	} @@ -1303,15 +1576,14 @@ int 
dmar_set_interrupt(struct intel_iommu *iommu)  int __init enable_drhd_fault_handling(void)  {  	struct dmar_drhd_unit *drhd; +	struct intel_iommu *iommu;  	/*  	 * Enable fault control interrupt.  	 */ -	for_each_drhd_unit(drhd) { -		int ret; -		struct intel_iommu *iommu = drhd->iommu; +	for_each_iommu(iommu, drhd) {  		u32 fault_status; -		ret = dmar_set_interrupt(iommu); +		int ret = dmar_set_interrupt(iommu);  		if (ret) {  			pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n", @@ -1366,4 +1638,27 @@ int __init dmar_ir_support(void)  		return 0;  	return dmar->flags & 0x1;  } + +static int __init dmar_free_unused_resources(void) +{ +	struct dmar_drhd_unit *dmaru, *dmaru_n; + +	/* DMAR units are in use */ +	if (irq_remapping_enabled || intel_iommu_enabled) +		return 0; + +	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units)) +		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb); + +	down_write(&dmar_global_lock); +	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) { +		list_del(&dmaru->list); +		dmar_free_drhd(dmaru); +	} +	up_write(&dmar_global_lock); + +	return 0; +} + +late_initcall(dmar_free_unused_resources);  IOMMU_INIT_POST(detect_intel_iommu); diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 074018979cd..99054d2c040 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -29,7 +29,8 @@  #include <asm/cacheflush.h>  #include <asm/pgtable.h> -#include <mach/sysmmu.h> +typedef u32 sysmmu_iova_t; +typedef u32 sysmmu_pte_t;  /* We does not consider super section mapping (16MB) */  #define SECT_ORDER 20 @@ -44,28 +45,44 @@  #define LPAGE_MASK (~(LPAGE_SIZE - 1))  #define SPAGE_MASK (~(SPAGE_SIZE - 1)) -#define lv1ent_fault(sent) (((*(sent) & 3) == 0) || ((*(sent) & 3) == 3)) -#define lv1ent_page(sent) ((*(sent) & 3) == 1) +#define lv1ent_fault(sent) ((*(sent) == ZERO_LV2LINK) || \ +			   ((*(sent) & 3) == 0) || ((*(sent) & 3) == 3)) +#define lv1ent_zero(sent) (*(sent) == ZERO_LV2LINK) +#define lv1ent_page_zero(sent) ((*(sent) & 3) == 1) +#define lv1ent_page(sent) ((*(sent) != ZERO_LV2LINK) && \ +			  ((*(sent) & 3) == 1))  #define lv1ent_section(sent) ((*(sent) & 3) == 2)  #define lv2ent_fault(pent) ((*(pent) & 3) == 0)  #define lv2ent_small(pent) ((*(pent) & 2) == 2)  #define lv2ent_large(pent) ((*(pent) & 3) == 1) +static u32 sysmmu_page_offset(sysmmu_iova_t iova, u32 size) +{ +	return iova & (size - 1); +} +  #define section_phys(sent) (*(sent) & SECT_MASK) -#define section_offs(iova) ((iova) & 0xFFFFF) +#define section_offs(iova) sysmmu_page_offset((iova), SECT_SIZE)  #define lpage_phys(pent) (*(pent) & LPAGE_MASK) -#define lpage_offs(iova) ((iova) & 0xFFFF) +#define lpage_offs(iova) sysmmu_page_offset((iova), LPAGE_SIZE)  #define spage_phys(pent) (*(pent) & SPAGE_MASK) -#define spage_offs(iova) ((iova) & 0xFFF) - -#define lv1ent_offset(iova) ((iova) >> SECT_ORDER) -#define lv2ent_offset(iova) (((iova) & 0xFF000) >> SPAGE_ORDER) +#define spage_offs(iova) sysmmu_page_offset((iova), SPAGE_SIZE)  #define NUM_LV1ENTRIES 4096 -#define NUM_LV2ENTRIES 256 +#define NUM_LV2ENTRIES (SECT_SIZE / SPAGE_SIZE) + +static u32 lv1ent_offset(sysmmu_iova_t iova) +{ +	return iova >> SECT_ORDER; +} + +static u32 lv2ent_offset(sysmmu_iova_t iova) +{ +	return (iova >> SPAGE_ORDER) & (NUM_LV2ENTRIES - 1); +} -#define LV2TABLE_SIZE (NUM_LV2ENTRIES * sizeof(long)) +#define LV2TABLE_SIZE (NUM_LV2ENTRIES * sizeof(sysmmu_pte_t))  #define SPAGES_PER_LPAGE (LPAGE_SIZE / SPAGE_SIZE) @@ -80,6 +97,13 @@  #define 
CTRL_BLOCK	0x7  #define CTRL_DISABLE	0x0 +#define CFG_LRU		0x1 +#define CFG_QOS(n)	((n & 0xF) << 7) +#define CFG_MASK	0x0150FFFF /* Selecting bit 0-15, 20, 22 and 24 */ +#define CFG_ACGEN	(1 << 24) /* System MMU 3.3 only */ +#define CFG_SYSSEL	(1 << 22) /* System MMU 3.2 only */ +#define CFG_FLPDCACHE	(1 << 20) /* System MMU 3.2+ only */ +  #define REG_MMU_CTRL		0x000  #define REG_MMU_CFG		0x004  #define REG_MMU_STATUS		0x008 @@ -96,19 +120,32 @@  #define REG_MMU_VERSION		0x034 +#define MMU_MAJ_VER(val)	((val) >> 7) +#define MMU_MIN_VER(val)	((val) & 0x7F) +#define MMU_RAW_VER(reg)	(((reg) >> 21) & ((1 << 11) - 1)) /* 11 bits */ + +#define MAKE_MMU_VER(maj, min)	((((maj) & 0xF) << 7) | ((min) & 0x7F)) +  #define REG_PB0_SADDR		0x04C  #define REG_PB0_EADDR		0x050  #define REG_PB1_SADDR		0x054  #define REG_PB1_EADDR		0x058 -static unsigned long *section_entry(unsigned long *pgtable, unsigned long iova) +#define has_sysmmu(dev)		(dev->archdata.iommu != NULL) + +static struct kmem_cache *lv2table_kmem_cache; +static sysmmu_pte_t *zero_lv2_table; +#define ZERO_LV2LINK mk_lv1ent_page(virt_to_phys(zero_lv2_table)) + +static sysmmu_pte_t *section_entry(sysmmu_pte_t *pgtable, sysmmu_iova_t iova)  {  	return pgtable + lv1ent_offset(iova);  } -static unsigned long *page_entry(unsigned long *sent, unsigned long iova) +static sysmmu_pte_t *page_entry(sysmmu_pte_t *sent, sysmmu_iova_t iova)  { -	return (unsigned long *)__va(lv2table_base(sent)) + lv2ent_offset(iova); +	return (sysmmu_pte_t *)phys_to_virt( +				lv2table_base(sent)) + lv2ent_offset(iova);  }  enum exynos_sysmmu_inttype { @@ -124,16 +161,6 @@ enum exynos_sysmmu_inttype {  	SYSMMU_FAULTS_NUM  }; -/* - * @itype: type of fault. - * @pgtable_base: the physical address of page table base. This is 0 if @itype - *                is SYSMMU_BUSERROR. - * @fault_addr: the device (virtual) address that the System MMU tried to - *             translated. This is 0 if @itype is SYSMMU_BUSERROR. 
- */ -typedef int (*sysmmu_fault_handler_t)(enum exynos_sysmmu_inttype itype, -			unsigned long pgtable_base, unsigned long fault_addr); -  static unsigned short fault_reg_offset[SYSMMU_FAULTS_NUM] = {  	REG_PAGE_FAULT_ADDR,  	REG_AR_FAULT_ADDR, @@ -157,27 +184,34 @@ static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = {  	"UNKNOWN FAULT"  }; +/* attached to dev.archdata.iommu of the master device */ +struct exynos_iommu_owner { +	struct list_head client; /* entry of exynos_iommu_domain.clients */ +	struct device *dev; +	struct device *sysmmu; +	struct iommu_domain *domain; +	void *vmm_data;         /* IO virtual memory manager's data */ +	spinlock_t lock;        /* Lock to preserve consistency of System MMU */ +}; +  struct exynos_iommu_domain {  	struct list_head clients; /* list of sysmmu_drvdata.node */ -	unsigned long *pgtable; /* lv1 page table, 16KB */ +	sysmmu_pte_t *pgtable; /* lv1 page table, 16KB */  	short *lv2entcnt; /* free lv2 entry counter for each section */  	spinlock_t lock; /* lock for this structure */  	spinlock_t pgtablelock; /* lock for modifying page table @ pgtable */  };  struct sysmmu_drvdata { -	struct list_head node; /* entry of exynos_iommu_domain.clients */  	struct device *sysmmu;	/* System MMU's device descriptor */ -	struct device *dev;	/* Owner of system MMU */ -	char *dbgname; -	int nsfrs; -	void __iomem **sfrbases; -	struct clk *clk[2]; +	struct device *master;	/* Owner of system MMU */ +	void __iomem *sfrbase; +	struct clk *clk; +	struct clk *clk_master;  	int activations; -	rwlock_t lock; +	spinlock_t lock;  	struct iommu_domain *domain; -	sysmmu_fault_handler_t fault_handler; -	unsigned long pgtable; +	phys_addr_t pgtable;  };  static bool set_sysmmu_active(struct sysmmu_drvdata *data) @@ -204,6 +238,11 @@ static void sysmmu_unblock(void __iomem *sfrbase)  	__raw_writel(CTRL_ENABLE, sfrbase + REG_MMU_CTRL);  } +static unsigned int __raw_sysmmu_version(struct sysmmu_drvdata *data) +{ +	return MMU_RAW_VER(__raw_readl(data->sfrbase + REG_MMU_VERSION)); +} +  static bool sysmmu_block(void __iomem *sfrbase)  {  	int i = 120; @@ -226,434 +265,428 @@ static void __sysmmu_tlb_invalidate(void __iomem *sfrbase)  }  static void __sysmmu_tlb_invalidate_entry(void __iomem *sfrbase, -						unsigned long iova) +				sysmmu_iova_t iova, unsigned int num_inv)  { -	__raw_writel((iova & SPAGE_MASK) | 1, sfrbase + REG_MMU_FLUSH_ENTRY); +	unsigned int i; + +	for (i = 0; i < num_inv; i++) { +		__raw_writel((iova & SPAGE_MASK) | 1, +				sfrbase + REG_MMU_FLUSH_ENTRY); +		iova += SPAGE_SIZE; +	}  }  static void __sysmmu_set_ptbase(void __iomem *sfrbase, -				       unsigned long pgd) +				       phys_addr_t pgd)  { -	__raw_writel(0x1, sfrbase + REG_MMU_CFG); /* 16KB LV1, LRU */  	__raw_writel(pgd, sfrbase + REG_PT_BASE_ADDR);  	__sysmmu_tlb_invalidate(sfrbase);  } -static void __sysmmu_set_prefbuf(void __iomem *sfrbase, unsigned long base, -						unsigned long size, int idx) -{ -	__raw_writel(base, sfrbase + REG_PB0_SADDR + idx * 8); -	__raw_writel(size - 1 + base,  sfrbase + REG_PB0_EADDR + idx * 8); -} - -static void __set_fault_handler(struct sysmmu_drvdata *data, -					sysmmu_fault_handler_t handler) -{ -	unsigned long flags; - -	write_lock_irqsave(&data->lock, flags); -	data->fault_handler = handler; -	write_unlock_irqrestore(&data->lock, flags); -} - -void exynos_sysmmu_set_fault_handler(struct device *dev, -					sysmmu_fault_handler_t handler) -{ -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu); - -	__set_fault_handler(data, handler); -} - -static 
int default_fault_handler(enum exynos_sysmmu_inttype itype, -		     unsigned long pgtable_base, unsigned long fault_addr) +static void show_fault_information(const char *name, +		enum exynos_sysmmu_inttype itype, +		phys_addr_t pgtable_base, sysmmu_iova_t fault_addr)  { -	unsigned long *ent; +	sysmmu_pte_t *ent;  	if ((itype >= SYSMMU_FAULTS_NUM) || (itype < SYSMMU_PAGEFAULT))  		itype = SYSMMU_FAULT_UNKNOWN; -	pr_err("%s occurred at 0x%lx(Page table base: 0x%lx)\n", -			sysmmu_fault_name[itype], fault_addr, pgtable_base); +	pr_err("%s occurred at %#x by %s(Page table base: %pa)\n", +		sysmmu_fault_name[itype], fault_addr, name, &pgtable_base); -	ent = section_entry(__va(pgtable_base), fault_addr); -	pr_err("\tLv1 entry: 0x%lx\n", *ent); +	ent = section_entry(phys_to_virt(pgtable_base), fault_addr); +	pr_err("\tLv1 entry: %#x\n", *ent);  	if (lv1ent_page(ent)) {  		ent = page_entry(ent, fault_addr); -		pr_err("\t Lv2 entry: 0x%lx\n", *ent); +		pr_err("\t Lv2 entry: %#x\n", *ent);  	} - -	pr_err("Generating Kernel OOPS... because it is unrecoverable.\n"); - -	BUG(); - -	return 0;  }  static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)  {  	/* SYSMMU is in blocked when interrupt occurred. */  	struct sysmmu_drvdata *data = dev_id; -	struct resource *irqres; -	struct platform_device *pdev;  	enum exynos_sysmmu_inttype itype; -	unsigned long addr = -1; - -	int i, ret = -ENOSYS; - -	read_lock(&data->lock); +	sysmmu_iova_t addr = -1; +	int ret = -ENOSYS;  	WARN_ON(!is_sysmmu_active(data)); -	pdev = to_platform_device(data->sysmmu); -	for (i = 0; i < (pdev->num_resources / 2); i++) { -		irqres = platform_get_resource(pdev, IORESOURCE_IRQ, i); -		if (irqres && ((int)irqres->start == irq)) -			break; -	} +	spin_lock(&data->lock); -	if (i == pdev->num_resources) { +	if (!IS_ERR(data->clk_master)) +		clk_enable(data->clk_master); + +	itype = (enum exynos_sysmmu_inttype) +		__ffs(__raw_readl(data->sfrbase + REG_INT_STATUS)); +	if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN))))  		itype = SYSMMU_FAULT_UNKNOWN; +	else +		addr = __raw_readl(data->sfrbase + fault_reg_offset[itype]); + +	if (itype == SYSMMU_FAULT_UNKNOWN) { +		pr_err("%s: Fault is not occurred by System MMU '%s'!\n", +			__func__, dev_name(data->sysmmu)); +		pr_err("%s: Please check if IRQ is correctly configured.\n", +			__func__); +		BUG();  	} else { -		itype = (enum exynos_sysmmu_inttype) -			__ffs(__raw_readl(data->sfrbases[i] + REG_INT_STATUS)); -		if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN)))) -			itype = SYSMMU_FAULT_UNKNOWN; -		else -			addr = __raw_readl( -				data->sfrbases[i] + fault_reg_offset[itype]); +		unsigned int base = +				__raw_readl(data->sfrbase + REG_PT_BASE_ADDR); +		show_fault_information(dev_name(data->sysmmu), +					itype, base, addr); +		if (data->domain) +			ret = report_iommu_fault(data->domain, +					data->master, addr, itype);  	} -	if (data->domain) -		ret = report_iommu_fault(data->domain, data->dev, -				addr, itype); +	/* fault is not recovered by fault handler */ +	BUG_ON(ret != 0); -	if ((ret == -ENOSYS) && data->fault_handler) { -		unsigned long base = data->pgtable; -		if (itype != SYSMMU_FAULT_UNKNOWN) -			base = __raw_readl( -					data->sfrbases[i] + REG_PT_BASE_ADDR); -		ret = data->fault_handler(itype, base, addr); -	} +	__raw_writel(1 << itype, data->sfrbase + REG_INT_CLEAR); -	if (!ret && (itype != SYSMMU_FAULT_UNKNOWN)) -		__raw_writel(1 << itype, data->sfrbases[i] + REG_INT_CLEAR); -	else -		dev_dbg(data->sysmmu, "(%s) %s is not handled.\n", -				
data->dbgname, sysmmu_fault_name[itype]); +	sysmmu_unblock(data->sfrbase); -	if (itype != SYSMMU_FAULT_UNKNOWN) -		sysmmu_unblock(data->sfrbases[i]); +	if (!IS_ERR(data->clk_master)) +		clk_disable(data->clk_master); -	read_unlock(&data->lock); +	spin_unlock(&data->lock);  	return IRQ_HANDLED;  } -static bool __exynos_sysmmu_disable(struct sysmmu_drvdata *data) +static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data)  { +	if (!IS_ERR(data->clk_master)) +		clk_enable(data->clk_master); + +	__raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL); +	__raw_writel(0, data->sfrbase + REG_MMU_CFG); + +	clk_disable(data->clk); +	if (!IS_ERR(data->clk_master)) +		clk_disable(data->clk_master); +} + +static bool __sysmmu_disable(struct sysmmu_drvdata *data) +{ +	bool disabled;  	unsigned long flags; -	bool disabled = false; -	int i; -	write_lock_irqsave(&data->lock, flags); +	spin_lock_irqsave(&data->lock, flags); -	if (!set_sysmmu_inactive(data)) -		goto finish; +	disabled = set_sysmmu_inactive(data); -	for (i = 0; i < data->nsfrs; i++) -		__raw_writel(CTRL_DISABLE, data->sfrbases[i] + REG_MMU_CTRL); +	if (disabled) { +		data->pgtable = 0; +		data->domain = NULL; -	if (data->clk[1]) -		clk_disable(data->clk[1]); -	if (data->clk[0]) -		clk_disable(data->clk[0]); +		__sysmmu_disable_nocount(data); -	disabled = true; -	data->pgtable = 0; -	data->domain = NULL; -finish: -	write_unlock_irqrestore(&data->lock, flags); +		dev_dbg(data->sysmmu, "Disabled\n"); +	} else  { +		dev_dbg(data->sysmmu, "%d times left to disable\n", +					data->activations); +	} -	if (disabled) -		dev_dbg(data->sysmmu, "(%s) Disabled\n", data->dbgname); -	else -		dev_dbg(data->sysmmu, "(%s) %d times left to be disabled\n", -					data->dbgname, data->activations); +	spin_unlock_irqrestore(&data->lock, flags);  	return disabled;  } -/* __exynos_sysmmu_enable: Enables System MMU - * - * returns -error if an error occurred and System MMU is not enabled, - * 0 if the System MMU has been just enabled and 1 if System MMU was already - * enabled before. 
- */ -static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data, -			unsigned long pgtable, struct iommu_domain *domain) +static void __sysmmu_init_config(struct sysmmu_drvdata *data)  { -	int i, ret = 0; -	unsigned long flags; +	unsigned int cfg = CFG_LRU | CFG_QOS(15); +	unsigned int ver; + +	ver = __raw_sysmmu_version(data); +	if (MMU_MAJ_VER(ver) == 3) { +		if (MMU_MIN_VER(ver) >= 2) { +			cfg |= CFG_FLPDCACHE; +			if (MMU_MIN_VER(ver) == 3) { +				cfg |= CFG_ACGEN; +				cfg &= ~CFG_LRU; +			} else { +				cfg |= CFG_SYSSEL; +			} +		} +	} -	write_lock_irqsave(&data->lock, flags); +	__raw_writel(cfg, data->sfrbase + REG_MMU_CFG); +} -	if (!set_sysmmu_active(data)) { -		if (WARN_ON(pgtable != data->pgtable)) { -			ret = -EBUSY; -			set_sysmmu_inactive(data); -		} else { -			ret = 1; -		} +static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data) +{ +	if (!IS_ERR(data->clk_master)) +		clk_enable(data->clk_master); +	clk_enable(data->clk); -		dev_dbg(data->sysmmu, "(%s) Already enabled\n", data->dbgname); -		goto finish; -	} +	__raw_writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL); -	if (data->clk[0]) -		clk_enable(data->clk[0]); -	if (data->clk[1]) -		clk_enable(data->clk[1]); +	__sysmmu_init_config(data); -	data->pgtable = pgtable; +	__sysmmu_set_ptbase(data->sfrbase, data->pgtable); -	for (i = 0; i < data->nsfrs; i++) { -		__sysmmu_set_ptbase(data->sfrbases[i], pgtable); +	__raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL); -		if ((readl(data->sfrbases[i] + REG_MMU_VERSION) >> 28) == 3) { -			/* System MMU version is 3.x */ -			__raw_writel((1 << 12) | (2 << 28), -					data->sfrbases[i] + REG_MMU_CFG); -			__sysmmu_set_prefbuf(data->sfrbases[i], 0, -1, 0); -			__sysmmu_set_prefbuf(data->sfrbases[i], 0, -1, 1); -		} +	if (!IS_ERR(data->clk_master)) +		clk_disable(data->clk_master); +} -		__raw_writel(CTRL_ENABLE, data->sfrbases[i] + REG_MMU_CTRL); +static int __sysmmu_enable(struct sysmmu_drvdata *data, +			phys_addr_t pgtable, struct iommu_domain *domain) +{ +	int ret = 0; +	unsigned long flags; + +	spin_lock_irqsave(&data->lock, flags); +	if (set_sysmmu_active(data)) { +		data->pgtable = pgtable; +		data->domain = domain; + +		__sysmmu_enable_nocount(data); + +		dev_dbg(data->sysmmu, "Enabled\n"); +	} else { +		ret = (pgtable == data->pgtable) ? 1 : -EBUSY; + +		dev_dbg(data->sysmmu, "already enabled\n");  	} -	data->domain = domain; +	if (WARN_ON(ret < 0)) +		set_sysmmu_inactive(data); /* decrement count */ -	dev_dbg(data->sysmmu, "(%s) Enabled\n", data->dbgname); -finish: -	write_unlock_irqrestore(&data->lock, flags); +	spin_unlock_irqrestore(&data->lock, flags);  	return ret;  } -int exynos_sysmmu_enable(struct device *dev, unsigned long pgtable) +/* __exynos_sysmmu_enable: Enables System MMU + * + * returns -error if an error occurred and System MMU is not enabled, + * 0 if the System MMU has been just enabled and 1 if System MMU was already + * enabled before. 
+ */ +static int __exynos_sysmmu_enable(struct device *dev, phys_addr_t pgtable, +				  struct iommu_domain *domain)  { -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu); -	int ret; +	int ret = 0; +	unsigned long flags; +	struct exynos_iommu_owner *owner = dev->archdata.iommu; +	struct sysmmu_drvdata *data; -	BUG_ON(!memblock_is_memory(pgtable)); +	BUG_ON(!has_sysmmu(dev)); -	ret = pm_runtime_get_sync(data->sysmmu); -	if (ret < 0) { -		dev_dbg(data->sysmmu, "(%s) Failed to enable\n", data->dbgname); -		return ret; -	} +	spin_lock_irqsave(&owner->lock, flags); -	ret = __exynos_sysmmu_enable(data, pgtable, NULL); -	if (WARN_ON(ret < 0)) { -		pm_runtime_put(data->sysmmu); -		dev_err(data->sysmmu, -			"(%s) Already enabled with page table %#lx\n", -			data->dbgname, data->pgtable); -	} else { -		data->dev = dev; -	} +	data = dev_get_drvdata(owner->sysmmu); + +	ret = __sysmmu_enable(data, pgtable, domain); +	if (ret >= 0) +		data->master = dev; + +	spin_unlock_irqrestore(&owner->lock, flags);  	return ret;  } +int exynos_sysmmu_enable(struct device *dev, phys_addr_t pgtable) +{ +	BUG_ON(!memblock_is_memory(pgtable)); + +	return __exynos_sysmmu_enable(dev, pgtable, NULL); +} +  static bool exynos_sysmmu_disable(struct device *dev)  { -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu); -	bool disabled; +	unsigned long flags; +	bool disabled = true; +	struct exynos_iommu_owner *owner = dev->archdata.iommu; +	struct sysmmu_drvdata *data; + +	BUG_ON(!has_sysmmu(dev)); + +	spin_lock_irqsave(&owner->lock, flags); -	disabled = __exynos_sysmmu_disable(data); -	pm_runtime_put(data->sysmmu); +	data = dev_get_drvdata(owner->sysmmu); + +	disabled = __sysmmu_disable(data); +	if (disabled) +		data->master = NULL; + +	spin_unlock_irqrestore(&owner->lock, flags);  	return disabled;  } -static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova) +static void __sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, +					      sysmmu_iova_t iova) +{ +	if (__raw_sysmmu_version(data) == MAKE_MMU_VER(3, 3)) +		__raw_writel(iova | 0x1, data->sfrbase + REG_MMU_FLUSH_ENTRY); +} + +static void sysmmu_tlb_invalidate_flpdcache(struct device *dev, +					    sysmmu_iova_t iova) +{ +	unsigned long flags; +	struct exynos_iommu_owner *owner = dev->archdata.iommu; +	struct sysmmu_drvdata *data = dev_get_drvdata(owner->sysmmu); + +	if (!IS_ERR(data->clk_master)) +		clk_enable(data->clk_master); + +	spin_lock_irqsave(&data->lock, flags); +	if (is_sysmmu_active(data)) +		__sysmmu_tlb_invalidate_flpdcache(data, iova); +	spin_unlock_irqrestore(&data->lock, flags); + +	if (!IS_ERR(data->clk_master)) +		clk_disable(data->clk_master); +} + +static void sysmmu_tlb_invalidate_entry(struct device *dev, sysmmu_iova_t iova, +					size_t size)  { +	struct exynos_iommu_owner *owner = dev->archdata.iommu;  	unsigned long flags; -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu); +	struct sysmmu_drvdata *data; -	read_lock_irqsave(&data->lock, flags); +	data = dev_get_drvdata(owner->sysmmu); +	spin_lock_irqsave(&data->lock, flags);  	if (is_sysmmu_active(data)) { -		int i; -		for (i = 0; i < data->nsfrs; i++) { -			if (sysmmu_block(data->sfrbases[i])) { -				__sysmmu_tlb_invalidate_entry( -						data->sfrbases[i], iova); -				sysmmu_unblock(data->sfrbases[i]); -			} +		unsigned int num_inv = 1; + +		if (!IS_ERR(data->clk_master)) +			clk_enable(data->clk_master); + +		/* +		 * L2TLB invalidation required +		 * 4KB page: 1 invalidation +		 * 64KB page: 16 
invalidation +		 * 1MB page: 64 invalidation +		 * because it is set-associative TLB +		 * with 8-way and 64 sets. +		 * 1MB page can be cached in one of all sets. +		 * 64KB page can be one of 16 consecutive sets. +		 */ +		if (MMU_MAJ_VER(__raw_sysmmu_version(data)) == 2) +			num_inv = min_t(unsigned int, size / PAGE_SIZE, 64); + +		if (sysmmu_block(data->sfrbase)) { +			__sysmmu_tlb_invalidate_entry( +				data->sfrbase, iova, num_inv); +			sysmmu_unblock(data->sfrbase);  		} +		if (!IS_ERR(data->clk_master)) +			clk_disable(data->clk_master);  	} else { -		dev_dbg(data->sysmmu, -			"(%s) Disabled. Skipping invalidating TLB.\n", -			data->dbgname); +		dev_dbg(dev, "disabled. Skipping TLB invalidation @ %#x\n", +			iova);  	} - -	read_unlock_irqrestore(&data->lock, flags); +	spin_unlock_irqrestore(&data->lock, flags);  }  void exynos_sysmmu_tlb_invalidate(struct device *dev)  { +	struct exynos_iommu_owner *owner = dev->archdata.iommu;  	unsigned long flags; -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu); +	struct sysmmu_drvdata *data; -	read_lock_irqsave(&data->lock, flags); +	data = dev_get_drvdata(owner->sysmmu); +	spin_lock_irqsave(&data->lock, flags);  	if (is_sysmmu_active(data)) { -		int i; -		for (i = 0; i < data->nsfrs; i++) { -			if (sysmmu_block(data->sfrbases[i])) { -				__sysmmu_tlb_invalidate(data->sfrbases[i]); -				sysmmu_unblock(data->sfrbases[i]); -			} +		if (!IS_ERR(data->clk_master)) +			clk_enable(data->clk_master); +		if (sysmmu_block(data->sfrbase)) { +			__sysmmu_tlb_invalidate(data->sfrbase); +			sysmmu_unblock(data->sfrbase);  		} +		if (!IS_ERR(data->clk_master)) +			clk_disable(data->clk_master);  	} else { -		dev_dbg(data->sysmmu, -			"(%s) Disabled. Skipping invalidating TLB.\n", -			data->dbgname); +		dev_dbg(dev, "disabled. 
Skipping TLB invalidation\n");  	} - -	read_unlock_irqrestore(&data->lock, flags); +	spin_unlock_irqrestore(&data->lock, flags);  } -static int exynos_sysmmu_probe(struct platform_device *pdev) +static int __init exynos_sysmmu_probe(struct platform_device *pdev)  { -	int i, ret; -	struct device *dev; +	int irq, ret; +	struct device *dev = &pdev->dev;  	struct sysmmu_drvdata *data; +	struct resource *res; -	dev = &pdev->dev; +	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); +	if (!data) +		return -ENOMEM; -	data = kzalloc(sizeof(*data), GFP_KERNEL); -	if (!data) { -		dev_dbg(dev, "Not enough memory\n"); -		ret = -ENOMEM; -		goto err_alloc; -	} +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0); +	data->sfrbase = devm_ioremap_resource(dev, res); +	if (IS_ERR(data->sfrbase)) +		return PTR_ERR(data->sfrbase); -	ret = dev_set_drvdata(dev, data); -	if (ret) { -		dev_dbg(dev, "Unabled to initialize driver data\n"); -		goto err_init; +	irq = platform_get_irq(pdev, 0); +	if (irq <= 0) { +		dev_err(dev, "Unable to find IRQ resource\n"); +		return irq;  	} -	data->nsfrs = pdev->num_resources / 2; -	data->sfrbases = kmalloc(sizeof(*data->sfrbases) * data->nsfrs, -								GFP_KERNEL); -	if (data->sfrbases == NULL) { -		dev_dbg(dev, "Not enough memory\n"); -		ret = -ENOMEM; -		goto err_init; -	} - -	for (i = 0; i < data->nsfrs; i++) { -		struct resource *res; -		res = platform_get_resource(pdev, IORESOURCE_MEM, i); -		if (!res) { -			dev_dbg(dev, "Unable to find IOMEM region\n"); -			ret = -ENOENT; -			goto err_res; -		} - -		data->sfrbases[i] = ioremap(res->start, resource_size(res)); -		if (!data->sfrbases[i]) { -			dev_dbg(dev, "Unable to map IOMEM @ PA:%#x\n", -							res->start); -			ret = -ENOENT; -			goto err_res; -		} +	ret = devm_request_irq(dev, irq, exynos_sysmmu_irq, 0, +				dev_name(dev), data); +	if (ret) { +		dev_err(dev, "Unabled to register handler of irq %d\n", irq); +		return ret;  	} -	for (i = 0; i < data->nsfrs; i++) { -		ret = platform_get_irq(pdev, i); -		if (ret <= 0) { -			dev_dbg(dev, "Unable to find IRQ resource\n"); -			goto err_irq; -		} - -		ret = request_irq(ret, exynos_sysmmu_irq, 0, -					dev_name(dev), data); +	data->clk = devm_clk_get(dev, "sysmmu"); +	if (IS_ERR(data->clk)) { +		dev_err(dev, "Failed to get clock!\n"); +		return PTR_ERR(data->clk); +	} else  { +		ret = clk_prepare(data->clk);  		if (ret) { -			dev_dbg(dev, "Unabled to register interrupt handler\n"); -			goto err_irq; +			dev_err(dev, "Failed to prepare clk\n"); +			return ret;  		}  	} -	if (dev_get_platdata(dev)) { -		char *deli, *beg; -		struct sysmmu_platform_data *platdata = dev_get_platdata(dev); - -		beg = platdata->clockname; - -		for (deli = beg; (*deli != '\0') && (*deli != ','); deli++) -			/* NOTHING */; - -		if (*deli == '\0') -			deli = NULL; -		else -			*deli = '\0'; - -		data->clk[0] = clk_get(dev, beg); -		if (IS_ERR(data->clk[0])) { -			data->clk[0] = NULL; -			dev_dbg(dev, "No clock descriptor registered\n"); -		} - -		if (data->clk[0] && deli) { -			*deli = ','; -			data->clk[1] = clk_get(dev, deli + 1); -			if (IS_ERR(data->clk[1])) -				data->clk[1] = NULL; +	data->clk_master = devm_clk_get(dev, "master"); +	if (!IS_ERR(data->clk_master)) { +		ret = clk_prepare(data->clk_master); +		if (ret) { +			clk_unprepare(data->clk); +			dev_err(dev, "Failed to prepare master's clk\n"); +			return ret;  		} - -		data->dbgname = platdata->dbgname;  	}  	data->sysmmu = dev; -	rwlock_init(&data->lock); -	INIT_LIST_HEAD(&data->node); +	spin_lock_init(&data->lock); -	
__set_fault_handler(data, &default_fault_handler); +	platform_set_drvdata(pdev, data); -	if (dev->parent) -		pm_runtime_enable(dev); +	pm_runtime_enable(dev); -	dev_dbg(dev, "(%s) Initialized\n", data->dbgname);  	return 0; -err_irq: -	while (i-- > 0) { -		int irq; - -		irq = platform_get_irq(pdev, i); -		free_irq(irq, data); -	} -err_res: -	while (data->nsfrs-- > 0) -		iounmap(data->sfrbases[data->nsfrs]); -	kfree(data->sfrbases); -err_init: -	kfree(data); -err_alloc: -	dev_err(dev, "Failed to initialize\n"); -	return ret;  } -static struct platform_driver exynos_sysmmu_driver = { -	.probe		= exynos_sysmmu_probe, -	.driver		= { +static const struct of_device_id sysmmu_of_match[] __initconst = { +	{ .compatible	= "samsung,exynos-sysmmu", }, +	{ }, +}; + +static struct platform_driver exynos_sysmmu_driver __refdata = { +	.probe	= exynos_sysmmu_probe, +	.driver	= {  		.owner		= THIS_MODULE,  		.name		= "exynos-sysmmu", +		.of_match_table	= sysmmu_of_match,  	}  }; @@ -667,21 +700,32 @@ static inline void pgtable_flush(void *vastart, void *vaend)  static int exynos_iommu_domain_init(struct iommu_domain *domain)  {  	struct exynos_iommu_domain *priv; +	int i;  	priv = kzalloc(sizeof(*priv), GFP_KERNEL);  	if (!priv)  		return -ENOMEM; -	priv->pgtable = (unsigned long *)__get_free_pages( -						GFP_KERNEL | __GFP_ZERO, 2); +	priv->pgtable = (sysmmu_pte_t *)__get_free_pages(GFP_KERNEL, 2);  	if (!priv->pgtable)  		goto err_pgtable; -	priv->lv2entcnt = (short *)__get_free_pages( -						GFP_KERNEL | __GFP_ZERO, 1); +	priv->lv2entcnt = (short *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);  	if (!priv->lv2entcnt)  		goto err_counter; +	/* w/a of System MMU v3.3 to prevent caching 1MiB mapping */ +	for (i = 0; i < NUM_LV1ENTRIES; i += 8) { +		priv->pgtable[i + 0] = ZERO_LV2LINK; +		priv->pgtable[i + 1] = ZERO_LV2LINK; +		priv->pgtable[i + 2] = ZERO_LV2LINK; +		priv->pgtable[i + 3] = ZERO_LV2LINK; +		priv->pgtable[i + 4] = ZERO_LV2LINK; +		priv->pgtable[i + 5] = ZERO_LV2LINK; +		priv->pgtable[i + 6] = ZERO_LV2LINK; +		priv->pgtable[i + 7] = ZERO_LV2LINK; +	} +  	pgtable_flush(priv->pgtable, priv->pgtable + NUM_LV1ENTRIES);  	spin_lock_init(&priv->lock); @@ -705,7 +749,7 @@ err_pgtable:  static void exynos_iommu_domain_destroy(struct iommu_domain *domain)  {  	struct exynos_iommu_domain *priv = domain->priv; -	struct sysmmu_drvdata *data; +	struct exynos_iommu_owner *owner;  	unsigned long flags;  	int i; @@ -713,16 +757,20 @@ static void exynos_iommu_domain_destroy(struct iommu_domain *domain)  	spin_lock_irqsave(&priv->lock, flags); -	list_for_each_entry(data, &priv->clients, node) { -		while (!exynos_sysmmu_disable(data->dev)) +	list_for_each_entry(owner, &priv->clients, client) { +		while (!exynos_sysmmu_disable(owner->dev))  			; /* until System MMU is actually disabled */  	} +	while (!list_empty(&priv->clients)) +		list_del_init(priv->clients.next); +  	spin_unlock_irqrestore(&priv->lock, flags);  	for (i = 0; i < NUM_LV1ENTRIES; i++)  		if (lv1ent_page(priv->pgtable + i)) -			kfree(__va(lv2table_base(priv->pgtable + i))); +			kmem_cache_free(lv2table_kmem_cache, +				phys_to_virt(lv2table_base(priv->pgtable + i)));  	free_pages((unsigned long)priv->pgtable, 2);  	free_pages((unsigned long)priv->lv2entcnt, 1); @@ -733,114 +781,134 @@ static void exynos_iommu_domain_destroy(struct iommu_domain *domain)  static int exynos_iommu_attach_device(struct iommu_domain *domain,  				   struct device *dev)  { -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu); +	struct exynos_iommu_owner 
*owner = dev->archdata.iommu;  	struct exynos_iommu_domain *priv = domain->priv; +	phys_addr_t pagetable = virt_to_phys(priv->pgtable);  	unsigned long flags;  	int ret; -	ret = pm_runtime_get_sync(data->sysmmu); -	if (ret < 0) -		return ret; - -	ret = 0; -  	spin_lock_irqsave(&priv->lock, flags); -	ret = __exynos_sysmmu_enable(data, __pa(priv->pgtable), domain); - +	ret = __exynos_sysmmu_enable(dev, pagetable, domain);  	if (ret == 0) { -		/* 'data->node' must not be appeared in priv->clients */ -		BUG_ON(!list_empty(&data->node)); -		data->dev = dev; -		list_add_tail(&data->node, &priv->clients); +		list_add_tail(&owner->client, &priv->clients); +		owner->domain = domain;  	}  	spin_unlock_irqrestore(&priv->lock, flags);  	if (ret < 0) { -		dev_err(dev, "%s: Failed to attach IOMMU with pgtable %#lx\n", -				__func__, __pa(priv->pgtable)); -		pm_runtime_put(data->sysmmu); -	} else if (ret > 0) { -		dev_dbg(dev, "%s: IOMMU with pgtable 0x%lx already attached\n", -					__func__, __pa(priv->pgtable)); -	} else { -		dev_dbg(dev, "%s: Attached new IOMMU with pgtable 0x%lx\n", -					__func__, __pa(priv->pgtable)); +		dev_err(dev, "%s: Failed to attach IOMMU with pgtable %pa\n", +					__func__, &pagetable); +		return ret;  	} +	dev_dbg(dev, "%s: Attached IOMMU with pgtable %pa %s\n", +		__func__, &pagetable, (ret == 0) ? "" : ", again"); +  	return ret;  }  static void exynos_iommu_detach_device(struct iommu_domain *domain,  				    struct device *dev)  { -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu); +	struct exynos_iommu_owner *owner;  	struct exynos_iommu_domain *priv = domain->priv; -	struct list_head *pos; +	phys_addr_t pagetable = virt_to_phys(priv->pgtable);  	unsigned long flags; -	bool found = false;  	spin_lock_irqsave(&priv->lock, flags); -	list_for_each(pos, &priv->clients) { -		if (list_entry(pos, struct sysmmu_drvdata, node) == data) { -			found = true; +	list_for_each_entry(owner, &priv->clients, client) { +		if (owner == dev->archdata.iommu) { +			if (exynos_sysmmu_disable(dev)) { +				list_del_init(&owner->client); +				owner->domain = NULL; +			}  			break;  		}  	} -	if (!found) -		goto finish; - -	if (__exynos_sysmmu_disable(data)) { -		dev_dbg(dev, "%s: Detached IOMMU with pgtable %#lx\n", -					__func__, __pa(priv->pgtable)); -		list_del_init(&data->node); - -	} else { -		dev_dbg(dev, "%s: Detaching IOMMU with pgtable %#lx delayed", -					__func__, __pa(priv->pgtable)); -	} - -finish:  	spin_unlock_irqrestore(&priv->lock, flags); -	if (found) -		pm_runtime_put(data->sysmmu); +	if (owner == dev->archdata.iommu) +		dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n", +					__func__, &pagetable); +	else +		dev_err(dev, "%s: No IOMMU is attached\n", __func__);  } -static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova, -					short *pgcounter) +static sysmmu_pte_t *alloc_lv2entry(struct exynos_iommu_domain *priv, +		sysmmu_pte_t *sent, sysmmu_iova_t iova, short *pgcounter)  { +	if (lv1ent_section(sent)) { +		WARN(1, "Trying mapping on %#08x mapped with 1MiB page", iova); +		return ERR_PTR(-EADDRINUSE); +	} +  	if (lv1ent_fault(sent)) { -		unsigned long *pent; +		sysmmu_pte_t *pent; +		bool need_flush_flpd_cache = lv1ent_zero(sent); -		pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC); -		BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1)); +		pent = kmem_cache_zalloc(lv2table_kmem_cache, GFP_ATOMIC); +		BUG_ON((unsigned int)pent & (LV2TABLE_SIZE - 1));  		if (!pent) -			return NULL; +			return ERR_PTR(-ENOMEM); -		*sent = 
mk_lv1ent_page(__pa(pent)); +	*sent = mk_lv1ent_page(virt_to_phys(pent));  	*pgcounter = NUM_LV2ENTRIES;  	pgtable_flush(pent, pent + NUM_LV2ENTRIES);  	pgtable_flush(sent, sent + 1); + +		/* +		 * If the prefetched SLPD is a fault SLPD in zero_l2_table, the FLPD +		 * cache may cache the address of zero_l2_table. This function +		 * replaces the zero_l2_table with a new L2 page table to write +		 * valid mappings. +		 * Accessing the valid area may cause a page fault since the FLPD +		 * cache may still cache zero_l2_table for the valid area +		 * instead of the new L2 page table that has the mapping +		 * information of the valid area. +		 * Thus any replacement of zero_l2_table with another valid L2 +		 * page table must involve FLPD cache invalidation for System +		 * MMU v3.3. +		 * FLPD cache invalidation is performed with TLB invalidation +		 * by VPN without blocking. It is safe to invalidate the TLB without +		 * blocking because the target address of the TLB invalidation is +		 * not currently mapped. +		 */ +		if (need_flush_flpd_cache) { +			struct exynos_iommu_owner *owner; + +			spin_lock(&priv->lock); +			list_for_each_entry(owner, &priv->clients, client) +				sysmmu_tlb_invalidate_flpdcache( +							owner->dev, iova); +			spin_unlock(&priv->lock); +		}  	}  	return page_entry(sent, iova);  } -static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short *pgcnt) +static int lv1set_section(struct exynos_iommu_domain *priv, +			  sysmmu_pte_t *sent, sysmmu_iova_t iova, +			  phys_addr_t paddr, short *pgcnt)  { -	if (lv1ent_section(sent)) +	if (lv1ent_section(sent)) { +		WARN(1, "Trying mapping on 1MiB@%#08x that is mapped", +			iova);  		return -EADDRINUSE; +	}  	if (lv1ent_page(sent)) { -		if (*pgcnt != NUM_LV2ENTRIES) +		if (*pgcnt != NUM_LV2ENTRIES) { +			WARN(1, "Trying mapping on 1MiB@%#08x that is mapped", +				iova);  			return -EADDRINUSE; +		} -		kfree(page_entry(sent, 0)); - +		kmem_cache_free(lv2table_kmem_cache, page_entry(sent, 0));  		*pgcnt = 0;  	} @@ -848,14 +916,26 @@ static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short *pgcnt)  	pgtable_flush(sent, sent + 1); +	spin_lock(&priv->lock); +	if (lv1ent_page_zero(sent)) { +		struct exynos_iommu_owner *owner; +		/* +		 * Flushing the FLPD cache in System MMU v3.3 that may cache a FLPD +		 * entry by speculative prefetch of an SLPD which has no mapping. 
+		 */ +		list_for_each_entry(owner, &priv->clients, client) +			sysmmu_tlb_invalidate_flpdcache(owner->dev, iova); +	} +	spin_unlock(&priv->lock); +  	return 0;  } -static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size, +static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size,  								short *pgcnt)  {  	if (size == SPAGE_SIZE) { -		if (!lv2ent_fault(pent)) +		if (WARN_ON(!lv2ent_fault(pent)))  			return -EADDRINUSE;  		*pent = mk_lv2ent_spage(paddr); @@ -863,9 +943,11 @@ static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size,  		*pgcnt -= 1;  	} else { /* size == LPAGE_SIZE */  		int i; +  		for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) { -			if (!lv2ent_fault(pent)) { -				memset(pent, 0, sizeof(*pent) * i); +			if (WARN_ON(!lv2ent_fault(pent))) { +				if (i > 0) +					memset(pent - i, 0, sizeof(*pent) * i);  				return -EADDRINUSE;  			} @@ -878,11 +960,38 @@ static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size,  	return 0;  } -static int exynos_iommu_map(struct iommu_domain *domain, unsigned long iova, +/* + * *CAUTION* to the I/O virtual memory managers that support exynos-iommu: + * + * System MMU v3.x has advanced logic to improve address translation + * performance by caching more page table entries during a page table walk. + * However, the logic has a bug: it also caches fault page table entries, and the + * System MMU reports a page fault if a cached fault entry is hit, even though + * that entry has since been updated to a valid entry. + * To prevent caching fault page table entries which may be updated to valid + * entries later, the virtual memory manager must apply the workarounds + * described below. + * + * Any two consecutive I/O virtual address regions must have a hole of 128KiB + * in maximum to prevent misbehavior of System MMU 3.x. (w/a of h/w bug) + * + * Precisely, the start address of any I/O virtual region must be aligned to + * the following sizes for System MMU v3.1 and v3.2: + * System MMU v3.1: 128KiB + * System MMU v3.2: 256KiB + * + * Because System MMU v3.3 caches page table entries more aggressively, it needs + * further workarounds: + * - Any two consecutive I/O virtual regions must have a hole of at least + *   128KiB between them. + * - The start address of an I/O virtual region must be aligned to 128KiB. 
+ */ +static int exynos_iommu_map(struct iommu_domain *domain, unsigned long l_iova,  			 phys_addr_t paddr, size_t size, int prot)  {  	struct exynos_iommu_domain *priv = domain->priv; -	unsigned long *entry; +	sysmmu_pte_t *entry; +	sysmmu_iova_t iova = (sysmmu_iova_t)l_iova;  	unsigned long flags;  	int ret = -ENOMEM; @@ -893,38 +1002,52 @@ static int exynos_iommu_map(struct iommu_domain *domain, unsigned long iova,  	entry = section_entry(priv->pgtable, iova);  	if (size == SECT_SIZE) { -		ret = lv1set_section(entry, paddr, +		ret = lv1set_section(priv, entry, iova, paddr,  					&priv->lv2entcnt[lv1ent_offset(iova)]);  	} else { -		unsigned long *pent; +		sysmmu_pte_t *pent; -		pent = alloc_lv2entry(entry, iova, +		pent = alloc_lv2entry(priv, entry, iova,  					&priv->lv2entcnt[lv1ent_offset(iova)]); -		if (!pent) -			ret = -ENOMEM; +		if (IS_ERR(pent)) +			ret = PTR_ERR(pent);  		else  			ret = lv2set_page(pent, paddr, size,  					&priv->lv2entcnt[lv1ent_offset(iova)]);  	} -	if (ret) { -		pr_debug("%s: Failed to map iova 0x%lx/0x%x bytes\n", -							__func__, iova, size); -	} +	if (ret) +		pr_err("%s: Failed(%d) to map %#zx bytes @ %#x\n", +			__func__, ret, size, iova);  	spin_unlock_irqrestore(&priv->pgtablelock, flags);  	return ret;  } +static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *priv, +						sysmmu_iova_t iova, size_t size) +{ +	struct exynos_iommu_owner *owner; +	unsigned long flags; + +	spin_lock_irqsave(&priv->lock, flags); + +	list_for_each_entry(owner, &priv->clients, client) +		sysmmu_tlb_invalidate_entry(owner->dev, iova, size); + +	spin_unlock_irqrestore(&priv->lock, flags); +} +  static size_t exynos_iommu_unmap(struct iommu_domain *domain, -					       unsigned long iova, size_t size) +					unsigned long l_iova, size_t size)  {  	struct exynos_iommu_domain *priv = domain->priv; -	struct sysmmu_drvdata *data; +	sysmmu_iova_t iova = (sysmmu_iova_t)l_iova; +	sysmmu_pte_t *ent; +	size_t err_pgsize;  	unsigned long flags; -	unsigned long *ent;  	BUG_ON(priv->pgtable == NULL); @@ -933,9 +1056,12 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,  	ent = section_entry(priv->pgtable, iova);  	if (lv1ent_section(ent)) { -		BUG_ON(size < SECT_SIZE); +		if (WARN_ON(size < SECT_SIZE)) { +			err_pgsize = SECT_SIZE; +			goto err; +		} -		*ent = 0; +		*ent = ZERO_LV2LINK; /* w/a for h/w bug in Sysmem MMU v3.3 */  		pgtable_flush(ent, ent + 1);  		size = SECT_SIZE;  		goto done; @@ -959,34 +1085,42 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,  	if (lv2ent_small(ent)) {  		*ent = 0;  		size = SPAGE_SIZE; +		pgtable_flush(ent, ent + 1);  		priv->lv2entcnt[lv1ent_offset(iova)] += 1;  		goto done;  	}  	/* lv1ent_large(ent) == true here */ -	BUG_ON(size < LPAGE_SIZE); +	if (WARN_ON(size < LPAGE_SIZE)) { +		err_pgsize = LPAGE_SIZE; +		goto err; +	}  	memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE); +	pgtable_flush(ent, ent + SPAGES_PER_LPAGE);  	size = LPAGE_SIZE;  	priv->lv2entcnt[lv1ent_offset(iova)] += SPAGES_PER_LPAGE;  done:  	spin_unlock_irqrestore(&priv->pgtablelock, flags); -	spin_lock_irqsave(&priv->lock, flags); -	list_for_each_entry(data, &priv->clients, node) -		sysmmu_tlb_invalidate_entry(data->dev, iova); -	spin_unlock_irqrestore(&priv->lock, flags); - +	exynos_iommu_tlb_invalidate_entry(priv, iova, size);  	return size; +err: +	spin_unlock_irqrestore(&priv->pgtablelock, flags); + +	pr_err("%s: Failed: size(%#zx) @ %#x is smaller than page size %#zx\n", +		__func__, size, iova, err_pgsize); + +	return 0;  }  
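The *CAUTION* comment above constrains how an I/O virtual address allocator should lay out regions on top of exynos-iommu. A minimal sketch of helpers that honour the System MMU v3.3 rules is shown below; the helper names and the padding scheme are illustrative assumptions, not part of this patch, and only the generic ALIGN() macro and the SZ_128K constant from the common kernel headers are relied on.

#include <linux/kernel.h>	/* ALIGN() */
#include <linux/sizes.h>	/* SZ_128K */

/* Hypothetical helpers for an IOVA allocator driving System MMU v3.3. */
static unsigned long sysmmu_v33_region_start(unsigned long base)
{
	/* start address of an I/O virtual region, aligned to 128KiB */
	return ALIGN(base, SZ_128K);
}

static unsigned long sysmmu_v33_next_base(unsigned long start, size_t size)
{
	/* leave a hole of at least 128KiB before the next region starts */
	return sysmmu_v33_region_start(start + size + SZ_128K);
}

For System MMU v3.1 and v3.2, the same pattern would use the 128KiB and 256KiB start alignments listed in the comment.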
static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain,  					  dma_addr_t iova)  {  	struct exynos_iommu_domain *priv = domain->priv; -	unsigned long *entry; +	sysmmu_pte_t *entry;  	unsigned long flags;  	phys_addr_t phys = 0; @@ -1010,14 +1144,42 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain,  	return phys;  } +static int exynos_iommu_add_device(struct device *dev) +{ +	struct iommu_group *group; +	int ret; + +	group = iommu_group_get(dev); + +	if (!group) { +		group = iommu_group_alloc(); +		if (IS_ERR(group)) { +			dev_err(dev, "Failed to allocate IOMMU group\n"); +			return PTR_ERR(group); +		} +	} + +	ret = iommu_group_add_device(group, dev); +	iommu_group_put(group); + +	return ret; +} + +static void exynos_iommu_remove_device(struct device *dev) +{ +	iommu_group_remove_device(dev); +} +  static struct iommu_ops exynos_iommu_ops = { -	.domain_init = &exynos_iommu_domain_init, -	.domain_destroy = &exynos_iommu_domain_destroy, -	.attach_dev = &exynos_iommu_attach_device, -	.detach_dev = &exynos_iommu_detach_device, -	.map = &exynos_iommu_map, -	.unmap = &exynos_iommu_unmap, -	.iova_to_phys = &exynos_iommu_iova_to_phys, +	.domain_init = exynos_iommu_domain_init, +	.domain_destroy = exynos_iommu_domain_destroy, +	.attach_dev = exynos_iommu_attach_device, +	.detach_dev = exynos_iommu_detach_device, +	.map = exynos_iommu_map, +	.unmap = exynos_iommu_unmap, +	.iova_to_phys = exynos_iommu_iova_to_phys, +	.add_device = exynos_iommu_add_device, +	.remove_device = exynos_iommu_remove_device,  	.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,  }; @@ -1025,11 +1187,41 @@ static int __init exynos_iommu_init(void)  {  	int ret; +	lv2table_kmem_cache = kmem_cache_create("exynos-iommu-lv2table", +				LV2TABLE_SIZE, LV2TABLE_SIZE, 0, NULL); +	if (!lv2table_kmem_cache) { +		pr_err("%s: Failed to create kmem cache\n", __func__); +		return -ENOMEM; +	} +  	ret = platform_driver_register(&exynos_sysmmu_driver); +	if (ret) { +		pr_err("%s: Failed to register driver\n", __func__); +		goto err_reg_driver; +	} -	if (ret == 0) -		bus_set_iommu(&platform_bus_type, &exynos_iommu_ops); +	zero_lv2_table = kmem_cache_zalloc(lv2table_kmem_cache, GFP_KERNEL); +	if (zero_lv2_table == NULL) { +		pr_err("%s: Failed to allocate zero level2 page table\n", +			__func__); +		ret = -ENOMEM; +		goto err_zero_lv2; +	} +	ret = bus_set_iommu(&platform_bus_type, &exynos_iommu_ops); +	if (ret) { +		pr_err("%s: Failed to register exynos-iommu driver.\n", +								__func__); +		goto err_set_iommu; +	} + +	return 0; +err_set_iommu: +	kmem_cache_free(lv2table_kmem_cache, zero_lv2_table); +err_zero_lv2: +	platform_driver_unregister(&exynos_sysmmu_driver); +err_reg_driver: +	kmem_cache_destroy(lv2table_kmem_cache);  	return ret;  }  subsys_initcall(exynos_iommu_init); diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index cba0498eb01..bb446d742a2 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -170,10 +170,10 @@ int pamu_disable_liodn(int liodn)  static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size)  {  	/* Bug if not a power of 2 */ -	BUG_ON(!is_power_of_2(addrspace_size)); +	BUG_ON((addrspace_size & (addrspace_size - 1)));  	/* window size is 2^(WSE+1) bytes */ -	return __ffs(addrspace_size) - 1; +	return fls64(addrspace_size) - 2;  }  /* Derive the PAACE window count encoding for the subwindow count */ @@ -351,7 +351,7 @@ int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size,  	struct paace 
*ppaace;  	unsigned long fspi; -	if (!is_power_of_2(win_size) || win_size < PAMU_PAGE_SIZE) { +	if ((win_size & (win_size - 1)) || win_size < PAMU_PAGE_SIZE) {  		pr_debug("window size too small or not a power of two %llx\n", win_size);  		return -EINVAL;  	} @@ -464,7 +464,7 @@ int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin,  		return -ENOENT;  	} -	if (!is_power_of_2(subwin_size) || subwin_size < PAMU_PAGE_SIZE) { +	if ((subwin_size & (subwin_size - 1)) || subwin_size < PAMU_PAGE_SIZE) {  		pr_debug("subwindow size out of range, or not a power of 2\n");  		return -EINVAL;  	} @@ -592,8 +592,7 @@ found_cpu_node:  		/* advance to next node in cache hierarchy */  		node = of_find_node_by_phandle(*prop);  		if (!node) { -			pr_debug("Invalid node for cache hierarchy %s\n", -				node->full_name); +			pr_debug("Invalid node for cache hierarchy\n");  			return ~(u32)0;  		}  	} diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index c857c30da97..af47648301a 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -301,7 +301,7 @@ static int check_size(u64 size, dma_addr_t iova)  	 * Size must be a power of two and at least be equal  	 * to PAMU page size.  	 */ -	if (!is_power_of_2(size) || size < PAMU_PAGE_SIZE) { +	if ((size & (size - 1)) || size < PAMU_PAGE_SIZE) {  		pr_debug("%s: size too small or not a power of two\n", __func__);  		return -EINVAL;  	} @@ -335,11 +335,6 @@ static struct fsl_dma_domain *iommu_alloc_dma_domain(void)  	return domain;  } -static inline struct device_domain_info *find_domain(struct device *dev) -{ -	return dev->archdata.iommu_domain; -} -  static void remove_device_ref(struct device_domain_info *info, u32 win_cnt)  {  	unsigned long flags; @@ -380,7 +375,7 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d  	 * Check here if the device is already attached to domain or not.  	 * If the device is already attached to a domain detach it.  	 */ -	old_domain_info = find_domain(dev); +	old_domain_info = dev->archdata.iommu_domain;  	if (old_domain_info && old_domain_info->domain != dma_domain) {  		spin_unlock_irqrestore(&device_domain_lock, flags);  		detach_device(dev, old_domain_info->domain); @@ -399,7 +394,7 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d  	 * the info for the first LIODN as all  	 * LIODNs share the same domain  	 */ -	if (!old_domain_info) +	if (!dev->archdata.iommu_domain)  		dev->archdata.iommu_domain = info;  	spin_unlock_irqrestore(&device_domain_lock, flags); @@ -691,7 +686,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,  	 * Use LIODN of the PCI controller while attaching a  	 * PCI device.  	 */ -	if (dev->bus == &pci_bus_type) { +	if (dev_is_pci(dev)) {  		pdev = to_pci_dev(dev);  		pci_ctl = pci_bus_to_host(pdev->bus);  		/* @@ -729,7 +724,7 @@ static void fsl_pamu_detach_device(struct iommu_domain *domain,  	 * Use LIODN of the PCI controller while detaching a  	 * PCI device.  	 
*/ -	if (dev->bus == &pci_bus_type) { +	if (dev_is_pci(dev)) {  		pdev = to_pci_dev(dev);  		pci_ctl = pci_bus_to_host(pdev->bus);  		/* @@ -1042,12 +1037,15 @@ root_bus:  			group = get_shared_pci_device_group(pdev);  	} +	if (!group) +		group = ERR_PTR(-ENODEV); +  	return group;  }  static int fsl_pamu_add_device(struct device *dev)  { -	struct iommu_group *group = NULL; +	struct iommu_group *group = ERR_PTR(-ENODEV);  	struct pci_dev *pdev;  	const u32 *prop;  	int ret, len; @@ -1056,7 +1054,7 @@ static int fsl_pamu_add_device(struct device *dev)  	 * For platform devices we allocate a separate group for  	 * each of the devices.  	 */ -	if (dev->bus == &pci_bus_type) { +	if (dev_is_pci(dev)) {  		pdev = to_pci_dev(dev);  		/* Don't create device groups for virtual PCI bridges */  		if (pdev->subordinate) @@ -1070,7 +1068,7 @@ static int fsl_pamu_add_device(struct device *dev)  			group = get_device_iommu_group(dev);  	} -	if (!group || IS_ERR(group)) +	if (IS_ERR(group))  		return PTR_ERR(group);  	ret = iommu_group_add_device(group, dev); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 15e9b57e9cf..51b6b77dc3e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1,5 +1,5 @@  /* - * Copyright (c) 2006, Intel Corporation. + * Copyright © 2006-2014 Intel Corporation.   *   * This program is free software; you can redistribute it and/or modify it   * under the terms and conditions of the GNU General Public License, @@ -10,15 +10,11 @@   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for   * more details.   * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - * Copyright (C) 2006-2008 Intel Corporation - * Author: Ashok Raj <ashok.raj@intel.com> - * Author: Shaohua Li <shaohua.li@intel.com> - * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> - * Author: Fenghua Yu <fenghua.yu@intel.com> + * Authors: David Woodhouse <dwmw2@infradead.org>, + *          Ashok Raj <ashok.raj@intel.com>, + *          Shaohua Li <shaohua.li@intel.com>, + *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>, + *          Fenghua Yu <fenghua.yu@intel.com>   */  #include <linux/init.h> @@ -33,6 +29,7 @@  #include <linux/dmar.h>  #include <linux/dma-mapping.h>  #include <linux/mempool.h> +#include <linux/memory.h>  #include <linux/timer.h>  #include <linux/iova.h>  #include <linux/iommu.h> @@ -42,6 +39,7 @@  #include <linux/dmi.h>  #include <linux/pci-ats.h>  #include <linux/memblock.h> +#include <linux/dma-contiguous.h>  #include <asm/irq_remapping.h>  #include <asm/cacheflush.h>  #include <asm/iommu.h> @@ -63,6 +61,7 @@  #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48  #define MAX_AGAW_WIDTH 64 +#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)  #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)  #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1) @@ -106,12 +105,12 @@ static inline int agaw_to_level(int agaw)  static inline int agaw_to_width(int agaw)  { -	return 30 + agaw * LEVEL_STRIDE; +	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);  }  static inline int width_to_agaw(int width)  { -	return (width - 30) / LEVEL_STRIDE; +	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);  }  static inline unsigned int level_to_offset_bits(int level) @@ -141,7 +140,7 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)  static inline unsigned long lvl_to_nr_pages(unsigned int lvl)  { -	return  1 << ((lvl - 1) * LEVEL_STRIDE); +	return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);  }  /* VT-d pages must always be _smaller_ than MM pages. 
Otherwise things @@ -288,26 +287,6 @@ static inline void dma_clear_pte(struct dma_pte *pte)  	pte->val = 0;  } -static inline void dma_set_pte_readable(struct dma_pte *pte) -{ -	pte->val |= DMA_PTE_READ; -} - -static inline void dma_set_pte_writable(struct dma_pte *pte) -{ -	pte->val |= DMA_PTE_WRITE; -} - -static inline void dma_set_pte_snp(struct dma_pte *pte) -{ -	pte->val |= DMA_PTE_SNP; -} - -static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) -{ -	pte->val = (pte->val & ~3) | (prot & 3); -} -  static inline u64 dma_pte_addr(struct dma_pte *pte)  {  #ifdef CONFIG_64BIT @@ -318,11 +297,6 @@ static inline u64 dma_pte_addr(struct dma_pte *pte)  #endif  } -static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn) -{ -	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT; -} -  static inline bool dma_pte_present(struct dma_pte *pte)  {  	return (pte->val & 3) != 0; @@ -396,23 +370,46 @@ struct dmar_domain {  struct device_domain_info {  	struct list_head link;	/* link to domain siblings */  	struct list_head global; /* link to global list */ -	int segment;		/* PCI domain */  	u8 bus;			/* PCI bus number */  	u8 devfn;		/* PCI devfn number */ -	struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */ +	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */  	struct intel_iommu *iommu; /* IOMMU used by this device */  	struct dmar_domain *domain; /* pointer to domain */  }; +struct dmar_rmrr_unit { +	struct list_head list;		/* list of rmrr units	*/ +	struct acpi_dmar_header *hdr;	/* ACPI header		*/ +	u64	base_address;		/* reserved base address*/ +	u64	end_address;		/* reserved end address */ +	struct dmar_dev_scope *devices;	/* target devices */ +	int	devices_cnt;		/* target device count */ +}; + +struct dmar_atsr_unit { +	struct list_head list;		/* list of ATSR units */ +	struct acpi_dmar_header *hdr;	/* ACPI header */ +	struct dmar_dev_scope *devices;	/* target devices */ +	int devices_cnt;		/* target device count */ +	u8 include_all:1;		/* include all ports */ +}; + +static LIST_HEAD(dmar_atsr_units); +static LIST_HEAD(dmar_rmrr_units); + +#define for_each_rmrr_units(rmrr) \ +	list_for_each_entry(rmrr, &dmar_rmrr_units, list) +  static void flush_unmaps_timeout(unsigned long data); -DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0); +static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);  #define HIGH_WATER_MARK 250  struct deferred_flush_tables {  	int next;  	struct iova *iova[HIGH_WATER_MARK];  	struct dmar_domain *domain[HIGH_WATER_MARK]; +	struct page *freelist[HIGH_WATER_MARK];  };  static struct deferred_flush_tables *deferred_flush; @@ -426,7 +423,12 @@ static LIST_HEAD(unmaps_to_do);  static int timer_on;  static long list_size; +static void domain_exit(struct dmar_domain *domain);  static void domain_remove_dev_info(struct dmar_domain *domain); +static void domain_remove_one_dev_info(struct dmar_domain *domain, +				       struct device *dev); +static void iommu_detach_dependent_devices(struct intel_iommu *iommu, +					   struct device *dev);  #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON  int dmar_disabled = 0; @@ -590,18 +592,31 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)  static void domain_update_iommu_coherency(struct dmar_domain *domain)  { -	int i; - -	i = find_first_bit(domain->iommu_bmp, g_num_of_iommus); +	struct dmar_drhd_unit *drhd; +	struct intel_iommu *iommu; +	int i, found = 0; -	domain->iommu_coherency = i < g_num_of_iommus ? 
1 : 0; +	domain->iommu_coherency = 1;  	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { +		found = 1;  		if (!ecap_coherent(g_iommus[i]->ecap)) {  			domain->iommu_coherency = 0;  			break;  		}  	} +	if (found) +		return; + +	/* No hardware attached; use lowest common denominator */ +	rcu_read_lock(); +	for_each_active_iommu(iommu, drhd) { +		if (!ecap_coherent(iommu->ecap)) { +			domain->iommu_coherency = 0; +			break; +		} +	} +	rcu_read_unlock();  }  static void domain_update_iommu_snooping(struct dmar_domain *domain) @@ -630,12 +645,15 @@ static void domain_update_iommu_superpage(struct dmar_domain *domain)  	}  	/* set iommu_superpage to the smallest common denominator */ +	rcu_read_lock();  	for_each_active_iommu(iommu, drhd) {  		mask &= cap_super_page_val(iommu->cap);  		if (!mask) {  			break;  		}  	} +	rcu_read_unlock(); +  	domain->iommu_superpage = fls(mask);  } @@ -647,34 +665,56 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)  	domain_update_iommu_superpage(domain);  } -static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) +static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)  {  	struct dmar_drhd_unit *drhd = NULL; +	struct intel_iommu *iommu; +	struct device *tmp; +	struct pci_dev *ptmp, *pdev = NULL; +	u16 segment;  	int i; -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; -		if (segment != drhd->segment) +	if (dev_is_pci(dev)) { +		pdev = to_pci_dev(dev); +		segment = pci_domain_nr(pdev->bus); +	} else if (ACPI_COMPANION(dev)) +		dev = &ACPI_COMPANION(dev)->dev; + +	rcu_read_lock(); +	for_each_active_iommu(iommu, drhd) { +		if (pdev && segment != drhd->segment)  			continue; -		for (i = 0; i < drhd->devices_cnt; i++) { -			if (drhd->devices[i] && -			    drhd->devices[i]->bus->number == bus && -			    drhd->devices[i]->devfn == devfn) -				return drhd->iommu; -			if (drhd->devices[i] && -			    drhd->devices[i]->subordinate && -			    drhd->devices[i]->subordinate->number <= bus && -			    drhd->devices[i]->subordinate->busn_res.end >= bus) -				return drhd->iommu; +		for_each_active_dev_scope(drhd->devices, +					  drhd->devices_cnt, i, tmp) { +			if (tmp == dev) { +				*bus = drhd->devices[i].bus; +				*devfn = drhd->devices[i].devfn; +				goto out; +			} + +			if (!pdev || !dev_is_pci(tmp)) +				continue; + +			ptmp = to_pci_dev(tmp); +			if (ptmp->subordinate && +			    ptmp->subordinate->number <= pdev->bus->number && +			    ptmp->subordinate->busn_res.end >= pdev->bus->number) +				goto got_pdev;  		} -		if (drhd->include_all) -			return drhd->iommu; +		if (pdev && drhd->include_all) { +		got_pdev: +			*bus = pdev->bus->number; +			*devfn = pdev->devfn; +			goto out; +		}  	} +	iommu = NULL; + out: +	rcu_read_unlock(); -	return NULL; +	return iommu;  }  static void domain_flush_cache(struct dmar_domain *domain, @@ -774,7 +814,7 @@ out:  }  static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, -				      unsigned long pfn, int target_level) +				      unsigned long pfn, int *target_level)  {  	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;  	struct dma_pte *parent, *pte = NULL; @@ -782,17 +822,21 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,  	int offset;  	BUG_ON(!domain->pgd); -	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); + +	if (addr_width < BITS_PER_LONG && pfn >> addr_width) +		/* Address beyond IOMMU's addressing capabilities. 
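domain_update_iommu_superpage() above ANDs the per-IOMMU superpage capability bits together and then takes fls() of the result, so a domain only uses page sizes that every unit can handle. A small standalone illustration of that lowest-common-denominator computation; fls() is re-implemented here, and the capability encoding used in the comments (bit 0 for 2 MiB, bit 1 for 1 GiB) is only an assumption for the example:

    #include <stdio.h>

    /* fls(): position of the highest set bit, 1-based; 0 when no bit is set. */
    static int fls_u32(unsigned int x)
    {
        int r = 0;

        while (x) {
            r++;
            x >>= 1;
        }
        return r;
    }

    int main(void)
    {
        unsigned int caps[] = { 0x3, 0x1, 0x3 };  /* per-IOMMU capability bits */
        unsigned int mask = ~0U;

        for (unsigned i = 0; i < 3; i++)
            mask &= caps[i];

        /* One unit lacks the second bit, so only one superpage level is common. */
        printf("common superpage levels: %d\n", fls_u32(mask));
        return 0;
    }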
*/ +		return NULL; +  	parent = domain->pgd; -	while (level > 0) { +	while (1) {  		void *tmp_page;  		offset = pfn_level_offset(pfn, level);  		pte = &parent[offset]; -		if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte))) +		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))  			break; -		if (level == target_level) +		if (level == *target_level)  			break;  		if (!dma_pte_present(pte)) { @@ -813,10 +857,16 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,  				domain_flush_cache(domain, pte, sizeof(*pte));  			}  		} +		if (level == 1) +			break; +  		parent = phys_to_virt(dma_pte_addr(pte));  		level--;  	} +	if (!*target_level) +		*target_level = level; +  	return pte;  } @@ -854,14 +904,13 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,  }  /* clear last level pte, a tlb flush should be followed */ -static int dma_pte_clear_range(struct dmar_domain *domain, +static void dma_pte_clear_range(struct dmar_domain *domain,  				unsigned long start_pfn,  				unsigned long last_pfn)  {  	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;  	unsigned int large_page = 1;  	struct dma_pte *first_pte, *pte; -	int order;  	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);  	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); @@ -885,9 +934,6 @@ static int dma_pte_clear_range(struct dmar_domain *domain,  				   (void *)pte - (void *)first_pte);  	} while (start_pfn && start_pfn <= last_pfn); - -	order = (large_page - 1) * 9; -	return order;  }  static void dma_pte_free_level(struct dmar_domain *domain, int level, @@ -913,7 +959,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,  		/* If range covers entire pagetable, free it */  		if (!(start_pfn > level_pfn || -		      last_pfn < level_pfn + level_size(level))) { +		      last_pfn < level_pfn + level_size(level) - 1)) {  			dma_clear_pte(pte);  			domain_flush_cache(domain, pte, sizeof(*pte));  			free_pgtable_page(level_pte); @@ -945,6 +991,125 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,  	}  } +/* When a page at a given level is being unlinked from its parent, we don't +   need to *modify* it at all. All we need to do is make a list of all the +   pages which can be freed just as soon as we've flushed the IOTLB and we +   know the hardware page-walk will no longer touch them. +   The 'pte' argument is the *parent* PTE, pointing to the page that is to +   be freed. 
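The comment above describes the new unmap scheme: page-table pages are not freed immediately, they are chained onto a list through the struct page's freelist pointer and only released once the IOTLB flush guarantees the hardware can no longer walk them. A self-contained sketch of that deferred-free pattern; the node type and the flush stub are stand-ins, not the driver's types:

    #include <stdio.h>
    #include <stdlib.h>

    struct pt_page {
        int id;
        struct pt_page *freelist;       /* plays the role of page->freelist */
    };

    /* Unlink a page: don't free it yet, just push it on the caller's list. */
    static struct pt_page *collect(struct pt_page *pg, struct pt_page *freelist)
    {
        pg->freelist = freelist;
        return pg;
    }

    static void flush_iotlb_stub(void)
    {
        printf("IOTLB flushed\n");      /* stand-in for the hardware invalidation */
    }

    int main(void)
    {
        struct pt_page *freelist = NULL;

        for (int i = 0; i < 3; i++) {
            struct pt_page *pg = malloc(sizeof(*pg));

            if (!pg)
                return 1;
            pg->id = i;
            freelist = collect(pg, freelist);
        }

        flush_iotlb_stub();             /* only now is it safe to free */

        while (freelist) {
            struct pt_page *pg = freelist;

            freelist = pg->freelist;
            printf("freeing page %d\n", pg->id);
            free(pg);
        }
        return 0;
    }

This mirrors the ordering the patch introduces: domain_unmap() returns the chain, the caller issues the IOTLB flush, and dma_free_pagelist() walks and frees the chain afterwards.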
*/ +static struct page *dma_pte_list_pagetables(struct dmar_domain *domain, +					    int level, struct dma_pte *pte, +					    struct page *freelist) +{ +	struct page *pg; + +	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT); +	pg->freelist = freelist; +	freelist = pg; + +	if (level == 1) +		return freelist; + +	pte = page_address(pg); +	do { +		if (dma_pte_present(pte) && !dma_pte_superpage(pte)) +			freelist = dma_pte_list_pagetables(domain, level - 1, +							   pte, freelist); +		pte++; +	} while (!first_pte_in_page(pte)); + +	return freelist; +} + +static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, +					struct dma_pte *pte, unsigned long pfn, +					unsigned long start_pfn, +					unsigned long last_pfn, +					struct page *freelist) +{ +	struct dma_pte *first_pte = NULL, *last_pte = NULL; + +	pfn = max(start_pfn, pfn); +	pte = &pte[pfn_level_offset(pfn, level)]; + +	do { +		unsigned long level_pfn; + +		if (!dma_pte_present(pte)) +			goto next; + +		level_pfn = pfn & level_mask(level); + +		/* If range covers entire pagetable, free it */ +		if (start_pfn <= level_pfn && +		    last_pfn >= level_pfn + level_size(level) - 1) { +			/* These suborbinate page tables are going away entirely. Don't +			   bother to clear them; we're just going to *free* them. */ +			if (level > 1 && !dma_pte_superpage(pte)) +				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist); + +			dma_clear_pte(pte); +			if (!first_pte) +				first_pte = pte; +			last_pte = pte; +		} else if (level > 1) { +			/* Recurse down into a level that isn't *entirely* obsolete */ +			freelist = dma_pte_clear_level(domain, level - 1, +						       phys_to_virt(dma_pte_addr(pte)), +						       level_pfn, start_pfn, last_pfn, +						       freelist); +		} +next: +		pfn += level_size(level); +	} while (!first_pte_in_page(++pte) && pfn <= last_pfn); + +	if (first_pte) +		domain_flush_cache(domain, first_pte, +				   (void *)++last_pte - (void *)first_pte); + +	return freelist; +} + +/* We can't just free the pages because the IOMMU may still be walking +   the page tables, and may have cached the intermediate levels. The +   pages can only be freed after the IOTLB flush has been done. 
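dma_pte_clear_level() above frees a lower-level table outright only when the requested range covers it entirely, using start_pfn <= level_pfn && last_pfn >= level_pfn + level_size(level) - 1 (note the "- 1", the same off-by-one the dma_pte_free_level hunk fixes). A small numeric illustration, assuming the usual nine index bits per table level; that stride value is an assumption here, it is not restated in the hunk:

    #include <stdio.h>

    #define LEVEL_STRIDE 9          /* assumed: 512 entries per table level */

    static unsigned long level_size(int level)
    {
        return 1UL << ((level - 1) * LEVEL_STRIDE);     /* in 4 KiB pages */
    }

    static int covers_entire_table(unsigned long start_pfn, unsigned long last_pfn,
                                   unsigned long level_pfn, int level)
    {
        return start_pfn <= level_pfn &&
               last_pfn >= level_pfn + level_size(level) - 1;
    }

    int main(void)
    {
        /* A level-2 entry maps 512 pages starting at level_pfn 0. */
        printf("0..511 covers it: %d\n", covers_entire_table(0, 511, 0, 2));
        printf("0..510 covers it: %d\n", covers_entire_table(0, 510, 0, 2));
        return 0;
    }

With the range 0..511 the whole table can be listed for freeing; with 0..510 the code recurses and clears only the affected leaf entries.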
*/ +struct page *domain_unmap(struct dmar_domain *domain, +			  unsigned long start_pfn, +			  unsigned long last_pfn) +{ +	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; +	struct page *freelist = NULL; + +	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); +	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); +	BUG_ON(start_pfn > last_pfn); + +	/* we don't need lock here; nobody else touches the iova range */ +	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw), +				       domain->pgd, 0, start_pfn, last_pfn, NULL); + +	/* free pgd */ +	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { +		struct page *pgd_page = virt_to_page(domain->pgd); +		pgd_page->freelist = freelist; +		freelist = pgd_page; + +		domain->pgd = NULL; +	} + +	return freelist; +} + +void dma_free_pagelist(struct page *freelist) +{ +	struct page *pg; + +	while ((pg = freelist)) { +		freelist = pg->freelist; +		free_pgtable_page(page_address(pg)); +	} +} +  /* iommu handling */  static int iommu_alloc_root_entry(struct intel_iommu *iommu)  { @@ -1054,7 +1219,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,  		break;  	case DMA_TLB_PSI_FLUSH:  		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); -		/* Note: always flush non-leaf currently */ +		/* IH bit is passed in as part of address */  		val_iva = size_order | addr;  		break;  	default: @@ -1093,13 +1258,14 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,  			(unsigned long long)DMA_TLB_IAIG(val));  } -static struct device_domain_info *iommu_support_dev_iotlb( -	struct dmar_domain *domain, int segment, u8 bus, u8 devfn) +static struct device_domain_info * +iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, +			 u8 bus, u8 devfn)  {  	int found = 0;  	unsigned long flags;  	struct device_domain_info *info; -	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn); +	struct pci_dev *pdev;  	if (!ecap_dev_iotlb_support(iommu->ecap))  		return NULL; @@ -1115,34 +1281,35 @@ static struct device_domain_info *iommu_support_dev_iotlb(  		}  	spin_unlock_irqrestore(&device_domain_lock, flags); -	if (!found || !info->dev) +	if (!found || !info->dev || !dev_is_pci(info->dev))  		return NULL; -	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS)) -		return NULL; +	pdev = to_pci_dev(info->dev); -	if (!dmar_find_matched_atsr_unit(info->dev)) +	if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))  		return NULL; -	info->iommu = iommu; +	if (!dmar_find_matched_atsr_unit(pdev)) +		return NULL;  	return info;  }  static void iommu_enable_dev_iotlb(struct device_domain_info *info)  { -	if (!info) +	if (!info || !dev_is_pci(info->dev))  		return; -	pci_enable_ats(info->dev, VTD_PAGE_SHIFT); +	pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);  }  static void iommu_disable_dev_iotlb(struct device_domain_info *info)  { -	if (!info->dev || !pci_ats_enabled(info->dev)) +	if (!info->dev || !dev_is_pci(info->dev) || +	    !pci_ats_enabled(to_pci_dev(info->dev)))  		return; -	pci_disable_ats(info->dev); +	pci_disable_ats(to_pci_dev(info->dev));  }  static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -1154,24 +1321,31 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,  	spin_lock_irqsave(&device_domain_lock, flags);  	list_for_each_entry(info, &domain->devices, link) { -		if (!info->dev || !pci_ats_enabled(info->dev)) +		struct pci_dev *pdev; +		if (!info->dev || !dev_is_pci(info->dev)) +			continue; + 
+		pdev = to_pci_dev(info->dev); +		if (!pci_ats_enabled(pdev))  			continue;  		sid = info->bus << 8 | info->devfn; -		qdep = pci_ats_queue_depth(info->dev); +		qdep = pci_ats_queue_depth(pdev);  		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);  	}  	spin_unlock_irqrestore(&device_domain_lock, flags);  }  static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, -				  unsigned long pfn, unsigned int pages, int map) +				  unsigned long pfn, unsigned int pages, int ih, int map)  {  	unsigned int mask = ilog2(__roundup_pow_of_two(pages));  	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;  	BUG_ON(pages == 0); +	if (ih) +		ih = 1 << 6;  	/*  	 * Fallback to domain selective flush if no PSI support or the size is  	 * too big. @@ -1182,7 +1356,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,  		iommu->flush.flush_iotlb(iommu, did, 0, 0,  						DMA_TLB_DSI_FLUSH);  	else -		iommu->flush.flush_iotlb(iommu, did, addr, mask, +		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,  						DMA_TLB_PSI_FLUSH);  	/* @@ -1251,8 +1425,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)  	unsigned long nlongs;  	ndomains = cap_ndoms(iommu->cap); -	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id, -			ndomains); +	pr_debug("IOMMU%d: Number of Domains supported <%ld>\n", +		 iommu->seq_id, ndomains);  	nlongs = BITS_TO_LONGS(ndomains);  	spin_lock_init(&iommu->lock); @@ -1262,13 +1436,17 @@ static int iommu_init_domains(struct intel_iommu *iommu)  	 */  	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);  	if (!iommu->domain_ids) { -		printk(KERN_ERR "Allocating domain id array failed\n"); +		pr_err("IOMMU%d: allocating domain id array failed\n", +		       iommu->seq_id);  		return -ENOMEM;  	}  	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),  			GFP_KERNEL);  	if (!iommu->domains) { -		printk(KERN_ERR "Allocating domain array failed\n"); +		pr_err("IOMMU%d: allocating domain array failed\n", +		       iommu->seq_id); +		kfree(iommu->domain_ids); +		iommu->domain_ids = NULL;  		return -ENOMEM;  	} @@ -1281,62 +1459,50 @@ static int iommu_init_domains(struct intel_iommu *iommu)  	return 0;  } - -static void domain_exit(struct dmar_domain *domain); -static void vm_domain_exit(struct dmar_domain *domain); - -void free_dmar_iommu(struct intel_iommu *iommu) +static void free_dmar_iommu(struct intel_iommu *iommu)  {  	struct dmar_domain *domain; -	int i; +	int i, count;  	unsigned long flags;  	if ((iommu->domains) && (iommu->domain_ids)) {  		for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) { +			/* +			 * Domain id 0 is reserved for invalid translation +			 * if hardware supports caching mode. 
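In the PSI flush above, the invalidation hint is folded into the otherwise-unused low bits of the page-aligned address (ih = 1 << 6, then addr | ih), so no extra field has to reach the descriptor write. A tiny standalone sketch of packing and unpacking such a flag; the bit position is taken from the hunk, the 4 KiB page shift and the helper names are assumptions for the example:

    #include <stdint.h>
    #include <stdio.h>

    #define VTD_PAGE_SHIFT  12              /* assumed 4 KiB IOMMU page */
    #define IH_BIT          (1u << 6)       /* bit 6 of the address field, per the hunk */

    static uint64_t pack_flush_addr(uint64_t pfn, int ih)
    {
        /* A page-aligned address leaves bits 11:0 free for flags. */
        return (pfn << VTD_PAGE_SHIFT) | (ih ? IH_BIT : 0);
    }

    int main(void)
    {
        uint64_t v = pack_flush_addr(0x12345, 1);

        printf("addr %#llx ih %d\n",
               (unsigned long long)(v & ~0xfffULL), (int)!!(v & IH_BIT));
        return 0;
    }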
+			 */ +			if (cap_caching_mode(iommu->cap) && i == 0) +				continue; +  			domain = iommu->domains[i];  			clear_bit(i, iommu->domain_ids);  			spin_lock_irqsave(&domain->iommu_lock, flags); -			if (--domain->iommu_count == 0) { -				if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) -					vm_domain_exit(domain); -				else -					domain_exit(domain); -			} +			count = --domain->iommu_count;  			spin_unlock_irqrestore(&domain->iommu_lock, flags); +			if (count == 0) +				domain_exit(domain);  		}  	}  	if (iommu->gcmd & DMA_GCMD_TE)  		iommu_disable_translation(iommu); -	if (iommu->irq) { -		irq_set_handler_data(iommu->irq, NULL); -		/* This will mask the irq */ -		free_irq(iommu->irq, iommu); -		destroy_irq(iommu->irq); -	} -  	kfree(iommu->domains);  	kfree(iommu->domain_ids); +	iommu->domains = NULL; +	iommu->domain_ids = NULL;  	g_iommus[iommu->seq_id] = NULL; -	/* if all iommus are freed, free g_iommus */ -	for (i = 0; i < g_num_of_iommus; i++) { -		if (g_iommus[i]) -			break; -	} - -	if (i == g_num_of_iommus) -		kfree(g_iommus); -  	/* free context mapping */  	free_context_table(iommu);  } -static struct dmar_domain *alloc_domain(void) +static struct dmar_domain *alloc_domain(bool vm)  { +	/* domain id for virtual machine, it won't be set in context */ +	static atomic_t vm_domid = ATOMIC_INIT(0);  	struct dmar_domain *domain;  	domain = alloc_domain_mem(); @@ -1344,8 +1510,15 @@ static struct dmar_domain *alloc_domain(void)  		return NULL;  	domain->nid = -1; +	domain->iommu_count = 0;  	memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));  	domain->flags = 0; +	spin_lock_init(&domain->iommu_lock); +	INIT_LIST_HEAD(&domain->devices); +	if (vm) { +		domain->id = atomic_inc_return(&vm_domid); +		domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; +	}  	return domain;  } @@ -1369,6 +1542,7 @@ static int iommu_attach_domain(struct dmar_domain *domain,  	}  	domain->id = num; +	domain->iommu_count++;  	set_bit(num, iommu->domain_ids);  	set_bit(iommu->seq_id, domain->iommu_bmp);  	iommu->domains[num] = domain; @@ -1382,22 +1556,16 @@ static void iommu_detach_domain(struct dmar_domain *domain,  {  	unsigned long flags;  	int num, ndomains; -	int found = 0;  	spin_lock_irqsave(&iommu->lock, flags);  	ndomains = cap_ndoms(iommu->cap);  	for_each_set_bit(num, iommu->domain_ids, ndomains) {  		if (iommu->domains[num] == domain) { -			found = 1; +			clear_bit(num, iommu->domain_ids); +			iommu->domains[num] = NULL;  			break;  		}  	} - -	if (found) { -		clear_bit(num, iommu->domain_ids); -		clear_bit(iommu->seq_id, domain->iommu_bmp); -		iommu->domains[num] = NULL; -	}  	spin_unlock_irqrestore(&iommu->lock, flags);  } @@ -1469,8 +1637,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)  	unsigned long sagaw;  	init_iova_domain(&domain->iovad, DMA_32BIT_PFN); -	spin_lock_init(&domain->iommu_lock); -  	domain_reserve_special_ranges(domain);  	/* calculate AGAW */ @@ -1489,7 +1655,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)  			return -ENODEV;  	}  	domain->agaw = agaw; -	INIT_LIST_HEAD(&domain->devices);  	if (ecap_coherent(iommu->ecap))  		domain->iommu_coherency = 1; @@ -1501,8 +1666,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width)  	else  		domain->iommu_snooping = 0; -	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); -	domain->iommu_count = 1; +	if (intel_iommu_superpage) +		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); +	else +		domain->iommu_superpage = 0; +  	domain->nid = iommu->node;  	
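alloc_domain() above hands out virtual-machine domain ids from a file-local atomic counter (atomic_inc_return(&vm_domid)), kept separate from the per-IOMMU domain-id bitmaps because, per the comment, these ids are never written into context entries. A userspace sketch of the same counter idiom using C11 atomics; the structure and field names are placeholders, not the driver's:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct toy_domain {
        int id;
        bool vm;
    };

    static atomic_int vm_domid;     /* ids for VM domains only */

    static struct toy_domain *toy_alloc_domain(bool vm)
    {
        struct toy_domain *d = calloc(1, sizeof(*d));

        if (!d)
            return NULL;
        d->vm = vm;
        if (vm)     /* equivalent of atomic_inc_return(): old value + 1 */
            d->id = atomic_fetch_add(&vm_domid, 1) + 1;
        return d;
    }

    int main(void)
    {
        struct toy_domain *a = toy_alloc_domain(true);
        struct toy_domain *b = toy_alloc_domain(true);

        if (!a || !b)
            return 1;
        printf("vm domain ids: %d %d\n", a->id, b->id);
        free(a);
        free(b);
        return 0;
    }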
/* always allocate the top pgd */ @@ -1517,6 +1685,7 @@ static void domain_exit(struct dmar_domain *domain)  {  	struct dmar_drhd_unit *drhd;  	struct intel_iommu *iommu; +	struct page *freelist = NULL;  	/* Domain 0 is reserved, so dont process it */  	if (!domain) @@ -1526,29 +1695,33 @@ static void domain_exit(struct dmar_domain *domain)  	if (!intel_iommu_strict)  		flush_unmaps_timeout(0); +	/* remove associated devices */  	domain_remove_dev_info(domain); +  	/* destroy iovas */  	put_iova_domain(&domain->iovad); -	/* clear ptes */ -	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - -	/* free page tables */ -	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); +	freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); +	/* clear attached or cached domains */ +	rcu_read_lock();  	for_each_active_iommu(iommu, drhd) -		if (test_bit(iommu->seq_id, domain->iommu_bmp)) +		if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || +		    test_bit(iommu->seq_id, domain->iommu_bmp))  			iommu_detach_domain(domain, iommu); +	rcu_read_unlock(); + +	dma_free_pagelist(freelist);  	free_domain_mem(domain);  } -static int domain_context_mapping_one(struct dmar_domain *domain, int segment, -				 u8 bus, u8 devfn, int translation) +static int domain_context_mapping_one(struct dmar_domain *domain, +				      struct intel_iommu *iommu, +				      u8 bus, u8 devfn, int translation)  {  	struct context_entry *context;  	unsigned long flags; -	struct intel_iommu *iommu;  	struct dma_pte *pgd;  	unsigned long num;  	unsigned long ndomains; @@ -1563,10 +1736,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,  	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&  	       translation != CONTEXT_TT_MULTI_LEVEL); -	iommu = device_to_iommu(segment, bus, devfn); -	if (!iommu) -		return -ENODEV; -  	context = device_to_context_entry(iommu, bus, devfn);  	if (!context)  		return -ENOMEM; @@ -1624,7 +1793,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,  	context_set_domain_id(context, id);  	if (translation != CONTEXT_TT_PASS_THROUGH) { -		info = iommu_support_dev_iotlb(domain, segment, bus, devfn); +		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);  		translation = info ? 
CONTEXT_TT_DEV_IOTLB :  				     CONTEXT_TT_MULTI_LEVEL;  	} @@ -1674,27 +1843,32 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,  }  static int -domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, -			int translation) +domain_context_mapping(struct dmar_domain *domain, struct device *dev, +		       int translation)  {  	int ret; -	struct pci_dev *tmp, *parent; +	struct pci_dev *pdev, *tmp, *parent; +	struct intel_iommu *iommu; +	u8 bus, devfn; + +	iommu = device_to_iommu(dev, &bus, &devfn); +	if (!iommu) +		return -ENODEV; -	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), -					 pdev->bus->number, pdev->devfn, +	ret = domain_context_mapping_one(domain, iommu, bus, devfn,  					 translation); -	if (ret) +	if (ret || !dev_is_pci(dev))  		return ret;  	/* dependent device mapping */ +	pdev = to_pci_dev(dev);  	tmp = pci_find_upstream_pcie_bridge(pdev);  	if (!tmp)  		return 0;  	/* Secondary interface's bus number and devfn 0 */  	parent = pdev->bus->self;  	while (parent != tmp) { -		ret = domain_context_mapping_one(domain, -						 pci_domain_nr(parent->bus), +		ret = domain_context_mapping_one(domain, iommu,  						 parent->bus->number,  						 parent->devfn, translation);  		if (ret) @@ -1702,33 +1876,33 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,  		parent = parent->bus->self;  	}  	if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ -		return domain_context_mapping_one(domain, -					pci_domain_nr(tmp->subordinate), +		return domain_context_mapping_one(domain, iommu,  					tmp->subordinate->number, 0,  					translation);  	else /* this is a legacy PCI bridge */ -		return domain_context_mapping_one(domain, -						  pci_domain_nr(tmp->bus), +		return domain_context_mapping_one(domain, iommu,  						  tmp->bus->number,  						  tmp->devfn,  						  translation);  } -static int domain_context_mapped(struct pci_dev *pdev) +static int domain_context_mapped(struct device *dev)  {  	int ret; -	struct pci_dev *tmp, *parent; +	struct pci_dev *pdev, *tmp, *parent;  	struct intel_iommu *iommu; +	u8 bus, devfn; -	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, -				pdev->devfn); +	iommu = device_to_iommu(dev, &bus, &devfn);  	if (!iommu)  		return -ENODEV; -	ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn); -	if (!ret) +	ret = device_context_mapped(iommu, bus, devfn); +	if (!ret || !dev_is_pci(dev))  		return ret; +  	/* dependent device mapping */ +	pdev = to_pci_dev(dev);  	tmp = pci_find_upstream_pcie_bridge(pdev);  	if (!tmp)  		return ret; @@ -1824,7 +1998,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,  		if (!pte) {  			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); -			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl); +			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);  			if (!pte)  				return -ENOMEM;  			/* It is large page*/ @@ -1923,14 +2097,13 @@ static inline void unlink_domain_info(struct device_domain_info *info)  	list_del(&info->link);  	list_del(&info->global);  	if (info->dev) -		info->dev->dev.archdata.iommu = NULL; +		info->dev->archdata.iommu = NULL;  }  static void domain_remove_dev_info(struct dmar_domain *domain)  {  	struct device_domain_info *info; -	unsigned long flags; -	struct intel_iommu *iommu; +	unsigned long flags, flags2;  	spin_lock_irqsave(&device_domain_lock, flags);  	while (!list_empty(&domain->devices)) { @@ 
-1940,10 +2113,23 @@ static void domain_remove_dev_info(struct dmar_domain *domain)  		spin_unlock_irqrestore(&device_domain_lock, flags);  		iommu_disable_dev_iotlb(info); -		iommu = device_to_iommu(info->segment, info->bus, info->devfn); -		iommu_detach_dev(iommu, info->bus, info->devfn); -		free_devinfo_mem(info); +		iommu_detach_dev(info->iommu, info->bus, info->devfn); +		if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { +			iommu_detach_dependent_devices(info->iommu, info->dev); +			/* clear this iommu in iommu_bmp, update iommu count +			 * and capabilities +			 */ +			spin_lock_irqsave(&domain->iommu_lock, flags2); +			if (test_and_clear_bit(info->iommu->seq_id, +					       domain->iommu_bmp)) { +				domain->iommu_count--; +				domain_update_iommu_cap(domain); +			} +			spin_unlock_irqrestore(&domain->iommu_lock, flags2); +		} + +		free_devinfo_mem(info);  		spin_lock_irqsave(&device_domain_lock, flags);  	}  	spin_unlock_irqrestore(&device_domain_lock, flags); @@ -1951,155 +2137,153 @@ static void domain_remove_dev_info(struct dmar_domain *domain)  /*   * find_domain - * Note: we use struct pci_dev->dev.archdata.iommu stores the info + * Note: we use struct device->archdata.iommu stores the info   */ -static struct dmar_domain * -find_domain(struct pci_dev *pdev) +static struct dmar_domain *find_domain(struct device *dev)  {  	struct device_domain_info *info;  	/* No lock here, assumes no domain exit in normal case */ -	info = pdev->dev.archdata.iommu; +	info = dev->archdata.iommu;  	if (info)  		return info->domain;  	return NULL;  } +static inline struct device_domain_info * +dmar_search_domain_by_dev_info(int segment, int bus, int devfn) +{ +	struct device_domain_info *info; + +	list_for_each_entry(info, &device_domain_list, global) +		if (info->iommu->segment == segment && info->bus == bus && +		    info->devfn == devfn) +			return info; + +	return NULL; +} + +static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, +						int bus, int devfn, +						struct device *dev, +						struct dmar_domain *domain) +{ +	struct dmar_domain *found = NULL; +	struct device_domain_info *info; +	unsigned long flags; + +	info = alloc_devinfo_mem(); +	if (!info) +		return NULL; + +	info->bus = bus; +	info->devfn = devfn; +	info->dev = dev; +	info->domain = domain; +	info->iommu = iommu; +	if (!dev) +		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; + +	spin_lock_irqsave(&device_domain_lock, flags); +	if (dev) +		found = find_domain(dev); +	else { +		struct device_domain_info *info2; +		info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); +		if (info2) +			found = info2->domain; +	} +	if (found) { +		spin_unlock_irqrestore(&device_domain_lock, flags); +		free_devinfo_mem(info); +		/* Caller must free the original domain */ +		return found; +	} + +	list_add(&info->link, &domain->devices); +	list_add(&info->global, &device_domain_list); +	if (dev) +		dev->archdata.iommu = info; +	spin_unlock_irqrestore(&device_domain_lock, flags); + +	return domain; +} +  /* domain is initialized */ -static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) +static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)  { -	struct dmar_domain *domain, *found = NULL; -	struct intel_iommu *iommu; -	struct dmar_drhd_unit *drhd; -	struct device_domain_info *info, *tmp; -	struct pci_dev *dev_tmp; +	struct dmar_domain *domain, *free = NULL; +	struct intel_iommu *iommu = NULL; +	struct device_domain_info *info; +	struct pci_dev *dev_tmp = NULL;  	
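dmar_insert_dev_info() above allocates the device_domain_info before taking device_domain_lock, then re-checks under the lock whether another path already attached a domain; if so it frees its own allocation and returns the existing domain, and the caller disposes of the domain it had prepared. A compact userspace sketch of that optimistic-allocation pattern with a pthread mutex; every name here is a stand-in:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static void *installed;                 /* the currently attached "info" */

    /* Returns whatever ends up installed, freeing our copy if we lost the race. */
    static void *insert_info(void)
    {
        void *info = malloc(32);            /* allocate before taking the lock */

        if (!info)
            return NULL;

        pthread_mutex_lock(&lock);
        if (installed) {                    /* somebody else was faster */
            pthread_mutex_unlock(&lock);
            free(info);
            return installed;
        }
        installed = info;
        pthread_mutex_unlock(&lock);
        return info;
    }

    int main(void)
    {
        void *first = insert_info();
        void *second = insert_info();

        printf("same object installed: %d\n", first == second);
        free(installed);
        return 0;
    }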
unsigned long flags; -	int bus = 0, devfn = 0; -	int segment; -	int ret; +	u8 bus, devfn, bridge_bus, bridge_devfn; -	domain = find_domain(pdev); +	domain = find_domain(dev);  	if (domain)  		return domain; -	segment = pci_domain_nr(pdev->bus); +	if (dev_is_pci(dev)) { +		struct pci_dev *pdev = to_pci_dev(dev); +		u16 segment; -	dev_tmp = pci_find_upstream_pcie_bridge(pdev); -	if (dev_tmp) { -		if (pci_is_pcie(dev_tmp)) { -			bus = dev_tmp->subordinate->number; -			devfn = 0; -		} else { -			bus = dev_tmp->bus->number; -			devfn = dev_tmp->devfn; -		} -		spin_lock_irqsave(&device_domain_lock, flags); -		list_for_each_entry(info, &device_domain_list, global) { -			if (info->segment == segment && -			    info->bus == bus && info->devfn == devfn) { -				found = info->domain; -				break; +		segment = pci_domain_nr(pdev->bus); +		dev_tmp = pci_find_upstream_pcie_bridge(pdev); +		if (dev_tmp) { +			if (pci_is_pcie(dev_tmp)) { +				bridge_bus = dev_tmp->subordinate->number; +				bridge_devfn = 0; +			} else { +				bridge_bus = dev_tmp->bus->number; +				bridge_devfn = dev_tmp->devfn;  			} -		} -		spin_unlock_irqrestore(&device_domain_lock, flags); -		/* pcie-pci bridge already has a domain, uses it */ -		if (found) { -			domain = found; -			goto found_domain; +			spin_lock_irqsave(&device_domain_lock, flags); +			info = dmar_search_domain_by_dev_info(segment, +							      bridge_bus, +							      bridge_devfn); +			if (info) { +				iommu = info->iommu; +				domain = info->domain; +			} +			spin_unlock_irqrestore(&device_domain_lock, flags); +			/* pcie-pci bridge already has a domain, uses it */ +			if (info) +				goto found_domain;  		}  	} -	domain = alloc_domain(); -	if (!domain) +	iommu = device_to_iommu(dev, &bus, &devfn); +	if (!iommu)  		goto error; -	/* Allocate new domain for the device */ -	drhd = dmar_find_matched_drhd_unit(pdev); -	if (!drhd) { -		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n", -			pci_name(pdev)); -		free_domain_mem(domain); -		return NULL; -	} -	iommu = drhd->iommu; - -	ret = iommu_attach_domain(domain, iommu); -	if (ret) { +	/* Allocate and initialize new domain for the device */ +	domain = alloc_domain(false); +	if (!domain) +		goto error; +	if (iommu_attach_domain(domain, iommu)) {  		free_domain_mem(domain); +		domain = NULL;  		goto error;  	} - -	if (domain_init(domain, gaw)) { -		domain_exit(domain); +	free = domain; +	if (domain_init(domain, gaw))  		goto error; -	}  	/* register pcie-to-pci device */  	if (dev_tmp) { -		info = alloc_devinfo_mem(); -		if (!info) { -			domain_exit(domain); +		domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn, +					      NULL, domain); +		if (!domain)  			goto error; -		} -		info->segment = segment; -		info->bus = bus; -		info->devfn = devfn; -		info->dev = NULL; -		info->domain = domain; -		/* This domain is shared by devices under p2p bridge */ -		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; - -		/* pcie-to-pci bridge already has a domain, uses it */ -		found = NULL; -		spin_lock_irqsave(&device_domain_lock, flags); -		list_for_each_entry(tmp, &device_domain_list, global) { -			if (tmp->segment == segment && -			    tmp->bus == bus && tmp->devfn == devfn) { -				found = tmp->domain; -				break; -			} -		} -		if (found) { -			spin_unlock_irqrestore(&device_domain_lock, flags); -			free_devinfo_mem(info); -			domain_exit(domain); -			domain = found; -		} else { -			list_add(&info->link, &domain->devices); -			list_add(&info->global, &device_domain_list); -			
spin_unlock_irqrestore(&device_domain_lock, flags); -		}  	}  found_domain: -	info = alloc_devinfo_mem(); -	if (!info) -		goto error; -	info->segment = segment; -	info->bus = pdev->bus->number; -	info->devfn = pdev->devfn; -	info->dev = pdev; -	info->domain = domain; -	spin_lock_irqsave(&device_domain_lock, flags); -	/* somebody is fast */ -	found = find_domain(pdev); -	if (found != NULL) { -		spin_unlock_irqrestore(&device_domain_lock, flags); -		if (found != domain) { -			domain_exit(domain); -			domain = found; -		} -		free_devinfo_mem(info); -		return domain; -	} -	list_add(&info->link, &domain->devices); -	list_add(&info->global, &device_domain_list); -	pdev->dev.archdata.iommu = info; -	spin_unlock_irqrestore(&device_domain_lock, flags); -	return domain; +	domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);  error: -	/* recheck it here, maybe others set it */ -	return find_domain(pdev); +	if (free != domain) +		domain_exit(free); + +	return domain;  }  static int iommu_identity_mapping; @@ -2133,14 +2317,14 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,  				  DMA_PTE_READ|DMA_PTE_WRITE);  } -static int iommu_prepare_identity_map(struct pci_dev *pdev, +static int iommu_prepare_identity_map(struct device *dev,  				      unsigned long long start,  				      unsigned long long end)  {  	struct dmar_domain *domain;  	int ret; -	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); +	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);  	if (!domain)  		return -ENOMEM; @@ -2150,13 +2334,13 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,  	   up to start with in si_domain */  	if (domain == si_domain && hw_pass_through) {  		printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", -		       pci_name(pdev), start, end); +		       dev_name(dev), start, end);  		return 0;  	}  	printk(KERN_INFO  	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", -	       pci_name(pdev), start, end); +	       dev_name(dev), start, end);  	if (end < start) {  		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" @@ -2184,7 +2368,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,  		goto error;  	/* context entry init */ -	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); +	ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);  	if (ret)  		goto error; @@ -2196,12 +2380,12 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,  }  static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, -	struct pci_dev *pdev) +					 struct device *dev)  { -	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) +	if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)  		return 0; -	return iommu_prepare_identity_map(pdev, rmrr->base_address, -		rmrr->end_address); +	return iommu_prepare_identity_map(dev, rmrr->base_address, +					  rmrr->end_address);  }  #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA @@ -2215,7 +2399,7 @@ static inline void iommu_prepare_isa(void)  		return;  	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); -	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1); +	ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);  	if (ret)  		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " @@ -2237,11 +2421,11 @@ static int __init si_domain_init(int hw)  	struct intel_iommu *iommu;  	int nid, ret = 0; -	si_domain = alloc_domain(); +	si_domain = alloc_domain(false);  	if (!si_domain)  		return 
-EFAULT; -	pr_debug("Identity mapping domain is domain %d\n", si_domain->id); +	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;  	for_each_active_iommu(iommu, drhd) {  		ret = iommu_attach_domain(si_domain, iommu); @@ -2256,7 +2440,8 @@ static int __init si_domain_init(int hw)  		return -EFAULT;  	} -	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; +	pr_debug("IOMMU: identity mapping domain is domain %d\n", +		 si_domain->id);  	if (hw)  		return 0; @@ -2276,16 +2461,14 @@ static int __init si_domain_init(int hw)  	return 0;  } -static void domain_remove_one_dev_info(struct dmar_domain *domain, -					  struct pci_dev *pdev); -static int identity_mapping(struct pci_dev *pdev) +static int identity_mapping(struct device *dev)  {  	struct device_domain_info *info;  	if (likely(!iommu_identity_mapping))  		return 0; -	info = pdev->dev.archdata.iommu; +	info = dev->archdata.iommu;  	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)  		return (info->domain == si_domain); @@ -2293,111 +2476,112 @@ static int identity_mapping(struct pci_dev *pdev)  }  static int domain_add_dev_info(struct dmar_domain *domain, -			       struct pci_dev *pdev, -			       int translation) +			       struct device *dev, int translation)  { -	struct device_domain_info *info; -	unsigned long flags; +	struct dmar_domain *ndomain; +	struct intel_iommu *iommu; +	u8 bus, devfn;  	int ret; -	info = alloc_devinfo_mem(); -	if (!info) -		return -ENOMEM; +	iommu = device_to_iommu(dev, &bus, &devfn); +	if (!iommu) +		return -ENODEV; -	info->segment = pci_domain_nr(pdev->bus); -	info->bus = pdev->bus->number; -	info->devfn = pdev->devfn; -	info->dev = pdev; -	info->domain = domain; +	ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); +	if (ndomain != domain) +		return -EBUSY; -	spin_lock_irqsave(&device_domain_lock, flags); -	list_add(&info->link, &domain->devices); -	list_add(&info->global, &device_domain_list); -	pdev->dev.archdata.iommu = info; -	spin_unlock_irqrestore(&device_domain_lock, flags); - -	ret = domain_context_mapping(domain, pdev, translation); +	ret = domain_context_mapping(domain, dev, translation);  	if (ret) { -		spin_lock_irqsave(&device_domain_lock, flags); -		unlink_domain_info(info); -		spin_unlock_irqrestore(&device_domain_lock, flags); -		free_devinfo_mem(info); +		domain_remove_one_dev_info(domain, dev);  		return ret;  	}  	return 0;  } -static bool device_has_rmrr(struct pci_dev *dev) +static bool device_has_rmrr(struct device *dev)  {  	struct dmar_rmrr_unit *rmrr; +	struct device *tmp;  	int i; +	rcu_read_lock();  	for_each_rmrr_units(rmrr) { -		for (i = 0; i < rmrr->devices_cnt; i++) { -			/* -			 * Return TRUE if this RMRR contains the device that -			 * is passed in. -			 */ -			if (rmrr->devices[i] == dev) +		/* +		 * Return TRUE if this RMRR contains the device that +		 * is passed in. +		 */ +		for_each_active_dev_scope(rmrr->devices, +					  rmrr->devices_cnt, i, tmp) +			if (tmp == dev) { +				rcu_read_unlock();  				return true; -		} +			}  	} +	rcu_read_unlock();  	return false;  } -static int iommu_should_identity_map(struct pci_dev *pdev, int startup) +static int iommu_should_identity_map(struct device *dev, int startup)  { -	/* -	 * We want to prevent any device associated with an RMRR from -	 * getting placed into the SI Domain. This is done because -	 * problems exist when devices are moved in and out of domains -	 * and their respective RMRR info is lost. 
We exempt USB devices -	 * from this process due to their usage of RMRRs that are known -	 * to not be needed after BIOS hand-off to OS. -	 */ -	if (device_has_rmrr(pdev) && -	    (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) -		return 0; +	if (dev_is_pci(dev)) { +		struct pci_dev *pdev = to_pci_dev(dev); -	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) -		return 1; +		/* +		 * We want to prevent any device associated with an RMRR from +		 * getting placed into the SI Domain. This is done because +		 * problems exist when devices are moved in and out of domains +		 * and their respective RMRR info is lost. We exempt USB devices +		 * from this process due to their usage of RMRRs that are known +		 * to not be needed after BIOS hand-off to OS. +		 */ +		if (device_has_rmrr(dev) && +		    (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) +			return 0; -	if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) -		return 1; +		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) +			return 1; -	if (!(iommu_identity_mapping & IDENTMAP_ALL)) -		return 0; +		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) +			return 1; -	/* -	 * We want to start off with all devices in the 1:1 domain, and -	 * take them out later if we find they can't access all of memory. -	 * -	 * However, we can't do this for PCI devices behind bridges, -	 * because all PCI devices behind the same bridge will end up -	 * with the same source-id on their transactions. -	 * -	 * Practically speaking, we can't change things around for these -	 * devices at run-time, because we can't be sure there'll be no -	 * DMA transactions in flight for any of their siblings. -	 *  -	 * So PCI devices (unless they're on the root bus) as well as -	 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of -	 * the 1:1 domain, just in _case_ one of their siblings turns out -	 * not to be able to map all of memory. -	 */ -	if (!pci_is_pcie(pdev)) { -		if (!pci_is_root_bus(pdev->bus)) +		if (!(iommu_identity_mapping & IDENTMAP_ALL)) +			return 0; + +		/* +		 * We want to start off with all devices in the 1:1 domain, and +		 * take them out later if we find they can't access all of memory. +		 * +		 * However, we can't do this for PCI devices behind bridges, +		 * because all PCI devices behind the same bridge will end up +		 * with the same source-id on their transactions. +		 * +		 * Practically speaking, we can't change things around for these +		 * devices at run-time, because we can't be sure there'll be no +		 * DMA transactions in flight for any of their siblings. +		 * +		 * So PCI devices (unless they're on the root bus) as well as +		 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of +		 * the 1:1 domain, just in _case_ one of their siblings turns out +		 * not to be able to map all of memory. +		 */ +		if (!pci_is_pcie(pdev)) { +			if (!pci_is_root_bus(pdev->bus)) +				return 0; +			if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) +				return 0; +		} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)  			return 0; -		if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) +	} else { +		if (device_has_rmrr(dev))  			return 0; -	} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) -		return 0; +	} -	/*  +	/*  	 * At boot time, we don't yet know if devices will be 64-bit capable. -	 * Assume that they will -- if they turn out not to be, then we can  +	 * Assume that they will — if they turn out not to be, then we can  	 * take them out of the 1:1 domain later.  	 
*/  	if (!startup) { @@ -2405,42 +2589,77 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)  		 * If the device's dma_mask is less than the system's memory  		 * size then this is not a candidate for identity mapping.  		 */ -		u64 dma_mask = pdev->dma_mask; +		u64 dma_mask = *dev->dma_mask; -		if (pdev->dev.coherent_dma_mask && -		    pdev->dev.coherent_dma_mask < dma_mask) -			dma_mask = pdev->dev.coherent_dma_mask; +		if (dev->coherent_dma_mask && +		    dev->coherent_dma_mask < dma_mask) +			dma_mask = dev->coherent_dma_mask; -		return dma_mask >= dma_get_required_mask(&pdev->dev); +		return dma_mask >= dma_get_required_mask(dev);  	}  	return 1;  } +static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) +{ +	int ret; + +	if (!iommu_should_identity_map(dev, 1)) +		return 0; + +	ret = domain_add_dev_info(si_domain, dev, +				  hw ? CONTEXT_TT_PASS_THROUGH : +				       CONTEXT_TT_MULTI_LEVEL); +	if (!ret) +		pr_info("IOMMU: %s identity mapping for device %s\n", +			hw ? "hardware" : "software", dev_name(dev)); +	else if (ret == -ENODEV) +		/* device not associated with an iommu */ +		ret = 0; + +	return ret; +} + +  static int __init iommu_prepare_static_identity_mapping(int hw)  {  	struct pci_dev *pdev = NULL; -	int ret; +	struct dmar_drhd_unit *drhd; +	struct intel_iommu *iommu; +	struct device *dev; +	int i; +	int ret = 0;  	ret = si_domain_init(hw);  	if (ret)  		return -EFAULT;  	for_each_pci_dev(pdev) { -		if (iommu_should_identity_map(pdev, 1)) { -			ret = domain_add_dev_info(si_domain, pdev, -					     hw ? CONTEXT_TT_PASS_THROUGH : -						  CONTEXT_TT_MULTI_LEVEL); -			if (ret) { -				/* device not associated with an iommu */ -				if (ret == -ENODEV) -					continue; -				return ret; +		ret = dev_prepare_static_identity_mapping(&pdev->dev, hw); +		if (ret) +			return ret; +	} + +	for_each_active_iommu(iommu, drhd) +		for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { +			struct acpi_device_physical_node *pn; +			struct acpi_device *adev; + +			if (dev->bus != &acpi_bus_type) +				continue; +				 +			adev= to_acpi_device(dev); +			mutex_lock(&adev->physical_node_lock); +			list_for_each_entry(pn, &adev->physical_node_list, node) { +				ret = dev_prepare_static_identity_mapping(pn->dev, hw); +				if (ret) +					break;  			} -			pr_info("IOMMU: %s identity mapping for device %s\n", -				hw ? 
"hardware" : "software", pci_name(pdev)); +			mutex_unlock(&adev->physical_node_lock); +			if (ret) +				return ret;  		} -	}  	return 0;  } @@ -2449,7 +2668,7 @@ static int __init init_dmars(void)  {  	struct dmar_drhd_unit *drhd;  	struct dmar_rmrr_unit *rmrr; -	struct pci_dev *pdev; +	struct device *dev;  	struct intel_iommu *iommu;  	int i, ret; @@ -2485,19 +2704,15 @@ static int __init init_dmars(void)  		sizeof(struct deferred_flush_tables), GFP_KERNEL);  	if (!deferred_flush) {  		ret = -ENOMEM; -		goto error; +		goto free_g_iommus;  	} -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; - -		iommu = drhd->iommu; +	for_each_active_iommu(iommu, drhd) {  		g_iommus[iommu->seq_id] = iommu;  		ret = iommu_init_domains(iommu);  		if (ret) -			goto error; +			goto free_iommu;  		/*  		 * TBD: @@ -2507,7 +2722,7 @@ static int __init init_dmars(void)  		ret = iommu_alloc_root_entry(iommu);  		if (ret) {  			printk(KERN_ERR "IOMMU: allocate root entry failed\n"); -			goto error; +			goto free_iommu;  		}  		if (!ecap_pass_through(iommu->ecap))  			hw_pass_through = 0; @@ -2516,12 +2731,7 @@ static int __init init_dmars(void)  	/*  	 * Start from the sane iommu hardware state.  	 */ -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; - -		iommu = drhd->iommu; - +	for_each_active_iommu(iommu, drhd) {  		/*  		 * If the queued invalidation is already initialized by us  		 * (for example, while enabling interrupt-remapping) then @@ -2541,12 +2751,7 @@ static int __init init_dmars(void)  		dmar_disable_qi(iommu);  	} -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; - -		iommu = drhd->iommu; - +	for_each_active_iommu(iommu, drhd) {  		if (dmar_enable_qi(iommu)) {  			/*  			 * Queued Invalidate not enabled, use Register Based @@ -2586,7 +2791,7 @@ static int __init init_dmars(void)  		ret = iommu_prepare_static_identity_mapping(hw_pass_through);  		if (ret) {  			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); -			goto error; +			goto free_iommu;  		}  	}  	/* @@ -2605,15 +2810,10 @@ static int __init init_dmars(void)  	 */  	printk(KERN_INFO "IOMMU: Setting RMRR:\n");  	for_each_rmrr_units(rmrr) { -		for (i = 0; i < rmrr->devices_cnt; i++) { -			pdev = rmrr->devices[i]; -			/* -			 * some BIOS lists non-exist devices in DMAR -			 * table. -			 */ -			if (!pdev) -				continue; -			ret = iommu_prepare_rmrr_dev(rmrr, pdev); +		/* some BIOS lists non-exist devices in DMAR table. 
*/ +		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, +					  i, dev) { +			ret = iommu_prepare_rmrr_dev(rmrr, dev);  			if (ret)  				printk(KERN_ERR  				       "IOMMU: mapping reserved region failed\n"); @@ -2629,23 +2829,22 @@ static int __init init_dmars(void)  	 *   global invalidate iotlb  	 *   enable translation  	 */ -	for_each_drhd_unit(drhd) { +	for_each_iommu(iommu, drhd) {  		if (drhd->ignored) {  			/*  			 * we always have to disable PMRs or DMA may fail on  			 * this device  			 */  			if (force_on) -				iommu_disable_protect_mem_regions(drhd->iommu); +				iommu_disable_protect_mem_regions(iommu);  			continue;  		} -		iommu = drhd->iommu;  		iommu_flush_write_buffer(iommu);  		ret = dmar_set_interrupt(iommu);  		if (ret) -			goto error; +			goto free_iommu;  		iommu_set_root_entry(iommu); @@ -2654,20 +2853,20 @@ static int __init init_dmars(void)  		ret = iommu_enable_translation(iommu);  		if (ret) -			goto error; +			goto free_iommu;  		iommu_disable_protect_mem_regions(iommu);  	}  	return 0; -error: -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; -		iommu = drhd->iommu; -		free_iommu(iommu); -	} + +free_iommu: +	for_each_active_iommu(iommu, drhd) +		free_dmar_iommu(iommu); +	kfree(deferred_flush); +free_g_iommus:  	kfree(g_iommus); +error:  	return ret;  } @@ -2676,7 +2875,6 @@ static struct iova *intel_alloc_iova(struct device *dev,  				     struct dmar_domain *domain,  				     unsigned long nrpages, uint64_t dma_mask)  { -	struct pci_dev *pdev = to_pci_dev(dev);  	struct iova *iova = NULL;  	/* Restrict dma_mask to the width that the iommu can handle */ @@ -2696,34 +2894,31 @@ static struct iova *intel_alloc_iova(struct device *dev,  	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);  	if (unlikely(!iova)) {  		printk(KERN_ERR "Allocating %ld-page iova for %s failed", -		       nrpages, pci_name(pdev)); +		       nrpages, dev_name(dev));  		return NULL;  	}  	return iova;  } -static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) +static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)  {  	struct dmar_domain *domain;  	int ret; -	domain = get_domain_for_dev(pdev, -			DEFAULT_DOMAIN_ADDRESS_WIDTH); +	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);  	if (!domain) { -		printk(KERN_ERR -			"Allocating domain for %s failed", pci_name(pdev)); +		printk(KERN_ERR "Allocating domain for %s failed", +		       dev_name(dev));  		return NULL;  	}  	/* make sure context mapping is ok */ -	if (unlikely(!domain_context_mapped(pdev))) { -		ret = domain_context_mapping(domain, pdev, -					     CONTEXT_TT_MULTI_LEVEL); +	if (unlikely(!domain_context_mapped(dev))) { +		ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);  		if (ret) { -			printk(KERN_ERR -				"Domain context map for %s failed", -				pci_name(pdev)); +			printk(KERN_ERR "Domain context map for %s failed", +			       dev_name(dev));  			return NULL;  		}  	} @@ -2731,51 +2926,46 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)  	return domain;  } -static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev) +static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)  {  	struct device_domain_info *info;  	/* No lock here, assumes no domain exit in normal case */ -	info = dev->dev.archdata.iommu; +	info = dev->archdata.iommu;  	if (likely(info))  		return info->domain;  	return __get_valid_domain_for_dev(dev);  } -static int 
iommu_dummy(struct pci_dev *pdev) +static int iommu_dummy(struct device *dev)  { -	return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; +	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;  } -/* Check if the pdev needs to go through non-identity map and unmap process.*/ +/* Check if the dev needs to go through non-identity map and unmap process.*/  static int iommu_no_mapping(struct device *dev)  { -	struct pci_dev *pdev;  	int found; -	if (unlikely(dev->bus != &pci_bus_type)) -		return 1; - -	pdev = to_pci_dev(dev); -	if (iommu_dummy(pdev)) +	if (iommu_dummy(dev))  		return 1;  	if (!iommu_identity_mapping)  		return 0; -	found = identity_mapping(pdev); +	found = identity_mapping(dev);  	if (found) { -		if (iommu_should_identity_map(pdev, 0)) +		if (iommu_should_identity_map(dev, 0))  			return 1;  		else {  			/*  			 * 32 bit DMA is removed from si_domain and fall back  			 * to non-identity mapping.  			 */ -			domain_remove_one_dev_info(si_domain, pdev); +			domain_remove_one_dev_info(si_domain, dev);  			printk(KERN_INFO "32bit %s uses non-identity mapping\n", -			       pci_name(pdev)); +			       dev_name(dev));  			return 0;  		}  	} else { @@ -2783,15 +2973,15 @@ static int iommu_no_mapping(struct device *dev)  		 * In case of a detached 64 bit DMA device from vm, the device  		 * is put into si_domain for identity mapping.  		 */ -		if (iommu_should_identity_map(pdev, 0)) { +		if (iommu_should_identity_map(dev, 0)) {  			int ret; -			ret = domain_add_dev_info(si_domain, pdev, +			ret = domain_add_dev_info(si_domain, dev,  						  hw_pass_through ?  						  CONTEXT_TT_PASS_THROUGH :  						  CONTEXT_TT_MULTI_LEVEL);  			if (!ret) {  				printk(KERN_INFO "64bit %s uses identity mapping\n", -				       pci_name(pdev)); +				       dev_name(dev));  				return 1;  			}  		} @@ -2800,10 +2990,9 @@ static int iommu_no_mapping(struct device *dev)  	return 0;  } -static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, +static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,  				     size_t size, int dir, u64 dma_mask)  { -	struct pci_dev *pdev = to_pci_dev(hwdev);  	struct dmar_domain *domain;  	phys_addr_t start_paddr;  	struct iova *iova; @@ -2814,17 +3003,17 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,  	BUG_ON(dir == DMA_NONE); -	if (iommu_no_mapping(hwdev)) +	if (iommu_no_mapping(dev))  		return paddr; -	domain = get_valid_domain_for_dev(pdev); +	domain = get_valid_domain_for_dev(dev);  	if (!domain)  		return 0;  	iommu = domain_get_iommu(domain);  	size = aligned_nrpages(paddr, size); -	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask); +	iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);  	if (!iova)  		goto error; @@ -2850,7 +3039,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,  	/* it's a non-present to present mapping. 
Only flush if caching mode */  	if (cap_caching_mode(iommu->cap)) -		iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1); +		iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);  	else  		iommu_flush_write_buffer(iommu); @@ -2862,7 +3051,7 @@ error:  	if (iova)  		__free_iova(&domain->iovad, iova);  	printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n", -		pci_name(pdev), size, (unsigned long long)paddr, dir); +		dev_name(dev), size, (unsigned long long)paddr, dir);  	return 0;  } @@ -2872,7 +3061,7 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,  				 struct dma_attrs *attrs)  {  	return __intel_map_single(dev, page_to_phys(page) + offset, size, -				  dir, to_pci_dev(dev)->dma_mask); +				  dir, *dev->dma_mask);  }  static void flush_unmaps(void) @@ -2902,13 +3091,16 @@ static void flush_unmaps(void)  			/* On real hardware multiple invalidations are expensive */  			if (cap_caching_mode(iommu->cap))  				iommu_flush_iotlb_psi(iommu, domain->id, -				iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0); +					iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, +					!deferred_flush[i].freelist[j], 0);  			else {  				mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));  				iommu_flush_dev_iotlb(deferred_flush[i].domain[j],  						(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);  			}  			__free_iova(&deferred_flush[i].domain[j]->iovad, iova); +			if (deferred_flush[i].freelist[j]) +				dma_free_pagelist(deferred_flush[i].freelist[j]);  		}  		deferred_flush[i].next = 0;  	} @@ -2925,7 +3117,7 @@ static void flush_unmaps_timeout(unsigned long data)  	spin_unlock_irqrestore(&async_umap_flush_lock, flags);  } -static void add_unmap(struct dmar_domain *dom, struct iova *iova) +static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)  {  	unsigned long flags;  	int next, iommu_id; @@ -2941,6 +3133,7 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)  	next = deferred_flush[iommu_id].next;  	deferred_flush[iommu_id].domain[next] = dom;  	deferred_flush[iommu_id].iova[next] = iova; +	deferred_flush[iommu_id].freelist[next] = freelist;  	deferred_flush[iommu_id].next++;  	if (!timer_on) { @@ -2955,16 +3148,16 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,  			     size_t size, enum dma_data_direction dir,  			     struct dma_attrs *attrs)  { -	struct pci_dev *pdev = to_pci_dev(dev);  	struct dmar_domain *domain;  	unsigned long start_pfn, last_pfn;  	struct iova *iova;  	struct intel_iommu *iommu; +	struct page *freelist;  	if (iommu_no_mapping(dev))  		return; -	domain = find_domain(pdev); +	domain = find_domain(dev);  	BUG_ON(!domain);  	iommu = domain_get_iommu(domain); @@ -2978,21 +3171,18 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,  	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;  	pr_debug("Device %s unmapping: pfn %lx-%lx\n", -		 pci_name(pdev), start_pfn, last_pfn); +		 dev_name(dev), start_pfn, last_pfn); -	/*  clear the whole page */ -	dma_pte_clear_range(domain, start_pfn, last_pfn); - -	/* free page tables */ -	dma_pte_free_pagetable(domain, start_pfn, last_pfn); +	freelist = domain_unmap(domain, start_pfn, last_pfn);  	if (intel_iommu_strict) {  		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, -				      last_pfn - start_pfn + 1, 0); +				      last_pfn - start_pfn + 1, !freelist, 0);  		/* free iova */  		__free_iova(&domain->iovad, iova); +		dma_free_pagelist(freelist);  	} else 
{ -		add_unmap(domain, iova); +		add_unmap(domain, iova, freelist);  		/*  		 * queue up the release of the unmap to save the 1/6th of the  		 * cpu used up by the iotlb flush operation... @@ -3000,65 +3190,81 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,  	}  } -static void *intel_alloc_coherent(struct device *hwdev, size_t size, +static void *intel_alloc_coherent(struct device *dev, size_t size,  				  dma_addr_t *dma_handle, gfp_t flags,  				  struct dma_attrs *attrs)  { -	void *vaddr; +	struct page *page = NULL;  	int order;  	size = PAGE_ALIGN(size);  	order = get_order(size); -	if (!iommu_no_mapping(hwdev)) +	if (!iommu_no_mapping(dev))  		flags &= ~(GFP_DMA | GFP_DMA32); -	else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) { -		if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32)) +	else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) { +		if (dev->coherent_dma_mask < DMA_BIT_MASK(32))  			flags |= GFP_DMA;  		else  			flags |= GFP_DMA32;  	} -	vaddr = (void *)__get_free_pages(flags, order); -	if (!vaddr) +	if (flags & __GFP_WAIT) { +		unsigned int count = size >> PAGE_SHIFT; + +		page = dma_alloc_from_contiguous(dev, count, order); +		if (page && iommu_no_mapping(dev) && +		    page_to_phys(page) + size > dev->coherent_dma_mask) { +			dma_release_from_contiguous(dev, page, count); +			page = NULL; +		} +	} + +	if (!page) +		page = alloc_pages(flags, order); +	if (!page)  		return NULL; -	memset(vaddr, 0, size); +	memset(page_address(page), 0, size); -	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size, +	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,  					 DMA_BIDIRECTIONAL, -					 hwdev->coherent_dma_mask); +					 dev->coherent_dma_mask);  	if (*dma_handle) -		return vaddr; -	free_pages((unsigned long)vaddr, order); +		return page_address(page); +	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) +		__free_pages(page, order); +  	return NULL;  } -static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, +static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,  				dma_addr_t dma_handle, struct dma_attrs *attrs)  {  	int order; +	struct page *page = virt_to_page(vaddr);  	size = PAGE_ALIGN(size);  	order = get_order(size); -	intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); -	free_pages((unsigned long)vaddr, order); +	intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); +	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) +		__free_pages(page, order);  } -static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, +static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,  			   int nelems, enum dma_data_direction dir,  			   struct dma_attrs *attrs)  { -	struct pci_dev *pdev = to_pci_dev(hwdev);  	struct dmar_domain *domain;  	unsigned long start_pfn, last_pfn;  	struct iova *iova;  	struct intel_iommu *iommu; +	struct page *freelist; -	if (iommu_no_mapping(hwdev)) +	if (iommu_no_mapping(dev))  		return; -	domain = find_domain(pdev); +	domain = find_domain(dev);  	BUG_ON(!domain);  	iommu = domain_get_iommu(domain); @@ -3071,19 +3277,16 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,  	start_pfn = mm_to_dma_pfn(iova->pfn_lo);  	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; -	/*  clear the whole page */ -	dma_pte_clear_range(domain, start_pfn, last_pfn); - -	/* free page tables */ -	dma_pte_free_pagetable(domain, start_pfn, last_pfn); 
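/*
 * Illustrative aside, not part of the patch: a minimal, self-contained
 * userspace model of the pattern the change below introduces. domain_unmap()
 * now hands back the no-longer-referenced page-table pages as a "freelist",
 * and those pages are released only after the IOTLB has been invalidated --
 * immediately in strict mode, or together with the batched flush when the
 * unmap is deferred. All names here are hypothetical sketch names, not the
 * kernel's API.
 */
#include <stdio.h>
#include <stdlib.h>

#define BATCH	4	/* stand-in for the deferred-flush high-water mark */

struct pending {
	unsigned long iova;	/* start of the unmapped range */
	void *freelist;		/* page-table pages to free after the flush */
};

static struct pending queue[BATCH];
static int queued;

static void flush_iotlb(void)
{
	puts("IOTLB invalidated");
}

static void flush_deferred(void)
{
	flush_iotlb();				/* invalidate first ... */
	for (int i = 0; i < queued; i++)
		free(queue[i].freelist);	/* ... only then free page tables */
	queued = 0;
}

static void add_unmap(unsigned long iova, void *freelist)
{
	if (queued == BATCH)			/* queue full: flush now */
		flush_deferred();
	queue[queued].iova = iova;
	queue[queued].freelist = freelist;
	queued++;
}

int main(void)
{
	for (unsigned long i = 0; i < 6; i++)
		add_unmap(i << 12, malloc(64));	/* fake freelist allocation */
	flush_deferred();			/* drain whatever is left */
	return 0;
}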
+	freelist = domain_unmap(domain, start_pfn, last_pfn);  	if (intel_iommu_strict) {  		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, -				      last_pfn - start_pfn + 1, 0); +				      last_pfn - start_pfn + 1, !freelist, 0);  		/* free iova */  		__free_iova(&domain->iovad, iova); +		dma_free_pagelist(freelist);  	} else { -		add_unmap(domain, iova); +		add_unmap(domain, iova, freelist);  		/*  		 * queue up the release of the unmap to save the 1/6th of the  		 * cpu used up by the iotlb flush operation... @@ -3105,11 +3308,10 @@ static int intel_nontranslate_map_sg(struct device *hddev,  	return nelems;  } -static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, +static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,  			enum dma_data_direction dir, struct dma_attrs *attrs)  {  	int i; -	struct pci_dev *pdev = to_pci_dev(hwdev);  	struct dmar_domain *domain;  	size_t size = 0;  	int prot = 0; @@ -3120,10 +3322,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne  	struct intel_iommu *iommu;  	BUG_ON(dir == DMA_NONE); -	if (iommu_no_mapping(hwdev)) -		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); +	if (iommu_no_mapping(dev)) +		return intel_nontranslate_map_sg(dev, sglist, nelems, dir); -	domain = get_valid_domain_for_dev(pdev); +	domain = get_valid_domain_for_dev(dev);  	if (!domain)  		return 0; @@ -3132,8 +3334,8 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne  	for_each_sg(sglist, sg, nelems, i)  		size += aligned_nrpages(sg->offset, sg->length); -	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), -				pdev->dma_mask); +	iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), +				*dev->dma_mask);  	if (!iova) {  		sglist->dma_length = 0;  		return 0; @@ -3166,7 +3368,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne  	/* it's a non-present to present mapping. 
Only flush if caching mode */  	if (cap_caching_mode(iommu->cap)) -		iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1); +		iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);  	else  		iommu_flush_write_buffer(iommu); @@ -3301,29 +3503,28 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quir  static void __init init_no_remapping_devices(void)  {  	struct dmar_drhd_unit *drhd; +	struct device *dev; +	int i;  	for_each_drhd_unit(drhd) {  		if (!drhd->include_all) { -			int i; -			for (i = 0; i < drhd->devices_cnt; i++) -				if (drhd->devices[i] != NULL) -					break; -			/* ignore DMAR unit if no pci devices exist */ +			for_each_active_dev_scope(drhd->devices, +						  drhd->devices_cnt, i, dev) +				break; +			/* ignore DMAR unit if no devices exist */  			if (i == drhd->devices_cnt)  				drhd->ignored = 1;  		}  	} -	for_each_drhd_unit(drhd) { -		int i; -		if (drhd->ignored || drhd->include_all) +	for_each_active_drhd_unit(drhd) { +		if (drhd->include_all)  			continue; -		for (i = 0; i < drhd->devices_cnt; i++) -			if (drhd->devices[i] && -			    !IS_GFX_DEVICE(drhd->devices[i])) +		for_each_active_dev_scope(drhd->devices, +					  drhd->devices_cnt, i, dev) +			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))  				break; -  		if (i < drhd->devices_cnt)  			continue; @@ -3333,11 +3534,9 @@ static void __init init_no_remapping_devices(void)  			intel_iommu_gfx_mapped = 1;  		} else {  			drhd->ignored = 1; -			for (i = 0; i < drhd->devices_cnt; i++) { -				if (!drhd->devices[i]) -					continue; -				drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; -			} +			for_each_active_dev_scope(drhd->devices, +						  drhd->devices_cnt, i, dev) +				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;  		}  	}  } @@ -3480,13 +3679,6 @@ static void __init init_iommu_pm_ops(void)  static inline void init_iommu_pm_ops(void) {}  #endif	/* CONFIG_PM */ -LIST_HEAD(dmar_rmrr_units); - -static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) -{ -	list_add(&rmrr->list, &dmar_rmrr_units); -} -  int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)  { @@ -3501,30 +3693,18 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)  	rmrr = (struct acpi_dmar_reserved_memory *)header;  	rmrru->base_address = rmrr->base_address;  	rmrru->end_address = rmrr->end_address; - -	dmar_register_rmrr_unit(rmrru); -	return 0; -} - -static int __init -rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) -{ -	struct acpi_dmar_reserved_memory *rmrr; -	int ret; - -	rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr; -	ret = dmar_parse_dev_scope((void *)(rmrr + 1), -		((void *)rmrr) + rmrr->header.length, -		&rmrru->devices_cnt, &rmrru->devices, rmrr->segment); - -	if (ret || (rmrru->devices_cnt == 0)) { -		list_del(&rmrru->list); +	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), +				((void *)rmrr) + rmrr->header.length, +				&rmrru->devices_cnt); +	if (rmrru->devices_cnt && rmrru->devices == NULL) {  		kfree(rmrru); +		return -ENOMEM;  	} -	return ret; -} -static LIST_HEAD(dmar_atsr_units); +	list_add(&rmrru->list, &dmar_rmrr_units); + +	return 0; +}  int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)  { @@ -3538,91 +3718,134 @@ int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)  	atsru->hdr = hdr;  	atsru->include_all = atsr->flags & 0x1; +	if (!atsru->include_all) { +		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1), +				(void *)atsr + atsr->header.length, +				&atsru->devices_cnt); +	
	if (atsru->devices_cnt && atsru->devices == NULL) { +			kfree(atsru); +			return -ENOMEM; +		} +	} -	list_add(&atsru->list, &dmar_atsr_units); +	list_add_rcu(&atsru->list, &dmar_atsr_units);  	return 0;  } -static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) +static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)  { -	int rc; -	struct acpi_dmar_atsr *atsr; +	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt); +	kfree(atsru); +} -	if (atsru->include_all) -		return 0; +static void intel_iommu_free_dmars(void) +{ +	struct dmar_rmrr_unit *rmrru, *rmrr_n; +	struct dmar_atsr_unit *atsru, *atsr_n; -	atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); -	rc = dmar_parse_dev_scope((void *)(atsr + 1), -				(void *)atsr + atsr->header.length, -				&atsru->devices_cnt, &atsru->devices, -				atsr->segment); -	if (rc || !atsru->devices_cnt) { -		list_del(&atsru->list); -		kfree(atsru); +	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { +		list_del(&rmrru->list); +		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); +		kfree(rmrru);  	} -	return rc; +	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) { +		list_del(&atsru->list); +		intel_iommu_free_atsr(atsru); +	}  }  int dmar_find_matched_atsr_unit(struct pci_dev *dev)  { -	int i; +	int i, ret = 1;  	struct pci_bus *bus; +	struct pci_dev *bridge = NULL; +	struct device *tmp;  	struct acpi_dmar_atsr *atsr;  	struct dmar_atsr_unit *atsru;  	dev = pci_physfn(dev); - -	list_for_each_entry(atsru, &dmar_atsr_units, list) { -		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); -		if (atsr->segment == pci_domain_nr(dev->bus)) -			goto found; -	} - -	return 0; - -found:  	for (bus = dev->bus; bus; bus = bus->parent) { -		struct pci_dev *bridge = bus->self; - +		bridge = bus->self;  		if (!bridge || !pci_is_pcie(bridge) ||  		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)  			return 0; - -		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) { -			for (i = 0; i < atsru->devices_cnt; i++) -				if (atsru->devices[i] == bridge) -					return 1; +		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)  			break; -		}  	} +	if (!bridge) +		return 0; -	if (atsru->include_all) -		return 1; +	rcu_read_lock(); +	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { +		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); +		if (atsr->segment != pci_domain_nr(dev->bus)) +			continue; -	return 0; +		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp) +			if (tmp == &bridge->dev) +				goto out; + +		if (atsru->include_all) +			goto out; +	} +	ret = 0; +out: +	rcu_read_unlock(); + +	return ret;  } -int __init dmar_parse_rmrr_atsr_dev(void) +int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)  { -	struct dmar_rmrr_unit *rmrr, *rmrr_n; -	struct dmar_atsr_unit *atsr, *atsr_n;  	int ret = 0; +	struct dmar_rmrr_unit *rmrru; +	struct dmar_atsr_unit *atsru; +	struct acpi_dmar_atsr *atsr; +	struct acpi_dmar_reserved_memory *rmrr; -	list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) { -		ret = rmrr_parse_dev(rmrr); -		if (ret) -			return ret; +	if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING) +		return 0; + +	list_for_each_entry(rmrru, &dmar_rmrr_units, list) { +		rmrr = container_of(rmrru->hdr, +				    struct acpi_dmar_reserved_memory, header); +		if (info->event == BUS_NOTIFY_ADD_DEVICE) { +			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1), +				((void *)rmrr) + rmrr->header.length, +				rmrr->segment, rmrru->devices, 
+				rmrru->devices_cnt); +			if(ret < 0) +				return ret; +		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) { +			dmar_remove_dev_scope(info, rmrr->segment, +				rmrru->devices, rmrru->devices_cnt); +		}  	} -	list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) { -		ret = atsr_parse_dev(atsr); -		if (ret) -			return ret; +	list_for_each_entry(atsru, &dmar_atsr_units, list) { +		if (atsru->include_all) +			continue; + +		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); +		if (info->event == BUS_NOTIFY_ADD_DEVICE) { +			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1), +					(void *)atsr + atsr->header.length, +					atsr->segment, atsru->devices, +					atsru->devices_cnt); +			if (ret > 0) +				break; +			else if(ret < 0) +				return ret; +		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) { +			if (dmar_remove_dev_scope(info, atsr->segment, +					atsru->devices, atsru->devices_cnt)) +				break; +		}  	} -	return ret; +	return 0;  }  /* @@ -3635,24 +3858,26 @@ static int device_notifier(struct notifier_block *nb,  				  unsigned long action, void *data)  {  	struct device *dev = data; -	struct pci_dev *pdev = to_pci_dev(dev);  	struct dmar_domain *domain; -	if (iommu_no_mapping(dev)) +	if (iommu_dummy(dev))  		return 0; -	domain = find_domain(pdev); -	if (!domain) +	if (action != BUS_NOTIFY_UNBOUND_DRIVER && +	    action != BUS_NOTIFY_DEL_DEVICE)  		return 0; -	if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) { -		domain_remove_one_dev_info(domain, pdev); +	domain = find_domain(dev); +	if (!domain) +		return 0; -		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && -		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && -		    list_empty(&domain->devices)) -			domain_exit(domain); -	} +	down_read(&dmar_global_lock); +	domain_remove_one_dev_info(domain, dev); +	if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && +	    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && +	    list_empty(&domain->devices)) +		domain_exit(domain); +	up_read(&dmar_global_lock);  	return 0;  } @@ -3661,48 +3886,112 @@ static struct notifier_block device_nb = {  	.notifier_call = device_notifier,  }; +static int intel_iommu_memory_notifier(struct notifier_block *nb, +				       unsigned long val, void *v) +{ +	struct memory_notify *mhp = v; +	unsigned long long start, end; +	unsigned long start_vpfn, last_vpfn; + +	switch (val) { +	case MEM_GOING_ONLINE: +		start = mhp->start_pfn << PAGE_SHIFT; +		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; +		if (iommu_domain_identity_map(si_domain, start, end)) { +			pr_warn("dmar: failed to build identity map for [%llx-%llx]\n", +				start, end); +			return NOTIFY_BAD; +		} +		break; + +	case MEM_OFFLINE: +	case MEM_CANCEL_ONLINE: +		start_vpfn = mm_to_dma_pfn(mhp->start_pfn); +		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); +		while (start_vpfn <= last_vpfn) { +			struct iova *iova; +			struct dmar_drhd_unit *drhd; +			struct intel_iommu *iommu; +			struct page *freelist; + +			iova = find_iova(&si_domain->iovad, start_vpfn); +			if (iova == NULL) { +				pr_debug("dmar: failed get IOVA for PFN %lx\n", +					 start_vpfn); +				break; +			} + +			iova = split_and_remove_iova(&si_domain->iovad, iova, +						     start_vpfn, last_vpfn); +			if (iova == NULL) { +				pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n", +					start_vpfn, last_vpfn); +				return NOTIFY_BAD; +			} + +			freelist = domain_unmap(si_domain, iova->pfn_lo, +					       iova->pfn_hi); + +			rcu_read_lock(); +			
for_each_active_iommu(iommu, drhd) +				iommu_flush_iotlb_psi(iommu, si_domain->id, +					iova->pfn_lo, +					iova->pfn_hi - iova->pfn_lo + 1, +					!freelist, 0); +			rcu_read_unlock(); +			dma_free_pagelist(freelist); + +			start_vpfn = iova->pfn_hi + 1; +			free_iova_mem(iova); +		} +		break; +	} + +	return NOTIFY_OK; +} + +static struct notifier_block intel_iommu_memory_nb = { +	.notifier_call = intel_iommu_memory_notifier, +	.priority = 0 +}; +  int __init intel_iommu_init(void)  { -	int ret = 0; +	int ret = -ENODEV;  	struct dmar_drhd_unit *drhd; +	struct intel_iommu *iommu;  	/* VT-d is required for a TXT/tboot launch, so enforce that */  	force_on = tboot_force_iommu(); +	if (iommu_init_mempool()) { +		if (force_on) +			panic("tboot: Failed to initialize iommu memory\n"); +		return -ENOMEM; +	} + +	down_write(&dmar_global_lock);  	if (dmar_table_init()) {  		if (force_on)  			panic("tboot: Failed to initialize DMAR table\n"); -		return 	-ENODEV; +		goto out_free_dmar;  	}  	/*  	 * Disable translation if already enabled prior to OS handover.  	 */ -	for_each_drhd_unit(drhd) { -		struct intel_iommu *iommu; - -		if (drhd->ignored) -			continue; - -		iommu = drhd->iommu; +	for_each_active_iommu(iommu, drhd)  		if (iommu->gcmd & DMA_GCMD_TE)  			iommu_disable_translation(iommu); -	}  	if (dmar_dev_scope_init() < 0) {  		if (force_on)  			panic("tboot: Failed to initialize DMAR device scope\n"); -		return 	-ENODEV; +		goto out_free_dmar;  	}  	if (no_iommu || dmar_disabled) -		return -ENODEV; - -	if (iommu_init_mempool()) { -		if (force_on) -			panic("tboot: Failed to initialize iommu memory\n"); -		return 	-ENODEV; -	} +		goto out_free_dmar;  	if (list_empty(&dmar_rmrr_units))  		printk(KERN_INFO "DMAR: No RMRR found\n"); @@ -3713,7 +4002,7 @@ int __init intel_iommu_init(void)  	if (dmar_init_reserved_ranges()) {  		if (force_on)  			panic("tboot: Failed to reserve iommu ranges\n"); -		return 	-ENODEV; +		goto out_free_reserved_range;  	}  	init_no_remapping_devices(); @@ -3723,10 +4012,9 @@ int __init intel_iommu_init(void)  		if (force_on)  			panic("tboot: Failed to initialize DMARs\n");  		printk(KERN_ERR "IOMMU: dmar init failed\n"); -		put_iova_domain(&reserved_iova_list); -		iommu_exit_mempool(); -		return ret; +		goto out_free_reserved_range;  	} +	up_write(&dmar_global_lock);  	printk(KERN_INFO  	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); @@ -3739,22 +4027,33 @@ int __init intel_iommu_init(void)  	init_iommu_pm_ops();  	bus_set_iommu(&pci_bus_type, &intel_iommu_ops); -  	bus_register_notifier(&pci_bus_type, &device_nb); +	if (si_domain && !hw_pass_through) +		register_memory_notifier(&intel_iommu_memory_nb);  	intel_iommu_enabled = 1;  	return 0; + +out_free_reserved_range: +	put_iova_domain(&reserved_iova_list); +out_free_dmar: +	intel_iommu_free_dmars(); +	up_write(&dmar_global_lock); +	iommu_exit_mempool(); +	return ret;  }  static void iommu_detach_dependent_devices(struct intel_iommu *iommu, -					   struct pci_dev *pdev) +					   struct device *dev)  { -	struct pci_dev *tmp, *parent; +	struct pci_dev *tmp, *parent, *pdev; -	if (!iommu || !pdev) +	if (!iommu || !dev || !dev_is_pci(dev))  		return; +	pdev = to_pci_dev(dev); +  	/* dependent device detach */  	tmp = pci_find_upstream_pcie_bridge(pdev);  	/* Secondary interface's bus number and devfn 0 */ @@ -3775,31 +4074,28 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu,  }  static void domain_remove_one_dev_info(struct dmar_domain *domain, -					  struct pci_dev *pdev) +				
       struct device *dev)  { -	struct device_domain_info *info; +	struct device_domain_info *info, *tmp;  	struct intel_iommu *iommu;  	unsigned long flags;  	int found = 0; -	struct list_head *entry, *tmp; +	u8 bus, devfn; -	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, -				pdev->devfn); +	iommu = device_to_iommu(dev, &bus, &devfn);  	if (!iommu)  		return;  	spin_lock_irqsave(&device_domain_lock, flags); -	list_for_each_safe(entry, tmp, &domain->devices) { -		info = list_entry(entry, struct device_domain_info, link); -		if (info->segment == pci_domain_nr(pdev->bus) && -		    info->bus == pdev->bus->number && -		    info->devfn == pdev->devfn) { +	list_for_each_entry_safe(info, tmp, &domain->devices, link) { +		if (info->iommu == iommu && info->bus == bus && +		    info->devfn == devfn) {  			unlink_domain_info(info);  			spin_unlock_irqrestore(&device_domain_lock, flags);  			iommu_disable_dev_iotlb(info);  			iommu_detach_dev(iommu, info->bus, info->devfn); -			iommu_detach_dependent_devices(iommu, pdev); +			iommu_detach_dependent_devices(iommu, dev);  			free_devinfo_mem(info);  			spin_lock_irqsave(&device_domain_lock, flags); @@ -3814,8 +4110,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,  		 * owned by this domain, clear this iommu in iommu_bmp  		 * update iommu count and coherency  		 */ -		if (iommu == device_to_iommu(info->segment, info->bus, -					    info->devfn)) +		if (info->iommu == iommu)  			found = 1;  	} @@ -3839,67 +4134,11 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,  	}  } -static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) -{ -	struct device_domain_info *info; -	struct intel_iommu *iommu; -	unsigned long flags1, flags2; - -	spin_lock_irqsave(&device_domain_lock, flags1); -	while (!list_empty(&domain->devices)) { -		info = list_entry(domain->devices.next, -			struct device_domain_info, link); -		unlink_domain_info(info); -		spin_unlock_irqrestore(&device_domain_lock, flags1); - -		iommu_disable_dev_iotlb(info); -		iommu = device_to_iommu(info->segment, info->bus, info->devfn); -		iommu_detach_dev(iommu, info->bus, info->devfn); -		iommu_detach_dependent_devices(iommu, info->dev); - -		/* clear this iommu in iommu_bmp, update iommu count -		 * and capabilities -		 */ -		spin_lock_irqsave(&domain->iommu_lock, flags2); -		if (test_and_clear_bit(iommu->seq_id, -				       domain->iommu_bmp)) { -			domain->iommu_count--; -			domain_update_iommu_cap(domain); -		} -		spin_unlock_irqrestore(&domain->iommu_lock, flags2); - -		free_devinfo_mem(info); -		spin_lock_irqsave(&device_domain_lock, flags1); -	} -	spin_unlock_irqrestore(&device_domain_lock, flags1); -} - -/* domain id for virtual machine, it won't be set in context */ -static unsigned long vm_domid; - -static struct dmar_domain *iommu_alloc_vm_domain(void) -{ -	struct dmar_domain *domain; - -	domain = alloc_domain_mem(); -	if (!domain) -		return NULL; - -	domain->id = vm_domid++; -	domain->nid = -1; -	memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp)); -	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; - -	return domain; -} -  static int md_domain_init(struct dmar_domain *domain, int guest_width)  {  	int adjust_width;  	init_iova_domain(&domain->iovad, DMA_32BIT_PFN); -	spin_lock_init(&domain->iommu_lock); -  	domain_reserve_special_ranges(domain);  	/* calculate AGAW */ @@ -3907,9 +4146,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)  	adjust_width = guestwidth_to_adjustwidth(guest_width);  	
domain->agaw = width_to_agaw(adjust_width); -	INIT_LIST_HEAD(&domain->devices); - -	domain->iommu_count = 0;  	domain->iommu_coherency = 0;  	domain->iommu_snooping = 0;  	domain->iommu_superpage = 0; @@ -3924,57 +4160,11 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)  	return 0;  } -static void iommu_free_vm_domain(struct dmar_domain *domain) -{ -	unsigned long flags; -	struct dmar_drhd_unit *drhd; -	struct intel_iommu *iommu; -	unsigned long i; -	unsigned long ndomains; - -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; -		iommu = drhd->iommu; - -		ndomains = cap_ndoms(iommu->cap); -		for_each_set_bit(i, iommu->domain_ids, ndomains) { -			if (iommu->domains[i] == domain) { -				spin_lock_irqsave(&iommu->lock, flags); -				clear_bit(i, iommu->domain_ids); -				iommu->domains[i] = NULL; -				spin_unlock_irqrestore(&iommu->lock, flags); -				break; -			} -		} -	} -} - -static void vm_domain_exit(struct dmar_domain *domain) -{ -	/* Domain 0 is reserved, so dont process it */ -	if (!domain) -		return; - -	vm_domain_remove_all_dev_info(domain); -	/* destroy iovas */ -	put_iova_domain(&domain->iovad); - -	/* clear ptes */ -	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - -	/* free page tables */ -	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - -	iommu_free_vm_domain(domain); -	free_domain_mem(domain); -} -  static int intel_iommu_domain_init(struct iommu_domain *domain)  {  	struct dmar_domain *dmar_domain; -	dmar_domain = iommu_alloc_vm_domain(); +	dmar_domain = alloc_domain(true);  	if (!dmar_domain) {  		printk(KERN_ERR  			"intel_iommu_domain_init: dmar_domain == NULL\n"); @@ -3983,7 +4173,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain)  	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {  		printk(KERN_ERR  			"intel_iommu_domain_init() failed\n"); -		vm_domain_exit(dmar_domain); +		domain_exit(dmar_domain);  		return -ENOMEM;  	}  	domain_update_iommu_cap(dmar_domain); @@ -4001,33 +4191,32 @@ static void intel_iommu_domain_destroy(struct iommu_domain *domain)  	struct dmar_domain *dmar_domain = domain->priv;  	domain->priv = NULL; -	vm_domain_exit(dmar_domain); +	domain_exit(dmar_domain);  }  static int intel_iommu_attach_device(struct iommu_domain *domain,  				     struct device *dev)  {  	struct dmar_domain *dmar_domain = domain->priv; -	struct pci_dev *pdev = to_pci_dev(dev);  	struct intel_iommu *iommu;  	int addr_width; +	u8 bus, devfn; -	/* normally pdev is not mapped */ -	if (unlikely(domain_context_mapped(pdev))) { +	/* normally dev is not mapped */ +	if (unlikely(domain_context_mapped(dev))) {  		struct dmar_domain *old_domain; -		old_domain = find_domain(pdev); +		old_domain = find_domain(dev);  		if (old_domain) {  			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||  			    dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) -				domain_remove_one_dev_info(old_domain, pdev); +				domain_remove_one_dev_info(old_domain, dev);  			else  				domain_remove_dev_info(old_domain);  		}  	} -	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, -				pdev->devfn); +	iommu = device_to_iommu(dev, &bus, &devfn);  	if (!iommu)  		return -ENODEV; @@ -4059,16 +4248,15 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,  		dmar_domain->agaw--;  	} -	return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); +	return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);  }  static void intel_iommu_detach_device(struct 
iommu_domain *domain,  				      struct device *dev)  {  	struct dmar_domain *dmar_domain = domain->priv; -	struct pci_dev *pdev = to_pci_dev(dev); -	domain_remove_one_dev_info(dmar_domain, pdev); +	domain_remove_one_dev_info(dmar_domain, dev);  }  static int intel_iommu_map(struct iommu_domain *domain, @@ -4110,18 +4298,51 @@ static int intel_iommu_map(struct iommu_domain *domain,  }  static size_t intel_iommu_unmap(struct iommu_domain *domain, -			     unsigned long iova, size_t size) +				unsigned long iova, size_t size)  {  	struct dmar_domain *dmar_domain = domain->priv; -	int order; +	struct page *freelist = NULL; +	struct intel_iommu *iommu; +	unsigned long start_pfn, last_pfn; +	unsigned int npages; +	int iommu_id, num, ndomains, level = 0; + +	/* Cope with horrid API which requires us to unmap more than the +	   size argument if it happens to be a large-page mapping. */ +	if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)) +		BUG(); + +	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) +		size = VTD_PAGE_SIZE << level_to_offset_bits(level); + +	start_pfn = iova >> VTD_PAGE_SHIFT; +	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT; + +	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn); + +	npages = last_pfn - start_pfn + 1; + +	for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) { +               iommu = g_iommus[iommu_id]; + +               /* +                * find bit position of dmar_domain +                */ +               ndomains = cap_ndoms(iommu->cap); +               for_each_set_bit(num, iommu->domain_ids, ndomains) { +                       if (iommu->domains[num] == dmar_domain) +                               iommu_flush_iotlb_psi(iommu, num, start_pfn, +						     npages, !freelist, 0); +	       } + +	} -	order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, -			    (iova + size - 1) >> VTD_PAGE_SHIFT); +	dma_free_pagelist(freelist);  	if (dmar_domain->max_addr == iova + size)  		dmar_domain->max_addr = iova; -	return PAGE_SIZE << order; +	return size;  }  static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, @@ -4129,9 +4350,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,  {  	struct dmar_domain *dmar_domain = domain->priv;  	struct dma_pte *pte; +	int level = 0;  	u64 phys = 0; -	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); +	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);  	if (pte)  		phys = dma_pte_addr(pte); @@ -4159,9 +4381,9 @@ static int intel_iommu_add_device(struct device *dev)  	struct pci_dev *bridge, *dma_pdev = NULL;  	struct iommu_group *group;  	int ret; +	u8 bus, devfn; -	if (!device_to_iommu(pci_domain_nr(pdev->bus), -			     pdev->bus->number, pdev->devfn)) +	if (!device_to_iommu(dev, &bus, &devfn))  		return -ENODEV;  	bridge = pci_find_upstream_pcie_bridge(pdev); diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index f71673dbb23..9b174893f0f 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -6,11 +6,11 @@  #include <linux/hpet.h>  #include <linux/pci.h>  #include <linux/irq.h> +#include <linux/intel-iommu.h> +#include <linux/acpi.h>  #include <asm/io_apic.h>  #include <asm/smp.h>  #include <asm/cpu.h> -#include <linux/intel-iommu.h> -#include <acpi/acpi.h>  #include <asm/irq_remapping.h>  #include <asm/pci-direct.h>  #include <asm/msidef.h> @@ -38,15 +38,28 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS];  static 
struct hpet_scope ir_hpet[MAX_HPET_TBS];  static int ir_ioapic_num, ir_hpet_num; +/* + * Lock ordering: + * ->dmar_global_lock + *	->irq_2_ir_lock + *		->qi->q_lock + *	->iommu->register_lock + * Note: + * intel_irq_remap_ops.{supported,prepare,enable,disable,reenable} are called + * in a single-threaded environment with interrupts disabled, so no need to take + * the dmar_global_lock. + */  static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); +static int __init parse_ioapics_under_ir(void); +  static struct irq_2_iommu *irq_2_iommu(unsigned int irq)  {  	struct irq_cfg *cfg = irq_get_chip_data(irq);  	return cfg ? &cfg->irq_2_iommu : NULL;  } -int get_irte(int irq, struct irte *entry) +static int get_irte(int irq, struct irte *entry)  {  	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);  	unsigned long flags; @@ -69,19 +82,13 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)  	struct ir_table *table = iommu->ir_table;  	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);  	struct irq_cfg *cfg = irq_get_chip_data(irq); -	u16 index, start_index;  	unsigned int mask = 0;  	unsigned long flags; -	int i; +	int index;  	if (!count || !irq_iommu)  		return -1; -	/* -	 * start the IRTE search from index 0. -	 */ -	index = start_index = 0; -  	if (count > 1) {  		count = __roundup_pow_of_two(count);  		mask = ilog2(count); @@ -96,32 +103,17 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)  	}  	raw_spin_lock_irqsave(&irq_2_ir_lock, flags); -	do { -		for (i = index; i < index + count; i++) -			if  (table->base[i].present) -				break; -		/* empty index found */ -		if (i == index + count) -			break; - -		index = (index + count) % INTR_REMAP_TABLE_ENTRIES; - -		if (index == start_index) { -			raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); -			printk(KERN_ERR "can't allocate an IRTE\n"); -			return -1; -		} -	} while (1); - -	for (i = index; i < index + count; i++) -		table->base[i].present = 1; - -	cfg->remapped = 1; -	irq_iommu->iommu = iommu; -	irq_iommu->irte_index =  index; -	irq_iommu->sub_handle = 0; -	irq_iommu->irte_mask = mask; - +	index = bitmap_find_free_region(table->bitmap, +					INTR_REMAP_TABLE_ENTRIES, mask); +	if (index < 0) { +		pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id); +	} else { +		cfg->remapped = 1; +		irq_iommu->iommu = iommu; +		irq_iommu->irte_index =  index; +		irq_iommu->sub_handle = 0; +		irq_iommu->irte_mask = mask; +	}  	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);  	return index; @@ -254,6 +246,8 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)  		set_64bit(&entry->low, 0);  		set_64bit(&entry->high, 0);  	} +	bitmap_release_region(iommu->ir_table->bitmap, index, +			      irq_iommu->irte_mask);  	return qi_flush_iec(iommu, index, irq_iommu->irte_mask);  } @@ -324,19 +318,21 @@ static int set_ioapic_sid(struct irte *irte, int apic)  	if (!irte)  		return -1; +	down_read(&dmar_global_lock);  	for (i = 0; i < MAX_IO_APICS; i++) {  		if (ir_ioapic[i].id == apic) {  			sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;  			break;  		}  	} +	up_read(&dmar_global_lock);  	if (sid == 0) {  		pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic);  		return -1;  	} -	set_irte_sid(irte, 1, 0, sid); +	set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, sid);  	return 0;  } @@ -349,12 +345,14 @@ static int set_hpet_sid(struct irte *irte, u8 id)  	if (!irte)  		return -1; +	down_read(&dmar_global_lock);  	for (i = 0; i < MAX_HPET_TBS; i++) {  		if (ir_hpet[i].id == id) {  			sid = (ir_hpet[i].bus << 8) | 
ir_hpet[i].devfn;  			break;  		}  	} +	up_read(&dmar_global_lock);  	if (sid == 0) {  		pr_warning("Failed to set source-id of HPET block (%d)\n", id); @@ -453,6 +451,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu, int mode)  {  	struct ir_table *ir_table;  	struct page *pages; +	unsigned long *bitmap;  	ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),  					     GFP_ATOMIC); @@ -464,13 +463,23 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu, int mode)  				 INTR_REMAP_PAGE_ORDER);  	if (!pages) { -		printk(KERN_ERR "failed to allocate pages of order %d\n", -		       INTR_REMAP_PAGE_ORDER); +		pr_err("IR%d: failed to allocate pages of order %d\n", +		       iommu->seq_id, INTR_REMAP_PAGE_ORDER);  		kfree(iommu->ir_table);  		return -ENOMEM;  	} +	bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES), +			 sizeof(long), GFP_ATOMIC); +	if (bitmap == NULL) { +		pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id); +		__free_pages(pages, INTR_REMAP_PAGE_ORDER); +		kfree(ir_table); +		return -ENOMEM; +	} +  	ir_table->base = page_address(pages); +	ir_table->bitmap = bitmap;  	iommu_set_irq_remapping(iommu, mode);  	return 0; @@ -521,16 +530,18 @@ static int __init dmar_x2apic_optout(void)  static int __init intel_irq_remapping_supported(void)  {  	struct dmar_drhd_unit *drhd; +	struct intel_iommu *iommu;  	if (disable_irq_remap)  		return 0;  	if (irq_remap_broken) { -		WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, -			   "This system BIOS has enabled interrupt remapping\n" -			   "on a chipset that contains an erratum making that\n" -			   "feature unstable.  To maintain system stability\n" -			   "interrupt remapping is being disabled.  Please\n" -			   "contact your BIOS vendor for an update\n"); +		printk(KERN_WARNING +			"This system BIOS has enabled interrupt remapping\n" +			"on a chipset that contains an erratum making that\n" +			"feature unstable.  To maintain system stability\n" +			"interrupt remapping is being disabled.  Please\n" +			"contact your BIOS vendor for an update\n"); +		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);  		disable_irq_remap = 1;  		return 0;  	} @@ -538,12 +549,9 @@ static int __init intel_irq_remapping_supported(void)  	if (!dmar_ir_support())  		return 0; -	for_each_drhd_unit(drhd) { -		struct intel_iommu *iommu = drhd->iommu; - +	for_each_iommu(iommu, drhd)  		if (!ecap_ir_support(iommu->ecap))  			return 0; -	}  	return 1;  } @@ -551,6 +559,7 @@ static int __init intel_irq_remapping_supported(void)  static int __init intel_enable_irq_remapping(void)  {  	struct dmar_drhd_unit *drhd; +	struct intel_iommu *iommu;  	bool x2apic_present;  	int setup = 0;  	int eim = 0; @@ -563,6 +572,8 @@ static int __init intel_enable_irq_remapping(void)  	}  	if (x2apic_present) { +		pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); +  		eim = !dmar_x2apic_optout();  		if (!eim)  			printk(KERN_WARNING @@ -571,9 +582,7 @@ static int __init intel_enable_irq_remapping(void)  				"Use 'intremap=no_x2apic_optout' to override BIOS request.\n");  	} -	for_each_drhd_unit(drhd) { -		struct intel_iommu *iommu = drhd->iommu; - +	for_each_iommu(iommu, drhd) {  		/*  		 * If the queued invalidation is already initialized,  		 * shouldn't disable it. 
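/*
 * Illustrative aside, not part of the patch: alloc_irte() above is switched
 * from an open-coded linear scan to bitmap_find_free_region(). Below is a
 * tiny, self-contained userspace model of that allocation scheme -- claim a
 * naturally aligned block of 2^order entries from a bitmap, or fail. Helper
 * names are hypothetical; the real kernel bitmap API differs in detail.
 */
#include <stdio.h>

#define TABLE_ENTRIES	64	/* stand-in for INTR_REMAP_TABLE_ENTRIES */

static unsigned long long table_bitmap;	/* one bit per entry, 0 = free */

static int find_free_region(int order)
{
	int count = 1 << order;			/* entries requested */
	unsigned long long mask = (count == 64) ? ~0ULL : (1ULL << count) - 1;

	for (int start = 0; start + count <= TABLE_ENTRIES; start += count) {
		if (!(table_bitmap & (mask << start))) {
			table_bitmap |= mask << start;	/* claim the block */
			return start;
		}
	}
	return -1;				/* table exhausted */
}

static void release_region(int start, int order)
{
	int count = 1 << order;
	unsigned long long mask = (count == 64) ? ~0ULL : (1ULL << count) - 1;

	table_bitmap &= ~(mask << start);
}

int main(void)
{
	int a = find_free_region(0);	/* a single entry        -> 0 */
	int b = find_free_region(2);	/* four aligned entries  -> 4 */

	printf("a=%d b=%d\n", a, b);
	release_region(b, 2);
	release_region(a, 0);
	return 0;
}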
@@ -598,9 +607,7 @@ static int __init intel_enable_irq_remapping(void)  	/*  	 * check for the Interrupt-remapping support  	 */ -	for_each_drhd_unit(drhd) { -		struct intel_iommu *iommu = drhd->iommu; - +	for_each_iommu(iommu, drhd) {  		if (!ecap_ir_support(iommu->ecap))  			continue; @@ -614,10 +621,8 @@ static int __init intel_enable_irq_remapping(void)  	/*  	 * Enable queued invalidation for all the DRHD's.  	 */ -	for_each_drhd_unit(drhd) { -		int ret; -		struct intel_iommu *iommu = drhd->iommu; -		ret = dmar_enable_qi(iommu); +	for_each_iommu(iommu, drhd) { +		int ret = dmar_enable_qi(iommu);  		if (ret) {  			printk(KERN_ERR "DRHD %Lx: failed to enable queued, " @@ -630,9 +635,7 @@ static int __init intel_enable_irq_remapping(void)  	/*  	 * Setup Interrupt-remapping for all the DRHD's now.  	 */ -	for_each_drhd_unit(drhd) { -		struct intel_iommu *iommu = drhd->iommu; - +	for_each_iommu(iommu, drhd) {  		if (!ecap_ir_support(iommu->ecap))  			continue; @@ -686,12 +689,12 @@ static void ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,  		 * Access PCI directly due to the PCI  		 * subsystem isn't initialized yet.  		 */ -		bus = read_pci_config_byte(bus, path->dev, path->fn, +		bus = read_pci_config_byte(bus, path->device, path->function,  					   PCI_SECONDARY_BUS);  		path++;  	}  	ir_hpet[ir_hpet_num].bus   = bus; -	ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->dev, path->fn); +	ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->device, path->function);  	ir_hpet[ir_hpet_num].iommu = iommu;  	ir_hpet[ir_hpet_num].id    = scope->enumeration_id;  	ir_hpet_num++; @@ -714,13 +717,13 @@ static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,  		 * Access PCI directly due to the PCI  		 * subsystem isn't initialized yet.  		 */ -		bus = read_pci_config_byte(bus, path->dev, path->fn, +		bus = read_pci_config_byte(bus, path->device, path->function,  					   PCI_SECONDARY_BUS);  		path++;  	}  	ir_ioapic[ir_ioapic_num].bus   = bus; -	ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->dev, path->fn); +	ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->device, path->function);  	ir_ioapic[ir_ioapic_num].iommu = iommu;  	ir_ioapic[ir_ioapic_num].id    = scope->enumeration_id;  	ir_ioapic_num++; @@ -773,22 +776,20 @@ static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,   * Finds the assocaition between IOAPIC's and its Interrupt-remapping   * hardware unit.   
*/ -int __init parse_ioapics_under_ir(void) +static int __init parse_ioapics_under_ir(void)  {  	struct dmar_drhd_unit *drhd; +	struct intel_iommu *iommu;  	int ir_supported = 0;  	int ioapic_idx; -	for_each_drhd_unit(drhd) { -		struct intel_iommu *iommu = drhd->iommu; - +	for_each_iommu(iommu, drhd)  		if (ecap_ir_support(iommu->ecap)) {  			if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))  				return -1;  			ir_supported = 1;  		} -	}  	if (!ir_supported)  		return 0; @@ -806,12 +807,18 @@ int __init parse_ioapics_under_ir(void)  	return 1;  } -int __init ir_dev_scope_init(void) +static int __init ir_dev_scope_init(void)  { +	int ret; +  	if (!irq_remapping_enabled)  		return 0; -	return dmar_dev_scope_init(); +	down_write(&dmar_global_lock); +	ret = dmar_dev_scope_init(); +	up_write(&dmar_global_lock); + +	return ret;  }  rootfs_initcall(ir_dev_scope_init); @@ -892,23 +899,27 @@ static int intel_setup_ioapic_entry(int irq,  				    struct io_apic_irq_attr *attr)  {  	int ioapic_id = mpc_ioapic_id(attr->ioapic); -	struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id); +	struct intel_iommu *iommu;  	struct IR_IO_APIC_route_entry *entry;  	struct irte irte;  	int index; +	down_read(&dmar_global_lock); +	iommu = map_ioapic_to_ir(ioapic_id);  	if (!iommu) {  		pr_warn("No mapping iommu for ioapic %d\n", ioapic_id); -		return -ENODEV; -	} - -	entry = (struct IR_IO_APIC_route_entry *)route_entry; - -	index = alloc_irte(iommu, irq, 1); -	if (index < 0) { -		pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id); -		return -ENOMEM; +		index = -ENODEV; +	} else { +		index = alloc_irte(iommu, irq, 1); +		if (index < 0) { +			pr_warn("Failed to allocate IRTE for ioapic %d\n", +				ioapic_id); +			index = -ENOMEM; +		}  	} +	up_read(&dmar_global_lock); +	if (index < 0) +		return index;  	prepare_irte(&irte, vector, destination); @@ -927,6 +938,7 @@ static int intel_setup_ioapic_entry(int irq,  		irte.avail, irte.vector, irte.dest_id,  		irte.sid, irte.sq, irte.svt); +	entry = (struct IR_IO_APIC_route_entry *)route_entry;  	memset(entry, 0, sizeof(*entry));  	entry->index2	= (index >> 15) & 0x1; @@ -1057,20 +1069,23 @@ static int intel_msi_alloc_irq(struct pci_dev *dev, int irq, int nvec)  	struct intel_iommu *iommu;  	int index; +	down_read(&dmar_global_lock);  	iommu = map_dev_to_ir(dev);  	if (!iommu) {  		printk(KERN_ERR  		       "Unable to map PCI %s to iommu\n", pci_name(dev)); -		return -ENOENT; +		index = -ENOENT; +	} else { +		index = alloc_irte(iommu, irq, nvec); +		if (index < 0) { +			printk(KERN_ERR +			       "Unable to allocate %d IRTE for PCI %s\n", +			       nvec, pci_name(dev)); +			index = -ENOSPC; +		}  	} +	up_read(&dmar_global_lock); -	index = alloc_irte(iommu, irq, nvec); -	if (index < 0) { -		printk(KERN_ERR -		       "Unable to allocate %d IRTE for PCI %s\n", nvec, -		       pci_name(dev)); -		return -ENOSPC; -	}  	return index;  } @@ -1078,33 +1093,40 @@ static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq,  			       int index, int sub_handle)  {  	struct intel_iommu *iommu; +	int ret = -ENOENT; +	down_read(&dmar_global_lock);  	iommu = map_dev_to_ir(pdev); -	if (!iommu) -		return -ENOENT; -	/* -	 * setup the mapping between the irq and the IRTE -	 * base index, the sub_handle pointing to the -	 * appropriate interrupt remap table entry. 
-	 */ -	set_irte_irq(irq, iommu, index, sub_handle); +	if (iommu) { +		/* +		 * setup the mapping between the irq and the IRTE +		 * base index, the sub_handle pointing to the +		 * appropriate interrupt remap table entry. +		 */ +		set_irte_irq(irq, iommu, index, sub_handle); +		ret = 0; +	} +	up_read(&dmar_global_lock); -	return 0; +	return ret;  }  static int intel_setup_hpet_msi(unsigned int irq, unsigned int id)  { -	struct intel_iommu *iommu = map_hpet_to_ir(id); +	int ret = -1; +	struct intel_iommu *iommu;  	int index; -	if (!iommu) -		return -1; - -	index = alloc_irte(iommu, irq, 1); -	if (index < 0) -		return -1; +	down_read(&dmar_global_lock); +	iommu = map_hpet_to_ir(id); +	if (iommu) { +		index = alloc_irte(iommu, irq, 1); +		if (index >= 0) +			ret = 0; +	} +	up_read(&dmar_global_lock); -	return 0; +	return ret;  }  struct irq_remap_ops intel_irq_remap_ops = { diff --git a/drivers/iommu/iommu-traces.c b/drivers/iommu/iommu-traces.c new file mode 100644 index 00000000000..bf3b317ff0c --- /dev/null +++ b/drivers/iommu/iommu-traces.c @@ -0,0 +1,27 @@ +/* + * iommu trace points + * + * Copyright (C) 2013 Shuah Khan <shuah.kh@samsung.com> + * + */ + +#include <linux/string.h> +#include <linux/types.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/iommu.h> + +/* iommu_group_event */ +EXPORT_TRACEPOINT_SYMBOL_GPL(add_device_to_group); +EXPORT_TRACEPOINT_SYMBOL_GPL(remove_device_from_group); + +/* iommu_device_event */ +EXPORT_TRACEPOINT_SYMBOL_GPL(attach_device_to_domain); +EXPORT_TRACEPOINT_SYMBOL_GPL(detach_device_from_domain); + +/* iommu_map_unmap */ +EXPORT_TRACEPOINT_SYMBOL_GPL(map); +EXPORT_TRACEPOINT_SYMBOL_GPL(unmap); + +/* iommu_error */ +EXPORT_TRACEPOINT_SYMBOL_GPL(io_page_fault); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index fbe9ca734f8..e5555fcfe70 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -29,6 +29,7 @@  #include <linux/idr.h>  #include <linux/notifier.h>  #include <linux/err.h> +#include <trace/events/iommu.h>  static struct kset *iommu_group_kset;  static struct ida iommu_group_ida; @@ -363,6 +364,8 @@ rename:  	/* Notify any listeners about change to group. 
*/  	blocking_notifier_call_chain(&group->notifier,  				     IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev); + +	trace_add_device_to_group(group->id, dev);  	return 0;  }  EXPORT_SYMBOL_GPL(iommu_group_add_device); @@ -399,6 +402,8 @@ void iommu_group_remove_device(struct device *dev)  	sysfs_remove_link(group->devices_kobj, device->name);  	sysfs_remove_link(&dev->kobj, "iommu_group"); +	trace_remove_device_from_group(group->id, dev); +  	kfree(device->name);  	kfree(device);  	dev->iommu_group = NULL; @@ -680,10 +685,14 @@ EXPORT_SYMBOL_GPL(iommu_domain_free);  int iommu_attach_device(struct iommu_domain *domain, struct device *dev)  { +	int ret;  	if (unlikely(domain->ops->attach_dev == NULL))  		return -ENODEV; -	return domain->ops->attach_dev(domain, dev); +	ret = domain->ops->attach_dev(domain, dev); +	if (!ret) +		trace_attach_device_to_domain(dev); +	return ret;  }  EXPORT_SYMBOL_GPL(iommu_attach_device); @@ -693,6 +702,7 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)  		return;  	domain->ops->detach_dev(domain, dev); +	trace_detach_device_from_domain(dev);  }  EXPORT_SYMBOL_GPL(iommu_detach_device); @@ -807,17 +817,17 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,  	 * size of the smallest page supported by the hardware  	 */  	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { -		pr_err("unaligned: iova 0x%lx pa 0x%pa size 0x%zx min_pagesz 0x%x\n", +		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",  		       iova, &paddr, size, min_pagesz);  		return -EINVAL;  	} -	pr_debug("map: iova 0x%lx pa 0x%pa size 0x%zx\n", iova, &paddr, size); +	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);  	while (size) {  		size_t pgsize = iommu_pgsize(domain, iova | paddr, size); -		pr_debug("mapping: iova 0x%lx pa 0x%pa pgsize 0x%zx\n", +		pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",  			 iova, &paddr, pgsize);  		ret = domain->ops->map(domain, iova, paddr, pgsize, prot); @@ -832,6 +842,8 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,  	/* unroll mapping in case something went wrong */  	if (ret)  		iommu_unmap(domain, orig_iova, orig_size - size); +	else +		trace_map(iova, paddr, size);  	return ret;  } @@ -880,6 +892,7 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)  		unmapped += unmapped_page;  	} +	trace_unmap(iova, 0, size);  	return unmapped;  }  EXPORT_SYMBOL_GPL(iommu_unmap); diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 67da6cff74e..f6b17e6af2f 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -342,19 +342,30 @@ __is_range_overlap(struct rb_node *node,  	return 0;  } +static inline struct iova * +alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi) +{ +	struct iova *iova; + +	iova = alloc_iova_mem(); +	if (iova) { +		iova->pfn_lo = pfn_lo; +		iova->pfn_hi = pfn_hi; +	} + +	return iova; +} +  static struct iova *  __insert_new_range(struct iova_domain *iovad,  	unsigned long pfn_lo, unsigned long pfn_hi)  {  	struct iova *iova; -	iova = alloc_iova_mem(); -	if (!iova) -		return iova; +	iova = alloc_and_init_iova(pfn_lo, pfn_hi); +	if (iova) +		iova_insert_rbtree(&iovad->rbroot, iova); -	iova->pfn_hi = pfn_hi; -	iova->pfn_lo = pfn_lo; -	iova_insert_rbtree(&iovad->rbroot, iova);  	return iova;  } @@ -433,3 +444,44 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)  	}  	spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);  } + +struct iova * +split_and_remove_iova(struct iova_domain 
*iovad, struct iova *iova, +		      unsigned long pfn_lo, unsigned long pfn_hi) +{ +	unsigned long flags; +	struct iova *prev = NULL, *next = NULL; + +	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); +	if (iova->pfn_lo < pfn_lo) { +		prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1); +		if (prev == NULL) +			goto error; +	} +	if (iova->pfn_hi > pfn_hi) { +		next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi); +		if (next == NULL) +			goto error; +	} + +	__cached_rbnode_delete_update(iovad, iova); +	rb_erase(&iova->node, &iovad->rbroot); + +	if (prev) { +		iova_insert_rbtree(&iovad->rbroot, prev); +		iova->pfn_lo = pfn_lo; +	} +	if (next) { +		iova_insert_rbtree(&iovad->rbroot, next); +		iova->pfn_hi = pfn_hi; +	} +	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + +	return iova; + +error: +	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); +	if (prev) +		free_iova_mem(prev); +	return NULL; +} diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c new file mode 100644 index 00000000000..53cde086e83 --- /dev/null +++ b/drivers/iommu/ipmmu-vmsa.c @@ -0,0 +1,1255 @@ +/* + * IPMMU VMSA + * + * Copyright (C) 2014 Renesas Electronics Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/platform_data/ipmmu-vmsa.h> +#include <linux/platform_device.h> +#include <linux/sizes.h> +#include <linux/slab.h> + +#include <asm/dma-iommu.h> +#include <asm/pgalloc.h> + +struct ipmmu_vmsa_device { +	struct device *dev; +	void __iomem *base; +	struct list_head list; + +	const struct ipmmu_vmsa_platform_data *pdata; +	unsigned int num_utlbs; + +	struct dma_iommu_mapping *mapping; +}; + +struct ipmmu_vmsa_domain { +	struct ipmmu_vmsa_device *mmu; +	struct iommu_domain *io_domain; + +	unsigned int context_id; +	spinlock_t lock;			/* Protects mappings */ +	pgd_t *pgd; +}; + +struct ipmmu_vmsa_archdata { +	struct ipmmu_vmsa_device *mmu; +	unsigned int utlb; +}; + +static DEFINE_SPINLOCK(ipmmu_devices_lock); +static LIST_HEAD(ipmmu_devices); + +#define TLB_LOOP_TIMEOUT		100	/* 100us */ + +/* ----------------------------------------------------------------------------- + * Registers Definition + */ + +#define IM_CTX_SIZE			0x40 + +#define IMCTR				0x0000 +#define IMCTR_TRE			(1 << 17) +#define IMCTR_AFE			(1 << 16) +#define IMCTR_RTSEL_MASK		(3 << 4) +#define IMCTR_RTSEL_SHIFT		4 +#define IMCTR_TREN			(1 << 3) +#define IMCTR_INTEN			(1 << 2) +#define IMCTR_FLUSH			(1 << 1) +#define IMCTR_MMUEN			(1 << 0) + +#define IMCAAR				0x0004 + +#define IMTTBCR				0x0008 +#define IMTTBCR_EAE			(1 << 31) +#define IMTTBCR_PMB			(1 << 30) +#define IMTTBCR_SH1_NON_SHAREABLE	(0 << 28) +#define IMTTBCR_SH1_OUTER_SHAREABLE	(2 << 28) +#define IMTTBCR_SH1_INNER_SHAREABLE	(3 << 28) +#define IMTTBCR_SH1_MASK		(3 << 28) +#define IMTTBCR_ORGN1_NC		(0 << 26) +#define IMTTBCR_ORGN1_WB_WA		(1 << 26) +#define IMTTBCR_ORGN1_WT		(2 << 26) +#define IMTTBCR_ORGN1_WB		(3 << 26) +#define IMTTBCR_ORGN1_MASK		(3 << 26) +#define IMTTBCR_IRGN1_NC		(0 << 24) +#define IMTTBCR_IRGN1_WB_WA		(1 << 24) +#define IMTTBCR_IRGN1_WT		(2 << 24) +#define IMTTBCR_IRGN1_WB		(3 << 24) +#define IMTTBCR_IRGN1_MASK		(3 << 24) +#define 
IMTTBCR_TSZ1_MASK		(7 << 16) +#define IMTTBCR_TSZ1_SHIFT		16 +#define IMTTBCR_SH0_NON_SHAREABLE	(0 << 12) +#define IMTTBCR_SH0_OUTER_SHAREABLE	(2 << 12) +#define IMTTBCR_SH0_INNER_SHAREABLE	(3 << 12) +#define IMTTBCR_SH0_MASK		(3 << 12) +#define IMTTBCR_ORGN0_NC		(0 << 10) +#define IMTTBCR_ORGN0_WB_WA		(1 << 10) +#define IMTTBCR_ORGN0_WT		(2 << 10) +#define IMTTBCR_ORGN0_WB		(3 << 10) +#define IMTTBCR_ORGN0_MASK		(3 << 10) +#define IMTTBCR_IRGN0_NC		(0 << 8) +#define IMTTBCR_IRGN0_WB_WA		(1 << 8) +#define IMTTBCR_IRGN0_WT		(2 << 8) +#define IMTTBCR_IRGN0_WB		(3 << 8) +#define IMTTBCR_IRGN0_MASK		(3 << 8) +#define IMTTBCR_SL0_LVL_2		(0 << 4) +#define IMTTBCR_SL0_LVL_1		(1 << 4) +#define IMTTBCR_TSZ0_MASK		(7 << 0) +#define IMTTBCR_TSZ0_SHIFT		O + +#define IMBUSCR				0x000c +#define IMBUSCR_DVM			(1 << 2) +#define IMBUSCR_BUSSEL_SYS		(0 << 0) +#define IMBUSCR_BUSSEL_CCI		(1 << 0) +#define IMBUSCR_BUSSEL_IMCAAR		(2 << 0) +#define IMBUSCR_BUSSEL_CCI_IMCAAR	(3 << 0) +#define IMBUSCR_BUSSEL_MASK		(3 << 0) + +#define IMTTLBR0			0x0010 +#define IMTTUBR0			0x0014 +#define IMTTLBR1			0x0018 +#define IMTTUBR1			0x001c + +#define IMSTR				0x0020 +#define IMSTR_ERRLVL_MASK		(3 << 12) +#define IMSTR_ERRLVL_SHIFT		12 +#define IMSTR_ERRCODE_TLB_FORMAT	(1 << 8) +#define IMSTR_ERRCODE_ACCESS_PERM	(4 << 8) +#define IMSTR_ERRCODE_SECURE_ACCESS	(5 << 8) +#define IMSTR_ERRCODE_MASK		(7 << 8) +#define IMSTR_MHIT			(1 << 4) +#define IMSTR_ABORT			(1 << 2) +#define IMSTR_PF			(1 << 1) +#define IMSTR_TF			(1 << 0) + +#define IMMAIR0				0x0028 +#define IMMAIR1				0x002c +#define IMMAIR_ATTR_MASK		0xff +#define IMMAIR_ATTR_DEVICE		0x04 +#define IMMAIR_ATTR_NC			0x44 +#define IMMAIR_ATTR_WBRWA		0xff +#define IMMAIR_ATTR_SHIFT(n)		((n) << 3) +#define IMMAIR_ATTR_IDX_NC		0 +#define IMMAIR_ATTR_IDX_WBRWA		1 +#define IMMAIR_ATTR_IDX_DEV		2 + +#define IMEAR				0x0030 + +#define IMPCTR				0x0200 +#define IMPSTR				0x0208 +#define IMPEAR				0x020c +#define IMPMBA(n)			(0x0280 + ((n) * 4)) +#define IMPMBD(n)			(0x02c0 + ((n) * 4)) + +#define IMUCTR(n)			(0x0300 + ((n) * 16)) +#define IMUCTR_FIXADDEN			(1 << 31) +#define IMUCTR_FIXADD_MASK		(0xff << 16) +#define IMUCTR_FIXADD_SHIFT		16 +#define IMUCTR_TTSEL_MMU(n)		((n) << 4) +#define IMUCTR_TTSEL_PMB		(8 << 4) +#define IMUCTR_TTSEL_MASK		(15 << 4) +#define IMUCTR_FLUSH			(1 << 1) +#define IMUCTR_MMUEN			(1 << 0) + +#define IMUASID(n)			(0x0308 + ((n) * 16)) +#define IMUASID_ASID8_MASK		(0xff << 8) +#define IMUASID_ASID8_SHIFT		8 +#define IMUASID_ASID0_MASK		(0xff << 0) +#define IMUASID_ASID0_SHIFT		0 + +/* ----------------------------------------------------------------------------- + * Page Table Bits + */ + +/* + * VMSA states in section B3.6.3 "Control of Secure or Non-secure memory access, + * Long-descriptor format" that the NStable bit being set in a table descriptor + * will result in the NStable and NS bits of all child entries being ignored and + * considered as being set. The IPMMU seems not to comply with this, as it + * generates a secure access page fault if any of the NStable and NS bits isn't + * set when running in non-secure mode. 
+ */ +#ifndef PMD_NSTABLE +#define PMD_NSTABLE			(_AT(pmdval_t, 1) << 63) +#endif + +#define ARM_VMSA_PTE_XN			(((pteval_t)3) << 53) +#define ARM_VMSA_PTE_CONT		(((pteval_t)1) << 52) +#define ARM_VMSA_PTE_AF			(((pteval_t)1) << 10) +#define ARM_VMSA_PTE_SH_NS		(((pteval_t)0) << 8) +#define ARM_VMSA_PTE_SH_OS		(((pteval_t)2) << 8) +#define ARM_VMSA_PTE_SH_IS		(((pteval_t)3) << 8) +#define ARM_VMSA_PTE_SH_MASK		(((pteval_t)3) << 8) +#define ARM_VMSA_PTE_NS			(((pteval_t)1) << 5) +#define ARM_VMSA_PTE_PAGE		(((pteval_t)3) << 0) + +/* Stage-1 PTE */ +#define ARM_VMSA_PTE_nG			(((pteval_t)1) << 11) +#define ARM_VMSA_PTE_AP_UNPRIV		(((pteval_t)1) << 6) +#define ARM_VMSA_PTE_AP_RDONLY		(((pteval_t)2) << 6) +#define ARM_VMSA_PTE_AP_MASK		(((pteval_t)3) << 6) +#define ARM_VMSA_PTE_ATTRINDX_MASK	(((pteval_t)3) << 2) +#define ARM_VMSA_PTE_ATTRINDX_SHIFT	2 + +#define ARM_VMSA_PTE_ATTRS_MASK \ +	(ARM_VMSA_PTE_XN | ARM_VMSA_PTE_CONT | ARM_VMSA_PTE_nG | \ +	 ARM_VMSA_PTE_AF | ARM_VMSA_PTE_SH_MASK | ARM_VMSA_PTE_AP_MASK | \ +	 ARM_VMSA_PTE_NS | ARM_VMSA_PTE_ATTRINDX_MASK) + +#define ARM_VMSA_PTE_CONT_ENTRIES	16 +#define ARM_VMSA_PTE_CONT_SIZE		(PAGE_SIZE * ARM_VMSA_PTE_CONT_ENTRIES) + +#define IPMMU_PTRS_PER_PTE		512 +#define IPMMU_PTRS_PER_PMD		512 +#define IPMMU_PTRS_PER_PGD		4 + +/* ----------------------------------------------------------------------------- + * Read/Write Access + */ + +static u32 ipmmu_read(struct ipmmu_vmsa_device *mmu, unsigned int offset) +{ +	return ioread32(mmu->base + offset); +} + +static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset, +			u32 data) +{ +	iowrite32(data, mmu->base + offset); +} + +static u32 ipmmu_ctx_read(struct ipmmu_vmsa_domain *domain, unsigned int reg) +{ +	return ipmmu_read(domain->mmu, domain->context_id * IM_CTX_SIZE + reg); +} + +static void ipmmu_ctx_write(struct ipmmu_vmsa_domain *domain, unsigned int reg, +			    u32 data) +{ +	ipmmu_write(domain->mmu, domain->context_id * IM_CTX_SIZE + reg, data); +} + +/* ----------------------------------------------------------------------------- + * TLB and microTLB Management + */ + +/* Wait for any pending TLB invalidations to complete */ +static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain) +{ +	unsigned int count = 0; + +	while (ipmmu_ctx_read(domain, IMCTR) & IMCTR_FLUSH) { +		cpu_relax(); +		if (++count == TLB_LOOP_TIMEOUT) { +			dev_err_ratelimited(domain->mmu->dev, +			"TLB sync timed out -- MMU may be deadlocked\n"); +			return; +		} +		udelay(1); +	} +} + +static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain) +{ +	u32 reg; + +	reg = ipmmu_ctx_read(domain, IMCTR); +	reg |= IMCTR_FLUSH; +	ipmmu_ctx_write(domain, IMCTR, reg); + +	ipmmu_tlb_sync(domain); +} + +/* + * Enable MMU translation for the microTLB. + */ +static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain, +			      unsigned int utlb) +{ +	struct ipmmu_vmsa_device *mmu = domain->mmu; + +	/* +	 * TODO: Reference-count the microTLB as several bus masters can be +	 * connected to the same microTLB. +	 */ + +	/* TODO: What should we set the ASID to ? */ +	ipmmu_write(mmu, IMUASID(utlb), 0); +	/* TODO: Do we need to flush the microTLB ? */ +	ipmmu_write(mmu, IMUCTR(utlb), +		    IMUCTR_TTSEL_MMU(domain->context_id) | IMUCTR_FLUSH | +		    IMUCTR_MMUEN); +} + +/* + * Disable MMU translation for the microTLB. 
+ */ +static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, +			       unsigned int utlb) +{ +	struct ipmmu_vmsa_device *mmu = domain->mmu; + +	ipmmu_write(mmu, IMUCTR(utlb), 0); +} + +static void ipmmu_flush_pgtable(struct ipmmu_vmsa_device *mmu, void *addr, +				size_t size) +{ +	unsigned long offset = (unsigned long)addr & ~PAGE_MASK; + +	/* +	 * TODO: Add support for coherent walk through CCI with DVM and remove +	 * cache handling. +	 */ +	dma_map_page(mmu->dev, virt_to_page(addr), offset, size, DMA_TO_DEVICE); +} + +/* ----------------------------------------------------------------------------- + * Domain/Context Management + */ + +static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) +{ +	phys_addr_t ttbr; +	u32 reg; + +	/* +	 * TODO: When adding support for multiple contexts, find an unused +	 * context. +	 */ +	domain->context_id = 0; + +	/* TTBR0 */ +	ipmmu_flush_pgtable(domain->mmu, domain->pgd, +			    IPMMU_PTRS_PER_PGD * sizeof(*domain->pgd)); +	ttbr = __pa(domain->pgd); +	ipmmu_ctx_write(domain, IMTTLBR0, ttbr); +	ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32); + +	/* +	 * TTBCR +	 * We use long descriptors with inner-shareable WBWA tables and allocate +	 * the whole 32-bit VA space to TTBR0. +	 */ +	ipmmu_ctx_write(domain, IMTTBCR, IMTTBCR_EAE | +			IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | +			IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1); + +	/* +	 * MAIR0 +	 * We need three attributes only, non-cacheable, write-back read/write +	 * allocate and device memory. +	 */ +	reg = (IMMAIR_ATTR_NC << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_NC)) +	    | (IMMAIR_ATTR_WBRWA << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_WBRWA)) +	    | (IMMAIR_ATTR_DEVICE << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_DEV)); +	ipmmu_ctx_write(domain, IMMAIR0, reg); + +	/* IMBUSCR */ +	ipmmu_ctx_write(domain, IMBUSCR, +			ipmmu_ctx_read(domain, IMBUSCR) & +			~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); + +	/* +	 * IMSTR +	 * Clear all interrupt flags. +	 */ +	ipmmu_ctx_write(domain, IMSTR, ipmmu_ctx_read(domain, IMSTR)); + +	/* +	 * IMCTR +	 * Enable the MMU and interrupt generation. The long-descriptor +	 * translation table format doesn't use TEX remapping. Don't enable AF +	 * software management as we have no use for it. Flush the TLB as +	 * required when modifying the context registers. +	 */ +	ipmmu_ctx_write(domain, IMCTR, IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); + +	return 0; +} + +static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) +{ +	/* +	 * Disable the context. Flush the TLB as required when modifying the +	 * context registers. +	 * +	 * TODO: Is TLB flush really needed ? +	 */ +	ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); +	ipmmu_tlb_sync(domain); +} + +/* ----------------------------------------------------------------------------- + * Fault Handling + */ + +static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) +{ +	const u32 err_mask = IMSTR_MHIT | IMSTR_ABORT | IMSTR_PF | IMSTR_TF; +	struct ipmmu_vmsa_device *mmu = domain->mmu; +	u32 status; +	u32 iova; + +	status = ipmmu_ctx_read(domain, IMSTR); +	if (!(status & err_mask)) +		return IRQ_NONE; + +	iova = ipmmu_ctx_read(domain, IMEAR); + +	/* +	 * Clear the error status flags. Unlike traditional interrupt flag +	 * registers that must be cleared by writing 1, this status register +	 * seems to require 0. The error address register must be read before, +	 * otherwise its value will be 0. +	 */ +	ipmmu_ctx_write(domain, IMSTR, 0); + +	/* Log fatal errors. 
*/ +	if (status & IMSTR_MHIT) +		dev_err_ratelimited(mmu->dev, "Multiple TLB hits @0x%08x\n", +				    iova); +	if (status & IMSTR_ABORT) +		dev_err_ratelimited(mmu->dev, "Page Table Walk Abort @0x%08x\n", +				    iova); + +	if (!(status & (IMSTR_PF | IMSTR_TF))) +		return IRQ_NONE; + +	/* +	 * Try to handle page faults and translation faults. +	 * +	 * TODO: We need to look up the faulty device based on the I/O VA. Use +	 * the IOMMU device for now. +	 */ +	if (!report_iommu_fault(domain->io_domain, mmu->dev, iova, 0)) +		return IRQ_HANDLED; + +	dev_err_ratelimited(mmu->dev, +			    "Unhandled fault: status 0x%08x iova 0x%08x\n", +			    status, iova); + +	return IRQ_HANDLED; +} + +static irqreturn_t ipmmu_irq(int irq, void *dev) +{ +	struct ipmmu_vmsa_device *mmu = dev; +	struct iommu_domain *io_domain; +	struct ipmmu_vmsa_domain *domain; + +	if (!mmu->mapping) +		return IRQ_NONE; + +	io_domain = mmu->mapping->domain; +	domain = io_domain->priv; + +	return ipmmu_domain_irq(domain); +} + +/* ----------------------------------------------------------------------------- + * Page Table Management + */ + +#define pud_pgtable(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) + +static void ipmmu_free_ptes(pmd_t *pmd) +{ +	pgtable_t table = pmd_pgtable(*pmd); +	__free_page(table); +} + +static void ipmmu_free_pmds(pud_t *pud) +{ +	pmd_t *pmd = pmd_offset(pud, 0); +	pgtable_t table; +	unsigned int i; + +	for (i = 0; i < IPMMU_PTRS_PER_PMD; ++i) { +		if (!pmd_table(*pmd)) +			continue; + +		ipmmu_free_ptes(pmd); +		pmd++; +	} + +	table = pud_pgtable(*pud); +	__free_page(table); +} + +static void ipmmu_free_pgtables(struct ipmmu_vmsa_domain *domain) +{ +	pgd_t *pgd, *pgd_base = domain->pgd; +	unsigned int i; + +	/* +	 * Recursively free the page tables for this domain. We don't care about +	 * speculative TLB filling, because the TLB will be nuked next time this +	 * context bank is re-allocated and no devices currently map to these +	 * tables. +	 */ +	pgd = pgd_base; +	for (i = 0; i < IPMMU_PTRS_PER_PGD; ++i) { +		if (pgd_none(*pgd)) +			continue; +		ipmmu_free_pmds((pud_t *)pgd); +		pgd++; +	} + +	kfree(pgd_base); +} + +/* + * We can't use the (pgd|pud|pmd|pte)_populate or the set_(pgd|pud|pmd|pte) + * functions as they would flush the CPU TLB. 
+ */ + +static pte_t *ipmmu_alloc_pte(struct ipmmu_vmsa_device *mmu, pmd_t *pmd, +			      unsigned long iova) +{ +	pte_t *pte; + +	if (!pmd_none(*pmd)) +		return pte_offset_kernel(pmd, iova); + +	pte = (pte_t *)get_zeroed_page(GFP_ATOMIC); +	if (!pte) +		return NULL; + +	ipmmu_flush_pgtable(mmu, pte, PAGE_SIZE); +	*pmd = __pmd(__pa(pte) | PMD_NSTABLE | PMD_TYPE_TABLE); +	ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); + +	return pte + pte_index(iova); +} + +static pmd_t *ipmmu_alloc_pmd(struct ipmmu_vmsa_device *mmu, pgd_t *pgd, +			      unsigned long iova) +{ +	pud_t *pud = (pud_t *)pgd; +	pmd_t *pmd; + +	if (!pud_none(*pud)) +		return pmd_offset(pud, iova); + +	pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC); +	if (!pmd) +		return NULL; + +	ipmmu_flush_pgtable(mmu, pmd, PAGE_SIZE); +	*pud = __pud(__pa(pmd) | PMD_NSTABLE | PMD_TYPE_TABLE); +	ipmmu_flush_pgtable(mmu, pud, sizeof(*pud)); + +	return pmd + pmd_index(iova); +} + +static u64 ipmmu_page_prot(unsigned int prot, u64 type) +{ +	u64 pgprot = ARM_VMSA_PTE_XN | ARM_VMSA_PTE_nG | ARM_VMSA_PTE_AF +		   | ARM_VMSA_PTE_SH_IS | ARM_VMSA_PTE_AP_UNPRIV +		   | ARM_VMSA_PTE_NS | type; + +	if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) +		pgprot |= ARM_VMSA_PTE_AP_RDONLY; + +	if (prot & IOMMU_CACHE) +		pgprot |= IMMAIR_ATTR_IDX_WBRWA << ARM_VMSA_PTE_ATTRINDX_SHIFT; + +	if (prot & IOMMU_EXEC) +		pgprot &= ~ARM_VMSA_PTE_XN; +	else if (!(prot & (IOMMU_READ | IOMMU_WRITE))) +		/* If no access create a faulting entry to avoid TLB fills. */ +		pgprot &= ~ARM_VMSA_PTE_PAGE; + +	return pgprot; +} + +static int ipmmu_alloc_init_pte(struct ipmmu_vmsa_device *mmu, pmd_t *pmd, +				unsigned long iova, unsigned long pfn, +				size_t size, int prot) +{ +	pteval_t pteval = ipmmu_page_prot(prot, ARM_VMSA_PTE_PAGE); +	unsigned int num_ptes = 1; +	pte_t *pte, *start; +	unsigned int i; + +	pte = ipmmu_alloc_pte(mmu, pmd, iova); +	if (!pte) +		return -ENOMEM; + +	start = pte; + +	/* +	 * Install the page table entries. We can be called both for a single +	 * page or for a block of 16 physically contiguous pages. In the latter +	 * case set the PTE contiguous hint. +	 */ +	if (size == SZ_64K) { +		pteval |= ARM_VMSA_PTE_CONT; +		num_ptes = ARM_VMSA_PTE_CONT_ENTRIES; +	} + +	for (i = num_ptes; i; --i) +		*pte++ = pfn_pte(pfn++, __pgprot(pteval)); + +	ipmmu_flush_pgtable(mmu, start, sizeof(*pte) * num_ptes); + +	return 0; +} + +static int ipmmu_alloc_init_pmd(struct ipmmu_vmsa_device *mmu, pmd_t *pmd, +				unsigned long iova, unsigned long pfn, +				int prot) +{ +	pmdval_t pmdval = ipmmu_page_prot(prot, PMD_TYPE_SECT); + +	*pmd = pfn_pmd(pfn, __pgprot(pmdval)); +	ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); + +	return 0; +} + +static int ipmmu_create_mapping(struct ipmmu_vmsa_domain *domain, +				unsigned long iova, phys_addr_t paddr, +				size_t size, int prot) +{ +	struct ipmmu_vmsa_device *mmu = domain->mmu; +	pgd_t *pgd = domain->pgd; +	unsigned long flags; +	unsigned long pfn; +	pmd_t *pmd; +	int ret; + +	if (!pgd) +		return -EINVAL; + +	if (size & ~PAGE_MASK) +		return -EINVAL; + +	if (paddr & ~((1ULL << 40) - 1)) +		return -ERANGE; + +	pfn = __phys_to_pfn(paddr); +	pgd += pgd_index(iova); + +	/* Update the page tables. 
*/ +	spin_lock_irqsave(&domain->lock, flags); + +	pmd = ipmmu_alloc_pmd(mmu, pgd, iova); +	if (!pmd) { +		ret = -ENOMEM; +		goto done; +	} + +	switch (size) { +	case SZ_2M: +		ret = ipmmu_alloc_init_pmd(mmu, pmd, iova, pfn, prot); +		break; +	case SZ_64K: +	case SZ_4K: +		ret = ipmmu_alloc_init_pte(mmu, pmd, iova, pfn, size, prot); +		break; +	default: +		ret = -EINVAL; +		break; +	} + +done: +	spin_unlock_irqrestore(&domain->lock, flags); + +	if (!ret) +		ipmmu_tlb_invalidate(domain); + +	return ret; +} + +static void ipmmu_clear_pud(struct ipmmu_vmsa_device *mmu, pud_t *pud) +{ +	/* Free the page table. */ +	pgtable_t table = pud_pgtable(*pud); +	__free_page(table); + +	/* Clear the PUD. */ +	*pud = __pud(0); +	ipmmu_flush_pgtable(mmu, pud, sizeof(*pud)); +} + +static void ipmmu_clear_pmd(struct ipmmu_vmsa_device *mmu, pud_t *pud, +			    pmd_t *pmd) +{ +	unsigned int i; + +	/* Free the page table. */ +	if (pmd_table(*pmd)) { +		pgtable_t table = pmd_pgtable(*pmd); +		__free_page(table); +	} + +	/* Clear the PMD. */ +	*pmd = __pmd(0); +	ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); + +	/* Check whether the PUD is still needed. */ +	pmd = pmd_offset(pud, 0); +	for (i = 0; i < IPMMU_PTRS_PER_PMD; ++i) { +		if (!pmd_none(pmd[i])) +			return; +	} + +	/* Clear the parent PUD. */ +	ipmmu_clear_pud(mmu, pud); +} + +static void ipmmu_clear_pte(struct ipmmu_vmsa_device *mmu, pud_t *pud, +			    pmd_t *pmd, pte_t *pte, unsigned int num_ptes) +{ +	unsigned int i; + +	/* Clear the PTE. */ +	for (i = num_ptes; i; --i) +		pte[i-1] = __pte(0); + +	ipmmu_flush_pgtable(mmu, pte, sizeof(*pte) * num_ptes); + +	/* Check whether the PMD is still needed. */ +	pte = pte_offset_kernel(pmd, 0); +	for (i = 0; i < IPMMU_PTRS_PER_PTE; ++i) { +		if (!pte_none(pte[i])) +			return; +	} + +	/* Clear the parent PMD. */ +	ipmmu_clear_pmd(mmu, pud, pmd); +} + +static int ipmmu_split_pmd(struct ipmmu_vmsa_device *mmu, pmd_t *pmd) +{ +	pte_t *pte, *start; +	pteval_t pteval; +	unsigned long pfn; +	unsigned int i; + +	pte = (pte_t *)get_zeroed_page(GFP_ATOMIC); +	if (!pte) +		return -ENOMEM; + +	/* Copy the PMD attributes. */ +	pteval = (pmd_val(*pmd) & ARM_VMSA_PTE_ATTRS_MASK) +	       | ARM_VMSA_PTE_CONT | ARM_VMSA_PTE_PAGE; + +	pfn = pmd_pfn(*pmd); +	start = pte; + +	for (i = IPMMU_PTRS_PER_PTE; i; --i) +		*pte++ = pfn_pte(pfn++, __pgprot(pteval)); + +	ipmmu_flush_pgtable(mmu, start, PAGE_SIZE); +	*pmd = __pmd(__pa(start) | PMD_NSTABLE | PMD_TYPE_TABLE); +	ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); + +	return 0; +} + +static void ipmmu_split_pte(struct ipmmu_vmsa_device *mmu, pte_t *pte) +{ +	unsigned int i; + +	for (i = ARM_VMSA_PTE_CONT_ENTRIES; i; --i) +		pte[i-1] = __pte(pte_val(*pte) & ~ARM_VMSA_PTE_CONT); + +	ipmmu_flush_pgtable(mmu, pte, sizeof(*pte) * ARM_VMSA_PTE_CONT_ENTRIES); +} + +static int ipmmu_clear_mapping(struct ipmmu_vmsa_domain *domain, +			       unsigned long iova, size_t size) +{ +	struct ipmmu_vmsa_device *mmu = domain->mmu; +	unsigned long flags; +	pgd_t *pgd = domain->pgd; +	pud_t *pud; +	pmd_t *pmd; +	pte_t *pte; +	int ret = 0; + +	if (!pgd) +		return -EINVAL; + +	if (size & ~PAGE_MASK) +		return -EINVAL; + +	pgd += pgd_index(iova); +	pud = (pud_t *)pgd; + +	spin_lock_irqsave(&domain->lock, flags); + +	/* If there's no PUD or PMD we're done. */ +	if (pud_none(*pud)) +		goto done; + +	pmd = pmd_offset(pud, iova); +	if (pmd_none(*pmd)) +		goto done; + +	/* +	 * When freeing a 2MB block just clear the PMD. 
In the unlikely case the +	 * block is mapped as individual pages this will free the corresponding +	 * PTE page table. +	 */ +	if (size == SZ_2M) { +		ipmmu_clear_pmd(mmu, pud, pmd); +		goto done; +	} + +	/* +	 * If the PMD has been mapped as a section remap it as pages to allow +	 * freeing individual pages. +	 */ +	if (pmd_sect(*pmd)) +		ipmmu_split_pmd(mmu, pmd); + +	pte = pte_offset_kernel(pmd, iova); + +	/* +	 * When freeing a 64kB block just clear the PTE entries. We don't have +	 * to care about the contiguous hint of the surrounding entries. +	 */ +	if (size == SZ_64K) { +		ipmmu_clear_pte(mmu, pud, pmd, pte, ARM_VMSA_PTE_CONT_ENTRIES); +		goto done; +	} + +	/* +	 * If the PTE has been mapped with the contiguous hint set remap it and +	 * its surrounding PTEs to allow unmapping a single page. +	 */ +	if (pte_val(*pte) & ARM_VMSA_PTE_CONT) +		ipmmu_split_pte(mmu, pte); + +	/* Clear the PTE. */ +	ipmmu_clear_pte(mmu, pud, pmd, pte, 1); + +done: +	spin_unlock_irqrestore(&domain->lock, flags); + +	if (ret) +		ipmmu_tlb_invalidate(domain); + +	return 0; +} + +/* ----------------------------------------------------------------------------- + * IOMMU Operations + */ + +static int ipmmu_domain_init(struct iommu_domain *io_domain) +{ +	struct ipmmu_vmsa_domain *domain; + +	domain = kzalloc(sizeof(*domain), GFP_KERNEL); +	if (!domain) +		return -ENOMEM; + +	spin_lock_init(&domain->lock); + +	domain->pgd = kzalloc(IPMMU_PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); +	if (!domain->pgd) { +		kfree(domain); +		return -ENOMEM; +	} + +	io_domain->priv = domain; +	domain->io_domain = io_domain; + +	return 0; +} + +static void ipmmu_domain_destroy(struct iommu_domain *io_domain) +{ +	struct ipmmu_vmsa_domain *domain = io_domain->priv; + +	/* +	 * Free the domain resources. We assume that all devices have already +	 * been detached. +	 */ +	ipmmu_domain_destroy_context(domain); +	ipmmu_free_pgtables(domain); +	kfree(domain); +} + +static int ipmmu_attach_device(struct iommu_domain *io_domain, +			       struct device *dev) +{ +	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; +	struct ipmmu_vmsa_device *mmu = archdata->mmu; +	struct ipmmu_vmsa_domain *domain = io_domain->priv; +	unsigned long flags; +	int ret = 0; + +	if (!mmu) { +		dev_err(dev, "Cannot attach to IPMMU\n"); +		return -ENXIO; +	} + +	spin_lock_irqsave(&domain->lock, flags); + +	if (!domain->mmu) { +		/* The domain hasn't been used yet, initialize it. */ +		domain->mmu = mmu; +		ret = ipmmu_domain_init_context(domain); +	} else if (domain->mmu != mmu) { +		/* +		 * Something is wrong, we can't attach two devices using +		 * different IOMMUs to the same domain. +		 */ +		dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n", +			dev_name(mmu->dev), dev_name(domain->mmu->dev)); +		ret = -EINVAL; +	} + +	spin_unlock_irqrestore(&domain->lock, flags); + +	if (ret < 0) +		return ret; + +	ipmmu_utlb_enable(domain, archdata->utlb); + +	return 0; +} + +static void ipmmu_detach_device(struct iommu_domain *io_domain, +				struct device *dev) +{ +	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; +	struct ipmmu_vmsa_domain *domain = io_domain->priv; + +	ipmmu_utlb_disable(domain, archdata->utlb); + +	/* +	 * TODO: Optimize by disabling the context when no device is attached. 
+	 */ +} + +static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, +		     phys_addr_t paddr, size_t size, int prot) +{ +	struct ipmmu_vmsa_domain *domain = io_domain->priv; + +	if (!domain) +		return -ENODEV; + +	return ipmmu_create_mapping(domain, iova, paddr, size, prot); +} + +static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, +			  size_t size) +{ +	struct ipmmu_vmsa_domain *domain = io_domain->priv; +	int ret; + +	ret = ipmmu_clear_mapping(domain, iova, size); +	return ret ? 0 : size; +} + +static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, +				      dma_addr_t iova) +{ +	struct ipmmu_vmsa_domain *domain = io_domain->priv; +	pgd_t pgd; +	pud_t pud; +	pmd_t pmd; +	pte_t pte; + +	/* TODO: Is locking needed ? */ + +	if (!domain->pgd) +		return 0; + +	pgd = *(domain->pgd + pgd_index(iova)); +	if (pgd_none(pgd)) +		return 0; + +	pud = *pud_offset(&pgd, iova); +	if (pud_none(pud)) +		return 0; + +	pmd = *pmd_offset(&pud, iova); +	if (pmd_none(pmd)) +		return 0; + +	if (pmd_sect(pmd)) +		return __pfn_to_phys(pmd_pfn(pmd)) | (iova & ~PMD_MASK); + +	pte = *(pmd_page_vaddr(pmd) + pte_index(iova)); +	if (pte_none(pte)) +		return 0; + +	return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK); +} + +static int ipmmu_find_utlb(struct ipmmu_vmsa_device *mmu, struct device *dev) +{ +	const struct ipmmu_vmsa_master *master = mmu->pdata->masters; +	const char *devname = dev_name(dev); +	unsigned int i; + +	for (i = 0; i < mmu->pdata->num_masters; ++i, ++master) { +		if (strcmp(master->name, devname) == 0) +			return master->utlb; +	} + +	return -1; +} + +static int ipmmu_add_device(struct device *dev) +{ +	struct ipmmu_vmsa_archdata *archdata; +	struct ipmmu_vmsa_device *mmu; +	struct iommu_group *group; +	int utlb = -1; +	int ret; + +	if (dev->archdata.iommu) { +		dev_warn(dev, "IOMMU driver already assigned to device %s\n", +			 dev_name(dev)); +		return -EINVAL; +	} + +	/* Find the master corresponding to the device. */ +	spin_lock(&ipmmu_devices_lock); + +	list_for_each_entry(mmu, &ipmmu_devices, list) { +		utlb = ipmmu_find_utlb(mmu, dev); +		if (utlb >= 0) { +			/* +			 * TODO Take a reference to the MMU to protect +			 * against device removal. +			 */ +			break; +		} +	} + +	spin_unlock(&ipmmu_devices_lock); + +	if (utlb < 0) +		return -ENODEV; + +	if (utlb >= mmu->num_utlbs) +		return -EINVAL; + +	/* Create a device group and add the device to it. */ +	group = iommu_group_alloc(); +	if (IS_ERR(group)) { +		dev_err(dev, "Failed to allocate IOMMU group\n"); +		return PTR_ERR(group); +	} + +	ret = iommu_group_add_device(group, dev); +	iommu_group_put(group); + +	if (ret < 0) { +		dev_err(dev, "Failed to add device to IPMMU group\n"); +		return ret; +	} + +	archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); +	if (!archdata) { +		ret = -ENOMEM; +		goto error; +	} + +	archdata->mmu = mmu; +	archdata->utlb = utlb; +	dev->archdata.iommu = archdata; + +	/* +	 * Create the ARM mapping, used by the ARM DMA mapping core to allocate +	 * VAs. This will allocate a corresponding IOMMU domain. +	 * +	 * TODO: +	 * - Create one mapping per context (TLB). +	 * - Make the mapping size configurable ? We currently use a 2GB mapping +	 *   at a 1GB offset to ensure that NULL VAs will fault. 
+	 */ +	if (!mmu->mapping) { +		struct dma_iommu_mapping *mapping; + +		mapping = arm_iommu_create_mapping(&platform_bus_type, +						   SZ_1G, SZ_2G); +		if (IS_ERR(mapping)) { +			dev_err(mmu->dev, "failed to create ARM IOMMU mapping\n"); +			return PTR_ERR(mapping); +		} + +		mmu->mapping = mapping; +	} + +	/* Attach the ARM VA mapping to the device. */ +	ret = arm_iommu_attach_device(dev, mmu->mapping); +	if (ret < 0) { +		dev_err(dev, "Failed to attach device to VA mapping\n"); +		goto error; +	} + +	return 0; + +error: +	kfree(dev->archdata.iommu); +	dev->archdata.iommu = NULL; +	iommu_group_remove_device(dev); +	return ret; +} + +static void ipmmu_remove_device(struct device *dev) +{ +	arm_iommu_detach_device(dev); +	iommu_group_remove_device(dev); +	kfree(dev->archdata.iommu); +	dev->archdata.iommu = NULL; +} + +static struct iommu_ops ipmmu_ops = { +	.domain_init = ipmmu_domain_init, +	.domain_destroy = ipmmu_domain_destroy, +	.attach_dev = ipmmu_attach_device, +	.detach_dev = ipmmu_detach_device, +	.map = ipmmu_map, +	.unmap = ipmmu_unmap, +	.iova_to_phys = ipmmu_iova_to_phys, +	.add_device = ipmmu_add_device, +	.remove_device = ipmmu_remove_device, +	.pgsize_bitmap = SZ_2M | SZ_64K | SZ_4K, +}; + +/* ----------------------------------------------------------------------------- + * Probe/remove and init + */ + +static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu) +{ +	unsigned int i; + +	/* Disable all contexts. */ +	for (i = 0; i < 4; ++i) +		ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0); +} + +static int ipmmu_probe(struct platform_device *pdev) +{ +	struct ipmmu_vmsa_device *mmu; +	struct resource *res; +	int irq; +	int ret; + +	if (!pdev->dev.platform_data) { +		dev_err(&pdev->dev, "missing platform data\n"); +		return -EINVAL; +	} + +	mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL); +	if (!mmu) { +		dev_err(&pdev->dev, "cannot allocate device data\n"); +		return -ENOMEM; +	} + +	mmu->dev = &pdev->dev; +	mmu->pdata = pdev->dev.platform_data; +	mmu->num_utlbs = 32; + +	/* Map I/O memory and request IRQ. */ +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0); +	mmu->base = devm_ioremap_resource(&pdev->dev, res); +	if (IS_ERR(mmu->base)) +		return PTR_ERR(mmu->base); + +	irq = platform_get_irq(pdev, 0); +	if (irq < 0) { +		dev_err(&pdev->dev, "no IRQ found\n"); +		return irq; +	} + +	ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, +			       dev_name(&pdev->dev), mmu); +	if (ret < 0) { +		dev_err(&pdev->dev, "failed to request IRQ %d\n", irq); +		return irq; +	} + +	ipmmu_device_reset(mmu); + +	/* +	 * We can't create the ARM mapping here as it requires the bus to have +	 * an IOMMU, which only happens when bus_set_iommu() is called in +	 * ipmmu_init() after the probe function returns. 
+	 */ + +	spin_lock(&ipmmu_devices_lock); +	list_add(&mmu->list, &ipmmu_devices); +	spin_unlock(&ipmmu_devices_lock); + +	platform_set_drvdata(pdev, mmu); + +	return 0; +} + +static int ipmmu_remove(struct platform_device *pdev) +{ +	struct ipmmu_vmsa_device *mmu = platform_get_drvdata(pdev); + +	spin_lock(&ipmmu_devices_lock); +	list_del(&mmu->list); +	spin_unlock(&ipmmu_devices_lock); + +	arm_iommu_release_mapping(mmu->mapping); + +	ipmmu_device_reset(mmu); + +	return 0; +} + +static struct platform_driver ipmmu_driver = { +	.driver = { +		.owner = THIS_MODULE, +		.name = "ipmmu-vmsa", +	}, +	.probe = ipmmu_probe, +	.remove	= ipmmu_remove, +}; + +static int __init ipmmu_init(void) +{ +	int ret; + +	ret = platform_driver_register(&ipmmu_driver); +	if (ret < 0) +		return ret; + +	if (!iommu_present(&platform_bus_type)) +		bus_set_iommu(&platform_bus_type, &ipmmu_ops); + +	return 0; +} + +static void __exit ipmmu_exit(void) +{ +	return platform_driver_unregister(&ipmmu_driver); +} + +subsys_initcall(ipmmu_init); +module_exit(ipmmu_exit); + +MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); +MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 39f81aeefcd..33c43952408 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -51,7 +51,7 @@ static void irq_remapping_disable_io_apic(void)  static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)  { -	int node, ret, sub_handle, nvec_pow2, index = 0; +	int ret, sub_handle, nvec_pow2, index = 0;  	unsigned int irq;  	struct msi_desc *msidesc; @@ -61,8 +61,7 @@ static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)  	WARN_ON(msidesc->msi_attrib.multiple);  	WARN_ON(msidesc->nvec_used); -	node = dev_to_node(&dev->dev); -	irq = __create_irqs(get_nr_irqs_gsi(), nvec, node); +	irq = irq_alloc_hwirqs(nvec, dev_to_node(&dev->dev));  	if (irq == 0)  		return -ENOSPC; @@ -89,7 +88,7 @@ static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)  	return 0;  error: -	destroy_irqs(irq, nvec); +	irq_free_hwirqs(irq, nvec);  	/*  	 * Restore altered MSI descriptor fields and prevent just destroyed @@ -109,12 +108,11 @@ static int do_setup_msix_irqs(struct pci_dev *dev, int nvec)  	unsigned int irq;  	node		= dev_to_node(&dev->dev); -	irq		= get_nr_irqs_gsi();  	sub_handle	= 0;  	list_for_each_entry(msidesc, &dev->msi_list, list) { -		irq = create_irq_nr(irq, node); +		irq = irq_alloc_hwirq(node);  		if (irq == 0)  			return -1; @@ -137,7 +135,7 @@ static int do_setup_msix_irqs(struct pci_dev *dev, int nvec)  	return 0;  error: -	destroy_irq(irq); +	irq_free_hwirq(irq);  	return ret;  } @@ -150,7 +148,7 @@ static int irq_remapping_setup_msi_irqs(struct pci_dev *dev,  		return do_setup_msix_irqs(dev, nvec);  } -void eoi_ioapic_pin_remapped(int apic, int pin, int vector) +static void eoi_ioapic_pin_remapped(int apic, int pin, int vector)  {  	/*  	 * Intr-remapping uses pin number as the virtual vector @@ -295,8 +293,8 @@ int setup_ioapic_remapped_entry(int irq,  					     vector, attr);  } -int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask, -			      bool force) +static int set_remapped_irq_affinity(struct irq_data *data, +				     const struct cpumask *mask, bool force)  {  	if (!config_enabled(CONFIG_SMP) || !remap_ops ||  	    !remap_ops->set_affinity) diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c index 08ba4972da9..61def7cb526 
100644 --- a/drivers/iommu/msm_iommu_dev.c +++ b/drivers/iommu/msm_iommu_dev.c @@ -127,13 +127,12 @@ static void msm_iommu_reset(void __iomem *base, int ncb)  static int msm_iommu_probe(struct platform_device *pdev)  { -	struct resource *r, *r2; +	struct resource *r;  	struct clk *iommu_clk;  	struct clk *iommu_pclk;  	struct msm_iommu_drvdata *drvdata;  	struct msm_iommu_dev *iommu_dev = pdev->dev.platform_data;  	void __iomem *regs_base; -	resource_size_t	len;  	int ret, irq, par;  	if (pdev->id == -1) { @@ -178,35 +177,16 @@ static int msm_iommu_probe(struct platform_device *pdev)  		iommu_clk = NULL;  	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "physbase"); - -	if (!r) { -		ret = -ENODEV; -		goto fail_clk; -	} - -	len = resource_size(r); - -	r2 = request_mem_region(r->start, len, r->name); -	if (!r2) { -		pr_err("Could not request memory region: start=%p, len=%d\n", -							(void *) r->start, len); -		ret = -EBUSY; +	regs_base = devm_ioremap_resource(&pdev->dev, r); +	if (IS_ERR(regs_base)) { +		ret = PTR_ERR(regs_base);  		goto fail_clk;  	} -	regs_base = ioremap(r2->start, len); - -	if (!regs_base) { -		pr_err("Could not ioremap: start=%p, len=%d\n", -			 (void *) r2->start, len); -		ret = -EBUSY; -		goto fail_mem; -	} -  	irq = platform_get_irq_byname(pdev, "secure_irq");  	if (irq < 0) {  		ret = -ENODEV; -		goto fail_io; +		goto fail_clk;  	}  	msm_iommu_reset(regs_base, iommu_dev->ncb); @@ -222,14 +202,14 @@ static int msm_iommu_probe(struct platform_device *pdev)  	if (!par) {  		pr_err("%s: Invalid PAR value detected\n", iommu_dev->name);  		ret = -ENODEV; -		goto fail_io; +		goto fail_clk;  	}  	ret = request_irq(irq, msm_iommu_fault_handler, 0,  			"msm_iommu_secure_irpt_handler", drvdata);  	if (ret) {  		pr_err("Request IRQ %d failed with ret=%d\n", irq, ret); -		goto fail_io; +		goto fail_clk;  	} @@ -250,10 +230,6 @@ static int msm_iommu_probe(struct platform_device *pdev)  	clk_disable(iommu_pclk);  	return 0; -fail_io: -	iounmap(regs_base); -fail_mem: -	release_mem_region(r->start, len);  fail_clk:  	if (iommu_clk) {  		clk_disable(iommu_clk); diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index ee249bc959f..e550ccb7634 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -20,6 +20,7 @@  #include <linux/export.h>  #include <linux/limits.h>  #include <linux/of.h> +#include <linux/of_iommu.h>  /**   * of_get_dma_window - Parse *dma-window property and returns 0 if found. 
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index d97fbe4fb9b..80fffba7f12 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -354,8 +354,8 @@ DEBUG_FOPS(mem);  			return -ENOMEM;					\  	} -#define DEBUG_ADD_FILE(name) __DEBUG_ADD_FILE(name, 600) -#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 400) +#define DEBUG_ADD_FILE(name) __DEBUG_ADD_FILE(name, 0600) +#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 0400)  static int iommu_debug_register(struct device *dev, void *data)  { diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bcd78a72063..895af06a667 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -23,6 +23,9 @@  #include <linux/spinlock.h>  #include <linux/io.h>  #include <linux/pm_runtime.h> +#include <linux/of.h> +#include <linux/of_iommu.h> +#include <linux/of_irq.h>  #include <asm/cacheflush.h> @@ -31,6 +34,9 @@  #include "omap-iopgtable.h"  #include "omap-iommu.h" +#define to_iommu(dev)							\ +	((struct omap_iommu *)platform_get_drvdata(to_platform_device(dev))) +  #define for_each_iotlb_cr(obj, n, __i, cr)				\  	for (__i = 0;							\  	     (__i < (n)) && (cr = __iotlb_read_cr((obj), __i), true);	\ @@ -146,13 +152,10 @@ static int iommu_enable(struct omap_iommu *obj)  	struct platform_device *pdev = to_platform_device(obj->dev);  	struct iommu_platform_data *pdata = pdev->dev.platform_data; -	if (!pdata) -		return -EINVAL; -  	if (!arch_iommu)  		return -ENODEV; -	if (pdata->deassert_reset) { +	if (pdata && pdata->deassert_reset) {  		err = pdata->deassert_reset(pdev, pdata->reset_name);  		if (err) {  			dev_err(obj->dev, "deassert_reset failed: %d\n", err); @@ -172,14 +175,11 @@ static void iommu_disable(struct omap_iommu *obj)  	struct platform_device *pdev = to_platform_device(obj->dev);  	struct iommu_platform_data *pdata = pdev->dev.platform_data; -	if (!pdata) -		return; -  	arch_iommu->disable(obj);  	pm_runtime_put_sync(obj->dev); -	if (pdata->assert_reset) +	if (pdata && pdata->assert_reset)  		pdata->assert_reset(pdev, pdata->reset_name);  } @@ -394,6 +394,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da)  				__func__, start, da, bytes);  			iotlb_load_cr(obj, &cr);  			iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY); +			break;  		}  	}  	pm_runtime_put_sync(obj->dev); @@ -523,7 +524,8 @@ static void flush_iopte_range(u32 *first, u32 *last)  static void iopte_free(u32 *iopte)  {  	/* Note: freed iopte's must be clean ready for re-use */ -	kmem_cache_free(iopte_cachep, iopte); +	if (iopte) +		kmem_cache_free(iopte_cachep, iopte);  }  static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) @@ -863,7 +865,7 @@ static int device_match_by_alias(struct device *dev, void *data)   **/  static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd)  { -	int err = -ENOMEM; +	int err;  	struct device *dev;  	struct omap_iommu *obj; @@ -871,7 +873,7 @@ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd)  				(void *)name,  				device_match_by_alias);  	if (!dev) -		return NULL; +		return ERR_PTR(-ENODEV);  	obj = to_iommu(dev); @@ -890,8 +892,10 @@ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd)  		goto err_enable;  	flush_iotlb_all(obj); -	if (!try_module_get(obj->owner)) +	if (!try_module_get(obj->owner)) { +		err = -ENODEV;  		goto err_module; +	}  	spin_unlock(&obj->iommu_lock); @@ -940,17 +944,41 @@ static int omap_iommu_probe(struct platform_device 
*pdev)  	struct omap_iommu *obj;  	struct resource *res;  	struct iommu_platform_data *pdata = pdev->dev.platform_data; +	struct device_node *of = pdev->dev.of_node; -	obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); +	obj = devm_kzalloc(&pdev->dev, sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL);  	if (!obj)  		return -ENOMEM; -	obj->nr_tlb_entries = pdata->nr_tlb_entries; -	obj->name = pdata->name; +	if (of) { +		obj->name = dev_name(&pdev->dev); +		obj->nr_tlb_entries = 32; +		err = of_property_read_u32(of, "ti,#tlb-entries", +					   &obj->nr_tlb_entries); +		if (err && err != -EINVAL) +			return err; +		if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) +			return -EINVAL; +		/* +		 * da_start and da_end are needed for omap-iovmm, so hardcode +		 * these values as used by OMAP3 ISP - the only user for +		 * omap-iovmm +		 */ +		obj->da_start = 0; +		obj->da_end = 0xfffff000; +		if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) +			obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; +	} else { +		obj->nr_tlb_entries = pdata->nr_tlb_entries; +		obj->name = pdata->name; +		obj->da_start = pdata->da_start; +		obj->da_end = pdata->da_end; +	} +	if (obj->da_end <= obj->da_start) +		return -EINVAL; +  	obj->dev = &pdev->dev;  	obj->ctx = (void *)obj + sizeof(*obj); -	obj->da_start = pdata->da_start; -	obj->da_end = pdata->da_end;  	spin_lock_init(&obj->iommu_lock);  	mutex_init(&obj->mmap_lock); @@ -958,33 +986,18 @@ static int omap_iommu_probe(struct platform_device *pdev)  	INIT_LIST_HEAD(&obj->mmap);  	res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -	if (!res) { -		err = -ENODEV; -		goto err_mem; -	} - -	res = request_mem_region(res->start, resource_size(res), -				 dev_name(&pdev->dev)); -	if (!res) { -		err = -EIO; -		goto err_mem; -	} - -	obj->regbase = ioremap(res->start, resource_size(res)); -	if (!obj->regbase) { -		err = -ENOMEM; -		goto err_ioremap; -	} +	obj->regbase = devm_ioremap_resource(obj->dev, res); +	if (IS_ERR(obj->regbase)) +		return PTR_ERR(obj->regbase);  	irq = platform_get_irq(pdev, 0); -	if (irq < 0) { -		err = -ENODEV; -		goto err_irq; -	} -	err = request_irq(irq, iommu_fault_handler, IRQF_SHARED, -			  dev_name(&pdev->dev), obj); +	if (irq < 0) +		return -ENODEV; + +	err = devm_request_irq(obj->dev, irq, iommu_fault_handler, IRQF_SHARED, +			       dev_name(obj->dev), obj);  	if (err < 0) -		goto err_irq; +		return err;  	platform_set_drvdata(pdev, obj);  	pm_runtime_irq_safe(obj->dev); @@ -992,42 +1005,34 @@ static int omap_iommu_probe(struct platform_device *pdev)  	dev_info(&pdev->dev, "%s registered\n", obj->name);  	return 0; - -err_irq: -	iounmap(obj->regbase); -err_ioremap: -	release_mem_region(res->start, resource_size(res)); -err_mem: -	kfree(obj); -	return err;  }  static int omap_iommu_remove(struct platform_device *pdev)  { -	int irq; -	struct resource *res;  	struct omap_iommu *obj = platform_get_drvdata(pdev);  	iopgtable_clear_entry_all(obj); -	irq = platform_get_irq(pdev, 0); -	free_irq(irq, obj); -	res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -	release_mem_region(res->start, resource_size(res)); -	iounmap(obj->regbase); -  	pm_runtime_disable(obj->dev);  	dev_info(&pdev->dev, "%s removed\n", obj->name); -	kfree(obj);  	return 0;  } +static struct of_device_id omap_iommu_of_match[] = { +	{ .compatible = "ti,omap2-iommu" }, +	{ .compatible = "ti,omap4-iommu" }, +	{ .compatible = "ti,dra7-iommu"	}, +	{}, +}; +MODULE_DEVICE_TABLE(of, omap_iommu_of_match); +  static struct platform_driver omap_iommu_driver = {  	.probe	= 
omap_iommu_probe,  	.remove	= omap_iommu_remove,  	.driver	= {  		.name	= "omap-iommu", +		.of_match_table = of_match_ptr(omap_iommu_of_match),  	},  }; @@ -1036,19 +1041,18 @@ static void iopte_cachep_ctor(void *iopte)  	clean_dcache_area(iopte, IOPTE_TABLE_SIZE);  } -static u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, -				   u32 flags) +static u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, int pgsz)  {  	memset(e, 0, sizeof(*e));  	e->da		= da;  	e->pa		= pa; -	e->valid	= 1; +	e->valid	= MMU_CAM_V;  	/* FIXME: add OMAP1 support */ -	e->pgsz		= flags & MMU_CAM_PGSZ_MASK; -	e->endian	= flags & MMU_RAM_ENDIAN_MASK; -	e->elsz		= flags & MMU_RAM_ELSZ_MASK; -	e->mixed	= flags & MMU_RAM_MIXED_MASK; +	e->pgsz		= pgsz; +	e->endian	= MMU_RAM_ENDIAN_LITTLE; +	e->elsz		= MMU_RAM_ELSZ_8; +	e->mixed	= 0;  	return iopgsz_to_bytes(e->pgsz);  } @@ -1061,9 +1065,8 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,  	struct device *dev = oiommu->dev;  	struct iotlb_entry e;  	int omap_pgsz; -	u32 ret, flags; +	u32 ret; -	/* we only support mapping a single iommu page for now */  	omap_pgsz = bytes_to_iopgsz(bytes);  	if (omap_pgsz < 0) {  		dev_err(dev, "invalid size to map: %d\n", bytes); @@ -1072,9 +1075,7 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,  	dev_dbg(dev, "mapping da 0x%lx to pa 0x%x size 0x%x\n", da, pa, bytes); -	flags = omap_pgsz | prot; - -	iotlb_init_entry(&e, da, pa, flags); +	iotlb_init_entry(&e, da, pa, omap_pgsz);  	ret = omap_iopgtable_store_entry(oiommu, &e);  	if (ret) @@ -1247,12 +1248,49 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,  	return ret;  } -static int omap_iommu_domain_has_cap(struct iommu_domain *domain, -				    unsigned long cap) +static int omap_iommu_add_device(struct device *dev)  { +	struct omap_iommu_arch_data *arch_data; +	struct device_node *np; + +	/* +	 * Allocate the archdata iommu structure for DT-based devices. +	 * +	 * TODO: Simplify this when removing non-DT support completely from the +	 * IOMMU users. 
+	 */ +	if (!dev->of_node) +		return 0; + +	np = of_parse_phandle(dev->of_node, "iommus", 0); +	if (!np) +		return 0; + +	arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL); +	if (!arch_data) { +		of_node_put(np); +		return -ENOMEM; +	} + +	arch_data->name = kstrdup(dev_name(dev), GFP_KERNEL); +	dev->archdata.iommu = arch_data; + +	of_node_put(np); +  	return 0;  } +static void omap_iommu_remove_device(struct device *dev) +{ +	struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + +	if (!dev->of_node || !arch_data) +		return; + +	kfree(arch_data->name); +	kfree(arch_data); +} +  static struct iommu_ops omap_iommu_ops = {  	.domain_init	= omap_iommu_domain_init,  	.domain_destroy	= omap_iommu_domain_destroy, @@ -1261,7 +1299,8 @@ static struct iommu_ops omap_iommu_ops = {  	.map		= omap_iommu_map,  	.unmap		= omap_iommu_unmap,  	.iova_to_phys	= omap_iommu_iova_to_phys, -	.domain_has_cap	= omap_iommu_domain_has_cap, +	.add_device	= omap_iommu_add_device, +	.remove_device	= omap_iommu_remove_device,  	.pgsize_bitmap	= OMAP_IOMMU_PGSIZES,  }; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 12008420660..ea920c3e94f 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -52,6 +52,8 @@ struct omap_iommu {  	void *ctx; /* iommu context: registres saved area */  	u32 da_start;  	u32 da_end; + +	int has_bus_err_back;  };  struct cr_regs { @@ -130,6 +132,7 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)  #define MMU_READ_CAM		0x68  #define MMU_READ_RAM		0x6c  #define MMU_EMU_FAULT_AD	0x70 +#define MMU_GP_REG		0x88  #define MMU_REG_SIZE		256 @@ -163,6 +166,8 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)  #define MMU_RAM_MIXED_MASK	(1 << MMU_RAM_MIXED_SHIFT)  #define MMU_RAM_MIXED		MMU_RAM_MIXED_MASK +#define MMU_GP_REG_BUS_ERR_BACK_EN	0x1 +  /*   * utilities for super page(16MB, 1MB, 64KB and 4KB)   */ diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c index d745094a69d..5e1ea3b0bf1 100644 --- a/drivers/iommu/omap-iommu2.c +++ b/drivers/iommu/omap-iommu2.c @@ -98,6 +98,9 @@ static int omap2_iommu_enable(struct omap_iommu *obj)  	iommu_write_reg(obj, pa, MMU_TTB); +	if (obj->has_bus_err_back) +		iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG); +  	__iommu_set_twl(obj, true);  	return 0; diff --git a/drivers/iommu/omap-iopgtable.h b/drivers/iommu/omap-iopgtable.h index f4003d568a9..f891683e3f0 100644 --- a/drivers/iommu/omap-iopgtable.h +++ b/drivers/iommu/omap-iopgtable.h @@ -93,6 +93,3 @@ static inline phys_addr_t omap_iommu_translate(u32 d, u32 va, u32 mask)  /* to find an entry in the second-level page table. 
*/  #define iopte_index(da)		(((da) >> IOPTE_SHIFT) & (PTRS_PER_IOPTE - 1))  #define iopte_offset(iopgd, da)	(iopgd_page_vaddr(iopgd) + iopte_index(da)) - -#define to_iommu(dev)							\ -	((struct omap_iommu *)platform_get_drvdata(to_platform_device(dev))) diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c index d572863dfcc..464acda0bbc 100644 --- a/drivers/iommu/shmobile-iommu.c +++ b/drivers/iommu/shmobile-iommu.c @@ -343,7 +343,7 @@ static int shmobile_iommu_add_device(struct device *dev)  	mapping = archdata->iommu_mapping;  	if (!mapping) {  		mapping = arm_iommu_create_mapping(&platform_bus_type, 0, -						   L1_LEN << 20, 0); +						   L1_LEN << 20);  		if (IS_ERR(mapping))  			return PTR_ERR(mapping);  		archdata->iommu_mapping = mapping; @@ -380,14 +380,13 @@ int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu)  		kmem_cache_destroy(l1cache);  		return -ENOMEM;  	} -	archdata = kmalloc(sizeof(*archdata), GFP_KERNEL); +	archdata = kzalloc(sizeof(*archdata), GFP_KERNEL);  	if (!archdata) {  		kmem_cache_destroy(l1cache);  		kmem_cache_destroy(l2cache);  		return -ENOMEM;  	}  	spin_lock_init(&archdata->attach_lock); -	archdata->attached = NULL;  	archdata->ipmmu = ipmmu;  	ipmmu_archdata = archdata;  	bus_set_iommu(&platform_bus_type, &shmobile_iommu_ops); diff --git a/drivers/iommu/shmobile-ipmmu.c b/drivers/iommu/shmobile-ipmmu.c index 8321f89596c..bd97adecb1f 100644 --- a/drivers/iommu/shmobile-ipmmu.c +++ b/drivers/iommu/shmobile-ipmmu.c @@ -35,12 +35,12 @@ void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu)  	if (!ipmmu)  		return; -	mutex_lock(&ipmmu->flush_lock); +	spin_lock(&ipmmu->flush_lock);  	if (ipmmu->tlb_enabled)  		ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH | IMCTR1_TLBEN);  	else  		ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH); -	mutex_unlock(&ipmmu->flush_lock); +	spin_unlock(&ipmmu->flush_lock);  }  void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, @@ -49,7 +49,7 @@ void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size,  	if (!ipmmu)  		return; -	mutex_lock(&ipmmu->flush_lock); +	spin_lock(&ipmmu->flush_lock);  	switch (size) {  	default:  		ipmmu->tlb_enabled = 0; @@ -85,7 +85,7 @@ void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size,  	}  	ipmmu_reg_write(ipmmu, IMTTBR, phys);  	ipmmu_reg_write(ipmmu, IMASID, asid); -	mutex_unlock(&ipmmu->flush_lock); +	spin_unlock(&ipmmu->flush_lock);  }  static int ipmmu_probe(struct platform_device *pdev) @@ -94,31 +94,25 @@ static int ipmmu_probe(struct platform_device *pdev)  	struct resource *res;  	struct shmobile_ipmmu_platform_data *pdata = pdev->dev.platform_data; -	res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -	if (!res) { -		dev_err(&pdev->dev, "cannot get platform resources\n"); -		return -ENOENT; -	}  	ipmmu = devm_kzalloc(&pdev->dev, sizeof(*ipmmu), GFP_KERNEL);  	if (!ipmmu) {  		dev_err(&pdev->dev, "cannot allocate device data\n");  		return -ENOMEM;  	} -	mutex_init(&ipmmu->flush_lock); +	spin_lock_init(&ipmmu->flush_lock);  	ipmmu->dev = &pdev->dev; -	ipmmu->ipmmu_base = devm_ioremap_nocache(&pdev->dev, res->start, -						resource_size(res)); -	if (!ipmmu->ipmmu_base) { -		dev_err(&pdev->dev, "ioremap_nocache failed\n"); -		return -ENOMEM; -	} + +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0); +	ipmmu->ipmmu_base = devm_ioremap_resource(&pdev->dev, res); +	if (IS_ERR(ipmmu->ipmmu_base)) +		return PTR_ERR(ipmmu->ipmmu_base); +  	ipmmu->dev_names = pdata->dev_names;  	ipmmu->num_dev_names = 
pdata->num_dev_names;  	platform_set_drvdata(pdev, ipmmu);  	ipmmu_reg_write(ipmmu, IMCTR1, 0x0); /* disable TLB */  	ipmmu_reg_write(ipmmu, IMCTR2, 0x0); /* disable PMB */ -	ipmmu_iommu_init(ipmmu); -	return 0; +	return ipmmu_iommu_init(ipmmu);  }  static struct platform_driver ipmmu_driver = { diff --git a/drivers/iommu/shmobile-ipmmu.h b/drivers/iommu/shmobile-ipmmu.h index 4d53684673e..9524743ca1f 100644 --- a/drivers/iommu/shmobile-ipmmu.h +++ b/drivers/iommu/shmobile-ipmmu.h @@ -14,7 +14,7 @@ struct shmobile_ipmmu {  	struct device *dev;  	void __iomem *ipmmu_base;  	int tlb_enabled; -	struct mutex flush_lock; +	spinlock_t flush_lock;  	const char * const *dev_names;  	unsigned int num_dev_names;  }; diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 108c0e9c24d..dba1a9fd507 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -252,7 +252,7 @@ static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,  	spin_lock_irqsave(&gart->pte_lock, flags);  	pfn = __phys_to_pfn(pa);  	if (!pfn_valid(pfn)) { -		dev_err(gart->dev, "Invalid page: %08x\n", pa); +		dev_err(gart->dev, "Invalid page: %pa\n", &pa);  		spin_unlock_irqrestore(&gart->pte_lock, flags);  		return -EINVAL;  	} @@ -295,8 +295,8 @@ static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain,  	pa = (pte & GART_PAGE_MASK);  	if (!pfn_valid(__phys_to_pfn(pa))) { -		dev_err(gart->dev, "No entry for %08llx:%08x\n", -			 (unsigned long long)iova, pa); +		dev_err(gart->dev, "No entry for %08llx:%pa\n", +			 (unsigned long long)iova, &pa);  		gart_dump_table(gart);  		return -EINVAL;  	} @@ -351,7 +351,6 @@ static int tegra_gart_probe(struct platform_device *pdev)  	struct gart_device *gart;  	struct resource *res, *res_remap;  	void __iomem *gart_regs; -	int err;  	struct device *dev = &pdev->dev;  	if (gart_handle) @@ -376,8 +375,7 @@ static int tegra_gart_probe(struct platform_device *pdev)  	gart_regs = devm_ioremap(dev, res->start, resource_size(res));  	if (!gart_regs) {  		dev_err(dev, "failed to remap GART registers\n"); -		err = -ENXIO; -		goto fail; +		return -ENXIO;  	}  	gart->dev = &pdev->dev; @@ -391,8 +389,7 @@ static int tegra_gart_probe(struct platform_device *pdev)  	gart->savedata = vmalloc(sizeof(u32) * gart->page_count);  	if (!gart->savedata) {  		dev_err(dev, "failed to allocate context save area\n"); -		err = -ENOMEM; -		goto fail; +		return -ENOMEM;  	}  	platform_set_drvdata(pdev, gart); @@ -401,32 +398,20 @@ static int tegra_gart_probe(struct platform_device *pdev)  	gart_handle = gart;  	bus_set_iommu(&platform_bus_type, &gart_iommu_ops);  	return 0; - -fail: -	if (gart_regs) -		devm_iounmap(dev, gart_regs); -	if (gart && gart->savedata) -		vfree(gart->savedata); -	devm_kfree(dev, gart); -	return err;  }  static int tegra_gart_remove(struct platform_device *pdev)  {  	struct gart_device *gart = platform_get_drvdata(pdev); -	struct device *dev = gart->dev;  	writel(0, gart->regs + GART_CONFIG);  	if (gart->savedata)  		vfree(gart->savedata); -	if (gart->regs) -		devm_iounmap(dev, gart->regs); -	devm_kfree(dev, gart);  	gart_handle = NULL;  	return 0;  } -const struct dev_pm_ops tegra_gart_pm_ops = { +static const struct dev_pm_ops tegra_gart_pm_ops = {  	.suspend	= tegra_gart_suspend,  	.resume		= tegra_gart_resume,  }; diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index e0665603afd..605b5b46a90 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -731,7 +731,7 @@ static int 
smmu_iommu_map(struct iommu_domain *domain, unsigned long iova,  	unsigned long pfn = __phys_to_pfn(pa);  	unsigned long flags; -	dev_dbg(as->smmu->dev, "[%d] %08lx:%08x\n", as->asid, iova, pa); +	dev_dbg(as->smmu->dev, "[%d] %08lx:%pa\n", as->asid, iova, &pa);  	if (!pfn_valid(pfn))  		return -ENOMEM; @@ -1254,7 +1254,7 @@ static int tegra_smmu_remove(struct platform_device *pdev)  	return 0;  } -const struct dev_pm_ops tegra_smmu_pm_ops = { +static const struct dev_pm_ops tegra_smmu_pm_ops = {  	.suspend	= tegra_smmu_suspend,  	.resume		= tegra_smmu_resume,  };  | 
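The iommu.c hunks at the top of this diff add trace events to the generic IOMMU layer and, for attach and map, only fire them once the driver callback has succeeded. A minimal stand-alone C model of that ordering; the names here (domain_ops, attach_device, the trace stub) are illustrative stand-ins, not the kernel API:

#include <stdio.h>

/* Stand-in for a tracepoint; the kernel uses trace_attach_device_to_domain(). */
static void trace_attach(const char *dev)
{
	printf("trace: attached %s\n", dev);
}

struct domain_ops {
	int (*attach_dev)(const char *dev);
};

/* Fire the trace only when the driver callback reports success. */
static int attach_device(const struct domain_ops *ops, const char *dev)
{
	int ret;

	if (!ops || !ops->attach_dev)
		return -1;	/* -ENODEV in the kernel */

	ret = ops->attach_dev(dev);
	if (!ret)
		trace_attach(dev);

	return ret;
}

static int dummy_attach(const char *dev)
{
	(void)dev;
	return 0;	/* pretend the hardware attach worked */
}

int main(void)
{
	struct domain_ops ops = { .attach_dev = dummy_attach };

	return attach_device(&ops, "dummy-device") ? 1 : 0;
}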
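split_and_remove_iova() in iova.c allocates the "prev" and "next" pieces before it touches the rbtree, so a failed allocation leaves the existing node intact. Below is the same split logic on a plain struct, without the rbtree, locking or lookup caching, purely as an illustration of the allocate-first, mutate-last ordering:

#include <stdio.h>
#include <stdlib.h>

struct range {
	unsigned long pfn_lo;
	unsigned long pfn_hi;
};

static struct range *alloc_and_init_range(unsigned long lo, unsigned long hi)
{
	struct range *r = malloc(sizeof(*r));

	if (r) {
		r->pfn_lo = lo;
		r->pfn_hi = hi;
	}
	return r;
}

/*
 * Carve [pfn_lo, pfn_hi] out of *iova. Any leftover head/tail is returned
 * through *prev/*next. Allocate first, mutate last: on failure the original
 * range is unchanged, which is the property the kernel function relies on.
 */
static int split_range(struct range *iova, unsigned long pfn_lo,
		       unsigned long pfn_hi,
		       struct range **prev, struct range **next)
{
	*prev = *next = NULL;

	if (iova->pfn_lo < pfn_lo) {
		*prev = alloc_and_init_range(iova->pfn_lo, pfn_lo - 1);
		if (!*prev)
			return -1;
	}
	if (iova->pfn_hi > pfn_hi) {
		*next = alloc_and_init_range(pfn_hi + 1, iova->pfn_hi);
		if (!*next) {
			free(*prev);
			*prev = NULL;
			return -1;
		}
	}

	/* Only now shrink the original range to exactly [pfn_lo, pfn_hi]. */
	iova->pfn_lo = pfn_lo;
	iova->pfn_hi = pfn_hi;
	return 0;
}

int main(void)
{
	struct range r = { .pfn_lo = 0x100, .pfn_hi = 0x1ff };
	struct range *prev, *next;

	if (split_range(&r, 0x140, 0x17f, &prev, &next))
		return 1;

	printf("prev [%lx-%lx] mid [%lx-%lx] next [%lx-%lx]\n",
	       prev->pfn_lo, prev->pfn_hi, r.pfn_lo, r.pfn_hi,
	       next->pfn_lo, next->pfn_hi);
	free(prev);
	free(next);
	return 0;
}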
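In the new ipmmu-vmsa driver, ipmmu_tlb_invalidate() sets IMCTR.FLUSH and then polls the same bit with a bounded loop instead of waiting indefinitely. A compact model of that bounded poll against a simulated register; TLB_LOOP_TIMEOUT and the IMCTR_FLUSH value are taken from the driver, everything else here is fake:

#include <stdio.h>

#define IMCTR_FLUSH		(1 << 1)
#define TLB_LOOP_TIMEOUT	100	/* iterations; the driver waits ~1us per loop */

/* Simulated context register; a real driver would use ioread32()/iowrite32(). */
static unsigned int fake_imctr;
static int flush_countdown;

static unsigned int ctx_read(void)
{
	/* Pretend the hardware clears FLUSH after a few reads. */
	if (flush_countdown && --flush_countdown == 0)
		fake_imctr &= ~IMCTR_FLUSH;
	return fake_imctr;
}

static void ctx_write(unsigned int val)
{
	fake_imctr = val;
	if (val & IMCTR_FLUSH)
		flush_countdown = 5;
}

static int tlb_sync(void)
{
	unsigned int count = 0;

	while (ctx_read() & IMCTR_FLUSH) {
		if (++count == TLB_LOOP_TIMEOUT) {
			fprintf(stderr, "TLB sync timed out\n");
			return -1;
		}
		/* udelay(1) in the kernel */
	}
	return 0;
}

static int tlb_invalidate(void)
{
	ctx_write(ctx_read() | IMCTR_FLUSH);	/* kick the flush */
	return tlb_sync();			/* wait for completion, bounded */
}

int main(void)
{
	return tlb_invalidate() ? 1 : 0;
}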
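ipmmu_domain_init_context() composes the context registers purely by OR-ing the field encodings defined at the top of the file, so the arithmetic is easy to verify in isolation. For example, the IMMAIR0 value packs one 8-bit memory attribute per attribute index:

#include <stdio.h>

/* Field encodings copied from the driver. */
#define IMMAIR_ATTR_DEVICE	0x04
#define IMMAIR_ATTR_NC		0x44
#define IMMAIR_ATTR_WBRWA	0xff
#define IMMAIR_ATTR_SHIFT(n)	((n) << 3)
#define IMMAIR_ATTR_IDX_NC	0
#define IMMAIR_ATTR_IDX_WBRWA	1
#define IMMAIR_ATTR_IDX_DEV	2

int main(void)
{
	unsigned int mair0;

	mair0 = (IMMAIR_ATTR_NC << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_NC))
	      | (IMMAIR_ATTR_WBRWA << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_WBRWA))
	      | (IMMAIR_ATTR_DEVICE << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_DEV));

	/* Attribute index 0 = 0x44 (NC), index 1 = 0xff (WBRWA), index 2 = 0x04 (device). */
	printf("IMMAIR0 = 0x%08x\n", mair0);	/* prints 0x0004ff44 */
	return 0;
}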
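ipmmu_domain_irq() reads IMSTR, returns early when no error bit is set, logs multi-hit and page-table-walk aborts as fatal, and only hands page/translation faults to report_iommu_fault(). A tiny decoder over the same bit layout (values copied from the driver) that mirrors that classification:

#include <stdio.h>

#define IMSTR_MHIT	(1 << 4)
#define IMSTR_ABORT	(1 << 2)
#define IMSTR_PF	(1 << 1)
#define IMSTR_TF	(1 << 0)

static const char *classify_fault(unsigned int status)
{
	const unsigned int err_mask = IMSTR_MHIT | IMSTR_ABORT | IMSTR_PF | IMSTR_TF;

	if (!(status & err_mask))
		return "spurious";			/* IRQ_NONE in the driver */

	if (status & IMSTR_MHIT)
		fprintf(stderr, "multiple TLB hits\n");	/* logged as fatal */
	if (status & IMSTR_ABORT)
		fprintf(stderr, "page table walk abort\n");

	if (!(status & (IMSTR_PF | IMSTR_TF)))
		return "fatal only";
	return "page/translation fault";		/* report_iommu_fault() path */
}

int main(void)
{
	printf("%s / %s / %s\n",
	       classify_fault(0),
	       classify_fault(IMSTR_ABORT),
	       classify_fault(IMSTR_TF));
	return 0;
}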
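ipmmu_page_prot() maps the generic IOMMU_READ/WRITE/CACHE/EXEC flags onto stage-1 VMSA descriptor bits: read-only gets AP_RDONLY, cacheable selects the WBRWA MAIR index, and a no-access request drops the descriptor type so the entry faults rather than filling the TLB. A stand-alone rendering of that decision table; the IOMMU_* values are defined locally so the example builds on its own and are believed to match the kernel's flags at the time:

#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for the generic IOMMU prot flags. */
#define IOMMU_READ	(1 << 0)
#define IOMMU_WRITE	(1 << 1)
#define IOMMU_CACHE	(1 << 2)
#define IOMMU_EXEC	(1 << 3)

/* Stage-1 descriptor bits, copied from the driver. */
#define ARM_VMSA_PTE_XN			((uint64_t)3 << 53)
#define ARM_VMSA_PTE_nG			((uint64_t)1 << 11)
#define ARM_VMSA_PTE_AF			((uint64_t)1 << 10)
#define ARM_VMSA_PTE_SH_IS		((uint64_t)3 << 8)
#define ARM_VMSA_PTE_AP_UNPRIV		((uint64_t)1 << 6)
#define ARM_VMSA_PTE_AP_RDONLY		((uint64_t)2 << 6)
#define ARM_VMSA_PTE_NS			((uint64_t)1 << 5)
#define ARM_VMSA_PTE_ATTRINDX_SHIFT	2
#define ARM_VMSA_PTE_PAGE		((uint64_t)3 << 0)
#define IMMAIR_ATTR_IDX_WBRWA		1

static uint64_t page_prot(unsigned int prot, uint64_t type)
{
	uint64_t pgprot = ARM_VMSA_PTE_XN | ARM_VMSA_PTE_nG | ARM_VMSA_PTE_AF
			| ARM_VMSA_PTE_SH_IS | ARM_VMSA_PTE_AP_UNPRIV
			| ARM_VMSA_PTE_NS | type;

	if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
		pgprot |= ARM_VMSA_PTE_AP_RDONLY;		/* read-only */

	if (prot & IOMMU_CACHE)
		pgprot |= IMMAIR_ATTR_IDX_WBRWA << ARM_VMSA_PTE_ATTRINDX_SHIFT;

	if (prot & IOMMU_EXEC)
		pgprot &= ~ARM_VMSA_PTE_XN;			/* executable */
	else if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
		pgprot &= ~ARM_VMSA_PTE_PAGE;			/* faulting entry */

	return pgprot;
}

int main(void)
{
	printf("ro+cache: %#llx\n",
	       (unsigned long long)page_prot(IOMMU_READ | IOMMU_CACHE,
					     ARM_VMSA_PTE_PAGE));
	return 0;
}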
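For 64KB mappings the driver installs 16 physically contiguous PTEs and sets the contiguous hint, and before unmapping a single 4KB page inside such a block it first strips the hint from all 16 entries (ipmmu_split_pte). A toy model of that map/split sequence on a bare array; the real code also flushes each modified range to memory for the table walker:

#include <stdint.h>
#include <stdio.h>

#define PTE_CONT		((uint64_t)1 << 52)
#define PTE_CONT_ENTRIES	16

/* Map a 64KB block: fill 16 consecutive entries and set the contiguous hint. */
static void map_cont_block(uint64_t *pte, uint64_t base_pfn, uint64_t attrs)
{
	unsigned int i;

	for (i = 0; i < PTE_CONT_ENTRIES; i++)
		pte[i] = ((base_pfn + i) << 12) | attrs | PTE_CONT;
}

/* Before touching a single 4KB page inside the block, strip the hint from
 * every entry so the remaining 15 stay architecturally valid. */
static void split_cont_block(uint64_t *pte)
{
	unsigned int i;

	for (i = 0; i < PTE_CONT_ENTRIES; i++)
		pte[i] &= ~PTE_CONT;
}

int main(void)
{
	uint64_t ptes[PTE_CONT_ENTRIES];

	map_cont_block(ptes, 0x80000, 0x3);	/* arbitrary pfn and attributes */
	split_cont_block(ptes);
	ptes[7] = 0;				/* now safe to clear one entry */
	printf("pte[0]=%#llx pte[7]=%#llx\n",
	       (unsigned long long)ptes[0], (unsigned long long)ptes[7]);
	return 0;
}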
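ipmmu_iova_to_phys() walks a three-level long-descriptor table: four first-level entries of 1GB each, then 512 entries of 2MB, then 512 entries of 4KB, with 2MB sections short-circuiting the last level. A minimal index calculator for that geometry (local helper names, not the kernel's):

#include <stdint.h>
#include <stdio.h>

/* Geometry used by the driver: 4 x 1GB, 512 x 2MB, 512 x 4KB. */
#define PGD_SHIFT	30
#define PMD_SHIFT	21
#define PAGE_SHIFT_	12

static unsigned int pgd_index(uint32_t iova) { return iova >> PGD_SHIFT; }
static unsigned int pmd_index(uint32_t iova) { return (iova >> PMD_SHIFT) & 0x1ff; }
static unsigned int pte_index(uint32_t iova) { return (iova >> PAGE_SHIFT_) & 0x1ff; }

int main(void)
{
	uint32_t iova = 0x4a3f5000;		/* arbitrary example address */
	uint64_t section_base = 0x70000000;	/* arbitrary 2MB-aligned phys base */

	printf("iova %#x -> pgd %u, pmd %u, pte %u\n", iova,
	       pgd_index(iova), pmd_index(iova), pte_index(iova));

	/* A 2MB section skips the last level: phys is the section base plus
	 * the low 21 bits of the IOVA. */
	printf("section phys = %#llx\n",
	       (unsigned long long)(section_base | (iova & ((1u << PMD_SHIFT) - 1))));
	return 0;
}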
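ipmmu_add_device() finds the device's microTLB by matching dev_name() against the master list supplied through platform data. A trivial stand-alone version of that lookup; the table entries below are hypothetical:

#include <stdio.h>
#include <string.h>

struct master {
	const char *name;
	unsigned int utlb;
};

static int find_utlb(const struct master *masters, unsigned int num,
		     const char *devname)
{
	unsigned int i;

	for (i = 0; i < num; i++)
		if (!strcmp(masters[i].name, devname))
			return masters[i].utlb;
	return -1;			/* not an IPMMU master */
}

int main(void)
{
	static const struct master masters[] = {
		{ "e6ef0000.video", 1 },	/* hypothetical entries */
		{ "fe8d0000.vsp1", 4 },
	};

	printf("%d\n", find_utlb(masters, 2, "fe8d0000.vsp1"));	/* prints 4 */
	return 0;
}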
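Several hunks above (msm_iommu_dev.c, omap-iommu.c, shmobile-ipmmu.c) replace open-coded request_mem_region()/ioremap()/free_irq() error unwinding with managed devm_* helpers. A schematic probe() showing the resulting shape; this is a generic kernel-style sketch rather than any of those drivers, it only builds inside a kernel tree, and it assumes a platform device with one MEM resource and one IRQ:

#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

/* Hypothetical per-device state and IRQ handler for the sketch. */
struct foo_priv {
	void __iomem *base;
};

static irqreturn_t foo_irq(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int foo_probe(struct platform_device *pdev)
{
	struct foo_priv *priv;
	struct resource *res;
	int irq, ret;

	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	/* devm_ioremap_resource() checks the resource, requests the region and
	 * maps it; everything is released automatically on error or detach,
	 * so the explicit unwind labels disappear. */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	priv->base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(priv->base))
		return PTR_ERR(priv->base);

	irq = platform_get_irq(pdev, 0);
	if (irq < 0)
		return irq;

	ret = devm_request_irq(&pdev->dev, irq, foo_irq, 0,
			       dev_name(&pdev->dev), priv);
	if (ret < 0)
		return ret;

	platform_set_drvdata(pdev, priv);
	return 0;
}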
