Diffstat (limited to 'drivers/iommu/intel-iommu.c')
-rw-r--r--  drivers/iommu/intel-iommu.c | 1812
1 file changed, 1017 insertions(+), 795 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 15e9b57e9cf..51b6b77dc3e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1,5 +1,5 @@  /* - * Copyright (c) 2006, Intel Corporation. + * Copyright © 2006-2014 Intel Corporation.   *   * This program is free software; you can redistribute it and/or modify it   * under the terms and conditions of the GNU General Public License, @@ -10,15 +10,11 @@   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for   * more details.   * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Copyright (C) 2006-2008 Intel Corporation - * Author: Ashok Raj <ashok.raj@intel.com> - * Author: Shaohua Li <shaohua.li@intel.com> - * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> - * Author: Fenghua Yu <fenghua.yu@intel.com> + * Authors: David Woodhouse <dwmw2@infradead.org>, + *          Ashok Raj <ashok.raj@intel.com>, + *          Shaohua Li <shaohua.li@intel.com>, + *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>, + *          Fenghua Yu <fenghua.yu@intel.com>   */  #include <linux/init.h> @@ -33,6 +29,7 @@  #include <linux/dmar.h>  #include <linux/dma-mapping.h>  #include <linux/mempool.h> +#include <linux/memory.h>  #include <linux/timer.h>  #include <linux/iova.h>  #include <linux/iommu.h> @@ -42,6 +39,7 @@  #include <linux/dmi.h>  #include <linux/pci-ats.h>  #include <linux/memblock.h> +#include <linux/dma-contiguous.h>  #include <asm/irq_remapping.h>  #include <asm/cacheflush.h>  #include <asm/iommu.h> @@ -63,6 +61,7 @@  #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48  #define MAX_AGAW_WIDTH 64 +#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)  #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)  #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1) @@ -106,12 +105,12 @@ static inline int agaw_to_level(int agaw)  static inline int agaw_to_width(int agaw)  { -	return 30 + agaw * LEVEL_STRIDE; +	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);  }  static inline int width_to_agaw(int width)  { -	return (width - 30) / LEVEL_STRIDE; +	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);  }  static inline unsigned int level_to_offset_bits(int level) @@ -141,7 +140,7 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)  static inline unsigned long lvl_to_nr_pages(unsigned int lvl)  { -	return  1 << ((lvl - 1) * LEVEL_STRIDE); +	return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);  }  /* VT-d pages must always be _smaller_ than MM pages. 
Otherwise things @@ -288,26 +287,6 @@ static inline void dma_clear_pte(struct dma_pte *pte)  	pte->val = 0;  } -static inline void dma_set_pte_readable(struct dma_pte *pte) -{ -	pte->val |= DMA_PTE_READ; -} - -static inline void dma_set_pte_writable(struct dma_pte *pte) -{ -	pte->val |= DMA_PTE_WRITE; -} - -static inline void dma_set_pte_snp(struct dma_pte *pte) -{ -	pte->val |= DMA_PTE_SNP; -} - -static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) -{ -	pte->val = (pte->val & ~3) | (prot & 3); -} -  static inline u64 dma_pte_addr(struct dma_pte *pte)  {  #ifdef CONFIG_64BIT @@ -318,11 +297,6 @@ static inline u64 dma_pte_addr(struct dma_pte *pte)  #endif  } -static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn) -{ -	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT; -} -  static inline bool dma_pte_present(struct dma_pte *pte)  {  	return (pte->val & 3) != 0; @@ -396,23 +370,46 @@ struct dmar_domain {  struct device_domain_info {  	struct list_head link;	/* link to domain siblings */  	struct list_head global; /* link to global list */ -	int segment;		/* PCI domain */  	u8 bus;			/* PCI bus number */  	u8 devfn;		/* PCI devfn number */ -	struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */ +	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */  	struct intel_iommu *iommu; /* IOMMU used by this device */  	struct dmar_domain *domain; /* pointer to domain */  }; +struct dmar_rmrr_unit { +	struct list_head list;		/* list of rmrr units	*/ +	struct acpi_dmar_header *hdr;	/* ACPI header		*/ +	u64	base_address;		/* reserved base address*/ +	u64	end_address;		/* reserved end address */ +	struct dmar_dev_scope *devices;	/* target devices */ +	int	devices_cnt;		/* target device count */ +}; + +struct dmar_atsr_unit { +	struct list_head list;		/* list of ATSR units */ +	struct acpi_dmar_header *hdr;	/* ACPI header */ +	struct dmar_dev_scope *devices;	/* target devices */ +	int devices_cnt;		/* target device count */ +	u8 include_all:1;		/* include all ports */ +}; + +static LIST_HEAD(dmar_atsr_units); +static LIST_HEAD(dmar_rmrr_units); + +#define for_each_rmrr_units(rmrr) \ +	list_for_each_entry(rmrr, &dmar_rmrr_units, list) +  static void flush_unmaps_timeout(unsigned long data); -DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0); +static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);  #define HIGH_WATER_MARK 250  struct deferred_flush_tables {  	int next;  	struct iova *iova[HIGH_WATER_MARK];  	struct dmar_domain *domain[HIGH_WATER_MARK]; +	struct page *freelist[HIGH_WATER_MARK];  };  static struct deferred_flush_tables *deferred_flush; @@ -426,7 +423,12 @@ static LIST_HEAD(unmaps_to_do);  static int timer_on;  static long list_size; +static void domain_exit(struct dmar_domain *domain);  static void domain_remove_dev_info(struct dmar_domain *domain); +static void domain_remove_one_dev_info(struct dmar_domain *domain, +				       struct device *dev); +static void iommu_detach_dependent_devices(struct intel_iommu *iommu, +					   struct device *dev);  #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON  int dmar_disabled = 0; @@ -590,18 +592,31 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)  static void domain_update_iommu_coherency(struct dmar_domain *domain)  { -	int i; - -	i = find_first_bit(domain->iommu_bmp, g_num_of_iommus); +	struct dmar_drhd_unit *drhd; +	struct intel_iommu *iommu; +	int i, found = 0; -	domain->iommu_coherency = i < g_num_of_iommus ? 
1 : 0; +	domain->iommu_coherency = 1;  	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { +		found = 1;  		if (!ecap_coherent(g_iommus[i]->ecap)) {  			domain->iommu_coherency = 0;  			break;  		}  	} +	if (found) +		return; + +	/* No hardware attached; use lowest common denominator */ +	rcu_read_lock(); +	for_each_active_iommu(iommu, drhd) { +		if (!ecap_coherent(iommu->ecap)) { +			domain->iommu_coherency = 0; +			break; +		} +	} +	rcu_read_unlock();  }  static void domain_update_iommu_snooping(struct dmar_domain *domain) @@ -630,12 +645,15 @@ static void domain_update_iommu_superpage(struct dmar_domain *domain)  	}  	/* set iommu_superpage to the smallest common denominator */ +	rcu_read_lock();  	for_each_active_iommu(iommu, drhd) {  		mask &= cap_super_page_val(iommu->cap);  		if (!mask) {  			break;  		}  	} +	rcu_read_unlock(); +  	domain->iommu_superpage = fls(mask);  } @@ -647,34 +665,56 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)  	domain_update_iommu_superpage(domain);  } -static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) +static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)  {  	struct dmar_drhd_unit *drhd = NULL; +	struct intel_iommu *iommu; +	struct device *tmp; +	struct pci_dev *ptmp, *pdev = NULL; +	u16 segment;  	int i; -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; -		if (segment != drhd->segment) +	if (dev_is_pci(dev)) { +		pdev = to_pci_dev(dev); +		segment = pci_domain_nr(pdev->bus); +	} else if (ACPI_COMPANION(dev)) +		dev = &ACPI_COMPANION(dev)->dev; + +	rcu_read_lock(); +	for_each_active_iommu(iommu, drhd) { +		if (pdev && segment != drhd->segment)  			continue; -		for (i = 0; i < drhd->devices_cnt; i++) { -			if (drhd->devices[i] && -			    drhd->devices[i]->bus->number == bus && -			    drhd->devices[i]->devfn == devfn) -				return drhd->iommu; -			if (drhd->devices[i] && -			    drhd->devices[i]->subordinate && -			    drhd->devices[i]->subordinate->number <= bus && -			    drhd->devices[i]->subordinate->busn_res.end >= bus) -				return drhd->iommu; +		for_each_active_dev_scope(drhd->devices, +					  drhd->devices_cnt, i, tmp) { +			if (tmp == dev) { +				*bus = drhd->devices[i].bus; +				*devfn = drhd->devices[i].devfn; +				goto out; +			} + +			if (!pdev || !dev_is_pci(tmp)) +				continue; + +			ptmp = to_pci_dev(tmp); +			if (ptmp->subordinate && +			    ptmp->subordinate->number <= pdev->bus->number && +			    ptmp->subordinate->busn_res.end >= pdev->bus->number) +				goto got_pdev;  		} -		if (drhd->include_all) -			return drhd->iommu; +		if (pdev && drhd->include_all) { +		got_pdev: +			*bus = pdev->bus->number; +			*devfn = pdev->devfn; +			goto out; +		}  	} +	iommu = NULL; + out: +	rcu_read_unlock(); -	return NULL; +	return iommu;  }  static void domain_flush_cache(struct dmar_domain *domain, @@ -774,7 +814,7 @@ out:  }  static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, -				      unsigned long pfn, int target_level) +				      unsigned long pfn, int *target_level)  {  	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;  	struct dma_pte *parent, *pte = NULL; @@ -782,17 +822,21 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,  	int offset;  	BUG_ON(!domain->pgd); -	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); + +	if (addr_width < BITS_PER_LONG && pfn >> addr_width) +		/* Address beyond IOMMU's addressing capabilities. 
*/ +		return NULL; +  	parent = domain->pgd; -	while (level > 0) { +	while (1) {  		void *tmp_page;  		offset = pfn_level_offset(pfn, level);  		pte = &parent[offset]; -		if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte))) +		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))  			break; -		if (level == target_level) +		if (level == *target_level)  			break;  		if (!dma_pte_present(pte)) { @@ -813,10 +857,16 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,  				domain_flush_cache(domain, pte, sizeof(*pte));  			}  		} +		if (level == 1) +			break; +  		parent = phys_to_virt(dma_pte_addr(pte));  		level--;  	} +	if (!*target_level) +		*target_level = level; +  	return pte;  } @@ -854,14 +904,13 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,  }  /* clear last level pte, a tlb flush should be followed */ -static int dma_pte_clear_range(struct dmar_domain *domain, +static void dma_pte_clear_range(struct dmar_domain *domain,  				unsigned long start_pfn,  				unsigned long last_pfn)  {  	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;  	unsigned int large_page = 1;  	struct dma_pte *first_pte, *pte; -	int order;  	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);  	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); @@ -885,9 +934,6 @@ static int dma_pte_clear_range(struct dmar_domain *domain,  				   (void *)pte - (void *)first_pte);  	} while (start_pfn && start_pfn <= last_pfn); - -	order = (large_page - 1) * 9; -	return order;  }  static void dma_pte_free_level(struct dmar_domain *domain, int level, @@ -913,7 +959,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,  		/* If range covers entire pagetable, free it */  		if (!(start_pfn > level_pfn || -		      last_pfn < level_pfn + level_size(level))) { +		      last_pfn < level_pfn + level_size(level) - 1)) {  			dma_clear_pte(pte);  			domain_flush_cache(domain, pte, sizeof(*pte));  			free_pgtable_page(level_pte); @@ -945,6 +991,125 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,  	}  } +/* When a page at a given level is being unlinked from its parent, we don't +   need to *modify* it at all. All we need to do is make a list of all the +   pages which can be freed just as soon as we've flushed the IOTLB and we +   know the hardware page-walk will no longer touch them. +   The 'pte' argument is the *parent* PTE, pointing to the page that is to +   be freed. 
*/ +static struct page *dma_pte_list_pagetables(struct dmar_domain *domain, +					    int level, struct dma_pte *pte, +					    struct page *freelist) +{ +	struct page *pg; + +	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT); +	pg->freelist = freelist; +	freelist = pg; + +	if (level == 1) +		return freelist; + +	pte = page_address(pg); +	do { +		if (dma_pte_present(pte) && !dma_pte_superpage(pte)) +			freelist = dma_pte_list_pagetables(domain, level - 1, +							   pte, freelist); +		pte++; +	} while (!first_pte_in_page(pte)); + +	return freelist; +} + +static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, +					struct dma_pte *pte, unsigned long pfn, +					unsigned long start_pfn, +					unsigned long last_pfn, +					struct page *freelist) +{ +	struct dma_pte *first_pte = NULL, *last_pte = NULL; + +	pfn = max(start_pfn, pfn); +	pte = &pte[pfn_level_offset(pfn, level)]; + +	do { +		unsigned long level_pfn; + +		if (!dma_pte_present(pte)) +			goto next; + +		level_pfn = pfn & level_mask(level); + +		/* If range covers entire pagetable, free it */ +		if (start_pfn <= level_pfn && +		    last_pfn >= level_pfn + level_size(level) - 1) { +			/* These suborbinate page tables are going away entirely. Don't +			   bother to clear them; we're just going to *free* them. */ +			if (level > 1 && !dma_pte_superpage(pte)) +				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist); + +			dma_clear_pte(pte); +			if (!first_pte) +				first_pte = pte; +			last_pte = pte; +		} else if (level > 1) { +			/* Recurse down into a level that isn't *entirely* obsolete */ +			freelist = dma_pte_clear_level(domain, level - 1, +						       phys_to_virt(dma_pte_addr(pte)), +						       level_pfn, start_pfn, last_pfn, +						       freelist); +		} +next: +		pfn += level_size(level); +	} while (!first_pte_in_page(++pte) && pfn <= last_pfn); + +	if (first_pte) +		domain_flush_cache(domain, first_pte, +				   (void *)++last_pte - (void *)first_pte); + +	return freelist; +} + +/* We can't just free the pages because the IOMMU may still be walking +   the page tables, and may have cached the intermediate levels. The +   pages can only be freed after the IOTLB flush has been done. 
*/ +struct page *domain_unmap(struct dmar_domain *domain, +			  unsigned long start_pfn, +			  unsigned long last_pfn) +{ +	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; +	struct page *freelist = NULL; + +	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); +	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); +	BUG_ON(start_pfn > last_pfn); + +	/* we don't need lock here; nobody else touches the iova range */ +	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw), +				       domain->pgd, 0, start_pfn, last_pfn, NULL); + +	/* free pgd */ +	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { +		struct page *pgd_page = virt_to_page(domain->pgd); +		pgd_page->freelist = freelist; +		freelist = pgd_page; + +		domain->pgd = NULL; +	} + +	return freelist; +} + +void dma_free_pagelist(struct page *freelist) +{ +	struct page *pg; + +	while ((pg = freelist)) { +		freelist = pg->freelist; +		free_pgtable_page(page_address(pg)); +	} +} +  /* iommu handling */  static int iommu_alloc_root_entry(struct intel_iommu *iommu)  { @@ -1054,7 +1219,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,  		break;  	case DMA_TLB_PSI_FLUSH:  		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); -		/* Note: always flush non-leaf currently */ +		/* IH bit is passed in as part of address */  		val_iva = size_order | addr;  		break;  	default: @@ -1093,13 +1258,14 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,  			(unsigned long long)DMA_TLB_IAIG(val));  } -static struct device_domain_info *iommu_support_dev_iotlb( -	struct dmar_domain *domain, int segment, u8 bus, u8 devfn) +static struct device_domain_info * +iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, +			 u8 bus, u8 devfn)  {  	int found = 0;  	unsigned long flags;  	struct device_domain_info *info; -	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn); +	struct pci_dev *pdev;  	if (!ecap_dev_iotlb_support(iommu->ecap))  		return NULL; @@ -1115,34 +1281,35 @@ static struct device_domain_info *iommu_support_dev_iotlb(  		}  	spin_unlock_irqrestore(&device_domain_lock, flags); -	if (!found || !info->dev) +	if (!found || !info->dev || !dev_is_pci(info->dev))  		return NULL; -	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS)) -		return NULL; +	pdev = to_pci_dev(info->dev); -	if (!dmar_find_matched_atsr_unit(info->dev)) +	if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))  		return NULL; -	info->iommu = iommu; +	if (!dmar_find_matched_atsr_unit(pdev)) +		return NULL;  	return info;  }  static void iommu_enable_dev_iotlb(struct device_domain_info *info)  { -	if (!info) +	if (!info || !dev_is_pci(info->dev))  		return; -	pci_enable_ats(info->dev, VTD_PAGE_SHIFT); +	pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);  }  static void iommu_disable_dev_iotlb(struct device_domain_info *info)  { -	if (!info->dev || !pci_ats_enabled(info->dev)) +	if (!info->dev || !dev_is_pci(info->dev) || +	    !pci_ats_enabled(to_pci_dev(info->dev)))  		return; -	pci_disable_ats(info->dev); +	pci_disable_ats(to_pci_dev(info->dev));  }  static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -1154,24 +1321,31 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,  	spin_lock_irqsave(&device_domain_lock, flags);  	list_for_each_entry(info, &domain->devices, link) { -		if (!info->dev || !pci_ats_enabled(info->dev)) +		struct pci_dev *pdev; +		if (!info->dev || !dev_is_pci(info->dev)) +			continue; + 
+		pdev = to_pci_dev(info->dev); +		if (!pci_ats_enabled(pdev))  			continue;  		sid = info->bus << 8 | info->devfn; -		qdep = pci_ats_queue_depth(info->dev); +		qdep = pci_ats_queue_depth(pdev);  		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);  	}  	spin_unlock_irqrestore(&device_domain_lock, flags);  }  static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, -				  unsigned long pfn, unsigned int pages, int map) +				  unsigned long pfn, unsigned int pages, int ih, int map)  {  	unsigned int mask = ilog2(__roundup_pow_of_two(pages));  	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;  	BUG_ON(pages == 0); +	if (ih) +		ih = 1 << 6;  	/*  	 * Fallback to domain selective flush if no PSI support or the size is  	 * too big. @@ -1182,7 +1356,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,  		iommu->flush.flush_iotlb(iommu, did, 0, 0,  						DMA_TLB_DSI_FLUSH);  	else -		iommu->flush.flush_iotlb(iommu, did, addr, mask, +		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,  						DMA_TLB_PSI_FLUSH);  	/* @@ -1251,8 +1425,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)  	unsigned long nlongs;  	ndomains = cap_ndoms(iommu->cap); -	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id, -			ndomains); +	pr_debug("IOMMU%d: Number of Domains supported <%ld>\n", +		 iommu->seq_id, ndomains);  	nlongs = BITS_TO_LONGS(ndomains);  	spin_lock_init(&iommu->lock); @@ -1262,13 +1436,17 @@ static int iommu_init_domains(struct intel_iommu *iommu)  	 */  	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);  	if (!iommu->domain_ids) { -		printk(KERN_ERR "Allocating domain id array failed\n"); +		pr_err("IOMMU%d: allocating domain id array failed\n", +		       iommu->seq_id);  		return -ENOMEM;  	}  	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),  			GFP_KERNEL);  	if (!iommu->domains) { -		printk(KERN_ERR "Allocating domain array failed\n"); +		pr_err("IOMMU%d: allocating domain array failed\n", +		       iommu->seq_id); +		kfree(iommu->domain_ids); +		iommu->domain_ids = NULL;  		return -ENOMEM;  	} @@ -1281,62 +1459,50 @@ static int iommu_init_domains(struct intel_iommu *iommu)  	return 0;  } - -static void domain_exit(struct dmar_domain *domain); -static void vm_domain_exit(struct dmar_domain *domain); - -void free_dmar_iommu(struct intel_iommu *iommu) +static void free_dmar_iommu(struct intel_iommu *iommu)  {  	struct dmar_domain *domain; -	int i; +	int i, count;  	unsigned long flags;  	if ((iommu->domains) && (iommu->domain_ids)) {  		for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) { +			/* +			 * Domain id 0 is reserved for invalid translation +			 * if hardware supports caching mode. 
+			 */ +			if (cap_caching_mode(iommu->cap) && i == 0) +				continue; +  			domain = iommu->domains[i];  			clear_bit(i, iommu->domain_ids);  			spin_lock_irqsave(&domain->iommu_lock, flags); -			if (--domain->iommu_count == 0) { -				if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) -					vm_domain_exit(domain); -				else -					domain_exit(domain); -			} +			count = --domain->iommu_count;  			spin_unlock_irqrestore(&domain->iommu_lock, flags); +			if (count == 0) +				domain_exit(domain);  		}  	}  	if (iommu->gcmd & DMA_GCMD_TE)  		iommu_disable_translation(iommu); -	if (iommu->irq) { -		irq_set_handler_data(iommu->irq, NULL); -		/* This will mask the irq */ -		free_irq(iommu->irq, iommu); -		destroy_irq(iommu->irq); -	} -  	kfree(iommu->domains);  	kfree(iommu->domain_ids); +	iommu->domains = NULL; +	iommu->domain_ids = NULL;  	g_iommus[iommu->seq_id] = NULL; -	/* if all iommus are freed, free g_iommus */ -	for (i = 0; i < g_num_of_iommus; i++) { -		if (g_iommus[i]) -			break; -	} - -	if (i == g_num_of_iommus) -		kfree(g_iommus); -  	/* free context mapping */  	free_context_table(iommu);  } -static struct dmar_domain *alloc_domain(void) +static struct dmar_domain *alloc_domain(bool vm)  { +	/* domain id for virtual machine, it won't be set in context */ +	static atomic_t vm_domid = ATOMIC_INIT(0);  	struct dmar_domain *domain;  	domain = alloc_domain_mem(); @@ -1344,8 +1510,15 @@ static struct dmar_domain *alloc_domain(void)  		return NULL;  	domain->nid = -1; +	domain->iommu_count = 0;  	memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));  	domain->flags = 0; +	spin_lock_init(&domain->iommu_lock); +	INIT_LIST_HEAD(&domain->devices); +	if (vm) { +		domain->id = atomic_inc_return(&vm_domid); +		domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; +	}  	return domain;  } @@ -1369,6 +1542,7 @@ static int iommu_attach_domain(struct dmar_domain *domain,  	}  	domain->id = num; +	domain->iommu_count++;  	set_bit(num, iommu->domain_ids);  	set_bit(iommu->seq_id, domain->iommu_bmp);  	iommu->domains[num] = domain; @@ -1382,22 +1556,16 @@ static void iommu_detach_domain(struct dmar_domain *domain,  {  	unsigned long flags;  	int num, ndomains; -	int found = 0;  	spin_lock_irqsave(&iommu->lock, flags);  	ndomains = cap_ndoms(iommu->cap);  	for_each_set_bit(num, iommu->domain_ids, ndomains) {  		if (iommu->domains[num] == domain) { -			found = 1; +			clear_bit(num, iommu->domain_ids); +			iommu->domains[num] = NULL;  			break;  		}  	} - -	if (found) { -		clear_bit(num, iommu->domain_ids); -		clear_bit(iommu->seq_id, domain->iommu_bmp); -		iommu->domains[num] = NULL; -	}  	spin_unlock_irqrestore(&iommu->lock, flags);  } @@ -1469,8 +1637,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)  	unsigned long sagaw;  	init_iova_domain(&domain->iovad, DMA_32BIT_PFN); -	spin_lock_init(&domain->iommu_lock); -  	domain_reserve_special_ranges(domain);  	/* calculate AGAW */ @@ -1489,7 +1655,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)  			return -ENODEV;  	}  	domain->agaw = agaw; -	INIT_LIST_HEAD(&domain->devices);  	if (ecap_coherent(iommu->ecap))  		domain->iommu_coherency = 1; @@ -1501,8 +1666,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width)  	else  		domain->iommu_snooping = 0; -	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); -	domain->iommu_count = 1; +	if (intel_iommu_superpage) +		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); +	else +		domain->iommu_superpage = 0; +  	domain->nid = iommu->node;  	
/* always allocate the top pgd */ @@ -1517,6 +1685,7 @@ static void domain_exit(struct dmar_domain *domain)  {  	struct dmar_drhd_unit *drhd;  	struct intel_iommu *iommu; +	struct page *freelist = NULL;  	/* Domain 0 is reserved, so dont process it */  	if (!domain) @@ -1526,29 +1695,33 @@ static void domain_exit(struct dmar_domain *domain)  	if (!intel_iommu_strict)  		flush_unmaps_timeout(0); +	/* remove associated devices */  	domain_remove_dev_info(domain); +  	/* destroy iovas */  	put_iova_domain(&domain->iovad); -	/* clear ptes */ -	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - -	/* free page tables */ -	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); +	freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); +	/* clear attached or cached domains */ +	rcu_read_lock();  	for_each_active_iommu(iommu, drhd) -		if (test_bit(iommu->seq_id, domain->iommu_bmp)) +		if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || +		    test_bit(iommu->seq_id, domain->iommu_bmp))  			iommu_detach_domain(domain, iommu); +	rcu_read_unlock(); + +	dma_free_pagelist(freelist);  	free_domain_mem(domain);  } -static int domain_context_mapping_one(struct dmar_domain *domain, int segment, -				 u8 bus, u8 devfn, int translation) +static int domain_context_mapping_one(struct dmar_domain *domain, +				      struct intel_iommu *iommu, +				      u8 bus, u8 devfn, int translation)  {  	struct context_entry *context;  	unsigned long flags; -	struct intel_iommu *iommu;  	struct dma_pte *pgd;  	unsigned long num;  	unsigned long ndomains; @@ -1563,10 +1736,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,  	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&  	       translation != CONTEXT_TT_MULTI_LEVEL); -	iommu = device_to_iommu(segment, bus, devfn); -	if (!iommu) -		return -ENODEV; -  	context = device_to_context_entry(iommu, bus, devfn);  	if (!context)  		return -ENOMEM; @@ -1624,7 +1793,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,  	context_set_domain_id(context, id);  	if (translation != CONTEXT_TT_PASS_THROUGH) { -		info = iommu_support_dev_iotlb(domain, segment, bus, devfn); +		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);  		translation = info ? 
CONTEXT_TT_DEV_IOTLB :  				     CONTEXT_TT_MULTI_LEVEL;  	} @@ -1674,27 +1843,32 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,  }  static int -domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, -			int translation) +domain_context_mapping(struct dmar_domain *domain, struct device *dev, +		       int translation)  {  	int ret; -	struct pci_dev *tmp, *parent; +	struct pci_dev *pdev, *tmp, *parent; +	struct intel_iommu *iommu; +	u8 bus, devfn; + +	iommu = device_to_iommu(dev, &bus, &devfn); +	if (!iommu) +		return -ENODEV; -	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), -					 pdev->bus->number, pdev->devfn, +	ret = domain_context_mapping_one(domain, iommu, bus, devfn,  					 translation); -	if (ret) +	if (ret || !dev_is_pci(dev))  		return ret;  	/* dependent device mapping */ +	pdev = to_pci_dev(dev);  	tmp = pci_find_upstream_pcie_bridge(pdev);  	if (!tmp)  		return 0;  	/* Secondary interface's bus number and devfn 0 */  	parent = pdev->bus->self;  	while (parent != tmp) { -		ret = domain_context_mapping_one(domain, -						 pci_domain_nr(parent->bus), +		ret = domain_context_mapping_one(domain, iommu,  						 parent->bus->number,  						 parent->devfn, translation);  		if (ret) @@ -1702,33 +1876,33 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,  		parent = parent->bus->self;  	}  	if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ -		return domain_context_mapping_one(domain, -					pci_domain_nr(tmp->subordinate), +		return domain_context_mapping_one(domain, iommu,  					tmp->subordinate->number, 0,  					translation);  	else /* this is a legacy PCI bridge */ -		return domain_context_mapping_one(domain, -						  pci_domain_nr(tmp->bus), +		return domain_context_mapping_one(domain, iommu,  						  tmp->bus->number,  						  tmp->devfn,  						  translation);  } -static int domain_context_mapped(struct pci_dev *pdev) +static int domain_context_mapped(struct device *dev)  {  	int ret; -	struct pci_dev *tmp, *parent; +	struct pci_dev *pdev, *tmp, *parent;  	struct intel_iommu *iommu; +	u8 bus, devfn; -	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, -				pdev->devfn); +	iommu = device_to_iommu(dev, &bus, &devfn);  	if (!iommu)  		return -ENODEV; -	ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn); -	if (!ret) +	ret = device_context_mapped(iommu, bus, devfn); +	if (!ret || !dev_is_pci(dev))  		return ret; +  	/* dependent device mapping */ +	pdev = to_pci_dev(dev);  	tmp = pci_find_upstream_pcie_bridge(pdev);  	if (!tmp)  		return ret; @@ -1824,7 +1998,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,  		if (!pte) {  			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); -			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl); +			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);  			if (!pte)  				return -ENOMEM;  			/* It is large page*/ @@ -1923,14 +2097,13 @@ static inline void unlink_domain_info(struct device_domain_info *info)  	list_del(&info->link);  	list_del(&info->global);  	if (info->dev) -		info->dev->dev.archdata.iommu = NULL; +		info->dev->archdata.iommu = NULL;  }  static void domain_remove_dev_info(struct dmar_domain *domain)  {  	struct device_domain_info *info; -	unsigned long flags; -	struct intel_iommu *iommu; +	unsigned long flags, flags2;  	spin_lock_irqsave(&device_domain_lock, flags);  	while (!list_empty(&domain->devices)) { @@ 
-1940,10 +2113,23 @@ static void domain_remove_dev_info(struct dmar_domain *domain)  		spin_unlock_irqrestore(&device_domain_lock, flags);  		iommu_disable_dev_iotlb(info); -		iommu = device_to_iommu(info->segment, info->bus, info->devfn); -		iommu_detach_dev(iommu, info->bus, info->devfn); -		free_devinfo_mem(info); +		iommu_detach_dev(info->iommu, info->bus, info->devfn); +		if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { +			iommu_detach_dependent_devices(info->iommu, info->dev); +			/* clear this iommu in iommu_bmp, update iommu count +			 * and capabilities +			 */ +			spin_lock_irqsave(&domain->iommu_lock, flags2); +			if (test_and_clear_bit(info->iommu->seq_id, +					       domain->iommu_bmp)) { +				domain->iommu_count--; +				domain_update_iommu_cap(domain); +			} +			spin_unlock_irqrestore(&domain->iommu_lock, flags2); +		} + +		free_devinfo_mem(info);  		spin_lock_irqsave(&device_domain_lock, flags);  	}  	spin_unlock_irqrestore(&device_domain_lock, flags); @@ -1951,155 +2137,153 @@ static void domain_remove_dev_info(struct dmar_domain *domain)  /*   * find_domain - * Note: we use struct pci_dev->dev.archdata.iommu stores the info + * Note: we use struct device->archdata.iommu stores the info   */ -static struct dmar_domain * -find_domain(struct pci_dev *pdev) +static struct dmar_domain *find_domain(struct device *dev)  {  	struct device_domain_info *info;  	/* No lock here, assumes no domain exit in normal case */ -	info = pdev->dev.archdata.iommu; +	info = dev->archdata.iommu;  	if (info)  		return info->domain;  	return NULL;  } +static inline struct device_domain_info * +dmar_search_domain_by_dev_info(int segment, int bus, int devfn) +{ +	struct device_domain_info *info; + +	list_for_each_entry(info, &device_domain_list, global) +		if (info->iommu->segment == segment && info->bus == bus && +		    info->devfn == devfn) +			return info; + +	return NULL; +} + +static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, +						int bus, int devfn, +						struct device *dev, +						struct dmar_domain *domain) +{ +	struct dmar_domain *found = NULL; +	struct device_domain_info *info; +	unsigned long flags; + +	info = alloc_devinfo_mem(); +	if (!info) +		return NULL; + +	info->bus = bus; +	info->devfn = devfn; +	info->dev = dev; +	info->domain = domain; +	info->iommu = iommu; +	if (!dev) +		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; + +	spin_lock_irqsave(&device_domain_lock, flags); +	if (dev) +		found = find_domain(dev); +	else { +		struct device_domain_info *info2; +		info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); +		if (info2) +			found = info2->domain; +	} +	if (found) { +		spin_unlock_irqrestore(&device_domain_lock, flags); +		free_devinfo_mem(info); +		/* Caller must free the original domain */ +		return found; +	} + +	list_add(&info->link, &domain->devices); +	list_add(&info->global, &device_domain_list); +	if (dev) +		dev->archdata.iommu = info; +	spin_unlock_irqrestore(&device_domain_lock, flags); + +	return domain; +} +  /* domain is initialized */ -static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) +static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)  { -	struct dmar_domain *domain, *found = NULL; -	struct intel_iommu *iommu; -	struct dmar_drhd_unit *drhd; -	struct device_domain_info *info, *tmp; -	struct pci_dev *dev_tmp; +	struct dmar_domain *domain, *free = NULL; +	struct intel_iommu *iommu = NULL; +	struct device_domain_info *info; +	struct pci_dev *dev_tmp = NULL;  	
unsigned long flags; -	int bus = 0, devfn = 0; -	int segment; -	int ret; +	u8 bus, devfn, bridge_bus, bridge_devfn; -	domain = find_domain(pdev); +	domain = find_domain(dev);  	if (domain)  		return domain; -	segment = pci_domain_nr(pdev->bus); +	if (dev_is_pci(dev)) { +		struct pci_dev *pdev = to_pci_dev(dev); +		u16 segment; -	dev_tmp = pci_find_upstream_pcie_bridge(pdev); -	if (dev_tmp) { -		if (pci_is_pcie(dev_tmp)) { -			bus = dev_tmp->subordinate->number; -			devfn = 0; -		} else { -			bus = dev_tmp->bus->number; -			devfn = dev_tmp->devfn; -		} -		spin_lock_irqsave(&device_domain_lock, flags); -		list_for_each_entry(info, &device_domain_list, global) { -			if (info->segment == segment && -			    info->bus == bus && info->devfn == devfn) { -				found = info->domain; -				break; +		segment = pci_domain_nr(pdev->bus); +		dev_tmp = pci_find_upstream_pcie_bridge(pdev); +		if (dev_tmp) { +			if (pci_is_pcie(dev_tmp)) { +				bridge_bus = dev_tmp->subordinate->number; +				bridge_devfn = 0; +			} else { +				bridge_bus = dev_tmp->bus->number; +				bridge_devfn = dev_tmp->devfn;  			} -		} -		spin_unlock_irqrestore(&device_domain_lock, flags); -		/* pcie-pci bridge already has a domain, uses it */ -		if (found) { -			domain = found; -			goto found_domain; +			spin_lock_irqsave(&device_domain_lock, flags); +			info = dmar_search_domain_by_dev_info(segment, +							      bridge_bus, +							      bridge_devfn); +			if (info) { +				iommu = info->iommu; +				domain = info->domain; +			} +			spin_unlock_irqrestore(&device_domain_lock, flags); +			/* pcie-pci bridge already has a domain, uses it */ +			if (info) +				goto found_domain;  		}  	} -	domain = alloc_domain(); -	if (!domain) +	iommu = device_to_iommu(dev, &bus, &devfn); +	if (!iommu)  		goto error; -	/* Allocate new domain for the device */ -	drhd = dmar_find_matched_drhd_unit(pdev); -	if (!drhd) { -		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n", -			pci_name(pdev)); -		free_domain_mem(domain); -		return NULL; -	} -	iommu = drhd->iommu; - -	ret = iommu_attach_domain(domain, iommu); -	if (ret) { +	/* Allocate and initialize new domain for the device */ +	domain = alloc_domain(false); +	if (!domain) +		goto error; +	if (iommu_attach_domain(domain, iommu)) {  		free_domain_mem(domain); +		domain = NULL;  		goto error;  	} - -	if (domain_init(domain, gaw)) { -		domain_exit(domain); +	free = domain; +	if (domain_init(domain, gaw))  		goto error; -	}  	/* register pcie-to-pci device */  	if (dev_tmp) { -		info = alloc_devinfo_mem(); -		if (!info) { -			domain_exit(domain); +		domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn, +					      NULL, domain); +		if (!domain)  			goto error; -		} -		info->segment = segment; -		info->bus = bus; -		info->devfn = devfn; -		info->dev = NULL; -		info->domain = domain; -		/* This domain is shared by devices under p2p bridge */ -		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; - -		/* pcie-to-pci bridge already has a domain, uses it */ -		found = NULL; -		spin_lock_irqsave(&device_domain_lock, flags); -		list_for_each_entry(tmp, &device_domain_list, global) { -			if (tmp->segment == segment && -			    tmp->bus == bus && tmp->devfn == devfn) { -				found = tmp->domain; -				break; -			} -		} -		if (found) { -			spin_unlock_irqrestore(&device_domain_lock, flags); -			free_devinfo_mem(info); -			domain_exit(domain); -			domain = found; -		} else { -			list_add(&info->link, &domain->devices); -			list_add(&info->global, &device_domain_list); -			
spin_unlock_irqrestore(&device_domain_lock, flags); -		}  	}  found_domain: -	info = alloc_devinfo_mem(); -	if (!info) -		goto error; -	info->segment = segment; -	info->bus = pdev->bus->number; -	info->devfn = pdev->devfn; -	info->dev = pdev; -	info->domain = domain; -	spin_lock_irqsave(&device_domain_lock, flags); -	/* somebody is fast */ -	found = find_domain(pdev); -	if (found != NULL) { -		spin_unlock_irqrestore(&device_domain_lock, flags); -		if (found != domain) { -			domain_exit(domain); -			domain = found; -		} -		free_devinfo_mem(info); -		return domain; -	} -	list_add(&info->link, &domain->devices); -	list_add(&info->global, &device_domain_list); -	pdev->dev.archdata.iommu = info; -	spin_unlock_irqrestore(&device_domain_lock, flags); -	return domain; +	domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);  error: -	/* recheck it here, maybe others set it */ -	return find_domain(pdev); +	if (free != domain) +		domain_exit(free); + +	return domain;  }  static int iommu_identity_mapping; @@ -2133,14 +2317,14 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,  				  DMA_PTE_READ|DMA_PTE_WRITE);  } -static int iommu_prepare_identity_map(struct pci_dev *pdev, +static int iommu_prepare_identity_map(struct device *dev,  				      unsigned long long start,  				      unsigned long long end)  {  	struct dmar_domain *domain;  	int ret; -	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); +	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);  	if (!domain)  		return -ENOMEM; @@ -2150,13 +2334,13 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,  	   up to start with in si_domain */  	if (domain == si_domain && hw_pass_through) {  		printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", -		       pci_name(pdev), start, end); +		       dev_name(dev), start, end);  		return 0;  	}  	printk(KERN_INFO  	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", -	       pci_name(pdev), start, end); +	       dev_name(dev), start, end);  	if (end < start) {  		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" @@ -2184,7 +2368,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,  		goto error;  	/* context entry init */ -	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); +	ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);  	if (ret)  		goto error; @@ -2196,12 +2380,12 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,  }  static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, -	struct pci_dev *pdev) +					 struct device *dev)  { -	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) +	if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)  		return 0; -	return iommu_prepare_identity_map(pdev, rmrr->base_address, -		rmrr->end_address); +	return iommu_prepare_identity_map(dev, rmrr->base_address, +					  rmrr->end_address);  }  #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA @@ -2215,7 +2399,7 @@ static inline void iommu_prepare_isa(void)  		return;  	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); -	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1); +	ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);  	if (ret)  		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " @@ -2237,11 +2421,11 @@ static int __init si_domain_init(int hw)  	struct intel_iommu *iommu;  	int nid, ret = 0; -	si_domain = alloc_domain(); +	si_domain = alloc_domain(false);  	if (!si_domain)  		return 
-EFAULT; -	pr_debug("Identity mapping domain is domain %d\n", si_domain->id); +	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;  	for_each_active_iommu(iommu, drhd) {  		ret = iommu_attach_domain(si_domain, iommu); @@ -2256,7 +2440,8 @@ static int __init si_domain_init(int hw)  		return -EFAULT;  	} -	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; +	pr_debug("IOMMU: identity mapping domain is domain %d\n", +		 si_domain->id);  	if (hw)  		return 0; @@ -2276,16 +2461,14 @@ static int __init si_domain_init(int hw)  	return 0;  } -static void domain_remove_one_dev_info(struct dmar_domain *domain, -					  struct pci_dev *pdev); -static int identity_mapping(struct pci_dev *pdev) +static int identity_mapping(struct device *dev)  {  	struct device_domain_info *info;  	if (likely(!iommu_identity_mapping))  		return 0; -	info = pdev->dev.archdata.iommu; +	info = dev->archdata.iommu;  	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)  		return (info->domain == si_domain); @@ -2293,111 +2476,112 @@ static int identity_mapping(struct pci_dev *pdev)  }  static int domain_add_dev_info(struct dmar_domain *domain, -			       struct pci_dev *pdev, -			       int translation) +			       struct device *dev, int translation)  { -	struct device_domain_info *info; -	unsigned long flags; +	struct dmar_domain *ndomain; +	struct intel_iommu *iommu; +	u8 bus, devfn;  	int ret; -	info = alloc_devinfo_mem(); -	if (!info) -		return -ENOMEM; +	iommu = device_to_iommu(dev, &bus, &devfn); +	if (!iommu) +		return -ENODEV; -	info->segment = pci_domain_nr(pdev->bus); -	info->bus = pdev->bus->number; -	info->devfn = pdev->devfn; -	info->dev = pdev; -	info->domain = domain; +	ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); +	if (ndomain != domain) +		return -EBUSY; -	spin_lock_irqsave(&device_domain_lock, flags); -	list_add(&info->link, &domain->devices); -	list_add(&info->global, &device_domain_list); -	pdev->dev.archdata.iommu = info; -	spin_unlock_irqrestore(&device_domain_lock, flags); - -	ret = domain_context_mapping(domain, pdev, translation); +	ret = domain_context_mapping(domain, dev, translation);  	if (ret) { -		spin_lock_irqsave(&device_domain_lock, flags); -		unlink_domain_info(info); -		spin_unlock_irqrestore(&device_domain_lock, flags); -		free_devinfo_mem(info); +		domain_remove_one_dev_info(domain, dev);  		return ret;  	}  	return 0;  } -static bool device_has_rmrr(struct pci_dev *dev) +static bool device_has_rmrr(struct device *dev)  {  	struct dmar_rmrr_unit *rmrr; +	struct device *tmp;  	int i; +	rcu_read_lock();  	for_each_rmrr_units(rmrr) { -		for (i = 0; i < rmrr->devices_cnt; i++) { -			/* -			 * Return TRUE if this RMRR contains the device that -			 * is passed in. -			 */ -			if (rmrr->devices[i] == dev) +		/* +		 * Return TRUE if this RMRR contains the device that +		 * is passed in. +		 */ +		for_each_active_dev_scope(rmrr->devices, +					  rmrr->devices_cnt, i, tmp) +			if (tmp == dev) { +				rcu_read_unlock();  				return true; -		} +			}  	} +	rcu_read_unlock();  	return false;  } -static int iommu_should_identity_map(struct pci_dev *pdev, int startup) +static int iommu_should_identity_map(struct device *dev, int startup)  { -	/* -	 * We want to prevent any device associated with an RMRR from -	 * getting placed into the SI Domain. This is done because -	 * problems exist when devices are moved in and out of domains -	 * and their respective RMRR info is lost. 
We exempt USB devices -	 * from this process due to their usage of RMRRs that are known -	 * to not be needed after BIOS hand-off to OS. -	 */ -	if (device_has_rmrr(pdev) && -	    (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) -		return 0; +	if (dev_is_pci(dev)) { +		struct pci_dev *pdev = to_pci_dev(dev); -	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) -		return 1; +		/* +		 * We want to prevent any device associated with an RMRR from +		 * getting placed into the SI Domain. This is done because +		 * problems exist when devices are moved in and out of domains +		 * and their respective RMRR info is lost. We exempt USB devices +		 * from this process due to their usage of RMRRs that are known +		 * to not be needed after BIOS hand-off to OS. +		 */ +		if (device_has_rmrr(dev) && +		    (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) +			return 0; -	if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) -		return 1; +		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) +			return 1; -	if (!(iommu_identity_mapping & IDENTMAP_ALL)) -		return 0; +		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) +			return 1; -	/* -	 * We want to start off with all devices in the 1:1 domain, and -	 * take them out later if we find they can't access all of memory. -	 * -	 * However, we can't do this for PCI devices behind bridges, -	 * because all PCI devices behind the same bridge will end up -	 * with the same source-id on their transactions. -	 * -	 * Practically speaking, we can't change things around for these -	 * devices at run-time, because we can't be sure there'll be no -	 * DMA transactions in flight for any of their siblings. -	 *  -	 * So PCI devices (unless they're on the root bus) as well as -	 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of -	 * the 1:1 domain, just in _case_ one of their siblings turns out -	 * not to be able to map all of memory. -	 */ -	if (!pci_is_pcie(pdev)) { -		if (!pci_is_root_bus(pdev->bus)) +		if (!(iommu_identity_mapping & IDENTMAP_ALL)) +			return 0; + +		/* +		 * We want to start off with all devices in the 1:1 domain, and +		 * take them out later if we find they can't access all of memory. +		 * +		 * However, we can't do this for PCI devices behind bridges, +		 * because all PCI devices behind the same bridge will end up +		 * with the same source-id on their transactions. +		 * +		 * Practically speaking, we can't change things around for these +		 * devices at run-time, because we can't be sure there'll be no +		 * DMA transactions in flight for any of their siblings. +		 * +		 * So PCI devices (unless they're on the root bus) as well as +		 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of +		 * the 1:1 domain, just in _case_ one of their siblings turns out +		 * not to be able to map all of memory. +		 */ +		if (!pci_is_pcie(pdev)) { +			if (!pci_is_root_bus(pdev->bus)) +				return 0; +			if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) +				return 0; +		} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)  			return 0; -		if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) +	} else { +		if (device_has_rmrr(dev))  			return 0; -	} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) -		return 0; +	} -	/*  +	/*  	 * At boot time, we don't yet know if devices will be 64-bit capable. -	 * Assume that they will -- if they turn out not to be, then we can  +	 * Assume that they will — if they turn out not to be, then we can  	 * take them out of the 1:1 domain later.  	 
*/  	if (!startup) { @@ -2405,42 +2589,77 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)  		 * If the device's dma_mask is less than the system's memory  		 * size then this is not a candidate for identity mapping.  		 */ -		u64 dma_mask = pdev->dma_mask; +		u64 dma_mask = *dev->dma_mask; -		if (pdev->dev.coherent_dma_mask && -		    pdev->dev.coherent_dma_mask < dma_mask) -			dma_mask = pdev->dev.coherent_dma_mask; +		if (dev->coherent_dma_mask && +		    dev->coherent_dma_mask < dma_mask) +			dma_mask = dev->coherent_dma_mask; -		return dma_mask >= dma_get_required_mask(&pdev->dev); +		return dma_mask >= dma_get_required_mask(dev);  	}  	return 1;  } +static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) +{ +	int ret; + +	if (!iommu_should_identity_map(dev, 1)) +		return 0; + +	ret = domain_add_dev_info(si_domain, dev, +				  hw ? CONTEXT_TT_PASS_THROUGH : +				       CONTEXT_TT_MULTI_LEVEL); +	if (!ret) +		pr_info("IOMMU: %s identity mapping for device %s\n", +			hw ? "hardware" : "software", dev_name(dev)); +	else if (ret == -ENODEV) +		/* device not associated with an iommu */ +		ret = 0; + +	return ret; +} + +  static int __init iommu_prepare_static_identity_mapping(int hw)  {  	struct pci_dev *pdev = NULL; -	int ret; +	struct dmar_drhd_unit *drhd; +	struct intel_iommu *iommu; +	struct device *dev; +	int i; +	int ret = 0;  	ret = si_domain_init(hw);  	if (ret)  		return -EFAULT;  	for_each_pci_dev(pdev) { -		if (iommu_should_identity_map(pdev, 1)) { -			ret = domain_add_dev_info(si_domain, pdev, -					     hw ? CONTEXT_TT_PASS_THROUGH : -						  CONTEXT_TT_MULTI_LEVEL); -			if (ret) { -				/* device not associated with an iommu */ -				if (ret == -ENODEV) -					continue; -				return ret; +		ret = dev_prepare_static_identity_mapping(&pdev->dev, hw); +		if (ret) +			return ret; +	} + +	for_each_active_iommu(iommu, drhd) +		for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { +			struct acpi_device_physical_node *pn; +			struct acpi_device *adev; + +			if (dev->bus != &acpi_bus_type) +				continue; +				 +			adev= to_acpi_device(dev); +			mutex_lock(&adev->physical_node_lock); +			list_for_each_entry(pn, &adev->physical_node_list, node) { +				ret = dev_prepare_static_identity_mapping(pn->dev, hw); +				if (ret) +					break;  			} -			pr_info("IOMMU: %s identity mapping for device %s\n", -				hw ? 
"hardware" : "software", pci_name(pdev)); +			mutex_unlock(&adev->physical_node_lock); +			if (ret) +				return ret;  		} -	}  	return 0;  } @@ -2449,7 +2668,7 @@ static int __init init_dmars(void)  {  	struct dmar_drhd_unit *drhd;  	struct dmar_rmrr_unit *rmrr; -	struct pci_dev *pdev; +	struct device *dev;  	struct intel_iommu *iommu;  	int i, ret; @@ -2485,19 +2704,15 @@ static int __init init_dmars(void)  		sizeof(struct deferred_flush_tables), GFP_KERNEL);  	if (!deferred_flush) {  		ret = -ENOMEM; -		goto error; +		goto free_g_iommus;  	} -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; - -		iommu = drhd->iommu; +	for_each_active_iommu(iommu, drhd) {  		g_iommus[iommu->seq_id] = iommu;  		ret = iommu_init_domains(iommu);  		if (ret) -			goto error; +			goto free_iommu;  		/*  		 * TBD: @@ -2507,7 +2722,7 @@ static int __init init_dmars(void)  		ret = iommu_alloc_root_entry(iommu);  		if (ret) {  			printk(KERN_ERR "IOMMU: allocate root entry failed\n"); -			goto error; +			goto free_iommu;  		}  		if (!ecap_pass_through(iommu->ecap))  			hw_pass_through = 0; @@ -2516,12 +2731,7 @@ static int __init init_dmars(void)  	/*  	 * Start from the sane iommu hardware state.  	 */ -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; - -		iommu = drhd->iommu; - +	for_each_active_iommu(iommu, drhd) {  		/*  		 * If the queued invalidation is already initialized by us  		 * (for example, while enabling interrupt-remapping) then @@ -2541,12 +2751,7 @@ static int __init init_dmars(void)  		dmar_disable_qi(iommu);  	} -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; - -		iommu = drhd->iommu; - +	for_each_active_iommu(iommu, drhd) {  		if (dmar_enable_qi(iommu)) {  			/*  			 * Queued Invalidate not enabled, use Register Based @@ -2586,7 +2791,7 @@ static int __init init_dmars(void)  		ret = iommu_prepare_static_identity_mapping(hw_pass_through);  		if (ret) {  			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); -			goto error; +			goto free_iommu;  		}  	}  	/* @@ -2605,15 +2810,10 @@ static int __init init_dmars(void)  	 */  	printk(KERN_INFO "IOMMU: Setting RMRR:\n");  	for_each_rmrr_units(rmrr) { -		for (i = 0; i < rmrr->devices_cnt; i++) { -			pdev = rmrr->devices[i]; -			/* -			 * some BIOS lists non-exist devices in DMAR -			 * table. -			 */ -			if (!pdev) -				continue; -			ret = iommu_prepare_rmrr_dev(rmrr, pdev); +		/* some BIOS lists non-exist devices in DMAR table. 
*/ +		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, +					  i, dev) { +			ret = iommu_prepare_rmrr_dev(rmrr, dev);  			if (ret)  				printk(KERN_ERR  				       "IOMMU: mapping reserved region failed\n"); @@ -2629,23 +2829,22 @@ static int __init init_dmars(void)  	 *   global invalidate iotlb  	 *   enable translation  	 */ -	for_each_drhd_unit(drhd) { +	for_each_iommu(iommu, drhd) {  		if (drhd->ignored) {  			/*  			 * we always have to disable PMRs or DMA may fail on  			 * this device  			 */  			if (force_on) -				iommu_disable_protect_mem_regions(drhd->iommu); +				iommu_disable_protect_mem_regions(iommu);  			continue;  		} -		iommu = drhd->iommu;  		iommu_flush_write_buffer(iommu);  		ret = dmar_set_interrupt(iommu);  		if (ret) -			goto error; +			goto free_iommu;  		iommu_set_root_entry(iommu); @@ -2654,20 +2853,20 @@ static int __init init_dmars(void)  		ret = iommu_enable_translation(iommu);  		if (ret) -			goto error; +			goto free_iommu;  		iommu_disable_protect_mem_regions(iommu);  	}  	return 0; -error: -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; -		iommu = drhd->iommu; -		free_iommu(iommu); -	} + +free_iommu: +	for_each_active_iommu(iommu, drhd) +		free_dmar_iommu(iommu); +	kfree(deferred_flush); +free_g_iommus:  	kfree(g_iommus); +error:  	return ret;  } @@ -2676,7 +2875,6 @@ static struct iova *intel_alloc_iova(struct device *dev,  				     struct dmar_domain *domain,  				     unsigned long nrpages, uint64_t dma_mask)  { -	struct pci_dev *pdev = to_pci_dev(dev);  	struct iova *iova = NULL;  	/* Restrict dma_mask to the width that the iommu can handle */ @@ -2696,34 +2894,31 @@ static struct iova *intel_alloc_iova(struct device *dev,  	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);  	if (unlikely(!iova)) {  		printk(KERN_ERR "Allocating %ld-page iova for %s failed", -		       nrpages, pci_name(pdev)); +		       nrpages, dev_name(dev));  		return NULL;  	}  	return iova;  } -static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) +static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)  {  	struct dmar_domain *domain;  	int ret; -	domain = get_domain_for_dev(pdev, -			DEFAULT_DOMAIN_ADDRESS_WIDTH); +	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);  	if (!domain) { -		printk(KERN_ERR -			"Allocating domain for %s failed", pci_name(pdev)); +		printk(KERN_ERR "Allocating domain for %s failed", +		       dev_name(dev));  		return NULL;  	}  	/* make sure context mapping is ok */ -	if (unlikely(!domain_context_mapped(pdev))) { -		ret = domain_context_mapping(domain, pdev, -					     CONTEXT_TT_MULTI_LEVEL); +	if (unlikely(!domain_context_mapped(dev))) { +		ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);  		if (ret) { -			printk(KERN_ERR -				"Domain context map for %s failed", -				pci_name(pdev)); +			printk(KERN_ERR "Domain context map for %s failed", +			       dev_name(dev));  			return NULL;  		}  	} @@ -2731,51 +2926,46 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)  	return domain;  } -static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev) +static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)  {  	struct device_domain_info *info;  	/* No lock here, assumes no domain exit in normal case */ -	info = dev->dev.archdata.iommu; +	info = dev->archdata.iommu;  	if (likely(info))  		return info->domain;  	return __get_valid_domain_for_dev(dev);  } -static int 
iommu_dummy(struct pci_dev *pdev) +static int iommu_dummy(struct device *dev)  { -	return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; +	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;  } -/* Check if the pdev needs to go through non-identity map and unmap process.*/ +/* Check if the dev needs to go through non-identity map and unmap process.*/  static int iommu_no_mapping(struct device *dev)  { -	struct pci_dev *pdev;  	int found; -	if (unlikely(dev->bus != &pci_bus_type)) -		return 1; - -	pdev = to_pci_dev(dev); -	if (iommu_dummy(pdev)) +	if (iommu_dummy(dev))  		return 1;  	if (!iommu_identity_mapping)  		return 0; -	found = identity_mapping(pdev); +	found = identity_mapping(dev);  	if (found) { -		if (iommu_should_identity_map(pdev, 0)) +		if (iommu_should_identity_map(dev, 0))  			return 1;  		else {  			/*  			 * 32 bit DMA is removed from si_domain and fall back  			 * to non-identity mapping.  			 */ -			domain_remove_one_dev_info(si_domain, pdev); +			domain_remove_one_dev_info(si_domain, dev);  			printk(KERN_INFO "32bit %s uses non-identity mapping\n", -			       pci_name(pdev)); +			       dev_name(dev));  			return 0;  		}  	} else { @@ -2783,15 +2973,15 @@ static int iommu_no_mapping(struct device *dev)  		 * In case of a detached 64 bit DMA device from vm, the device  		 * is put into si_domain for identity mapping.  		 */ -		if (iommu_should_identity_map(pdev, 0)) { +		if (iommu_should_identity_map(dev, 0)) {  			int ret; -			ret = domain_add_dev_info(si_domain, pdev, +			ret = domain_add_dev_info(si_domain, dev,  						  hw_pass_through ?  						  CONTEXT_TT_PASS_THROUGH :  						  CONTEXT_TT_MULTI_LEVEL);  			if (!ret) {  				printk(KERN_INFO "64bit %s uses identity mapping\n", -				       pci_name(pdev)); +				       dev_name(dev));  				return 1;  			}  		} @@ -2800,10 +2990,9 @@ static int iommu_no_mapping(struct device *dev)  	return 0;  } -static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, +static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,  				     size_t size, int dir, u64 dma_mask)  { -	struct pci_dev *pdev = to_pci_dev(hwdev);  	struct dmar_domain *domain;  	phys_addr_t start_paddr;  	struct iova *iova; @@ -2814,17 +3003,17 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,  	BUG_ON(dir == DMA_NONE); -	if (iommu_no_mapping(hwdev)) +	if (iommu_no_mapping(dev))  		return paddr; -	domain = get_valid_domain_for_dev(pdev); +	domain = get_valid_domain_for_dev(dev);  	if (!domain)  		return 0;  	iommu = domain_get_iommu(domain);  	size = aligned_nrpages(paddr, size); -	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask); +	iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);  	if (!iova)  		goto error; @@ -2850,7 +3039,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,  	/* it's a non-present to present mapping. 
Only flush if caching mode */  	if (cap_caching_mode(iommu->cap)) -		iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1); +		iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);  	else  		iommu_flush_write_buffer(iommu); @@ -2862,7 +3051,7 @@ error:  	if (iova)  		__free_iova(&domain->iovad, iova);  	printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n", -		pci_name(pdev), size, (unsigned long long)paddr, dir); +		dev_name(dev), size, (unsigned long long)paddr, dir);  	return 0;  } @@ -2872,7 +3061,7 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,  				 struct dma_attrs *attrs)  {  	return __intel_map_single(dev, page_to_phys(page) + offset, size, -				  dir, to_pci_dev(dev)->dma_mask); +				  dir, *dev->dma_mask);  }  static void flush_unmaps(void) @@ -2902,13 +3091,16 @@ static void flush_unmaps(void)  			/* On real hardware multiple invalidations are expensive */  			if (cap_caching_mode(iommu->cap))  				iommu_flush_iotlb_psi(iommu, domain->id, -				iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0); +					iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, +					!deferred_flush[i].freelist[j], 0);  			else {  				mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));  				iommu_flush_dev_iotlb(deferred_flush[i].domain[j],  						(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);  			}  			__free_iova(&deferred_flush[i].domain[j]->iovad, iova); +			if (deferred_flush[i].freelist[j]) +				dma_free_pagelist(deferred_flush[i].freelist[j]);  		}  		deferred_flush[i].next = 0;  	} @@ -2925,7 +3117,7 @@ static void flush_unmaps_timeout(unsigned long data)  	spin_unlock_irqrestore(&async_umap_flush_lock, flags);  } -static void add_unmap(struct dmar_domain *dom, struct iova *iova) +static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)  {  	unsigned long flags;  	int next, iommu_id; @@ -2941,6 +3133,7 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)  	next = deferred_flush[iommu_id].next;  	deferred_flush[iommu_id].domain[next] = dom;  	deferred_flush[iommu_id].iova[next] = iova; +	deferred_flush[iommu_id].freelist[next] = freelist;  	deferred_flush[iommu_id].next++;  	if (!timer_on) { @@ -2955,16 +3148,16 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,  			     size_t size, enum dma_data_direction dir,  			     struct dma_attrs *attrs)  { -	struct pci_dev *pdev = to_pci_dev(dev);  	struct dmar_domain *domain;  	unsigned long start_pfn, last_pfn;  	struct iova *iova;  	struct intel_iommu *iommu; +	struct page *freelist;  	if (iommu_no_mapping(dev))  		return; -	domain = find_domain(pdev); +	domain = find_domain(dev);  	BUG_ON(!domain);  	iommu = domain_get_iommu(domain); @@ -2978,21 +3171,18 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,  	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;  	pr_debug("Device %s unmapping: pfn %lx-%lx\n", -		 pci_name(pdev), start_pfn, last_pfn); +		 dev_name(dev), start_pfn, last_pfn); -	/*  clear the whole page */ -	dma_pte_clear_range(domain, start_pfn, last_pfn); - -	/* free page tables */ -	dma_pte_free_pagetable(domain, start_pfn, last_pfn); +	freelist = domain_unmap(domain, start_pfn, last_pfn);  	if (intel_iommu_strict) {  		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, -				      last_pfn - start_pfn + 1, 0); +				      last_pfn - start_pfn + 1, !freelist, 0);  		/* free iova */  		__free_iova(&domain->iovad, iova); +		dma_free_pagelist(freelist);  	} else 
{ -		add_unmap(domain, iova); +		add_unmap(domain, iova, freelist);  		/*  		 * queue up the release of the unmap to save the 1/6th of the  		 * cpu used up by the iotlb flush operation... @@ -3000,65 +3190,81 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,  	}  } -static void *intel_alloc_coherent(struct device *hwdev, size_t size, +static void *intel_alloc_coherent(struct device *dev, size_t size,  				  dma_addr_t *dma_handle, gfp_t flags,  				  struct dma_attrs *attrs)  { -	void *vaddr; +	struct page *page = NULL;  	int order;  	size = PAGE_ALIGN(size);  	order = get_order(size); -	if (!iommu_no_mapping(hwdev)) +	if (!iommu_no_mapping(dev))  		flags &= ~(GFP_DMA | GFP_DMA32); -	else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) { -		if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32)) +	else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) { +		if (dev->coherent_dma_mask < DMA_BIT_MASK(32))  			flags |= GFP_DMA;  		else  			flags |= GFP_DMA32;  	} -	vaddr = (void *)__get_free_pages(flags, order); -	if (!vaddr) +	if (flags & __GFP_WAIT) { +		unsigned int count = size >> PAGE_SHIFT; + +		page = dma_alloc_from_contiguous(dev, count, order); +		if (page && iommu_no_mapping(dev) && +		    page_to_phys(page) + size > dev->coherent_dma_mask) { +			dma_release_from_contiguous(dev, page, count); +			page = NULL; +		} +	} + +	if (!page) +		page = alloc_pages(flags, order); +	if (!page)  		return NULL; -	memset(vaddr, 0, size); +	memset(page_address(page), 0, size); -	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size, +	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,  					 DMA_BIDIRECTIONAL, -					 hwdev->coherent_dma_mask); +					 dev->coherent_dma_mask);  	if (*dma_handle) -		return vaddr; -	free_pages((unsigned long)vaddr, order); +		return page_address(page); +	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) +		__free_pages(page, order); +  	return NULL;  } -static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, +static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,  				dma_addr_t dma_handle, struct dma_attrs *attrs)  {  	int order; +	struct page *page = virt_to_page(vaddr);  	size = PAGE_ALIGN(size);  	order = get_order(size); -	intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); -	free_pages((unsigned long)vaddr, order); +	intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); +	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) +		__free_pages(page, order);  } -static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, +static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,  			   int nelems, enum dma_data_direction dir,  			   struct dma_attrs *attrs)  { -	struct pci_dev *pdev = to_pci_dev(hwdev);  	struct dmar_domain *domain;  	unsigned long start_pfn, last_pfn;  	struct iova *iova;  	struct intel_iommu *iommu; +	struct page *freelist; -	if (iommu_no_mapping(hwdev)) +	if (iommu_no_mapping(dev))  		return; -	domain = find_domain(pdev); +	domain = find_domain(dev);  	BUG_ON(!domain);  	iommu = domain_get_iommu(domain); @@ -3071,19 +3277,16 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,  	start_pfn = mm_to_dma_pfn(iova->pfn_lo);  	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; -	/*  clear the whole page */ -	dma_pte_clear_range(domain, start_pfn, last_pfn); - -	/* free page tables */ -	dma_pte_free_pagetable(domain, start_pfn, last_pfn); 
+	freelist = domain_unmap(domain, start_pfn, last_pfn);  	if (intel_iommu_strict) {  		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, -				      last_pfn - start_pfn + 1, 0); +				      last_pfn - start_pfn + 1, !freelist, 0);  		/* free iova */  		__free_iova(&domain->iovad, iova); +		dma_free_pagelist(freelist);  	} else { -		add_unmap(domain, iova); +		add_unmap(domain, iova, freelist);  		/*  		 * queue up the release of the unmap to save the 1/6th of the  		 * cpu used up by the iotlb flush operation... @@ -3105,11 +3308,10 @@ static int intel_nontranslate_map_sg(struct device *hddev,  	return nelems;  } -static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, +static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,  			enum dma_data_direction dir, struct dma_attrs *attrs)  {  	int i; -	struct pci_dev *pdev = to_pci_dev(hwdev);  	struct dmar_domain *domain;  	size_t size = 0;  	int prot = 0; @@ -3120,10 +3322,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne  	struct intel_iommu *iommu;  	BUG_ON(dir == DMA_NONE); -	if (iommu_no_mapping(hwdev)) -		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); +	if (iommu_no_mapping(dev)) +		return intel_nontranslate_map_sg(dev, sglist, nelems, dir); -	domain = get_valid_domain_for_dev(pdev); +	domain = get_valid_domain_for_dev(dev);  	if (!domain)  		return 0; @@ -3132,8 +3334,8 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne  	for_each_sg(sglist, sg, nelems, i)  		size += aligned_nrpages(sg->offset, sg->length); -	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), -				pdev->dma_mask); +	iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), +				*dev->dma_mask);  	if (!iova) {  		sglist->dma_length = 0;  		return 0; @@ -3166,7 +3368,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne  	/* it's a non-present to present mapping. 
Only flush if caching mode */  	if (cap_caching_mode(iommu->cap)) -		iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1); +		iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);  	else  		iommu_flush_write_buffer(iommu); @@ -3301,29 +3503,28 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quir  static void __init init_no_remapping_devices(void)  {  	struct dmar_drhd_unit *drhd; +	struct device *dev; +	int i;  	for_each_drhd_unit(drhd) {  		if (!drhd->include_all) { -			int i; -			for (i = 0; i < drhd->devices_cnt; i++) -				if (drhd->devices[i] != NULL) -					break; -			/* ignore DMAR unit if no pci devices exist */ +			for_each_active_dev_scope(drhd->devices, +						  drhd->devices_cnt, i, dev) +				break; +			/* ignore DMAR unit if no devices exist */  			if (i == drhd->devices_cnt)  				drhd->ignored = 1;  		}  	} -	for_each_drhd_unit(drhd) { -		int i; -		if (drhd->ignored || drhd->include_all) +	for_each_active_drhd_unit(drhd) { +		if (drhd->include_all)  			continue; -		for (i = 0; i < drhd->devices_cnt; i++) -			if (drhd->devices[i] && -			    !IS_GFX_DEVICE(drhd->devices[i])) +		for_each_active_dev_scope(drhd->devices, +					  drhd->devices_cnt, i, dev) +			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))  				break; -  		if (i < drhd->devices_cnt)  			continue; @@ -3333,11 +3534,9 @@ static void __init init_no_remapping_devices(void)  			intel_iommu_gfx_mapped = 1;  		} else {  			drhd->ignored = 1; -			for (i = 0; i < drhd->devices_cnt; i++) { -				if (!drhd->devices[i]) -					continue; -				drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; -			} +			for_each_active_dev_scope(drhd->devices, +						  drhd->devices_cnt, i, dev) +				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;  		}  	}  } @@ -3480,13 +3679,6 @@ static void __init init_iommu_pm_ops(void)  static inline void init_iommu_pm_ops(void) {}  #endif	/* CONFIG_PM */ -LIST_HEAD(dmar_rmrr_units); - -static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) -{ -	list_add(&rmrr->list, &dmar_rmrr_units); -} -  int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)  { @@ -3501,30 +3693,18 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)  	rmrr = (struct acpi_dmar_reserved_memory *)header;  	rmrru->base_address = rmrr->base_address;  	rmrru->end_address = rmrr->end_address; - -	dmar_register_rmrr_unit(rmrru); -	return 0; -} - -static int __init -rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) -{ -	struct acpi_dmar_reserved_memory *rmrr; -	int ret; - -	rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr; -	ret = dmar_parse_dev_scope((void *)(rmrr + 1), -		((void *)rmrr) + rmrr->header.length, -		&rmrru->devices_cnt, &rmrru->devices, rmrr->segment); - -	if (ret || (rmrru->devices_cnt == 0)) { -		list_del(&rmrru->list); +	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), +				((void *)rmrr) + rmrr->header.length, +				&rmrru->devices_cnt); +	if (rmrru->devices_cnt && rmrru->devices == NULL) {  		kfree(rmrru); +		return -ENOMEM;  	} -	return ret; -} -static LIST_HEAD(dmar_atsr_units); +	list_add(&rmrru->list, &dmar_rmrr_units); + +	return 0; +}  int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)  { @@ -3538,91 +3718,134 @@ int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)  	atsru->hdr = hdr;  	atsru->include_all = atsr->flags & 0x1; +	if (!atsru->include_all) { +		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1), +				(void *)atsr + atsr->header.length, +				&atsru->devices_cnt); +	
	if (atsru->devices_cnt && atsru->devices == NULL) { +			kfree(atsru); +			return -ENOMEM; +		} +	} -	list_add(&atsru->list, &dmar_atsr_units); +	list_add_rcu(&atsru->list, &dmar_atsr_units);  	return 0;  } -static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) +static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)  { -	int rc; -	struct acpi_dmar_atsr *atsr; +	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt); +	kfree(atsru); +} -	if (atsru->include_all) -		return 0; +static void intel_iommu_free_dmars(void) +{ +	struct dmar_rmrr_unit *rmrru, *rmrr_n; +	struct dmar_atsr_unit *atsru, *atsr_n; -	atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); -	rc = dmar_parse_dev_scope((void *)(atsr + 1), -				(void *)atsr + atsr->header.length, -				&atsru->devices_cnt, &atsru->devices, -				atsr->segment); -	if (rc || !atsru->devices_cnt) { -		list_del(&atsru->list); -		kfree(atsru); +	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { +		list_del(&rmrru->list); +		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); +		kfree(rmrru);  	} -	return rc; +	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) { +		list_del(&atsru->list); +		intel_iommu_free_atsr(atsru); +	}  }  int dmar_find_matched_atsr_unit(struct pci_dev *dev)  { -	int i; +	int i, ret = 1;  	struct pci_bus *bus; +	struct pci_dev *bridge = NULL; +	struct device *tmp;  	struct acpi_dmar_atsr *atsr;  	struct dmar_atsr_unit *atsru;  	dev = pci_physfn(dev); - -	list_for_each_entry(atsru, &dmar_atsr_units, list) { -		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); -		if (atsr->segment == pci_domain_nr(dev->bus)) -			goto found; -	} - -	return 0; - -found:  	for (bus = dev->bus; bus; bus = bus->parent) { -		struct pci_dev *bridge = bus->self; - +		bridge = bus->self;  		if (!bridge || !pci_is_pcie(bridge) ||  		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)  			return 0; - -		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) { -			for (i = 0; i < atsru->devices_cnt; i++) -				if (atsru->devices[i] == bridge) -					return 1; +		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)  			break; -		}  	} +	if (!bridge) +		return 0; -	if (atsru->include_all) -		return 1; +	rcu_read_lock(); +	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { +		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); +		if (atsr->segment != pci_domain_nr(dev->bus)) +			continue; -	return 0; +		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp) +			if (tmp == &bridge->dev) +				goto out; + +		if (atsru->include_all) +			goto out; +	} +	ret = 0; +out: +	rcu_read_unlock(); + +	return ret;  } -int __init dmar_parse_rmrr_atsr_dev(void) +int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)  { -	struct dmar_rmrr_unit *rmrr, *rmrr_n; -	struct dmar_atsr_unit *atsr, *atsr_n;  	int ret = 0; +	struct dmar_rmrr_unit *rmrru; +	struct dmar_atsr_unit *atsru; +	struct acpi_dmar_atsr *atsr; +	struct acpi_dmar_reserved_memory *rmrr; -	list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) { -		ret = rmrr_parse_dev(rmrr); -		if (ret) -			return ret; +	if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING) +		return 0; + +	list_for_each_entry(rmrru, &dmar_rmrr_units, list) { +		rmrr = container_of(rmrru->hdr, +				    struct acpi_dmar_reserved_memory, header); +		if (info->event == BUS_NOTIFY_ADD_DEVICE) { +			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1), +				((void *)rmrr) + rmrr->header.length, +				rmrr->segment, rmrru->devices, 
+				rmrru->devices_cnt); +			if(ret < 0) +				return ret; +		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) { +			dmar_remove_dev_scope(info, rmrr->segment, +				rmrru->devices, rmrru->devices_cnt); +		}  	} -	list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) { -		ret = atsr_parse_dev(atsr); -		if (ret) -			return ret; +	list_for_each_entry(atsru, &dmar_atsr_units, list) { +		if (atsru->include_all) +			continue; + +		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); +		if (info->event == BUS_NOTIFY_ADD_DEVICE) { +			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1), +					(void *)atsr + atsr->header.length, +					atsr->segment, atsru->devices, +					atsru->devices_cnt); +			if (ret > 0) +				break; +			else if(ret < 0) +				return ret; +		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) { +			if (dmar_remove_dev_scope(info, atsr->segment, +					atsru->devices, atsru->devices_cnt)) +				break; +		}  	} -	return ret; +	return 0;  }  /* @@ -3635,24 +3858,26 @@ static int device_notifier(struct notifier_block *nb,  				  unsigned long action, void *data)  {  	struct device *dev = data; -	struct pci_dev *pdev = to_pci_dev(dev);  	struct dmar_domain *domain; -	if (iommu_no_mapping(dev)) +	if (iommu_dummy(dev))  		return 0; -	domain = find_domain(pdev); -	if (!domain) +	if (action != BUS_NOTIFY_UNBOUND_DRIVER && +	    action != BUS_NOTIFY_DEL_DEVICE)  		return 0; -	if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) { -		domain_remove_one_dev_info(domain, pdev); +	domain = find_domain(dev); +	if (!domain) +		return 0; -		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && -		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && -		    list_empty(&domain->devices)) -			domain_exit(domain); -	} +	down_read(&dmar_global_lock); +	domain_remove_one_dev_info(domain, dev); +	if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && +	    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && +	    list_empty(&domain->devices)) +		domain_exit(domain); +	up_read(&dmar_global_lock);  	return 0;  } @@ -3661,48 +3886,112 @@ static struct notifier_block device_nb = {  	.notifier_call = device_notifier,  }; +static int intel_iommu_memory_notifier(struct notifier_block *nb, +				       unsigned long val, void *v) +{ +	struct memory_notify *mhp = v; +	unsigned long long start, end; +	unsigned long start_vpfn, last_vpfn; + +	switch (val) { +	case MEM_GOING_ONLINE: +		start = mhp->start_pfn << PAGE_SHIFT; +		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; +		if (iommu_domain_identity_map(si_domain, start, end)) { +			pr_warn("dmar: failed to build identity map for [%llx-%llx]\n", +				start, end); +			return NOTIFY_BAD; +		} +		break; + +	case MEM_OFFLINE: +	case MEM_CANCEL_ONLINE: +		start_vpfn = mm_to_dma_pfn(mhp->start_pfn); +		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); +		while (start_vpfn <= last_vpfn) { +			struct iova *iova; +			struct dmar_drhd_unit *drhd; +			struct intel_iommu *iommu; +			struct page *freelist; + +			iova = find_iova(&si_domain->iovad, start_vpfn); +			if (iova == NULL) { +				pr_debug("dmar: failed get IOVA for PFN %lx\n", +					 start_vpfn); +				break; +			} + +			iova = split_and_remove_iova(&si_domain->iovad, iova, +						     start_vpfn, last_vpfn); +			if (iova == NULL) { +				pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n", +					start_vpfn, last_vpfn); +				return NOTIFY_BAD; +			} + +			freelist = domain_unmap(si_domain, iova->pfn_lo, +					       iova->pfn_hi); + +			rcu_read_lock(); +			
for_each_active_iommu(iommu, drhd) +				iommu_flush_iotlb_psi(iommu, si_domain->id, +					iova->pfn_lo, +					iova->pfn_hi - iova->pfn_lo + 1, +					!freelist, 0); +			rcu_read_unlock(); +			dma_free_pagelist(freelist); + +			start_vpfn = iova->pfn_hi + 1; +			free_iova_mem(iova); +		} +		break; +	} + +	return NOTIFY_OK; +} + +static struct notifier_block intel_iommu_memory_nb = { +	.notifier_call = intel_iommu_memory_notifier, +	.priority = 0 +}; +  int __init intel_iommu_init(void)  { -	int ret = 0; +	int ret = -ENODEV;  	struct dmar_drhd_unit *drhd; +	struct intel_iommu *iommu;  	/* VT-d is required for a TXT/tboot launch, so enforce that */  	force_on = tboot_force_iommu(); +	if (iommu_init_mempool()) { +		if (force_on) +			panic("tboot: Failed to initialize iommu memory\n"); +		return -ENOMEM; +	} + +	down_write(&dmar_global_lock);  	if (dmar_table_init()) {  		if (force_on)  			panic("tboot: Failed to initialize DMAR table\n"); -		return 	-ENODEV; +		goto out_free_dmar;  	}  	/*  	 * Disable translation if already enabled prior to OS handover.  	 */ -	for_each_drhd_unit(drhd) { -		struct intel_iommu *iommu; - -		if (drhd->ignored) -			continue; - -		iommu = drhd->iommu; +	for_each_active_iommu(iommu, drhd)  		if (iommu->gcmd & DMA_GCMD_TE)  			iommu_disable_translation(iommu); -	}  	if (dmar_dev_scope_init() < 0) {  		if (force_on)  			panic("tboot: Failed to initialize DMAR device scope\n"); -		return 	-ENODEV; +		goto out_free_dmar;  	}  	if (no_iommu || dmar_disabled) -		return -ENODEV; - -	if (iommu_init_mempool()) { -		if (force_on) -			panic("tboot: Failed to initialize iommu memory\n"); -		return 	-ENODEV; -	} +		goto out_free_dmar;  	if (list_empty(&dmar_rmrr_units))  		printk(KERN_INFO "DMAR: No RMRR found\n"); @@ -3713,7 +4002,7 @@ int __init intel_iommu_init(void)  	if (dmar_init_reserved_ranges()) {  		if (force_on)  			panic("tboot: Failed to reserve iommu ranges\n"); -		return 	-ENODEV; +		goto out_free_reserved_range;  	}  	init_no_remapping_devices(); @@ -3723,10 +4012,9 @@ int __init intel_iommu_init(void)  		if (force_on)  			panic("tboot: Failed to initialize DMARs\n");  		printk(KERN_ERR "IOMMU: dmar init failed\n"); -		put_iova_domain(&reserved_iova_list); -		iommu_exit_mempool(); -		return ret; +		goto out_free_reserved_range;  	} +	up_write(&dmar_global_lock);  	printk(KERN_INFO  	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); @@ -3739,22 +4027,33 @@ int __init intel_iommu_init(void)  	init_iommu_pm_ops();  	bus_set_iommu(&pci_bus_type, &intel_iommu_ops); -  	bus_register_notifier(&pci_bus_type, &device_nb); +	if (si_domain && !hw_pass_through) +		register_memory_notifier(&intel_iommu_memory_nb);  	intel_iommu_enabled = 1;  	return 0; + +out_free_reserved_range: +	put_iova_domain(&reserved_iova_list); +out_free_dmar: +	intel_iommu_free_dmars(); +	up_write(&dmar_global_lock); +	iommu_exit_mempool(); +	return ret;  }  static void iommu_detach_dependent_devices(struct intel_iommu *iommu, -					   struct pci_dev *pdev) +					   struct device *dev)  { -	struct pci_dev *tmp, *parent; +	struct pci_dev *tmp, *parent, *pdev; -	if (!iommu || !pdev) +	if (!iommu || !dev || !dev_is_pci(dev))  		return; +	pdev = to_pci_dev(dev); +  	/* dependent device detach */  	tmp = pci_find_upstream_pcie_bridge(pdev);  	/* Secondary interface's bus number and devfn 0 */ @@ -3775,31 +4074,28 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu,  }  static void domain_remove_one_dev_info(struct dmar_domain *domain, -					  struct pci_dev *pdev) +				
       struct device *dev)  { -	struct device_domain_info *info; +	struct device_domain_info *info, *tmp;  	struct intel_iommu *iommu;  	unsigned long flags;  	int found = 0; -	struct list_head *entry, *tmp; +	u8 bus, devfn; -	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, -				pdev->devfn); +	iommu = device_to_iommu(dev, &bus, &devfn);  	if (!iommu)  		return;  	spin_lock_irqsave(&device_domain_lock, flags); -	list_for_each_safe(entry, tmp, &domain->devices) { -		info = list_entry(entry, struct device_domain_info, link); -		if (info->segment == pci_domain_nr(pdev->bus) && -		    info->bus == pdev->bus->number && -		    info->devfn == pdev->devfn) { +	list_for_each_entry_safe(info, tmp, &domain->devices, link) { +		if (info->iommu == iommu && info->bus == bus && +		    info->devfn == devfn) {  			unlink_domain_info(info);  			spin_unlock_irqrestore(&device_domain_lock, flags);  			iommu_disable_dev_iotlb(info);  			iommu_detach_dev(iommu, info->bus, info->devfn); -			iommu_detach_dependent_devices(iommu, pdev); +			iommu_detach_dependent_devices(iommu, dev);  			free_devinfo_mem(info);  			spin_lock_irqsave(&device_domain_lock, flags); @@ -3814,8 +4110,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,  		 * owned by this domain, clear this iommu in iommu_bmp  		 * update iommu count and coherency  		 */ -		if (iommu == device_to_iommu(info->segment, info->bus, -					    info->devfn)) +		if (info->iommu == iommu)  			found = 1;  	} @@ -3839,67 +4134,11 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,  	}  } -static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) -{ -	struct device_domain_info *info; -	struct intel_iommu *iommu; -	unsigned long flags1, flags2; - -	spin_lock_irqsave(&device_domain_lock, flags1); -	while (!list_empty(&domain->devices)) { -		info = list_entry(domain->devices.next, -			struct device_domain_info, link); -		unlink_domain_info(info); -		spin_unlock_irqrestore(&device_domain_lock, flags1); - -		iommu_disable_dev_iotlb(info); -		iommu = device_to_iommu(info->segment, info->bus, info->devfn); -		iommu_detach_dev(iommu, info->bus, info->devfn); -		iommu_detach_dependent_devices(iommu, info->dev); - -		/* clear this iommu in iommu_bmp, update iommu count -		 * and capabilities -		 */ -		spin_lock_irqsave(&domain->iommu_lock, flags2); -		if (test_and_clear_bit(iommu->seq_id, -				       domain->iommu_bmp)) { -			domain->iommu_count--; -			domain_update_iommu_cap(domain); -		} -		spin_unlock_irqrestore(&domain->iommu_lock, flags2); - -		free_devinfo_mem(info); -		spin_lock_irqsave(&device_domain_lock, flags1); -	} -	spin_unlock_irqrestore(&device_domain_lock, flags1); -} - -/* domain id for virtual machine, it won't be set in context */ -static unsigned long vm_domid; - -static struct dmar_domain *iommu_alloc_vm_domain(void) -{ -	struct dmar_domain *domain; - -	domain = alloc_domain_mem(); -	if (!domain) -		return NULL; - -	domain->id = vm_domid++; -	domain->nid = -1; -	memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp)); -	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; - -	return domain; -} -  static int md_domain_init(struct dmar_domain *domain, int guest_width)  {  	int adjust_width;  	init_iova_domain(&domain->iovad, DMA_32BIT_PFN); -	spin_lock_init(&domain->iommu_lock); -  	domain_reserve_special_ranges(domain);  	/* calculate AGAW */ @@ -3907,9 +4146,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)  	adjust_width = guestwidth_to_adjustwidth(guest_width);  	
domain->agaw = width_to_agaw(adjust_width); -	INIT_LIST_HEAD(&domain->devices); - -	domain->iommu_count = 0;  	domain->iommu_coherency = 0;  	domain->iommu_snooping = 0;  	domain->iommu_superpage = 0; @@ -3924,57 +4160,11 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)  	return 0;  } -static void iommu_free_vm_domain(struct dmar_domain *domain) -{ -	unsigned long flags; -	struct dmar_drhd_unit *drhd; -	struct intel_iommu *iommu; -	unsigned long i; -	unsigned long ndomains; - -	for_each_drhd_unit(drhd) { -		if (drhd->ignored) -			continue; -		iommu = drhd->iommu; - -		ndomains = cap_ndoms(iommu->cap); -		for_each_set_bit(i, iommu->domain_ids, ndomains) { -			if (iommu->domains[i] == domain) { -				spin_lock_irqsave(&iommu->lock, flags); -				clear_bit(i, iommu->domain_ids); -				iommu->domains[i] = NULL; -				spin_unlock_irqrestore(&iommu->lock, flags); -				break; -			} -		} -	} -} - -static void vm_domain_exit(struct dmar_domain *domain) -{ -	/* Domain 0 is reserved, so dont process it */ -	if (!domain) -		return; - -	vm_domain_remove_all_dev_info(domain); -	/* destroy iovas */ -	put_iova_domain(&domain->iovad); - -	/* clear ptes */ -	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - -	/* free page tables */ -	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - -	iommu_free_vm_domain(domain); -	free_domain_mem(domain); -} -  static int intel_iommu_domain_init(struct iommu_domain *domain)  {  	struct dmar_domain *dmar_domain; -	dmar_domain = iommu_alloc_vm_domain(); +	dmar_domain = alloc_domain(true);  	if (!dmar_domain) {  		printk(KERN_ERR  			"intel_iommu_domain_init: dmar_domain == NULL\n"); @@ -3983,7 +4173,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain)  	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {  		printk(KERN_ERR  			"intel_iommu_domain_init() failed\n"); -		vm_domain_exit(dmar_domain); +		domain_exit(dmar_domain);  		return -ENOMEM;  	}  	domain_update_iommu_cap(dmar_domain); @@ -4001,33 +4191,32 @@ static void intel_iommu_domain_destroy(struct iommu_domain *domain)  	struct dmar_domain *dmar_domain = domain->priv;  	domain->priv = NULL; -	vm_domain_exit(dmar_domain); +	domain_exit(dmar_domain);  }  static int intel_iommu_attach_device(struct iommu_domain *domain,  				     struct device *dev)  {  	struct dmar_domain *dmar_domain = domain->priv; -	struct pci_dev *pdev = to_pci_dev(dev);  	struct intel_iommu *iommu;  	int addr_width; +	u8 bus, devfn; -	/* normally pdev is not mapped */ -	if (unlikely(domain_context_mapped(pdev))) { +	/* normally dev is not mapped */ +	if (unlikely(domain_context_mapped(dev))) {  		struct dmar_domain *old_domain; -		old_domain = find_domain(pdev); +		old_domain = find_domain(dev);  		if (old_domain) {  			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||  			    dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) -				domain_remove_one_dev_info(old_domain, pdev); +				domain_remove_one_dev_info(old_domain, dev);  			else  				domain_remove_dev_info(old_domain);  		}  	} -	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, -				pdev->devfn); +	iommu = device_to_iommu(dev, &bus, &devfn);  	if (!iommu)  		return -ENODEV; @@ -4059,16 +4248,15 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,  		dmar_domain->agaw--;  	} -	return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); +	return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);  }  static void intel_iommu_detach_device(struct 
iommu_domain *domain,  				      struct device *dev)  {  	struct dmar_domain *dmar_domain = domain->priv; -	struct pci_dev *pdev = to_pci_dev(dev); -	domain_remove_one_dev_info(dmar_domain, pdev); +	domain_remove_one_dev_info(dmar_domain, dev);  }  static int intel_iommu_map(struct iommu_domain *domain, @@ -4110,18 +4298,51 @@ static int intel_iommu_map(struct iommu_domain *domain,  }  static size_t intel_iommu_unmap(struct iommu_domain *domain, -			     unsigned long iova, size_t size) +				unsigned long iova, size_t size)  {  	struct dmar_domain *dmar_domain = domain->priv; -	int order; +	struct page *freelist = NULL; +	struct intel_iommu *iommu; +	unsigned long start_pfn, last_pfn; +	unsigned int npages; +	int iommu_id, num, ndomains, level = 0; + +	/* Cope with horrid API which requires us to unmap more than the +	   size argument if it happens to be a large-page mapping. */ +	if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)) +		BUG(); + +	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) +		size = VTD_PAGE_SIZE << level_to_offset_bits(level); + +	start_pfn = iova >> VTD_PAGE_SHIFT; +	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT; + +	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn); + +	npages = last_pfn - start_pfn + 1; + +	for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) { +               iommu = g_iommus[iommu_id]; + +               /* +                * find bit position of dmar_domain +                */ +               ndomains = cap_ndoms(iommu->cap); +               for_each_set_bit(num, iommu->domain_ids, ndomains) { +                       if (iommu->domains[num] == dmar_domain) +                               iommu_flush_iotlb_psi(iommu, num, start_pfn, +						     npages, !freelist, 0); +	       } + +	} -	order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, -			    (iova + size - 1) >> VTD_PAGE_SHIFT); +	dma_free_pagelist(freelist);  	if (dmar_domain->max_addr == iova + size)  		dmar_domain->max_addr = iova; -	return PAGE_SIZE << order; +	return size;  }  static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, @@ -4129,9 +4350,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,  {  	struct dmar_domain *dmar_domain = domain->priv;  	struct dma_pte *pte; +	int level = 0;  	u64 phys = 0; -	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); +	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);  	if (pte)  		phys = dma_pte_addr(pte); @@ -4159,9 +4381,9 @@ static int intel_iommu_add_device(struct device *dev)  	struct pci_dev *bridge, *dma_pdev = NULL;  	struct iommu_group *group;  	int ret; +	u8 bus, devfn; -	if (!device_to_iommu(pci_domain_nr(pdev->bus), -			     pdev->bus->number, pdev->devfn)) +	if (!device_to_iommu(dev, &bus, &devfn))  		return -ENODEV;  	bridge = pci_find_upstream_pcie_bridge(pdev);  | 
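A recurring theme in the unmap paths above is that page-table pages are no longer freed inline: domain_unmap() now hands back a freelist, the IOTLB invalidation is issued first (its new argument is derived from whether that list is empty), and only afterwards is the list released via dma_free_pagelist(), either immediately in the intel_iommu_strict case or later from flush_unmaps() for deferred unmaps. Below is a minimal userspace-only sketch of that ordering; the names (freelist_node, collect_unmapped, flush_iotlb, free_pagelist) are illustrative stand-ins, not the driver's API.

```c
#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for a page-table page queued for later freeing. */
struct freelist_node {
	struct freelist_node *next;
	void *page;
};

/* Model of domain_unmap(): tear mappings down and collect the pages that
 * backed the page tables instead of freeing them on the spot. */
static struct freelist_node *collect_unmapped(int npages)
{
	struct freelist_node *head = NULL;

	for (int i = 0; i < npages; i++) {
		struct freelist_node *n = malloc(sizeof(*n));
		if (!n)
			break;
		n->page = malloc(4096);	/* pretend page-table page */
		n->next = head;
		head = n;
	}
	return head;
}

/* Model of the IOTLB invalidation: after this "flush", nothing may still
 * be walking the old page tables. */
static void flush_iotlb(void)
{
	puts("IOTLB flushed");
}

/* Model of dma_free_pagelist(): only safe once the flush has completed. */
static void free_pagelist(struct freelist_node *head)
{
	while (head) {
		struct freelist_node *next = head->next;
		free(head->page);
		free(head);
		head = next;
	}
}

int main(void)
{
	struct freelist_node *freelist = collect_unmapped(8);

	flush_iotlb();			/* invalidate first ...              */
	free_pagelist(freelist);	/* ... then free, as in strict mode  */
	return 0;
}
```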

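The reworked intel_iommu_unmap() also has to cope with callers that pass a size smaller than a large-page mapping: pfn_to_dma_pte() now reports the level at which it found the PTE, and the size is rounded up to VTD_PAGE_SIZE << level_to_offset_bits(level) so the whole superpage is torn down. The standalone snippet below recomputes those minimum sizes, assuming 4KiB VT-d base pages and 9 address bits per page-table level (both assumptions match the conventional VT-d layout, not something this excerpt states explicitly).

```c
#include <stdio.h>

#define VTD_PAGE_SHIFT	12			/* assumed: 4KiB VT-d pages        */
#define VTD_PAGE_SIZE	(1UL << VTD_PAGE_SHIFT)
#define LEVEL_STRIDE	9			/* assumed: 512 entries per level  */

/* Assumed shape of level_to_offset_bits(): address bits covered below an
 * entry at the given page-table level. */
static unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

int main(void)
{
	/* Minimum unmap size once the PTE level is known:
	 * level 1 -> 4 KiB, level 2 -> 2 MiB, level 3 -> 1 GiB. */
	for (int level = 1; level <= 3; level++)
		printf("level %d -> %lu KiB\n", level,
		       (VTD_PAGE_SIZE << level_to_offset_bits(level)) >> 10);
	return 0;
}
```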