Diffstat (limited to 'drivers/iommu/intel-iommu.c')
 drivers/iommu/intel-iommu.c | 1610 ++++++++++++++++++++++++--------------------
 1 file changed, 926 insertions(+), 684 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a22c86c867f..69fa7da5e48 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, Intel Corporation.
+ * Copyright © 2006-2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -10,15 +10,11 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Author: Ashok Raj <ashok.raj@intel.com>
- * Author: Shaohua Li <shaohua.li@intel.com>
- * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
- * Author: Fenghua Yu <fenghua.yu@intel.com>
+ * Authors: David Woodhouse <dwmw2@infradead.org>,
+ * Ashok Raj <ashok.raj@intel.com>,
+ * Shaohua Li <shaohua.li@intel.com>,
+ * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
+ * Fenghua Yu <fenghua.yu@intel.com>
*/
#include <linux/init.h>
@@ -33,6 +29,7 @@
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
+#include <linux/memory.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
@@ -372,14 +369,36 @@ struct dmar_domain {
struct device_domain_info {
struct list_head link; /* link to domain siblings */
struct list_head global; /* link to global list */
- int segment; /* PCI domain */
u8 bus; /* PCI bus number */
u8 devfn; /* PCI devfn number */
- struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
+ struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
};
+struct dmar_rmrr_unit {
+ struct list_head list; /* list of rmrr units */
+ struct acpi_dmar_header *hdr; /* ACPI header */
+ u64 base_address; /* reserved base address */
+ u64 end_address; /* reserved end address */
+ struct dmar_dev_scope *devices; /* target devices */
+ int devices_cnt; /* target device count */
+};
+
+struct dmar_atsr_unit {
+ struct list_head list; /* list of ATSR units */
+ struct acpi_dmar_header *hdr; /* ACPI header */
+ struct dmar_dev_scope *devices; /* target devices */
+ int devices_cnt; /* target device count */
+ u8 include_all:1; /* include all ports */
+};
+
+static LIST_HEAD(dmar_atsr_units);
+static LIST_HEAD(dmar_rmrr_units);
+
+#define for_each_rmrr_units(rmrr) \
+ list_for_each_entry(rmrr, &dmar_rmrr_units, list)
+
static void flush_unmaps_timeout(unsigned long data);
static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
@@ -389,6 +408,7 @@ struct deferred_flush_tables {
int next;
struct iova *iova[HIGH_WATER_MARK];
struct dmar_domain *domain[HIGH_WATER_MARK];
+ struct page *freelist[HIGH_WATER_MARK];
};
static struct deferred_flush_tables *deferred_flush;
@@ -402,7 +422,12 @@ static LIST_HEAD(unmaps_to_do);
static int timer_on;
static long list_size;
+static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
+static void domain_remove_one_dev_info(struct dmar_domain *domain,
+ struct device *dev);
+static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
+ struct device *dev);
#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
@@ -566,18 +591,31 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
- int i;
-
- i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ int i, found = 0;
- domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;
+ domain->iommu_coherency = 1;
for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
+ found = 1;
if (!ecap_coherent(g_iommus[i]->ecap)) {
domain->iommu_coherency = 0;
break;
}
}
+ if (found)
+ return;
+
+ /* No hardware attached; use lowest common denominator */
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ if (!ecap_coherent(iommu->ecap)) {
+ domain->iommu_coherency = 0;
+ break;
+ }
+ }
+ rcu_read_unlock();
}
static void domain_update_iommu_snooping(struct dmar_domain *domain)
@@ -606,12 +644,15 @@ static void domain_update_iommu_superpage(struct dmar_domain *domain)
}
/* set iommu_superpage to the smallest common denominator */
+ rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
mask &= cap_super_page_val(iommu->cap);
if (!mask) {
break;
}
}
+ rcu_read_unlock();
+
domain->iommu_superpage = fls(mask);
}
@@ -623,32 +664,56 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
domain_update_iommu_superpage(domain);
}
-static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
+static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
struct dmar_drhd_unit *drhd = NULL;
+ struct intel_iommu *iommu;
+ struct device *tmp;
+ struct pci_dev *ptmp, *pdev = NULL;
+ u16 segment;
int i;
- for_each_active_drhd_unit(drhd) {
- if (segment != drhd->segment)
+ if (dev_is_pci(dev)) {
+ pdev = to_pci_dev(dev);
+ segment = pci_domain_nr(pdev->bus);
+ } else if (ACPI_COMPANION(dev))
+ dev = &ACPI_COMPANION(dev)->dev;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ if (pdev && segment != drhd->segment)
continue;
- for (i = 0; i < drhd->devices_cnt; i++) {
- if (drhd->devices[i] &&
- drhd->devices[i]->bus->number == bus &&
- drhd->devices[i]->devfn == devfn)
- return drhd->iommu;
- if (drhd->devices[i] &&
- drhd->devices[i]->subordinate &&
- drhd->devices[i]->subordinate->number <= bus &&
- drhd->devices[i]->subordinate->busn_res.end >= bus)
- return drhd->iommu;
+ for_each_active_dev_scope(drhd->devices,
+ drhd->devices_cnt, i, tmp) {
+ if (tmp == dev) {
+ *bus = drhd->devices[i].bus;
+ *devfn = drhd->devices[i].devfn;
+ goto out;
+ }
+
+ if (!pdev || !dev_is_pci(tmp))
+ continue;
+
+ ptmp = to_pci_dev(tmp);
+ if (ptmp->subordinate &&
+ ptmp->subordinate->number <= pdev->bus->number &&
+ ptmp->subordinate->busn_res.end >= pdev->bus->number)
+ goto got_pdev;
}
- if (drhd->include_all)
- return drhd->iommu;
+ if (pdev && drhd->include_all) {
+ got_pdev:
+ *bus = pdev->bus->number;
+ *devfn = pdev->devfn;
+ goto out;
+ }
}
+ iommu = NULL;
+ out:
+ rcu_read_unlock();
- return NULL;
+ return iommu;
}
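
The reworked lookup inverts the old convention: rather than taking (segment, bus, devfn), it takes a generic struct device and reports back, through out-parameters, the bus/devfn under which the IOMMU hardware knows the device. A minimal caller sketch, using only names introduced in this hunk:

	u8 bus, devfn;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;	/* device is outside every DRHD scope */
	/* bus/devfn are now valid for context-entry programming */
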
static void domain_flush_cache(struct dmar_domain *domain,
@@ -748,7 +813,7 @@ out:
}
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
- unsigned long pfn, int target_level)
+ unsigned long pfn, int *target_level)
{
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
struct dma_pte *parent, *pte = NULL;
@@ -763,14 +828,14 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
parent = domain->pgd;
- while (level > 0) {
+ while (1) {
void *tmp_page;
offset = pfn_level_offset(pfn, level);
pte = &parent[offset];
- if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
+ if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
break;
- if (level == target_level)
+ if (level == *target_level)
break;
if (!dma_pte_present(pte)) {
@@ -791,10 +856,16 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
domain_flush_cache(domain, pte, sizeof(*pte));
}
}
+ if (level == 1)
+ break;
+
parent = phys_to_virt(dma_pte_addr(pte));
level--;
}
+ if (!*target_level)
+ *target_level = level;
+
return pte;
}
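
Passing target_level by reference lets one function serve two kinds of caller: a non-zero *target_level asks pfn_to_dma_pte() to descend (allocating missing tables) to exactly that level, while zero means "find whatever leaf exists, allocate nothing", with *target_level updated on return to report the level actually reached. A hedged caller sketch:

	int level = 0;	/* 0: look up the existing leaf only */
	struct dma_pte *pte;

	pte = pfn_to_dma_pte(domain, iov_pfn, &level);
	if (pte && dma_pte_present(pte) && level > 1)
		;	/* iov_pfn is covered by a superpage at this level */
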
@@ -832,7 +903,7 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
}
/* clear last level pte, a tlb flush should be followed */
-static int dma_pte_clear_range(struct dmar_domain *domain,
+static void dma_pte_clear_range(struct dmar_domain *domain,
unsigned long start_pfn,
unsigned long last_pfn)
{
@@ -862,8 +933,6 @@ static int dma_pte_clear_range(struct dmar_domain *domain,
(void *)pte - (void *)first_pte);
} while (start_pfn && start_pfn <= last_pfn);
-
- return min_t(int, (large_page - 1) * 9, MAX_AGAW_PFN_WIDTH);
}
static void dma_pte_free_level(struct dmar_domain *domain, int level,
@@ -921,6 +990,123 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
}
}
+/* When a page at a given level is being unlinked from its parent, we don't
+ need to *modify* it at all. All we need to do is make a list of all the
+ pages which can be freed just as soon as we've flushed the IOTLB and we
+ know the hardware page-walk will no longer touch them.
+ The 'pte' argument is the *parent* PTE, pointing to the page that is to
+ be freed. */
+static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
+ int level, struct dma_pte *pte,
+ struct page *freelist)
+{
+ struct page *pg;
+
+ pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
+ pg->freelist = freelist;
+ freelist = pg;
+
+ if (level == 1)
+ return freelist;
+
+ pte = page_address(pg);
+ do {
+ if (dma_pte_present(pte) && !dma_pte_superpage(pte))
+ freelist = dma_pte_list_pagetables(domain, level - 1,
+ pte, freelist);
+ } while (!first_pte_in_page(++pte));
+
+ return freelist;
+}
+
+static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
+ struct dma_pte *pte, unsigned long pfn,
+ unsigned long start_pfn,
+ unsigned long last_pfn,
+ struct page *freelist)
+{
+ struct dma_pte *first_pte = NULL, *last_pte = NULL;
+
+ pfn = max(start_pfn, pfn);
+ pte = &pte[pfn_level_offset(pfn, level)];
+
+ do {
+ unsigned long level_pfn;
+
+ if (!dma_pte_present(pte))
+ goto next;
+
+ level_pfn = pfn & level_mask(level);
+
+ /* If range covers entire pagetable, free it */
+ if (start_pfn <= level_pfn &&
+ last_pfn >= level_pfn + level_size(level) - 1) {
+ /* These subordinate page tables are going away entirely. Don't
+ bother to clear them; we're just going to *free* them. */
+ if (level > 1 && !dma_pte_superpage(pte))
+ freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
+
+ dma_clear_pte(pte);
+ if (!first_pte)
+ first_pte = pte;
+ last_pte = pte;
+ } else if (level > 1) {
+ /* Recurse down into a level that isn't *entirely* obsolete */
+ freelist = dma_pte_clear_level(domain, level - 1,
+ phys_to_virt(dma_pte_addr(pte)),
+ level_pfn, start_pfn, last_pfn,
+ freelist);
+ }
+next:
+ pfn += level_size(level);
+ } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
+
+ if (first_pte)
+ domain_flush_cache(domain, first_pte,
+ (void *)++last_pte - (void *)first_pte);
+
+ return freelist;
+}
+
+/* We can't just free the pages because the IOMMU may still be walking
+ the page tables, and may have cached the intermediate levels. The
+ pages can only be freed after the IOTLB flush has been done. */
+struct page *domain_unmap(struct dmar_domain *domain,
+ unsigned long start_pfn,
+ unsigned long last_pfn)
+{
+ int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+ struct page *freelist = NULL;
+
+ BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
+ BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
+ BUG_ON(start_pfn > last_pfn);
+
+ /* we don't need lock here; nobody else touches the iova range */
+ freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
+ domain->pgd, 0, start_pfn, last_pfn, NULL);
+
+ /* free pgd */
+ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
+ struct page *pgd_page = virt_to_page(domain->pgd);
+ pgd_page->freelist = freelist;
+ freelist = pgd_page;
+
+ domain->pgd = NULL;
+ }
+
+ return freelist;
+}
+
+void dma_free_pagelist(struct page *freelist)
+{
+ struct page *pg;
+
+ while ((pg = freelist)) {
+ freelist = pg->freelist;
+ free_pgtable_page(page_address(pg));
+ }
+}
+
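
The teardown path is now split into a collect phase and a free phase: domain_unmap() unhooks page-table pages and chains them through page->freelist, and dma_free_pagelist() recycles them only once the IOTLB flush guarantees the hardware can no longer walk them. A sketch of the intended ordering (did/start_pfn/last_pfn are placeholders; the flush is abbreviated to the PSI helper with its new six-argument signature):

	struct page *freelist;

	freelist = domain_unmap(domain, start_pfn, last_pfn);
	/* hardware may still walk the detached tables until this returns */
	iommu_flush_iotlb_psi(iommu, did, start_pfn,
			      last_pfn - start_pfn + 1, 0, 0);
	/* nothing can reference the detached pages any more; free them */
	dma_free_pagelist(freelist);
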
/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
@@ -1030,7 +1216,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
break;
case DMA_TLB_PSI_FLUSH:
val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
- /* Note: always flush non-leaf currently */
+ /* IH bit is passed in as part of address */
val_iva = size_order | addr;
break;
default:
@@ -1069,13 +1255,14 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
(unsigned long long)DMA_TLB_IAIG(val));
}
-static struct device_domain_info *iommu_support_dev_iotlb(
- struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
+static struct device_domain_info *
+iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
+ u8 bus, u8 devfn)
{
int found = 0;
unsigned long flags;
struct device_domain_info *info;
- struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
+ struct pci_dev *pdev;
if (!ecap_dev_iotlb_support(iommu->ecap))
return NULL;
@@ -1091,34 +1278,35 @@ static struct device_domain_info *iommu_support_dev_iotlb(
}
spin_unlock_irqrestore(&device_domain_lock, flags);
- if (!found || !info->dev)
+ if (!found || !info->dev || !dev_is_pci(info->dev))
return NULL;
- if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
- return NULL;
+ pdev = to_pci_dev(info->dev);
- if (!dmar_find_matched_atsr_unit(info->dev))
+ if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
return NULL;
- info->iommu = iommu;
+ if (!dmar_find_matched_atsr_unit(pdev))
+ return NULL;
return info;
}
static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
- if (!info)
+ if (!info || !dev_is_pci(info->dev))
return;
- pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
+ pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
}
static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
- if (!info->dev || !pci_ats_enabled(info->dev))
+ if (!info->dev || !dev_is_pci(info->dev) ||
+ !pci_ats_enabled(to_pci_dev(info->dev)))
return;
- pci_disable_ats(info->dev);
+ pci_disable_ats(to_pci_dev(info->dev));
}
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
@@ -1130,24 +1318,31 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry(info, &domain->devices, link) {
- if (!info->dev || !pci_ats_enabled(info->dev))
+ struct pci_dev *pdev;
+ if (!info->dev || !dev_is_pci(info->dev))
+ continue;
+
+ pdev = to_pci_dev(info->dev);
+ if (!pci_ats_enabled(pdev))
continue;
sid = info->bus << 8 | info->devfn;
- qdep = pci_ats_queue_depth(info->dev);
+ qdep = pci_ats_queue_depth(pdev);
qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
}
spin_unlock_irqrestore(&device_domain_lock, flags);
}
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
- unsigned long pfn, unsigned int pages, int map)
+ unsigned long pfn, unsigned int pages, int ih, int map)
{
unsigned int mask = ilog2(__roundup_pow_of_two(pages));
uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
BUG_ON(pages == 0);
+ if (ih)
+ ih = 1 << 6;
/*
* Fallback to domain selective flush if no PSI support or the size is
* too big.
@@ -1158,7 +1353,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
iommu->flush.flush_iotlb(iommu, did, 0, 0,
DMA_TLB_DSI_FLUSH);
else
- iommu->flush.flush_iotlb(iommu, did, addr, mask,
+ iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
DMA_TLB_PSI_FLUSH);
/*
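
The new ih argument threads the VT-d invalidation hint through to the hardware: in a page-selective invalidation, bit 6 of the address operand is the IH bit, which permits the IOMMU to keep its cached non-leaf (page-directory) entries because only leaf PTEs changed. That is why iommu_flush_iotlb_psi() converts a boolean ih into 1 << 6 and simply ORs it into addr. Hedged caller sketches (did/pfn/npages are placeholders):

	/* only leaf PTEs changed, no tables freed: keep page-walk caches */
	iommu_flush_iotlb_psi(iommu, did, pfn, npages, 1, 0);

	/* page tables were freed as well: no hint, flush non-leaf too */
	iommu_flush_iotlb_psi(iommu, did, pfn, npages, 0, 0);
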
@@ -1261,10 +1456,6 @@ static int iommu_init_domains(struct intel_iommu *iommu)
return 0;
}
-
-static void domain_exit(struct dmar_domain *domain);
-static void vm_domain_exit(struct dmar_domain *domain);
-
static void free_dmar_iommu(struct intel_iommu *iommu)
{
struct dmar_domain *domain;
@@ -1273,18 +1464,21 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
if ((iommu->domains) && (iommu->domain_ids)) {
for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
+ /*
+ * Domain id 0 is reserved for invalid translation
+ * if hardware supports caching mode.
+ */
+ if (cap_caching_mode(iommu->cap) && i == 0)
+ continue;
+
domain = iommu->domains[i];
clear_bit(i, iommu->domain_ids);
spin_lock_irqsave(&domain->iommu_lock, flags);
count = --domain->iommu_count;
spin_unlock_irqrestore(&domain->iommu_lock, flags);
- if (count == 0) {
- if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
- vm_domain_exit(domain);
- else
- domain_exit(domain);
- }
+ if (count == 0)
+ domain_exit(domain);
}
}
@@ -1298,21 +1492,14 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
g_iommus[iommu->seq_id] = NULL;
- /* if all iommus are freed, free g_iommus */
- for (i = 0; i < g_num_of_iommus; i++) {
- if (g_iommus[i])
- break;
- }
-
- if (i == g_num_of_iommus)
- kfree(g_iommus);
-
/* free context mapping */
free_context_table(iommu);
}
-static struct dmar_domain *alloc_domain(void)
+static struct dmar_domain *alloc_domain(bool vm)
{
+ /* domain id for virtual machine; it won't be set in context */
+ static atomic_t vm_domid = ATOMIC_INIT(0);
struct dmar_domain *domain;
domain = alloc_domain_mem();
@@ -1320,8 +1507,15 @@ static struct dmar_domain *alloc_domain(void)
return NULL;
domain->nid = -1;
+ domain->iommu_count = 0;
memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
domain->flags = 0;
+ spin_lock_init(&domain->iommu_lock);
+ INIT_LIST_HEAD(&domain->devices);
+ if (vm) {
+ domain->id = atomic_inc_return(&vm_domid);
+ domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
+ }
return domain;
}
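
Merging the two old allocators into alloc_domain(bool vm) centralizes the common field setup (device list, lock, iommu bitmap); the flag only decides how the domain gets its ID. A hedged sketch of the two call styles:

	/* ordinary DMA-remapping domain: ID assigned by iommu_attach_domain() */
	domain = alloc_domain(false);

	/* virtual-machine domain: software-only ID from the vm_domid counter,
	   never written into a context entry */
	domain = alloc_domain(true);
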
@@ -1345,6 +1539,7 @@ static int iommu_attach_domain(struct dmar_domain *domain,
}
domain->id = num;
+ domain->iommu_count++;
set_bit(num, iommu->domain_ids);
set_bit(iommu->seq_id, domain->iommu_bmp);
iommu->domains[num] = domain;
@@ -1358,22 +1553,16 @@ static void iommu_detach_domain(struct dmar_domain *domain,
{
unsigned long flags;
int num, ndomains;
- int found = 0;
spin_lock_irqsave(&iommu->lock, flags);
ndomains = cap_ndoms(iommu->cap);
for_each_set_bit(num, iommu->domain_ids, ndomains) {
if (iommu->domains[num] == domain) {
- found = 1;
+ clear_bit(num, iommu->domain_ids);
+ iommu->domains[num] = NULL;
break;
}
}
-
- if (found) {
- clear_bit(num, iommu->domain_ids);
- clear_bit(iommu->seq_id, domain->iommu_bmp);
- iommu->domains[num] = NULL;
- }
spin_unlock_irqrestore(&iommu->lock, flags);
}
@@ -1445,8 +1634,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
unsigned long sagaw;
init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
- spin_lock_init(&domain->iommu_lock);
-
domain_reserve_special_ranges(domain);
/* calculate AGAW */
@@ -1465,7 +1652,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
return -ENODEV;
}
domain->agaw = agaw;
- INIT_LIST_HEAD(&domain->devices);
if (ecap_coherent(iommu->ecap))
domain->iommu_coherency = 1;
@@ -1477,8 +1663,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
else
domain->iommu_snooping = 0;
- domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
- domain->iommu_count = 1;
+ if (intel_iommu_superpage)
+ domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
+ else
+ domain->iommu_superpage = 0;
+
domain->nid = iommu->node;
/* always allocate the top pgd */
@@ -1493,6 +1682,7 @@ static void domain_exit(struct dmar_domain *domain)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
+ struct page *freelist = NULL;
/* Domain 0 is reserved, so dont process it */
if (!domain)
@@ -1502,29 +1692,33 @@ static void domain_exit(struct dmar_domain *domain)
if (!intel_iommu_strict)
flush_unmaps_timeout(0);
+ /* remove associated devices */
domain_remove_dev_info(domain);
+
/* destroy iovas */
put_iova_domain(&domain->iovad);
- /* clear ptes */
- dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
-
- /* free page tables */
- dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+ freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+ /* clear attached or cached domains */
+ rcu_read_lock();
for_each_active_iommu(iommu, drhd)
- if (test_bit(iommu->seq_id, domain->iommu_bmp))
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
+ test_bit(iommu->seq_id, domain->iommu_bmp))
iommu_detach_domain(domain, iommu);
+ rcu_read_unlock();
+
+ dma_free_pagelist(freelist);
free_domain_mem(domain);
}
-static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
- u8 bus, u8 devfn, int translation)
+static int domain_context_mapping_one(struct dmar_domain *domain,
+ struct intel_iommu *iommu,
+ u8 bus, u8 devfn, int translation)
{
struct context_entry *context;
unsigned long flags;
- struct intel_iommu *iommu;
struct dma_pte *pgd;
unsigned long num;
unsigned long ndomains;
@@ -1539,10 +1733,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
translation != CONTEXT_TT_MULTI_LEVEL);
- iommu = device_to_iommu(segment, bus, devfn);
- if (!iommu)
- return -ENODEV;
-
context = device_to_context_entry(iommu, bus, devfn);
if (!context)
return -ENOMEM;
@@ -1600,7 +1790,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
context_set_domain_id(context, id);
if (translation != CONTEXT_TT_PASS_THROUGH) {
- info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
+ info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
translation = info ? CONTEXT_TT_DEV_IOTLB :
CONTEXT_TT_MULTI_LEVEL;
}
@@ -1650,27 +1840,32 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
}
static int
-domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
- int translation)
+domain_context_mapping(struct dmar_domain *domain, struct device *dev,
+ int translation)
{
int ret;
- struct pci_dev *tmp, *parent;
+ struct pci_dev *pdev, *tmp, *parent;
+ struct intel_iommu *iommu;
+ u8 bus, devfn;
+
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return -ENODEV;
- ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
- pdev->bus->number, pdev->devfn,
+ ret = domain_context_mapping_one(domain, iommu, bus, devfn,
translation);
- if (ret)
+ if (ret || !dev_is_pci(dev))
return ret;
/* dependent device mapping */
+ pdev = to_pci_dev(dev);
tmp = pci_find_upstream_pcie_bridge(pdev);
if (!tmp)
return 0;
/* Secondary interface's bus number and devfn 0 */
parent = pdev->bus->self;
while (parent != tmp) {
- ret = domain_context_mapping_one(domain,
- pci_domain_nr(parent->bus),
+ ret = domain_context_mapping_one(domain, iommu,
parent->bus->number,
parent->devfn, translation);
if (ret)
@@ -1678,33 +1873,33 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
parent = parent->bus->self;
}
if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
- return domain_context_mapping_one(domain,
- pci_domain_nr(tmp->subordinate),
+ return domain_context_mapping_one(domain, iommu,
tmp->subordinate->number, 0,
translation);
else /* this is a legacy PCI bridge */
- return domain_context_mapping_one(domain,
- pci_domain_nr(tmp->bus),
+ return domain_context_mapping_one(domain, iommu,
tmp->bus->number,
tmp->devfn,
translation);
}
-static int domain_context_mapped(struct pci_dev *pdev)
+static int domain_context_mapped(struct device *dev)
{
int ret;
- struct pci_dev *tmp, *parent;
+ struct pci_dev *pdev, *tmp, *parent;
struct intel_iommu *iommu;
+ u8 bus, devfn;
- iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
- pdev->devfn);
+ iommu = device_to_iommu(dev, &bus, &devfn);
if (!iommu)
return -ENODEV;
- ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
- if (!ret)
+ ret = device_context_mapped(iommu, bus, devfn);
+ if (!ret || !dev_is_pci(dev))
return ret;
+
/* dependent device mapping */
+ pdev = to_pci_dev(dev);
tmp = pci_find_upstream_pcie_bridge(pdev);
if (!tmp)
return ret;
@@ -1800,7 +1995,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
if (!pte) {
largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
- first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
+ first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
if (!pte)
return -ENOMEM;
/* It is large page*/
@@ -1899,14 +2094,13 @@ static inline void unlink_domain_info(struct device_domain_info *info)
list_del(&info->link);
list_del(&info->global);
if (info->dev)
- info->dev->dev.archdata.iommu = NULL;
+ info->dev->archdata.iommu = NULL;
}
static void domain_remove_dev_info(struct dmar_domain *domain)
{
struct device_domain_info *info;
- unsigned long flags;
- struct intel_iommu *iommu;
+ unsigned long flags, flags2;
spin_lock_irqsave(&device_domain_lock, flags);
while (!list_empty(&domain->devices)) {
@@ -1916,10 +2110,23 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
spin_unlock_irqrestore(&device_domain_lock, flags);
iommu_disable_dev_iotlb(info);
- iommu = device_to_iommu(info->segment, info->bus, info->devfn);
- iommu_detach_dev(iommu, info->bus, info->devfn);
- free_devinfo_mem(info);
+ iommu_detach_dev(info->iommu, info->bus, info->devfn);
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+ iommu_detach_dependent_devices(info->iommu, info->dev);
+ /* clear this iommu in iommu_bmp, update iommu count
+ * and capabilities
+ */
+ spin_lock_irqsave(&domain->iommu_lock, flags2);
+ if (test_and_clear_bit(info->iommu->seq_id,
+ domain->iommu_bmp)) {
+ domain->iommu_count--;
+ domain_update_iommu_cap(domain);
+ }
+ spin_unlock_irqrestore(&domain->iommu_lock, flags2);
+ }
+
+ free_devinfo_mem(info);
spin_lock_irqsave(&device_domain_lock, flags);
}
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -1927,155 +2134,151 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
/*
* find_domain
- * Note: we use struct pci_dev->dev.archdata.iommu stores the info
+ * Note: we use struct device->archdata.iommu to store the info
*/
-static struct dmar_domain *
-find_domain(struct pci_dev *pdev)
+static struct dmar_domain *find_domain(struct device *dev)
{
struct device_domain_info *info;
/* No lock here, assumes no domain exit in normal case */
- info = pdev->dev.archdata.iommu;
+ info = dev->archdata.iommu;
if (info)
return info->domain;
return NULL;
}
+static inline struct device_domain_info *
+dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
+{
+ struct device_domain_info *info;
+
+ list_for_each_entry(info, &device_domain_list, global)
+ if (info->iommu->segment == segment && info->bus == bus &&
+ info->devfn == devfn)
+ return info;
+
+ return NULL;
+}
+
+static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
+ int bus, int devfn,
+ struct device *dev,
+ struct dmar_domain *domain)
+{
+ struct dmar_domain *found = NULL;
+ struct device_domain_info *info;
+ unsigned long flags;
+
+ info = alloc_devinfo_mem();
+ if (!info)
+ return NULL;
+
+ info->bus = bus;
+ info->devfn = devfn;
+ info->dev = dev;
+ info->domain = domain;
+ info->iommu = iommu;
+ if (!dev)
+ domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ if (dev)
+ found = find_domain(dev);
+ else {
+ struct device_domain_info *info2;
+ info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
+ if (info2)
+ found = info2->domain;
+ }
+ if (found) {
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ free_devinfo_mem(info);
+ /* Caller must free the original domain */
+ return found;
+ }
+
+ list_add(&info->link, &domain->devices);
+ list_add(&info->global, &device_domain_list);
+ if (dev)
+ dev->archdata.iommu = info;
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ return domain;
+}
+
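
dmar_insert_dev_info() also absorbs the old open-coded "somebody is fast" race handling: the info is allocated outside the lock, the lists are re-checked under device_domain_lock, and if another thread won, its domain is returned and the fresh info is freed. The caller must then dispose of its own, now-redundant domain, roughly:

	struct dmar_domain *tmp;

	tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
	if (tmp != domain)
		domain_exit(domain);	/* lost the race (or OOM): drop ours */
	domain = tmp;			/* may be NULL on allocation failure */
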
/* domain is initialized */
-static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
+static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
{
- struct dmar_domain *domain, *found = NULL;
- struct intel_iommu *iommu;
- struct dmar_drhd_unit *drhd;
- struct device_domain_info *info, *tmp;
- struct pci_dev *dev_tmp;
+ struct dmar_domain *domain, *free = NULL;
+ struct intel_iommu *iommu = NULL;
+ struct device_domain_info *info;
+ struct pci_dev *dev_tmp = NULL;
unsigned long flags;
- int bus = 0, devfn = 0;
- int segment;
- int ret;
+ u8 bus, devfn, bridge_bus, bridge_devfn;
- domain = find_domain(pdev);
+ domain = find_domain(dev);
if (domain)
return domain;
- segment = pci_domain_nr(pdev->bus);
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u16 segment;
- dev_tmp = pci_find_upstream_pcie_bridge(pdev);
- if (dev_tmp) {
- if (pci_is_pcie(dev_tmp)) {
- bus = dev_tmp->subordinate->number;
- devfn = 0;
- } else {
- bus = dev_tmp->bus->number;
- devfn = dev_tmp->devfn;
- }
- spin_lock_irqsave(&device_domain_lock, flags);
- list_for_each_entry(info, &device_domain_list, global) {
- if (info->segment == segment &&
- info->bus == bus && info->devfn == devfn) {
- found = info->domain;
- break;
+ segment = pci_domain_nr(pdev->bus);
+ dev_tmp = pci_find_upstream_pcie_bridge(pdev);
+ if (dev_tmp) {
+ if (pci_is_pcie(dev_tmp)) {
+ bridge_bus = dev_tmp->subordinate->number;
+ bridge_devfn = 0;
+ } else {
+ bridge_bus = dev_tmp->bus->number;
+ bridge_devfn = dev_tmp->devfn;
}
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
- /* pcie-pci bridge already has a domain, uses it */
- if (found) {
- domain = found;
- goto found_domain;
+ spin_lock_irqsave(&device_domain_lock, flags);
+ info = dmar_search_domain_by_dev_info(segment, bridge_bus,
+ bridge_devfn);
+ if (info) {
+ iommu = info->iommu;
+ domain = info->domain;
+ }
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ /* pcie-pci bridge already has a domain; use it */
+ if (info)
+ goto found_domain;
}
}
- domain = alloc_domain();
- if (!domain)
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
goto error;
- /* Allocate new domain for the device */
- drhd = dmar_find_matched_drhd_unit(pdev);
- if (!drhd) {
- printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
- pci_name(pdev));
- free_domain_mem(domain);
- return NULL;
- }
- iommu = drhd->iommu;
-
- ret = iommu_attach_domain(domain, iommu);
- if (ret) {
+ /* Allocate and initialize new domain for the device */
+ domain = alloc_domain(false);
+ if (!domain)
+ goto error;
+ if (iommu_attach_domain(domain, iommu)) {
free_domain_mem(domain);
+ domain = NULL;
goto error;
}
-
- if (domain_init(domain, gaw)) {
- domain_exit(domain);
+ free = domain;
+ if (domain_init(domain, gaw))
goto error;
- }
/* register pcie-to-pci device */
if (dev_tmp) {
- info = alloc_devinfo_mem();
- if (!info) {
- domain_exit(domain);
+ domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn,
+ NULL, domain);
+ if (!domain)
goto error;
- }
- info->segment = segment;
- info->bus = bus;
- info->devfn = devfn;
- info->dev = NULL;
- info->domain = domain;
- /* This domain is shared by devices under p2p bridge */
- domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
-
- /* pcie-to-pci bridge already has a domain, uses it */
- found = NULL;
- spin_lock_irqsave(&device_domain_lock, flags);
- list_for_each_entry(tmp, &device_domain_list, global) {
- if (tmp->segment == segment &&
- tmp->bus == bus && tmp->devfn == devfn) {
- found = tmp->domain;
- break;
- }
- }
- if (found) {
- spin_unlock_irqrestore(&device_domain_lock, flags);
- free_devinfo_mem(info);
- domain_exit(domain);
- domain = found;
- } else {
- list_add(&info->link, &domain->devices);
- list_add(&info->global, &device_domain_list);
- spin_unlock_irqrestore(&device_domain_lock, flags);
- }
}
found_domain:
- info = alloc_devinfo_mem();
- if (!info)
- goto error;
- info->segment = segment;
- info->bus = pdev->bus->number;
- info->devfn = pdev->devfn;
- info->dev = pdev;
- info->domain = domain;
- spin_lock_irqsave(&device_domain_lock, flags);
- /* somebody is fast */
- found = find_domain(pdev);
- if (found != NULL) {
- spin_unlock_irqrestore(&device_domain_lock, flags);
- if (found != domain) {
- domain_exit(domain);
- domain = found;
- }
- free_devinfo_mem(info);
- return domain;
- }
- list_add(&info->link, &domain->devices);
- list_add(&info->global, &device_domain_list);
- pdev->dev.archdata.iommu = info;
- spin_unlock_irqrestore(&device_domain_lock, flags);
- return domain;
+ domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
error:
- /* recheck it here, maybe others set it */
- return find_domain(pdev);
+ if (free != domain)