aboutsummaryrefslogtreecommitdiff
path: root/drivers/pci/intel-iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r--drivers/pci/intel-iommu.c630
1 files changed, 372 insertions, 258 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index f3f686581a9..fb3a3f3fca7 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -36,6 +36,7 @@
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
+#include <linux/sysdev.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include "pci.h"
@@ -55,8 +56,8 @@
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
-#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
+#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
+#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;
@@ -164,7 +165,8 @@ static inline void context_clear_entry(struct context_entry *context)
* 1: writable
* 2-6: reserved
* 7: super page
- * 8-11: available
+ * 8-10: available
+ * 11: snoop behavior
* 12-63: Host physcial address
*/
struct dma_pte {
@@ -186,6 +188,11 @@ static inline void dma_set_pte_writable(struct dma_pte *pte)
pte->val |= DMA_PTE_WRITE;
}
+static inline void dma_set_pte_snp(struct dma_pte *pte)
+{
+ pte->val |= DMA_PTE_SNP;
+}
+
static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
pte->val = (pte->val & ~3) | (prot & 3);
@@ -231,6 +238,7 @@ struct dmar_domain {
int flags; /* flags to find out type of domain */
int iommu_coherency;/* indicate coherency of iommu access */
+ int iommu_snooping; /* indicate snooping control feature*/
int iommu_count; /* reference count of iommu */
spinlock_t iommu_lock; /* protect iommu set in domain */
u64 max_addr; /* maximum mapped address */
@@ -240,7 +248,8 @@ struct dmar_domain {
struct device_domain_info {
struct list_head link; /* link to domain siblings */
struct list_head global; /* link to global list */
- u8 bus; /* PCI bus numer */
+ int segment; /* PCI domain */
+ u8 bus; /* PCI bus number */
u8 devfn; /* PCI devfn number */
struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
struct dmar_domain *domain; /* pointer to domain */
@@ -421,7 +430,6 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
return g_iommus[iommu_id];
}
-/* "Coherency" capability may be different across iommus */
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
int i;
@@ -438,7 +446,30 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
}
}
-static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
+static void domain_update_iommu_snooping(struct dmar_domain *domain)
+{
+ int i;
+
+ domain->iommu_snooping = 1;
+
+ i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
+ for (; i < g_num_of_iommus; ) {
+ if (!ecap_sc_support(g_iommus[i]->ecap)) {
+ domain->iommu_snooping = 0;
+ break;
+ }
+ i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
+ }
+}
+
+/* Some capabilities may be different across iommus */
+static void domain_update_iommu_cap(struct dmar_domain *domain)
+{
+ domain_update_iommu_coherency(domain);
+ domain_update_iommu_snooping(domain);
+}
+
+static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
{
struct dmar_drhd_unit *drhd = NULL;
int i;
@@ -446,12 +477,20 @@ static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
+ if (segment != drhd->segment)
+ continue;
- for (i = 0; i < drhd->devices_cnt; i++)
+ for (i = 0; i < drhd->devices_cnt; i++) {
if (drhd->devices[i] &&
drhd->devices[i]->bus->number == bus &&
drhd->devices[i]->devfn == devfn)
return drhd->iommu;
+ if (drhd->devices[i] &&
+ drhd->devices[i]->subordinate &&
+ drhd->devices[i]->subordinate->number <= bus &&
+ drhd->devices[i]->subordinate->subordinate >= bus)
+ return drhd->iommu;
+ }
if (drhd->include_all)
return drhd->iommu;
@@ -689,15 +728,17 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
int addr_width = agaw_to_width(domain->agaw);
+ int npages;
start &= (((u64)1) << addr_width) - 1;
end &= (((u64)1) << addr_width) - 1;
/* in case it's partial page */
start = PAGE_ALIGN(start);
end &= PAGE_MASK;
+ npages = (end - start) / VTD_PAGE_SIZE;
/* we don't need lock here, nobody else touches the iova range */
- while (start < end) {
+ while (npages--) {
dma_pte_clear_one(domain, start);
start += VTD_PAGE_SIZE;
}
@@ -1004,194 +1045,6 @@ static int iommu_disable_translation(struct intel_iommu *iommu)
return 0;
}
-/* iommu interrupt handling. Most stuff are MSI-like. */
-
-static const char *fault_reason_strings[] =
-{
- "Software",
- "Present bit in root entry is clear",
- "Present bit in context entry is clear",
- "Invalid context entry",
- "Access beyond MGAW",
- "PTE Write access is not set",
- "PTE Read access is not set",
- "Next page table ptr is invalid",
- "Root table address invalid",
- "Context table ptr is invalid",
- "non-zero reserved fields in RTP",
- "non-zero reserved fields in CTP",
- "non-zero reserved fields in PTE",
-};
-#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
-
-const char *dmar_get_fault_reason(u8 fault_reason)
-{
- if (fault_reason > MAX_FAULT_REASON_IDX)
- return "Unknown";
- else
- return fault_reason_strings[fault_reason];
-}
-
-void dmar_msi_unmask(unsigned int irq)
-{
- struct intel_iommu *iommu = get_irq_data(irq);
- unsigned long flag;
-
- /* unmask it */
- spin_lock_irqsave(&iommu->register_lock, flag);
- writel(0, iommu->reg + DMAR_FECTL_REG);
- /* Read a reg to force flush the post write */
- readl(iommu->reg + DMAR_FECTL_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-void dmar_msi_mask(unsigned int irq)
-{
- unsigned long flag;
- struct intel_iommu *iommu = get_irq_data(irq);
-
- /* mask it */
- spin_lock_irqsave(&iommu->register_lock, flag);
- writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
- /* Read a reg to force flush the post write */
- readl(iommu->reg + DMAR_FECTL_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-void dmar_msi_write(int irq, struct msi_msg *msg)
-{
- struct intel_iommu *iommu = get_irq_data(irq);
- unsigned long flag;
-
- spin_lock_irqsave(&iommu->register_lock, flag);
- writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
- writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
- writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-void dmar_msi_read(int irq, struct msi_msg *msg)
-{
- struct intel_iommu *iommu = get_irq_data(irq);
- unsigned long flag;
-
- spin_lock_irqsave(&iommu->register_lock, flag);
- msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
- msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
- msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
- u8 fault_reason, u16 source_id, unsigned long long addr)
-{
- const char *reason;
-
- reason = dmar_get_fault_reason(fault_reason);
-
- printk(KERN_ERR
- "DMAR:[%s] Request device [%02x:%02x.%d] "
- "fault addr %llx \n"
- "DMAR:[fault reason %02d] %s\n",
- (type ? "DMA Read" : "DMA Write"),
- (source_id >> 8), PCI_SLOT(source_id & 0xFF),
- PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
- return 0;
-}
-
-#define PRIMARY_FAULT_REG_LEN (16)
-static irqreturn_t iommu_page_fault(int irq, void *dev_id)
-{
- struct intel_iommu *iommu = dev_id;
- int reg, fault_index;
- u32 fault_status;
- unsigned long flag;
-
- spin_lock_irqsave(&iommu->register_lock, flag);
- fault_status = readl(iommu->reg + DMAR_FSTS_REG);
-
- /* TBD: ignore advanced fault log currently */
- if (!(fault_status & DMA_FSTS_PPF))
- goto clear_overflow;
-
- fault_index = dma_fsts_fault_record_index(fault_status);
- reg = cap_fault_reg_offset(iommu->cap);
- while (1) {
- u8 fault_reason;
- u16 source_id;
- u64 guest_addr;
- int type;
- u32 data;
-
- /* highest 32 bits */
- data = readl(iommu->reg + reg +
- fault_index * PRIMARY_FAULT_REG_LEN + 12);
- if (!(data & DMA_FRCD_F))
- break;
-
- fault_reason = dma_frcd_fault_reason(data);
- type = dma_frcd_type(data);
-
- data = readl(iommu->reg + reg +
- fault_index * PRIMARY_FAULT_REG_LEN + 8);
- source_id = dma_frcd_source_id(data);
-
- guest_addr = dmar_readq(iommu->reg + reg +
- fault_index * PRIMARY_FAULT_REG_LEN);
- guest_addr = dma_frcd_page_addr(guest_addr);
- /* clear the fault */
- writel(DMA_FRCD_F, iommu->reg + reg +
- fault_index * PRIMARY_FAULT_REG_LEN + 12);
-
- spin_unlock_irqrestore(&iommu->register_lock, flag);
-
- iommu_page_fault_do_one(iommu, type, fault_reason,
- source_id, guest_addr);
-
- fault_index++;
- if (fault_index > cap_num_fault_regs(iommu->cap))
- fault_index = 0;
- spin_lock_irqsave(&iommu->register_lock, flag);
- }
-clear_overflow:
- /* clear primary fault overflow */
- fault_status = readl(iommu->reg + DMAR_FSTS_REG);
- if (fault_status & DMA_FSTS_PFO)
- writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
-
- spin_unlock_irqrestore(&iommu->register_lock, flag);
- return IRQ_HANDLED;
-}
-
-int dmar_set_interrupt(struct intel_iommu *iommu)
-{
- int irq, ret;
-
- irq = create_irq();
- if (!irq) {
- printk(KERN_ERR "IOMMU: no free vectors\n");
- return -EINVAL;
- }
-
- set_irq_data(irq, iommu);
- iommu->irq = irq;
-
- ret = arch_setup_dmar_msi(irq);
- if (ret) {
- set_irq_data(irq, NULL);
- iommu->irq = 0;
- destroy_irq(irq);
- return 0;
- }
-
- /* Force fault register is cleared */
- iommu_page_fault(irq, iommu);
-
- ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
- if (ret)
- printk(KERN_ERR "IOMMU: can't request irq\n");
- return ret;
-}
static int iommu_init_domains(struct intel_iommu *iommu)
{
@@ -1429,6 +1282,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
else
domain->iommu_coherency = 0;
+ if (ecap_sc_support(iommu->ecap))
+ domain->iommu_snooping = 1;
+ else
+ domain->iommu_snooping = 0;
+
domain->iommu_count = 1;
/* always allocate the top pgd */
@@ -1464,7 +1322,7 @@ static void domain_exit(struct dmar_domain *domain)
}
static int domain_context_mapping_one(struct dmar_domain *domain,
- u8 bus, u8 devfn)
+ int segment, u8 bus, u8 devfn)
{
struct context_entry *context;
unsigned long flags;
@@ -1479,7 +1337,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
BUG_ON(!domain->pgd);
- iommu = device_to_iommu(bus, devfn);
+ iommu = device_to_iommu(segment, bus, devfn);
if (!iommu)
return -ENODEV;
@@ -1557,7 +1415,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
spin_lock_irqsave(&domain->iommu_lock, flags);
if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
domain->iommu_count++;
- domain_update_iommu_coherency(domain);
+ domain_update_iommu_cap(domain);
}
spin_unlock_irqrestore(&domain->iommu_lock, flags);
return 0;
@@ -1569,8 +1427,8 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
int ret;
struct pci_dev *tmp, *parent;
- ret = domain_context_mapping_one(domain, pdev->bus->number,
- pdev->devfn);
+ ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
+ pdev->bus->number, pdev->devfn);
if (ret)
return ret;
@@ -1581,18 +1439,23 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
/* Secondary interface's bus number and devfn 0 */
parent = pdev->bus->self;
while (parent != tmp) {
- ret = domain_context_mapping_one(domain, parent->bus->number,
- parent->devfn);
+ ret = domain_context_mapping_one(domain,
+ pci_domain_nr(parent->bus),
+ parent->bus->number,
+ parent->devfn);
if (ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
return domain_context_mapping_one(domain,
- tmp->subordinate->number, 0);
+ pci_domain_nr(tmp->subordinate),
+ tmp->subordinate->number, 0);
else /* this is a legacy PCI bridge */
return domain_context_mapping_one(domain,
- tmp->bus->number, tmp->devfn);
+ pci_domain_nr(tmp->bus),
+ tmp->bus->number,
+ tmp->devfn);
}
static int domain_context_mapped(struct pci_dev *pdev)
@@ -1601,12 +1464,12 @@ static int domain_context_mapped(struct pci_dev *pdev)
struct pci_dev *tmp, *parent;
struct intel_iommu *iommu;
- iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+ iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
+ pdev->devfn);
if (!iommu)
return -ENODEV;
- ret = device_context_mapped(iommu,
- pdev->bus->number, pdev->devfn);
+ ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
if (!ret)
return ret;
/* dependent device mapping */
@@ -1617,17 +1480,17 @@ static int domain_context_mapped(struct pci_dev *pdev)
parent = pdev->bus->self;
while (parent != tmp) {
ret = device_context_mapped(iommu, parent->bus->number,
- parent->devfn);
+ parent->devfn);
if (!ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie)
- return device_context_mapped(iommu,
- tmp->subordinate->number, 0);
+ return device_context_mapped(iommu, tmp->subordinate->number,
+ 0);
else
- return device_context_mapped(iommu,
- tmp->bus->number, tmp->devfn);
+ return device_context_mapped(iommu, tmp->bus->number,
+ tmp->devfn);
}
static int
@@ -1657,6 +1520,8 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
BUG_ON(dma_pte_addr(pte));
dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
dma_set_pte_prot(pte, prot);
+ if (prot & DMA_PTE_SNP)
+ dma_set_pte_snp(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
start_pfn++;
index++;
@@ -1692,7 +1557,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
info->dev->dev.archdata.iommu = NULL;
spin_unlock_irqrestore(&device_domain_lock, flags);
- iommu = device_to_iommu(info->bus, info->devfn);
+ iommu = device_to_iommu(info->segment, info->bus, info->devfn);
iommu_detach_dev(iommu, info->bus, info->devfn);
free_devinfo_mem(info);
@@ -1727,11 +1592,14 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
struct pci_dev *dev_tmp;
unsigned long flags;
int bus = 0, devfn = 0;
+ int segment;
domain = find_domain(pdev);
if (domain)
return domain;
+ segment = pci_domain_nr(pdev->bus);
+
dev_tmp = pci_find_upstream_pcie_bridge(pdev);
if (dev_tmp) {
if (dev_tmp->is_pcie) {
@@ -1743,7 +1611,8 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
}
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry(info, &device_domain_list, global) {
- if (info->bus == bus && info->devfn == devfn) {
+ if (info->segment == segment &&
+ info->bus == bus && info->devfn == devfn) {
found = info->domain;
break;
}
@@ -1781,6 +1650,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
domain_exit(domain);
goto error;
}
+ info->segment = segment;
info->bus = bus;
info->devfn = devfn;
info->dev = NULL;
@@ -1792,7 +1662,8 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
found = NULL;
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry(tmp, &device_domain_list, global) {
- if (tmp->bus == bus && tmp->devfn == devfn) {
+ if (tmp->segment == segment &&
+ tmp->bus == bus && tmp->devfn == devfn) {
found = tmp->domain;
break;
}
@@ -1812,6 +1683,7 @@ found_domain:
info = alloc_devinfo_mem();
if (!info)
goto error;
+ info->segment = segment;
info->bus = pdev->bus->number;
info->devfn = pdev->devfn;
info->dev = pdev;
@@ -1970,7 +1842,7 @@ static inline void iommu_prepare_isa(void)
ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
if (ret)
- printk("IOMMU: Failed to create 0-64M identity map, "
+ printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, "
"floppy might not work\n");
}
@@ -1987,7 +1859,7 @@ static int __init init_dmars(void)
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;
- int i, ret, unit = 0;
+ int i, ret;
/*
* for each drhd
@@ -2043,11 +1915,40 @@ static int __init init_dmars(void)
}
}
+ /*
+ * Start from the sane iommu hardware state.
+ */
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+
+ iommu = drhd->iommu;
+
+ /*
+ * If the queued invalidation is already initialized by us
+ * (for example, while enabling interrupt-remapping) then
+ * we got the things already rolling from a sane state.
+ */
+ if (iommu->qi)
+ continue;
+
+ /*
+ * Clear any previous faults.
+ */
+ dmar_fault(-1, iommu);
+ /*
+ * Disable queued invalidation if supported and already enabled
+ * before OS handover.
+ */
+ dmar_disable_qi(iommu);
+ }
+
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
iommu = drhd->iommu;
+
if (dmar_enable_qi(iommu)) {
/*
* Queued Invalidate not enabled, use Register Based
@@ -2067,6 +1968,15 @@ static int __init init_dmars(void)
}
}
+#ifdef CONFIG_INTR_REMAP
+ if (!intr_remapping_enabled) {
+ ret = enable_intr_remapping(0);
+ if (ret)
+ printk(KERN_ERR
+ "IOMMU: enable interrupt remapping failed\n");
+ }
+#endif
+
/*
* For each rmrr
* for each dev attached to rmrr
@@ -2109,7 +2019,6 @@ static int __init init_dmars(void)
if (drhd->ignored)
continue;
iommu = drhd->iommu;
- sprintf (iommu->name, "dmar%d", unit++);
iommu_flush_write_buffer(iommu);
@@ -2171,15 +2080,15 @@ __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
struct pci_dev *pdev = to_pci_dev(dev);
struct iova *iova = NULL;
- if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
+ if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac)
iova = iommu_alloc_iova(domain, size, dma_mask);
else {
/*
* First try to allocate an io virtual address in
- * DMA_32BIT_MASK and if that fails then try allocating
+ * DMA_BIT_MASK(32) and if that fails then try allocating
* from higher range
*/
- iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
+ iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32));
if (!iova)
iova = iommu_alloc_iova(domain, size, dma_mask);
}
@@ -2279,16 +2188,18 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
error:
if (iova)
__free_iova(&domain->iovad, iova);
- printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
+ printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
pci_name(pdev), size, (unsigned long long)paddr, dir);
return 0;
}
-dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
- size_t size, int dir)
+static dma_addr_t intel_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
- return __intel_map_single(hwdev, paddr, size, dir,
- to_pci_dev(hwdev)->dma_mask);
+ return __intel_map_single(dev, page_to_phys(page) + offset, size,
+ dir, to_pci_dev(dev)->dma_mask);
}
static void flush_unmaps(void)
@@ -2352,8 +2263,9 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}
-void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
- int dir)
+static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct dmar_domain *domain;
@@ -2375,7 +2287,7 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
start_addr = iova->pfn_lo << PAGE_SHIFT;
size = aligned_size((u64)dev_addr, size);
- pr_debug("Device %s unmapping: %lx@%llx\n",
+ pr_debug("Device %s unmapping: %zx@%llx\n",
pci_name(pdev), size, (unsigned long long)start_addr);
/* clear the whole page */
@@ -2397,8 +2309,14 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
}
}
-void *intel_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags)
+static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
+ int dir)
+{
+ intel_unmap_page(dev, dev_addr, size, dir, NULL);
+}
+
+static void *intel_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags)
{
void *vaddr;
int order;
@@ -2421,8 +2339,8 @@ void *intel_alloc_coherent(struct device *hwdev, size_t size,
return NULL;
}
-void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
- dma_addr_t dma_handle)
+static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
+ dma_addr_t dma_handle)
{
int order;
@@ -2433,10 +2351,9 @@ void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
free_pages((unsigned long)vaddr, order);
}
-#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
-
-void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
- int nelems, int dir)
+static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
int i;
struct pci_dev *pdev = to_pci_dev(hwdev);
@@ -2444,7 +2361,7 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
unsigned long start_addr;
struct iova *iova;
size_t size = 0;
- void *addr;
+ phys_addr_t addr;
struct scatterlist *sg;
struct intel_iommu *iommu;
@@ -2460,7 +2377,7 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
if (!iova)
return;
for_each_sg(sglist, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
+ addr = page_to_phys(sg_page(sg)) + sg->offset;
size += aligned_size((u64)addr, sg->length);
}
@@ -2487,16 +2404,16 @@ static int intel_nontranslate_map_sg(struct device *hddev,
for_each_sg(sglist, sg, nelems, i) {
BUG_ON(!sg_page(sg));
- sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
+ sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
sg->dma_length = sg->length;
}
return nelems;
}
-int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
- int dir)
+static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
- void *addr;
+ phys_addr_t addr;
int i;
struct pci_dev *pdev = to_pci_dev(hwdev);
struct dmar_domain *domain;
@@ -2520,8 +2437,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
iommu = domain_get_iommu(domain);
for_each_sg(sglist, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
- addr = (void *)virt_to_phys(addr);
+ addr = page_to_phys(sg_page(sg)) + sg->offset;
size += aligned_size((u64)addr, sg->length);
}
@@ -2544,8 +2460,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
start_addr = iova->pfn_lo << PAGE_SHIFT;
offset = 0;
for_each_sg(sglist, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
- addr = (void *)virt_to_phys(addr);
+ addr = page_to_phys(sg_page(sg)) + sg->offset;
size = aligned_size((u64)addr, sg->length);
ret = domain_page_mapping(domain, start_addr + offset,
((u64)addr) & PAGE_MASK,
@@ -2574,13 +2489,19 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
return nelems;
}
-static struct dma_mapping_ops intel_dma_ops = {
+static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return !dma_addr;
+}
+
+struct dma_map_ops intel_dma_ops = {
.alloc_coherent = intel_alloc_coherent,
.free_coherent = intel_free_coherent,
- .map_single = intel_map_single,
- .unmap_single = intel_unmap_single,
.map_sg = intel_map_sg,
.unmap_sg = intel_unmap_sg,
+ .map_page = intel_map_page,
+ .unmap_page = intel_unmap_page,
+ .mapping_error = intel_mapping_error,
};
static inline int iommu_domain_cache_init(void)
@@ -2707,6 +2628,150 @@ static void __init init_no_remapping_devices(void)
}
}
+#ifdef CONFIG_SUSPEND
+static int init_iommu_hw(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu = NULL;
+
+ for_each_active_iommu(iommu, drhd)
+ if (iommu->qi)
+ dmar_reenable_qi(iommu);
+
+ for_each_active_iommu(iommu, drhd) {
+ iommu_flush_write_buffer(iommu);
+
+ iommu_set_root_entry(iommu);
+
+ iommu->flush.flush_context(iommu, 0, 0, 0,
+ DMA_CCMD_GLOBAL_INVL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0,
+ DMA_TLB_GLOBAL_FLUSH, 0);
+ iommu_disable_protect_mem_regions(iommu);
+ iommu_enable_translation(iommu);
+ }
+
+ return 0;
+}
+
+static void iommu_flush_all(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+
+ for_each_active_iommu(iommu, drhd) {
+ iommu->flush.flush_context(iommu, 0, 0, 0,
+ DMA_CCMD_GLOBAL_INVL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0,
+ DMA_TLB_GLOBAL_FLUSH, 0);
+ }
+}
+
+static int iommu_suspend(struct sys_device *dev, pm_message_t state)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu = NULL;
+ unsigned long flag;
+
+ for_each_active_iommu(iommu, drhd) {
+ iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
+ GFP_ATOMIC);
+ if (!iommu->iommu_state)
+ goto nomem;
+ }
+
+ iommu_flush_all();
+
+ for_each_active_iommu(iommu, drhd) {
+ iommu_disable_translation(iommu);
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+
+ iommu->iommu_state[SR_DMAR_FECTL_REG] =
+ readl(iommu->reg + DMAR_FECTL_REG);
+ iommu->iommu_state[SR_DMAR_FEDATA_REG] =
+ readl(iommu->reg + DMAR_FEDATA_REG);
+ iommu->iommu_state[SR_DMAR_FEADDR_REG] =
+ readl(iommu->reg + DMAR_FEADDR_REG);
+ iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
+ readl(iommu->reg + DMAR_FEUADDR_REG);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+ }
+ return 0;
+
+nomem:
+ for_each_active_iommu(iommu, drhd)
+ kfree(iommu->iommu_state);
+
+ return -ENOMEM;
+}
+
+static int iommu_resume(struct sys_device *dev)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu = NULL;
+ unsigned long flag;
+
+ if (init_iommu_hw()) {
+ WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
+ return -EIO;
+ }
+
+ for_each_active_iommu(iommu, drhd) {
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+
+ writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
+ iommu->reg + DMAR_FECTL_REG);
+ writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
+ iommu->reg + DMAR_FEDATA_REG);
+ writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
+ iommu->reg + DMAR_FEADDR_REG);
+ writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
+ iommu->reg + DMAR_FEUADDR_REG);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+ }
+
+ for_each_active_iommu(iommu, drhd)
+ kfree(iommu->iommu_state);
+
+ return 0;
+}
+
+static struct sysdev_class iommu_sysclass = {
+ .name = "iommu",
+ .resume = iommu_resume,
+ .suspend = iommu_suspend,
+};
+
+static struct sys_device device_iommu = {
+ .cls = &iommu_sysclass,
+};
+
+static int __init init_iommu_sysfs(void)
+{
+ int error;
+
+ error = sysdev_class_register(&iommu_sysclass);
+ if (error)
+ return error;
+
+ error = sysdev_register(&device_iommu);
+ if (error)
+ sysdev_class_unregister(&iommu_sysclass);
+
+ return error;
+}
+
+#else
+static int __init init_iommu_sysfs(void)
+{
+ return 0;
+}
+#endif /* CONFIG_PM */
+
int __init intel_iommu_init(void)
{
int ret = 0;
@@ -2742,6 +2807,7 @@ int __init intel_iommu_init(void)
init_timer(&unmap_timer);
force_iommu = 1;
dma_ops = &intel_dma_ops;
+ init_iommu_sysfs();
register_iommu(&intel_iommu_ops);
@@ -2758,6 +2824,7 @@ static int vm_domain_add_dev_info(struct dmar_domain *domain,
if (!info)
return -ENOMEM;
+ info->segment = pci_domain_nr(pdev->bus);
info->bus = pdev->bus->number;
info->devfn = pdev->devfn;
info->dev = pdev;
@@ -2772,6 +2839,33 @@ static int vm_domain_add_dev_info(struct dmar_domain *domain,
return 0;
}
+static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
+ struct pci_dev *pdev)
+{
+ struct pci_dev *tmp, *parent;
+
+ if (!iommu || !pdev)
+ return;
+
+ /* dependent device detach */
+ tmp = pci_find_upstream_pcie_bridge(pdev);
+ /* Secondary interface's bus number and devfn 0 */
+ if (tmp) {
+ parent = pdev->bus->self;
+ while (parent != tmp) {
+ iommu_detach_dev(iommu, parent->bus->number,
+ parent->devfn);
+ parent = parent->bus->self;
+ }
+ if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
+ iommu_detach_dev(iommu,
+ tmp->subordinate->number, 0);
+ else /* this is a legacy PCI bridge */
+ iommu_detach_dev(iommu, tmp->bus->number,
+ tmp->devfn);
+ }
+}
+
static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
struct pci_dev *pdev)
{
@@ -2781,13 +2875,15 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
int found = 0;
struct list_head *entry, *tmp;
- iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+ iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
+ pdev->devfn);
if (!iommu)
return;
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_safe(entry, tmp, &domain->devices) {
info = list_entry(entry, struct device_domain_info, link);
+ /* No need to compare PCI domain; it has to be the same */
if (info->bus == pdev->bus->number &&
info->devfn == pdev->devfn) {
list_del(&info->link);
@@ -2797,6 +2893,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
spin_unlock_irqrestore(&device_domain_lock, flags);
iommu_detach_dev(iommu, info->bus, info->devfn);
+ iommu_detach_dependent_devices(iommu, pdev);
free_devinfo_mem(info);
spin_lock_irqsave(&device_domain_lock, flags);
@@ -2811,7 +2908,8 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
* owned by this domain, clear this iommu in iommu_bmp
* update iommu count and coherency
*/
- if (device_to_iommu(info->bus, info->devfn) == iommu)
+ if (iommu == device_to_iommu(info->segment, info->bus,
+ info->devfn))
found = 1;
}
@@ -2820,7 +2918,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
clear_bit(iommu->seq_id, &domain->iommu_bmp);
domain->iommu_count--;
- domain_update_iommu_coherency(domain);
+ domain_update_iommu_cap(domain);
spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
}
@@ -2844,17 +2942,18 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
spin_unlock_irqrestore(&device_domain_lock, flags1);
- iommu = device_to_iommu(info->bus, info->devfn);
+ iommu = device_to_iommu(info->segment, info->bus, info->devfn);
iommu_detach_dev(iommu, info->bus, info->devfn);
+ iommu_detach_dependent_devices(iommu, info->dev);
/* clear this iommu in iommu_bmp, update iommu count
- * and coherency
+ * and capabilities
*/
spin_lock_irqsave(&domain->iommu_lock, flags2);
if (test_and_clear_bit(iommu->seq_id,
&domain->iommu_bmp)) {
domain->iommu_count--;
- domain_update_iommu_coherency(domain);
+ domain_update_iommu_cap(domain);
}
spin_unlock_irqrestore(&domain->iommu_lock, flags2);
@@ -3031,7 +3130,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
}
}
- iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+ iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
+ pdev->devfn);
if (!iommu)
return -ENODEV;
@@ -3077,6 +3177,8 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
prot |= DMA_PTE_READ;
if (iommu_prot & IOMMU_WRITE)
prot |= DMA_PTE_WRITE;
+ if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
+ prot |= DMA_PTE_SNP;
max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
if (dmar_domain->max_addr < max_addr) {
@@ -3130,6 +3232,17 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
return phys;
}
+static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
+ unsigned long cap)
+{
+ struct dmar_domain *dmar_domain = domain->priv;
+
+ if (cap == IOMMU_CAP_CACHE_COHERENCY)
+ return dmar_domain->iommu_snooping;
+
+ return 0;
+}
+
static struct iommu_ops intel_iommu_ops = {
.domain_init = intel_iommu_domain_init,
.domain_destroy = intel_iommu_domain_destroy,
@@ -3138,6 +3251,7 @@ static struct iommu_ops intel_iommu_ops = {
.map = intel_iommu_map_range,
.unmap = intel_iommu_unmap_range,
.iova_to_phys = intel_iommu_iova_to_phys,
+ .domain_has_cap = intel_iommu_domain_has_cap,
};
static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)