diff options
Diffstat (limited to 'drivers/iommu/amd_iommu.c')
| -rw-r--r-- | drivers/iommu/amd_iommu.c | 322 |
1 files changed, 169 insertions, 153 deletions
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index c1c74e030a5..4aec6a29e31 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -46,6 +46,7 @@ #include "amd_iommu_proto.h" #include "amd_iommu_types.h" #include "irq_remapping.h" +#include "pci.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -173,7 +174,7 @@ static inline u16 get_device_id(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); - return calc_devid(pdev->bus->number, pdev->devfn); + return PCI_DEVID(pdev->bus->number, pdev->devfn); } static struct iommu_dev_data *get_dev_data(struct device *dev) @@ -247,8 +248,8 @@ static bool check_device(struct device *dev) if (!dev || !dev->dma_mask) return false; - /* No device or no PCI device */ - if (dev->bus != &pci_bus_type) + /* No PCI device */ + if (!dev_is_pci(dev)) return false; devid = get_device_id(dev); @@ -263,12 +264,6 @@ static bool check_device(struct device *dev) return true; } -static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) -{ - pci_dev_put(*from); - *from = to; -} - static struct pci_bus *find_hosted_bus(struct pci_bus *bus) { while (!bus->self) { @@ -292,14 +287,27 @@ static struct pci_dev *get_isolation_root(struct pci_dev *pdev) /* * If it's a multifunction device that does not support our - * required ACS flags, add to the same group as function 0. + * required ACS flags, add to the same group as lowest numbered + * function that also does not suport the required ACS flags. */ if (dma_pdev->multifunction && - !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) - swap_pci_ref(&dma_pdev, - pci_get_slot(dma_pdev->bus, - PCI_DEVFN(PCI_SLOT(dma_pdev->devfn), - 0))); + !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { + u8 i, slot = PCI_SLOT(dma_pdev->devfn); + + for (i = 0; i < 8; i++) { + struct pci_dev *tmp; + + tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); + if (!tmp) + continue; + + if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { + swap_pci_ref(&dma_pdev, tmp); + break; + } + pci_dev_put(tmp); + } + } /* * Devices on the root bus go through the iommu. If that's not us, @@ -448,8 +456,10 @@ static int iommu_init_device(struct device *dev) } ret = init_iommu_group(dev); - if (ret) + if (ret) { + free_dev_data(dev_data); return ret; + } if (pci_iommuv2_capable(pdev)) { struct amd_iommu *iommu; @@ -649,26 +659,26 @@ retry: case EVENT_TYPE_ILL_DEV: printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " "address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); dump_dte_entry(devid); break; case EVENT_TYPE_IO_FAULT: printk("IO_PAGE_FAULT device=%02x:%02x.%x " "domain=0x%04x address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), domid, address, flags); break; case EVENT_TYPE_DEV_TAB_ERR: printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " "address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " "domain=0x%04x address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), domid, address, flags); break; case EVENT_TYPE_ILL_CMD: @@ -682,13 +692,13 @@ retry: case EVENT_TYPE_IOTLB_INV_TO: printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " "address=0x%016llx]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address); break; case EVENT_TYPE_INV_DEV_REQ: printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " "address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); break; default: @@ -701,9 +711,6 @@ retry: static void iommu_poll_events(struct amd_iommu *iommu) { u32 head, tail; - unsigned long flags; - - spin_lock_irqsave(&iommu->lock, flags); head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); @@ -714,8 +721,6 @@ static void iommu_poll_events(struct amd_iommu *iommu) } writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); - - spin_unlock_irqrestore(&iommu->lock, flags); } static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) @@ -740,17 +745,11 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) static void iommu_poll_ppr_log(struct amd_iommu *iommu) { - unsigned long flags; u32 head, tail; if (iommu->ppr_log == NULL) return; - /* enable ppr interrupts again */ - writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); - - spin_lock_irqsave(&iommu->lock, flags); - head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); @@ -786,34 +785,50 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - /* - * Release iommu->lock because ppr-handling might need to - * re-acquire it - */ - spin_unlock_irqrestore(&iommu->lock, flags); - /* Handle PPR entry */ iommu_handle_ppr_entry(iommu, entry); - spin_lock_irqsave(&iommu->lock, flags); - /* Refresh ring-buffer information */ head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); } - - spin_unlock_irqrestore(&iommu->lock, flags); } irqreturn_t amd_iommu_int_thread(int irq, void *data) { - struct amd_iommu *iommu; + struct amd_iommu *iommu = (struct amd_iommu *) data; + u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - for_each_iommu(iommu) { - iommu_poll_events(iommu); - iommu_poll_ppr_log(iommu); - } + while (status & (MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK)) { + /* Enable EVT and PPR interrupts again */ + writel((MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK), + iommu->mmio_base + MMIO_STATUS_OFFSET); + + if (status & MMIO_STATUS_EVT_INT_MASK) { + pr_devel("AMD-Vi: Processing IOMMU Event Log\n"); + iommu_poll_events(iommu); + } + if (status & MMIO_STATUS_PPR_INT_MASK) { + pr_devel("AMD-Vi: Processing IOMMU PPR Log\n"); + iommu_poll_ppr_log(iommu); + } + + /* + * Hardware bug: ERBT1312 + * When re-enabling interrupt (by writing 1 + * to clear the bit), the hardware might also try to set + * the interrupt bit in the event status register. + * In this scenario, the bit will be set, and disable + * subsequent interrupts. + * + * Workaround: The IOMMU driver should read back the + * status register and check if the interrupt bits are cleared. + * If not, driver will need to go through the interrupt handler + * again and re-clear the bits + */ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + } return IRQ_HANDLED; } @@ -948,7 +963,7 @@ static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid, address &= ~(0xfffULL); - cmd->data[0] = pasid & PASID_MASK; + cmd->data[0] = pasid; cmd->data[1] = domid; cmd->data[2] = lower_32_bits(address); cmd->data[3] = upper_32_bits(address); @@ -967,10 +982,10 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid, address &= ~(0xfffULL); cmd->data[0] = devid; - cmd->data[0] |= (pasid & 0xff) << 16; + cmd->data[0] |= ((pasid >> 8) & 0xff) << 16; cmd->data[0] |= (qdep & 0xff) << 24; cmd->data[1] = devid; - cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16; + cmd->data[1] |= (pasid & 0xff) << 16; cmd->data[2] = lower_32_bits(address); cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; cmd->data[3] = upper_32_bits(address); @@ -986,7 +1001,7 @@ static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid, cmd->data[0] = devid; if (gn) { - cmd->data[1] = pasid & PASID_MASK; + cmd->data[1] = pasid; cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK; } cmd->data[3] = tag & 0x1ff; @@ -1484,6 +1499,10 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, /* Large PTE found which maps this address */ unmap_size = PTE_PAGE_SIZE(*pte); + + /* Only unmap from the first pte in the page */ + if ((unmap_size - 1) & bus_addr) + break; count = PAGE_SIZE_PTE_COUNT(unmap_size); for (i = 0; i < count; i++) pte[i] = 0ULL; @@ -1493,7 +1512,7 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, unmapped += unmap_size; } - BUG_ON(!is_power_of_2(unmapped)); + BUG_ON(unmapped && !is_power_of_2(unmapped)); return unmapped; } @@ -1893,34 +1912,59 @@ static void domain_id_free(int id) write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } +#define DEFINE_FREE_PT_FN(LVL, FN) \ +static void free_pt_##LVL (unsigned long __pt) \ +{ \ + unsigned long p; \ + u64 *pt; \ + int i; \ + \ + pt = (u64 *)__pt; \ + \ + for (i = 0; i < 512; ++i) { \ + if (!IOMMU_PTE_PRESENT(pt[i])) \ + continue; \ + \ + p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ + FN(p); \ + } \ + free_page((unsigned long)pt); \ +} + +DEFINE_FREE_PT_FN(l2, free_page) +DEFINE_FREE_PT_FN(l3, free_pt_l2) +DEFINE_FREE_PT_FN(l4, free_pt_l3) +DEFINE_FREE_PT_FN(l5, free_pt_l4) +DEFINE_FREE_PT_FN(l6, free_pt_l5) + static void free_pagetable(struct protection_domain *domain) { - int i, j; - u64 *p1, *p2, *p3; - - p1 = domain->pt_root; - - if (!p1) - return; + unsigned long root = (unsigned long)domain->pt_root; - for (i = 0; i < 512; ++i) { - if (!IOMMU_PTE_PRESENT(p1[i])) - continue; - - p2 = IOMMU_PTE_PAGE(p1[i]); - for (j = 0; j < 512; ++j) { - if (!IOMMU_PTE_PRESENT(p2[j])) - continue; - p3 = IOMMU_PTE_PAGE(p2[j]); - free_page((unsigned long)p3); - } - - free_page((unsigned long)p2); + switch (domain->mode) { + case PAGE_MODE_NONE: + break; + case PAGE_MODE_1_LEVEL: + free_page(root); + break; + case PAGE_MODE_2_LEVEL: + free_pt_l2(root); + break; + case PAGE_MODE_3_LEVEL: + free_pt_l3(root); + break; + case PAGE_MODE_4_LEVEL: + free_pt_l4(root); + break; + case PAGE_MODE_5_LEVEL: + free_pt_l5(root); + break; + case PAGE_MODE_6_LEVEL: + free_pt_l6(root); + break; + default: + BUG(); } - - free_page((unsigned long)p1); - - domain->pt_root = NULL; } static void free_gcr3_tbl_level1(u64 *tbl) @@ -2466,18 +2510,16 @@ static int device_change_notifier(struct notifier_block *nb, /* allocate a protection domain if a device is added */ dma_domain = find_protection_domain(devid); - if (dma_domain) - goto out; - dma_domain = dma_ops_domain_alloc(); - if (!dma_domain) - goto out; - dma_domain->target_dev = devid; - - spin_lock_irqsave(&iommu_pd_list_lock, flags); - list_add_tail(&dma_domain->list, &iommu_pd_list); - spin_unlock_irqrestore(&iommu_pd_list_lock, flags); - - dev_data = get_dev_data(dev); + if (!dma_domain) { + dma_domain = dma_ops_domain_alloc(); + if (!dma_domain) + goto out; + dma_domain->target_dev = devid; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + list_add_tail(&dma_domain->list, &iommu_pd_list); + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + } dev->archdata.dma_ops = &amd_iommu_dma_ops; @@ -2841,24 +2883,6 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, } /* - * This is a special map_sg function which is used if we should map a - * device which is not handled by an AMD IOMMU in the system. - */ -static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) -{ - struct scatterlist *s; - int i; - - for_each_sg(sglist, s, nelems, i) { - s->dma_address = (dma_addr_t)sg_phys(s); - s->dma_length = s->length; - } - - return nelems; -} - -/* * The exported map_sg function for dma_ops (handles scatter-gather * lists). */ @@ -2877,9 +2901,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_map_sg); domain = get_domain(dev); - if (PTR_ERR(domain) == -EINVAL) - return map_sg_no_iommu(dev, sglist, nelems, dir); - else if (IS_ERR(domain)) + if (IS_ERR(domain)) return 0; dma_mask = *dev->dma_mask; @@ -3187,8 +3209,7 @@ int __init amd_iommu_init_dma_ops(void) free_domains: for_each_iommu(iommu) { - if (iommu->default_dom) - dma_ops_domain_free(iommu->default_dom); + dma_ops_domain_free(iommu->default_dom); } return ret; @@ -3413,7 +3434,7 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, - unsigned long iova) + dma_addr_t iova) { struct protection_domain *domain = dom->priv; unsigned long offset_mask; @@ -3478,8 +3499,6 @@ int __init amd_iommu_init_passthrough(void) { struct iommu_dev_data *dev_data; struct pci_dev *dev = NULL; - struct amd_iommu *iommu; - u16 devid; int ret; ret = alloc_passthrough_domain(); @@ -3493,12 +3512,6 @@ int __init amd_iommu_init_passthrough(void) dev_data = get_dev_data(&dev->dev); dev_data->passthrough = true; - devid = get_device_id(&dev->dev); - - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) - continue; - attach_device(&dev->dev, pt_domain); } @@ -3950,6 +3963,9 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) if (!table) goto out; + /* Initialize table spin-lock */ + spin_lock_init(&table->lock); + if (ioapic) /* Keep the first 32 indexes free for IOAPIC interrupts */ table->min_index = 32; @@ -3975,7 +3991,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) iommu_flush_dte(iommu, devid); if (devid != alias) { irq_lookup_table[alias] = table; - set_dte_irq_entry(devid, table); + set_dte_irq_entry(alias, table); iommu_flush_dte(iommu, alias); } @@ -4010,17 +4026,17 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) c = 0; if (c == count) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; for (; c != 0; --c) table->table[index - c + 1] = IRTE_ALLOCATED; index -= count - 1; - irte_info = &cfg->irq_2_iommu; - irte_info->sub_handle = devid; - irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; + cfg->remapped = 1; + irte_info = &cfg->irq_2_irte; + irte_info->devid = devid; + irte_info->index = index; goto out; } @@ -4101,7 +4117,7 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, struct io_apic_irq_attr *attr) { struct irq_remap_table *table; - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; struct irq_cfg *cfg; union irte irte; int ioapic_id; @@ -4113,7 +4129,7 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, if (!cfg) return -EINVAL; - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; ioapic_id = mpc_ioapic_id(attr->ioapic); devid = get_ioapic_devid(ioapic_id); @@ -4127,9 +4143,9 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, index = attr->ioapic_pin; /* Setup IRQ remapping info */ - irte_info->sub_handle = devid; - irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; + cfg->remapped = 1; + irte_info->devid = devid; + irte_info->index = index; /* Setup IRTE for IOMMU */ irte.val = 0; @@ -4163,7 +4179,7 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, static int set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; unsigned int dest, irq; struct irq_cfg *cfg; union irte irte; @@ -4174,12 +4190,12 @@ static int set_affinity(struct irq_data *data, const struct cpumask *mask, cfg = data->chip_data; irq = data->irq; - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; if (!cpumask_intersects(mask, cpu_online_mask)) return -EINVAL; - if (get_irte(irte_info->sub_handle, irte_info->irte_index, &irte)) + if (get_irte(irte_info->devid, irte_info->index, &irte)) return -EBUSY; if (assign_irq_vector(irq, cfg, mask)) @@ -4195,7 +4211,7 @@ static int set_affinity(struct irq_data *data, const struct cpumask *mask, irte.fields.vector = cfg->vector; irte.fields.destination = dest; - modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + modify_irte(irte_info->devid, irte_info->index, irte); if (cfg->move_in_progress) send_cleanup_vector(cfg); @@ -4207,16 +4223,16 @@ static int set_affinity(struct irq_data *data, const struct cpumask *mask, static int free_irq(int irq) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; struct irq_cfg *cfg; cfg = irq_get_chip_data(irq); if (!cfg) return -EINVAL; - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; - free_irte(irte_info->sub_handle, irte_info->irte_index); + free_irte(irte_info->devid, irte_info->index); return 0; } @@ -4225,7 +4241,7 @@ static void compose_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; struct irq_cfg *cfg; union irte irte; @@ -4233,7 +4249,7 @@ static void compose_msi_msg(struct pci_dev *pdev, if (!cfg) return; - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; irte.val = 0; irte.fields.vector = cfg->vector; @@ -4242,11 +4258,11 @@ static void compose_msi_msg(struct pci_dev *pdev, irte.fields.dm = apic->irq_dest_mode; irte.fields.valid = 1; - modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + modify_irte(irte_info->devid, irte_info->index, irte); msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO; - msg->data = irte_info->irte_index; + msg->data = irte_info->index; } static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) @@ -4271,7 +4287,7 @@ static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, int index, int offset) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; struct irq_cfg *cfg; u16 devid; @@ -4286,18 +4302,18 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, return 0; devid = get_device_id(&pdev->dev); - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; - irte_info->sub_handle = devid; - irte_info->irte_index = index + offset; - irte_info->iommu = (void *)cfg; + cfg->remapped = 1; + irte_info->devid = devid; + irte_info->index = index + offset; return 0; } static int setup_hpet_msi(unsigned int irq, unsigned int id) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; struct irq_cfg *cfg; int index, devid; @@ -4305,7 +4321,7 @@ static int setup_hpet_msi(unsigned int irq, unsigned int id) if (!cfg) return -EINVAL; - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; devid = get_hpet_devid(id); if (devid < 0) return devid; @@ -4314,9 +4330,9 @@ static int setup_hpet_msi(unsigned int irq, unsigned int id) if (index < 0) return index; - irte_info->sub_handle = devid; - irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; + cfg->remapped = 1; + irte_info->devid = devid; + irte_info->index = index; return 0; } |
