From 761434a318a64bf521f8abcc920e1d9837640fa2 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 6 Nov 2009 16:22:44 +0900 Subject: PCI ASPM: fix oops on root port removal Fix the following BUG_ON() problem reported by Alex Chiang. This problem happened when removing a PCIe root port using the PCI logical hotplug operation. The immediate cause of this problem is that a pointer to an invalid data structure is passed to pcie_update_aspm_capable() by pcie_aspm_exit_link_state(). When pcie_aspm_exit_link_state() receives a pointer to a root port link, it first unconfigures the root port link and frees its data structure. At this point, there are no links left to configure under the root port and the data structure for the root port link is already freed. So pcie_aspm_exit_link_state() must not call pcie_update_aspm_capable() and pcie_config_aspm_path(). This patch fixes the problem by changing pcie_aspm_exit_link_state() not to call pcie_update_aspm_capable() and pcie_config_aspm_path() if the specified link is a root port link. ------------[ cut here ]------------ kernel BUG at drivers/pci/pcie/aspm.c:606! 
invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC last sysfs file: /sys/devices/pci0000:40/0000:40:13.0/remove CPU 1 Modules linked in: shpchp Pid: 9345, comm: sysfsd Not tainted 2.6.32-rc5 #98 ProLiant DL785 G6 RIP: 0010:[] [] pcie_update_aspm_capable+0x15/0xbe RSP: 0018:ffff88082a2f5ca0 EFLAGS: 00010202 RAX: 0000000000000e77 RBX: ffff88182cc3e000 RCX: ffff88082a33d006 RDX: 0000000000000001 RSI: ffffffff811dff4a RDI: ffff88182cc3e000 RBP: ffff88082a2f5cc0 R08: ffff88182cc3e000 R09: 0000000000000000 R10: ffff88182fc00180 R11: ffff88182fc00198 R12: ffff88182cc3e000 R13: 0000000000000000 R14: ffff88182cc3e000 R15: ffff88082a2f5e20 FS: 00007f259a64b6f0(0000) GS:ffff880864600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 00007feb53f73da0 CR3: 000000102cc94000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process sysfsd (pid: 9345, threadinfo ffff88082a2f4000, task ffff88082a33cf00) Stack: ffff88182cc3e000 ffff88182cc3e000 0000000000000000 ffff88082a33cf00 <0> ffff88082a2f5cf0 ffffffff811dff52 ffff88082a2f5cf0 ffff88082c525168 <0> ffff88402c9fd2f8 ffff88402c9fd2f8 ffff88082a2f5d20 ffffffff811d7db2 Call Trace: [] pcie_aspm_exit_link_state+0xf5/0x11e [] pci_stop_bus_device+0x76/0x7e [] pci_stop_bus_device+0x2b/0x7e [] pci_remove_bus_device+0x15/0xb9 [] remove_callback+0x29/0x3a [] sysfs_schedule_callback_work+0x15/0x6d [] worker_thread+0x19d/0x298 [] ? worker_thread+0x148/0x298 [] ? sysfs_schedule_callback_work+0x0/0x6d [] ? autoremove_wake_function+0x0/0x38 [] ? worker_thread+0x0/0x298 [] kthread+0x7d/0x85 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? kthread+0x0/0x85 [] ? 
child_rip+0x0/0x20 Code: 89 e5 8a 50 48 31 c0 c0 ea 03 83 e2 07 e8 b2 de fe ff c9 48 98 c3 55 48 89 e5 41 56 49 89 fe 41 55 41 54 53 48 83 7f 10 00 74 04 <0f> 0b eb fe 48 8b 05 da 7d 63 00 4c 8d 60 e8 4c 89 e1 eb 24 4c RIP [] pcie_update_aspm_capable+0x15/0xbe RSP ---[ end trace 6ae0f65bdeab8555 ]--- Reported-by: Alex Chiang Signed-off-by: Kenji Kaneshige Tested-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aspm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 745402e8e49..5b7056cec00 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -656,8 +656,10 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) free_link_state(link); /* Recheck latencies and configure upstream links */ - pcie_update_aspm_capable(root); - pcie_config_aspm_path(parent_link); + if (parent_link) { + pcie_update_aspm_capable(root); + pcie_config_aspm_path(parent_link); + } out: mutex_unlock(&aspm_lock); up_read(&pci_bus_sem); -- cgit v1.2.3-18-g5258 From 86cf898e1d0fca245173980e3897580db38569a8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 9 Nov 2009 22:15:15 +0000 Subject: intel-iommu: Check for 'DMAR at zero' BIOS error earlier. Chris Wright has some patches which let us fall back to swiotlb nicely if IOMMU initialisation fails. But those are a bit much for 2.6.32. Instead, let's shift the check for the biggest problem, the HP and Acer BIOS bug which reports a DMAR at physical address zero. That one can actually be checked much earlier -- before we even admit to having detected an IOMMU in the first place. So the swiotlb init goes ahead as we want. 
Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 22b02c6df85..e5f8fc164fd 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -175,15 +175,6 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) int ret = 0; drhd = (struct acpi_dmar_hardware_unit *)header; - if (!drhd->address) { - /* Promote an attitude of violence to a BIOS engineer today */ - WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n" - "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - dmi_get_system_info(DMI_BIOS_VENDOR), - dmi_get_system_info(DMI_BIOS_VERSION), - dmi_get_system_info(DMI_PRODUCT_VERSION)); - return -ENODEV; - } dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); if (!dmaru) return -ENOMEM; @@ -591,12 +582,50 @@ int __init dmar_table_init(void) return 0; } +int __init check_zero_address(void) +{ + struct acpi_table_dmar *dmar; + struct acpi_dmar_header *entry_header; + struct acpi_dmar_hardware_unit *drhd; + + dmar = (struct acpi_table_dmar *)dmar_tbl; + entry_header = (struct acpi_dmar_header *)(dmar + 1); + + while (((unsigned long)entry_header) < + (((unsigned long)dmar) + dmar_tbl->length)) { + /* Avoid looping forever on bad ACPI tables */ + if (entry_header->length == 0) { + printk(KERN_WARNING PREFIX + "Invalid 0-length structure\n"); + return 0; + } + + if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) { + drhd = (void *)entry_header; + if (!drhd->address) { + /* Promote an attitude of violence to a BIOS engineer today */ + WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + return 0; + } + break; + } + + entry_header = ((void *)entry_header + entry_header->length); + } + 
return 1; +} + void __init detect_intel_iommu(void) { int ret; ret = dmar_table_detect(); - + if (ret) + ret = check_zero_address(); { #ifdef CONFIG_INTR_REMAP struct acpi_table_dmar *dmar; -- cgit v1.2.3-18-g5258 From e8bb910d1bbc65e7081e73aab4b3a3dd8630332c Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 4 Nov 2009 15:59:34 -0700 Subject: intel-iommu: Obey coherent_dma_mask for alloc_coherent on passthrough The model for IOMMU passthrough is that decent devices that can cope with DMA to all of memory get passthrough; crappy devices with a limited dma_mask don't -- they get to use the IOMMU anyway. This is done on the basis that IOMMU passthrough is usually wanted for performance reasons, and it's only the decent PCI devices that you really care about performance for, while the crappy 32-bit ones like your USB controller can just use the IOMMU and you won't really care. Unfortunately, the check for this was only looking at dev->dma_mask, not at dev->coherent_dma_mask. And some devices have a 32-bit coherent_dma_mask even though they have a full 64-bit dma_mask. Even more unfortunately, fixing that simple oversight would upset certain broken HP devices. Not only do they have a 32-bit coherent_dma_mask, but they also have a tendency to do stray DMA to unmapped addresses. And then they die when they take the DMA fault they so richly deserve. So if we do the 'correct' fix, it'll mean that affected users have to disable IOMMU support completely on "a large percentage of servers from a major vendor." Personally, I have little sympathy -- given that this is the _same_ 'major vendor' who is shipping machines which claim to have IOMMU support but have obviously never _once_ booted a VT-d capable OS to do any form of QA. But strictly speaking, it _would_ be a regression even though it only ever worked by fluke. 
For 2.6.33, we'll come up with a quirk which gives swiotlb support for this particular device, and other devices with an inadequate coherent_dma_mask will just get normal IOMMU mapping. The simplest fix for 2.6.32, though, is just to jump through some hoops to try to allocate coherent DMA memory for such devices in a place that they can reach. We'd use dma_generic_alloc_coherent() for this if it existed on IA64. Signed-off-by: Alex Williamson Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index b1e97e68250..7fe5f7920ca 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2767,7 +2767,15 @@ static void *intel_alloc_coherent(struct device *hwdev, size_t size, size = PAGE_ALIGN(size); order = get_order(size); - flags &= ~(GFP_DMA | GFP_DMA32); + + if (!iommu_no_mapping(hwdev)) + flags &= ~(GFP_DMA | GFP_DMA32); + else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) { + if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32)) + flags |= GFP_DMA; + else + flags |= GFP_DMA32; + } vaddr = (void *)__get_free_pages(flags, order); if (!vaddr) -- cgit v1.2.3-18-g5258 From 99dcadede42f8898d4c963ef69192ef4b9b76ba8 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 11 Nov 2009 07:23:06 -0800 Subject: intel-iommu: Support PCIe hot-plug To support PCIe hot plug in IOMMU, we register a notifier to respond to device change action. When the notifier gets BUS_NOTIFY_UNBOUND_DRIVER, it removes the device from its DMAR domain. A hot added device will be added into an IOMMU domain when it first does IOMMU op. So there is no need to add more code for hot add. Without the patch, after a hot-remove, a hot-added device on the same slot will not work. 
Signed-off-by: Fenghua Yu Tested-by: Yinghai Lu Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 7fe5f7920ca..1840a0578a4 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3215,6 +3215,33 @@ static int __init init_iommu_sysfs(void) } #endif /* CONFIG_PM */ +/* + * Here we only respond to action of unbound device from driver. + * + * Added device is not attached to its DMAR domain here yet. That will happen + * when mapping the device to iova. + */ +static int device_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct pci_dev *pdev = to_pci_dev(dev); + struct dmar_domain *domain; + + domain = find_domain(pdev); + if (!domain) + return 0; + + if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) + domain_remove_one_dev_info(domain, pdev); + + return 0; +} + +static struct notifier_block device_nb = { + .notifier_call = device_notifier, +}; + int __init intel_iommu_init(void) { int ret = 0; @@ -3267,6 +3294,8 @@ int __init intel_iommu_init(void) register_iommu(&intel_iommu_ops); + bus_register_notifier(&pci_bus_type, &device_nb); + return 0; } -- cgit v1.2.3-18-g5258 From 5854d9c8d18359b1fc2f23c0ef2d51dd53281bd6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 19 Nov 2009 02:18:44 +0000 Subject: Fix handling of the HP/Acer 'DMAR at zero' BIOS error for machines with <4GiB RAM. Commit 86cf898e1d0fca245173980e3897580db38569a8 ("intel-iommu: Check for 'DMAR at zero' BIOS error earlier.") was supposed to work by pretending not to detect an IOMMU if it was actually being reported by the BIOS at physical address zero. However, the intel_iommu_init() function is called unconditionally, as are the corresponding functions for other IOMMU hardware. 
So the patch only worked if you have RAM above the 4GiB boundary. It caused swiotlb to be initialised when no IOMMU was detected during early boot, and thus the later IOMMU init would refuse to run. But if you have less RAM than that, swiotlb wouldn't get set up and the IOMMU _would_ still end up being initialised, even though we never claimed to detect it. This patch also sets the dmar_disabled flag when the error is detected during the initial detection phase -- so that the later call to intel_iommu_init() will return without doing anything, regardless of whether swiotlb is used or not. Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- drivers/pci/dmar.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index e5f8fc164fd..b952ebc7a78 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -609,6 +609,9 @@ int __init check_zero_address(void) dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); +#ifdef CONFIG_DMAR + dmar_disabled = 1; +#endif return 0; } break; -- cgit v1.2.3-18-g5258