Diffstat (limited to 'arch/x86/kernel/pci-dma.c')
-rw-r--r--  arch/x86/kernel/pci-dma.c   354
1 file changed, 110 insertions, 244 deletions
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 87d4d6964ec..a25e202bb31 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,17 +1,23 @@
#include <linux/dma-mapping.h>
+#include <linux/dma-debug.h>
#include <linux/dmar.h>
+#include <linux/export.h>
#include <linux/bootmem.h>
+#include <linux/gfp.h>
#include <linux/pci.h>
+#include <linux/kmemleak.h>
#include <asm/proto.h>
#include <asm/dma.h>
#include <asm/iommu.h>
+#include <asm/gart.h>
#include <asm/calgary.h>
-#include <asm/amd_iommu.h>
+#include <asm/x86_init.h>
+#include <asm/iommu_table.h>
static int forbid_dac __read_mostly;
-struct dma_mapping_ops *dma_ops;
+struct dma_map_ops *dma_ops = &nommu_dma_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
@@ -30,22 +36,27 @@ int no_iommu __read_mostly;
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly = 0;
-/* This tells the BIO block layer to assume merging. Default to off
- because we cannot guarantee merging later. */
-int iommu_bio_merge __read_mostly = 0;
-EXPORT_SYMBOL(iommu_bio_merge);
-
-dma_addr_t bad_dma_address __read_mostly = 0;
-EXPORT_SYMBOL(bad_dma_address);
-
-/* Dummy device used for NULL arguments (normally ISA). Better would
- be probably a smaller DMA mask, but this is bug-to-bug compatible
- to older i386. */
-struct device fallback_dev = {
- .bus_id = "fallback device",
- .coherent_dma_mask = DMA_32BIT_MASK,
- .dma_mask = &fallback_dev.coherent_dma_mask,
+/*
+ * This variable becomes 1 if iommu=pt is passed on the kernel command line.
+ * If this variable is 1, IOMMU implementations do no DMA translation for
+ * devices and allow every device to access to whole physical memory. This is
+ * useful if a user wants to use an IOMMU only for KVM device assignment to
+ * guests and not for driver dma translation.
+ */
+int iommu_pass_through __read_mostly;
+
+extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
+
+/* Dummy device used for NULL arguments (normally ISA). */
+struct device x86_dma_fallback_dev = {
+ .init_name = "fallback device",
+ .coherent_dma_mask = ISA_DMA_BIT_MASK,
+ .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
};
+EXPORT_SYMBOL(x86_dma_fallback_dev);
+
+/* Number of entries preallocated for DMA-API debugging */
+#define PREALLOC_DMA_DEBUG_ENTRIES 65536
int dma_set_mask(struct device *dev, u64 mask)
{
@@ -58,84 +69,80 @@ int dma_set_mask(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_set_mask);
-#ifdef CONFIG_X86_64
-static __initdata void *dma32_bootmem_ptr;
-static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
-
-static int __init parse_dma32_size_opt(char *p)
-{
- if (!p)
- return -EINVAL;
- dma32_bootmem_size = memparse(p, &p);
- return 0;
-}
-early_param("dma32_size", parse_dma32_size_opt);
-
-void __init dma32_reserve_bootmem(void)
-{
- unsigned long size, align;
- if (max_pfn <= MAX_DMA32_PFN)
- return;
-
- /*
- * check aperture_64.c allocate_aperture() for reason about
- * using 512M as goal
- */
- align = 64ULL<<20;
- size = round_up(dma32_bootmem_size, align);
- dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
- 512ULL<<20);
- if (dma32_bootmem_ptr)
- dma32_bootmem_size = size;
- else
- dma32_bootmem_size = 0;
-}
-static void __init dma32_free_bootmem(void)
+void __init pci_iommu_alloc(void)
{
-
- if (max_pfn <= MAX_DMA32_PFN)
- return;
-
- if (!dma32_bootmem_ptr)
- return;
-
- free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size);
-
- dma32_bootmem_ptr = NULL;
- dma32_bootmem_size = 0;
+ struct iommu_table_entry *p;
+
+ sort_iommu_table(__iommu_table, __iommu_table_end);
+ check_iommu_entries(__iommu_table, __iommu_table_end);
+
+ for (p = __iommu_table; p < __iommu_table_end; p++) {
+ if (p && p->detect && p->detect() > 0) {
+ p->flags |= IOMMU_DETECTED;
+ if (p->early_init)
+ p->early_init();
+ if (p->flags & IOMMU_FINISH_IF_DETECTED)
+ break;
+ }
+ }
}
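
Note: the new pci_iommu_alloc() above walks a table of detection entries that the build collects into a dedicated linker section (__iommu_table .. __iommu_table_end), sorted so dependencies run first; each detect() hook that reports hardware gets its early_init() called, and a detected entry flagged IOMMU_FINISH_IF_DETECTED stops the walk. A minimal user-space sketch of that dispatch pattern follows; the struct, entry names, and callbacks are illustrative stand-ins, not the kernel's IOMMU_INIT_* machinery.

#include <stdio.h>

#define IOMMU_DETECTED           0x1
#define IOMMU_FINISH_IF_DETECTED 0x2

/* Illustrative stand-in for struct iommu_table_entry. */
struct iommu_entry {
	const char *name;
	int  (*detect)(void);		/* > 0 means "hardware found" */
	void (*early_init)(void);	/* runs right after detection */
	int  flags;
};

static int  detect_none(void)   { return 0; }
static int  detect_hw(void)     { return 1; }
static void early_init_hw(void) { printf("early init for detected IOMMU\n"); }

static struct iommu_entry table[] = {
	{ "calgary-like", detect_none, NULL,          0 },
	{ "gart-like",    detect_hw,   early_init_hw, IOMMU_FINISH_IF_DETECTED },
	{ "swiotlb-like", detect_hw,   early_init_hw, 0 },	/* never reached */
};

int main(void)
{
	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
		struct iommu_entry *p = &table[i];

		if (p->detect && p->detect() > 0) {
			p->flags |= IOMMU_DETECTED;
			if (p->early_init)
				p->early_init();
			if (p->flags & IOMMU_FINISH_IF_DETECTED)
				break;	/* first positive detection wins */
		}
	}
	return 0;
}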
-
-void __init pci_iommu_alloc(void)
+void *dma_generic_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_addr, gfp_t flag,
+ struct dma_attrs *attrs)
{
- /* free the range so iommu could get some range less than 4G */
- dma32_free_bootmem();
- /*
- * The order of these functions is important for
- * fall-back/fail-over reasons
- */
- gart_iommu_hole_init();
-
- detect_calgary();
+ unsigned long dma_mask;
+ struct page *page;
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ dma_addr_t addr;
+
+ dma_mask = dma_alloc_coherent_mask(dev, flag);
+
+ flag &= ~__GFP_ZERO;
+again:
+ page = NULL;
+ /* CMA can be used only in the context which permits sleeping */
+ if (flag & __GFP_WAIT) {
+ page = dma_alloc_from_contiguous(dev, count, get_order(size));
+ if (page && page_to_phys(page) + size > dma_mask) {
+ dma_release_from_contiguous(dev, page, count);
+ page = NULL;
+ }
+ }
+ /* fallback */
+ if (!page)
+ page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
+ if (!page)
+ return NULL;
- detect_intel_iommu();
+ addr = page_to_phys(page);
+ if (addr + size > dma_mask) {
+ __free_pages(page, get_order(size));
- amd_iommu_detect();
+ if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
+ flag = (flag & ~GFP_DMA32) | GFP_DMA;
+ goto again;
+ }
- pci_swiotlb_init();
+ return NULL;
+ }
+ memset(page_address(page), 0, size);
+ *dma_addr = addr;
+ return page_address(page);
}
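
Note: dma_generic_alloc_coherent() converts the request size into whole pages in two ways: PAGE_ALIGN(size) >> PAGE_SHIFT gives the page count handed to the contiguous (CMA) allocator, while get_order(size) gives the power-of-two order used by alloc_pages_node()/__free_pages(). A small stand-alone check of that arithmetic, assuming 4 KiB pages; PAGE_ALIGN and get_order are re-implemented here purely for illustration, not taken from kernel headers.

#include <stdio.h>

#define PAGE_SHIFT 12				/* assume 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* Illustrative reimplementation: smallest order with 2^order pages >= size. */
static unsigned int get_order(unsigned long size)
{
	unsigned long pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned int order = 0;

	while ((1UL << order) < pages)
		order++;
	return order;
}

int main(void)
{
	unsigned long sizes[] = { 100, 4096, 5000, 65536 };

	for (int i = 0; i < 4; i++) {
		unsigned long count = PAGE_ALIGN(sizes[i]) >> PAGE_SHIFT;
		printf("size %6lu -> count %2lu, order %u\n",
		       sizes[i], count, get_order(sizes[i]));
	}
	return 0;
}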
-unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
+void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_addr, struct dma_attrs *attrs)
{
- unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ struct page *page = virt_to_page(vaddr);
- return size >> PAGE_SHIFT;
+ if (!dma_release_from_contiguous(dev, page, count))
+ free_pages((unsigned long)vaddr, get_order(size));
}
-EXPORT_SYMBOL(iommu_num_pages);
-#endif
/*
- * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
- * documentation.
+ * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
+ * parameter documentation.
*/
static __init int iommu_setup(char *p)
{
@@ -156,7 +163,6 @@ static __init int iommu_setup(char *p)
}
if (!strncmp(p, "biomerge", 8)) {
- iommu_bio_merge = 4096;
iommu_merge = 1;
force_iommu = 1;
}
@@ -175,7 +181,7 @@ static __init int iommu_setup(char *p)
if (!strncmp(p, "allowdac", 8))
forbid_dac = 0;
if (!strncmp(p, "nodac", 5))
- forbid_dac = -1;
+ forbid_dac = 1;
if (!strncmp(p, "usedac", 6)) {
forbid_dac = -1;
return 1;
@@ -184,6 +190,8 @@ static __init int iommu_setup(char *p)
if (!strncmp(p, "soft", 4))
swiotlb = 1;
#endif
+ if (!strncmp(p, "pt", 2))
+ iommu_pass_through = 1;
gart_parse_options(p);
@@ -202,7 +210,7 @@ early_param("iommu", iommu_setup);
int dma_supported(struct device *dev, u64 mask)
{
- struct dma_mapping_ops *ops = get_dma_ops(dev);
+ struct dma_map_ops *ops = get_dma_ops(dev);
#ifdef CONFIG_PCI
if (mask > 0xffffffff && forbid_dac > 0) {
@@ -217,7 +225,7 @@ int dma_supported(struct device *dev, u64 mask)
/* Copied from i386. Doesn't make much sense, because it will
only work for pci_alloc_coherent.
The caller just has to use GFP_DMA in this case. */
- if (mask < DMA_24BIT_MASK)
+ if (mask < DMA_BIT_MASK(24))
return 0;
/* Tell the device to use SAC when IOMMU force is on. This
@@ -232,7 +240,7 @@ int dma_supported(struct device *dev, u64 mask)
SAC for these. Assume all masks <= 40 bits are of this
type. Normally this doesn't make any difference, but gives
more gentle handling of IOMMU overflow. */
- if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
+ if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
dev_info(dev, "Force SAC with mask %Lx\n", mask);
return 0;
}
@@ -241,178 +249,36 @@ int dma_supported(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_supported);
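
Note: the mask checks in dma_supported() now use DMA_BIT_MASK(n), which expands to a value with the low n bits set, so the old DMA_24BIT_MASK/DMA_40BIT_MASK constants are simply spelled generically. A quick stand-alone illustration of the thresholds being compared; the macro is copied here for the example, while kernel code gets it from <linux/dma-mapping.h>.

#include <stdio.h>

/* Same shape as the kernel macro: low n bits set, n == 64 handled specially. */
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

int main(void)
{
	unsigned long long masks[] = {
		DMA_BIT_MASK(24),	/* below this, dma_supported() fails           */
		DMA_BIT_MASK(32),	/* the DAC boundary: mask > 0xffffffff is DAC  */
		DMA_BIT_MASK(40),	/* >= this, SAC is forced when iommu_sac_force */
		DMA_BIT_MASK(64),
	};

	for (int i = 0; i < 4; i++)
		printf("0x%016llx  DAC candidate: %s\n", masks[i],
		       masks[i] > 0xffffffffULL ? "yes" : "no");
	return 0;
}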
-/* Allocate DMA memory on node near device */
-static noinline struct page *
-dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
-{
- int node;
-
- node = dev_to_node(dev);
-
- return alloc_pages_node(node, gfp, order);
-}
-
-/*
- * Allocate memory for a coherent mapping.
- */
-void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp)
+static int __init pci_iommu_init(void)
{
- struct dma_mapping_ops *ops = get_dma_ops(dev);
- void *memory = NULL;
- struct page *page;
- unsigned long dma_mask = 0;
- dma_addr_t bus;
- int noretry = 0;
-
- /* ignore region specifiers */
- gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
- if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
- return memory;
-
- if (!dev) {
- dev = &fallback_dev;
- gfp |= GFP_DMA;
- }
- dma_mask = dev->coherent_dma_mask;
- if (dma_mask == 0)
- dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
-
- /* Device not DMA able */
- if (dev->dma_mask == NULL)
- return NULL;
+ struct iommu_table_entry *p;
+ dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
- /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
- if (gfp & __GFP_DMA)
- noretry = 1;
-
-#ifdef CONFIG_X86_64
- /* Why <=? Even when the mask is smaller than 4GB it is often
- larger than 16MB and in this case we have a chance of
- finding fitting memory in the next higher zone first. If
- not retry with true GFP_DMA. -AK */
- if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
- gfp |= GFP_DMA32;
- if (dma_mask < DMA_32BIT_MASK)
- noretry = 1;
- }
+#ifdef CONFIG_PCI
+ dma_debug_add_bus(&pci_bus_type);
#endif
+ x86_init.iommu.iommu_init();
- again:
- page = dma_alloc_pages(dev,
- noretry ? gfp | __GFP_NORETRY : gfp, get_order(size));
- if (page == NULL)
- return NULL;
-
- {
- int high, mmu;
- bus = page_to_phys(page);
- memory = page_address(page);
- high = (bus + size) >= dma_mask;
- mmu = high;
- if (force_iommu && !(gfp & GFP_DMA))
- mmu = 1;
- else if (high) {
- free_pages((unsigned long)memory,
- get_order(size));
-
- /* Don't use the 16MB ZONE_DMA unless absolutely
- needed. It's better to use remapping first. */
- if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
- gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
- goto again;
- }
-
- /* Let low level make its own zone decisions */
- gfp &= ~(GFP_DMA32|GFP_DMA);
-
- if (ops->alloc_coherent)
- return ops->alloc_coherent(dev, size,
- dma_handle, gfp);
- return NULL;
- }
-
- memset(memory, 0, size);
- if (!mmu) {
- *dma_handle = bus;
- return memory;
- }
- }
-
- if (ops->alloc_coherent) {
- free_pages((unsigned long)memory, get_order(size));
- gfp &= ~(GFP_DMA|GFP_DMA32);
- return ops->alloc_coherent(dev, size, dma_handle, gfp);
+ for (p = __iommu_table; p < __iommu_table_end; p++) {
+ if (p && (p->flags & IOMMU_DETECTED) && p->late_init)
+ p->late_init();
}
- if (ops->map_simple) {
- *dma_handle = ops->map_simple(dev, virt_to_phys(memory),
- size,
- PCI_DMA_BIDIRECTIONAL);
- if (*dma_handle != bad_dma_address)
- return memory;
- }
-
- if (panic_on_overflow)
- panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
- (unsigned long)size);
- free_pages((unsigned long)memory, get_order(size));
- return NULL;
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-/*
- * Unmap coherent memory.
- * The caller must ensure that the device has finished accessing the mapping.
- */
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t bus)
-{
- struct dma_mapping_ops *ops = get_dma_ops(dev);
-
- int order = get_order(size);
- WARN_ON(irqs_disabled()); /* for portability */
- if (dma_release_from_coherent(dev, order, vaddr))
- return;
- if (ops->unmap_single)
- ops->unmap_single(dev, bus, size, 0);
- free_pages((unsigned long)vaddr, order);
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-static int __init pci_iommu_init(void)
-{
- calgary_iommu_init();
-
- intel_iommu_init();
-
- amd_iommu_init();
-
- gart_iommu_init();
-
- no_iommu_init();
return 0;
}
-
-void pci_iommu_shutdown(void)
-{
- gart_iommu_shutdown();
-}
/* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
+rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
-static __devinit void via_no_dac(struct pci_dev *dev)
+static void via_no_dac(struct pci_dev *dev)
{
- if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
- printk(KERN_INFO "PCI: VIA PCI bridge detected."
- "Disabling DAC.\n");
+ if (forbid_dac == 0) {
+ dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
forbid_dac = 1;
}
}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
+ PCI_CLASS_BRIDGE_PCI, 8, via_no_dac);
#endif
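
Note: the quirk change replaces the open-coded class test with DECLARE_PCI_FIXUP_CLASS_FINAL. The extra class and class-shift arguments (PCI_CLASS_BRIDGE_PCI, 8) let the PCI core match the device's 24-bit class register with the low programming-interface byte shifted away, so via_no_dac() no longer needs its own (dev->class >> 8) check. A stand-alone sketch of that matching arithmetic; the class values below are examples, and PCI_CLASS_BRIDGE_PCI is 0x0604 as defined in <linux/pci_ids.h>.

#include <stdio.h>

#define PCI_CLASS_BRIDGE_PCI 0x0604	/* base class 0x06, subclass 0x04 */

/* Model of the match done for a fixup declared with class_shift == 8:
 * dev->class is (base << 16) | (subclass << 8) | prog_if, and shifting
 * right by 8 discards the programming-interface byte before comparing. */
static int class_matches(unsigned int dev_class, unsigned int wanted, int shift)
{
	return (dev_class >> shift) == wanted;
}

int main(void)
{
	unsigned int pci_bridge     = 0x060400;	/* PCI-to-PCI bridge               */
	unsigned int pci_bridge_sub = 0x060401;	/* same bridge, subtractive decode */
	unsigned int host_bridge    = 0x060000;	/* host bridge: must not match     */

	printf("%d %d %d\n",
	       class_matches(pci_bridge,     PCI_CLASS_BRIDGE_PCI, 8),	/* 1 */
	       class_matches(pci_bridge_sub, PCI_CLASS_BRIDGE_PCI, 8),	/* 1 */
	       class_matches(host_bridge,    PCI_CLASS_BRIDGE_PCI, 8));	/* 0 */
	return 0;
}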