diff options
Diffstat (limited to 'arch/x86/kernel/pci-dma.c')
| -rw-r--r-- | arch/x86/kernel/pci-dma.c | 354 |
1 files changed, 110 insertions, 244 deletions
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 87d4d6964ec..a25e202bb31 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,17 +1,23 @@ #include <linux/dma-mapping.h> +#include <linux/dma-debug.h> #include <linux/dmar.h> +#include <linux/export.h> #include <linux/bootmem.h> +#include <linux/gfp.h> #include <linux/pci.h> +#include <linux/kmemleak.h> #include <asm/proto.h> #include <asm/dma.h> #include <asm/iommu.h> +#include <asm/gart.h> #include <asm/calgary.h> -#include <asm/amd_iommu.h> +#include <asm/x86_init.h> +#include <asm/iommu_table.h> static int forbid_dac __read_mostly; -struct dma_mapping_ops *dma_ops; +struct dma_map_ops *dma_ops = &nommu_dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -30,22 +36,27 @@ int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; -/* This tells the BIO block layer to assume merging. Default to off - because we cannot guarantee merging later. */ -int iommu_bio_merge __read_mostly = 0; -EXPORT_SYMBOL(iommu_bio_merge); - -dma_addr_t bad_dma_address __read_mostly = 0; -EXPORT_SYMBOL(bad_dma_address); - -/* Dummy device used for NULL arguments (normally ISA). Better would - be probably a smaller DMA mask, but this is bug-to-bug compatible - to older i386. */ -struct device fallback_dev = { - .bus_id = "fallback device", - .coherent_dma_mask = DMA_32BIT_MASK, - .dma_mask = &fallback_dev.coherent_dma_mask, +/* + * This variable becomes 1 if iommu=pt is passed on the kernel command line. + * If this variable is 1, IOMMU implementations do no DMA translation for + * devices and allow every device to access to whole physical memory. This is + * useful if a user wants to use an IOMMU only for KVM device assignment to + * guests and not for driver dma translation. + */ +int iommu_pass_through __read_mostly; + +extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; + +/* Dummy device used for NULL arguments (normally ISA). */ +struct device x86_dma_fallback_dev = { + .init_name = "fallback device", + .coherent_dma_mask = ISA_DMA_BIT_MASK, + .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, }; +EXPORT_SYMBOL(x86_dma_fallback_dev); + +/* Number of entries preallocated for DMA-API debugging */ +#define PREALLOC_DMA_DEBUG_ENTRIES 65536 int dma_set_mask(struct device *dev, u64 mask) { @@ -58,84 +69,80 @@ int dma_set_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_mask); -#ifdef CONFIG_X86_64 -static __initdata void *dma32_bootmem_ptr; -static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); - -static int __init parse_dma32_size_opt(char *p) -{ - if (!p) - return -EINVAL; - dma32_bootmem_size = memparse(p, &p); - return 0; -} -early_param("dma32_size", parse_dma32_size_opt); - -void __init dma32_reserve_bootmem(void) -{ - unsigned long size, align; - if (max_pfn <= MAX_DMA32_PFN) - return; - - /* - * check aperture_64.c allocate_aperture() for reason about - * using 512M as goal - */ - align = 64ULL<<20; - size = round_up(dma32_bootmem_size, align); - dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, - 512ULL<<20); - if (dma32_bootmem_ptr) - dma32_bootmem_size = size; - else - dma32_bootmem_size = 0; -} -static void __init dma32_free_bootmem(void) +void __init pci_iommu_alloc(void) { - - if (max_pfn <= MAX_DMA32_PFN) - return; - - if (!dma32_bootmem_ptr) - return; - - free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size); - - dma32_bootmem_ptr = NULL; - dma32_bootmem_size = 0; + struct iommu_table_entry *p; + + sort_iommu_table(__iommu_table, __iommu_table_end); + check_iommu_entries(__iommu_table, __iommu_table_end); + + for (p = __iommu_table; p < __iommu_table_end; p++) { + if (p && p->detect && p->detect() > 0) { + p->flags |= IOMMU_DETECTED; + if (p->early_init) + p->early_init(); + if (p->flags & IOMMU_FINISH_IF_DETECTED) + break; + } + } } - -void __init pci_iommu_alloc(void) +void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag, + struct dma_attrs *attrs) { - /* free the range so iommu could get some range less than 4G */ - dma32_free_bootmem(); - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ - gart_iommu_hole_init(); - - detect_calgary(); + unsigned long dma_mask; + struct page *page; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + dma_addr_t addr; + + dma_mask = dma_alloc_coherent_mask(dev, flag); + + flag &= ~__GFP_ZERO; +again: + page = NULL; + /* CMA can be used only in the context which permits sleeping */ + if (flag & __GFP_WAIT) { + page = dma_alloc_from_contiguous(dev, count, get_order(size)); + if (page && page_to_phys(page) + size > dma_mask) { + dma_release_from_contiguous(dev, page, count); + page = NULL; + } + } + /* fallback */ + if (!page) + page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); + if (!page) + return NULL; - detect_intel_iommu(); + addr = page_to_phys(page); + if (addr + size > dma_mask) { + __free_pages(page, get_order(size)); - amd_iommu_detect(); + if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { + flag = (flag & ~GFP_DMA32) | GFP_DMA; + goto again; + } - pci_swiotlb_init(); + return NULL; + } + memset(page_address(page), 0, size); + *dma_addr = addr; + return page_address(page); } -unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, struct dma_attrs *attrs) { - unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page *page = virt_to_page(vaddr); - return size >> PAGE_SHIFT; + if (!dma_release_from_contiguous(dev, page, count)) + free_pages((unsigned long)vaddr, get_order(size)); } -EXPORT_SYMBOL(iommu_num_pages); -#endif /* - * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter - * documentation. + * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel + * parameter documentation. */ static __init int iommu_setup(char *p) { @@ -156,7 +163,6 @@ static __init int iommu_setup(char *p) } if (!strncmp(p, "biomerge", 8)) { - iommu_bio_merge = 4096; iommu_merge = 1; force_iommu = 1; } @@ -175,7 +181,7 @@ static __init int iommu_setup(char *p) if (!strncmp(p, "allowdac", 8)) forbid_dac = 0; if (!strncmp(p, "nodac", 5)) - forbid_dac = -1; + forbid_dac = 1; if (!strncmp(p, "usedac", 6)) { forbid_dac = -1; return 1; @@ -184,6 +190,8 @@ static __init int iommu_setup(char *p) if (!strncmp(p, "soft", 4)) swiotlb = 1; #endif + if (!strncmp(p, "pt", 2)) + iommu_pass_through = 1; gart_parse_options(p); @@ -202,7 +210,7 @@ early_param("iommu", iommu_setup); int dma_supported(struct device *dev, u64 mask) { - struct dma_mapping_ops *ops = get_dma_ops(dev); + struct dma_map_ops *ops = get_dma_ops(dev); #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { @@ -217,7 +225,7 @@ int dma_supported(struct device *dev, u64 mask) /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_24BIT_MASK) + if (mask < DMA_BIT_MASK(24)) return 0; /* Tell the device to use SAC when IOMMU force is on. This @@ -232,7 +240,7 @@ int dma_supported(struct device *dev, u64 mask) SAC for these. Assume all masks <= 40 bits are of this type. Normally this doesn't make any difference, but gives more gentle handling of IOMMU overflow. */ - if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { + if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) { dev_info(dev, "Force SAC with mask %Lx\n", mask); return 0; } @@ -241,178 +249,36 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -/* Allocate DMA memory on node near device */ -static noinline struct page * -dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) -{ - int node; - - node = dev_to_node(dev); - - return alloc_pages_node(node, gfp, order); -} - -/* - * Allocate memory for a coherent mapping. - */ -void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) +static int __init pci_iommu_init(void) { - struct dma_mapping_ops *ops = get_dma_ops(dev); - void *memory = NULL; - struct page *page; - unsigned long dma_mask = 0; - dma_addr_t bus; - int noretry = 0; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - - if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) - return memory; - - if (!dev) { - dev = &fallback_dev; - gfp |= GFP_DMA; - } - dma_mask = dev->coherent_dma_mask; - if (dma_mask == 0) - dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; - - /* Device not DMA able */ - if (dev->dma_mask == NULL) - return NULL; + struct iommu_table_entry *p; + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ - if (gfp & __GFP_DMA) - noretry = 1; - -#ifdef CONFIG_X86_64 - /* Why <=? Even when the mask is smaller than 4GB it is often - larger than 16MB and in this case we have a chance of - finding fitting memory in the next higher zone first. If - not retry with true GFP_DMA. -AK */ - if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp |= GFP_DMA32; - if (dma_mask < DMA_32BIT_MASK) - noretry = 1; - } +#ifdef CONFIG_PCI + dma_debug_add_bus(&pci_bus_type); #endif + x86_init.iommu.iommu_init(); - again: - page = dma_alloc_pages(dev, - noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); - if (page == NULL) - return NULL; - - { - int high, mmu; - bus = page_to_phys(page); - memory = page_address(page); - high = (bus + size) >= dma_mask; - mmu = high; - if (force_iommu && !(gfp & GFP_DMA)) - mmu = 1; - else if (high) { - free_pages((unsigned long)memory, - get_order(size)); - - /* Don't use the 16MB ZONE_DMA unless absolutely - needed. It's better to use remapping first. */ - if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - /* Let low level make its own zone decisions */ - gfp &= ~(GFP_DMA32|GFP_DMA); - - if (ops->alloc_coherent) - return ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; - } - - memset(memory, 0, size); - if (!mmu) { - *dma_handle = bus; - return memory; - } - } - - if (ops->alloc_coherent) { - free_pages((unsigned long)memory, get_order(size)); - gfp &= ~(GFP_DMA|GFP_DMA32); - return ops->alloc_coherent(dev, size, dma_handle, gfp); + for (p = __iommu_table; p < __iommu_table_end; p++) { + if (p && (p->flags & IOMMU_DETECTED) && p->late_init) + p->late_init(); } - if (ops->map_simple) { - *dma_handle = ops->map_simple(dev, virt_to_phys(memory), - size, - PCI_DMA_BIDIRECTIONAL); - if (*dma_handle != bad_dma_address) - return memory; - } - - if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", - (unsigned long)size); - free_pages((unsigned long)memory, get_order(size)); - return NULL; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -/* - * Unmap coherent memory. - * The caller must ensure that the device has finished accessing the mapping. - */ -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) -{ - struct dma_mapping_ops *ops = get_dma_ops(dev); - - int order = get_order(size); - WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_from_coherent(dev, order, vaddr)) - return; - if (ops->unmap_single) - ops->unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, order); -} -EXPORT_SYMBOL(dma_free_coherent); - -static int __init pci_iommu_init(void) -{ - calgary_iommu_init(); - - intel_iommu_init(); - - amd_iommu_init(); - - gart_iommu_init(); - - no_iommu_init(); return 0; } - -void pci_iommu_shutdown(void) -{ - gart_iommu_shutdown(); -} /* Must execute after PCI subsystem */ -fs_initcall(pci_iommu_init); +rootfs_initcall(pci_iommu_init); #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ -static __devinit void via_no_dac(struct pci_dev *dev) +static void via_no_dac(struct pci_dev *dev) { - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected." - "Disabling DAC.\n"); + if (forbid_dac == 0) { + dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); forbid_dac = 1; } } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, + PCI_CLASS_BRIDGE_PCI, 8, via_no_dac); #endif |
