diff options
Diffstat (limited to 'arch/arm/mm/dma-mapping.c')
| -rw-r--r-- | arch/arm/mm/dma-mapping.c | 1379 |
1 files changed, 894 insertions, 485 deletions
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4044abcf6f9..1f88db06b13 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -9,6 +9,7 @@ * * DMA uncached mapping support. */ +#include <linux/bootmem.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/gfp.h> @@ -22,13 +23,14 @@ #include <linux/memblock.h> #include <linux/slab.h> #include <linux/iommu.h> +#include <linux/io.h> #include <linux/vmalloc.h> +#include <linux/sizes.h> #include <asm/memory.h> #include <asm/highmem.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> -#include <asm/sizes.h> #include <asm/mach/arch.h> #include <asm/dma-iommu.h> #include <asm/mach/map.h> @@ -72,11 +74,18 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - if (!arch_is_coherent()) + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(page, offset, size, dir); return pfn_to_dma(dev, page_to_pfn(page)) + offset; } +static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return pfn_to_dma(dev, page_to_pfn(page)) + offset; +} + /** * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -95,7 +104,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - if (!arch_is_coherent()) + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), handle & ~PAGE_MASK, size, dir); } @@ -105,8 +114,7 @@ static void arm_dma_sync_single_for_cpu(struct device *dev, { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(page, offset, size, dir); + __dma_page_dev_to_cpu(page, offset, size, dir); } static void arm_dma_sync_single_for_device(struct device *dev, @@ -114,16 +122,14 @@ static void arm_dma_sync_single_for_device(struct device *dev, { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); - if (!arch_is_coherent()) - __dma_page_cpu_to_dev(page, offset, size, dir); + __dma_page_cpu_to_dev(page, offset, size, dir); } -static int arm_dma_set_mask(struct device *dev, u64 dma_mask); - struct dma_map_ops arm_dma_ops = { .alloc = arm_dma_alloc, .free = arm_dma_free, .mmap = arm_dma_mmap, + .get_sgtable = arm_dma_get_sgtable, .map_page = arm_dma_map_page, .unmap_page = arm_dma_unmap_page, .map_sg = arm_dma_map_sg, @@ -136,9 +142,63 @@ struct dma_map_ops arm_dma_ops = { }; EXPORT_SYMBOL(arm_dma_ops); +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs); +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs); + +struct dma_map_ops arm_coherent_dma_ops = { + .alloc = arm_coherent_dma_alloc, + .free = arm_coherent_dma_free, + .mmap = arm_dma_mmap, + .get_sgtable = arm_dma_get_sgtable, + .map_page = arm_coherent_dma_map_page, + .map_sg = arm_dma_map_sg, + .set_dma_mask = arm_dma_set_mask, +}; +EXPORT_SYMBOL(arm_coherent_dma_ops); + +static int __dma_supported(struct device *dev, u64 mask, bool warn) +{ + unsigned long max_dma_pfn; + + /* + * If the mask allows for more memory than we can address, + * and we actually have that much memory, then we must + * indicate that DMA to this device is not supported. + */ + if (sizeof(mask) != sizeof(dma_addr_t) && + mask > (dma_addr_t)~0 && + dma_to_pfn(dev, ~0) < max_pfn) { + if (warn) { + dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", + mask); + dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n"); + } + return 0; + } + + max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + + /* + * Translate the device's DMA mask to a PFN limit. This + * PFN number includes the page which we can DMA to. + */ + if (dma_to_pfn(dev, mask) < max_dma_pfn) { + if (warn) + dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n", + mask, + dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1, + max_dma_pfn + 1); + return 0; + } + + return 1; +} + static u64 get_coherent_dma_mask(struct device *dev) { - u64 mask = (u64)arm_dma_limit; + u64 mask = (u64)DMA_BIT_MASK(32); if (dev) { mask = dev->coherent_dma_mask; @@ -152,12 +212,8 @@ static u64 get_coherent_dma_mask(struct device *dev) return 0; } - if ((~mask) & (u64)arm_dma_limit) { - dev_warn(dev, "coherent DMA mask %#llx is smaller " - "than system GFP_DMA mask %#llx\n", - mask, (u64)arm_dma_limit); + if (!__dma_supported(dev, mask, true)) return 0; - } } return mask; @@ -165,13 +221,24 @@ static u64 get_coherent_dma_mask(struct device *dev) static void __dma_clear_buffer(struct page *page, size_t size) { - void *ptr; /* * Ensure that the allocated pages are zeroed, and that any data * lurking in the kernel direct-mapped region is invalidated. */ - ptr = page_address(page); - if (ptr) { + if (PageHighMem(page)) { + phys_addr_t base = __pfn_to_phys(page_to_pfn(page)); + phys_addr_t end = base + size; + while (size > 0) { + void *ptr = kmap_atomic(page); + memset(ptr, 0, PAGE_SIZE); + dmac_flush_range(ptr, ptr + PAGE_SIZE); + kunmap_atomic(ptr); + page++; + size -= PAGE_SIZE; + } + outer_flush_range(base, end); + } else { + void *ptr = page_address(page); memset(ptr, 0, size); dmac_flush_range(ptr, ptr + size); outer_flush_range(__pa(ptr), __pa(ptr) + size); @@ -218,147 +285,145 @@ static void __dma_free_buffer(struct page *page, size_t size) #ifdef CONFIG_MMU -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - consistent_base) >> PAGE_SHIFT) -#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT) - -/* - * These are the page tables (2MB each) covering uncached, DMA consistent allocations - */ -static pte_t **consistent_pte; - -#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page, + const void *caller); -static unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE; +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, + pgprot_t prot, struct page **ret_page, + const void *caller); -void __init init_consistent_dma_size(unsigned long size) +static void * +__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, + const void *caller) { - unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M); + struct vm_struct *area; + unsigned long addr; - BUG_ON(consistent_pte); /* Check we're called before DMA region init */ - BUG_ON(base < VMALLOC_END); + /* + * DMA allocation can be mapped to user space, so lets + * set VM_USERMAP flags too. + */ + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, + caller); + if (!area) + return NULL; + addr = (unsigned long)area->addr; + area->phys_addr = __pfn_to_phys(page_to_pfn(page)); - /* Grow region to accommodate specified size */ - if (base < consistent_base) - consistent_base = base; + if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { + vunmap((void *)addr); + return NULL; + } + return (void *)addr; } -#include "vmregion.h" - -static struct arm_vmregion_head consistent_head = { - .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock), - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_end = CONSISTENT_END, -}; - -#ifdef CONFIG_HUGETLB_PAGE -#error ARM Coherent DMA allocator does not (yet) support huge TLB -#endif - -/* - * Initialise the consistent memory allocation. - */ -static int __init consistent_init(void) +static void __dma_free_remap(void *cpu_addr, size_t size) { - int ret = 0; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int i = 0; - unsigned long base = consistent_base; - unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; - - if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) - return 0; - - consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); - if (!consistent_pte) { - pr_err("%s: no memory\n", __func__); - return -ENOMEM; + unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP; + struct vm_struct *area = find_vm_area(cpu_addr); + if (!area || (area->flags & flags) != flags) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; } - - pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END); - consistent_head.vm_start = base; - - do { - pgd = pgd_offset(&init_mm, base); - - pud = pud_alloc(&init_mm, pgd, base); - if (!pud) { - pr_err("%s: no pud tables\n", __func__); - ret = -ENOMEM; - break; - } - - pmd = pmd_alloc(&init_mm, pud, base); - if (!pmd) { - pr_err("%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - WARN_ON(!pmd_none(*pmd)); - - pte = pte_alloc_kernel(pmd, base); - if (!pte) { - pr_err("%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte[i++] = pte; - base += PMD_SIZE; - } while (base < CONSISTENT_END); - - return ret; + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); } -core_initcall(consistent_init); -static void *__alloc_from_contiguous(struct device *dev, size_t size, - pgprot_t prot, struct page **ret_page); +#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K -static struct arm_vmregion_head coherent_head = { - .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock), - .vm_list = LIST_HEAD_INIT(coherent_head.vm_list), +struct dma_pool { + size_t size; + spinlock_t lock; + unsigned long *bitmap; + unsigned long nr_pages; + void *vaddr; + struct page **pages; }; -static size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8; +static struct dma_pool atomic_pool = { + .size = DEFAULT_DMA_COHERENT_POOL_SIZE, +}; static int __init early_coherent_pool(char *p) { - coherent_pool_size = memparse(p, &p); + atomic_pool.size = memparse(p, &p); return 0; } early_param("coherent_pool", early_coherent_pool); +void __init init_dma_coherent_pool_size(unsigned long size) +{ + /* + * Catch any attempt to set the pool size too late. + */ + BUG_ON(atomic_pool.vaddr); + + /* + * Set architecture specific coherent pool size only if + * it has not been changed by kernel command line parameter. + */ + if (atomic_pool.size == DEFAULT_DMA_COHERENT_POOL_SIZE) + atomic_pool.size = size; +} + /* * Initialise the coherent pool for atomic allocations. */ -static int __init coherent_init(void) +static int __init atomic_pool_init(void) { - pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); - size_t size = coherent_pool_size; + struct dma_pool *pool = &atomic_pool; + pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL); + gfp_t gfp = GFP_KERNEL | GFP_DMA; + unsigned long nr_pages = pool->size >> PAGE_SHIFT; + unsigned long *bitmap; struct page *page; + struct page **pages; void *ptr; + int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long); - if (!IS_ENABLED(CONFIG_CMA)) - return 0; + bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!bitmap) + goto no_bitmap; + + pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto no_pages; - ptr = __alloc_from_contiguous(NULL, size, prot, &page); + if (dev_get_cma_area(NULL)) + ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, + atomic_pool_init); + else + ptr = __alloc_remap_buffer(NULL, pool->size, gfp, prot, &page, + atomic_pool_init); if (ptr) { - coherent_head.vm_start = (unsigned long) ptr; - coherent_head.vm_end = (unsigned long) ptr + size; - printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n", - (unsigned)size / 1024); + int i; + + for (i = 0; i < nr_pages; i++) + pages[i] = page + i; + + spin_lock_init(&pool->lock); + pool->vaddr = ptr; + pool->pages = pages; + pool->bitmap = bitmap; + pool->nr_pages = nr_pages; + pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n", + (unsigned)pool->size / 1024); return 0; } - printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", - (unsigned)size / 1024); + + kfree(pages); +no_pages: + kfree(bitmap); +no_bitmap: + pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", + (unsigned)pool->size / 1024); return -ENOMEM; } /* * CMA is activated by core_initcall, so we must be called after it. */ -postcore_initcall(coherent_init); +postcore_initcall(atomic_pool_init); struct dma_contig_early_reserve { phys_addr_t base; @@ -388,7 +453,7 @@ void __init dma_contiguous_remap(void) if (end > arm_lowmem_limit) end = arm_lowmem_limit; if (start >= end) - return; + continue; map.pfn = __phys_to_pfn(start); map.virtual = __phys_to_virt(start); @@ -396,120 +461,23 @@ void __init dma_contiguous_remap(void) map.type = MT_MEMORY_DMA_READY; /* - * Clear previous low-memory mapping + * Clear previous low-memory mapping to ensure that the + * TLB does not see any conflicting entries, then flush + * the TLB of the old entries before creating new mappings. + * + * This ensures that any speculatively loaded TLB entries + * (even though they may be rare) can not cause any problems, + * and ensures that this code is architecturally compliant. */ for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); - iotable_init(&map, 1); - } -} - -static void * -__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, - const void *caller) -{ - struct arm_vmregion *c; - size_t align; - int bit; - - if (!consistent_pte) { - pr_err("%s: not initialised\n", __func__); - dump_stack(); - return NULL; - } - - /* - * Align the virtual region allocation - maximum alignment is - * a section size, minimum is a page size. This helps reduce - * fragmentation of the DMA space, and also prevents allocations - * smaller than a section from crossing a section boundary. - */ - bit = fls(size - 1); - if (bit > SECTION_SHIFT) - bit = SECTION_SHIFT; - align = 1 << bit; - - /* - * Allocate a virtual address in the consistent mapping region. - */ - c = arm_vmregion_alloc(&consistent_head, align, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller); - if (c) { - pte_t *pte; - int idx = CONSISTENT_PTE_INDEX(c->vm_start); - u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - - pte = consistent_pte[idx] + off; - c->priv = page; - - do { - BUG_ON(!pte_none(*pte)); - - set_pte_ext(pte, mk_pte(page, prot), 0); - page++; - pte++; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - pte = consistent_pte[++idx]; - } - } while (size -= PAGE_SIZE); - - dsb(); + flush_tlb_kernel_range(__phys_to_virt(start), + __phys_to_virt(end)); - return (void *)c->vm_start; + iotable_init(&map, 1); } - return NULL; -} - -static void __dma_free_remap(void *cpu_addr, size_t size) -{ - struct arm_vmregion *c; - unsigned long addr; - pte_t *ptep; - int idx; - u32 off; - - c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); - if (!c) { - pr_err("%s: trying to free invalid coherent area: %p\n", - __func__, cpu_addr); - dump_stack(); - return; - } - - if ((c->vm_end - c->vm_start) != size) { - pr_err("%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - idx = CONSISTENT_PTE_INDEX(c->vm_start); - off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - ptep = consistent_pte[idx] + off; - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - - ptep++; - addr += PAGE_SIZE; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - ptep = consistent_pte[++idx]; - } - - if (pte_none(pte) || !pte_present(pte)) - pr_crit("%s: bad page in kernel page table\n", - __func__); - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - arm_vmregion_free(&consistent_head, c); } static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, @@ -528,7 +496,6 @@ static void __dma_remap(struct page *page, size_t size, pgprot_t prot) unsigned end = start + size; apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); - dsb(); flush_tlb_kernel_range(start, end); } @@ -552,16 +519,17 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, return ptr; } -static void *__alloc_from_pool(struct device *dev, size_t size, - struct page **ret_page, const void *caller) +static void *__alloc_from_pool(size_t size, struct page **ret_page) { - struct arm_vmregion *c; - size_t align; + struct dma_pool *pool = &atomic_pool; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned int pageno; + unsigned long flags; + void *ptr = NULL; + unsigned long align_mask; - if (!coherent_head.vm_start) { - printk(KERN_ERR "%s: coherent pool not initialised!\n", - __func__); - dump_stack(); + if (!pool->vaddr) { + WARN(1, "coherent pool not initialised!\n"); return NULL; } @@ -570,61 +538,99 @@ static void *__alloc_from_pool(struct device *dev, size_t size, * small, so align them to their order in pages, minimum is a page * size. This helps reduce fragmentation of the DMA space. */ - align = PAGE_SIZE << get_order(size); - c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller); - if (c) { - void *ptr = (void *)c->vm_start; - struct page *page = virt_to_page(ptr); - *ret_page = page; - return ptr; + align_mask = (1 << get_order(size)) - 1; + + spin_lock_irqsave(&pool->lock, flags); + pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages, + 0, count, align_mask); + if (pageno < pool->nr_pages) { + bitmap_set(pool->bitmap, pageno, count); + ptr = pool->vaddr + PAGE_SIZE * pageno; + *ret_page = pool->pages[pageno]; + } else { + pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n" + "Please increase it with coherent_pool= kernel parameter!\n", + (unsigned)pool->size / 1024); } - return NULL; + spin_unlock_irqrestore(&pool->lock, flags); + + return ptr; } -static int __free_from_pool(void *cpu_addr, size_t size) +static bool __in_atomic_pool(void *start, size_t size) { - unsigned long start = (unsigned long)cpu_addr; - unsigned long end = start + size; - struct arm_vmregion *c; + struct dma_pool *pool = &atomic_pool; + void *end = start + size; + void *pool_start = pool->vaddr; + void *pool_end = pool->vaddr + pool->size; + + if (start < pool_start || start >= pool_end) + return false; + + if (end <= pool_end) + return true; + + WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n", + start, end - 1, pool_start, pool_end - 1); + + return false; +} + +static int __free_from_pool(void *start, size_t size) +{ + struct dma_pool *pool = &atomic_pool; + unsigned long pageno, count; + unsigned long flags; - if (start < coherent_head.vm_start || end > coherent_head.vm_end) + if (!__in_atomic_pool(start, size)) return 0; - c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start); + pageno = (start - pool->vaddr) >> PAGE_SHIFT; + count = size >> PAGE_SHIFT; - if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } + spin_lock_irqsave(&pool->lock, flags); + bitmap_clear(pool->bitmap, pageno, count); + spin_unlock_irqrestore(&pool->lock, flags); - arm_vmregion_free(&coherent_head, c); return 1; } static void *__alloc_from_contiguous(struct device *dev, size_t size, - pgprot_t prot, struct page **ret_page) + pgprot_t prot, struct page **ret_page, + const void *caller) { unsigned long order = get_order(size); size_t count = size >> PAGE_SHIFT; struct page *page; + void *ptr; page = dma_alloc_from_contiguous(dev, count, order); if (!page) return NULL; __dma_clear_buffer(page, size); - __dma_remap(page, size, prot); + if (PageHighMem(page)) { + ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller); + if (!ptr) { + dma_release_from_contiguous(dev, page, count); + return NULL; + } + } else { + __dma_remap(page, size, prot); + ptr = page_address(page); + } *ret_page = page; - return page_address(page); + return ptr; } static void __free_from_contiguous(struct device *dev, struct page *page, - size_t size) + void *cpu_addr, size_t size) { - __dma_remap(page, size, pgprot_kernel); + if (PageHighMem(page)) + __dma_free_remap(cpu_addr, size); + else + __dma_remap(page, size, PAGE_KERNEL); dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); } @@ -644,10 +650,10 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) #define __get_dma_pgprot(attrs, prot) __pgprot(0) #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL -#define __alloc_from_pool(dev, size, ret_page, c) NULL -#define __alloc_from_contiguous(dev, size, prot, ret) NULL +#define __alloc_from_pool(size, ret_page) NULL +#define __alloc_from_contiguous(dev, size, prot, ret, c) NULL #define __free_from_pool(cpu_addr, size) 0 -#define __free_from_contiguous(dev, page, size) do { } while (0) +#define __free_from_contiguous(dev, page, cpu_addr, size) do { } while (0) #define __dma_free_remap(cpu_addr, size) do { } while (0) #endif /* CONFIG_MMU */ @@ -667,10 +673,10 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, pgprot_t prot, const void *caller) + gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller) { u64 mask = get_coherent_dma_mask(dev); - struct page *page; + struct page *page = NULL; void *addr; #ifdef CONFIG_DMA_API_DEBUG @@ -700,14 +706,14 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); - if (arch_is_coherent() || nommu()) + if (is_coherent || nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (!IS_ENABLED(CONFIG_CMA)) + else if (!(gfp & __GFP_WAIT)) + addr = __alloc_from_pool(size, &page); + else if (!dev_get_cma_area(dev)) addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); - else if (gfp & GFP_ATOMIC) - addr = __alloc_from_pool(dev, size, &page, caller); else - addr = __alloc_from_contiguous(dev, size, prot, &page); + addr = __alloc_from_contiguous(dev, size, prot, &page, caller); if (addr) *handle = pfn_to_dma(dev, page_to_pfn(page)); @@ -722,13 +728,26 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { - pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); + void *memory; + + if (dma_alloc_from_coherent(dev, size, handle, &memory)) + return memory; + + return __dma_alloc(dev, size, handle, gfp, prot, false, + __builtin_return_address(0)); +} + +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); void *memory; if (dma_alloc_from_coherent(dev, size, handle, &memory)) return memory; - return __dma_alloc(dev, size, handle, gfp, prot, + return __dma_alloc(dev, size, handle, gfp, prot, true, __builtin_return_address(0)); } @@ -741,16 +760,22 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, { int ret = -ENXIO; #ifdef CONFIG_MMU + unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long pfn = dma_to_pfn(dev, dma_addr); + unsigned long off = vma->vm_pgoff; + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) return ret; - ret = remap_pfn_range(vma, vma->vm_start, - pfn + vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); + if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + pfn + off, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + } #endif /* CONFIG_MMU */ return ret; @@ -759,8 +784,9 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, /* * Free a buffer as defined by the above mapping. */ -void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs) +static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs, + bool is_coherent) { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); @@ -769,62 +795,92 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, size = PAGE_ALIGN(size); - if (arch_is_coherent() || nommu()) { + if (is_coherent || nommu()) { __dma_free_buffer(page, size); - } else if (!IS_ENABLED(CONFIG_CMA)) { + } else if (__free_from_pool(cpu_addr, size)) { + return; + } else if (!dev_get_cma_area(dev)) { __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { - if (__free_from_pool(cpu_addr, size)) - return; /* * Non-atomic allocations cannot be freed with IRQs disabled */ WARN_ON(irqs_disabled()); - __free_from_contiguous(dev, page, size); + __free_from_contiguous(dev, page, cpu_addr, size); } } +void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_dma_free(dev, size, cpu_addr, handle, attrs, false); +} + +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); +} + +int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + struct dma_attrs *attrs) +{ + struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); + int ret; + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (unlikely(ret)) + return ret; + + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return 0; +} + static void dma_cache_maint_page(struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, void (*op)(const void *, size_t, int)) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + offset / PAGE_SIZE; + offset %= PAGE_SIZE; + /* * A single sg entry may refer to multiple physically contiguous * pages. But we still need to process highmem pages individually. * If highmem is not configured then the bulk of this loop gets * optimized out. */ - size_t left = size; do { size_t len = left; void *vaddr; + page = pfn_to_page(pfn); + if (PageHighMem(page)) { - if (len + offset > PAGE_SIZE) { - if (offset >= PAGE_SIZE) { - page += offset / PAGE_SIZE; - offset %= PAGE_SIZE; - } + if (len + offset > PAGE_SIZE) len = PAGE_SIZE - offset; - } - vaddr = kmap_high_get(page); - if (vaddr) { - vaddr += offset; - op(vaddr, len, dir); - kunmap_high(page); - } else if (cache_is_vipt()) { - /* unmapped pages might still be cached */ + + if (cache_is_vipt_nonaliasing()) { vaddr = kmap_atomic(page); op(vaddr + offset, len, dir); kunmap_atomic(vaddr); + } else { + vaddr = kmap_high_get(page); + if (vaddr) { + op(vaddr + offset, len, dir); + kunmap_high(page); + } } } else { vaddr = page_address(page) + offset; op(vaddr, len, dir); } offset = 0; - page++; + pfn++; left -= len; } while (left); } @@ -838,7 +894,7 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, size_t size, enum dma_data_direction dir) { - unsigned long paddr; + phys_addr_t paddr; dma_cache_maint_page(page, off, size, dir, dmac_map_area); @@ -854,20 +910,35 @@ static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, size_t size, enum dma_data_direction dir) { - unsigned long paddr = page_to_phys(page) + off; + phys_addr_t paddr = page_to_phys(page) + off; /* FIXME: non-speculating: not required */ - /* don't bother invalidating if DMA to device */ - if (dir != DMA_TO_DEVICE) + /* in any case, don't bother invalidating if DMA to device */ + if (dir != DMA_TO_DEVICE) { outer_inv_range(paddr, paddr + size); - dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); + dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); + } /* - * Mark the D-cache clean for this page to avoid extra flushing. + * Mark the D-cache clean for these pages to avoid extra flushing. */ - if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) - set_bit(PG_dcache_clean, &page->flags); + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + off / PAGE_SIZE; + off %= PAGE_SIZE; + if (off) { + pfn++; + left -= PAGE_SIZE - off; + } + while (left >= PAGE_SIZE) { + page = pfn_to_page(pfn++); + set_bit(PG_dcache_clean, &page->flags); + left -= PAGE_SIZE; + } + } } /** @@ -978,13 +1049,11 @@ void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, */ int dma_supported(struct device *dev, u64 mask) { - if (mask < (u64)arm_dma_limit) - return 0; - return 1; + return __dma_supported(dev, mask, false); } EXPORT_SYMBOL(dma_supported); -static int arm_dma_set_mask(struct device *dev, u64 dma_mask) +int arm_dma_set_mask(struct device *dev, u64 dma_mask) { if (!dev->dma_mask || !dma_supported(dev, dma_mask)) return -EIO; @@ -998,9 +1067,6 @@ static int arm_dma_set_mask(struct device *dev, u64 dma_mask) static int __init dma_debug_do_init(void) { -#ifdef CONFIG_MMU - arm_vmregion_create_proc("dma-mappings", &consistent_head); -#endif dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); return 0; } @@ -1010,49 +1076,103 @@ fs_initcall(dma_debug_do_init); /* IOMMU */ +static int extend_iommu_mapping(struct dma_iommu_mapping *mapping); + static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, size_t size) { unsigned int order = get_order(size); unsigned int align = 0; unsigned int count, start; + size_t mapping_size = mapping->bits << PAGE_SHIFT; unsigned long flags; + dma_addr_t iova; + int i; - count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) + - (1 << mapping->order) - 1) >> mapping->order; + if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT) + order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT; - if (order > mapping->order) - align = (1 << (order - mapping->order)) - 1; + count = PAGE_ALIGN(size) >> PAGE_SHIFT; + align = (1 << order) - 1; spin_lock_irqsave(&mapping->lock, flags); - start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0, - count, align); - if (start > mapping->bits) { - spin_unlock_irqrestore(&mapping->lock, flags); - return DMA_ERROR_CODE; + for (i = 0; i < mapping->nr_bitmaps; i++) { + start = bitmap_find_next_zero_area(mapping->bitmaps[i], + mapping->bits, 0, count, align); + + if (start > mapping->bits) + continue; + + bitmap_set(mapping->bitmaps[i], start, count); + break; } - bitmap_set(mapping->bitmap, start, count); + /* + * No unused range found. Try to extend the existing mapping + * and perform a second attempt to reserve an IO virtual + * address range of size bytes. + */ + if (i == mapping->nr_bitmaps) { + if (extend_iommu_mapping(mapping)) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_ERROR_CODE; + } + + start = bitmap_find_next_zero_area(mapping->bitmaps[i], + mapping->bits, 0, count, align); + + if (start > mapping->bits) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_ERROR_CODE; + } + + bitmap_set(mapping->bitmaps[i], start, count); + } spin_unlock_irqrestore(&mapping->lock, flags); - return mapping->base + (start << (mapping->order + PAGE_SHIFT)); + iova = mapping->base + (mapping_size * i); + iova += start << PAGE_SHIFT; + + return iova; } static inline void __free_iova(struct dma_iommu_mapping *mapping, dma_addr_t addr, size_t size) { - unsigned int start = (addr - mapping->base) >> - (mapping->order + PAGE_SHIFT); - unsigned int count = ((size >> PAGE_SHIFT) + - (1 << mapping->order) - 1) >> mapping->order; + unsigned int start, count; + size_t mapping_size = mapping->bits << PAGE_SHIFT; unsigned long flags; + dma_addr_t bitmap_base; + u32 bitmap_index; + + if (!size) + return; + + bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size; + BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions); + + bitmap_base = mapping->base + mapping_size * bitmap_index; + + start = (addr - bitmap_base) >> PAGE_SHIFT; + + if (addr + size > bitmap_base + mapping_size) { + /* + * The address range to be freed reaches into the iova + * range of the next bitmap. This should not happen as + * we don't allow this in __alloc_iova (at the + * moment). + */ + BUG(); + } else + count = size >> PAGE_SHIFT; spin_lock_irqsave(&mapping->lock, flags); - bitmap_clear(mapping->bitmap, start, count); + bitmap_clear(mapping->bitmaps[bitmap_index], start, count); spin_unlock_irqrestore(&mapping->lock, flags); } -static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp) +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, + gfp_t gfp, struct dma_attrs *attrs) { struct page **pages; int count = size >> PAGE_SHIFT; @@ -1066,20 +1186,43 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t if (!pages) return NULL; + if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) + { + unsigned long order = get_order(size); + struct page *page; + + page = dma_alloc_from_contiguous(dev, count, order); + if (!page) + goto error; + + __dma_clear_buffer(page, size); + + for (i = 0; i < count; i++) + pages[i] = page + i; + + return pages; + } + + /* + * IOMMU can map any pages, so himem can also be used here + */ + gfp |= __GFP_NOWARN | __GFP_HIGHMEM; + while (count) { int j, order = __fls(count); - pages[i] = alloc_pages(gfp | __GFP_NOWARN, order); + pages[i] = alloc_pages(gfp, order); while (!pages[i] && order) - pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order); + pages[i] = alloc_pages(gfp, --order); if (!pages[i]) goto error; - if (order) + if (order) { split_page(pages[i], order); - j = 1 << order; - while (--j) - pages[i + j] = pages[i] + j; + j = 1 << order; + while (--j) + pages[i + j] = pages[i] + j; + } __dma_clear_buffer(pages[i], PAGE_SIZE << order); i += 1 << order; @@ -1088,25 +1231,32 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t return pages; error: - while (--i) + while (i--) if (pages[i]) __free_pages(pages[i], 0); - if (array_size < PAGE_SIZE) + if (array_size <= PAGE_SIZE) kfree(pages); else vfree(pages); return NULL; } -static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size) +static int __iommu_free_buffer(struct device *dev, struct page **pages, + size_t size, struct dma_attrs *attrs) { int count = size >> PAGE_SHIFT; int array_size = count * sizeof(struct page *); int i; - for (i = 0; i < count; i++) - if (pages[i]) - __free_pages(pages[i], 0); - if (array_size < PAGE_SIZE) + + if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) { + dma_release_from_contiguous(dev, pages[0], count); + } else { + for (i = 0; i < count; i++) + if (pages[i]) + __free_pages(pages[i], 0); + } + + if (array_size <= PAGE_SIZE) kfree(pages); else vfree(pages); @@ -1117,61 +1267,32 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s * Create a CPU mapping for a specified pages */ static void * -__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot) +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, + const void *caller) { - struct arm_vmregion *c; - size_t align; - size_t count = size >> PAGE_SHIFT; - int bit; + unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct vm_struct *area; + unsigned long p; - if (!consistent_pte[0]) { - pr_err("%s: not initialised\n", __func__); - dump_stack(); + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, + caller); + if (!area) return NULL; - } - /* - * Align the virtual region allocation - maximum alignment is - * a section size, minimum is a page size. This helps reduce - * fragmentation of the DMA space, and also prevents allocations - * smaller than a section from crossing a section boundary. - */ - bit = fls(size - 1); - if (bit > SECTION_SHIFT) - bit = SECTION_SHIFT; - align = 1 << bit; + area->pages = pages; + area->nr_pages = nr_pages; + p = (unsigned long)area->addr; - /* - * Allocate a virtual address in the consistent mapping region. - */ - c = arm_vmregion_alloc(&consistent_head, align, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL); - if (c) { - pte_t *pte; - int idx = CONSISTENT_PTE_INDEX(c->vm_start); - int i = 0; - u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - - pte = consistent_pte[idx] + off; - c->priv = pages; - - do { - BUG_ON(!pte_none(*pte)); - - set_pte_ext(pte, mk_pte(pages[i], prot), 0); - pte++; - off++; - i++; - if (off >= PTRS_PER_PTE) { - off = 0; - pte = consistent_pte[++idx]; - } - } while (i < count); - - dsb(); - - return (void *)c->vm_start; + for (i = 0; i < nr_pages; i++) { + phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i])); + if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot)) + goto err; + p += PAGE_SIZE; } + return area->addr; +err: + unmap_kernel_range((unsigned long)area->addr, size); + vunmap(area->addr); return NULL; } @@ -1201,7 +1322,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) break; len = (j - i) << PAGE_SHIFT; - ret = iommu_map(mapping->domain, iova, phys, len, 0); + ret = iommu_map(mapping->domain, iova, phys, len, + IOMMU_READ|IOMMU_WRITE); if (ret < 0) goto fail; iova += len; @@ -1230,17 +1352,82 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si return 0; } +static struct page **__atomic_get_pages(void *addr) +{ + struct dma_pool *pool = &atomic_pool; + struct page **pages = pool->pages; + int offs = (addr - pool->vaddr) >> PAGE_SHIFT; + + return pages + offs; +} + +static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs) +{ + struct vm_struct *area; + + if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) + return __atomic_get_pages(cpu_addr); + + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + return cpu_addr; + + area = find_vm_area(cpu_addr); + if (area && (area->flags & VM_ARM_DMA_CONSISTENT)) + return area->pages; + return NULL; +} + +static void *__iommu_alloc_atomic(struct device *dev, size_t size, + dma_addr_t *handle) +{ + struct page *page; + void *addr; + + addr = __alloc_from_pool(size, &page); + if (!addr) + return NULL; + + *handle = __iommu_create_mapping(dev, &page, size); + if (*handle == DMA_ERROR_CODE) + goto err_mapping; + + return addr; + +err_mapping: + __free_from_pool(addr, size); + return NULL; +} + +static void __iommu_free_atomic(struct device *dev, void *cpu_addr, + dma_addr_t handle, size_t size) +{ + __iommu_remove_mapping(dev, handle, size); + __free_from_pool(cpu_addr, size); +} + static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { - pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); struct page **pages; void *addr = NULL; *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); - pages = __iommu_alloc_buffer(dev, size, gfp); + if (!(gfp & __GFP_WAIT)) + return __iommu_alloc_atomic(dev, size, handle); + + /* + * Following is a work-around (a.k.a. hack) to prevent pages + * with __GFP_COMP being passed to split_page() which cannot + * handle them. The real problem is that this flag probably + * should be 0 on ARM as it is not supported on this + * platform; see CONFIG_HUGETLBFS. + */ + gfp &= ~(__GFP_COMP); + + pages = __iommu_alloc_buffer(dev, size, gfp, attrs); if (!pages) return NULL; @@ -1248,7 +1435,11 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, if (*handle == DMA_ERROR_CODE) goto err_buffer; - addr = __iommu_alloc_remap(pages, size, gfp, prot); + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + return pages; + + addr = __iommu_alloc_remap(pages, size, gfp, prot, + __builtin_return_address(0)); if (!addr) goto err_mapping; @@ -1257,7 +1448,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, err_mapping: __iommu_remove_mapping(dev, *handle, size); err_buffer: - __iommu_free_buffer(dev, pages, size); + __iommu_free_buffer(dev, pages, size, attrs); return NULL; } @@ -1265,31 +1456,25 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) { - struct arm_vmregion *c; + unsigned long uaddr = vma->vm_start; + unsigned long usize = vma->vm_end - vma->vm_start; + struct page **pages = __iommu_get_pages(cpu_addr, attrs); vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); - - if (c) { - struct page **pages = c->priv; - unsigned long uaddr = vma->vm_start; - unsigned long usize = vma->vm_end - vma->vm_start; - int i = 0; - - do { - int ret; + if (!pages) + return -ENXIO; - ret = vm_insert_page(vma, uaddr, pages[i++]); - if (ret) { - pr_err("Remapping memory, error: %d\n", ret); - return ret; - } + do { + int ret = vm_insert_page(vma, uaddr, *pages++); + if (ret) { + pr_err("Remapping memory failed: %d\n", ret); + return ret; + } + uaddr += PAGE_SIZE; + usize -= PAGE_SIZE; + } while (usize > 0); - uaddr += PAGE_SIZE; - usize -= PAGE_SIZE; - } while (usize > 0); - } return 0; } @@ -1300,16 +1485,62 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) { - struct arm_vmregion *c; + struct page **pages; size = PAGE_ALIGN(size); - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); - if (c) { - struct page **pages = c->priv; - __dma_free_remap(cpu_addr, size); - __iommu_remove_mapping(dev, handle, size); - __iommu_free_buffer(dev, pages, size); + if (__in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, cpu_addr, handle, size); + return; + } + + pages = __iommu_get_pages(cpu_addr, attrs); + if (!pages) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; } + + if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) { + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); + } + + __iommu_remove_mapping(dev, handle, size); + __iommu_free_buffer(dev, pages, size, attrs); +} + +static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, + size_t size, struct dma_attrs *attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page **pages = __iommu_get_pages(cpu_addr, attrs); + + if (!pages) + return -ENXIO; + + return sg_alloc_table_from_pages(sgt, pages, count, 0, size, + GFP_KERNEL); +} + +static int __dma_direction_to_prot(enum dma_data_direction dir) +{ + int prot; + + switch (dir) { + case DMA_BIDIRECTIONAL: + prot = IOMMU_READ | IOMMU_WRITE; + break; + case DMA_TO_DEVICE: + prot = IOMMU_READ; + break; + case DMA_FROM_DEVICE: + prot = IOMMU_WRITE; + break; + default: + prot = 0; + } + + return prot; } /* @@ -1317,13 +1548,15 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, */ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, size_t size, dma_addr_t *handle, - enum dma_data_direction dir) + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct dma_iommu_mapping *mapping = dev->archdata.mapping; dma_addr_t iova, iova_base; int ret = 0; unsigned int count; struct scatterlist *s; + int prot; size = PAGE_ALIGN(size); *handle = DMA_ERROR_CODE; @@ -1336,10 +1569,13 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, phys_addr_t phys = page_to_phys(sg_page(s)); unsigned int len = PAGE_ALIGN(s->offset + s->length); - if (!arch_is_coherent()) + if (!is_coherent && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); - ret = iommu_map(mapping->domain, iova, phys, len, 0); + prot = __dma_direction_to_prot(dir); + + ret = iommu_map(mapping->domain, iova, phys, len, prot); if (ret < 0) goto fail; count += len >> PAGE_SHIFT; @@ -1354,20 +1590,9 @@ fail: return ret; } -/** - * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA - * @dev: valid struct device pointer - * @sg: list of buffers - * @nents: number of buffers to map - * @dir: DMA transfer direction - * - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * The scatter gather list elements are merged together (if possible) and - * tagged with the appropriate dma address and length. They are obtained via - * sg_dma_{address,length}. - */ -int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) +static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct scatterlist *s = sg, *dma = sg, *start = sg; int i, count = 0; @@ -1383,7 +1608,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { if (__map_sg_chunk(dev, start, size, &dma->dma_address, - dir) < 0) + dir, attrs, is_coherent) < 0) goto bad_mapping; dma->dma_address += offset; @@ -1396,7 +1621,8 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, } size += s->length; } - if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0) + if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, + is_coherent) < 0) goto bad_mapping; dma->dma_address += offset; @@ -1411,17 +1637,44 @@ bad_mapping: } /** - * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA * @dev: valid struct device pointer * @sg: list of buffers - * @nents: number of buffers to unmap (same as was passed to dma_map_sg) - * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * @nents: number of buffers to map + * @dir: DMA transfer direction * - * Unmap a set of streaming mode DMA translations. Again, CPU access - * rules concerning calls here are the same as for dma_unmap_single(). + * Map a set of i/o coherent buffers described by scatterlist in streaming + * mode for DMA. The scatter gather list elements are merged together (if + * possible) and tagged with the appropriate dma address and length. They are + * obtained via sg_dma_{address,length}. */ -void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) +int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * The scatter gather list elements are merged together (if possible) and + * tagged with the appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. + */ +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, false); +} + +static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct scatterlist *s; int i; @@ -1430,13 +1683,46 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, if (sg_dma_len(s)) __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); - if (!arch_is_coherent()) + if (!is_coherent && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); } } /** + * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, false); +} + +/** * arm_iommu_sync_sg_for_cpu * @dev: valid struct device pointer * @sg: list of buffers @@ -1450,8 +1736,7 @@ void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); + __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); } @@ -1469,37 +1754,35 @@ void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) - if (!arch_is_coherent()) - __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); } /** - * arm_iommu_map_page + * arm_coherent_iommu_map_page * @dev: valid struct device pointer * @page: page that buffer resides in * @offset: offset into page for start of buffer * @size: size of buffer to map * @dir: DMA transfer direction * - * IOMMU aware version of arm_dma_map_page() + * Coherent IOMMU aware version of arm_dma_map_page() */ -static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, +static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { struct dma_iommu_mapping *mapping = dev->archdata.mapping; dma_addr_t dma_addr; - int ret, len = PAGE_ALIGN(size + offset); - - if (!arch_is_coherent()) - __dma_page_cpu_to_dev(page, offset, size, dir); + int ret, prot, len = PAGE_ALIGN(size + offset); dma_addr = __alloc_iova(mapping, len); if (dma_addr == DMA_ERROR_CODE) return dma_addr; - ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0); + prot = __dma_direction_to_prot(dir); + + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); if (ret < 0) goto fail; @@ -1510,6 +1793,51 @@ fail: } /** + * arm_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_cpu_to_dev(page, offset, size, dir); + + return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs); +} + +/** + * arm_coherent_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * Coherent IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +/** * arm_iommu_unmap_page * @dev: valid struct device pointer * @handle: DMA address of buffer @@ -1531,7 +1859,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, if (!iova) return; - if (!arch_is_coherent()) + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(page, offset, size, dir); iommu_unmap(mapping->domain, iova, len); @@ -1549,8 +1877,7 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev, if (!iova) return; - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(page, offset, size, dir); + __dma_page_dev_to_cpu(page, offset, size, dir); } static void arm_iommu_sync_single_for_device(struct device *dev, @@ -1571,6 +1898,7 @@ struct dma_map_ops iommu_ops = { .alloc = arm_iommu_alloc_attrs, .free = arm_iommu_free_attrs, .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, .map_page = arm_iommu_map_page, .unmap_page = arm_iommu_unmap_page, @@ -1581,14 +1909,30 @@ struct dma_map_ops iommu_ops = { .unmap_sg = arm_iommu_unmap_sg, .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu, .sync_sg_for_device = arm_iommu_sync_sg_for_device, + + .set_dma_mask = arm_dma_set_mask, +}; + +struct dma_map_ops iommu_coherent_ops = { + .alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, + + .map_page = arm_coherent_iommu_map_page, + .unmap_page = arm_coherent_iommu_unmap_page, + + .map_sg = arm_coherent_iommu_map_sg, + .unmap_sg = arm_coherent_iommu_unmap_sg, + + .set_dma_mask = arm_dma_set_mask, }; /** * arm_iommu_create_mapping * @bus: pointer to the bus holding the client device (for IOMMU calls) * @base: start address of the valid IO address space - * @size: size of the valid IO address space - * @order: accuracy of the IO addresses allocations + * @size: maximum size of the valid IO address space * * Creates a mapping structure which holds information about used/unused * IO address ranges, which is required to perform memory allocation and @@ -1598,59 +1942,97 @@ struct dma_map_ops iommu_ops = { * arm_iommu_attach_device function. */ struct dma_iommu_mapping * -arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, - int order) +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size) { - unsigned int count = size >> (PAGE_SHIFT + order); - unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); + unsigned int bits = size >> PAGE_SHIFT; + unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long); struct dma_iommu_mapping *mapping; + int extensions = 1; int err = -ENOMEM; - if (!count) + if (!bitmap_size) return ERR_PTR(-EINVAL); + if (bitmap_size > PAGE_SIZE) { + extensions = bitmap_size / PAGE_SIZE; + bitmap_size = PAGE_SIZE; + } + mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); if (!mapping) goto err; - mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!mapping->bitmap) + mapping->bitmap_size = bitmap_size; + mapping->bitmaps = kzalloc(extensions * sizeof(unsigned long *), + GFP_KERNEL); + if (!mapping->bitmaps) goto err2; + mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL); + if (!mapping->bitmaps[0]) + goto err3; + + mapping->nr_bitmaps = 1; + mapping->extensions = extensions; mapping->base = base; mapping->bits = BITS_PER_BYTE * bitmap_size; - mapping->order = order; + spin_lock_init(&mapping->lock); mapping->domain = iommu_domain_alloc(bus); if (!mapping->domain) - goto err3; + goto err4; kref_init(&mapping->kref); return mapping; +err4: + kfree(mapping->bitmaps[0]); err3: - kfree(mapping->bitmap); + kfree(mapping->bitmaps); err2: kfree(mapping); err: return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(arm_iommu_create_mapping); static void release_iommu_mapping(struct kref *kref) { + int i; struct dma_iommu_mapping *mapping = container_of(kref, struct dma_iommu_mapping, kref); iommu_domain_free(mapping->domain); - kfree(mapping->bitmap); + for (i = 0; i < mapping->nr_bitmaps; i++) + kfree(mapping->bitmaps[i]); + kfree(mapping->bitmaps); kfree(mapping); } +static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) +{ + int next_bitmap; + + if (mapping->nr_bitmaps > mapping->extensions) + return -EINVAL; + + next_bitmap = mapping->nr_bitmaps; + mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size, + GFP_ATOMIC); + if (!mapping->bitmaps[next_bitmap]) + return -ENOMEM; + + mapping->nr_bitmaps++; + + return 0; +} + void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) { if (mapping) kref_put(&mapping->kref, release_iommu_mapping); } +EXPORT_SYMBOL_GPL(arm_iommu_release_mapping); /** * arm_iommu_attach_device @@ -1676,8 +2058,35 @@ int arm_iommu_attach_device(struct device *dev, dev->archdata.mapping = mapping; set_dma_ops(dev, &iommu_ops); - pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev)); + pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); return 0; } +EXPORT_SYMBOL_GPL(arm_iommu_attach_device); + +/** + * arm_iommu_detach_device + * @dev: valid struct device pointer + * + * Detaches the provided device from a previously attached map. + * This voids the dma operations (dma_map_ops pointer) + */ +void arm_iommu_detach_device(struct device *dev) +{ + struct dma_iommu_mapping *mapping; + + mapping = to_dma_iommu_mapping(dev); + if (!mapping) { + dev_warn(dev, "Not attached\n"); + return; + } + + iommu_detach_device(mapping->domain, dev); + kref_put(&mapping->kref, release_iommu_mapping); + dev->archdata.mapping = NULL; + set_dma_ops(dev, NULL); + + pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); +} +EXPORT_SYMBOL_GPL(arm_iommu_detach_device); #endif |
