diff options
Diffstat (limited to 'mm/vmalloc.c')
| -rw-r--r-- | mm/vmalloc.c | 143 |
1 files changed, 63 insertions, 80 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 91a10472a39..f64632b6719 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -27,7 +27,9 @@ #include <linux/pfn.h> #include <linux/kmemleak.h> #include <linux/atomic.h> +#include <linux/compiler.h> #include <linux/llist.h> + #include <asm/uaccess.h> #include <asm/tlbflush.h> #include <asm/shmparam.h> @@ -359,6 +361,12 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, if (unlikely(!va)) return ERR_PTR(-ENOMEM); + /* + * Only scan the relevant parts containing pointers to other objects + * to avoid false negatives. + */ + kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK); + retry: spin_lock(&vmap_area_lock); /* @@ -388,12 +396,12 @@ nocache: addr = ALIGN(first->va_end, align); if (addr < vstart) goto nocache; - if (addr + size - 1 < addr) + if (addr + size < addr) goto overflow; } else { addr = ALIGN(vstart, align); - if (addr + size - 1 < addr) + if (addr + size < addr) goto overflow; n = vmap_area_root.rb_node; @@ -420,7 +428,7 @@ nocache: if (addr + cached_hole_size < first->va_start) cached_hole_size = first->va_start - addr; addr = ALIGN(first->va_end, align); - if (addr + size - 1 < addr) + if (addr + size < addr) goto overflow; if (list_is_last(&first->list, &vmap_area_list)) @@ -752,9 +760,7 @@ struct vmap_block_queue { struct vmap_block { spinlock_t lock; struct vmap_area *va; - struct vmap_block_queue *vbq; unsigned long free, dirty; - DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); struct list_head free_list; struct rcu_head rcu_head; @@ -820,7 +826,6 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) vb->va = va; vb->free = VMAP_BBMAP_BITS; vb->dirty = 0; - bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS); bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS); INIT_LIST_HEAD(&vb->free_list); @@ -832,7 +837,6 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) radix_tree_preload_end(); vbq = &get_cpu_var(vmap_block_queue); - vb->vbq = vbq; spin_lock(&vbq->lock); list_add_rcu(&vb->free_list, &vbq->free); spin_unlock(&vbq->lock); @@ -873,7 +877,6 @@ static void purge_fragmented_blocks(int cpu) if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { vb->free = 0; /* prevent further allocs after releasing lock */ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ - bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS); bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS); spin_lock(&vbq->lock); list_del_rcu(&vb->free_list); @@ -891,11 +894,6 @@ static void purge_fragmented_blocks(int cpu) } } -static void purge_fragmented_blocks_thiscpu(void) -{ - purge_fragmented_blocks(smp_processor_id()); -} - static void purge_fragmented_blocks_allcpus(void) { int cpu; @@ -910,7 +908,6 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) struct vmap_block *vb; unsigned long addr = 0; unsigned int order; - int purge = 0; BUG_ON(size & ~PAGE_MASK); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); @@ -934,17 +931,7 @@ again: if (vb->free < 1UL << order) goto next; - i = bitmap_find_free_region(vb->alloc_map, - VMAP_BBMAP_BITS, order); - - if (i < 0) { - if (vb->free + vb->dirty == VMAP_BBMAP_BITS) { - /* fragmented and no outstanding allocations */ - BUG_ON(vb->dirty != VMAP_BBMAP_BITS); - purge = 1; - } - goto next; - } + i = VMAP_BBMAP_BITS - vb->free; addr = vb->va->va_start + (i << PAGE_SHIFT); BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(vb->va->va_start)); @@ -960,9 +947,6 @@ next: spin_unlock(&vb->lock); } - if (purge) - purge_fragmented_blocks_thiscpu(); - put_cpu_var(vmap_block_queue); rcu_read_unlock(); @@ -1040,15 +1024,16 @@ void vm_unmap_aliases(void) rcu_read_lock(); list_for_each_entry_rcu(vb, &vbq->free, free_list) { - int i; + int i, j; spin_lock(&vb->lock); i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS); - while (i < VMAP_BBMAP_BITS) { + if (i < VMAP_BBMAP_BITS) { unsigned long s, e; - int j; - j = find_next_zero_bit(vb->dirty_map, - VMAP_BBMAP_BITS, i); + + j = find_last_bit(vb->dirty_map, + VMAP_BBMAP_BITS); + j = j + 1; /* need exclusive index */ s = vb->va->va_start + (i << PAGE_SHIFT); e = vb->va->va_start + (j << PAGE_SHIFT); @@ -1058,10 +1043,6 @@ void vm_unmap_aliases(void) start = s; if (e > end) end = e; - - i = j; - i = find_next_bit(vb->dirty_map, - VMAP_BBMAP_BITS, i); } spin_unlock(&vb->lock); } @@ -1104,6 +1085,12 @@ EXPORT_SYMBOL(vm_unmap_ram); * @node: prefer to allocate data structures on this node * @prot: memory protection to use. PAGE_KERNEL for regular RAM * + * If you use this function for less than VMAP_MAX_ALLOC pages, it could be + * faster than vmap so it's good. But if you mix long-life and short-life + * objects with vm_map_ram(), it could consume lots of address space through + * fragmentation (especially on a 32bit machine). You could see failures in + * the end. Please use this function for short-lived objects. + * * Returns: a pointer to the address that has been mapped, or %NULL on failure */ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) @@ -1281,11 +1268,12 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) vunmap_page_range(addr, end); flush_tlb_kernel_range(addr, end); } +EXPORT_SYMBOL_GPL(unmap_kernel_range); int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) { unsigned long addr = (unsigned long)area->addr; - unsigned long end = addr + area->size - PAGE_SIZE; + unsigned long end = addr + get_vm_area_size(area); int err; err = vmap_page_range(addr, end, prot, *pages); @@ -1311,15 +1299,15 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, spin_unlock(&vmap_area_lock); } -static void clear_vm_unlist(struct vm_struct *vm) +static void clear_vm_uninitialized_flag(struct vm_struct *vm) { /* - * Before removing VM_UNLIST, + * Before removing VM_UNINITIALIZED, * we should make sure that vm has proper values. * Pair with smp_rmb() in show_numa_info(). */ smp_wmb(); - vm->flags &= ~VM_UNLIST; + vm->flags &= ~VM_UNINITIALIZED; } static struct vm_struct *__get_vm_area_node(unsigned long size, @@ -1453,7 +1441,7 @@ static void __vunmap(const void *addr, int deallocate_pages) return; if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n", - addr)); + addr)) return; area = remove_vm_area(addr); @@ -1499,7 +1487,6 @@ static void __vunmap(const void *addr, int deallocate_pages) * conventions for vfree() arch-depenedent would be a really bad idea) * * NOTE: assumes that the object at *addr has a size >= sizeof(llist_node) - * */ void vfree(const void *addr) { @@ -1510,9 +1497,9 @@ void vfree(const void *addr) if (!addr) return; if (unlikely(in_interrupt())) { - struct vfree_deferred *p = &__get_cpu_var(vfree_deferred); - llist_add((struct llist_node *)addr, &p->list); - schedule_work(&p->wq); + struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred); + if (llist_add((struct llist_node *)addr, &p->list)) + schedule_work(&p->wq); } else __vunmap(addr, 1); } @@ -1574,27 +1561,26 @@ static void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, pgprot_t prot, int node, const void *caller); static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, - pgprot_t prot, int node, const void *caller) + pgprot_t prot, int node) { const int order = 0; struct page **pages; unsigned int nr_pages, array_size, i; gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; - nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT; + nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); area->nr_pages = nr_pages; /* Please note that the recursion is strictly bounded. */ if (array_size > PAGE_SIZE) { pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM, - PAGE_KERNEL, node, caller); + PAGE_KERNEL, node, area->caller); area->flags |= VM_VPAGES; } else { pages = kmalloc_node(array_size, nested_gfp, node); } area->pages = pages; - area->caller = caller; if (!area->pages) { remove_vm_area(area->addr); kfree(area); @@ -1605,7 +1591,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, struct page *page; gfp_t tmp_mask = gfp_mask | __GFP_NOWARN; - if (node < 0) + if (node == NUMA_NO_NODE) page = alloc_page(tmp_mask); else page = alloc_pages_node(node, tmp_mask, order); @@ -1657,28 +1643,28 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!size || (size >> PAGE_SHIFT) > totalram_pages) goto fail; - area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST, + area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED, start, end, node, gfp_mask, caller); if (!area) goto fail; - addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); + addr = __vmalloc_area_node(area, gfp_mask, prot, node); if (!addr) return NULL; /* - * In this function, newly allocated vm_struct has VM_UNLIST flag. - * It means that vm_struct is not fully initialized. + * In this function, newly allocated vm_struct has VM_UNINITIALIZED + * flag. It means that vm_struct is not fully initialized. * Now, it is fully initialized, so remove this flag here. */ - clear_vm_unlist(area); + clear_vm_uninitialized_flag(area); /* - * A ref_count = 3 is needed because the vm_struct and vmap_area - * structures allocated in the __get_vm_area_node() function contain - * references to the virtual address of the vmalloc'ed block. + * A ref_count = 2 is needed because vm_struct allocated in + * __get_vm_area_node() contains a reference to the virtual address of + * the vmalloc'ed block. */ - kmemleak_alloc(addr, real_size, 3, gfp_mask); + kmemleak_alloc(addr, real_size, 2, gfp_mask); return addr; @@ -2013,7 +1999,7 @@ long vread(char *buf, char *addr, unsigned long count) vm = va->vm; vaddr = (char *) vm->addr; - if (addr >= vaddr + vm->size - PAGE_SIZE) + if (addr >= vaddr + get_vm_area_size(vm)) continue; while (addr < vaddr) { if (count == 0) @@ -2023,7 +2009,7 @@ long vread(char *buf, char *addr, unsigned long count) addr++; count--; } - n = vaddr + vm->size - PAGE_SIZE - addr; + n = vaddr + get_vm_area_size(vm) - addr; if (n > count) n = count; if (!(vm->flags & VM_IOREMAP)) @@ -2095,7 +2081,7 @@ long vwrite(char *buf, char *addr, unsigned long count) vm = va->vm; vaddr = (char *) vm->addr; - if (addr >= vaddr + vm->size - PAGE_SIZE) + if (addr >= vaddr + get_vm_area_size(vm)) continue; while (addr < vaddr) { if (count == 0) @@ -2104,7 +2090,7 @@ long vwrite(char *buf, char *addr, unsigned long count) addr++; count--; } - n = vaddr + vm->size - PAGE_SIZE - addr; + n = vaddr + get_vm_area_size(vm) - addr; if (n > count) n = count; if (!(vm->flags & VM_IOREMAP)) { @@ -2204,7 +2190,7 @@ EXPORT_SYMBOL(remap_vmalloc_range); * Implement a stub for vmalloc_sync_all() if the architecture chose not to * have one. */ -void __attribute__((weak)) vmalloc_sync_all(void) +void __weak vmalloc_sync_all(void) { } @@ -2591,9 +2577,9 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) if (!counters) return; - /* Pair with smp_wmb() in clear_vm_unlist() */ + /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ smp_rmb(); - if (v->flags & VM_UNLIST) + if (v->flags & VM_UNINITIALIZED) return; memset(counters, 0, nr_node_ids * sizeof(unsigned int)); @@ -2612,15 +2598,12 @@ static int s_show(struct seq_file *m, void *p) struct vmap_area *va = p; struct vm_struct *v; - if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING)) - return 0; - - if (!(va->flags & VM_VM_AREA)) { - seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n", - (void *)va->va_start, (void *)va->va_end, - va->va_end - va->va_start); + /* + * s_show can encounter race with remove_vm_area, !VM_VM_AREA on + * behalf of vmap area is being tear down or vm_map_ram allocation. + */ + if (!(va->flags & VM_VM_AREA)) return 0; - } v = va->vm; @@ -2637,19 +2620,19 @@ static int s_show(struct seq_file *m, void *p) seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr); if (v->flags & VM_IOREMAP) - seq_printf(m, " ioremap"); + seq_puts(m, " ioremap"); if (v->flags & VM_ALLOC) - seq_printf(m, " vmalloc"); + seq_puts(m, " vmalloc"); if (v->flags & VM_MAP) - seq_printf(m, " vmap"); + seq_puts(m, " vmap"); if (v->flags & VM_USERMAP) - seq_printf(m, " user"); + seq_puts(m, " user"); if (v->flags & VM_VPAGES) - seq_printf(m, " vpages"); + seq_puts(m, " vpages"); show_numa_info(m, v); seq_putc(m, '\n'); |
