aboutsummaryrefslogtreecommitdiff
path: root/mm/vmalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r-- mm/vmalloc.c 143
1 files changed, 63 insertions, 80 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 91a10472a39..f64632b6719 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -27,7 +27,9 @@
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <linux/atomic.h>
+#include <linux/compiler.h>
#include <linux/llist.h>
+
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
@@ -359,6 +361,12 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
if (unlikely(!va))
return ERR_PTR(-ENOMEM);
+ /*
+ * Only scan the relevant parts containing pointers to other objects
+ * to avoid false negatives.
+ */
+ kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
+
retry:
spin_lock(&vmap_area_lock);
/*
@@ -388,12 +396,12 @@ nocache:
addr = ALIGN(first->va_end, align);
if (addr < vstart)
goto nocache;
- if (addr + size - 1 < addr)
+ if (addr + size < addr)
goto overflow;
} else {
addr = ALIGN(vstart, align);
- if (addr + size - 1 < addr)
+ if (addr + size < addr)
goto overflow;
n = vmap_area_root.rb_node;
@@ -420,7 +428,7 @@ nocache:
if (addr + cached_hole_size < first->va_start)
cached_hole_size = first->va_start - addr;
addr = ALIGN(first->va_end, align);
- if (addr + size - 1 < addr)
+ if (addr + size < addr)
goto overflow;
if (list_is_last(&first->list, &vmap_area_list))
@@ -752,9 +760,7 @@ struct vmap_block_queue {
struct vmap_block {
spinlock_t lock;
struct vmap_area *va;
- struct vmap_block_queue *vbq;
unsigned long free, dirty;
- DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
struct list_head free_list;
struct rcu_head rcu_head;
@@ -820,7 +826,6 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
vb->va = va;
vb->free = VMAP_BBMAP_BITS;
vb->dirty = 0;
- bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
INIT_LIST_HEAD(&vb->free_list);
@@ -832,7 +837,6 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
radix_tree_preload_end();
vbq = &get_cpu_var(vmap_block_queue);
- vb->vbq = vbq;
spin_lock(&vbq->lock);
list_add_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
@@ -873,7 +877,6 @@ static void purge_fragmented_blocks(int cpu)
if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
vb->free = 0; /* prevent further allocs after releasing lock */
vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
- bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
spin_lock(&vbq->lock);
list_del_rcu(&vb->free_list);
@@ -891,11 +894,6 @@ static void purge_fragmented_blocks(int cpu)
}
}
-static void purge_fragmented_blocks_thiscpu(void)
-{
- purge_fragmented_blocks(smp_processor_id());
-}
-
static void purge_fragmented_blocks_allcpus(void)
{
int cpu;
@@ -910,7 +908,6 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
struct vmap_block *vb;
unsigned long addr = 0;
unsigned int order;
- int purge = 0;
BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
@@ -934,17 +931,7 @@ again:
if (vb->free < 1UL << order)
goto next;
- i = bitmap_find_free_region(vb->alloc_map,
- VMAP_BBMAP_BITS, order);
-
- if (i < 0) {
- if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
- /* fragmented and no outstanding allocations */
- BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
- purge = 1;
- }
- goto next;
- }
+ i = VMAP_BBMAP_BITS - vb->free;
addr = vb->va->va_start + (i << PAGE_SHIFT);
BUG_ON(addr_to_vb_idx(addr) !=
addr_to_vb_idx(vb->va->va_start));
@@ -960,9 +947,6 @@ next:
spin_unlock(&vb->lock);
}
- if (purge)
- purge_fragmented_blocks_thiscpu();
-
put_cpu_var(vmap_block_queue);
rcu_read_unlock();
@@ -1040,15 +1024,16 @@ void vm_unmap_aliases(void)
rcu_read_lock();
list_for_each_entry_rcu(vb, &vbq->free, free_list) {
- int i;
+ int i, j;
spin_lock(&vb->lock);
i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
- while (i < VMAP_BBMAP_BITS) {
+ if (i < VMAP_BBMAP_BITS) {
unsigned long s, e;
- int j;
- j = find_next_zero_bit(vb->dirty_map,
- VMAP_BBMAP_BITS, i);
+
+ j = find_last_bit(vb->dirty_map,
+ VMAP_BBMAP_BITS);
+ j = j + 1; /* need exclusive index */
s = vb->va->va_start + (i << PAGE_SHIFT);
e = vb->va->va_start + (j << PAGE_SHIFT);
@@ -1058,10 +1043,6 @@ void vm_unmap_aliases(void)
start = s;
if (e > end)
end = e;
-
- i = j;
- i = find_next_bit(vb->dirty_map,
- VMAP_BBMAP_BITS, i);
}
spin_unlock(&vb->lock);
}
@@ -1104,6 +1085,12 @@ EXPORT_SYMBOL(vm_unmap_ram);
* @node: prefer to allocate data structures on this node
* @prot: memory protection to use. PAGE_KERNEL for regular RAM
*
+ * If you use this function for less than VMAP_MAX_ALLOC pages, it can be
+ * faster than vmap(). But if you mix long-lived and short-lived objects
+ * with vm_map_ram(), it can consume lots of address space through
+ * fragmentation (especially on a 32-bit machine), and you may eventually
+ * see failures. Please use this function for short-lived objects.
+ *
* Returns: a pointer to the address that has been mapped, or %NULL on failure
*/
void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
@@ -1281,11 +1268,12 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
vunmap_page_range(addr, end);
flush_tlb_kernel_range(addr, end);
}
+EXPORT_SYMBOL_GPL(unmap_kernel_range);
int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
unsigned long addr = (unsigned long)area->addr;
- unsigned long end = addr + area->size - PAGE_SIZE;
+ unsigned long end = addr + get_vm_area_size(area);
int err;
err = vmap_page_range(addr, end, prot, *pages);
@@ -1311,15 +1299,15 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
spin_unlock(&vmap_area_lock);
}
-static void clear_vm_unlist(struct vm_struct *vm)
+static void clear_vm_uninitialized_flag(struct vm_struct *vm)
{
/*
- * Before removing VM_UNLIST,
+ * Before removing VM_UNINITIALIZED,
* we should make sure that vm has proper values.
* Pair with smp_rmb() in show_numa_info().
*/
smp_wmb();
- vm->flags &= ~VM_UNLIST;
+ vm->flags &= ~VM_UNINITIALIZED;
}
static struct vm_struct *__get_vm_area_node(unsigned long size,
@@ -1453,7 +1441,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
return;
if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n",
- addr));
+ addr))
return;
area = remove_vm_area(addr);
@@ -1499,7 +1487,6 @@ static void __vunmap(const void *addr, int deallocate_pages)
* conventions for vfree() arch-depenedent would be a really bad idea)
*
* NOTE: assumes that the object at *addr has a size >= sizeof(llist_node)
- *
*/
void vfree(const void *addr)
{
@@ -1510,9 +1497,9 @@ void vfree(const void *addr)
if (!addr)
return;
if (unlikely(in_interrupt())) {
- struct vfree_deferred *p = &__get_cpu_var(vfree_deferred);
- llist_add((struct llist_node *)addr, &p->list);
- schedule_work(&p->wq);
+ struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
+ if (llist_add((struct llist_node *)addr, &p->list))
+ schedule_work(&p->wq);
} else
__vunmap(addr, 1);
}
@@ -1574,27 +1561,26 @@ static void *__vmalloc_node(unsigned long size, unsigned long align,
gfp_t gfp_mask, pgprot_t prot,
int node, const void *caller);
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
- pgprot_t prot, int node, const void *caller)
+ pgprot_t prot, int node)
{
const int order = 0;
struct page **pages;
unsigned int nr_pages, array_size, i;
gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
- nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
+ nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
area->nr_pages = nr_pages;
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {
pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
- PAGE_KERNEL, node, caller);
+ PAGE_KERNEL, node, area->caller);
area->flags |= VM_VPAGES;
} else {
pages = kmalloc_node(array_size, nested_gfp, node);
}
area->pages = pages;
- area->caller = caller;
if (!area->pages) {
remove_vm_area(area->addr);
kfree(area);
@@ -1605,7 +1591,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
struct page *page;
gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
- if (node < 0)
+ if (node == NUMA_NO_NODE)
page = alloc_page(tmp_mask);
else
page = alloc_pages_node(node, tmp_mask, order);
@@ -1657,28 +1643,28 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!size || (size >> PAGE_SHIFT) > totalram_pages)
goto fail;
- area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST,
+ area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED,
start, end, node, gfp_mask, caller);
if (!area)
goto fail;
- addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
+ addr = __vmalloc_area_node(area, gfp_mask, prot, node);
if (!addr)
return NULL;
/*
- * In this function, newly allocated vm_struct has VM_UNLIST flag.
- * It means that vm_struct is not fully initialized.
+ * In this function, newly allocated vm_struct has VM_UNINITIALIZED
+ * flag. It means that vm_struct is not fully initialized.
* Now, it is fully initialized, so remove this flag here.
*/
- clear_vm_unlist(area);
+ clear_vm_uninitialized_flag(area);
/*
- * A ref_count = 3 is needed because the vm_struct and vmap_area
- * structures allocated in the __get_vm_area_node() function contain
- * references to the virtual address of the vmalloc'ed block.
+ * A ref_count = 2 is needed because vm_struct allocated in
+ * __get_vm_area_node() contains a reference to the virtual address of
+ * the vmalloc'ed block.
*/
- kmemleak_alloc(addr, real_size, 3, gfp_mask);
+ kmemleak_alloc(addr, real_size, 2, gfp_mask);
return addr;
@@ -2013,7 +1999,7 @@ long vread(char *buf, char *addr, unsigned long count)
vm = va->vm;
vaddr = (char *) vm->addr;
- if (addr >= vaddr + vm->size - PAGE_SIZE)
+ if (addr >= vaddr + get_vm_area_size(vm))
continue;
while (addr < vaddr) {
if (count == 0)
@@ -2023,7 +2009,7 @@ long vread(char *buf, char *addr, unsigned long count)
addr++;
count--;
}
- n = vaddr + vm->size - PAGE_SIZE - addr;
+ n = vaddr + get_vm_area_size(vm) - addr;
if (n > count)
n = count;
if (!(vm->flags & VM_IOREMAP))
@@ -2095,7 +2081,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
vm = va->vm;
vaddr = (char *) vm->addr;
- if (addr >= vaddr + vm->size - PAGE_SIZE)
+ if (addr >= vaddr + get_vm_area_size(vm))
continue;
while (addr < vaddr) {
if (count == 0)
@@ -2104,7 +2090,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
addr++;
count--;
}
- n = vaddr + vm->size - PAGE_SIZE - addr;
+ n = vaddr + get_vm_area_size(vm) - addr;
if (n > count)
n = count;
if (!(vm->flags & VM_IOREMAP)) {
@@ -2204,7 +2190,7 @@ EXPORT_SYMBOL(remap_vmalloc_range);
* Implement a stub for vmalloc_sync_all() if the architecture chose not to
* have one.
*/
-void __attribute__((weak)) vmalloc_sync_all(void)
+void __weak vmalloc_sync_all(void)
{
}
@@ -2591,9 +2577,9 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
if (!counters)
return;
- /* Pair with smp_wmb() in clear_vm_unlist() */
+ /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
smp_rmb();
- if (v->flags & VM_UNLIST)
+ if (v->flags & VM_UNINITIALIZED)
return;
memset(counters, 0, nr_node_ids * sizeof(unsigned int));
@@ -2612,15 +2598,12 @@ static int s_show(struct seq_file *m, void *p)
struct vmap_area *va = p;
struct vm_struct *v;
- if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
- return 0;
-
- if (!(va->flags & VM_VM_AREA)) {
- seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
- (void *)va->va_start, (void *)va->va_end,
- va->va_end - va->va_start);
+ /*
+ * s_show can race with remove_vm_area(): !VM_VM_AREA means either the
+ * vmap area is being torn down or it is a vm_map_ram allocation.
+ */
+ if (!(va->flags & VM_VM_AREA))
return 0;
- }
v = va->vm;
@@ -2637,19 +2620,19 @@ static int s_show(struct seq_file *m, void *p)
seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
if (v->flags & VM_IOREMAP)
- seq_printf(m, " ioremap");
+ seq_puts(m, " ioremap");
if (v->flags & VM_ALLOC)
- seq_printf(m, " vmalloc");
+ seq_puts(m, " vmalloc");
if (v->flags & VM_MAP)
- seq_printf(m, " vmap");
+ seq_puts(m, " vmap");
if (v->flags & VM_USERMAP)
- seq_printf(m, " user");
+ seq_puts(m, " user");
if (v->flags & VM_VPAGES)
- seq_printf(m, " vpages");
+ seq_puts(m, " vpages");
show_numa_info(m, v);
seq_putc(m, '\n');