Diffstat (limited to 'mm/mempool.c')
-rw-r--r--      mm/mempool.c      130
1 file changed, 88 insertions(+), 42 deletions(-)
diff --git a/mm/mempool.c b/mm/mempool.c
index 1a3bc3d4d55..e209c98c720 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -10,7 +10,8 @@
 
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <linux/module.h>
+#include <linux/kmemleak.h>
+#include <linux/export.h>
 #include <linux/mempool.h>
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
@@ -27,7 +28,15 @@ static void *remove_element(mempool_t *pool)
         return pool->elements[--pool->curr_nr];
 }
 
-static void free_pool(mempool_t *pool)
+/**
+ * mempool_destroy - deallocate a memory pool
+ * @pool: pointer to the memory pool which was allocated via
+ *        mempool_create().
+ *
+ * Free all reserved elements in @pool and @pool itself.  This function
+ * only sleeps if the free_fn() function sleeps.
+ */
+void mempool_destroy(mempool_t *pool)
 {
         while (pool->curr_nr) {
                 void *element = remove_element(pool);
@@ -36,6 +45,7 @@ static void free_pool(mempool_t *pool)
         kfree(pool->elements);
         kfree(pool);
 }
+EXPORT_SYMBOL(mempool_destroy);
 
 /**
  * mempool_create - create a memory pool
@@ -54,19 +64,21 @@ static void free_pool(mempool_t *pool)
 mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
                                 mempool_free_t *free_fn, void *pool_data)
 {
-        return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,-1);
+        return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,
+                                   GFP_KERNEL, NUMA_NO_NODE);
 }
 EXPORT_SYMBOL(mempool_create);
 
 mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
-                        mempool_free_t *free_fn, void *pool_data, int node_id)
+                               mempool_free_t *free_fn, void *pool_data,
+                               gfp_t gfp_mask, int node_id)
 {
         mempool_t *pool;
-        pool = kmalloc_node(sizeof(*pool), GFP_KERNEL | __GFP_ZERO, node_id);
+        pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
         if (!pool)
                 return NULL;
         pool->elements = kmalloc_node(min_nr * sizeof(void *),
-                                        GFP_KERNEL, node_id);
+                                      gfp_mask, node_id);
         if (!pool->elements) {
                 kfree(pool);
                 return NULL;
@@ -84,9 +96,9 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
         while (pool->curr_nr < pool->min_nr) {
                 void *element;
 
-                element = pool->alloc(GFP_KERNEL, pool->pool_data);
+                element = pool->alloc(gfp_mask, pool->pool_data);
                 if (unlikely(!element)) {
-                        free_pool(pool);
+                        mempool_destroy(pool);
                         return NULL;
                 }
                 add_element(pool, element);
@@ -172,23 +184,6 @@ out:
 EXPORT_SYMBOL(mempool_resize);
 
 /**
- * mempool_destroy - deallocate a memory pool
- * @pool: pointer to the memory pool which was allocated via
- *        mempool_create().
- *
- * this function only sleeps if the free_fn() function sleeps. The caller
- * has to guarantee that all elements have been returned to the pool (ie:
- * freed) prior to calling mempool_destroy().
- */
-void mempool_destroy(mempool_t *pool)
-{
-        /* Check for outstanding elements */
-        BUG_ON(pool->curr_nr != pool->min_nr);
-        free_pool(pool);
-}
-EXPORT_SYMBOL(mempool_destroy);
-
-/**
  * mempool_alloc - allocate an element from a specific memory pool
  * @pool: pointer to the memory pool which was allocated via
  *        mempool_create().
@@ -198,6 +193,7 @@ EXPORT_SYMBOL(mempool_destroy);
  * returns NULL. Note that due to preallocation, this function
  * *never* fails when called from process contexts. (it might
  * fail if called from an IRQ context.)
+ * Note: using __GFP_ZERO is not supported.
  */
 void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
 {
@@ -206,6 +202,7 @@ void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
         wait_queue_t wait;
         gfp_t gfp_temp;
 
+        VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
         might_sleep_if(gfp_mask & __GFP_WAIT);
 
         gfp_mask |= __GFP_NOMEMALLOC;   /* don't allocate emergency reserves */
@@ -224,28 +221,45 @@ repeat_alloc:
         if (likely(pool->curr_nr)) {
                 element = remove_element(pool);
                 spin_unlock_irqrestore(&pool->lock, flags);
+                /* paired with rmb in mempool_free(), read comment there */
+                smp_wmb();
+                /*
+                 * Update the allocation stack trace as this is more useful
+                 * for debugging.
+                 */
+                kmemleak_update_trace(element);
                 return element;
         }
-        spin_unlock_irqrestore(&pool->lock, flags);
 
-        /* We must not sleep in the GFP_ATOMIC case */
-        if (!(gfp_mask & __GFP_WAIT))
+        /*
+         * We use gfp mask w/o __GFP_WAIT or IO for the first round.  If
+         * alloc failed with that and @pool was empty, retry immediately.
+         */
+        if (gfp_temp != gfp_mask) {
+                spin_unlock_irqrestore(&pool->lock, flags);
+                gfp_temp = gfp_mask;
+                goto repeat_alloc;
+        }
+
+        /* We must not sleep if !__GFP_WAIT */
+        if (!(gfp_mask & __GFP_WAIT)) {
+                spin_unlock_irqrestore(&pool->lock, flags);
                 return NULL;
+        }
 
-        /* Now start performing page reclaim */
-        gfp_temp = gfp_mask;
+        /* Let's wait for someone else to return an element to @pool */
         init_wait(&wait);
         prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
-        smp_mb();
-        if (!pool->curr_nr) {
-                /*
-                 * FIXME: this should be io_schedule().  The timeout is there
-                 * as a workaround for some DM problems in 2.6.18.
-                 */
-                io_schedule_timeout(5*HZ);
-        }
-        finish_wait(&pool->wait, &wait);
 
+        spin_unlock_irqrestore(&pool->lock, flags);
+
+        /*
+         * FIXME: this should be io_schedule().  The timeout is there as a
+         * workaround for some DM problems in 2.6.18.
+         */
+        io_schedule_timeout(5*HZ);
+
+        finish_wait(&pool->wait, &wait);
         goto repeat_alloc;
 }
 EXPORT_SYMBOL(mempool_alloc);
@@ -265,10 +279,42 @@ void mempool_free(void *element, mempool_t *pool)
         if (unlikely(element == NULL))
                 return;
 
-        smp_mb();
-        if (pool->curr_nr < pool->min_nr) {
+        /*
+         * Paired with the wmb in mempool_alloc().  The preceding read is
+         * for @element and the following @pool->curr_nr.  This ensures
+         * that the visible value of @pool->curr_nr is from after the
+         * allocation of @element.  This is necessary for fringe cases
+         * where @element was passed to this task without going through
+         * barriers.
+         *
+         * For example, assume @p is %NULL at the beginning and one task
+         * performs "p = mempool_alloc(...);" while another task is doing
+         * "while (!p) cpu_relax(); mempool_free(p, ...);".  This function
+         * may end up using curr_nr value which is from before allocation
+         * of @p without the following rmb.
+         */
+        smp_rmb();
+
+        /*
+         * For correctness, we need a test which is guaranteed to trigger
+         * if curr_nr + #allocated == min_nr.  Testing curr_nr < min_nr
+         * without locking achieves that and refilling as soon as possible
+         * is desirable.
+         *
+         * Because curr_nr visible here is always a value after the
+         * allocation of @element, any task which decremented curr_nr below
+         * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
+         * incremented to min_nr afterwards.  If curr_nr gets incremented
+         * to min_nr after the allocation of @element, the elements
+         * allocated after that are subject to the same guarantee.
+         *
+         * Waiters happen iff curr_nr is 0 and the above guarantee also
+         * ensures that there will be frees which return elements to the
+         * pool waking up the waiters.
+         */
+        if (unlikely(pool->curr_nr < pool->min_nr)) {
                 spin_lock_irqsave(&pool->lock, flags);
-                if (pool->curr_nr < pool->min_nr) {
+                if (likely(pool->curr_nr < pool->min_nr)) {
                         add_element(pool, element);
                         spin_unlock_irqrestore(&pool->lock, flags);
                         wake_up(&pool->wait);
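For context, a minimal usage sketch against the API as it stands after this diff. It is not part of the patch: the cache name, object size, reserve depth and the my_obj_* helpers are made up for illustration, while mempool_alloc_slab()/mempool_free_slab() are the stock slab-backed callbacks declared in <linux/mempool.h>.

#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/string.h>

#define MY_OBJ_SIZE             256     /* illustrative object size */
#define MY_RESERVED_OBJS        4       /* illustrative reserve depth */

static struct kmem_cache *my_obj_cache; /* hypothetical backing cache */
static mempool_t *my_obj_pool;

static int my_obj_pool_create(int nid)
{
        my_obj_cache = kmem_cache_create("my_obj", MY_OBJ_SIZE, 0, 0, NULL);
        if (!my_obj_cache)
                return -ENOMEM;

        /*
         * mempool_create_node() now takes the gfp mask used for the pool's
         * own allocations plus a NUMA node id; NUMA_NO_NODE replaces the
         * old magic -1 meaning "any node".
         */
        my_obj_pool = mempool_create_node(MY_RESERVED_OBJS,
                                          mempool_alloc_slab, mempool_free_slab,
                                          my_obj_cache, GFP_KERNEL, nid);
        if (!my_obj_pool) {
                kmem_cache_destroy(my_obj_cache);
                return -ENOMEM;
        }
        return 0;
}

static void *my_obj_get(void)
{
        /*
         * GFP_NOIO allows sleeping, so this never returns NULL: the pool
         * first tries the backing allocator, then the reserved elements,
         * and finally waits for mempool_free() to return one.
         */
        void *obj = mempool_alloc(my_obj_pool, GFP_NOIO);

        /* __GFP_ZERO is not supported; clear recycled elements by hand. */
        memset(obj, 0, MY_OBJ_SIZE);
        return obj;
}

static void my_obj_pool_destroy(void)
{
        mempool_destroy(my_obj_pool);   /* frees the reserve and the pool */
        kmem_cache_destroy(my_obj_cache);
}

Elements obtained this way are handed back with mempool_free(obj, my_obj_pool); once the reserve is topped up to min_nr again, further frees go straight to the backing kmem_cache.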
