aboutsummaryrefslogtreecommitdiff
path: root/mm/mempool.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mempool.c')
-rw-r--r--mm/mempool.c130
1 files changed, 88 insertions, 42 deletions
diff --git a/mm/mempool.c b/mm/mempool.c
index 1a3bc3d4d55..e209c98c720 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -10,7 +10,8 @@
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/module.h>
+#include <linux/kmemleak.h>
+#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
@@ -27,7 +28,15 @@ static void *remove_element(mempool_t *pool)
return pool->elements[--pool->curr_nr];
}
-static void free_pool(mempool_t *pool)
+/**
+ * mempool_destroy - deallocate a memory pool
+ * @pool: pointer to the memory pool which was allocated via
+ * mempool_create().
+ *
+ * Free all reserved elements in @pool and @pool itself. This function
+ * only sleeps if the free_fn() function sleeps.
+ */
+void mempool_destroy(mempool_t *pool)
{
while (pool->curr_nr) {
void *element = remove_element(pool);
@@ -36,6 +45,7 @@ static void free_pool(mempool_t *pool)
kfree(pool->elements);
kfree(pool);
}
+EXPORT_SYMBOL(mempool_destroy);
/**
* mempool_create - create a memory pool
@@ -54,19 +64,21 @@ static void free_pool(mempool_t *pool)
mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn, void *pool_data)
{
- return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,-1);
+ return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,
+ GFP_KERNEL, NUMA_NO_NODE);
}
EXPORT_SYMBOL(mempool_create);
mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
- mempool_free_t *free_fn, void *pool_data, int node_id)
+ mempool_free_t *free_fn, void *pool_data,
+ gfp_t gfp_mask, int node_id)
{
mempool_t *pool;
- pool = kmalloc_node(sizeof(*pool), GFP_KERNEL | __GFP_ZERO, node_id);
+ pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
if (!pool)
return NULL;
pool->elements = kmalloc_node(min_nr * sizeof(void *),
- GFP_KERNEL, node_id);
+ gfp_mask, node_id);
if (!pool->elements) {
kfree(pool);
return NULL;
@@ -84,9 +96,9 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
while (pool->curr_nr < pool->min_nr) {
void *element;
- element = pool->alloc(GFP_KERNEL, pool->pool_data);
+ element = pool->alloc(gfp_mask, pool->pool_data);
if (unlikely(!element)) {
- free_pool(pool);
+ mempool_destroy(pool);
return NULL;
}
add_element(pool, element);
@@ -172,23 +184,6 @@ out:
EXPORT_SYMBOL(mempool_resize);
/**
- * mempool_destroy - deallocate a memory pool
- * @pool: pointer to the memory pool which was allocated via
- * mempool_create().
- *
- * this function only sleeps if the free_fn() function sleeps. The caller
- * has to guarantee that all elements have been returned to the pool (ie:
- * freed) prior to calling mempool_destroy().
- */
-void mempool_destroy(mempool_t *pool)
-{
- /* Check for outstanding elements */
- BUG_ON(pool->curr_nr != pool->min_nr);
- free_pool(pool);
-}
-EXPORT_SYMBOL(mempool_destroy);
-
-/**
* mempool_alloc - allocate an element from a specific memory pool
* @pool: pointer to the memory pool which was allocated via
* mempool_create().
@@ -198,6 +193,7 @@ EXPORT_SYMBOL(mempool_destroy);
* returns NULL. Note that due to preallocation, this function
* *never* fails when called from process contexts. (it might
* fail if called from an IRQ context.)
+ * Note: using __GFP_ZERO is not supported.
*/
void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
{
@@ -206,6 +202,7 @@ void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
wait_queue_t wait;
gfp_t gfp_temp;
+ VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
might_sleep_if(gfp_mask & __GFP_WAIT);
gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */
@@ -224,28 +221,45 @@ repeat_alloc:
if (likely(pool->curr_nr)) {
element = remove_element(pool);
spin_unlock_irqrestore(&pool->lock, flags);
+ /* paired with rmb in mempool_free(), read comment there */
+ smp_wmb();
+ /*
+ * Update the allocation stack trace as this is more useful
+ * for debugging.
+ */
+ kmemleak_update_trace(element);
return element;
}
- spin_unlock_irqrestore(&pool->lock, flags);
- /* We must not sleep in the GFP_ATOMIC case */
- if (!(gfp_mask & __GFP_WAIT))
+ /*
+ * We use gfp mask w/o __GFP_WAIT or IO for the first round. If
+ * alloc failed with that and @pool was empty, retry immediately.
+ */
+ if (gfp_temp != gfp_mask) {
+ spin_unlock_irqrestore(&pool->lock, flags);
+ gfp_temp = gfp_mask;
+ goto repeat_alloc;
+ }
+
+ /* We must not sleep if !__GFP_WAIT */
+ if (!(gfp_mask & __GFP_WAIT)) {
+ spin_unlock_irqrestore(&pool->lock, flags);
return NULL;
+ }
- /* Now start performing page reclaim */
- gfp_temp = gfp_mask;
+ /* Let's wait for someone else to return an element to @pool */
init_wait(&wait);
prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
- smp_mb();
- if (!pool->curr_nr) {
- /*
- * FIXME: this should be io_schedule(). The timeout is there
- * as a workaround for some DM problems in 2.6.18.
- */
- io_schedule_timeout(5*HZ);
- }
- finish_wait(&pool->wait, &wait);
+ spin_unlock_irqrestore(&pool->lock, flags);
+
+ /*
+ * FIXME: this should be io_schedule(). The timeout is there as a
+ * workaround for some DM problems in 2.6.18.
+ */
+ io_schedule_timeout(5*HZ);
+
+ finish_wait(&pool->wait, &wait);
goto repeat_alloc;
}
EXPORT_SYMBOL(mempool_alloc);
@@ -265,10 +279,42 @@ void mempool_free(void *element, mempool_t *pool)
if (unlikely(element == NULL))
return;
- smp_mb();
- if (pool->curr_nr < pool->min_nr) {
+ /*
+ * Paired with the wmb in mempool_alloc(). The preceding read is
+ * for @element and the following @pool->curr_nr. This ensures
+ * that the visible value of @pool->curr_nr is from after the
+ * allocation of @element. This is necessary for fringe cases
+ * where @element was passed to this task without going through
+ * barriers.
+ *
+ * For example, assume @p is %NULL at the beginning and one task
+ * performs "p = mempool_alloc(...);" while another task is doing
+ * "while (!p) cpu_relax(); mempool_free(p, ...);". This function
+ * may end up using curr_nr value which is from before allocation
+ * of @p without the following rmb.
+ */
+ smp_rmb();
+
+ /*
+ * For correctness, we need a test which is guaranteed to trigger
+ * if curr_nr + #allocated == min_nr. Testing curr_nr < min_nr
+ * without locking achieves that and refilling as soon as possible
+ * is desirable.
+ *
+ * Because curr_nr visible here is always a value after the
+ * allocation of @element, any task which decremented curr_nr below
+ * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
+ * incremented to min_nr afterwards. If curr_nr gets incremented
+ * to min_nr after the allocation of @element, the elements
+ * allocated after that are subject to the same guarantee.
+ *
+ * Waiters happen iff curr_nr is 0 and the above guarantee also
+ * ensures that there will be frees which return elements to the
+ * pool waking up the waiters.
+ */
+ if (unlikely(pool->curr_nr < pool->min_nr)) {
spin_lock_irqsave(&pool->lock, flags);
- if (pool->curr_nr < pool->min_nr) {
+ if (likely(pool->curr_nr < pool->min_nr)) {
add_element(pool, element);
spin_unlock_irqrestore(&pool->lock, flags);
wake_up(&pool->wait);