about | summary | refs | log | tree | commit | diff
path: root/mm/mempool.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mempool.c')
-rw-r--r-- mm/mempool.c 104
1 files changed, 70 insertions, 34 deletions
diff --git a/mm/mempool.c b/mm/mempool.c
index e73641b79bb..d9049811f35 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -27,7 +27,15 @@ static void *remove_element(mempool_t *pool)
return pool->elements[--pool->curr_nr];
}
-static void free_pool(mempool_t *pool)
+/**
+ * mempool_destroy - deallocate a memory pool
+ * @pool: pointer to the memory pool which was allocated via
+ * mempool_create().
+ *
+ * Free all reserved elements in @pool and @pool itself. This function
+ * only sleeps if the free_fn() function sleeps.
+ */
+void mempool_destroy(mempool_t *pool)
{
while (pool->curr_nr) {
void *element = remove_element(pool);
@@ -36,6 +44,7 @@ static void free_pool(mempool_t *pool)
kfree(pool->elements);
kfree(pool);
}
+EXPORT_SYMBOL(mempool_destroy);
/**
* mempool_create - create a memory pool
@@ -86,7 +95,7 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
element = pool->alloc(GFP_KERNEL, pool->pool_data);
if (unlikely(!element)) {
- free_pool(pool);
+ mempool_destroy(pool);
return NULL;
}
add_element(pool, element);
@@ -172,23 +181,6 @@ out:
EXPORT_SYMBOL(mempool_resize);
/**
- * mempool_destroy - deallocate a memory pool
- * @pool: pointer to the memory pool which was allocated via
- * mempool_create().
- *
- * this function only sleeps if the free_fn() function sleeps. The caller
- * has to guarantee that all elements have been returned to the pool (ie:
- * freed) prior to calling mempool_destroy().
- */
-void mempool_destroy(mempool_t *pool)
-{
- /* Check for outstanding elements */
- BUG_ON(pool->curr_nr != pool->min_nr);
- free_pool(pool);
-}
-EXPORT_SYMBOL(mempool_destroy);
-
-/**
* mempool_alloc - allocate an element from a specific memory pool
* @pool: pointer to the memory pool which was allocated via
* mempool_create().
@@ -224,28 +216,40 @@ repeat_alloc:
if (likely(pool->curr_nr)) {
element = remove_element(pool);
spin_unlock_irqrestore(&pool->lock, flags);
+ /* paired with rmb in mempool_free(), read comment there */
+ smp_wmb();
return element;
}
- spin_unlock_irqrestore(&pool->lock, flags);
- /* We must not sleep in the GFP_ATOMIC case */
- if (!(gfp_mask & __GFP_WAIT))
+ /*
+ * We use gfp mask w/o __GFP_WAIT or IO for the first round. If
+ * alloc failed with that and @pool was empty, retry immediately.
+ */
+ if (gfp_temp != gfp_mask) {
+ spin_unlock_irqrestore(&pool->lock, flags);
+ gfp_temp = gfp_mask;
+ goto repeat_alloc;
+ }
+
+ /* We must not sleep if !__GFP_WAIT */
+ if (!(gfp_mask & __GFP_WAIT)) {
+ spin_unlock_irqrestore(&pool->lock, flags);
return NULL;
+ }
- /* Now start performing page reclaim */
- gfp_temp = gfp_mask;
+ /* Let's wait for someone else to return an element to @pool */
init_wait(&wait);
prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
- smp_mb();
- if (!pool->curr_nr) {
- /*
- * FIXME: this should be io_schedule(). The timeout is there
- * as a workaround for some DM problems in 2.6.18.
- */
- io_schedule_timeout(5*HZ);
- }
- finish_wait(&pool->wait, &wait);
+ spin_unlock_irqrestore(&pool->lock, flags);
+
+ /*
+ * FIXME: this should be io_schedule(). The timeout is there as a
+ * workaround for some DM problems in 2.6.18.
+ */
+ io_schedule_timeout(5*HZ);
+
+ finish_wait(&pool->wait, &wait);
goto repeat_alloc;
}
EXPORT_SYMBOL(mempool_alloc);
@@ -265,7 +269,39 @@ void mempool_free(void *element, mempool_t *pool)
if (unlikely(element == NULL))
return;
- smp_mb();
+ /*
+ * Paired with the wmb in mempool_alloc(). The preceding read is
+ * for @element and the following @pool->curr_nr. This ensures
+ * that the visible value of @pool->curr_nr is from after the
+ * allocation of @element. This is necessary for fringe cases
+ * where @element was passed to this task without going through
+ * barriers.
+ *
+ * For example, assume @p is %NULL at the beginning and one task
+ * performs "p = mempool_alloc(...);" while another task is doing
+ * "while (!p) cpu_relax(); mempool_free(p, ...);". This function
+ * may end up using curr_nr value which is from before allocation
+ * of @p without the following rmb.
+ */
+ smp_rmb();
+
+ /*
+ * For correctness, we need a test which is guaranteed to trigger
+ * if curr_nr + #allocated == min_nr. Testing curr_nr < min_nr
+ * without locking achieves that and refilling as soon as possible
+ * is desirable.
+ *
+ * Because curr_nr visible here is always a value after the
+ * allocation of @element, any task which decremented curr_nr below
+ * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
+ * incremented to min_nr afterwards. If curr_nr gets incremented
+ * to min_nr after the allocation of @element, the elements
+ * allocated after that are subject to the same guarantee.
+ *
+ * Waiters happen iff curr_nr is 0 and the above guarantee also
+ * ensures that there will be frees which return elements to the
+ * pool waking up the waiters.
+ */
if (pool->curr_nr < pool->min_nr) {
spin_lock_irqsave(&pool->lock, flags);
if (pool->curr_nr < pool->min_nr) {