aboutsummaryrefslogtreecommitdiff
path: root/net/sunrpc/sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc/sched.c')
-rw-r--r--net/sunrpc/sched.c441
1 files changed, 302 insertions, 139 deletions
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 243fc09b164..c0365c14b85 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -18,6 +18,7 @@
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
+#include <linux/freezer.h>
#include <linux/sunrpc/clnt.h>
@@ -27,6 +28,9 @@
#define RPCDBG_FACILITY RPCDBG_SCHED
#endif
+#define CREATE_TRACE_POINTS
+#include <trace/events/sunrpc.h>
+
/*
* RPC slabs and memory pools
*/
@@ -94,18 +98,55 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
}
+static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
+{
+ struct list_head *q = &queue->tasks[queue->priority];
+ struct rpc_task *task;
+
+ if (!list_empty(q)) {
+ task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+ if (task->tk_owner == queue->owner)
+ list_move_tail(&task->u.tk_wait.list, q);
+ }
+}
+
+static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
+{
+ if (queue->priority != priority) {
+ /* Fairness: rotate the list when changing priority */
+ rpc_rotate_queue_owner(queue);
+ queue->priority = priority;
+ }
+}
+
+static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
+{
+ queue->owner = pid;
+ queue->nr = RPC_BATCH_COUNT;
+}
+
+static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
+{
+ rpc_set_waitqueue_priority(queue, queue->maxpriority);
+ rpc_set_waitqueue_owner(queue, 0);
+}
+
/*
* Add new request to a priority queue.
*/
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+ struct rpc_task *task,
+ unsigned char queue_priority)
{
struct list_head *q;
struct rpc_task *t;
INIT_LIST_HEAD(&task->u.tk_wait.links);
- q = &queue->tasks[task->tk_priority];
- if (unlikely(task->tk_priority > queue->maxpriority))
- q = &queue->tasks[queue->maxpriority];
+ if (unlikely(queue_priority > queue->maxpriority))
+ queue_priority = queue->maxpriority;
+ if (queue_priority > queue->priority)
+ rpc_set_waitqueue_priority(queue, queue_priority);
+ q = &queue->tasks[queue_priority];
list_for_each_entry(t, q, u.tk_wait.list) {
if (t->tk_owner == task->tk_owner) {
list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
@@ -123,18 +164,24 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
* improve overall performance.
* Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/
-static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
+ struct rpc_task *task,
+ unsigned char queue_priority)
{
- BUG_ON (RPC_IS_QUEUED(task));
+ WARN_ON_ONCE(RPC_IS_QUEUED(task));
+ if (RPC_IS_QUEUED(task))
+ return;
if (RPC_IS_PRIORITY(queue))
- __rpc_add_wait_queue_priority(queue, task);
+ __rpc_add_wait_queue_priority(queue, task, queue_priority);
else if (RPC_IS_SWAPPER(task))
list_add(&task->u.tk_wait.list, &queue->tasks[0]);
else
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->tk_waitqueue = queue;
queue->qlen++;
+ /* barrier matches the read in rpc_wake_up_task_queue_locked() */
+ smp_wmb();
rpc_set_queued(task);
dprintk("RPC: %5u added to queue %p \"%s\"\n",
@@ -170,24 +217,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
task->tk_pid, queue, rpc_qname(queue));
}
-static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
-{
- queue->priority = priority;
- queue->count = 1 << (priority * 2);
-}
-
-static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
- queue->owner = pid;
- queue->nr = RPC_BATCH_COUNT;
-}
-
-static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
-{
- rpc_set_waitqueue_priority(queue, queue->maxpriority);
- rpc_set_waitqueue_owner(queue, 0);
-}
-
static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
{
int i;
@@ -200,9 +229,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
queue->qlen = 0;
setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue);
INIT_LIST_HEAD(&queue->timer_list.list);
-#ifdef RPC_DEBUG
- queue->name = qname;
-#endif
+ rpc_assign_waitqueue_name(queue, qname);
}
void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
@@ -227,11 +254,11 @@ static int rpc_wait_bit_killable(void *word)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
- schedule();
+ freezable_schedule_unsafe();
return 0;
}
-#ifdef RPC_DEBUG
+#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS)
static void rpc_task_set_debuginfo(struct rpc_task *task)
{
static atomic_t rpc_pid;
@@ -246,29 +273,47 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
static void rpc_set_active(struct rpc_task *task)
{
+ trace_rpc_task_begin(task->tk_client, task, NULL);
+
rpc_task_set_debuginfo(task);
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
}
/*
* Mark an RPC call as having completed by clearing the 'active' bit
+ * and then waking up all tasks that were sleeping.
*/
-static void rpc_mark_complete_task(struct rpc_task *task)
+static int rpc_complete_task(struct rpc_task *task)
{
- smp_mb__before_clear_bit();
+ void *m = &task->tk_runstate;
+ wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
+ struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
+ unsigned long flags;
+ int ret;
+
+ trace_rpc_task_complete(task->tk_client, task, NULL);
+
+ spin_lock_irqsave(&wq->lock, flags);
clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
- smp_mb__after_clear_bit();
- wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE);
+ ret = atomic_dec_and_test(&task->tk_count);
+ if (waitqueue_active(wq))
+ __wake_up_locked_key(wq, TASK_NORMAL, &k);
+ spin_unlock_irqrestore(&wq->lock, flags);
+ return ret;
}
/*
* Allow callers to wait for completion of an RPC call
+ *
+ * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
+ * to enforce taking of the wq->lock and hence avoid races with
+ * rpc_complete_task().
*/
int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
{
if (action == NULL)
action = rpc_wait_bit_killable;
- return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
+ return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
action, TASK_KILLABLE);
}
EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
@@ -276,24 +321,24 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
/*
* Make an RPC task runnable.
*
- * Note: If the task is ASYNC, this must be called with
- * the spinlock held to protect the wait queue operation.
+ * Note: If the task is ASYNC, and is being made runnable after sitting on an
+ * rpc_wait_queue, this must be called with the queue spinlock held to protect
+ * the wait queue operation.
+ * Note the ordering of rpc_test_and_set_running() and rpc_clear_queued(),
+ * which is needed to ensure that __rpc_execute() doesn't loop (due to the
+ * lockless RPC_IS_QUEUED() test) before we've had a chance to test
+ * the RPC_TASK_RUNNING flag.
*/
static void rpc_make_runnable(struct rpc_task *task)
{
+ bool need_wakeup = !rpc_test_and_set_running(task);
+
rpc_clear_queued(task);
- if (rpc_test_and_set_running(task))
+ if (!need_wakeup)
return;
if (RPC_IS_ASYNC(task)) {
- int status;
-
INIT_WORK(&task->u.tk_work, rpc_async_schedule);
- status = queue_work(rpciod_workqueue, &task->u.tk_work);
- if (status < 0) {
- printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
- task->tk_status = status;
- return;
- }
+ queue_work(rpciod_workqueue, &task->u.tk_work);
} else
wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
}
@@ -304,15 +349,19 @@ static void rpc_make_runnable(struct rpc_task *task)
* NB: An RPC task will only receive interrupt-driven events as long
* as it's on a wait queue.
*/
-static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
- rpc_action action)
+static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
+ struct rpc_task *task,
+ rpc_action action,
+ unsigned char queue_priority)
{
dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
task->tk_pid, rpc_qname(q), jiffies);
- __rpc_add_wait_queue(q, task);
+ trace_rpc_task_sleep(task->tk_client, task, q);
- BUG_ON(task->tk_callback != NULL);
+ __rpc_add_wait_queue(q, task, queue_priority);
+
+ WARN_ON_ONCE(task->tk_callback != NULL);
task->tk_callback = action;
__rpc_add_timer(q, task);
}
@@ -321,17 +370,42 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action)
{
/* We shouldn't ever put an inactive task to sleep */
- BUG_ON(!RPC_IS_ACTIVATED(task));
+ WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
+ if (!RPC_IS_ACTIVATED(task)) {
+ task->tk_status = -EIO;
+ rpc_put_task_async(task);
+ return;
+ }
/*
* Protect the queue operations.
*/
spin_lock_bh(&q->lock);
- __rpc_sleep_on(q, task, action);
+ __rpc_sleep_on_priority(q, task, action, task->tk_priority);
spin_unlock_bh(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on);
+void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
+ rpc_action action, int priority)
+{
+ /* We shouldn't ever put an inactive task to sleep */
+ WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
+ if (!RPC_IS_ACTIVATED(task)) {
+ task->tk_status = -EIO;
+ rpc_put_task_async(task);
+ return;
+ }
+
+ /*
+ * Protect the queue operations.
+ */
+ spin_lock_bh(&q->lock);
+ __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW);
+ spin_unlock_bh(&q->lock);
+}
+EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
+
/**
* __rpc_do_wake_up_task - wake up a single rpc_task
* @queue: wait queue
@@ -350,6 +424,8 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task
return;
}
+ trace_rpc_task_wakeup(task->tk_client, task, queue);
+
__rpc_remove_wait_queue(queue, task);
rpc_make_runnable(task);
@@ -362,23 +438,12 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task
*/
static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task)
{
- if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue)
- __rpc_do_wake_up_task(queue, task);
-}
-
-/*
- * Tests whether rpc queue is empty
- */
-int rpc_queue_empty(struct rpc_wait_queue *queue)
-{
- int res;
-
- spin_lock_bh(&queue->lock);
- res = queue->qlen;
- spin_unlock_bh(&queue->lock);
- return res == 0;
+ if (RPC_IS_QUEUED(task)) {
+ smp_rmb();
+ if (task->tk_waitqueue == queue)
+ __rpc_do_wake_up_task(queue, task);
+ }
}
-EXPORT_SYMBOL_GPL(rpc_queue_empty);
/*
* Wake up a task on a specific queue
@@ -394,7 +459,7 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
/*
* Wake up the next task on a priority queue.
*/
-static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue)
+static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue)
{
struct list_head *q;
struct rpc_task *task;
@@ -413,8 +478,7 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
/*
* Check if we need to switch queues.
*/
- if (--queue->count)
- goto new_owner;
+ goto new_owner;
}
/*
@@ -439,30 +503,54 @@ new_queue:
new_owner:
rpc_set_waitqueue_owner(queue, task->tk_owner);
out:
- rpc_wake_up_task_queue_locked(queue, task);
return task;
}
+static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue)
+{
+ if (RPC_IS_PRIORITY(queue))
+ return __rpc_find_next_queued_priority(queue);
+ if (!list_empty(&queue->tasks[0]))
+ return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list);
+ return NULL;
+}
+
/*
- * Wake up the next task on the wait queue.
+ * Wake up the first task on the wait queue.
*/
-struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
+struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue,
+ bool (*func)(struct rpc_task *, void *), void *data)
{
struct rpc_task *task = NULL;
- dprintk("RPC: wake_up_next(%p \"%s\")\n",
+ dprintk("RPC: wake_up_first(%p \"%s\")\n",
queue, rpc_qname(queue));
spin_lock_bh(&queue->lock);
- if (RPC_IS_PRIORITY(queue))
- task = __rpc_wake_up_next_priority(queue);
- else {
- task_for_first(task, &queue->tasks[0])
+ task = __rpc_find_next_queued(queue);
+ if (task != NULL) {
+ if (func(task, data))
rpc_wake_up_task_queue_locked(queue, task);
+ else
+ task = NULL;
}
spin_unlock_bh(&queue->lock);
return task;
}
+EXPORT_SYMBOL_GPL(rpc_wake_up_first);
+
+static bool rpc_wake_up_next_func(struct rpc_task *task, void *data)
+{
+ return true;
+}
+
+/*
+ * Wake up the next task on the wait queue.
+*/
+struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue)
+{
+ return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL);
+}
EXPORT_SYMBOL_GPL(rpc_wake_up_next);
/**
@@ -473,14 +561,18 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next);
*/
void rpc_wake_up(struct rpc_wait_queue *queue)
{
- struct rpc_task *task, *next;
struct list_head *head;
spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
- list_for_each_entry_safe(task, next, head, u.tk_wait.list)
+ while (!list_empty(head)) {
+ struct rpc_task *task;
+ task = list_first_entry(head,
+ struct rpc_task,
+ u.tk_wait.list);
rpc_wake_up_task_queue_locked(queue, task);
+ }
if (head == &queue->tasks[0])
break;
head--;
@@ -498,13 +590,16 @@ EXPORT_SYMBOL_GPL(rpc_wake_up);
*/
void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
- struct rpc_task *task, *next;
struct list_head *head;
spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
- list_for_each_entry_safe(task, next, head, u.tk_wait.list) {
+ while (!list_empty(head)) {
+ struct rpc_task *task;
+ task = list_first_entry(head,
+ struct rpc_task,
+ u.tk_wait.list);
task->tk_status = status;
rpc_wake_up_task_queue_locked(queue, task);
}
@@ -542,7 +637,8 @@ static void __rpc_queue_timer_fn(unsigned long ptr)
static void __rpc_atrun(struct rpc_task *task)
{
- task->tk_status = 0;
+ if (task->tk_status == -ETIMEDOUT)
+ task->tk_status = 0;
}
/*
@@ -563,6 +659,27 @@ void rpc_prepare_task(struct rpc_task *task)
task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
}
+static void
+rpc_init_task_statistics(struct rpc_task *task)
+{
+ /* Initialize retry counters */
+ task->tk_garb_retry = 2;
+ task->tk_cred_retry = 2;
+ task->tk_rebind_retry = 2;
+
+ /* starting timestamp */
+ task->tk_start = ktime_get();
+}
+
+static void
+rpc_reset_task_statistics(struct rpc_task *task)
+{
+ task->tk_timeouts = 0;
+ task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_KILLED|RPC_TASK_SENT);
+
+ rpc_init_task_statistics(task);
+}
+
/*
* Helper that calls task->tk_ops->rpc_call_done if it exists
*/
@@ -575,6 +692,7 @@ void rpc_exit_task(struct rpc_task *task)
WARN_ON(RPC_ASSASSINATED(task));
/* Always release the RPC slot and buffer memory */
xprt_release(task);
+ rpc_reset_task_statistics(task);
}
}
}
@@ -606,35 +724,31 @@ static void __rpc_execute(struct rpc_task *task)
dprintk("RPC: %5u __rpc_execute flags=0x%x\n",
task->tk_pid, task->tk_flags);
- BUG_ON(RPC_IS_QUEUED(task));
+ WARN_ON_ONCE(RPC_IS_QUEUED(task));
+ if (RPC_IS_QUEUED(task))
+ return;
for (;;) {
+ void (*do_action)(struct rpc_task *);
/*
- * Execute any pending callback.
+ * Execute any pending callback first.
*/
- if (task->tk_callback) {
- void (*save_callback)(struct rpc_task *);
-
+ do_action = task->tk_callback;
+ task->tk_callback = NULL;
+ if (do_action == NULL) {
/*
- * We set tk_callback to NULL before calling it,
- * in case it sets the tk_callback field itself:
+ * Perform the next FSM step.
+ * tk_action may be NULL if the task has been killed.
+ * In particular, note that rpc_killall_tasks may
+ * do this at any time, so beware when dereferencing.
*/
- save_callback = task->tk_callback;
- task->tk_callback = NULL;
- save_callback(task);
- }
-
- /*
- * Perform the next FSM step.
- * tk_action may be NULL when the task has been killed
- * by someone else.
- */
- if (!RPC_IS_QUEUED(task)) {
- if (task->tk_action == NULL)
+ do_action = task->tk_action;
+ if (do_action == NULL)
break;
- task->tk_action(task);
}
+ trace_rpc_task_run_action(task->tk_client, task, task->tk_action);
+ do_action(task);
/*
* Lockless check for whether task is sleeping or not.
@@ -677,7 +791,6 @@ static void __rpc_execute(struct rpc_task *task)
task->tk_flags |= RPC_TASK_KILLED;
rpc_exit(task, -ERESTARTSYS);
}
- rpc_set_running(task);
dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
}
@@ -698,15 +811,19 @@ static void __rpc_execute(struct rpc_task *task)
*/
void rpc_execute(struct rpc_task *task)
{
+ bool is_async = RPC_IS_ASYNC(task);
+
rpc_set_active(task);
rpc_make_runnable(task);
- if (!RPC_IS_ASYNC(task))
+ if (!is_async)
__rpc_execute(task);
}
static void rpc_async_schedule(struct work_struct *work)
{
+ current->flags |= PF_FSTRANS;
__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
+ current->flags &= ~PF_FSTRANS;
}
/**
@@ -715,7 +832,8 @@ static void rpc_async_schedule(struct work_struct *work)
* @size: requested byte size
*
* To prevent rpciod from hanging, this allocator never sleeps,
- * returning NULL if the request cannot be serviced immediately.
+ * returning NULL and suppressing warning if the request cannot be serviced
+ * immediately.
* The caller can arrange to sleep in a way that is safe for rpciod.
*
* Most requests are 'small' (under 2KiB) and can be serviced from a
@@ -728,7 +846,10 @@ static void rpc_async_schedule(struct work_struct *work)
void *rpc_malloc(struct rpc_task *task, size_t size)
{
struct rpc_buffer *buf;
- gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
+ gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
+
+ if (RPC_IS_SWAPPER(task))
+ gfp |= __GFP_MEMALLOC;
size += sizeof(struct rpc_buffer);
if (size <= RPC_BUFFER_MAXSIZE)
@@ -784,10 +905,6 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
task->tk_calldata = task_setup_data->callback_data;
INIT_LIST_HEAD(&task->tk_task);
- /* Initialize retry counters */
- task->tk_garb_retry = 2;
- task->tk_cred_retry = 2;
-
task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
task->tk_owner = current->tgid;
@@ -797,8 +914,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
if (task->tk_ops->rpc_call_prepare != NULL)
task->tk_action = rpc_prepare_task;
- /* starting timestamp */
- task->tk_start = ktime_get();
+ rpc_init_task_statistics(task);
dprintk("RPC: new task initialized, procpid %u\n",
task_pid_nr(current));
@@ -807,7 +923,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
static struct rpc_task *
rpc_alloc_task(void)
{
- return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
+ return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
}
/*
@@ -829,27 +945,40 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
}
rpc_init_task(task, setup_data);
- if (task->tk_status < 0) {
- int err = task->tk_status;
- rpc_put_task(task);
- return ERR_PTR(err);
- }
-
task->tk_flags |= flags;
dprintk("RPC: allocated task %p\n", task);
return task;
}
+/*
+ * rpc_free_task - release rpc task and perform cleanups
+ *
+ * Note that we free up the rpc_task _after_ rpc_release_calldata()
+ * in order to work around a workqueue dependency issue.
+ *
+ * Tejun Heo states:
+ * "Workqueue currently considers two work items to be the same if they're
+ * on the same address and won't execute them concurrently - ie. it
+ * makes a work item which is queued again while being executed wait
+ * for the previous execution to complete.
+ *
+ * If a work function frees the work item, and then waits for an event
+ * which should be performed by another work item and *that* work item
+ * recycles the freed work item, it can create a false dependency loop.
+ * There really is no reliable way to detect this short of verifying
+ * every memory free."
+ *
+ */
static void rpc_free_task(struct rpc_task *task)
{
- const struct rpc_call_ops *tk_ops = task->tk_ops;
- void *calldata = task->tk_calldata;
+ unsigned short tk_flags = task->tk_flags;
+
+ rpc_release_calldata(task->tk_ops, task->tk_calldata);
- if (task->tk_flags & RPC_TASK_DYNAMIC) {
+ if (tk_flags & RPC_TASK_DYNAMIC) {
dprintk("RPC: %5u freeing task\n", task->tk_pid);
mempool_free(task, rpc_task_mempool);
}
- rpc_release_calldata(tk_ops, calldata);
}
static void rpc_async_release(struct work_struct *work)
@@ -857,34 +986,68 @@ static void rpc_async_release(struct work_struct *work)
rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
}
-void rpc_put_task(struct rpc_task *task)
+static void rpc_release_resources_task(struct rpc_task *task)
{
- if (!atomic_dec_and_test(&task->tk_count))
- return;
- /* Release resources */
- if (task->tk_rqstp)
- xprt_release(task);
- if (task->tk_msg.rpc_cred)
+ xprt_release(task);
+ if (task->tk_msg.rpc_cred) {
put_rpccred(task->tk_msg.rpc_cred);
+ task->tk_msg.rpc_cred = NULL;
+ }
rpc_task_release_client(task);
- if (task->tk_workqueue != NULL) {
+}
+
+static void rpc_final_put_task(struct rpc_task *task,
+ struct workqueue_struct *q)
+{
+ if (q != NULL) {
INIT_WORK(&task->u.tk_work, rpc_async_release);
- queue_work(task->tk_workqueue, &task->u.tk_work);
+ queue_work(q, &task->u.tk_work);
} else
rpc_free_task(task);
}
+
+static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
+{
+ if (atomic_dec_and_test(&task->tk_count)) {
+ rpc_release_resources_task(task);
+ rpc_final_put_task(task, q);
+ }
+}
+
+void rpc_put_task(struct rpc_task *task)
+{
+ rpc_do_put_task(task, NULL);
+}
EXPORT_SYMBOL_GPL(rpc_put_task);
+void rpc_put_task_async(struct rpc_task *task)
+{
+ rpc_do_put_task(task, task->tk_workqueue);
+}
+EXPORT_SYMBOL_GPL(rpc_put_task_async);
+
static void rpc_release_task(struct rpc_task *task)
{
dprintk("RPC: %5u release task\n", task->tk_pid);
- BUG_ON (RPC_IS_QUEUED(task));
+ WARN_ON_ONCE(RPC_IS_QUEUED(task));
- /* Wake up anyone who is waiting for task completion */
- rpc_mark_complete_task(task);
+ rpc_release_resources_task(task);
- rpc_put_task(task);
+ /*
+ * Note: at this point we have been removed from rpc_clnt->cl_tasks,
+ * so it should be safe to use task->tk_count as a test for whether
+ * or not any other processes still hold references to our rpc_task.
+ */
+ if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
+ /* Wake up anyone who may be waiting for task completion */
+ if (!rpc_complete_task(task))
+ return;
+ } else {
+ if (!atomic_dec_and_test(&task->tk_count))
+ return;
+ }
+ rpc_final_put_task(task, task->tk_workqueue);
}
int rpciod_up(void)
@@ -908,7 +1071,7 @@ static int rpciod_start(void)
* Create the rpciod thread and wait for it to start.
*/
dprintk("RPC: creating workqueue rpciod\n");
- wq = alloc_workqueue("rpciod", WQ_RESCUER, 0);
+ wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 1);
rpciod_workqueue = wq;
return rpciod_workqueue != NULL;
}