diff options
Diffstat (limited to 'net/sunrpc/sched.c')
| -rw-r--r-- | net/sunrpc/sched.c | 252 |
1 files changed, 174 insertions, 78 deletions
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 3341d896278..c0365c14b85 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -28,6 +28,9 @@ #define RPCDBG_FACILITY RPCDBG_SCHED #endif +#define CREATE_TRACE_POINTS +#include <trace/events/sunrpc.h> + /* * RPC slabs and memory pools */ @@ -95,6 +98,39 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } +static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue) +{ + struct list_head *q = &queue->tasks[queue->priority]; + struct rpc_task *task; + + if (!list_empty(q)) { + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); + if (task->tk_owner == queue->owner) + list_move_tail(&task->u.tk_wait.list, q); + } +} + +static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) +{ + if (queue->priority != priority) { + /* Fairness: rotate the list when changing priority */ + rpc_rotate_queue_owner(queue); + queue->priority = priority; + } +} + +static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) +{ + queue->owner = pid; + queue->nr = RPC_BATCH_COUNT; +} + +static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) +{ + rpc_set_waitqueue_priority(queue, queue->maxpriority); + rpc_set_waitqueue_owner(queue, 0); +} + /* * Add new request to a priority queue. */ @@ -106,9 +142,11 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *t; INIT_LIST_HEAD(&task->u.tk_wait.links); - q = &queue->tasks[queue_priority]; if (unlikely(queue_priority > queue->maxpriority)) - q = &queue->tasks[queue->maxpriority]; + queue_priority = queue->maxpriority; + if (queue_priority > queue->priority) + rpc_set_waitqueue_priority(queue, queue_priority); + q = &queue->tasks[queue_priority]; list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_owner == task->tk_owner) { list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); @@ -130,7 +168,9 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task, unsigned char queue_priority) { - BUG_ON (RPC_IS_QUEUED(task)); + WARN_ON_ONCE(RPC_IS_QUEUED(task)); + if (RPC_IS_QUEUED(task)) + return; if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task, queue_priority); @@ -140,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; queue->qlen++; + /* barrier matches the read in rpc_wake_up_task_queue_locked() */ + smp_wmb(); rpc_set_queued(task); dprintk("RPC: %5u added to queue %p \"%s\"\n", @@ -175,24 +217,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas task->tk_pid, queue, rpc_qname(queue)); } -static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) -{ - queue->priority = priority; - queue->count = 1 << (priority * 2); -} - -static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) -{ - queue->owner = pid; - queue->nr = RPC_BATCH_COUNT; -} - -static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) -{ - rpc_set_waitqueue_priority(queue, queue->maxpriority); - rpc_set_waitqueue_owner(queue, 0); -} - static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues) { int i; @@ -205,9 +229,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c queue->qlen = 0; setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); INIT_LIST_HEAD(&queue->timer_list.list); -#ifdef RPC_DEBUG - queue->name = qname; -#endif + rpc_assign_waitqueue_name(queue, qname); } void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname) @@ -232,11 +254,11 @@ static int rpc_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - freezable_schedule(); + freezable_schedule_unsafe(); return 0; } -#ifdef RPC_DEBUG +#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) static void rpc_task_set_debuginfo(struct rpc_task *task) { static atomic_t rpc_pid; @@ -251,6 +273,8 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task) static void rpc_set_active(struct rpc_task *task) { + trace_rpc_task_begin(task->tk_client, task, NULL); + rpc_task_set_debuginfo(task); set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); } @@ -267,6 +291,8 @@ static int rpc_complete_task(struct rpc_task *task) unsigned long flags; int ret; + trace_rpc_task_complete(task->tk_client, task, NULL); + spin_lock_irqsave(&wq->lock, flags); clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); ret = atomic_dec_and_test(&task->tk_count); @@ -295,13 +321,20 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); /* * Make an RPC task runnable. * - * Note: If the task is ASYNC, this must be called with - * the spinlock held to protect the wait queue operation. + * Note: If the task is ASYNC, and is being made runnable after sitting on an + * rpc_wait_queue, this must be called with the queue spinlock held to protect + * the wait queue operation. + * Note the ordering of rpc_test_and_set_running() and rpc_clear_queued(), + * which is needed to ensure that __rpc_execute() doesn't loop (due to the + * lockless RPC_IS_QUEUED() test) before we've had a chance to test + * the RPC_TASK_RUNNING flag. */ static void rpc_make_runnable(struct rpc_task *task) { + bool need_wakeup = !rpc_test_and_set_running(task); + rpc_clear_queued(task); - if (rpc_test_and_set_running(task)) + if (!need_wakeup) return; if (RPC_IS_ASYNC(task)) { INIT_WORK(&task->u.tk_work, rpc_async_schedule); @@ -324,9 +357,11 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); + trace_rpc_task_sleep(task->tk_client, task, q); + __rpc_add_wait_queue(q, task, queue_priority); - BUG_ON(task->tk_callback != NULL); + WARN_ON_ONCE(task->tk_callback != NULL); task->tk_callback = action; __rpc_add_timer(q, task); } @@ -335,7 +370,12 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action) { /* We shouldn't ever put an inactive task to sleep */ - BUG_ON(!RPC_IS_ACTIVATED(task)); + WARN_ON_ONCE(!RPC_IS_ACTIVATED(task)); + if (!RPC_IS_ACTIVATED(task)) { + task->tk_status = -EIO; + rpc_put_task_async(task); + return; + } /* * Protect the queue operations. @@ -350,7 +390,12 @@ void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, int priority) { /* We shouldn't ever put an inactive task to sleep */ - BUG_ON(!RPC_IS_ACTIVATED(task)); + WARN_ON_ONCE(!RPC_IS_ACTIVATED(task)); + if (!RPC_IS_ACTIVATED(task)) { + task->tk_status = -EIO; + rpc_put_task_async(task); + return; + } /* * Protect the queue operations. @@ -359,6 +404,7 @@ void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW); spin_unlock_bh(&q->lock); } +EXPORT_SYMBOL_GPL(rpc_sleep_on_priority); /** * __rpc_do_wake_up_task - wake up a single rpc_task @@ -378,6 +424,8 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task return; } + trace_rpc_task_wakeup(task->tk_client, task, queue); + __rpc_remove_wait_queue(queue, task); rpc_make_runnable(task); @@ -390,23 +438,12 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task */ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); -} - -/* - * Tests whether rpc queue is empty - */ -int rpc_queue_empty(struct rpc_wait_queue *queue) -{ - int res; - - spin_lock_bh(&queue->lock); - res = queue->qlen; - spin_unlock_bh(&queue->lock); - return res == 0; + if (RPC_IS_QUEUED(task)) { + smp_rmb(); + if (task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); + } } -EXPORT_SYMBOL_GPL(rpc_queue_empty); /* * Wake up a task on a specific queue @@ -422,7 +459,7 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* * Wake up the next task on a priority queue. */ -static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue) +static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue) { struct list_head *q; struct rpc_task *task; @@ -441,8 +478,7 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu /* * Check if we need to switch queues. */ - if (--queue->count) - goto new_owner; + goto new_owner; } /* @@ -467,30 +503,54 @@ new_queue: new_owner: rpc_set_waitqueue_owner(queue, task->tk_owner); out: - rpc_wake_up_task_queue_locked(queue, task); return task; } +static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue) +{ + if (RPC_IS_PRIORITY(queue)) + return __rpc_find_next_queued_priority(queue); + if (!list_empty(&queue->tasks[0])) + return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list); + return NULL; +} + /* - * Wake up the next task on the wait queue. + * Wake up the first task on the wait queue. */ -struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) +struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue, + bool (*func)(struct rpc_task *, void *), void *data) { struct rpc_task *task = NULL; - dprintk("RPC: wake_up_next(%p \"%s\")\n", + dprintk("RPC: wake_up_first(%p \"%s\")\n", queue, rpc_qname(queue)); spin_lock_bh(&queue->lock); - if (RPC_IS_PRIORITY(queue)) - task = __rpc_wake_up_next_priority(queue); - else { - task_for_first(task, &queue->tasks[0]) + task = __rpc_find_next_queued(queue); + if (task != NULL) { + if (func(task, data)) rpc_wake_up_task_queue_locked(queue, task); + else + task = NULL; } spin_unlock_bh(&queue->lock); return task; } +EXPORT_SYMBOL_GPL(rpc_wake_up_first); + +static bool rpc_wake_up_next_func(struct rpc_task *task, void *data) +{ + return true; +} + +/* + * Wake up the next task on the wait queue. +*/ +struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue) +{ + return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL); +} EXPORT_SYMBOL_GPL(rpc_wake_up_next); /** @@ -501,14 +561,18 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next); */ void rpc_wake_up(struct rpc_wait_queue *queue) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); rpc_wake_up_task_queue_locked(queue, task); + } if (head == &queue->tasks[0]) break; head--; @@ -526,13 +590,16 @@ EXPORT_SYMBOL_GPL(rpc_wake_up); */ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) { - struct rpc_task *task, *next; struct list_head *head; spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { - list_for_each_entry_safe(task, next, head, u.tk_wait.list) { + while (!list_empty(head)) { + struct rpc_task *task; + task = list_first_entry(head, + struct rpc_task, + u.tk_wait.list); task->tk_status = status; rpc_wake_up_task_queue_locked(queue, task); } @@ -570,7 +637,8 @@ static void __rpc_queue_timer_fn(unsigned long ptr) static void __rpc_atrun(struct rpc_task *task) { - task->tk_status = 0; + if (task->tk_status == -ETIMEDOUT) + task->tk_status = 0; } /* @@ -656,7 +724,9 @@ static void __rpc_execute(struct rpc_task *task) dprintk("RPC: %5u __rpc_execute flags=0x%x\n", task->tk_pid, task->tk_flags); - BUG_ON(RPC_IS_QUEUED(task)); + WARN_ON_ONCE(RPC_IS_QUEUED(task)); + if (RPC_IS_QUEUED(task)) + return; for (;;) { void (*do_action)(struct rpc_task *); @@ -677,6 +747,7 @@ static void __rpc_execute(struct rpc_task *task) if (do_action == NULL) break; } + trace_rpc_task_run_action(task->tk_client, task, task->tk_action); do_action(task); /* @@ -720,7 +791,6 @@ static void __rpc_execute(struct rpc_task *task) task->tk_flags |= RPC_TASK_KILLED; rpc_exit(task, -ERESTARTSYS); } - rpc_set_running(task); dprintk("RPC: %5u sync task resuming\n", task->tk_pid); } @@ -741,15 +811,19 @@ static void __rpc_execute(struct rpc_task *task) */ void rpc_execute(struct rpc_task *task) { + bool is_async = RPC_IS_ASYNC(task); + rpc_set_active(task); rpc_make_runnable(task); - if (!RPC_IS_ASYNC(task)) + if (!is_async) __rpc_execute(task); } static void rpc_async_schedule(struct work_struct *work) { + current->flags |= PF_FSTRANS; __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); + current->flags &= ~PF_FSTRANS; } /** @@ -758,7 +832,8 @@ static void rpc_async_schedule(struct work_struct *work) * @size: requested byte size * * To prevent rpciod from hanging, this allocator never sleeps, - * returning NULL if the request cannot be serviced immediately. + * returning NULL and suppressing warning if the request cannot be serviced + * immediately. * The caller can arrange to sleep in a way that is safe for rpciod. * * Most requests are 'small' (under 2KiB) and can be serviced from a @@ -771,7 +846,10 @@ static void rpc_async_schedule(struct work_struct *work) void *rpc_malloc(struct rpc_task *task, size_t size) { struct rpc_buffer *buf; - gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT; + gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; + + if (RPC_IS_SWAPPER(task)) + gfp |= __GFP_MEMALLOC; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) @@ -845,7 +923,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta static struct rpc_task * rpc_alloc_task(void) { - return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); + return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO); } /* @@ -872,16 +950,35 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) return task; } +/* + * rpc_free_task - release rpc task and perform cleanups + * + * Note that we free up the rpc_task _after_ rpc_release_calldata() + * in order to work around a workqueue dependency issue. + * + * Tejun Heo states: + * "Workqueue currently considers two work items to be the same if they're + * on the same address and won't execute them concurrently - ie. it + * makes a work item which is queued again while being executed wait + * for the previous execution to complete. + * + * If a work function frees the work item, and then waits for an event + * which should be performed by another work item and *that* work item + * recycles the freed work item, it can create a false dependency loop. + * There really is no reliable way to detect this short of verifying + * every memory free." + * + */ static void rpc_free_task(struct rpc_task *task) { - const struct rpc_call_ops *tk_ops = task->tk_ops; - void *calldata = task->tk_calldata; + unsigned short tk_flags = task->tk_flags; + + rpc_release_calldata(task->tk_ops, task->tk_calldata); - if (task->tk_flags & RPC_TASK_DYNAMIC) { + if (tk_flags & RPC_TASK_DYNAMIC) { dprintk("RPC: %5u freeing task\n", task->tk_pid); mempool_free(task, rpc_task_mempool); } - rpc_release_calldata(tk_ops, calldata); } static void rpc_async_release(struct work_struct *work) @@ -891,8 +988,7 @@ static void rpc_async_release(struct work_struct *work) static void rpc_release_resources_task(struct rpc_task *task) { - if (task->tk_rqstp) - xprt_release(task); + xprt_release(task); if (task->tk_msg.rpc_cred) { put_rpccred(task->tk_msg.rpc_cred); task->tk_msg.rpc_cred = NULL; @@ -934,7 +1030,7 @@ static void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %5u release task\n", task->tk_pid); - BUG_ON (RPC_IS_QUEUED(task)); + WARN_ON_ONCE(RPC_IS_QUEUED(task)); rpc_release_resources_task(task); @@ -975,7 +1071,7 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. */ dprintk("RPC: creating workqueue rpciod\n"); - wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 1); rpciod_workqueue = wq; return rpciod_workqueue != NULL; } |
