diff options
Diffstat (limited to 'net/sunrpc/sched.c')
| -rw-r--r-- | net/sunrpc/sched.c | 172 |
1 files changed, 112 insertions, 60 deletions
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 1f19aa15f89..c0365c14b85 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -98,6 +98,39 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } +static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue) +{ + struct list_head *q = &queue->tasks[queue->priority]; + struct rpc_task *task; + + if (!list_empty(q)) { + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); + if (task->tk_owner == queue->owner) + list_move_tail(&task->u.tk_wait.list, q); + } +} + +static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) +{ + if (queue->priority != priority) { + /* Fairness: rotate the list when changing priority */ + rpc_rotate_queue_owner(queue); + queue->priority = priority; + } +} + +static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) +{ + queue->owner = pid; + queue->nr = RPC_BATCH_COUNT; +} + +static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) +{ + rpc_set_waitqueue_priority(queue, queue->maxpriority); + rpc_set_waitqueue_owner(queue, 0); +} + /* * Add new request to a priority queue. */ @@ -109,9 +142,11 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *t; INIT_LIST_HEAD(&task->u.tk_wait.links); - q = &queue->tasks[queue_priority]; if (unlikely(queue_priority > queue->maxpriority)) - q = &queue->tasks[queue->maxpriority]; + queue_priority = queue->maxpriority; + if (queue_priority > queue->priority) + rpc_set_waitqueue_priority(queue, queue_priority); + q = &queue->tasks[queue_priority]; list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_owner == task->tk_owner) { list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); @@ -133,7 +168,9 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task, unsigned char queue_priority) { - BUG_ON (RPC_IS_QUEUED(task)); + WARN_ON_ONCE(RPC_IS_QUEUED(task)); + if (RPC_IS_QUEUED(task)) + return; if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task, queue_priority); @@ -143,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; queue->qlen++; + /* barrier matches the read in rpc_wake_up_task_queue_locked() */ + smp_wmb(); rpc_set_queued(task); dprintk("RPC: %5u added to queue %p \"%s\"\n", @@ -178,24 +217,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas task->tk_pid, queue, rpc_qname(queue)); } -static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) -{ - queue->priority = priority; - queue->count = 1 << (priority * 2); -} - -static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) -{ - queue->owner = pid; - queue->nr = RPC_BATCH_COUNT; -} - -static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) -{ - rpc_set_waitqueue_priority(queue, queue->maxpriority); - rpc_set_waitqueue_owner(queue, 0); -} - static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues) { int i; @@ -233,11 +254,11 @@ static int rpc_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - freezable_schedule(); + freezable_schedule_unsafe(); return 0; } -#ifdef RPC_DEBUG +#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) static void rpc_task_set_debuginfo(struct rpc_task *task) { static atomic_t rpc_pid; @@ -303,11 +324,17 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); * Note: If the task is ASYNC, and is being made runnable after sitting on an * rpc_wait_queue, this must be called with the queue spinlock held to protect * the wait queue operation. + * Note the ordering of rpc_test_and_set_running() and rpc_clear_queued(), + * which is needed to ensure that __rpc_execute() doesn't loop (due to the + * lockless RPC_IS_QUEUED() test) before we've had a chance to test + * the RPC_TASK_RUNNING flag. */ static void rpc_make_runnable(struct rpc_task *task) { + bool need_wakeup = !rpc_test_and_set_running(task); + rpc_clear_queued(task); - if (rpc_test_and_set_running(task)) + if (!need_wakeup) return; if (RPC_IS_ASYNC(task)) { INIT_WORK(&task->u.tk_work, rpc_async_schedule); @@ -334,7 +361,7 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, __rpc_add_wait_queue(q, task, queue_priority); - BUG_ON(task->tk_callback != NULL); + WARN_ON_ONCE(task->tk_callback != NULL); task->tk_callback = action; __rpc_add_timer(q, task); } @@ -343,7 +370,12 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action) { /* We shouldn't ever put an inactive task to sleep */ - BUG_ON(!RPC_IS_ACTIVATED(task)); + WARN_ON_ONCE(!RPC_IS_ACTIVATED(task)); + if (!RPC_IS_ACTIVATED(task)) { + task->tk_status = -EIO; + rpc_put_task_async(task); + return; + } /* * Protect the queue operations. @@ -358,7 +390,12 @@ void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, int priority) { /* We shouldn't ever put an inactive task to sleep */ - BUG_ON(!RPC_IS_ACTIVATED(task)); + WARN_ON_ONCE(!RPC_IS_ACTIVATED(task)); + if (!RPC_IS_ACTIVATED(task)) { + task->tk_status = -EIO; + rpc_put_task_async(task); + return; + } /* * Protect the queue operations. @@ -367,6 +404,7 @@ void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW); spin_unlock_bh(&q->lock); } +EXPORT_SYMBOL_GPL(rpc_sleep_on_priority); /** * __rpc_do_wake_up_task - wake up a single rpc_task @@ -400,23 +438,12 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task */ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); -} - -/* - * Tests whether rpc queue is empty - */ -int rpc_queue_empty(struct rpc_wait_queue *queue) -{ - int res; - - spin_lock_bh(&queue->lock); - res = queue->qlen; - spin_unlock_bh(&queue->lock); - return res == 0; + if (RPC_IS_QUEUED(task)) { + smp_rmb(); + if (task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); + } } -EXPORT_SYMBOL_GPL(rpc_queue_empty); /* * Wake up a task on a specific queue @@ -451,8 +478,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q /* * Check if we need to switch queues. */ - if (--queue->count) - goto new_owner; + goto new_owner; } /* @@ -611,7 +637,8 @@ static void __rpc_queue_timer_fn(unsigned long ptr) static void __rpc_atrun(struct rpc_task *task) { - task->tk_status = 0; + if (task->tk_status == -ETIMEDOUT) + task->tk_status = 0; } /* @@ -697,7 +724,9 @@ static void __rpc_execute(struct rpc_task *task) dprintk("RPC: %5u __rpc_execute flags=0x%x\n", task->tk_pid, task->tk_flags); - BUG_ON(RPC_IS_QUEUED(task)); + WARN_ON_ONCE(RPC_IS_QUEUED(task)); + if (RPC_IS_QUEUED(task)) + return; for (;;) { void (*do_action)(struct rpc_task *); @@ -762,7 +791,6 @@ static void __rpc_execute(struct rpc_task *task) task->tk_flags |= RPC_TASK_KILLED; rpc_exit(task, -ERESTARTSYS); } - rpc_set_running(task); dprintk("RPC: %5u sync task resuming\n", task->tk_pid); } @@ -783,9 +811,11 @@ static void __rpc_execute(struct rpc_task *task) */ void rpc_execute(struct rpc_task *task) { + bool is_async = RPC_IS_ASYNC(task); + rpc_set_active(task); rpc_make_runnable(task); - if (!RPC_IS_ASYNC(task)) + if (!is_async) __rpc_execute(task); } @@ -802,7 +832,8 @@ static void rpc_async_schedule(struct work_struct *work) * @size: requested byte size * * To prevent rpciod from hanging, this allocator never sleeps, - * returning NULL if the request cannot be serviced immediately. + * returning NULL and suppressing warning if the request cannot be serviced + * immediately. * The caller can arrange to sleep in a way that is safe for rpciod. * * Most requests are 'small' (under 2KiB) and can be serviced from a @@ -815,7 +846,10 @@ static void rpc_async_schedule(struct work_struct *work) void *rpc_malloc(struct rpc_task *task, size_t size) { struct rpc_buffer *buf; - gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT; + gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; + + if (RPC_IS_SWAPPER(task)) + gfp |= __GFP_MEMALLOC; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) @@ -889,7 +923,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta static struct rpc_task * rpc_alloc_task(void) { - return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); + return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO); } /* @@ -916,16 +950,35 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) return task; } +/* + * rpc_free_task - release rpc task and perform cleanups + * + * Note that we free up the rpc_task _after_ rpc_release_calldata() + * in order to work around a workqueue dependency issue. + * + * Tejun Heo states: + * "Workqueue currently considers two work items to be the same if they're + * on the same address and won't execute them concurrently - ie. it + * makes a work item which is queued again while being executed wait + * for the previous execution to complete. + * + * If a work function frees the work item, and then waits for an event + * which should be performed by another work item and *that* work item + * recycles the freed work item, it can create a false dependency loop. + * There really is no reliable way to detect this short of verifying + * every memory free." + * + */ static void rpc_free_task(struct rpc_task *task) { - const struct rpc_call_ops *tk_ops = task->tk_ops; - void *calldata = task->tk_calldata; + unsigned short tk_flags = task->tk_flags; + + rpc_release_calldata(task->tk_ops, task->tk_calldata); - if (task->tk_flags & RPC_TASK_DYNAMIC) { + if (tk_flags & RPC_TASK_DYNAMIC) { dprintk("RPC: %5u freeing task\n", task->tk_pid); mempool_free(task, rpc_task_mempool); } - rpc_release_calldata(tk_ops, calldata); } static void rpc_async_release(struct work_struct *work) @@ -935,8 +988,7 @@ static void rpc_async_release(struct work_struct *work) static void rpc_release_resources_task(struct rpc_task *task) { - if (task->tk_rqstp) - xprt_release(task); + xprt_release(task); if (task->tk_msg.rpc_cred) { put_rpccred(task->tk_msg.rpc_cred); task->tk_msg.rpc_cred = NULL; @@ -978,7 +1030,7 @@ static void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %5u release task\n", task->tk_pid); - BUG_ON (RPC_IS_QUEUED(task)); + WARN_ON_ONCE(RPC_IS_QUEUED(task)); rpc_release_resources_task(task); @@ -1019,7 +1071,7 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. */ dprintk("RPC: creating workqueue rpciod\n"); - wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 1); rpciod_workqueue = wq; return rpciod_workqueue != NULL; } |
