From 2785259631697ebb0749a3782cca206e2e542939 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Mon, 4 Feb 2008 23:48:37 -0800 Subject: nfs: use GFP_NOFS preloads for radix-tree insertion NFS should use GFP_NOFS mode radix tree preloads rather than GFP_ATOMIC allocations at radix-tree insertion-time. This is important to reduce the atomic memory requirement. Signed-off-by: Nick Piggin Cc: Trond Myklebust Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f55c437124a..7be42e6eb63 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -360,15 +360,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) /* * Insert a write request into an inode */ -static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); int error; error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); - BUG_ON(error == -EEXIST); - if (error) - return error; + BUG_ON(error); if (!nfsi->npages) { igrab(inode); if (nfs_have_delegation(inode, FMODE_WRITE)) @@ -378,8 +376,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); nfsi->npages++; kref_get(&req->wb_kref); - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); - return 0; + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, + NFS_PAGE_TAG_LOCKED); } /* @@ -591,6 +589,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, /* Loop over all inode entries and see if we find * A request for the page we wish to update */ + if (new) { + if (radix_tree_preload(GFP_NOFS)) { + nfs_release_request(new); + return ERR_PTR(-ENOMEM); + } + } + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); if (req) { @@ -601,28 +606,27 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, error = nfs_wait_on_request(req); nfs_release_request(req); if (error < 0) { - if (new) + if (new) { + radix_tree_preload_end(); nfs_release_request(new); + } return ERR_PTR(error); } continue; } spin_unlock(&inode->i_lock); - if (new) + if (new) { + radix_tree_preload_end(); nfs_release_request(new); + } break; } if (new) { - int error; nfs_lock_request_dontget(new); - error = nfs_inode_add_request(inode, new); - if (error) { - spin_unlock(&inode->i_lock); - nfs_unlock_request(new); - return ERR_PTR(error); - } + nfs_inode_add_request(inode, new); spin_unlock(&inode->i_lock); + radix_tree_preload_end(); req = new; goto zero_page; } -- cgit v1.2.3-18-g5258 From cbc20059259edee4ea24c923e6627c394830c972 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Feb 2008 11:11:30 -0500 Subject: SUNRPC: Declare as const the rpc_message arguments to rpc_call_sync/async Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 9 +++++---- net/sunrpc/clnt.c | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 129a86e25d2..6fff7f82ef1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -127,11 +127,12 @@ int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int); void rpcb_getport_async(struct rpc_task *); void rpc_call_start(struct rpc_task *); -int rpc_call_async(struct 
rpc_clnt *clnt, struct rpc_message *msg, - int flags, const struct rpc_call_ops *tk_ops, +int rpc_call_async(struct rpc_clnt *clnt, + const struct rpc_message *msg, int flags, + const struct rpc_call_ops *tk_ops, void *calldata); -int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, - int flags); +int rpc_call_sync(struct rpc_clnt *clnt, + const struct rpc_message *msg, int flags); struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags); void rpc_restart_call(struct rpc_task *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8c6a7f1a25e..a23512bb240 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -548,7 +548,7 @@ EXPORT_SYMBOL_GPL(rpc_run_task); * @msg: RPC call parameters * @flags: RPC call flags */ -int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) +int rpc_call_sync(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags) { struct rpc_task *task; struct rpc_task_setup task_setup_data = { @@ -579,7 +579,7 @@ EXPORT_SYMBOL_GPL(rpc_call_sync); * @data: user call data */ int -rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, +rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; -- cgit v1.2.3-18-g5258 From 4b5621f6b127bce9218998c187bd25bf7f9fc371 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Feb 2008 15:56:29 -0800 Subject: NFS: Fix an f_mode/f_flags confusion in fs/nfs/write.c O_SYNC is stored in filp->f_flags. Thanks to Al Viro for pointing out the bug. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f55c437124a..80c61fdb272 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -734,7 +734,7 @@ int nfs_updatepage(struct file *file, struct page *page, */ if (nfs_write_pageuptodate(page, inode) && inode->i_flock == NULL && - !(file->f_mode & O_SYNC)) { + !(file->f_flags & O_SYNC)) { count = max(count + offset, nfs_page_length(page)); offset = 0; } -- cgit v1.2.3-18-g5258 From 383ba71938519959be8e0b598ec658f0c211ff45 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Feb 2008 20:04:20 -0500 Subject: NFS: Fix a deadlock with lazy umount We can't allow rpc callback functions like task->tk_ops->rpc_call_prepare() and task->tk_ops->rpc_call_done() to call mntput() in any way, since that will cause a deadlock when the call to rpc_shutdown_client() attempts to wait on 'task' to complete. We can avoid the above deadlock by moving calls to mntput to task->tk_ops->rpc_release() callback, since at that time the task will be marked as completed, and so rpc_shutdown_client won't attempt to wait on it. 
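For illustration only, not part of the patch: a minimal sketch of the callback split described above. The struct rpc_call_ops member names are the real ones; the foo_calldata structure and the nfs_foo_* helpers are made up for the example.

#include <linux/slab.h>
#include <linux/nfs_fs.h>
#include <linux/sunrpc/sched.h>

/* Hypothetical per-call state; ctx pins the open file and hence the mount. */
struct foo_calldata {
	struct nfs_open_context *ctx;
	int status;
};

/* Runs while rpc_shutdown_client() may still be waiting on this task. */
static void nfs_foo_done(struct rpc_task *task, void *data)
{
	struct foo_calldata *calldata = data;

	/* Only record results here; never drop the last mount reference. */
	calldata->status = task->tk_status;
}

/* Runs after the task is marked complete, so a resulting mntput() cannot deadlock. */
static void nfs_foo_release(void *data)
{
	struct foo_calldata *calldata = data;

	put_nfs_open_context(calldata->ctx);	/* may end up calling mntput() */
	kfree(calldata);
}

static const struct rpc_call_ops nfs_foo_ops = {
	.rpc_call_done	= nfs_foo_done,
	.rpc_release	= nfs_foo_release,
};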
Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 5 +++-- fs/nfs/inode.c | 6 +++++- fs/nfs/read.c | 7 +++++-- fs/nfs/write.c | 13 ++++++++++--- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 16844f98f50..e0170407a88 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -323,7 +323,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = ctx; + data->args.context = get_nfs_open_context(ctx); data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; @@ -546,6 +546,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->args.fh = NFS_FH(data->inode); data->args.offset = 0; data->args.count = 0; + data->args.context = get_nfs_open_context(dreq->ctx); data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -728,7 +729,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = ctx; + data->args.context = get_nfs_open_context(ctx); data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 966a8850aa3..a499fb58d85 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -521,8 +521,12 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait) { - struct inode *inode = ctx->path.dentry->d_inode; + struct inode *inode; + if (ctx == NULL) + return; + + inode = ctx->path.dentry->d_inode; if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) return; list_del(&ctx->list); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 3d7d9631e12..fab0d3720a0 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -73,7 +73,10 @@ static void nfs_readdata_free(struct nfs_read_data *rdata) void nfs_readdata_release(void *data) { - nfs_readdata_free(data); + struct nfs_read_data *rdata = data; + + put_nfs_open_context(rdata->args.context); + nfs_readdata_free(rdata); } static @@ -186,7 +189,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; - data->args.context = req->wb_context; + data->args.context = get_nfs_open_context(req->wb_context); data->res.fattr = &data->fattr; data->res.count = count; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 80c61fdb272..69b4158d9a1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -105,8 +105,11 @@ static void nfs_writedata_free(struct nfs_write_data *wdata) call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free); } -void nfs_writedata_release(void *wdata) +void nfs_writedata_release(void *data) { + struct nfs_write_data *wdata = data; + + put_nfs_open_context(wdata->args.context); nfs_writedata_free(wdata); } @@ -816,7 +819,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; - data->args.context = req->wb_context; + data->args.context = get_nfs_open_context(req->wb_context); data->args.stable = NFS_UNSTABLE; if (how & FLUSH_STABLE) { data->args.stable = NFS_DATA_SYNC; @@ -1153,8 +1156,11 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if 
defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -void nfs_commit_release(void *wdata) +void nfs_commit_release(void *data) { + struct nfs_write_data *wdata = data; + + put_nfs_open_context(wdata->args.context); nfs_commit_free(wdata); } @@ -1197,6 +1203,7 @@ static void nfs_commit_rpcsetup(struct list_head *head, /* Note: we always request a commit of the entire inode */ data->args.offset = 0; data->args.count = 0; + data->args.context = get_nfs_open_context(first->wb_context); data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; -- cgit v1.2.3-18-g5258 From 32bfb5c0f495dd88ef6bac4b76885d0820563739 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Feb 2008 20:04:21 -0500 Subject: SUNRPC: Allow the rpc_release() callback to be run on another workqueue A lot of the work done by the rpc_release() callback is inappropriate for rpciod as it will often involve things like starting a new rpc call in order to clean up state after an interrupted NFSv4 open() call, or calls to mntput(), etc. This patch allows the caller of rpc_run_task() to specify that the rpc_release callback should run on a different workqueue than the default rpciod_workqueue. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + net/sunrpc/sched.c | 29 +++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index f689f02e679..fefb0ab5218 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -123,6 +123,7 @@ struct rpc_task_setup { const struct rpc_message *rpc_message; const struct rpc_call_ops *callback_ops; void *callback_data; + struct workqueue_struct *workqueue; unsigned short flags; signed char priority; }; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4c669121e60..3e0b22382a3 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -326,7 +326,7 @@ static void rpc_make_runnable(struct rpc_task *task) int status; INIT_WORK(&task->u.tk_work, rpc_async_schedule); - status = queue_work(task->tk_workqueue, &task->u.tk_work); + status = queue_work(rpciod_workqueue, &task->u.tk_work); if (status < 0) { printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); task->tk_status = status; @@ -832,7 +832,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_owner = current->tgid; /* Initialize workqueue for async tasks */ - task->tk_workqueue = rpciod_workqueue; + task->tk_workqueue = task_setup_data->workqueue; task->tk_client = task_setup_data->rpc_client; if (task->tk_client != NULL) { @@ -868,7 +868,7 @@ rpc_alloc_task(void) return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } -static void rpc_free_task(struct rcu_head *rcu) +static void rpc_free_task_rcu(struct rcu_head *rcu) { struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu); dprintk("RPC: %5u freeing task\n", task->tk_pid); @@ -898,12 +898,23 @@ out: return task; } - -void rpc_put_task(struct rpc_task *task) +static void rpc_free_task(struct rpc_task *task) { const struct rpc_call_ops *tk_ops = task->tk_ops; void *calldata = task->tk_calldata; + if (task->tk_flags & RPC_TASK_DYNAMIC) + call_rcu_bh(&task->u.tk_rcu, rpc_free_task_rcu); + rpc_release_calldata(tk_ops, calldata); +} + +static void rpc_async_release(struct work_struct *work) +{ + rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); +} + +void rpc_put_task(struct rpc_task *task) +{ if 
(!atomic_dec_and_test(&task->tk_count)) return; /* Release resources */ @@ -915,9 +926,11 @@ void rpc_put_task(struct rpc_task *task) rpc_release_client(task->tk_client); task->tk_client = NULL; } - if (task->tk_flags & RPC_TASK_DYNAMIC) - call_rcu_bh(&task->u.tk_rcu, rpc_free_task); - rpc_release_calldata(tk_ops, calldata); + if (task->tk_workqueue != NULL) { + INIT_WORK(&task->u.tk_work, rpc_async_release); + queue_work(task->tk_workqueue, &task->u.tk_work); + } else + rpc_free_task(task); } EXPORT_SYMBOL_GPL(rpc_put_task); -- cgit v1.2.3-18-g5258 From 5746006f1d17d9d5a3015051ea54de4341cb31f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Feb 2008 20:04:22 -0500 Subject: NFS: Add an nfsiod workqueue NFS post-rpciod cleanups often involve tasks that cannot be safely performed within the rpciod context (due to deadlock concerns). We therefore add a dedicated NFS workqueue that can perform tasks like cleaning up state after an interrupted NFSv4 open() call, or calling put_nfs_open_context() after an asynchronous read or write call. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 37 +++++++++++++++++++++++++++++++++++++ fs/nfs/internal.h | 1 + 2 files changed, 38 insertions(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a499fb58d85..36cb99985d2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1219,6 +1219,36 @@ static void nfs_destroy_inodecache(void) kmem_cache_destroy(nfs_inode_cachep); } +struct workqueue_struct *nfsiod_workqueue; + +/* + * start up the nfsiod workqueue + */ +static int nfsiod_start(void) +{ + struct workqueue_struct *wq; + dprintk("RPC: creating workqueue nfsiod\n"); + wq = create_singlethread_workqueue("nfsiod"); + if (wq == NULL) + return -ENOMEM; + nfsiod_workqueue = wq; + return 0; +} + +/* + * Destroy the nfsiod workqueue + */ +static void nfsiod_stop(void) +{ + struct workqueue_struct *wq; + + wq = nfsiod_workqueue; + if (wq == NULL) + return; + nfsiod_workqueue = NULL; + destroy_workqueue(wq); +} + /* * Initialize NFS */ @@ -1226,6 +1256,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfsiod_start(); + if (err) + goto out6; + err = nfs_fs_proc_init(); if (err) goto out5; @@ -1272,6 +1306,8 @@ out3: out4: nfs_fs_proc_exit(); out5: + nfsiod_stop(); +out6: return err; } @@ -1287,6 +1323,7 @@ static void __exit exit_nfs_fs(void) #endif unregister_nfs_fs(); nfs_fs_proc_exit(); + nfsiod_stop(); } /* Not quite true; I just maintain it */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 0f5619611b8..985dc293103 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -143,6 +143,7 @@ extern struct rpc_procinfo nfs4_procedures[]; extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); /* inode.c */ +extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); extern int nfs_write_inode(struct inode *,int); -- cgit v1.2.3-18-g5258 From 101070ca2fe67186f5f5517b66cb4757b17f4e29 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Feb 2008 20:04:23 -0500 Subject: NFS: Ensure that the asynchronous RPC calls complete on nfsiod. We want to ensure that rpc_call_ops that involve mntput() are run on nfsiod rather than on rpciod, so that they don't deadlock when the resulting umount calls rpc_shutdown_client(). Hence we specify that read, write and commit calls must complete on nfsiod. Ditto for NFSv4 open, lock, locku and close asynchronous calls. 
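For illustration only, not part of the patch: a rough sketch of how a caller selects the completion workqueue. The rpc_task_setup fields are the ones introduced earlier in this series; the nfs_foo_async_call() wrapper and its arguments are hypothetical.

#include <linux/err.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include "internal.h"	/* declares nfsiod_workqueue */

static int nfs_foo_async_call(struct rpc_clnt *clnt, const struct rpc_message *msg,
			      const struct rpc_call_ops *ops, void *calldata)
{
	struct rpc_task_setup task_setup_data = {
		.rpc_client	= clnt,
		.rpc_message	= msg,
		.callback_ops	= ops,
		.callback_data	= calldata,
		.workqueue	= nfsiod_workqueue,	/* defer ->rpc_release() to nfsiod */
		.flags		= RPC_TASK_ASYNC,
	};
	struct rpc_task *task;

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}

Only the final cleanup (rpc_release_calldata() and freeing the task) is deferred to the chosen workqueue; the RPC state machine itself still runs on rpciod, as arranged by the earlier "Allow the rpc_release() callback to be run on another workqueue" patch.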
Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++++ fs/nfs/nfs4proc.c | 6 ++++++ fs/nfs/read.c | 1 + fs/nfs/write.c | 2 ++ 4 files changed, 13 insertions(+) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e0170407a88..e44200579c8 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -280,6 +280,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, .rpc_client = NFS_CLIENT(inode), .rpc_message = &msg, .callback_ops = &nfs_read_direct_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; unsigned int pgbase; @@ -446,6 +447,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) struct rpc_task_setup task_setup_data = { .rpc_client = NFS_CLIENT(inode), .callback_ops = &nfs_write_direct_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; @@ -537,6 +539,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) .rpc_message = &msg, .callback_ops = &nfs_commit_direct_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; @@ -683,6 +686,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, .rpc_client = NFS_CLIENT(inode), .rpc_message = &msg, .callback_ops = &nfs_write_direct_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; size_t wsize = NFS_SERVER(inode)->wsize; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ce07862c2f..b6db833f33e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -51,6 +51,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "internal.h" #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -773,6 +774,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) .rpc_message = &msg, .callback_ops = &nfs4_open_confirm_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int status; @@ -910,6 +912,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) .rpc_message = &msg, .callback_ops = &nfs4_open_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int status; @@ -1315,6 +1318,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) .rpc_client = server->client, .rpc_message = &msg, .callback_ops = &nfs4_close_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int status = -ENOMEM; @@ -3235,6 +3239,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, .rpc_client = NFS_CLIENT(lsp->ls_state->inode), .rpc_message = &msg, .callback_ops = &nfs4_locku_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; @@ -3419,6 +3424,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f .rpc_client = NFS_CLIENT(state->inode), .rpc_message = &msg, .callback_ops = &nfs4_lock_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int ret; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index fab0d3720a0..87546cd277d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -177,6 +177,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, .rpc_message = &msg, .callback_ops = call_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC | swap_flags, }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 69b4158d9a1..84495ef9072 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -803,6 +803,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, .rpc_message = &msg, .callback_ops = call_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = flags, 
.priority = priority, }; @@ -1187,6 +1188,7 @@ static void nfs_commit_rpcsetup(struct list_head *head, .rpc_message = &msg, .callback_ops = &nfs_commit_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = flags, .priority = priority, }; -- cgit v1.2.3-18-g5258 From fde95c7554aa77f9a242f32b0b5f8f15395abf52 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 22 Feb 2008 15:09:26 -0500 Subject: SUNRPC: Clean up rpc_run_timer() All RPC timeout callback functions are expected to wake the task up. We can enforce this by moving the wakeup back into rpc_run_timer. Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 67 ++++++++++++++++++++++++++++-------------------------- net/sunrpc/xprt.c | 2 -- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 3e0b22382a3..9433a113862 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -38,7 +38,6 @@ static struct kmem_cache *rpc_buffer_slabp __read_mostly; static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; -static void __rpc_default_timer(struct rpc_task *task); static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); @@ -66,25 +65,13 @@ __rpc_disable_timer(struct rpc_task *task) } /* - * Run a timeout function. - * We use the callback in order to allow __rpc_wake_up_task() - * and friends to disable the timer synchronously on SMP systems - * without calling del_timer_sync(). The latter could cause a - * deadlock if called while we're holding spinlocks... + * Default timeout handler if none specified by user */ -static void rpc_run_timer(struct rpc_task *task) +static void +__rpc_default_timer(struct rpc_task *task) { - void (*callback)(struct rpc_task *); - - callback = task->tk_timeout_fn; - task->tk_timeout_fn = NULL; - if (callback && RPC_IS_QUEUED(task)) { - dprintk("RPC: %5u running timer\n", task->tk_pid); - callback(task); - } - smp_mb__before_clear_bit(); - clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); - smp_mb__after_clear_bit(); + dprintk("RPC: %5u timeout (default timer)\n", task->tk_pid); + task->tk_status = -ETIMEDOUT; } /* @@ -415,17 +402,6 @@ static void __rpc_wake_up_task(struct rpc_task *task) } } -/* - * Default timeout handler if none specified by user - */ -static void -__rpc_default_timer(struct rpc_task *task) -{ - dprintk("RPC: %5u timeout (default timer)\n", task->tk_pid); - task->tk_status = -ETIMEDOUT; - rpc_wake_up_task(task); -} - /* * Wake up the specified task */ @@ -578,9 +554,37 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) } EXPORT_SYMBOL_GPL(rpc_wake_up_status); +/* + * Run a timeout function. 
+ */ +static void rpc_run_timer(unsigned long ptr) +{ + struct rpc_task *task = (struct rpc_task *)ptr; + void (*callback)(struct rpc_task *); + + if (!rpc_start_wakeup(task)) + goto out; + if (RPC_IS_QUEUED(task)) { + struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; + callback = task->tk_timeout_fn; + + dprintk("RPC: %5u running timer\n", task->tk_pid); + if (callback != NULL) + callback(task); + /* Note: we're already in a bh-safe context */ + spin_lock(&queue->lock); + __rpc_do_wake_up_task(task); + spin_unlock(&queue->lock); + } + rpc_finish_wakeup(task); +out: + smp_mb__before_clear_bit(); + clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); + smp_mb__after_clear_bit(); +} + static void __rpc_atrun(struct rpc_task *task) { - rpc_wake_up_task(task); } /* @@ -816,8 +820,7 @@ EXPORT_SYMBOL_GPL(rpc_free); static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data) { memset(task, 0, sizeof(*task)); - setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer, - (unsigned long)task); + setup_timer(&task->tk_timer, rpc_run_timer, (unsigned long)task); atomic_set(&task->tk_count, 1); task->tk_flags = task_setup_data->flags; task->tk_ops = task_setup_data->callback_ops; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d5553b8179f..96c212ddc41 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -777,8 +777,6 @@ static void xprt_timer(struct rpc_task *task) xprt->ops->timer(task); task->tk_status = -ETIMEDOUT; } - task->tk_timeout = 0; - rpc_wake_up_task(task); spin_unlock(&xprt->transport_lock); } -- cgit v1.2.3-18-g5258 From 96ef13b283934fbf60b732e6c4ce23e8babd0042 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 22 Feb 2008 15:46:41 -0500 Subject: SUNRPC: Add a new helper rpc_wake_up_queued_task() In all cases where we currently use rpc_wake_up_task(), we almost always know on which waitqueue the rpc_task is actually sleeping. This will allows us to simplify the queue locking in a future patch. 
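For illustration only: a brief usage sketch of the new helper. The xprt->pending queue is the real one used by the follow-up conversion patch; foo_complete() and its arguments are hypothetical.

#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/xprt.h>

static void foo_complete(struct rpc_xprt *xprt, struct rpc_task *task)
{
	/*
	 * The caller already knows the task is sleeping on xprt->pending,
	 * so it names the queue explicitly instead of having the scheduler
	 * look the queue up from the task, as rpc_wake_up_task() did.
	 */
	rpc_wake_up_queued_task(&xprt->pending, task);
}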
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 4 ++- net/sunrpc/clnt.c | 2 +- net/sunrpc/sched.c | 64 +++++++++++++++++++++----------------------- 3 files changed, 34 insertions(+), 36 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index fefb0ab5218..83f67779cf0 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -33,7 +33,6 @@ struct rpc_wait_queue; struct rpc_wait { struct list_head list; /* wait queue links */ struct list_head links; /* Links to related tasks */ - struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */ }; /* @@ -80,6 +79,7 @@ struct rpc_task { struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could * be any workqueue */ + struct rpc_wait_queue *tk_waitqueue; /* RPC wait queue we're on */ union { struct work_struct tk_work; /* Async task work queue */ struct rpc_wait tk_wait; /* RPC wait */ @@ -233,6 +233,8 @@ void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action, rpc_action timer); void rpc_wake_up_task(struct rpc_task *); +void rpc_wake_up_queued_task(struct rpc_wait_queue *, + struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); void rpc_wake_up_status(struct rpc_wait_queue *, int); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8c6a7f1a25e..fe95bd0ab1e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1535,7 +1535,7 @@ void rpc_show_tasks(void) proc = -1; if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + rpc_waitq = rpc_qname(t->tk_waitqueue); printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", t->tk_pid, proc, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9433a113862..9233ace076a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -148,7 +148,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task * list_add(&task->u.tk_wait.list, &queue->tasks[0]); else list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); - task->u.tk_wait.rpc_waitq = queue; + task->tk_waitqueue = queue; queue->qlen++; rpc_set_queued(task); @@ -175,11 +175,8 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) * Remove request from queue. * Note: must be called with spin lock held. */ -static void __rpc_remove_wait_queue(struct rpc_task *task) +static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) { - struct rpc_wait_queue *queue; - queue = task->u.tk_wait.rpc_waitq; - if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); else @@ -364,11 +361,12 @@ EXPORT_SYMBOL_GPL(rpc_sleep_on); /** * __rpc_do_wake_up_task - wake up a single rpc_task + * @queue: wait queue * @task: task to be woken up * * Caller must hold queue->lock, and have cleared the task queued flag. 
*/ -static void __rpc_do_wake_up_task(struct rpc_task *task) +static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task *task) { dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", task->tk_pid, jiffies); @@ -383,7 +381,7 @@ static void __rpc_do_wake_up_task(struct rpc_task *task) } __rpc_disable_timer(task); - __rpc_remove_wait_queue(task); + __rpc_remove_wait_queue(queue, task); rpc_make_runnable(task); @@ -391,36 +389,38 @@ static void __rpc_do_wake_up_task(struct rpc_task *task) } /* - * Wake up the specified task + * Wake up a queued task while the queue lock is being held */ -static void __rpc_wake_up_task(struct rpc_task *task) +static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { + if (!RPC_IS_QUEUED(task) || task->tk_waitqueue != queue) + return; if (rpc_start_wakeup(task)) { - if (RPC_IS_QUEUED(task)) - __rpc_do_wake_up_task(task); + __rpc_do_wake_up_task(queue, task); rpc_finish_wakeup(task); } } /* - * Wake up the specified task + * Wake up a task on a specific queue */ -void rpc_wake_up_task(struct rpc_task *task) +void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task) { rcu_read_lock_bh(); - if (rpc_start_wakeup(task)) { - if (RPC_IS_QUEUED(task)) { - struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; - - /* Note: we're already in a bh-safe context */ - spin_lock(&queue->lock); - __rpc_do_wake_up_task(task); - spin_unlock(&queue->lock); - } - rpc_finish_wakeup(task); - } + spin_lock(&queue->lock); + rpc_wake_up_task_queue_locked(queue, task); + spin_unlock(&queue->lock); rcu_read_unlock_bh(); } +EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); + +/* + * Wake up the specified task + */ +void rpc_wake_up_task(struct rpc_task *task) +{ + rpc_wake_up_queued_task(task->tk_waitqueue, task); +} EXPORT_SYMBOL_GPL(rpc_wake_up_task); /* @@ -471,7 +471,7 @@ new_queue: new_owner: rpc_set_waitqueue_owner(queue, task->tk_owner); out: - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); return task; } @@ -490,7 +490,7 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) task = __rpc_wake_up_next_priority(queue); else { task_for_first(task, &queue->tasks[0]) - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); } spin_unlock(&queue->lock); rcu_read_unlock_bh(); @@ -515,7 +515,7 @@ void rpc_wake_up(struct rpc_wait_queue *queue) head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); if (head == &queue->tasks[0]) break; head--; @@ -543,7 +543,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) { task->tk_status = status; - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); } if (head == &queue->tasks[0]) break; @@ -562,10 +562,8 @@ static void rpc_run_timer(unsigned long ptr) struct rpc_task *task = (struct rpc_task *)ptr; void (*callback)(struct rpc_task *); - if (!rpc_start_wakeup(task)) - goto out; if (RPC_IS_QUEUED(task)) { - struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; + struct rpc_wait_queue *queue = task->tk_waitqueue; callback = task->tk_timeout_fn; dprintk("RPC: %5u running timer\n", task->tk_pid); @@ -573,11 +571,9 @@ static void rpc_run_timer(unsigned long ptr) callback(task); /* Note: we're already in a bh-safe context */ spin_lock(&queue->lock); - __rpc_do_wake_up_task(task); + 
rpc_wake_up_task_queue_locked(queue, task); spin_unlock(&queue->lock); } - rpc_finish_wakeup(task); -out: smp_mb__before_clear_bit(); clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); smp_mb__after_clear_bit(); -- cgit v1.2.3-18-g5258 From fda1393938035559b417dd5b26b9cc293a7aee00 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 22 Feb 2008 16:34:12 -0500 Subject: SUNRPC: Convert users of rpc_wake_up_task to use rpc_wake_up_queued_task Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- include/linux/sunrpc/sched.h | 1 - net/sunrpc/clnt.c | 2 +- net/sunrpc/sched.c | 3 +-- net/sunrpc/xprt.c | 10 +++++----- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b6db833f33e..54743396b66 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2768,7 +2768,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); nfs4_schedule_state_recovery(clp); if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) - rpc_wake_up_task(task); + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); task->tk_status = 0; return -EAGAIN; case -NFS4ERR_DELAY: diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 83f67779cf0..7963ef0ffb8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -232,7 +232,6 @@ void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action, rpc_action timer); -void rpc_wake_up_task(struct rpc_task *); void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index fe95bd0ab1e..0e209af3979 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1066,7 +1066,7 @@ call_transmit(struct rpc_task *task) if (task->tk_msg.rpc_proc->p_decode != NULL) return; task->tk_action = rpc_exit_task; - rpc_wake_up_task(task); + rpc_wake_up_queued_task(&task->tk_xprt->pending, task); } /* diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9233ace076a..35acdc39bfc 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -417,11 +417,10 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* * Wake up the specified task */ -void rpc_wake_up_task(struct rpc_task *task) +static void rpc_wake_up_task(struct rpc_task *task) { rpc_wake_up_queued_task(task->tk_waitqueue, task); } -EXPORT_SYMBOL_GPL(rpc_wake_up_task); /* * Wake up the next task on a priority queue. 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 96c212ddc41..6e2772217e5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -472,7 +472,7 @@ void xprt_write_space(struct rpc_xprt *xprt) if (xprt->snd_task) { dprintk("RPC: write space: waking waiting task on " "xprt %p\n", xprt); - rpc_wake_up_task(xprt->snd_task); + rpc_wake_up_queued_task(&xprt->pending, xprt->snd_task); } spin_unlock_bh(&xprt->transport_lock); } @@ -602,8 +602,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(rpciod_workqueue, &xprt->task_cleanup); - else if (xprt->snd_task != NULL) - rpc_wake_up_task(xprt->snd_task); + xprt_wake_pending_tasks(xprt, -ENOTCONN); spin_unlock_bh(&xprt->transport_lock); } EXPORT_SYMBOL_GPL(xprt_force_disconnect); @@ -749,18 +748,19 @@ EXPORT_SYMBOL_GPL(xprt_update_rtt); void xprt_complete_rqst(struct rpc_task *task, int copied) { struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", task->tk_pid, ntohl(req->rq_xid), copied); - task->tk_xprt->stat.recvs++; + xprt->stat.recvs++; task->tk_rtt = (long)jiffies - req->rq_xtime; list_del_init(&req->rq_list); /* Ensure all writes are done before we update req->rq_received */ smp_wmb(); req->rq_received = req->rq_private_buf.len = copied; - rpc_wake_up_task(task); + rpc_wake_up_queued_task(&xprt->pending, task); } EXPORT_SYMBOL_GPL(xprt_complete_rqst); -- cgit v1.2.3-18-g5258 From 5d00837b90340af9106dcd93af75fd664c8eb87f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 22 Feb 2008 16:34:17 -0500 Subject: SUNRPC: Run rpc timeout functions as callbacks instead of in softirqs An audit of the current RPC timeout functions shows that they don't really ever need to run in the softirq context. As long as the softirq is able to signal that the wakeup is due to a timeout (which it can do by setting task->tk_status to -ETIMEDOUT) then the callback functions can just run as standard task->tk_callback functions (in the rpciod/process context). The only possible border-line case would be xprt_timer() for the case of UDP, when the callback is used to reduce the size of the transport congestion window. In testing, however, the effect of moving that update to a callback would appear to be minor. 
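For illustration only: a minimal sketch of a timeout-aware rpc_action under the new convention. The foo_timeout_action() name is hypothetical; the -ETIMEDOUT check mirrors the xprt_timer() hunk below.

#include <linux/errno.h>
#include <linux/sunrpc/sched.h>

/*
 * Passed as the rpc_action to rpc_sleep_on(); it runs in process context
 * when the task is woken.  The timer no longer calls it from softirq
 * context; it merely sets tk_status to -ETIMEDOUT and wakes the task.
 */
static void foo_timeout_action(struct rpc_task *task)
{
	if (task->tk_status != -ETIMEDOUT)
		return;		/* woken up for some other reason */

	/* Timeout handling may now sleep, take mutexes, allocate, etc. */
}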
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4state.c | 2 +- include/linux/sunrpc/sched.h | 4 +--- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/rpcb_clnt.c | 2 +- net/sunrpc/sched.c | 50 +++++++++++++----------------------------- net/sunrpc/xprt.c | 28 ++++++++++++----------- 7 files changed, 36 insertions(+), 56 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 54743396b66..bbb0d58ee6a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2765,7 +2765,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); nfs4_schedule_state_recovery(clp); if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b962397004c..a2ef02824aa 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -731,7 +731,7 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) list_add_tail(&seqid->list, &sequence->list); if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) goto unlock; - rpc_sleep_on(&sequence->wait, task, NULL, NULL); + rpc_sleep_on(&sequence->wait, task, NULL); status = -EAGAIN; unlock: spin_unlock(&sequence->lock); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 7963ef0ffb8..503a937bdca 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -56,12 +56,10 @@ struct rpc_task { __u8 tk_cred_retry; /* - * timeout_fn to be executed by timer bottom half * callback to be executed after waking up * action next procedure for async tasks * tk_ops caller callbacks */ - void (*tk_timeout_fn)(struct rpc_task *); void (*tk_callback)(struct rpc_task *); void (*tk_action)(struct rpc_task *); const struct rpc_call_ops *tk_ops; @@ -231,7 +229,7 @@ void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, - rpc_action action, rpc_action timer); + rpc_action action); void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6dac3879228..dc6391bcda1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -408,13 +408,13 @@ gss_refresh_upcall(struct rpc_task *task) } spin_lock(&inode->i_lock); if (gss_cred->gc_upcall != NULL) - rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL); + rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { task->tk_timeout = 0; gss_cred->gc_upcall = gss_msg; /* gss_upcall_callback will release the reference to gss_upcall_msg */ atomic_inc(&gss_msg->count); - rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL); + rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback); } else err = gss_msg->msg.errno; spin_unlock(&inode->i_lock); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 3164a0871cf..f480c718b40 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -298,7 +298,7 @@ void rpcb_getport_async(struct rpc_task *task) /* Put self on queue 
before sending rpcbind request, in case * rpcb_getport_done completes before we return from rpc_run_task */ - rpc_sleep_on(&xprt->binding, task, NULL, NULL); + rpc_sleep_on(&xprt->binding, task, NULL); /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 35acdc39bfc..caf12fd6b6a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -56,29 +56,18 @@ struct workqueue_struct *rpciod_workqueue; * queue->lock and bh_disabled in order to avoid races within * rpc_run_timer(). */ -static inline void +static void __rpc_disable_timer(struct rpc_task *task) { dprintk("RPC: %5u disabling timer\n", task->tk_pid); - task->tk_timeout_fn = NULL; task->tk_timeout = 0; } -/* - * Default timeout handler if none specified by user - */ -static void -__rpc_default_timer(struct rpc_task *task) -{ - dprintk("RPC: %5u timeout (default timer)\n", task->tk_pid); - task->tk_status = -ETIMEDOUT; -} - /* * Set up a timer for the current task. */ -static inline void -__rpc_add_timer(struct rpc_task *task, rpc_action timer) +static void +__rpc_add_timer(struct rpc_task *task) { if (!task->tk_timeout) return; @@ -86,10 +75,6 @@ __rpc_add_timer(struct rpc_task *task, rpc_action timer) dprintk("RPC: %5u setting alarm for %lu ms\n", task->tk_pid, task->tk_timeout * 1000 / HZ); - if (timer) - task->tk_timeout_fn = timer; - else - task->tk_timeout_fn = __rpc_default_timer; set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); mod_timer(&task->tk_timer, jiffies + task->tk_timeout); } @@ -297,7 +282,6 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); */ static void rpc_make_runnable(struct rpc_task *task) { - BUG_ON(task->tk_timeout_fn); rpc_clear_queued(task); if (rpc_test_and_set_running(task)) return; @@ -327,7 +311,7 @@ static void rpc_make_runnable(struct rpc_task *task) * as it's on a wait queue. */ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, rpc_action timer) + rpc_action action) { dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); @@ -341,11 +325,11 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, BUG_ON(task->tk_callback != NULL); task->tk_callback = action; - __rpc_add_timer(task, timer); + __rpc_add_timer(task); } void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, rpc_action timer) + rpc_action action) { /* Mark the task as being activated if so needed */ rpc_set_active(task); @@ -354,7 +338,7 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, * Protect the queue operations. 
*/ spin_lock_bh(&q->lock); - __rpc_sleep_on(q, task, action, timer); + __rpc_sleep_on(q, task, action); spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(rpc_sleep_on); @@ -559,20 +543,15 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_status); static void rpc_run_timer(unsigned long ptr) { struct rpc_task *task = (struct rpc_task *)ptr; - void (*callback)(struct rpc_task *); + struct rpc_wait_queue *queue = task->tk_waitqueue; - if (RPC_IS_QUEUED(task)) { - struct rpc_wait_queue *queue = task->tk_waitqueue; - callback = task->tk_timeout_fn; - - dprintk("RPC: %5u running timer\n", task->tk_pid); - if (callback != NULL) - callback(task); - /* Note: we're already in a bh-safe context */ - spin_lock(&queue->lock); + spin_lock(&queue->lock); + if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) { + dprintk("RPC: %5u timeout\n", task->tk_pid); + task->tk_status = -ETIMEDOUT; rpc_wake_up_task_queue_locked(queue, task); - spin_unlock(&queue->lock); } + spin_unlock(&queue->lock); smp_mb__before_clear_bit(); clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); smp_mb__after_clear_bit(); @@ -580,6 +559,7 @@ static void rpc_run_timer(unsigned long ptr) static void __rpc_atrun(struct rpc_task *task) { + task->tk_status = 0; } /* @@ -588,7 +568,7 @@ static void __rpc_atrun(struct rpc_task *task) void rpc_delay(struct rpc_task *task, unsigned long delay) { task->tk_timeout = delay; - rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); + rpc_sleep_on(&delay_queue, task, __rpc_atrun); } EXPORT_SYMBOL_GPL(rpc_delay); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 6e2772217e5..9bf118c5431 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -188,9 +188,9 @@ out_sleep: task->tk_timeout = 0; task->tk_status = -EAGAIN; if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); + rpc_sleep_on(&xprt->resend, task, NULL); else - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt); @@ -238,9 +238,9 @@ out_sleep: task->tk_timeout = 0; task->tk_status = -EAGAIN; if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); + rpc_sleep_on(&xprt->resend, task, NULL); else - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); @@ -453,7 +453,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task) struct rpc_xprt *xprt = req->rq_xprt; task->tk_timeout = req->rq_timeout; - rpc_sleep_on(&xprt->pending, task, NULL, NULL); + rpc_sleep_on(&xprt->pending, task, NULL); } EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); @@ -652,7 +652,7 @@ void xprt_connect(struct rpc_task *task) task->tk_rqstp->rq_bytes_sent = 0; task->tk_timeout = xprt->connect_timeout; - rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); + rpc_sleep_on(&xprt->pending, task, xprt_connect_status); xprt->stat.connect_start = jiffies; xprt->ops->connect(task); } @@ -769,15 +769,17 @@ static void xprt_timer(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + if (task->tk_status != -ETIMEDOUT) + return; dprintk("RPC: %5u xprt_timer\n", task->tk_pid); - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); if (!req->rq_received) { if (xprt->ops->timer) xprt->ops->timer(task); - task->tk_status = -ETIMEDOUT; - } - spin_unlock(&xprt->transport_lock); + } else + task->tk_status = 0; + spin_unlock_bh(&xprt->transport_lock); } /** @@ -862,7 +864,7 @@ void xprt_transmit(struct 
rpc_task *task) if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; else if (!req->rq_received) - rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); + rpc_sleep_on(&xprt->pending, task, xprt_timer); spin_unlock_bh(&xprt->transport_lock); return; } @@ -873,7 +875,7 @@ void xprt_transmit(struct rpc_task *task) */ task->tk_status = status; if (status == -ECONNREFUSED) - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); } static inline void do_xprt_reserve(struct rpc_task *task) @@ -893,7 +895,7 @@ static inline void do_xprt_reserve(struct rpc_task *task) dprintk("RPC: waiting for request slot\n"); task->tk_status = -EAGAIN; task->tk_timeout = 0; - rpc_sleep_on(&xprt->backlog, task, NULL, NULL); + rpc_sleep_on(&xprt->backlog, task, NULL); } /** -- cgit v1.2.3-18-g5258 From f6a1cc89309f0ae847a9b6fe418d1c4215e5bc55 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 22 Feb 2008 17:06:55 -0500 Subject: SUNRPC: Add a (empty for the moment) destructor for rpc_wait_queues Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ fs/nfs/nfs4state.c | 6 +++++- include/linux/sunrpc/sched.h | 1 + net/sunrpc/auth_gss/auth_gss.c | 1 + net/sunrpc/sched.c | 21 ++++++++++++++------- net/sunrpc/xprt.c | 5 +++++ 6 files changed, 28 insertions(+), 8 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index c5c0175898f..06f064d8fbb 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -170,6 +170,8 @@ static void nfs4_shutdown_client(struct nfs_client *clp) BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners)); if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) nfs_idmap_delete(clp); + + rpc_destroy_wait_queue(&clp->cl_rpcwaitq); #endif } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a2ef02824aa..7775435ea7a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -292,8 +292,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct spin_unlock(&clp->cl_lock); if (sp == new) get_rpccred(cred); - else + else { + rpc_destroy_wait_queue(&new->so_sequence.wait); kfree(new); + } return sp; } @@ -310,6 +312,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) return; nfs4_remove_state_owner(clp, sp); spin_unlock(&clp->cl_lock); + rpc_destroy_wait_queue(&sp->so_sequence.wait); put_rpccred(cred); kfree(sp); } @@ -529,6 +532,7 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) spin_lock(&clp->cl_lock); nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); spin_unlock(&clp->cl_lock); + rpc_destroy_wait_queue(&lsp->ls_sequence.wait); kfree(lsp); } diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 503a937bdca..d39729e2b89 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -228,6 +228,7 @@ void rpc_killall_tasks(struct rpc_clnt *); void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); +void rpc_destroy_wait_queue(struct rpc_wait_queue *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action); void rpc_wake_up_queued_task(struct rpc_wait_queue *, diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index dc6391bcda1..ef638496180 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -266,6 +266,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) BUG_ON(!list_empty(&gss_msg->list)); if (gss_msg->ctx != NULL) gss_put_ctx(gss_msg->ctx); + 
rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue); kfree(gss_msg); } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index caf12fd6b6a..86aa897e7b0 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -214,6 +214,11 @@ void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname) } EXPORT_SYMBOL_GPL(rpc_init_wait_queue); +void rpc_destroy_wait_queue(struct rpc_wait_queue *queue) +{ +} +EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); + static int rpc_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) @@ -1020,11 +1025,20 @@ rpc_destroy_mempool(void) kmem_cache_destroy(rpc_task_slabp); if (rpc_buffer_slabp) kmem_cache_destroy(rpc_buffer_slabp); + rpc_destroy_wait_queue(&delay_queue); } int rpc_init_mempool(void) { + /* + * The following is not strictly a mempool initialisation, + * but there is no harm in doing it here + */ + rpc_init_wait_queue(&delay_queue, "delayq"); + if (!rpciod_start()) + goto err_nomem; + rpc_task_slabp = kmem_cache_create("rpc_tasks", sizeof(struct rpc_task), 0, SLAB_HWCACHE_ALIGN, @@ -1045,13 +1059,6 @@ rpc_init_mempool(void) rpc_buffer_slabp); if (!rpc_buffer_mempool) goto err_nomem; - if (!rpciod_start()) - goto err_nomem; - /* - * The following is not strictly a mempool initialisation, - * but there is no harm in doing it here - */ - rpc_init_wait_queue(&delay_queue, "delayq"); return 0; err_nomem: rpc_destroy_mempool(); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9bf118c5431..85199c64702 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1052,6 +1052,11 @@ static void xprt_destroy(struct kref *kref) xprt->shutdown = 1; del_timer_sync(&xprt->timer); + rpc_destroy_wait_queue(&xprt->binding); + rpc_destroy_wait_queue(&xprt->pending); + rpc_destroy_wait_queue(&xprt->sending); + rpc_destroy_wait_queue(&xprt->resend); + rpc_destroy_wait_queue(&xprt->backlog); /* * Tear down transport state and free the rpc_xprt */ -- cgit v1.2.3-18-g5258 From 36df9aae3158ce8fc4ede241169dc94ac910d884 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 18 Jul 2007 16:18:52 -0400 Subject: SUNRPC: Add a timer function to wait queues. This is designed to replace the timeout timer in the individual rpc_tasks. By putting the timer function in the wait queue, we will eventually be able to reduce the total number of timers in use by the RPC subsystem. 
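For illustration only: a compressed sketch of the data-structure change. rpc_timer_sketch and sketch_note_deadline() are illustrative stand-ins for the struct rpc_timer added below and for the queue-timer arming logic that the follow-up patch moves into __rpc_add_timer().

#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/timer.h>

/* One timer per wait queue instead of one timer per rpc_task. */
struct rpc_timer_sketch {
	struct timer_list timer;	/* shared by every task on the queue */
	struct list_head list;		/* tasks that requested a timeout */
	unsigned long expires;		/* earliest pending deadline */
};

/*
 * Rearm the queue timer only if the new deadline is earlier than the one
 * it is already set for.  Linking the task onto t->list is left out of
 * the sketch.
 */
static void sketch_note_deadline(struct rpc_timer_sketch *t, unsigned long expires)
{
	if (list_empty(&t->list) || time_before(expires, t->expires)) {
		t->expires = expires;
		mod_timer(&t->timer, expires);
	}
}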
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 9 +++++++++ net/sunrpc/sched.c | 46 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d39729e2b89..7751d3a0549 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -33,6 +33,8 @@ struct rpc_wait_queue; struct rpc_wait { struct list_head list; /* wait queue links */ struct list_head links; /* Links to related tasks */ + struct list_head timer_list; /* Timer list */ + unsigned long expires; }; /* @@ -191,6 +193,12 @@ struct rpc_task_setup { #define RPC_PRIORITY_HIGH (1) #define RPC_NR_PRIORITY (1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW) +struct rpc_timer { + struct timer_list timer; + struct list_head list; + unsigned long expires; +}; + /* * RPC synchronization objects */ @@ -203,6 +211,7 @@ struct rpc_wait_queue { unsigned char count; /* # task groups remaining serviced so far */ unsigned char nr; /* # tasks remaining for cookie */ unsigned short qlen; /* total # tasks waiting in queue */ + struct rpc_timer timer_list; #ifdef RPC_DEBUG const char * name; #endif diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 86aa897e7b0..29b1c1441f4 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -40,6 +40,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly; static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); +static void __rpc_queue_timer_fn(unsigned long ptr); /* * RPC tasks sit here while waiting for conditions to improve. @@ -59,8 +60,18 @@ struct workqueue_struct *rpciod_workqueue; static void __rpc_disable_timer(struct rpc_task *task) { + if (task->tk_timeout == 0) + return; dprintk("RPC: %5u disabling timer\n", task->tk_pid); task->tk_timeout = 0; + list_del(&task->u.tk_wait.timer_list); +} + +static void +rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires) +{ + queue->timer_list.expires = expires; + mod_timer(&queue->timer_list.timer, expires); } /* @@ -153,7 +164,6 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); } - list_del(&task->u.tk_wait.list); } /* @@ -162,10 +172,10 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) */ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) { + __rpc_disable_timer(task); if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); - else - list_del(&task->u.tk_wait.list); + list_del(&task->u.tk_wait.list); queue->qlen--; dprintk("RPC: %5u removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); @@ -198,6 +208,9 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c INIT_LIST_HEAD(&queue->tasks[i]); queue->maxpriority = nr_queues - 1; rpc_reset_waitqueue_priority(queue); + queue->qlen = 0; + setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); + INIT_LIST_HEAD(&queue->timer_list.list); #ifdef RPC_DEBUG queue->name = qname; #endif @@ -216,6 +229,7 @@ EXPORT_SYMBOL_GPL(rpc_init_wait_queue); void rpc_destroy_wait_queue(struct rpc_wait_queue *queue) { + del_timer_sync(&queue->timer_list.timer); } EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); @@ -369,7 +383,6 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task return; } - __rpc_disable_timer(task); 
__rpc_remove_wait_queue(queue, task); rpc_make_runnable(task); @@ -562,6 +575,31 @@ static void rpc_run_timer(unsigned long ptr) smp_mb__after_clear_bit(); } +static void __rpc_queue_timer_fn(unsigned long ptr) +{ + struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr; + struct rpc_task *task, *n; + unsigned long expires, now, timeo; + + spin_lock(&queue->lock); + expires = now = jiffies; + list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { + timeo = task->u.tk_wait.expires; + if (time_after_eq(now, timeo)) { + list_del_init(&task->u.tk_wait.timer_list); + dprintk("RPC: %5u timeout\n", task->tk_pid); + task->tk_status = -ETIMEDOUT; + rpc_wake_up_task_queue_locked(queue, task); + continue; + } + if (expires == now || time_after(expires, timeo)) + expires = timeo; + } + if (!list_empty(&queue->timer_list.list)) + rpc_set_queue_timer(queue, expires); + spin_unlock(&queue->lock); +} + static void __rpc_atrun(struct rpc_task *task) { task->tk_status = 0; -- cgit v1.2.3-18-g5258 From eb276c0e10187702928aeaa133e1d3dbaf3eafc7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 22 Feb 2008 17:27:59 -0500 Subject: SUNRPC: Switch tasks to using the rpc_waitqueue's timer function Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 9 +------ net/sunrpc/sched.c | 60 ++++++++------------------------------------ 2 files changed, 11 insertions(+), 58 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 7751d3a0549..0d7be1642dc 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -67,12 +67,6 @@ struct rpc_task { const struct rpc_call_ops *tk_ops; void * tk_calldata; - /* - * tk_timer is used for async processing by the RPC scheduling - * primitives. You should not access this directly unless - * you have a pathological interest in kernel oopses. - */ - struct timer_list tk_timer; /* kernel timer */ unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned short tk_flags; /* misc flags */ unsigned long tk_runstate; /* Task run status */ @@ -149,8 +143,7 @@ struct rpc_task_setup { #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 #define RPC_TASK_WAKEUP 2 -#define RPC_TASK_HAS_TIMER 3 -#define RPC_TASK_ACTIVE 4 +#define RPC_TASK_ACTIVE 3 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 29b1c1441f4..043eef4c15a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -58,13 +58,15 @@ struct workqueue_struct *rpciod_workqueue; * rpc_run_timer(). */ static void -__rpc_disable_timer(struct rpc_task *task) +__rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { if (task->tk_timeout == 0) return; dprintk("RPC: %5u disabling timer\n", task->tk_pid); task->tk_timeout = 0; list_del(&task->u.tk_wait.timer_list); + if (list_empty(&queue->timer_list.list)) + del_timer(&queue->timer_list.timer); } static void @@ -78,7 +80,7 @@ rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires) * Set up a timer for the current task. 
*/ static void -__rpc_add_timer(struct rpc_task *task) +__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { if (!task->tk_timeout) return; @@ -86,23 +88,10 @@ __rpc_add_timer(struct rpc_task *task) dprintk("RPC: %5u setting alarm for %lu ms\n", task->tk_pid, task->tk_timeout * 1000 / HZ); - set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); - mod_timer(&task->tk_timer, jiffies + task->tk_timeout); -} - -/* - * Delete any timer for the current task. Because we use del_timer_sync(), - * this function should never be called while holding queue->lock. - */ -static void -rpc_delete_timer(struct rpc_task *task) -{ - if (RPC_IS_QUEUED(task)) - return; - if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) { - del_singleshot_timer_sync(&task->tk_timer); - dprintk("RPC: %5u deleting timer\n", task->tk_pid); - } + task->u.tk_wait.expires = jiffies + task->tk_timeout; + if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) + rpc_set_queue_timer(queue, task->u.tk_wait.expires); + list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } /* @@ -172,7 +161,7 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) */ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) { - __rpc_disable_timer(task); + __rpc_disable_timer(queue, task); if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); list_del(&task->u.tk_wait.list); @@ -344,7 +333,7 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, BUG_ON(task->tk_callback != NULL); task->tk_callback = action; - __rpc_add_timer(task); + __rpc_add_timer(q, task); } void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, @@ -555,26 +544,6 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) } EXPORT_SYMBOL_GPL(rpc_wake_up_status); -/* - * Run a timeout function. - */ -static void rpc_run_timer(unsigned long ptr) -{ - struct rpc_task *task = (struct rpc_task *)ptr; - struct rpc_wait_queue *queue = task->tk_waitqueue; - - spin_lock(&queue->lock); - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) { - dprintk("RPC: %5u timeout\n", task->tk_pid); - task->tk_status = -ETIMEDOUT; - rpc_wake_up_task_queue_locked(queue, task); - } - spin_unlock(&queue->lock); - smp_mb__before_clear_bit(); - clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); - smp_mb__after_clear_bit(); -} - static void __rpc_queue_timer_fn(unsigned long ptr) { struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr; @@ -586,7 +555,6 @@ static void __rpc_queue_timer_fn(unsigned long ptr)