diff options
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/callback.c | 31 | ||||
-rw-r--r-- | fs/nfs/callback_xdr.c | 28 | ||||
-rw-r--r-- | fs/nfs/delegation.c | 19 | ||||
-rw-r--r-- | fs/nfs/delegation.h | 1 | ||||
-rw-r--r-- | fs/nfs/dir.c | 116 | ||||
-rw-r--r-- | fs/nfs/direct.c | 952 | ||||
-rw-r--r-- | fs/nfs/file.c | 50 | ||||
-rw-r--r-- | fs/nfs/idmap.c | 47 | ||||
-rw-r--r-- | fs/nfs/inode.c | 240 | ||||
-rw-r--r-- | fs/nfs/iostat.h | 164 | ||||
-rw-r--r-- | fs/nfs/mount_clnt.c | 19 | ||||
-rw-r--r-- | fs/nfs/nfs2xdr.c | 6 | ||||
-rw-r--r-- | fs/nfs/nfs3acl.c | 16 | ||||
-rw-r--r-- | fs/nfs/nfs3proc.c | 246 | ||||
-rw-r--r-- | fs/nfs/nfs3xdr.c | 8 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 180 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 1 | ||||
-rw-r--r-- | fs/nfs/nfs4xdr.c | 4 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 16 | ||||
-rw-r--r-- | fs/nfs/proc.c | 156 | ||||
-rw-r--r-- | fs/nfs/read.c | 108 | ||||
-rw-r--r-- | fs/nfs/unlink.c | 3 | ||||
-rw-r--r-- | fs/nfs/write.c | 300 |
23 files changed, 1716 insertions, 995 deletions
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index fcd97406a77..90c95adc8c1 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -14,6 +14,7 @@ #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> #include <linux/nfs_fs.h> +#include <linux/mutex.h> #include <net/inet_sock.h> @@ -31,7 +32,7 @@ struct nfs_callback_data { }; static struct nfs_callback_data nfs_callback_info; -static DECLARE_MUTEX(nfs_callback_sema); +static DEFINE_MUTEX(nfs_callback_mutex); static struct svc_program nfs4_callback_program; unsigned int nfs_callback_set_tcpport; @@ -55,7 +56,12 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) complete(&nfs_callback_info.started); - while (nfs_callback_info.users != 0 || !signalled()) { + for(;;) { + if (signalled()) { + if (nfs_callback_info.users == 0) + break; + flush_signals(current); + } /* * Listen for a request on the socket */ @@ -73,6 +79,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp) svc_process(serv, rqstp); } + svc_exit_thread(rqstp); nfs_callback_info.pid = 0; complete(&nfs_callback_info.stopped); unlock_kernel(); @@ -89,7 +96,7 @@ int nfs_callback_up(void) int ret = 0; lock_kernel(); - down(&nfs_callback_sema); + mutex_lock(&nfs_callback_mutex); if (nfs_callback_info.users++ || nfs_callback_info.pid != 0) goto out; init_completion(&nfs_callback_info.started); @@ -115,7 +122,7 @@ int nfs_callback_up(void) nfs_callback_info.serv = serv; wait_for_completion(&nfs_callback_info.started); out: - up(&nfs_callback_sema); + mutex_unlock(&nfs_callback_mutex); unlock_kernel(); return ret; out_destroy: @@ -133,13 +140,15 @@ int nfs_callback_down(void) int ret = 0; lock_kernel(); - down(&nfs_callback_sema); - if (--nfs_callback_info.users || nfs_callback_info.pid == 0) - goto out; - kill_proc(nfs_callback_info.pid, SIGKILL, 1); - wait_for_completion(&nfs_callback_info.stopped); -out: - up(&nfs_callback_sema); + mutex_lock(&nfs_callback_mutex); + nfs_callback_info.users--; + do { + if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0) + break; + if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0) + break; + } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0); + mutex_unlock(&nfs_callback_mutex); unlock_kernel(); return ret; } diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 7c33b9a81a9..05c38cf40b6 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res) static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res) { - uint32_t *savep; + uint32_t *savep = NULL; unsigned status = res->status; if (unlikely(status != 0)) @@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rqst *rqstp, struct xdr_stream *xdr_in, void *argp, struct xdr_stream *xdr_out, void *resp) { - struct callback_op *op; - unsigned int op_nr; + struct callback_op *op = &callback_ops[0]; + unsigned int op_nr = OP_CB_ILLEGAL; unsigned int status = 0; long maxlen; unsigned res; dprintk("%s: start\n", __FUNCTION__); status = decode_op_hdr(xdr_in, &op_nr); - if (unlikely(status != 0)) { - op_nr = OP_CB_ILLEGAL; - op = &callback_ops[0]; - } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) { - op_nr = OP_CB_ILLEGAL; - op = &callback_ops[0]; - status = htonl(NFS4ERR_OP_ILLEGAL); - } else - op = &callback_ops[op_nr]; + if (likely(status == 0)) { + switch (op_nr) { + case OP_CB_GETATTR: + case OP_CB_RECALL: + op = &callback_ops[op_nr]; + break; + default: + op_nr = OP_CB_ILLEGAL; + op = &callback_ops[0]; + status = htonl(NFS4ERR_OP_ILLEGAL); + } + } maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { @@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp decode_compound_hdr_arg(&xdr_in, &hdr_arg); hdr_res.taglen = hdr_arg.taglen; hdr_res.tag = hdr_arg.tag; + hdr_res.nops = NULL; encode_compound_hdr_res(&xdr_out, &hdr_res); for (;;) { diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index c6f07c1c71e..d3be923d4e4 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -421,3 +421,22 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) nfs_free_delegation(delegation); } } + +int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) +{ + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + int res = 0; + + if (nfsi->delegation_state == 0) + return 0; + spin_lock(&clp->cl_lock); + delegation = nfsi->delegation; + if (delegation != NULL) { + memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); + res = 1; + } + spin_unlock(&clp->cl_lock); + return res; +} diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 7a0b2bfce77..3858694652f 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -41,6 +41,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp); int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); +int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); static inline int nfs_have_delegation(struct inode *inode, int flags) { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a1554bead69..a23f3489416 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -34,6 +34,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "iostat.h" #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ @@ -53,7 +54,7 @@ static int nfs_rename(struct inode *, struct dentry *, static int nfs_fsync_dir(struct file *, struct dentry *, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); -struct file_operations nfs_dir_operations = { +const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, .read = generic_read_dir, .readdir = nfs_readdir, @@ -129,6 +130,9 @@ nfs_opendir(struct inode *inode, struct file *filp) { int res = 0; + dfprintk(VFS, "NFS: opendir(%s/%ld)\n", + inode->i_sb->s_id, inode->i_ino); + lock_kernel(); /* Call generic open code in order to cache credentials */ if (!res) @@ -172,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) unsigned long timestamp; int error; - dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); + dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", + __FUNCTION__, (long long)desc->entry->cookie, + page->index); again: timestamp = jiffies; @@ -244,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc) status; while((status = dir_decode(desc)) == 0) { - dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie); + dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", + __FUNCTION__, (unsigned long long)entry->cookie); if (entry->prev_cookie == *desc->dir_cookie) break; if (loop_count++ > 200) { @@ -252,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t *desc) schedule(); } } - dfprintk(VFS, "NFS: find_dirent() returns %d\n", status); return status; } @@ -276,7 +282,8 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc) if (status) break; - dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index); + dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n", + (unsigned long long)entry->cookie, desc->current_index); if (desc->file->f_pos == desc->current_index) { *desc->dir_cookie = entry->cookie; @@ -288,7 +295,6 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc) schedule(); } } - dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status); return status; } @@ -303,7 +309,9 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) struct page *page; int status; - dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); + dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", + __FUNCTION__, desc->page_index, + (long long) *desc->dir_cookie); page = read_cache_page(inode->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); @@ -324,7 +332,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) if (status < 0) dir_page_release(desc); out: - dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status); return status; read_error: page_cache_release(page); @@ -346,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) /* Always search-by-index from the beginning of the cache */ if (*desc->dir_cookie == 0) { - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos); + dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n", + (long long)desc->file->f_pos); desc->page_index = 0; desc->entry->cookie = desc->entry->prev_cookie = 0; desc->entry->eof = 0; desc->current_index = 0; } else - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", + (unsigned long long)*desc->dir_cookie); for (;;) { res = find_dirent_page(desc); @@ -365,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) schedule(); } } - dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res); + + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res); return res; } @@ -390,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, int loop_count = 0, res; - dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie); + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", + (unsigned long long)entry->cookie); for(;;) { unsigned d_type = DT_UNKNOWN; @@ -427,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, dir_page_release(desc); if (dentry != NULL) dput(dentry); - dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", + (unsigned long long)*desc->dir_cookie, res); return res; } @@ -453,7 +466,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct page *page = NULL; int status; - dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", + (unsigned long long)*desc->dir_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { @@ -485,7 +499,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->entry->cookie = desc->entry->prev_cookie = 0; desc->entry->eof = 0; out: - dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", + __FUNCTION__, status); return status; out_release: dir_page_release(desc); @@ -507,6 +522,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct nfs_fattr fattr; long res; + dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (long long)filp->f_pos); + nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); + lock_kernel(); res = nfs_revalidate_inode(NFS_SERVER(inode), inode); @@ -566,9 +586,12 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } } unlock_kernel(); - if (res < 0) - return res; - return 0; + if (res > 0) + res = 0; + dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + res); + return res; } loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) @@ -599,6 +622,10 @@ out: */ int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) { + dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + datasync); + return 0; } @@ -713,6 +740,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) parent = dget_parent(dentry); lock_kernel(); dir = parent->d_inode; + nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; if (!inode) { @@ -722,8 +750,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) } if (is_bad_inode(inode)) { - dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); goto out_bad; } @@ -755,6 +784,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) out_valid: unlock_kernel(); dput(parent); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 1; out_zap_parent: nfs_zap_caches(dir); @@ -771,6 +803,9 @@ out_zap_parent: d_drop(dentry); unlock_kernel(); dput(parent); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", + __FUNCTION__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 0; } @@ -844,6 +879,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru dfprintk(VFS, "NFS: lookup(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); + nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); res = ERR_PTR(-ENAMETOOLONG); if (dentry->d_name.len > NFS_SERVER(dir)->namelen) @@ -865,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } - res = ERR_PTR(-EACCES); inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); - if (!inode) + res = (struct dentry *)inode; + if (IS_ERR(res)) goto out_unlock; no_entry: res = d_add_unique(dentry, inode); @@ -912,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry struct dentry *res = NULL; int error; + dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + /* Check that we are indeed trying to open this file */ if (!is_atomic_open(dir, nd)) goto no_open; @@ -1057,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) return NULL; dentry->d_op = NFS_PROTO(dir)->dentry_ops; inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); - if (!inode) { + if (IS_ERR(inode)) { dput(dentry); return NULL; } @@ -1095,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, if (error < 0) goto out_err; } - error = -ENOMEM; inode = nfs_fhget(dentry->d_sb, fhandle, fattr); - if (inode == NULL) + error = PTR_ERR(inode); + if (IS_ERR(inode)) goto out_err; d_instantiate(dentry, inode); return 0; @@ -1119,8 +1158,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, int error; int open_flags = 0; - dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: create(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; @@ -1153,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) struct iattr attr; int status; - dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); if (!new_valid_dev(rdev)) return -EINVAL; @@ -1186,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct iattr attr; int error; - dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; @@ -1212,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; - dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); lock_kernel(); nfs_begin_data_update(dir); @@ -1241,6 +1280,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, atomic_read(&dentry->d_count)); + nfs_inc_stats(dir, NFSIOS_SILLYRENAME); #ifdef NFS_PARANOIA if (!dentry->d_inode) @@ -1268,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name); sillycounter++; sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); - dfprintk(VFS, "trying to rename %s to %s\n", - dentry->d_name.name, silly); + dfprintk(VFS, "NFS: trying to rename %s to %s\n", + dentry->d_name.name, silly); sdentry = lookup_one_len(silly, dentry->d_parent, slen); /* @@ -1640,6 +1680,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) struct rpc_cred *cred; int res = 0; + nfs_inc_stats(inode, NFSIOS_VFSACCESS); + if (mask == 0) goto out; /* Is this sys_access() ? */ @@ -1679,13 +1721,15 @@ force_lookup: res = PTR_ERR(cred); unlock_kernel(); out: + dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", + inode->i_sb->s_id, inode->i_ino, mask, res); return res; out_notsup: res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) res = generic_permission(inode, mask, NULL); unlock_kernel(); - return res; + goto out; } /* diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4e9b3a1b36c..0f583cb16dd 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -7,11 +7,11 @@ * * There are important applications whose performance or correctness * depends on uncached access to file data. Database clusters - * (multiple copies of the same instance running on separate hosts) + * (multiple copies of the same instance running on separate hosts) * implement their own cache coherency protocol that subsumes file - * system cache protocols. Applications that process datasets - * considerably larger than the client's memory do not always benefit - * from a local cache. A streaming video server, for instance, has no + * system cache protocols. Applications that process datasets + * considerably larger than the client's memory do not always benefit + * from a local cache. A streaming video server, for instance, has no * need to cache the contents of a file. * * When an application requests uncached I/O, all read and write requests @@ -34,6 +34,7 @@ * 08 Jun 2003 Port to 2.5 APIs --cel * 31 Mar 2004 Handle direct I/O without VFS support --cel * 15 Sep 2004 Parallel async reads --cel + * 04 May 2005 support O_DIRECT with aio --cel * */ @@ -54,10 +55,10 @@ #include <asm/uaccess.h> #include <asm/atomic.h> +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_VFS -#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT) -static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty); static kmem_cache_t *nfs_direct_cachep; /* @@ -65,38 +66,78 @@ static kmem_cache_t *nfs_direct_cachep; */ struct nfs_direct_req { struct kref kref; /* release manager */ - struct list_head list; /* nfs_read_data structs */ - wait_queue_head_t wait; /* wait for i/o completion */ + + /* I/O parameters */ + struct list_head list, /* nfs_read/write_data structs */ + rewrite_list; /* saved nfs_write_data structs */ + struct nfs_open_context *ctx; /* file open context info */ + struct kiocb * iocb; /* controlling i/o request */ + struct inode * inode; /* target file of i/o */ + unsigned long user_addr; /* location of user's buffer */ + size_t user_count; /* total bytes to move */ + loff_t pos; /* starting offset in file */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ - atomic_t complete, /* i/os we're waiting for */ - count, /* bytes actually processed */ + + /* completion state */ + spinlock_t lock; /* protect completion state */ + int outstanding; /* i/os we're waiting for */ + ssize_t count, /* bytes actually processed */ error; /* any reported error */ + struct completion completion; /* wait for i/o completion */ + + /* commit state */ + struct nfs_write_data * commit_data; /* special write_data for commits */ + int flags; +#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ +#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ + struct nfs_writeverf verf; /* unstable write verifier */ }; +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync); +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); /** - * nfs_get_user_pages - find and set up pages underlying user's buffer - * rw: direction (read or write) - * user_addr: starting address of this segment of user's buffer - * count: size of this segment - * @pages: returned array of page struct pointers underlying user's buffer + * nfs_direct_IO - NFS address space operation for direct I/O + * @rw: direction (read or write) + * @iocb: target I/O control block + * @iov: array of vectors that define I/O buffer + * @pos: offset in file to begin the operation + * @nr_segs: size of iovec array + * + * The presence of this routine in the address space ops vector means + * the NFS client supports direct I/O. However, we shunt off direct + * read and write requests before the VFS gets them, so this method + * should never be called. */ -static inline int -nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, - struct page ***pages) +ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) +{ + struct dentry *dentry = iocb->ki_filp->f_dentry; + + dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", + dentry->d_name.name, (long long) pos, nr_segs); + + return -EINVAL; +} + +static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +{ + int i; + for (i = 0; i < npages; i++) { + struct page *page = pages[i]; + if (do_dirty && !PageCompound(page)) + set_page_dirty_lock(page); + page_cache_release(page); + } + kfree(pages); +} + +static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) { int result = -ENOMEM; unsigned long page_count; size_t array_size; - /* set an arbitrary limit to prevent type overflow */ - /* XXX: this can probably be as large as INT_MAX */ - if (size > MAX_DIRECTIO_SIZE) { - *pages = NULL; - return -EFBIG; - } - page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; page_count -= user_addr >> PAGE_SHIFT; @@ -108,75 +149,117 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, page_count, (rw == READ), 0, *pages, NULL); up_read(¤t->mm->mmap_sem); - /* - * If we got fewer pages than expected from get_user_pages(), - * the user buffer runs off the end of a mapping; return EFAULT. - */ - if (result >= 0 && result < page_count) { - nfs_free_user_pages(*pages, result, 0); + if (result != page_count) { + /* + * If we got fewer pages than expected from + * get_user_pages(), the user buffer runs off the + * end of a mapping; return EFAULT. + */ + if (result >= 0) { + nfs_free_user_pages(*pages, result, 0); + result = -EFAULT; + } else + kfree(*pages); *pages = NULL; - result = -EFAULT; } } return result; } -/** - * nfs_free_user_pages - tear down page struct array - * @pages: array of page struct pointers underlying target buffer - * @npages: number of pages in the array - * @do_dirty: dirty the pages as we release them - */ -static void -nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { - int i; - for (i = 0; i < npages; i++) { - struct page *page = pages[i]; - if (do_dirty && !PageCompound(page)) - set_page_dirty_lock(page); - page_cache_release(page); - } - kfree(pages); + struct nfs_direct_req *dreq; + + dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + if (!dreq) + return NULL; + + kref_init(&dreq->kref); + init_completion(&dreq->completion); + INIT_LIST_HEAD(&dreq->list); + INIT_LIST_HEAD(&dreq->rewrite_list); + dreq->iocb = NULL; + dreq->ctx = NULL; + spin_lock_init(&dreq->lock); + dreq->outstanding = 0; + dreq->count = 0; + dreq->error = 0; + dreq->flags = 0; + + return dreq; } -/** - * nfs_direct_req_release - release nfs_direct_req structure for direct read - * @kref: kref object embedded in an nfs_direct_req structure - * - */ static void nfs_direct_req_release(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + + if (dreq->ctx != NULL) + put_nfs_open_context(dreq->ctx); kmem_cache_free(nfs_direct_cachep, dreq); } -/** - * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read - * @count: count of bytes for the read request - * @rsize: local rsize setting +/* + * Collects and returns the final error value/byte-count. + */ +static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) +{ + ssize_t result = -EIOCBQUEUED; + + /* Async requests don't wait here */ + if (dreq->iocb) + goto out; + + result = wait_for_completion_interruptible(&dreq->completion); + + if (!result) + result = dreq->error; + if (!result) + result = dreq->count; + +out: + kref_put(&dreq->kref, nfs_direct_req_release); + return (ssize_t) result; +} + +/* + * We must hold a reference to all the pages in this direct read request + * until the RPCs complete. This could be long *after* we are woken up in + * nfs_direct_wait (for instance, if someone hits ^C on a slow server). * + * In addition, synchronous I/O uses a stack-allocated iocb. Thus we + * can't trust the iocb is still valid here if this is a synchronous + * request. If the waiter is woken prematurely, the iocb is long gone. + */ +static void nfs_direct_complete(struct nfs_direct_req *dreq) +{ + nfs_free_user_pages(dreq->pages, dreq->npages, 1); + + if (dreq->iocb) { + long res = (long) dreq->error; + if (!res) + res = (long) dreq->count; + aio_complete(dreq->iocb, res, 0); + } + complete_all(&dreq->completion); + + kref_put(&dreq->kref, nfs_direct_req_release); +} + +/* * Note we also set the number of requests we have in the dreq when we are * done. This prevents races with I/O completion so we will always wait * until all requests have been dispatched and completed. */ -static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize) +static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) { struct list_head *list; struct nfs_direct_req *dreq; - unsigned int reads = 0; unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + dreq = nfs_direct_req_alloc(); if (!dreq) return NULL; - kref_init(&dreq->kref); - init_waitqueue_head(&dreq->wait); - INIT_LIST_HEAD(&dreq->list); - atomic_set(&dreq->count, 0); - atomic_set(&dreq->error, 0); - list = &dreq->list; for(;;) { struct nfs_read_data *data = nfs_readdata_alloc(rpages); @@ -196,72 +279,70 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int list_add(&data->pages, list); data->req = (struct nfs_page *) dreq; - reads++; + dreq->outstanding++; if (nbytes <= rsize) break; nbytes -= rsize; } kref_get(&dreq->kref); - atomic_set(&dreq->complete, reads); return dreq; } -/** - * nfs_direct_read_result - handle a read reply for a direct read request - * @data: address of NFS READ operation control block - * @status: status of this NFS READ operation - * - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). - */ -static void nfs_direct_read_result(struct nfs_read_data *data, int status) +static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { + struct nfs_read_data *data = calldata; struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - if (likely(status >= 0)) - atomic_add(data->res.count, &dreq->count); + if (nfs_readpage_result(task, data) != 0) + return; + + spin_lock(&dreq->lock); + + if (likely(task->tk_status >= 0)) + dreq->count += data->res.count; else - atomic_set(&dreq->error, status); + dreq->error = task->tk_status; - if (unlikely(atomic_dec_and_test(&dreq->complete))) { - nfs_free_user_pages(dreq->pages, dreq->npages, 1); - wake_up(&dreq->wait); - kref_put(&dreq->kref, nfs_direct_req_release); + if (--dreq->outstanding) { + spin_unlock(&dreq->lock); + return; } + + spin_unlock(&dreq->lock); + nfs_direct_complete(dreq); } -/** - * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read - * @dreq: address of nfs_direct_req struct for this request - * @inode: target inode - * @ctx: target file open context - * @user_addr: starting address of this segment of user's buffer - * @count: size of this segment - * @file_offset: offset in file to begin the operation - * +static const struct rpc_call_ops nfs_read_direct_ops = { + .rpc_call_done = nfs_direct_read_result, + .rpc_release = nfs_readdata_release, +}; + +/* * For each nfs_read_data struct that was allocated on the list, dispatch * an NFS READ operation */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, - struct inode *inode, struct nfs_open_context *ctx, - unsigned long user_addr, size_t count, loff_t file_offset) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) { + struct nfs_open_context *ctx = dreq->ctx; + struct inode *inode = ctx->dentry->d_inode; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; + size_t count = dreq->user_count; + loff_t pos = dreq->pos; + size_t rsize = NFS_SERVER(inode)->rsize; unsigned int curpage, pgbase; - unsigned int rsize = NFS_SERVER(inode)->rsize; curpage = 0; - pgbase = user_addr & ~PAGE_MASK; + pgbase = dreq->user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; - unsigned int bytes; + size_t bytes; bytes = rsize; if (count < rsize) bytes = count; + BUG_ON(list_empty(list)); data = list_entry(list->next, struct nfs_read_data, pages); list_del_init(&data->pages); @@ -269,7 +350,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; - data->args.offset = file_offset; + data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = &pages[curpage]; data->args.count = bytes; @@ -277,77 +358,38 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, data->res.eof = 0; data->res.count = bytes; + rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, + &nfs_read_direct_ops, data); NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long) inode; - data->complete = nfs_direct_read_result; lock_kernel(); rpc_execute(&data->task); unlock_kernel(); - dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n", + dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), bytes, (unsigned long long)data->args.offset); - file_offset += bytes; + pos += bytes; pgbase += bytes; curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; count -= bytes; } while (count != 0); + BUG_ON(!list_empty(list)); } -/** - * nfs_direct_read_wait - wait for I/O completion for direct reads - * @dreq: request on which we are to wait - * @intr: whether or not this wait can be interrupted - * - * Collects and returns the final error value/byte-count. - */ -static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) -{ - int result = 0; - - if (intr) { - result = wait_event_interruptible(dreq->wait, - (atomic_read(&dreq->complete) == 0)); - } else { - wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0)); - } - - if (!result) - result = atomic_read(&dreq->error); - if (!result) - result = atomic_read(&dreq->count); - - kref_put(&dreq->kref, nfs_direct_req_release); - return (ssize_t) result; -} - -/** - * nfs_direct_read_seg - Read in one iov segment. Generate separate - * read RPCs for each "rsize" bytes. - * @inode: target inode - * @ctx: target file open context - * @user_addr: starting address of this segment of user's buffer - * @count: size of this segment - * @file_offset: offset in file to begin the operation - * @pages: array of addresses of page structs defining user's buffer - * @nr_pages: number of pages in the array - * - */ -static ssize_t nfs_direct_read_seg(struct inode *inode, - struct nfs_open_context *ctx, unsigned long user_addr, - size_t count, loff_t file_offset, struct page **pages, - unsigned int nr_pages) +static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) { ssize_t result; sigset_t oldset; + struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; @@ -355,284 +397,350 @@ static ssize_t nfs_direct_read_seg(struct inode *inode, if (!dreq) return -ENOMEM; + dreq->user_addr = user_addr; + dreq->user_count = count; + dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; + dreq->inode = inode; + dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + if (!is_sync_kiocb(iocb)) + dreq->iocb = iocb; + nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, - file_offset); - result = nfs_direct_read_wait(dreq, clnt->cl_intr); + nfs_direct_read_schedule(dreq); + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; } -/** - * nfs_direct_read - For each iov segment, map the user's buffer - * then generate read RPCs. - * @inode: target inode - * @ctx: target file open context - * @iov: array of vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * - * We've already pushed out any non-direct writes so that this read - * will see them when we read from the server. - */ -static ssize_t -nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, - const struct iovec *iov, loff_t file_offset, - unsigned long nr_segs) +static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) { - ssize_t tot_bytes = 0; - unsigned long seg = 0; - - while ((seg < nr_segs) && (tot_bytes >= 0)) { - ssize_t result; - int page_count; - struct page **pages; - const struct iovec *vec = &iov[seg++]; - unsigned long user_addr = (unsigned long) vec->iov_base; - size_t size = vec->iov_len; - - page_count = nfs_get_user_pages(READ, user_addr, size, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - if (tot_bytes > 0) - break; - return page_count; - } + list_splice_init(&dreq->rewrite_list, &dreq->list); + while (!list_empty(&dreq->list)) { + struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_release(data); + } +} - result = nfs_direct_read_seg(inode, ctx, user_addr, size, - file_offset, pages, page_count); +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) +{ + struct list_head *pos; - if (result <= 0) { - if (tot_bytes > 0) - break; - return result; - } - tot_bytes += result; - file_offset += result; - if (result < size) - break; + list_splice_init(&dreq->rewrite_list, &dreq->list); + list_for_each(pos, &dreq->list) + dreq->outstanding++; + dreq->count = 0; + + nfs_direct_write_schedule(dreq, FLUSH_STABLE); +} + +static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + + /* Call the NFS version-specific code */ + if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) + return; + if (unlikely(task->tk_status < 0)) { + dreq->error = task->tk_status; + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } + if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { + dprintk("NFS: %5u commit verify failed\n", task->tk_pid); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } - return tot_bytes; + dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status); + nfs_direct_write_complete(dreq, data->inode); } -/** - * nfs_direct_write_seg - Write out one iov segment. Generate separate - * write RPCs for each "wsize" bytes, then commit. - * @inode: target inode - * @ctx: target file open context - * user_addr: starting address of this segment of user's buffer - * count: size of this segment - * file_offset: offset in file to begin the operation - * @pages: array of addresses of page structs defining user's buffer - * nr_pages: size of pages array - */ -static ssize_t nfs_direct_write_seg(struct inode *inode, - struct nfs_open_context *ctx, unsigned long user_addr, - size_t count, loff_t file_offset, struct page **pages, - int nr_pages) +static const struct rpc_call_ops nfs_commit_direct_ops = { + .rpc_call_done = nfs_direct_commit_result, + .rpc_release = nfs_commit_release, +}; + +static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) { - const unsigned int wsize = NFS_SERVER(inode)->wsize; - size_t request; - int curpage, need_commit; - ssize_t result, tot_bytes; - struct nfs_writeverf first_verf; - struct nfs_write_data *wdata; - - wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); - if (!wdata) - return -ENOMEM; + struct nfs_write_data *data = dreq->commit_data; + struct rpc_task *task = &data->task; - wdata->inode = inode; - wdata->cred = ctx->cred; - wdata->args.fh = NFS_FH(inode); - wdata->args.context = ctx; - wdata->args.stable = NFS_UNSTABLE; - if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) - wdata->args.stable = NFS_FILE_SYNC; - wdata->res.fattr = &wdata->fattr; - wdata->res.verf = &wdata->verf; + data->inode = dreq->inode; + data->cred = dreq->ctx->cred; - nfs_begin_data_update(inode); -retry: - need_commit = 0; - tot_bytes = 0; - curpage = 0; - request = count; - wdata->args.pgbase = user_addr & ~PAGE_MASK; - wdata->args.offset = file_offset; - do { - wdata->args.count = request; - if (wdata->args.count > wsize) - wdata->args.count = wsize; - wdata->args.pages = &pages[curpage]; + data->args.fh = NFS_FH(data->inode); + data->args.offset = dreq->pos; + data->args.count = dreq->user_count; + data->res.count = 0; + data->res.fattr = &data->fattr; + data->res.verf = &data->verf; - dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n", - wdata->args.count, (long long) wdata->args.offset, - user_addr + tot_bytes, wdata->args.pgbase, curpage); + rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC, + &nfs_commit_direct_ops, data); + NFS_PROTO(data->inode)->commit_setup(data, 0); - lock_kernel(); - result = NFS_PROTO(inode)->write(wdata); - unlock_kernel(); + data->task.tk_priority = RPC_PRIORITY_NORMAL; + data->task.tk_cookie = (unsigned long)data->inode; + /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ + dreq->commit_data = NULL; - if (result <= 0) { - if (tot_bytes > 0) - break; - goto out; - } + dprintk("NFS: %5u initiated commit call\n", task->tk_pid); - if (tot_bytes == 0) - memcpy(&first_verf.verifier, &wdata->verf.verifier, - sizeof(first_verf.verifier)); - if (wdata->verf.committed != NFS_FILE_SYNC) { - need_commit = 1; - if (memcmp(&first_verf.verifier, &wdata->verf.verifier, - sizeof(first_verf.verifier))) - goto sync_retry; - } + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); +} - tot_bytes += result; +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +{ + int flags = dreq->flags; - /* in case of a short write: stop now, let the app recover */ - if (result < wdata->args.count) + dreq->flags = 0; + switch (flags) { + case NFS_ODIRECT_DO_COMMIT: + nfs_direct_commit_schedule(dreq); break; + case NFS_ODIRECT_RESCHED_WRITES: + nfs_direct_write_reschedule(dreq); + break; + default: + nfs_end_data_update(inode); + if (dreq->commit_data != NULL) + nfs_commit_free(dreq->commit_data); + nfs_direct_free_writedata(dreq); + nfs_direct_complete(dreq); + } +} - wdata->args.offset += result; - wdata->args.pgbase += result; - curpage += wdata->args.pgbase >> PAGE_SHIFT; - wdata->args.pgbase &= ~PAGE_MASK; - request -= result; - } while (request != 0); +static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +{ + dreq->commit_data = nfs_commit_alloc(0); + if (dreq->commit_data != NULL) + dreq->commit_data->req = (struct nfs_page *) dreq; +} +#else +static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +{ + dreq->commit_data = NULL; +} - /* - * Commit data written so far, even in the event of an error - */ - if (need_commit) { - wdata->args.count = tot_bytes; - wdata->args.offset = file_offset; +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +{ + nfs_end_data_update(inode); + nfs_direct_free_writedata(dreq); + nfs_direct_complete(dreq); +} +#endif - lock_kernel(); - result = NFS_PROTO(inode)->commit(wdata); - unlock_kernel(); +static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) +{ + struct list_head *list; + struct nfs_direct_req *dreq; + unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + dreq = nfs_direct_req_alloc(); + if (!dreq) + return NULL; + + list = &dreq->list; + for(;;) { + struct nfs_write_data *data = nfs_writedata_alloc(wpages); - if (result < 0 || memcmp(&first_verf.verifier, - &wdata->verf.verifier, - sizeof(first_verf.verifier)) != 0) - goto sync_retry; + if (unlikely(!data)) { + while (!list_empty(list)) { + data = list_entry(list->next, + struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_free(data); + } + kref_put(&dreq->kref, nfs_direct_req_release); + return NULL; + } + + INIT_LIST_HEAD(&data->pages); + list_add(&data->pages, list); + + data->req = (struct nfs_page *) dreq; + dreq->outstanding++; + if (nbytes <= wsize) + break; + nbytes -= wsize; } - result = tot_bytes; -out: - nfs_end_data_update(inode); - nfs_writedata_free(wdata); - return result; + nfs_alloc_commit_data(dreq); -sync_retry: - wdata->args.stable = NFS_FILE_SYNC; - goto retry; + kref_get(&dreq->kref); + return dreq; } -/** - * nfs_direct_write - For each iov segment, map the user's buffer - * then generate write and commit RPCs. - * @inode: target inode - * @ctx: target file open context - * @iov: array of vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * - * Upon return, generic_file_direct_IO invalidates any cached pages - * that non-direct readers might access, so they will pick up these - * writes immediately. - */ -static ssize_t nfs_direct_write(struct inode *inode, - struct nfs_open_context *ctx, const struct iovec *iov, - loff_t file_offset, unsigned long nr_segs) +static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { - ssize_t tot_bytes = 0; - unsigned long seg = 0; - - while ((seg < nr_segs) && (tot_bytes >= 0)) { - ssize_t result; - int page_count; - struct page **pages; - const struct iovec *vec = &iov[seg++]; - unsigned long user_addr = (unsigned long) vec->iov_base; - size_t size = vec->iov_len; - - page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages); - if (page_count < 0) { - nfs_free_user_pages(pages, 0, 0); - if (tot_bytes > 0) - break; - return page_count; - } + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + int status = task->tk_status; + + if (nfs_writeback_done(task, data) != 0) + return; + + spin_lock(&dreq->lock); - result = nfs_direct_write_seg(inode, ctx, user_addr, size, - file_offset, pages, page_count); - nfs_free_user_pages(pages, page_count, 0); + if (likely(status >= 0)) + dreq->count += data->res.count; + else + dreq->error = task->tk_status; - if (result <= 0) { - if (tot_bytes > 0) + if (data->res.verf->committed != NFS_FILE_SYNC) { + switch (dreq->flags) { + case 0: + memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf)); + dreq->flags = NFS_ODIRECT_DO_COMMIT; break; - return result; + case NFS_ODIRECT_DO_COMMIT: + if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) { + dprintk("NFS: %5u write verify failed\n", task->tk_pid); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + } } - tot_bytes += result; - file_offset += result; - if (result < size) - break; } - return tot_bytes; + /* In case we have to resend */ + data->args.stable = NFS_FILE_SYNC; + + spin_unlock(&dreq->lock); } -/** - * nfs_direct_IO - NFS address space operation for direct I/O - * rw: direction (read or write) - * @iocb: target I/O control block - * @iov: array of vectors that define I/O buffer - * file_offset: offset in file to begin the operation - * nr_segs: size of iovec array - * +/* + * NB: Return the value of the first error return code. Subsequent + * errors after the first one are ignored. */ -ssize_t -nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t file_offset, unsigned long nr_segs) +static void nfs_direct_write_release(void *calldata) { - ssize_t result = -EINVAL; - struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx; - struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - /* - * No support for async yet - */ - if (!is_sync_kiocb(iocb)) - return result; - - ctx = (struct nfs_open_context *)file->private_data; - switch (rw) { - case READ: - dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n", - dentry->d_name.name, file_offset, nr_segs); - - result = nfs_direct_read(inode, ctx, iov, - file_offset, nr_segs); - break; - case WRITE: - dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n", - dentry->d_name.name, file_offset, nr_segs); - - result = nfs_direct_write(inode, ctx, iov, - file_offset, nr_segs); - break; - default: - break; + spin_lock(&dreq->lock); + if (--dreq->outstanding) { + spin_unlock(&dreq->lock); + return; } + spin_unlock(&dreq->lock); + + nfs_direct_write_complete(dreq, data->inode); +} + +static const struct rpc_call_ops nfs_write_direct_ops = { + .rpc_call_done = nfs_direct_write_result, + .rpc_release = nfs_direct_write_release, +}; + +/* + * For each nfs_write_data struct that was allocated on the list, dispatch + * an NFS WRITE operation + */ +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) +{ + struct nfs_open_context *ctx = dreq->ctx; + struct inode *inode = ctx->dentry->d_inode; + struct list_head *list = &dreq->list; + struct page **pages = dreq->pages; + size_t count = dreq->user_count; + loff_t pos = dreq->pos; + size_t wsize = NFS_SERVER(inode)->wsize; + unsigned int curpage, pgbase; + + curpage = 0; + pgbase = dreq->user_addr & ~PAGE_MASK; + do { + struct nfs_write_data *data; + size_t bytes; + + bytes = wsize; + if (count < wsize) + bytes = count; + + BUG_ON(list_empty(list)); + data = list_entry(list->next, struct nfs_write_data, pages); + list_move_tail(&data->pages, &dreq->rewrite_list); + + data->inode = inode; + data->cred = ctx->cred; + data->args.fh = NFS_FH(inode); + data->args.context = ctx; + data->args.offset = pos; + data->args.pgbase = pgbase; + data->args.pages = &pages[curpage]; + data->args.count = bytes; + data->res.fattr = &data->fattr; + data->res.count = bytes; + data->res.verf = &data->verf; + + rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, + &nfs_write_direct_ops, data); + NFS_PROTO(inode)->write_setup(data, sync); + + data->task.tk_priority = RPC_PRIORITY_NORMAL; + data->task.tk_cookie = (unsigned long) inode; + + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); + + dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + bytes, + (unsigned long long)data->args.offset); + + pos += bytes; + pgbase += bytes; + curpage += pgbase >> PAGE_SHIFT; + pgbase &= ~PAGE_MASK; + + count -= bytes; + } while (count != 0); + BUG_ON(!list_empty(list)); +} + +static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) +{ + ssize_t result; + sigset_t oldset; + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_direct_req *dreq; + size_t wsize = NFS_SERVER(inode)->wsize; + int sync = 0; + + dreq = nfs_direct_write_alloc(count, wsize); + if (!dreq) + return -ENOMEM; + if (dreq->commit_data == NULL || count < wsize) + sync = FLUSH_STABLE; + + dreq->user_addr = user_addr; + dreq->user_count = count; + dreq->pos = pos; + dreq->pages = pages; + dreq->npages = nr_pages; + dreq->inode = inode; + dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + if (!is_sync_kiocb(iocb)) + dreq->iocb = iocb; + + nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); + + nfs_begin_data_update(inode); + + rpc_clnt_sigmask(clnt, &oldset); + nfs_direct_write_schedule(dreq, sync); + result = nfs_direct_wait(dreq); + rpc_clnt_sigunmask(clnt, &oldset); + return result; } @@ -640,49 +748,40 @@ nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block * @buf: user's buffer into which to read data - * count: number of bytes to read - * pos: byte offset in file where reading starts + * @count: number of bytes to read + * @pos: byte offset in file where reading starts * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if * the request starts before the end of the file. For that check * to work, we must generate a GETATTR before each direct read, and * even then there is a window between the GETATTR and the subsequent - * READ where the file size could change. So our preference is simply + * READ where the file size could change. Our preference is simply * to do all reads the application wants, and the server will take * care of managing the end of file boundary. - * + * * This function also eliminates unnecessarily updating the file's * atime locally, as the NFS server sets the file's atime, and this * client must read the updated atime from the server back into its * cache. */ -ssize_t -nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { ssize_t retval = -EINVAL; - loff_t *ppos = &iocb->ki_pos; + int page_count; + struct page **pages; struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx = - (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct iovec iov = { - .iov_base = buf, - .iov_len = count, - }; dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); - if (!is_sync_kiocb(iocb)) - goto out; if (count < 0) goto out; retval = -EFAULT; - if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len)) + if (!access_ok(VERIFY_WRITE, buf, count)) goto out; retval = 0; if (!count) @@ -692,9 +791,16 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t if (retval) goto out; - retval = nfs_direct_read(inode, ctx, &iov, pos, 1); + retval = nfs_get_user_pages(READ, (unsigned long) buf, + count, &pages); + if (retval < 0) + goto out; + page_count = retval; + + retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos, + pages, page_count); if (retval > 0) - *ppos = pos + retval; + iocb->ki_pos = pos + retval; out: return retval; @@ -704,8 +810,8 @@ out: * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @buf: user's buffer from which to write data - * count: number of bytes to write - * pos: byte offset in file where writing starts + * @count: number of bytes to write + * @pos: byte offset in file where writing starts * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -725,28 +831,19 @@ out: * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t -nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { ssize_t retval; + int page_count; + struct page **pages; struct file *file = iocb->ki_filp; - struct nfs_open_context *ctx = - (struct nfs_open_context *) file->private_data; struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct iovec iov = { - .iov_base = (char __user *)buf, - }; dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); - retval = -EINVAL; - if (!is_sync_kiocb(iocb)) - goto out; - retval = generic_write_checks(file, &pos, &count, 0); if (retval) goto out; @@ -757,19 +854,35 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, retval = 0; if (!count) goto out; - iov.iov_len = count, retval = -EFAULT; - if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) + if (!access_ok(VERIFY_READ, buf, count)) goto out; retval = nfs_sync_mapping(mapping); if (retval) goto out; - retval = nfs_direct_write(inode, ctx, &iov, pos, 1); + retval = nfs_get_user_pages(WRITE, (unsigned long) buf, + count, &pages); + if (retval < 0) + goto out; + page_count = retval; + + retval = nfs_direct_write(iocb, (unsigned long) buf, count, + pos, pages, page_count); + + /* + * XXX: nfs_end_data_update() already ensures this file's + * cached data is subsequently invalidated. Do we really + * need to call invalidate_inode_pages2() again here? + * + * For aio writes, this invalidation will almost certainly + * occur before the writes complete. Kind of racey. + */ if (mapping->nrpages) invalidate_inode_pages2(mapping); + if (retval > 0) iocb->ki_pos = pos + retval; @@ -777,11 +890,16 @@ out: return retval; } +/** + * nfs_init_directcache - create a slab cache for nfs_direct_req structures + * + */ int nfs_init_directcache(void) { nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", sizeof(struct nfs_direct_req), - 0, SLAB_RECLAIM_ACCOUNT, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), NULL, NULL); if (nfs_direct_cachep == NULL) return -ENOMEM; @@ -789,6 +907,10 @@ int nfs_init_directcache(void) return 0; } +/** + * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures + * + */ void nfs_destroy_directcache(void) { if (kmem_cache_destroy(nfs_direct_cachep)) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7a79fbe9f53..f1df2c8d925 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -32,6 +32,7 @@ #include <asm/system.h> #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -48,7 +49,7 @@ static int nfs_check_flags(int flags); static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); -struct file_operations nfs_file_operations = { +const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read = do_sync_read, .write = do_sync_write, @@ -102,18 +103,15 @@ static int nfs_check_flags(int flags) static int nfs_file_open(struct inode *inode, struct file *filp) { - struct nfs_server *server = NFS_SERVER(inode); - int (*open)(struct inode *, struct file *); int res; res = nfs_check_flags(filp->f_flags); if (res) return res; + nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); - /* Do NFSv4 open() call */ - if ((open = server->rpc_ops->file_open) != NULL) - res = open(inode, filp); + res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); unlock_kernel(); return res; } @@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp) /* Ensure that dirty pages are flushed out with the right creds */ if (filp->f_mode & FMODE_WRITE) filemap_fdatawrite(filp->f_mapping); + nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return NFS_PROTO(inode)->file_release(inode, filp); } @@ -199,6 +198,7 @@ nfs_file_flush(struct file *file) if ((file->f_mode & FMODE_WRITE) == 0) return 0; + nfs_inc_stats(inode, NFSIOS_VFSFLUSH); lock_kernel(); /* Ensure that data+attribute caches are up to date after close() */ status = nfs_wb_all(inode); @@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) (unsigned long) count, (unsigned long) pos); result = nfs_revalidate_file(inode, iocb->ki_filp); + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); return result; @@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync) dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); lock_kernel(); status = nfs_wb_all(inode); if (!status) { @@ -316,6 +318,16 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse return status; } +static void nfs_invalidate_page(struct page *page, unsigned long offset) +{ + /* FIXME: we really should cancel any unstarted writes on this page */ +} + +static int nfs_release_page(struct page *page, gfp_t gfp) +{ + return !nfs_wb_page(page->mapping->host, page); +} + struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, @@ -324,6 +336,8 @@ struct address_space_operations nfs_file_aops = { .writepages = nfs_writepages, .prepare_write = nfs_prepare_write, .commit_write = nfs_commit_write, + .invalidatepage = nfs_invalidate_page, + .releasepage = nfs_release_page, #ifdef CONFIG_NFS_DIRECTIO .direct_IO = nfs_direct_IO, #endif @@ -365,6 +379,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t if (!count) goto out; + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, buf, count, pos); out: return result; @@ -376,15 +391,17 @@ out_swapfile: static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) { - struct file_lock *cfl; + struct file_lock cfl; struct inode *inode = filp->f_mapping->host; int status = 0; lock_kernel(); /* Try local locking first */ - cfl = posix_test_lock(filp, fl); - if (cfl != NULL) { - locks_copy_lock(fl, cfl); + if (posix_test_lock(filp, fl, &cfl)) { + fl->fl_start = cfl.fl_start; + fl->fl_end = cfl.fl_end; + fl->fl_type = cfl.fl_type; + fl->fl_pid = cfl.fl_pid; goto out; } @@ -425,10 +442,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; - sigset_t oldset; int status; - rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset); /* * Flush all pending writes before doing anything * with locks.. @@ -446,17 +461,14 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) else status = do_vfs_lock(filp, fl); unlock_kernel(); - rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); return status; } static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; - sigset_t oldset; int status; - rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset); /* * Flush all pending writes before doing anything * with locks.. @@ -489,7 +501,6 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) nfs_sync_mapping(filp->f_mapping); nfs_zap_caches(inode); out: - rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); return status; } @@ -504,9 +515,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); - - if (!inode) - return -EINVAL; + nfs_inc_stats(inode, NFSIOS_VFSLOCK); /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && @@ -531,9 +540,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags); - if (!inode) - return -EINVAL; - /* * No BSD flocks over NFS allowed. * Note: we could try to fake a POSIX lock request here by diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 821edd30333..3fab5b0cfc5 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -35,6 +35,7 @@ */ #include <linux/module.h> +#include <linux/mutex.h> #include <linux/init.h> #include <linux/types.h> #include <linux/slab.h> @@ -74,8 +75,8 @@ struct idmap { struct dentry *idmap_dentry; wait_queue_head_t idmap_wq; struct idmap_msg idmap_im; - struct semaphore idmap_lock; /* Serializes upcalls */ - struct semaphore idmap_im_lock; /* Protects the hashtable */ + struct mutex idmap_lock; /* Serializes upcalls */ + struct mutex idmap_im_lock; /* Protects the hashtable */ struct idmap_hashtable idmap_user_hash; struct idmap_hashtable idmap_group_hash; }; @@ -101,11 +102,9 @@ nfs_idmap_new(struct nfs4_client *clp) if (clp->cl_idmap != NULL) return; - if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) + if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) return; - memset(idmap, 0, sizeof(*idmap)); - snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), "%s/idmap", clp->cl_rpcclient->cl_pathname); @@ -116,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp) return; } - init_MUTEX(&idmap->idmap_lock); - init_MUTEX(&idmap->idmap_im_lock); + mutex_init(&idmap->idmap_lock); + mutex_init(&idmap->idmap_im_lock); init_waitqueue_head(&idmap->idmap_wq); idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; @@ -132,6 +131,8 @@ nfs_idmap_delete(struct nfs4_client *clp) if (!idmap) return; + dput(idmap->idmap_dentry); + idmap->idmap_dentry = NULL; rpc_unlink(idmap->idmap_path); clp->cl_idmap = NULL; kfree(idmap); @@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, if (namelen >= IDMAP_NAMESZ) return -EINVAL; - down(&idmap->idmap_lock); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_lock); + mutex_lock(&idmap->idmap_im_lock); he = idmap_lookup_name(h, name, namelen); if (he != NULL) { @@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, } set_current_state(TASK_UNINTERRUPTIBLE); - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); schedule(); current->state = TASK_RUNNING; remove_wait_queue(&idmap->idmap_wq, &wq); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); if (im->im_status & IDMAP_STATUS_SUCCESS) { *id = im->im_id; @@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, out: memset(im, 0, sizeof(*im)); - up(&idmap->idmap_im_lock); - up(&idmap->idmap_lock); + mutex_unlock(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_lock); return (ret); } @@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, im = &idmap->idmap_im; - down(&idmap->idmap_lock); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_lock); + mutex_lock(&idmap->idmap_im_lock); he = idmap_lookup_id(h, id); if (he != 0) { @@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, } set_current_state(TASK_UNINTERRUPTIBLE); - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); schedule(); current->state = TASK_RUNNING; remove_wait_queue(&idmap->idmap_wq, &wq); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); if (im->im_status & IDMAP_STATUS_SUCCESS) { if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) @@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, out: memset(im, 0, sizeof(*im)); - up(&idmap->idmap_im_lock); - up(&idmap->idmap_lock); + mutex_unlock(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_lock); return ret; } @@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (copy_from_user(&im_in, src, mlen) != 0) return (-EFAULT); - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); ret = mlen; im->im_status = im_in.im_status; @@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); ret = mlen; out: - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); return ret; } @@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) if (msg->errno >= 0) return; - down(&idmap->idmap_im_lock); + mutex_lock(&idmap->idmap_im_lock); im->im_status = IDMAP_STATUS_LOOKUPFAIL; wake_up(&idmap->idmap_wq); - up(&idmap->idmap_im_lock); + mutex_unlock(&idmap->idmap_im_lock); } /* diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a77ee95b7ef..2f7656b911b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -26,6 +26,7 @@ #include <linux/unistd.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/stats.h> +#include <linux/sunrpc/metrics.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> @@ -42,6 +43,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_PARANOIA 1 @@ -65,6 +67,7 @@ static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static int nfs_show_stats(struct seq_file *, struct vfsmount *); static void nfs_zap_acl_cache(struct inode *); static struct rpc_program nfs_program; @@ -78,6 +81,7 @@ static struct super_operations nfs_sops = { .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -103,7 +107,7 @@ static struct rpc_version * nfs_version[] = { static struct rpc_program nfs_program = { .name = "nfs", .number = NFS_PROGRAM, - .nrvers = sizeof(nfs_version) / sizeof(nfs_version[0]), + .nrvers = ARRAY_SIZE(nfs_version), .version = nfs_version, .stats = &nfs_rpcstat, .pipe_dir_name = "/nfs", @@ -118,7 +122,7 @@ static struct rpc_version * nfsacl_version[] = { struct rpc_program nfsacl_program = { .name = "nfsacl", .number = NFS_ACL_PROGRAM, - .nrvers = sizeof(nfsacl_version) / sizeof(nfsacl_version[0]), + .nrvers = ARRAY_SIZE(nfsacl_version), .version = nfsacl_version, .stats = &nfsacl_rpcstat, }; @@ -133,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) static int nfs_write_inode(struct inode *inode, int sync) { - int flags = sync ? FLUSH_WAIT : 0; + int flags = sync ? FLUSH_SYNC : 0; int ret; ret = nfs_commit_inode(inode, flags); @@ -237,7 +241,6 @@ static struct inode * nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) { struct nfs_server *server = NFS_SB(sb); - struct inode *rooti; int error; error = server->rpc_ops->getroot(server, rootfh, fsinfo); @@ -246,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f return ERR_PTR(error); } - rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); - if (!rooti) - return ERR_PTR(-ENOMEM); - return rooti; + return nfs_fhget(sb, rootfh, fsinfo->fattr); } /* @@ -277,6 +277,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) sb->s_magic = NFS_SUPER_MAGIC; + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + return -ENOMEM; + root_inode = nfs_get_root(sb, &server->fh, &fsinfo); /* Did getting the root inode fail? */ if (IS_ERR(root_inode)) { @@ -290,6 +294,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) } sb->s_root->d_op = server->rpc_ops->dentry_ops; + /* mount time stamp, in seconds */ + server->mount_time = jiffies; + /* Get some general file system info */ if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) @@ -396,6 +403,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + /* create transport and client */ xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { @@ -579,7 +589,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf) } -static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { static struct proc_nfs_info { int flag; @@ -588,28 +598,26 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); char buf[12]; char *proto; - seq_printf(m, ",v%d", nfss->rpc_ops->version); + seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ) + if (nfss->acregmin != 3*HZ || showdefaults) seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ) + if (nfss->acregmax != 60*HZ || showdefaults) seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ) + if (nfss->acdirmin != 30*HZ || showdefaults) seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ) + if (nfss->acdirmax != 60*HZ || showdefaults) seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) @@ -629,8 +637,96 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) proto = buf; } seq_printf(m, ",proto=%s", proto); + seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", nfss->retrans_count); +} + +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + + nfs_show_mount_options(m, nfss, 0); + seq_puts(m, ",addr="); seq_escape(m, nfss->hostname, " \t\n\\"); + + return 0; +} + +static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) +{ + int i, cpu; + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + struct rpc_auth *auth = nfss->client->cl_auth; + struct nfs_iostats totals = { }; + + seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); + + /* + * Display all mount option settings + */ + seq_printf(m, "\n\topts:\t"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); + nfs_show_mount_options(m, nfss, 1); + + seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); + + seq_printf(m, "\n\tcaps:\t"); + seq_printf(m, "caps=0x%x", nfss->caps); + seq_printf(m, ",wtmult=%d", nfss->wtmult); + seq_printf(m, ",dtsize=%d", nfss->dtsize); + seq_printf(m, ",bsize=%d", nfss->bsize); + seq_printf(m, ",namelen=%d", nfss->namelen); + +#ifdef CONFIG_NFS_V4 + if (nfss->rpc_ops->version == 4) { + seq_printf(m, "\n\tnfsv4:\t"); + seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); + seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); + seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + } +#endif + + /* + * Display security flavor in effect for this mount + */ + seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); + if (auth->au_flavor) + seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); + + /* + * Display superblock I/O counters + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct nfs_iostats *stats; + + if (!cpu_possible(cpu)) + continue; + + preempt_disable(); + stats = per_cpu_ptr(nfss->io_stats, cpu); + + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + totals.events[i] += stats->events[i]; + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + totals.bytes[i] += stats->bytes[i]; + + preempt_enable(); + } + + seq_printf(m, "\n\tevents:\t"); + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + seq_printf(m, "%lu ", totals.events[i]); + seq_printf(m, "\n\tbytes:\t"); + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); + seq_printf(m, "\n"); + + rpc_print_iostats(m, nfss->client); + return 0; } @@ -660,6 +756,8 @@ static void nfs_zap_caches_locked(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; @@ -751,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) .fh = fh, .fattr = fattr }; - struct inode *inode = NULL; + struct inode *inode = ERR_PTR(-ENOENT); unsigned long hash; if ((fattr->valid & NFS_ATTR_FATTR) == 0) @@ -764,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) hash = nfs_fattr_to_ino_t(fattr); - if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc))) + inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc); + if (inode == NULL) { + inode = ERR_PTR(-ENOMEM); goto out_no_inode; + } if (inode->i_state & I_NEW) { struct nfs_inode *nfsi = NFS_I(inode); @@ -834,7 +935,7 @@ out: return inode; out_no_inode: - printk("nfs_fhget: iget failed\n"); + dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode)); goto out; } @@ -847,6 +948,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) struct nfs_fattr fattr; int error; + nfs_inc_stats(inode, NFSIOS_VFSSETATTR); + if (attr->ia_valid & ATTR_SIZE) { if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) attr->ia_valid &= ~ATTR_SIZE; @@ -859,11 +962,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) lock_kernel(); nfs_begin_data_update(inode); - /* Write all dirty data if we're changing file permissions or size */ - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { - filemap_write_and_wait(inode->i_mapping); - nfs_wb_all(inode); - } + /* Write all dirty data */ + filemap_write_and_wait(inode->i_mapping); + nfs_wb_all(inode); /* * Return any delegations if we're going to change ACLs */ @@ -902,6 +1003,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); inode->i_size = attr->ia_size; vmtruncate(inode, attr->ia_size); } @@ -949,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int err; /* Flush out writes to the server in order to update c/mtime */ - nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); + nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT); /* * We may force a getattr if the user cares about atime. @@ -973,7 +1075,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) return err; } -struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred) +static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) { struct nfs_open_context *ctx; @@ -981,6 +1083,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp if (ctx != NULL) { atomic_set(&ctx->count, 1); ctx->dentry = dget(dentry); + ctx->vfsmnt = mntget(mnt); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; @@ -1011,6 +1114,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) if (ctx->cred != NULL) put_rpccred(ctx->cred); dput(ctx->dentry); + mntput(ctx->vfsmnt); kfree(ctx); } } @@ -1019,7 +1123,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ -void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { struct inode *inode = filp->f_dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); @@ -1051,7 +1155,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c return ctx; } -void nfs_file_clear_open_context(struct file *filp) +static void nfs_file_clear_open_context(struct file *filp) { struct inode *inode = filp->f_dentry->d_inode; struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; @@ -1076,7 +1180,7 @@ int nfs_open(struct inode *inode, struct file *filp) cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); - ctx = alloc_nfs_open_context(filp->f_dentry, cred); + ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred); put_rpccred(cred); if (ctx == NULL) return -ENOMEM; @@ -1185,6 +1289,7 @@ int nfs_attribute_timeout(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? -ESTALE : 0; @@ -1201,6 +1306,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); if (S_ISREG(inode->i_mode)) nfs_sync_mapping(mapping); invalidate_inode_pages2(mapping); @@ -1299,39 +1405,37 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; + /* Has the inode gone and changed behind our back? */ + if (nfsi->fileid != fattr->fileid + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + return -EIO; + } + /* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && - nfsi->change_attr != fattr->change_attr) { + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) { + if (nfsi->change_attr == fattr->change_attr) + goto out; nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } - /* Has the inode gone and changed behind our back? */ - if (nfsi->fileid != fattr->fileid - || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { - return -EIO; - } - - cur_size = i_size_read(inode); - new_isize = nfs_size_to_loff_t(fattr->size); - /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } - if (cur_size != new_isize) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (nfsi->npages == 0) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } + + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); + if (cur_size != new_isize && nfsi->npages == 0) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) @@ -1343,6 +1447,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (inode->i_nlink != fattr->nlink) nfsi->cache_validity |= NFS_INO_INVALID_ATTR; +out: if (!timespec_equal(&inode->i_atime, &fattr->atime)) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; @@ -1481,15 +1586,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_change_attribute = jiffies; } - if ((fattr->valid & NFS_ATTR_FATTR_V4) - && nfsi->change_attr != fattr->change_attr) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - nfsi->change_attr = fattr->change_attr; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfsi->cache_change_attribute = jiffies; - } - /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; @@ -1519,8 +1615,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blksize = fattr->du.nfs2.blocksize; } + if ((fattr->valid & NFS_ATTR_FATTR_V4)) { + if (nfsi->change_attr != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + nfsi->change_attr = fattr->change_attr; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfsi->cache_change_attribute = jiffies; + } else + invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA); + } + /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { @@ -1637,10 +1745,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, #endif /* CONFIG_NFS_V3 */ s = ERR_PTR(-ENOMEM); - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) goto out_err; - memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); @@ -1679,7 +1786,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, s->s_flags = flags; - error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); + error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); @@ -1712,6 +1819,7 @@ static void nfs_kill_super(struct super_block *s) rpciod_down(); /* release rpciod */ + nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); } @@ -1738,6 +1846,7 @@ static struct super_operations nfs4_sops = { .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -1800,6 +1909,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); @@ -1941,10 +2053,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, return ERR_PTR(-EINVAL); } - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) return ERR_PTR(-ENOMEM); - memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); @@ -1996,7 +2107,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, s->s_flags = flags; - error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); + error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); @@ -2024,10 +2135,12 @@ static void nfs4_kill_super(struct super_block *sb) if (server->client != NULL && !IS_ERR(server->client)) rpc_shutdown_client(server->client); - rpciod_down(); /* release rpciod */ destroy_nfsv4_state(server); + rpciod_down(); + + nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); } @@ -2163,7 +2276,8 @@ static int nfs_init_inodecache(void) { nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", sizeof(struct nfs_inode), - 0, SLAB_RECLAIM_ACCOUNT, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), init_once, NULL); if (nfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h new file mode 100644 index 00000000000..6350ecbde58 --- /dev/null +++ b/fs/nfs/iostat.h @@ -0,0 +1,164 @@ +/* + * linux/fs/nfs/iostat.h + * + * Declarations for NFS client per-mount statistics + * + * Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com> + * + * NFS client per-mount statistics provide information about the health of + * the NFS client and the health of each NFS mount point. Generally these + * are not for detailed problem diagnosis, but simply to indicate that there + * is a problem. + * + * These counters are not meant to be human-readable, but are meant to be + * integrated into system monitoring tools such as "sar" and "iostat". As + * such, the counters are sampled by the tools over time, and are never + * zeroed after a file system is mounted. Moving averages can be computed + * by the tools by taking the difference between two instantaneous samples + * and dividing that by the time between the samples. + */ + +#ifndef _NFS_IOSTAT +#define _NFS_IOSTAT + +#define NFS_IOSTAT_VERS "1.0" + +/* + * NFS byte counters + * + * 1. SERVER - the number of payload bytes read from or written to the + * server by the NFS client via an NFS READ or WRITE request. + * + * 2. NORMAL - the number of bytes read or written by applications via + * the read(2) and write(2) system call interfaces. + * + * 3. DIRECT - the number of bytes read or written from files opened + * with the O_DIRECT flag. + * + * These counters give a view of the data throughput into and out of the NFS + * client. Comparing the number of bytes requested by an application with the + * number of bytes the client requests from the server can provide an + * indication of client efficiency (per-op, cache hits, etc). + * + * These counters can also help characterize which access methods are in + * use. DIRECT by itself shows whether there is any O_DIRECT traffic. + * NORMAL + DIRECT shows how much data is going through the system call + * interface. A large amount of SERVER traffic without much NORMAL or + * DIRECT traffic shows that applications are using mapped files. + * + * NFS page counters + * + * These count the number of pages read or written via nfs_readpage(), + * nfs_readpages(), or their write equivalents. + */ +enum nfs_stat_bytecounters { + NFSIOS_NORMALREADBYTES = 0, + NFSIOS_NORMALWRITTENBYTES, + NFSIOS_DIRECTREADBYTES, + NFSIOS_DIRECTWRITTENBYTES, + NFSIOS_SERVERREADBYTES, + NFSIOS_SERVERWRITTENBYTES, + NFSIOS_READPAGES, + NFSIOS_WRITEPAGES, + __NFSIOS_BYTESMAX, +}; + +/* + * NFS event counters + * + * These counters provide a low-overhead way of monitoring client activity + * without enabling NFS trace debugging. The counters show the rate at + * which VFS requests are made, and how often the client invalidates its + * data and attribute caches. This allows system administrators to monitor + * such things as how close-to-open is working, and answer questions such + * as "why are there so many GETATTR requests on the wire?" + * + * They also count anamolous events such as short reads and writes, silly + * renames due to close-after-delete, and operations that change the size + * of a file (such operations can often be the source of data corruption + * if applications aren't using file locking properly). + */ +enum nfs_stat_eventcounters { + NFSIOS_INODEREVALIDATE = 0, + NFSIOS_DENTRYREVALIDATE, + NFSIOS_DATAINVALIDATE, + NFSIOS_ATTRINVALIDATE, + NFSIOS_VFSOPEN, + NFSIOS_VFSLOOKUP, + NFSIOS_VFSACCESS, + NFSIOS_VFSUPDATEPAGE, + NFSIOS_VFSREADPAGE, + NFSIOS_VFSREADPAGES, + NFSIOS_VFSWRITEPAGE, + NFSIOS_VFSWRITEPAGES, + NFSIOS_VFSGETDENTS, + NFSIOS_VFSSETATTR, + NFSIOS_VFSFLUSH, + NFSIOS_VFSFSYNC, + NFSIOS_VFSLOCK, + NFSIOS_VFSRELEASE, + NFSIOS_CONGESTIONWAIT, + NFSIOS_SETATTRTRUNC, + NFSIOS_EXTENDWRITE, + NFSIOS_SILLYRENAME, + NFSIOS_SHORTREAD, + NFSIOS_SHORTWRITE, + NFSIOS_DELAY, + __NFSIOS_COUNTSMAX, +}; + +#ifdef __KERNEL__ + +#include <linux/percpu.h> +#include <linux/cache.h> + +struct nfs_iostats { + unsigned long long bytes[__NFSIOS_BYTESMAX]; + unsigned long events[__NFSIOS_COUNTSMAX]; +} ____cacheline_aligned; + +static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(server->io_stats, cpu); + iostats->events[stat] ++; + put_cpu_no_resched(); +} + +static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat) +{ + nfs_inc_server_stats(NFS_SERVER(inode), stat); +} + +static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend) +{ + struct nfs_iostats *iostats; + int cpu; + + cpu = get_cpu(); + iostats = per_cpu_ptr(server->io_stats, cpu); + iostats->bytes[stat] += addend; + put_cpu_no_resched(); +} + +static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend) +{ + nfs_add_server_stats(NFS_SERVER(inode), stat, addend); +} + +static inline struct nfs_iostats *nfs_alloc_iostats(void) +{ + return alloc_percpu(struct nfs_iostats); +} + +static inline void nfs_free_iostats(struct nfs_iostats *stats) +{ + if (stats != NULL) + free_percpu(stats); +} + +#endif +#endif diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index db99b8f678f..445abb4d421 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, struct mnt_fhstatus result = { .fh = fh }; + struct rpc_message msg = { + .rpc_argp = path, + .rpc_resp = &result, + }; char hostname[32]; int status; - int call; dprintk("NFS: nfs_mount(%08x:%s)\n", (unsigned)ntohl(addr->sin_addr.s_addr), path); @@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh, if (IS_ERR(mnt_clnt)) return PTR_ERR(mnt_clnt); - call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT; - status = rpc_call(mnt_clnt, call, path, &result, 0); + if (version == NFS_MNT3_VERSION) + msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; + else + msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; + + status = rpc_call_sync(mnt_clnt, &msg, 0); return status < 0? status : (result.status? -EACCES : 0); } @@ -137,6 +144,8 @@ static struct rpc_procinfo mnt_procedures[] = { .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus, .p_bufsiz = MNT_dirpath_sz << 2, + .p_statidx = MNTPROC_MNT, + .p_name = "MOUNT", }, }; @@ -146,6 +155,8 @@ static struct rpc_procinfo mnt3_procedures[] = { .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, .p_bufsiz = MNT_dirpath_sz << 2, + .p_statidx = MOUNTPROC3_MNT, + .p_name = "MOUNT", }, }; @@ -174,7 +185,7 @@ static struct rpc_stat mnt_stats; static struct rpc_program mnt_program = { .name = "mount", .number = NFS_MNT_PROGRAM, - .nrvers = sizeof(mnt_version)/sizeof(mnt_version[0]), + .nrvers = ARRAY_SIZE(mnt_version), .version = mnt_version, .stats = &mnt_stats, }; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 7fc0560c89c..f0015fa876e 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat) .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ - .p_timer = timer \ + .p_timer = timer, \ + .p_statidx = NFSPROC_##proc, \ + .p_name = #proc, \ } struct rpc_procinfo nfs_procedures[] = { PROC(GETATTR, fhandle, attrstat, 1), @@ -704,6 +706,6 @@ struct rpc_procinfo nfs_procedures[] = { struct rpc_version nfs_version2 = { .number = 2, - .nrprocs = sizeof(nfs_procedures)/sizeof(nfs_procedures[0]), + .nrprocs = ARRAY_SIZE(nfs_procedures), .procs = nfs_procedures }; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 6a5bbc0ae94..33287879bd2 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) struct nfs3_getaclres res = { .fattr = &fattr, }; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = &res, + }; struct posix_acl *acl; int status, count; @@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) return NULL; dprintk("NFS call getacl\n"); - status = rpc_call(server->client_acl, ACLPROC3_GETACL, - &args, &res, 0); + msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; + status = rpc_call_sync(server->client_acl, &msg, 0); dprintk("NFS reply getacl: %d\n", status); /* pages may have been allocated at the xdr layer. */ @@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, .acl_access = acl, .pages = pages, }; + struct rpc_message msg = { + .rpc_argp = &args, + .rpc_resp = &fattr, + }; int status, count; status = -EOPNOTSUPP; @@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, dprintk("NFS call setacl\n"); nfs_begin_data_update(inode); - status = rpc_call(server->client_acl, ACLPROC3_SETACL, - &args, &fattr, 0); + msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; + status = rpc_call_sync(server->client_acl, &msg, 0); spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; spin_unlock(&inode->i_lock); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ed67567f055..cf186f0d2b3 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -19,6 +19,8 @@ #include <linux/smp_lock.h> #include <linux/nfs_mount.h> +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_PROC extern struct rpc_procinfo nfs3_procedures[]; @@ -41,27 +43,14 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) return res; } -static inline int -nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) -{ - struct rpc_message msg = { - .rpc_proc = &clnt->cl_procinfo[proc], - .rpc_argp = argp, - .rpc_resp = resp, - }; - return nfs3_rpc_wrapper(clnt, &msg, flags); -} - -#define rpc_call(clnt, proc, argp, resp, flags) \ - nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags) -#define rpc_call_sync(clnt, msg, flags) \ - nfs3_rpc_wrapper(clnt, msg, flags) +#define rpc_call_sync(clnt, msg, flags) nfs3_rpc_wrapper(clnt, msg, flags) static int -nfs3_async_handle_jukebox(struct rpc_task *task) +nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) { if (task->tk_status != -EJUKEBOX) return 0; + nfs_inc_stats(inode, NFSIOS_DELAY); task->tk_status = 0; rpc_restart_call(task); rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); @@ -72,14 +61,21 @@ static int do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("%s: call fsinfo\n", __FUNCTION__); nfs_fattr_init(info->fattr); - status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0); + status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status); if (!(info->fattr->valid & NFS_ATTR_FATTR)) { - status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0); + msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; + msg.rpc_resp = info->fattr; + status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); } return status; @@ -107,12 +103,16 @@ static int nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; int status; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call(server->client, NFS3PROC_GETATTR, - fhandle, fattr, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply getattr: %d\n", status); return status; } @@ -126,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, .fh = NFS_FH(inode), .sattr = sattr, }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_SETATTR], + .rpc_argp = &arg, + .rpc_resp = fattr, + }; int status; dprintk("NFS call setattr\n"); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) nfs_setattr_update_inode(inode, sattr); dprintk("NFS reply setattr: %d\n", status); @@ -152,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name, .fh = fhandle, .fattr = fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_LOOKUP], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call lookup %s\n", name->name); nfs_fattr_init(&dir_attr); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0); - if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR, - fhandle, fattr, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { + msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; + msg.rpc_argp = fhandle; + msg.rpc_resp = fattr; + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + } dprintk("NFS reply lookup: %d\n", status); if (status >= 0) status = nfs_refresh_inode(dir, &dir_attr); @@ -180,7 +193,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], .rpc_argp = &arg, .rpc_resp = &res, - .rpc_cred = entry->cred + .rpc_cred = entry->cred, }; int mode = entry->mask; int status; @@ -226,12 +239,16 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page, .pglen = pglen, .pages = &page }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK], + .rpc_argp = &args, + .rpc_resp = &fattr, + }; int status; dprintk("NFS call readlink\n"); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK, - &args, &fattr, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, &fattr); dprintk("NFS reply readlink: %d\n", status); return status; @@ -327,6 +344,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; mode_t mode = sattr->ia_mode; int status; @@ -343,8 +365,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, again: nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0); - nfs_post_op_update_inode(dir, &dir_attr); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nfs_refresh_inode(dir, &dir_attr); /* If the server doesn't support the exclusive creation semantics, * try again with simple 'guarded' mode. */ @@ -447,7 +469,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task) struct rpc_message *msg = &task->tk_msg; struct nfs_fattr *dir_attr; - if (nfs3_async_handle_jukebox(task)) + if (nfs3_async_handle_jukebox(task, dir->d_inode)) return 1; if (msg->rpc_argp) { dir_attr = (struct nfs_fattr*)msg->rpc_resp; @@ -474,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, .fromattr = &old_dir_attr, .toattr = &new_dir_attr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); nfs_fattr_init(&old_dir_attr); nfs_fattr_init(&new_dir_attr); - status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); nfs_post_op_update_inode(old_dir, &old_dir_attr); nfs_post_op_update_inode(new_dir, &new_dir_attr); dprintk("NFS reply rename: %d\n", status); @@ -500,12 +527,17 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) .dir_attr = &dir_attr, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_LINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call link %s\n", name->name); nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); nfs_post_op_update_inode(inode, &fattr); dprintk("NFS reply link: %d\n", status); @@ -531,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, .fh = fhandle, .fattr = fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; if (path->len > NFS3_MAXPATHLEN) @@ -538,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, dprintk("NFS call symlink %s -> %s\n", name->name, path->name); nfs_fattr_init(&dir_attr); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply symlink: %d\n", status); return status; @@ -560,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int mode = sattr->ia_mode; int status; @@ -569,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; @@ -591,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name) .name = name->name, .len = name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR], + .rpc_argp = &arg, + .rpc_resp = &dir_attr, + }; int status; dprintk("NFS call rmdir %s\n", name->name); nfs_fattr_init(&dir_attr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); dprintk("NFS reply rmdir: %d\n", status); return status; @@ -672,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fh, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD], + .rpc_argp = &arg, + .rpc_resp = &res, + }; mode_t mode = sattr->ia_mode; int status; @@ -690,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); if (status != 0) goto out; @@ -707,11 +759,16 @@ static int nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *stat) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSSTAT], + .rpc_argp = fhandle, + .rpc_resp = stat, + }; int status; dprintk("NFS call fsstat\n"); nfs_fattr_init(stat->fattr); - status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply statfs: %d\n", status); return status; } @@ -720,11 +777,16 @@ static int nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); + status = rpc_call_sync(server->client_sys, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); return status; } @@ -733,40 +795,34 @@ static int nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_pathconf *info) { + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_PATHCONF], + .rpc_argp = fhandle, + .rpc_resp = info, + }; int status; dprintk("NFS call pathconf\n"); nfs_fattr_init(info->fattr); - status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply pathconf: %d\n", status); return status; } extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); -static void nfs3_read_done(struct rpc_task *task, void *calldata) +static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; - - if (nfs3_async_handle_jukebox(task)) - return; + if (nfs3_async_handle_jukebox(task, data->inode)) + return -EAGAIN; /* Call back common NFS readpage processing */ if (task->tk_status >= 0) nfs_refresh_inode(data->inode, &data->fattr); - nfs_readpage_result(task, calldata); + return 0; } -static const struct rpc_call_ops nfs3_read_ops = { - .rpc_call_done = nfs3_read_done, - .rpc_release = nfs_readdata_release, -}; - -static void -nfs3_proc_read_setup(struct nfs_read_data *data) +static void nfs3_proc_read_setup(struct nfs_read_data *data) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_READ], .rpc_argp = &data->args, @@ -774,37 +830,20 @@ nfs3_proc_read_setup(struct nfs_read_data *data) .rpc_cred = data->cred, }; - /* N.B. Do we need to test? Never called for swapfile inode */ - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } -static void nfs3_write_done(struct rpc_task *task, void *calldata) +static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - - if (nfs3_async_handle_jukebox(task)) - return; + if (nfs3_async_handle_jukebox(task, data->inode)) + return -EAGAIN; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_writeback_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs3_write_ops = { - .rpc_call_done = nfs3_write_done, - .rpc_release = nfs_writedata_release, -}; - -static void -nfs3_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs3_proc_write_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int stable; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], .rpc_argp = &data->args, @@ -812,45 +851,28 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how) .rpc_cred = data->cred, }; + data->args.stable = NFS_UNSTABLE; if (how & FLUSH_STABLE) { - if (!NFS_I(inode)->ncommit) - stable = NFS_FILE_SYNC; - else - stable = NFS_DATA_SYNC; - } else - stable = NFS_UNSTABLE; - data->args.stable = stable; - - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + data->args.stable = NFS_FILE_SYNC; + if (NFS_I(data->inode)->ncommit) + data->args.stable = NFS_DATA_SYNC; + } /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } -static void nfs3_commit_done(struct rpc_task *task, void *calldata) +static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - - if (nfs3_async_handle_jukebox(task)) - return; + if (nfs3_async_handle_jukebox(task, data->inode)) + return -EAGAIN; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_commit_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs3_commit_ops = { - .rpc_call_done = nfs3_commit_done, - .rpc_release = nfs_commit_release, -}; - -static void -nfs3_proc_commit_setup(struct nfs_write_data *data, int how) +static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], .rpc_argp = &data->args, @@ -858,12 +880,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how) .rpc_cred = data->cred, }; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static int @@ -902,8 +919,11 @@ struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, + .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, + .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, + .commit_done = nfs3_commit_done, .file_open = nfs_open, .file_release = nfs_release, .lock = nfs3_proc_lock, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index b6c0b5012bc..ec233619687 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ - .p_timer = timer \ + .p_timer = timer, \ + .p_statidx = NFS3PROC_##proc, \ + .p_name = #proc, \ } struct rpc_procinfo nfs3_procedures[] = { @@ -1138,7 +1140,7 @@ struct rpc_procinfo nfs3_procedures[] = { struct rpc_version nfs_version3 = { .number = 3, - .nrprocs = sizeof(nfs3_procedures)/sizeof(nfs3_procedures[0]), + .nrprocs = ARRAY_SIZE(nfs3_procedures), .procs = nfs3_procedures }; @@ -1150,6 +1152,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, .p_timer = 1, + .p_name = "GETACL", }, [ACLPROC3_SETACL] = { .p_proc = ACLPROC3_SETACL, @@ -1157,6 +1160,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, .p_timer = 0, + .p_name = "SETACL", }, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f8c0066e02e..47ece1dd3c6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -51,6 +51,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -335,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data if (!(data->f_attr.valid & NFS_ATTR_FATTR)) goto out; inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); - if (inode == NULL) + if (IS_ERR(inode)) goto out; state = nfs4_get_open_state(inode, data->owner); if (state == NULL) @@ -604,11 +605,14 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) int status; atomic_inc(&data->count); + /* + * If rpc_run_task() ends up calling ->rpc_release(), we + * want to ensure that it takes the 'error' code path. + */ + data->rpc_status = -ENOMEM; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); - if (IS_ERR(task)) { - nfs4_opendata_free(data); + if (IS_ERR(task)) return PTR_ERR(task); - } status = nfs4_wait_for_completion_rpc_task(task); if (status != 0) { data->cancelled = 1; @@ -707,11 +711,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) int status; atomic_inc(&data->count); + /* + * If rpc_run_task() ends up calling ->rpc_release(), we + * want to ensure that it takes the 'error' code path. + */ + data->rpc_status = -ENOMEM; task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); - if (IS_ERR(task)) { - nfs4_opendata_free(data); + if (IS_ERR(task)) return PTR_ERR(task); - } status = nfs4_wait_for_completion_rpc_task(task); if (status != 0) { data->cancelled = 1; @@ -908,7 +915,7 @@ out_put_state_owner: static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) { struct nfs4_exception exception = { }; - struct nfs4_state *res; + struct nfs4_state *res = ERR_PTR(-EIO); int err; do { @@ -1017,12 +1024,12 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, return res; } -static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, - struct nfs_fh *fhandle, struct iattr *sattr, - struct nfs4_state *state) +static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, + struct iattr *sattr, struct nfs4_state *state) { + struct nfs_server *server = NFS_SERVER(inode); struct nfs_setattrargs arg = { - .fh = fhandle, + .fh = NFS_FH(inode), .iap = sattr, .server = server, .bitmask = server->attr_bitmask, @@ -1041,7 +1048,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, nfs_fattr_init(fattr); - if (state != NULL) { + if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { + /* Use that stateid */ + } else if (state != NULL) { msg.rpc_cred = state->owner->so_cred; nfs4_copy_stateid(&arg.stateid, state, current->files); } else @@ -1053,16 +1062,15 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, return status; } -static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, - struct nfs_fh *fhandle, struct iattr *sattr, - struct nfs4_state *state) +static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr, + struct iattr *sattr, struct nfs4_state *state) { + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs4_do_setattr(server, fattr, fhandle, sattr, - state), + _nfs4_do_setattr(inode, fattr, sattr, state), &exception); } while (exception.retry); return err; @@ -1503,8 +1511,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, if (ctx != NULL) state = ctx->state; - status = nfs4_do_setattr(NFS_SERVER(inode), fattr, - NFS_FH(inode), sattr, state); + status = nfs4_do_setattr(inode, fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(inode, sattr); if (ctx != NULL) @@ -1823,8 +1830,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, d_instantiate(dentry, igrab(state->inode)); if (flags & O_EXCL) { struct nfs_fattr fattr; - status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, - NFS_FH(state->inode), sattr, state); + status = nfs4_do_setattr(state->inode, &fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(state->inode, sattr); } @@ -2344,75 +2350,50 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static void nfs4_read_done(struct rpc_task *task, void *calldata) +static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; - struct inode *inode = data->inode; + struct nfs_server *server = NFS_SERVER(data->inode); - if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + if (nfs4_async_handle_error(task, server) == -EAGAIN) { rpc_restart_call(task); - return; + return -EAGAIN; } if (task->tk_status > 0) - renew_lease(NFS_SERVER(inode), data->timestamp); - /* Call back common NFS readpage processing */ - nfs_readpage_result(task, calldata); + renew_lease(server, data->timestamp); + return 0; } -static const struct rpc_call_ops nfs4_read_ops = { - .rpc_call_done = nfs4_read_done, - .rpc_release = nfs_readdata_release, -}; - -static void -nfs4_proc_read_setup(struct nfs_read_data *data) +static void nfs4_proc_read_setup(struct nfs_read_data *data) { - struct rpc_task *task = &data->task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ], .rpc_argp = &data->args, .rpc_resp = &data->res, .rpc_cred = data->cred, }; - struct inode *inode = data->inode; - int flags; data->timestamp = jiffies; - /* N.B. Do we need to test? Never called for swapfile inode */ - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } -static void nfs4_write_done(struct rpc_task *task, void *calldata) +static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { rpc_restart_call(task); - return; + return -EAGAIN; } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), data->timestamp); nfs_post_op_update_inode(inode, data->res.fattr); } - /* Call back common NFS writeback processing */ - nfs_writeback_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs4_write_ops = { - .rpc_call_done = nfs4_write_done, - .rpc_release = nfs_writedata_release, -}; - -static void -nfs4_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs4_proc_write_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE], .rpc_argp = &data->args, @@ -2422,7 +2403,6 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how) struct inode *inode = data->inode; struct nfs_server *server = NFS_SERVER(inode); int stable; - int flags; if (how & FLUSH_STABLE) { if (!NFS_I(inode)->ncommit) @@ -2437,57 +2417,37 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how) data->timestamp = jiffies; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } -static void nfs4_commit_done(struct rpc_task *task, void *calldata) +static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { rpc_restart_call(task); - return; + return -EAGAIN; } if (task->tk_status >= 0) nfs_post_op_update_inode(inode, data->res.fattr); - /* Call back common NFS writeback processing */ - nfs_commit_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs4_commit_ops = { - .rpc_call_done = nfs4_commit_done, - .rpc_release = nfs_commit_release, -}; - -static void -nfs4_proc_commit_setup(struct nfs_write_data *data, int how) +static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], .rpc_argp = &data->args, .rpc_resp = &data->res, .rpc_cred = data->cred, }; - struct inode *inode = data->inode; - struct nfs_server *server = NFS_SERVER(inode); - int flags; + struct nfs_server *server = NFS_SERVER(data->inode); data->args.bitmask = server->attr_bitmask; data->res.server = server; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } /* @@ -2755,8 +2715,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) rpc_wake_up_task(task); task->tk_status = 0; return -EAGAIN; - case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: + nfs_inc_server_stats((struct nfs_server *) server, + NFSIOS_DELAY); + case -NFS4ERR_GRACE: rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; return -EAGAIN; @@ -2893,8 +2855,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p return status; } -int -nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) { struct nfs_fsinfo fsinfo; struct rpc_message msg = { @@ -2918,6 +2879,24 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) return status; } +int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +{ + long timeout; + int err; + do { + err = _nfs4_proc_setclientid_confirm(clp, cred); + switch (err) { + case 0: + return err; + case -NFS4ERR_RESOURCE: + /* The IBM lawyers misread another document! */ + case -NFS4ERR_DELAY: + err = nfs4_delay(clp->cl_rpcclient, &timeout); + } + } while (err == 0); + return err; +} + struct nfs4_delegreturndata { struct nfs4_delegreturnargs args; struct nfs4_delegreturnres res; @@ -2958,7 +2937,7 @@ static void nfs4_delegreturn_release(void *calldata) kfree(calldata); } -const static struct rpc_call_ops nfs4_delegreturn_ops = { +static const struct rpc_call_ops nfs4_delegreturn_ops = { .rpc_call_prepare = nfs4_delegreturn_prepare, .rpc_call_done = nfs4_delegreturn_done, .rpc_release = nfs4_delegreturn_release, @@ -2986,10 +2965,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data->rpc_status = 0; task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data); - if (IS_ERR(task)) { - nfs4_delegreturn_release(data); + if (IS_ERR(task)) return PTR_ERR(task); - } status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) { status = data->rpc_status; @@ -3209,7 +3186,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, struct nfs_seqid *seqid) { struct nfs4_unlockdata *data; - struct rpc_task *task; data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); if (data == NULL) { @@ -3219,10 +3195,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, /* Unlock _before_ we do the RPC call */ do_vfs_lock(fl->fl_file, fl); - task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); - if (IS_ERR(task)) - nfs4_locku_release_calldata(data); - return task; + return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); } static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) @@ -3403,10 +3376,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.reclaim = 1; task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC, &nfs4_lock_ops, data); - if (IS_ERR(task)) { - nfs4_lock_release(data); + if (IS_ERR(task)) return PTR_ERR(task); - } ret = nfs4_wait_for_completion_rpc_task(task); if (ret == 0) { ret = data->rpc_status; @@ -3588,6 +3559,8 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) { size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode))) + return 0; if (buf && buflen < len) return -ERANGE; if (buf) @@ -3644,8 +3617,11 @@ struct nfs_rpc_ops nfs_v4_clientops = { .pathconf = nfs4_proc_pathconf, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, + .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, + .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, + .commit_done = nfs4_commit_done, .file_open = nfs_open, .file_release = nfs_release, .lock = nfs4_proc_lock, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index afad0255e7d..96e5b82c153 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -977,6 +977,7 @@ out: out_error: printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", NIPQUAD(clp->cl_addr.s_addr), -status); + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4bbf5ef5778..7c5d70efe72 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat) .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ + .p_statidx = NFSPROC4_CLNT_##proc, \ + .p_name = #proc, \ } struct rpc_procinfo nfs4_procedures[] = { @@ -4384,7 +4386,7 @@ struct rpc_procinfo nfs4_procedures[] = { struct rpc_version nfs_version4 = { .number = 4, - .nrprocs = sizeof(nfs4_procedures)/sizeof(nfs4_procedures[0]), + .nrprocs = ARRAY_SIZE(nfs4_procedures), .procs = nfs4_procedures }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d53857b148e..106aca388eb 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -85,6 +85,9 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, atomic_set(&req->wb_complete, 0); req->wb_index = page->index; page_cache_get(page); + BUG_ON(PagePrivate(page)); + BUG_ON(!PageLocked(page)); + BUG_ON(page->mapping->host != inode); req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; @@ -132,9 +135,11 @@ void nfs_clear_page_writeback(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); - spin_lock(&nfsi->req_lock); - radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); - spin_unlock(&nfsi->req_lock); + if (req->wb_page != NULL) { + spin_lock(&nfsi->req_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + spin_unlock(&nfsi->req_lock); + } nfs_unlock_request(req); } @@ -147,8 +152,9 @@ void nfs_clear_page_writeback(struct nfs_page *req) */ void nfs_clear_request(struct nfs_page *req) { - if (req->wb_page) { - page_cache_release(req->wb_page); + struct page *page = req->wb_page; + if (page != NULL) { + page_cache_release(page); req->wb_page = NULL; } } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index f5150d71c03..9dd85cac2df 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, { struct nfs_fattr *fattr = info->fattr; struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; int status; dprintk("%s: call getattr\n", __FUNCTION__); nfs_fattr_init(fattr); - status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); + status = rpc_call_sync(server->client_sys, &msg, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); if (status) return status; dprintk("%s: call statfs\n", __FUNCTION__); - status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0); + msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; + msg.rpc_resp = &fsinfo; + status = rpc_call_sync(server->client_sys, &msg, 0); dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); if (status) return status; @@ -90,12 +97,16 @@ static int nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], + .rpc_argp = fhandle, + .rpc_resp = fattr, + }; int status; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call(server->client, NFSPROC_GETATTR, - fhandle, fattr, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply getattr: %d\n", status); return status; } @@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, .fh = NFS_FH(inode), .sattr = sattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_SETATTR], + .rpc_argp = &arg, + .rpc_resp = fattr, + }; int status; /* Mask out the non-modebit related stuff from attr->ia_mode */ @@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, dprintk("NFS call setattr\n"); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) nfs_setattr_update_inode(inode, sattr); dprintk("NFS reply setattr: %d\n", status); @@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name, .fh = fhandle, .fattr = fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_LOOKUP], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call lookup %s\n", name->name); nfs_fattr_init(fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); dprintk("NFS reply lookup: %d\n", status); return status; } @@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page, .pglen = pglen, .pages = &page }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_READLINK], + .rpc_argp = &args, + }; int status; dprintk("NFS call readlink\n"); - status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); dprintk("NFS reply readlink: %d\n", status); return status; } @@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; nfs_fattr_init(&fattr); dprintk("NFS call create %s\n", dentry->d_name.name); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (status == 0) status = nfs_instantiate(dentry, &fhandle, &fattr); dprintk("NFS reply create: %d\n", status); @@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status, mode; dprintk("NFS call mknod %s\n", dentry->d_name.name); @@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, } nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == -EINVAL && S_ISFIFO(mode)) { sattr->ia_mode = mode; nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); } if (status == 0) status = nfs_instantiate(dentry, &fhandle, &fattr); @@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struct qstr *name) struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_REMOVE], .rpc_argp = &arg, - .rpc_resp = NULL, - .rpc_cred = NULL }; int status; @@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, .toname = new_name->name, .tolen = new_name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_RENAME], + .rpc_argp = &arg, + }; int status; dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); - status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); nfs_mark_for_revalidate(old_dir); nfs_mark_for_revalidate(new_dir); dprintk("NFS reply rename: %d\n", status); @@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) .toname = name->name, .tolen = name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_LINK], + .rpc_argp = &arg, + }; int status; dprintk("NFS call link %s\n", name->name); - status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_mark_for_revalidate(inode); nfs_mark_for_revalidate(dir); dprintk("NFS reply link: %d\n", status); @@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, .tolen = path->len, .sattr = sattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK], + .rpc_argp = &arg, + }; int status; if (path->len > NFS2_MAXPATHLEN) @@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, dprintk("NFS call symlink %s -> %s\n", name->name, path->name); nfs_fattr_init(fattr); fhandle->size = 0; - status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); dprintk("NFS reply symlink: %d\n", status); return status; @@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) .fh = &fhandle, .fattr = &fattr }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_MKDIR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); nfs_fattr_init(&fattr); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == 0) status = nfs_instantiate(dentry, &fhandle, &fattr); @@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name) .name = name->name, .len = name->len }; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_RMDIR], + .rpc_argp = &arg, + }; int status; dprintk("NFS call rmdir %s\n", name->name); - status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); dprintk("NFS reply rmdir: %d\n", status); return status; @@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, .fh = NFS_FH(dir), .cookie = cookie, .count = count, - .pages = &page + .pages = &page, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READDIR], .rpc_argp = &arg, - .rpc_resp = NULL, - .rpc_cred = cred + .rpc_cred = cred, }; int status; @@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *stat) { struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_STATFS], + .rpc_argp = fhandle, + .rpc_resp = &fsinfo, + }; int status; dprintk("NFS call statfs\n"); nfs_fattr_init(stat->fattr); - status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply statfs: %d\n", status); if (status) goto out; @@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { struct nfs2_fsstat fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_STATFS], + .rpc_argp = fhandle, + .rpc_resp = &fsinfo, + }; int status; dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); + status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); if (status) goto out; @@ -550,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); -static void nfs_read_done(struct rpc_task *task, void *calldata) +static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; - if (task->tk_status >= 0) { nfs_refresh_inode(data->inode, data->res.fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 @@ -562,20 +623,11 @@ static void nfs_read_done(struct rpc_task *task, void *calldata) if (data->args.offset + data->args.count >= data->res.fattr->size) data->res.eof = 1; } - nfs_readpage_result(task, calldata); + return 0; } -static const struct rpc_call_ops nfs_read_ops = { - .rpc_call_done = nfs_read_done, - .rpc_release = nfs_readdata_release, -}; - -static void -nfs_proc_read_setup(struct nfs_read_data *data) +static void nfs_proc_read_setup(struct nfs_read_data *data) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READ], .rpc_argp = &data->args, @@ -583,34 +635,18 @@ nfs_proc_read_setup(struct nfs_read_data *data) .rpc_cred = data->cred, }; - /* N.B. Do we need to test? Never called for swapfile inode */ - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); - - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } -static void nfs_write_done(struct rpc_task *task, void *calldata) +static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; - if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_writeback_done(task, calldata); + return 0; } -static const struct rpc_call_ops nfs_write_ops = { - .rpc_call_done = nfs_write_done, - .rpc_release = nfs_writedata_release, -}; - -static void -nfs_proc_write_setup(struct nfs_write_data *data, int how) +static void nfs_proc_write_setup(struct nfs_write_data *data, int how) { - struct rpc_task *task = &data->task; - struct inode *inode = data->inode; - int flags; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_WRITE], .rpc_argp = &data->args, @@ -621,12 +657,8 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how) /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ data->args.stable = NFS_FILE_SYNC; - /* Set the initial flags for the task. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data); - rpc_call_setup(task, &msg, 0); + rpc_call_setup(&data->task, &msg, 0); } static void @@ -672,7 +704,9 @@ struct nfs_rpc_ops nfs_v2_clientops = { .pathconf = nfs_proc_pathconf, .decode_dirent = nfs_decode_dirent, .read_setup = nfs_proc_read_setup, + .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, + .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, .file_open = nfs_open, .file_release = nfs_release, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 05eb43fadf8..624ca7146b6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -31,17 +31,49 @@ #include <asm/system.h> +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_PAGECACHE static int nfs_pagein_one(struct list_head *, struct inode *); -static void nfs_readpage_result_partial(struct nfs_read_data *, int); -static void nfs_readpage_result_full(struct nfs_read_data *, int); +static const struct rpc_call_ops nfs_read_partial_ops; +static const struct rpc_call_ops nfs_read_full_ops; static kmem_cache_t *nfs_rdata_cachep; -mempool_t *nfs_rdata_mempool; +static mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) +struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) +{ + struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); + + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_rdata_mempool); + p = NULL; + } + } + } + return p; +} + +void nfs_readdata_free(struct nfs_read_data *p) +{ + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); + mempool_free(p, nfs_rdata_mempool); +} + void nfs_readdata_release(void *data) { nfs_readdata_free(data); @@ -133,6 +165,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, } count -= result; rdata->args.pgbase += result; + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result); + /* Note: result == 0 should only happen if we're caching * a write that extends the file and punches a hole. */ @@ -196,9 +230,11 @@ static void nfs_readpage_release(struct nfs_page *req) * Set up the NFS read request struct */ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset) { struct inode *inode; + int flags; data->req = req; data->inode = inode = req->wb_context->dentry->d_inode; @@ -216,6 +252,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->res.eof = 0; nfs_fattr_init(&data->fattr); + /* Set up the initial task struct. */ + flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long)inode; @@ -303,14 +342,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) list_del_init(&data->pages); data->pagevec[0] = page; - data->complete = nfs_readpage_result_partial; if (nbytes > rsize) { - nfs_read_rpcsetup(req, data, rsize, offset); + nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, + rsize, offset); offset += rsize; nbytes -= rsize; } else { - nfs_read_rpcsetup(req, data, nbytes, offset); + nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, + nbytes, offset); nbytes = 0; } nfs_execute_read(data); @@ -356,8 +396,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) } req = nfs_list_entry(data->pages.next); - data->complete = nfs_readpage_result_full; - nfs_read_rpcsetup(req, data, count, 0); + nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); nfs_execute_read(data); return 0; @@ -391,12 +430,15 @@ nfs_pagein_list(struct list_head *head, int rpages) /* * Handle a read reply that fills part of a page. */ -static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) +static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) { + struct nfs_read_data *data = calldata; struct nfs_page *req = data->req; struct page *page = req->wb_page; - if (status >= 0) { + if (nfs_readpage_result(task, data) != 0) + return; + if (task->tk_status >= 0) { unsigned int request = data->args.count; unsigned int result = data->res.count; @@ -415,20 +457,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) } } +static const struct rpc_call_ops nfs_read_partial_ops = { + .rpc_call_done = nfs_readpage_result_partial, + .rpc_release = nfs_readdata_release, +}; + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -static void nfs_readpage_result_full(struct nfs_read_data *data, int status) +static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) { + struct nfs_read_data *data = calldata; unsigned int count = data->res.count; + if (nfs_readpage_result(task, data) != 0) + return; while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); struct page *page = req->wb_page; nfs_list_remove_request(req); - if (status >= 0) { + if (task->tk_status >= 0) { if (count < PAGE_CACHE_SIZE) { if (count < req->wb_bytes) memclear_highpage_flush(page, @@ -444,22 +494,33 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status) } } +static const struct rpc_call_ops nfs_read_full_ops = { + .rpc_call_done = nfs_readpage_result_full, + .rpc_release = nfs_readdata_release, +}; + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -void nfs_readpage_result(struct rpc_task *task, void *calldata) +int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_read_data *data = calldata; struct nfs_readargs *argp = &data->args; struct nfs_readres *resp = &data->res; - int status = task->tk_status; + int status; dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", - task->tk_pid, status); + task->tk_pid, task->tk_status); + + status = NFS_PROTO(data->inode)->read_done(task, data); + if (status != 0) + return status; + + nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); /* Is this a short read? */ if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { + nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count != 0) { /* Yes, so retry the read at the end of the data */ @@ -467,14 +528,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata) argp->pgbase += resp->count; argp->count -= resp->count; rpc_restart_call(task); - return; + return -EAGAIN; } task->tk_status = -EIO; } spin_lock(&data->inode->i_lock); NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&data->inode->i_lock); - data->complete(data, status); + return 0; } /* @@ -491,6 +552,9 @@ int nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page->index); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); + nfs_add_stats(inode, NFSIOS_READPAGES, 1); + /* * Try to flush any pending writes to the file.. * @@ -570,6 +634,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, inode->i_sb->s_id, (long long)NFS_FILEID(inode), nr_pages); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); if (filp == NULL) { desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); @@ -582,6 +647,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (!list_empty(&head)) { int err = nfs_pagein_list(&head, server->rpages); if (!ret) + nfs_add_stats(inode, NFSIOS_READPAGES, err); ret = err; } put_nfs_open_context(desc.ctx); @@ -597,10 +663,8 @@ int nfs_init_readpagecache(void) if (nfs_rdata_cachep == NULL) return -ENOMEM; - nfs_rdata_mempool = mempool_create(MIN_POOL_READ, - mempool_alloc_slab, - mempool_free_slab, - nfs_rdata_cachep); + nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ, + nfs_rdata_cachep); if (nfs_rdata_mempool == NULL) return -ENOMEM; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index a65c7b53d55..0e28189c215 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry) struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode); int status = -ENOMEM; - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto out; - memset(data, 0, sizeof(*data)); data->cred = rpcauth_lookupcred(clnt->cl_auth, 0); if (IS_ERR(data->cred)) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9449b683550..4cfada2cc09 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -63,6 +63,7 @@ #include <linux/smp_lock.h> #include "delegation.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -76,20 +77,21 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct inode *, struct page *, unsigned int, unsigned int); -static void nfs_writeback_done_partial(struct nfs_write_data *, int); -static void nfs_writeback_done_full(struct nfs_write_data *, int); static int nfs_wait_on_write_congestion(struct address_space *, int); static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how); +static const struct rpc_call_ops nfs_write_partial_ops; +static const struct rpc_call_ops nfs_write_full_ops; +static const struct rpc_call_ops nfs_commit_ops; static kmem_cache_t *nfs_wdata_cachep; -mempool_t *nfs_wdata_mempool; +static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); -static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) +struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); @@ -100,11 +102,39 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) p->pagevec = &p->page_array[0]; else { size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kzalloc(size, GFP_NOFS); + if (!p->pagevec) { + mempool_free(p, nfs_commit_mempool); + p = NULL; + } + } + } + return p; +} + +void nfs_commit_free(struct nfs_write_data *p) +{ + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); + mempool_free(p, nfs_commit_mempool); +} + +struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) +{ + struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); + + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); p->pagevec = kmalloc(size, GFP_NOFS); if (p->pagevec) { memset(p->pagevec, 0, size); } else { - mempool_free(p, nfs_commit_mempool); + mempool_free(p, nfs_wdata_mempool); p = NULL; } } @@ -112,11 +142,11 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) return p; } -static inline void nfs_commit_free(struct nfs_write_data *p) +void nfs_writedata_free(struct nfs_write_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); - mempool_free(p, nfs_commit_mempool); + mempool_free(p, nfs_wdata_mempool); } void nfs_writedata_release(void *wdata) @@ -136,6 +166,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); if (i_size >= end) return; + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); i_size_write(inode, end); } @@ -225,6 +256,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, wdata->args.pgbase += result; written += result; count -= result; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); } while (count); /* Update file length */ nfs_grow_file(page, offset, written); @@ -281,6 +313,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) int priority = wb_priority(wbc); int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); + nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + /* * Note: We need to ensure that we have a reference to the inode * if we are to do asynchronous writes. If not, waiting @@ -345,6 +380,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) struct inode *inode = mapping->host; int err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); + err = generic_writepages(mapping, wbc); if (err) return err; @@ -356,6 +393,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc)); if (err < 0) goto out; + nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); wbc->nr_to_write -= err; if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) { err = nfs_wait_on_requests(inode, 0, 0); @@ -391,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } + SetPagePrivate(req->wb_page); nfsi->npages++; atomic_inc(&req->wb_count); return 0; @@ -407,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&nfsi->req_lock); + ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; if (!nfsi->npages) { @@ -499,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req) * * Interruptible by signals only if mounted with intr flag. */ -static int -nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) +static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; @@ -513,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int else idx_end = idx_start + npages - 1; - spin_lock(&nfsi->req_lock); next = idx_start; while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { if (req->wb_index > idx_end) @@ -526,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); + spin_lock(&nfsi->req_lock); if (error < 0) return error; - spin_lock(&nfsi->req_lock); res++; } - spin_unlock(&nfsi->req_lock); return res; } +static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int ret; + + spin_lock(&nfsi->req_lock); + ret = nfs_wait_on_requests_locked(inode, idx_start, npages); + spin_unlock(&nfsi->req_lock); + return ret; +} + /* * nfs_scan_dirty - Scan an inode for dirty requests * @inode: NFS inode to scan @@ -586,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st } return res; } +#else +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) +{ + return 0; +} #endif static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) @@ -598,6 +651,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) if (!bdi_write_congested(bdi)) return 0; + + nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT); + if (intr) { struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); sigset_t oldset; @@ -653,8 +709,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); - if (error < 0) + if (error < 0) { + if (new) + nfs_release_request(new); return ERR_PTR(error); + } continue; } spin_unlock(&nfsi->req_lock); @@ -748,6 +807,8 @@ int nfs_updatepage(struct file *file, struct page *page, struct nfs_page *req; int status = 0; + nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); + dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, count, @@ -857,10 +918,12 @@ static inline int flush_task_priority(int how) */ static void nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, + const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset, int how) { struct inode *inode; + int flags; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -881,6 +944,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); + /* Set up the initial task struct. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); NFS_PROTO(inode)->write_setup(data, how); data->task.tk_priority = flush_task_priority(how); @@ -910,7 +976,7 @@ static void nfs_execute_write(struct nfs_write_data *data) * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) +static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) { struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; @@ -944,14 +1010,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) list_del_init(&data->pages); data->pagevec[0] = page; - data->complete = nfs_writeback_done_partial; if (nbytes > wsize) { - nfs_write_rpcsetup(req, data, wsize, offset, how); + nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + wsize, offset, how); offset += wsize; nbytes -= wsize; } else { - nfs_write_rpcsetup(req, data, nbytes, offset, how); + nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + nbytes, offset, how); nbytes = 0; } nfs_execute_write(data); @@ -978,16 +1045,13 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) +static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) { struct nfs_page *req; struct page **pages; struct nfs_write_data *data; unsigned int count; - if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) - return nfs_flush_multi(head, inode, how); - data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); if (!data) goto out_bad; @@ -1005,9 +1069,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) } req = nfs_list_entry(data->pages.next); - data->complete = nfs_writeback_done_full; /* Set up the argument struct */ - nfs_write_rpcsetup(req, data, count, 0, how); + nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); nfs_execute_write(data); return 0; @@ -1021,24 +1084,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) return -ENOMEM; } -static int -nfs_flush_list(struct list_head *head, int wpages, int how) +static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how) { LIST_HEAD(one_request); - struct nfs_page *req; - int error = 0; - unsigned int pages = 0; + int (*flush_one)(struct inode *, struct list_head *, int); + struct nfs_page *req; + int wpages = NFS_SERVER(inode)->wpages; + int wsize = NFS_SERVER(inode)->wsize; + int error; - while (!list_empty(head)) { - pages += nfs_coalesce_requests(head, &one_request, wpages); + flush_one = nfs_flush_one; + if (wsize < PAGE_CACHE_SIZE) + flush_one = nfs_flush_multi; + /* For single writes, FLUSH_STABLE is more efficient */ + if (npages <= wpages && npages == NFS_I(inode)->npages + && nfs_list_entry(head->next)->wb_bytes <= wsize) + how |= FLUSH_STABLE; + + do { + nfs_coalesce_requests(head, &one_request, wpages); req = nfs_list_entry(one_request.next); - error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how); + error = flush_one(inode, &one_request, how); if (error < 0) - break; - } - if (error >= 0) - return pages; - + goto out_err; + } while (!list_empty(head)); + return 0; +out_err: while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); @@ -1051,8 +1122,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how) /* * Handle a write reply that flushed part of a page. */ -static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) +static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) { + struct nfs_write_data *data = calldata; struct nfs_page *req = data->req; struct page *page = req->wb_page; @@ -1062,11 +1134,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) req->wb_bytes, (long long)req_offset(req)); - if (status < 0) { + if (nfs_writeback_done(task, data) != 0) + return; + + if (task->tk_status < 0) { ClearPageUptodate(page); SetPageError(page); - req->wb_context->error = status; - dprintk(", error = %d\n", status); + req->wb_context->error = task->tk_status; + dprintk(", error = %d\n", task->tk_status); } else { #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (data->verf.committed < NFS_FILE_SYNC) { @@ -1087,6 +1162,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) nfs_writepage_release(req); } +static const struct rpc_call_ops nfs_write_partial_ops = { + .rpc_call_done = nfs_writeback_done_partial, + .rpc_release = nfs_writedata_release, +}; + /* * Handle a write reply that flushes a whole page. * @@ -1094,11 +1174,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) * writebacks since the page->count is kept > 1 for as long * as the page has a write request pending. */ -static void nfs_writeback_done_full(struct nfs_write_data *data, int status) +static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) { + struct nfs_write_data *data = calldata; struct nfs_page *req; struct page *page; + if (nfs_writeback_done(task, data) != 0) + return; + /* Update attributes as result of writeback. */ while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); @@ -1111,13 +1195,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) req->wb_bytes, (long long)req_offset(req)); - if (status < 0) { + if (task->tk_status < 0) { ClearPageUptodate(page); SetPageError(page); - req->wb_context->error = status; + req->wb_context->error = task->tk_status; end_page_writeback(page); nfs_inode_remove_request(req); - dprintk(", error = %d\n", status); + dprintk(", error = %d\n", task->tk_status); goto next; } end_page_writeback(page); @@ -1139,18 +1223,30 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) } } +static const struct rpc_call_ops nfs_write_full_ops = { + .rpc_call_done = nfs_writeback_done_full, + .rpc_release = nfs_writedata_release, +}; + + /* * This function is called when the WRITE call is complete. */ -void nfs_writeback_done(struct rpc_task *task, void *calldata) +int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_write_data *data = calldata; struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; + int status; dprintk("NFS: %4d nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); + /* Call the NFS version-specific code */ + status = NFS_PROTO(data->inode)->write_done(task, data); + if (status != 0) + return status; + nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not @@ -1176,6 +1272,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) if (task->tk_status >= 0 && resp->count < argp->count) { static unsigned long complain; + nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); + /* Has the server at least made some progress? */ if (resp->count != 0) { /* Was this an NFSv2 write or an NFSv3 stable write? */ @@ -1191,7 +1289,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) argp->stable = NFS_FILE_SYNC; } rpc_restart_call(task); - return; + return -EAGAIN; } if (time_before(complain, jiffies)) { printk(KERN_WARNING @@ -1202,11 +1300,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata) /* Can't do anything about it except throw an error. */ task->tk_status = -EIO; } - - /* - * Process the nfs_page list - */ - data->complete(data, task->tk_status); + return 0; } @@ -1220,10 +1314,12 @@ void nfs_commit_release(void *wdata) * Set up the argument/result storage required for the RPC call. */ static void nfs_commit_rpcsetup(struct list_head *head, - struct nfs_write_data *data, int how) + struct nfs_write_data *data, + int how) { struct nfs_page *first; struct inode *inode; + int flags; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -1243,7 +1339,10 @@ static void nfs_commit_rpcsetup(struct list_head *head, data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); - + + /* Set up the initial task struct. */ + flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data); NFS_PROTO(inode)->commit_setup(data, how); data->task.tk_priority = flush_task_priority(how); @@ -1284,7 +1383,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) /* * COMMIT call returned */ -void nfs_commit_done(struct rpc_task *task, void *calldata) +static void nfs_commit_done(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; struct nfs_page *req; @@ -1293,6 +1392,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata) dprintk("NFS: %4d nfs_commit_done (status %d)\n", task->tk_pid, task->tk_status); + /* Call the NFS version-specific code */ + if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) + return; + while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); @@ -1326,6 +1429,16 @@ void nfs_commit_done(struct rpc_task *task, void *calldata) } sub_page_state(nr_unstable,res); } + +static const struct rpc_call_ops nfs_commit_ops = { + .rpc_call_done = nfs_commit_done, + .rpc_release = nfs_commit_release, +}; +#else +static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +{ + return 0; +} #endif static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, @@ -1333,24 +1446,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); - int res, - error = 0; + int res; spin_lock(&nfsi->req_lock); res = nfs_scan_dirty(inode, &head, idx_start, npages); spin_unlock(&nfsi->req_lock); if (res) { - struct nfs_server *server = NFS_SERVER(inode); - - /* For single writes, FLUSH_STABLE is more efficient */ - if (res == nfsi->npages && nfsi->npages <= server->wpages) { - if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize) - how |= FLUSH_STABLE; - } - error = nfs_flush_list(&head, server->wpages, how); + int error = nfs_flush_list(inode, &head, res, how); + if (error < 0) + return error; } - if (error < 0) - return error; return res; } @@ -1359,14 +1464,13 @@ int nfs_commit_inode(struct inode *inode, int how) { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); - int res, - error = 0; + int res; spin_lock(&nfsi->req_lock); res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&nfsi->req_lock); if (res) { - error = nfs_commit_list(inode, &head, how); + int error = nfs_commit_list(inode, &head, how); if (error < 0) return error; } @@ -1374,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how) } #endif -int nfs_sync_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) { + struct nfs_inode *nfsi = NFS_I(inode); + LIST_HEAD(head); int nocommit = how & FLUSH_NOCOMMIT; - int wait = how & FLUSH_WAIT; - int error; - - how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT); + int pages, ret; + how &= ~FLUSH_NOCOMMIT; + spin_lock(&nfsi->req_lock); do { - if (wait) { - error = nfs_wait_on_requests(inode, idx_start, npages); - if (error != 0) - continue; - } - error = nfs_flush_inode(inode, idx_start, npages, how); - if (error != 0) + ret = nfs_wait_on_requests_locked(inode, idx_start, npages); + if (ret != 0) continue; - if (!nocommit) - error = nfs_commit_inode(inode, how); - } while (error > 0); - return error; + pages = nfs_scan_dirty(inode, &head, idx_start, npages); + if (pages != 0) { + spin_unlock(&nfsi->req_lock); + ret = nfs_flush_list(inode, &head, pages, how); + spin_lock(&nfsi->req_lock); + continue; + } + if (nocommit) + break; + pages = nfs_scan_commit(inode, &head, 0, 0); + if (pages == 0) + break; + spin_unlock(&nfsi->req_lock); + ret = nfs_commit_list(inode, &head, how); + spin_lock(&nfsi->req_lock); + } while (ret >= 0); + spin_unlock(&nfsi->req_lock); + return ret; } int nfs_init_writepagecache(void) @@ -1407,17 +1521,13 @@ int nfs_init_writepagecache(void) if (nfs_wdata_cachep == NULL) return -ENOMEM; - nfs_wdata_mempool = mempool_create(MIN_POOL_WRITE, - mempool_alloc_slab, - mempool_free_slab, - nfs_wdata_cachep); + nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, + nfs_wdata_cachep); if (nfs_wdata_mempool == NULL) return -ENOMEM; - nfs_commit_mempool = mempool_create(MIN_POOL_COMMIT, - mempool_alloc_slab, - mempool_free_slab, - nfs_wdata_cachep); + nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, + nfs_wdata_cachep); if (nfs_commit_mempool == NULL) return -ENOMEM; |