diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-25 10:03:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-25 10:03:28 -0700 |
commit | 40471856f2e38e9bfa8d605295e8234421110dd6 (patch) | |
tree | 9757e42e40bdbfcff7c52ab133e32b5c2203153b | |
parent | ae005cbed12d0b340b04b59d6f5c56e710b3895d (diff) | |
parent | 0acd2201920d0968919f4f5797d63f7b6f2b19d4 (diff) |
Merge branch 'nfs-for-2.6.39' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6
* 'nfs-for-2.6.39' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6: (28 commits)
Cleanup XDR parsing for LAYOUTGET, GETDEVICEINFO
NFSv4.1 convert layoutcommit sync to boolean
NFSv4.1 pnfs_layoutcommit_inode fixes
NFS: Determine initial mount security
NFS: use secinfo when crossing mountpoints
NFS: Add secinfo procedure
NFS: lookup supports alternate client
NFS: convert call_sync() to a function
NFSv4.1 remove temp code that prevented ds commits
NFSv4.1: layoutcommit
NFSv4.1: filelayout driver specific code for COMMIT
NFSv4.1: remove GETATTR from ds commits
NFSv4.1: add generic layer hooks for pnfs COMMIT
NFSv4.1: alloc and free commit_buckets
NFSv4.1: shift filelayout_free_lseg
NFSv4.1: pull out code from nfs_commit_release
NFSv4.1: pull error handling out of nfs_commit_list
NFSv4.1: add callback to nfs4_commit_done
NFSv4.1: rearrange nfs_commit_rpcsetup
NFSv4.1: don't send COMMIT to ds for data sync writes
...
-rw-r--r-- | fs/nfs/dir.c | 89 | ||||
-rw-r--r-- | fs/nfs/file.c | 3 | ||||
-rw-r--r-- | fs/nfs/getroot.c | 4 | ||||
-rw-r--r-- | fs/nfs/inode.c | 10 | ||||
-rw-r--r-- | fs/nfs/internal.h | 27 | ||||
-rw-r--r-- | fs/nfs/namespace.c | 115 | ||||
-rw-r--r-- | fs/nfs/nfs3proc.c | 2 | ||||
-rw-r--r-- | fs/nfs/nfs4_fs.h | 5 | ||||
-rw-r--r-- | fs/nfs/nfs4filelayout.c | 352 | ||||
-rw-r--r-- | fs/nfs/nfs4filelayout.h | 2 | ||||
-rw-r--r-- | fs/nfs/nfs4filelayoutdev.c | 178 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 302 | ||||
-rw-r--r-- | fs/nfs/nfs4xdr.c | 313 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 8 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 142 | ||||
-rw-r--r-- | fs/nfs/pnfs.h | 83 | ||||
-rw-r--r-- | fs/nfs/proc.c | 2 | ||||
-rw-r--r-- | fs/nfs/write.c | 214 | ||||
-rw-r--r-- | fs/nfs_common/nfsacl.c | 1 | ||||
-rw-r--r-- | include/linux/nfs4.h | 2 | ||||
-rw-r--r-- | include/linux/nfs_fs.h | 10 | ||||
-rw-r--r-- | include/linux/nfs_page.h | 7 | ||||
-rw-r--r-- | include/linux/nfs_xdr.h | 70 | ||||
-rw-r--r-- | include/linux/sunrpc/gss_api.h | 3 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/gss_mech_switch.c | 38 | ||||
-rw-r--r-- | net/sunrpc/xprtsock.c | 2 |
26 files changed, 1728 insertions, 256 deletions
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index abdf38d5971..7237672216c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -44,6 +44,7 @@ /* #define NFS_DEBUG_VERBOSE 1 */ static int nfs_opendir(struct inode *, struct file *); +static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); @@ -64,7 +65,7 @@ const struct file_operations nfs_dir_operations = { .read = generic_read_dir, .readdir = nfs_readdir, .open = nfs_opendir, - .release = nfs_release, + .release = nfs_closedir, .fsync = nfs_fsync_dir, }; @@ -133,13 +134,35 @@ const struct inode_operations nfs4_dir_inode_operations = { #endif /* CONFIG_NFS_V4 */ +static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred) +{ + struct nfs_open_dir_context *ctx; + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx != NULL) { + ctx->duped = 0; + ctx->dir_cookie = 0; + ctx->dup_cookie = 0; + ctx->cred = get_rpccred(cred); + } else + ctx = ERR_PTR(-ENOMEM); + return ctx; +} + +static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) +{ + put_rpccred(ctx->cred); + kfree(ctx); +} + /* * Open file */ static int nfs_opendir(struct inode *inode, struct file *filp) { - int res; + int res = 0; + struct nfs_open_dir_context *ctx; + struct rpc_cred *cred; dfprintk(FILE, "NFS: open dir(%s/%s)\n", filp->f_path.dentry->d_parent->d_name.name, @@ -147,8 +170,15 @@ nfs_opendir(struct inode *inode, struct file *filp) nfs_inc_stats(inode, NFSIOS_VFSOPEN); - /* Call generic open code in order to cache credentials */ - res = nfs_open(inode, filp); + cred = rpc_lookup_cred(); + if (IS_ERR(cred)) + return PTR_ERR(cred); + ctx = alloc_nfs_open_dir_context(cred); + if (IS_ERR(ctx)) { + res = PTR_ERR(ctx); + goto out; + } + filp->private_data = ctx; if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { /* This is a mountpoint, so d_revalidate will never * have been called, so we need to refresh the @@ -156,9 +186,18 @@ nfs_opendir(struct inode *inode, struct file *filp) */ __nfs_revalidate_inode(NFS_SERVER(inode), inode); } +out: + put_rpccred(cred); return res; } +static int +nfs_closedir(struct inode *inode, struct file *filp) +{ + put_nfs_open_dir_context(filp->private_data); + return 0; +} + struct nfs_cache_array_entry { u64 cookie; u64 ino; @@ -284,19 +323,20 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri { loff_t diff = desc->file->f_pos - desc->current_index; unsigned int index; + struct nfs_open_dir_context *ctx = desc->file->private_data; if (diff < 0) goto out_eof; if (diff >= array->size) { if (array->eof_index >= 0) goto out_eof; - desc->current_index += array->size; return -EAGAIN; } index = (unsigned int)diff; *desc->dir_cookie = array->array[index].cookie; desc->cache_entry_index = index; + ctx->duped = 0; return 0; out_eof: desc->eof = 1; @@ -307,10 +347,18 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) { int i; + loff_t new_pos; int status = -EAGAIN; + struct nfs_open_dir_context *ctx = desc->file->private_data; for (i = 0; i < array->size; i++) { if (array->array[i].cookie == *desc->dir_cookie) { + new_pos = desc->current_index + i; + if (new_pos < desc->file->f_pos) { + ctx->dup_cookie = *desc->dir_cookie; + ctx->duped = 1; + } + desc->file->f_pos = new_pos; desc->cache_entry_index = i; return 0; } @@ -342,6 +390,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) if (status == -EAGAIN) { desc->last_cookie = array->last_cookie; + desc->current_index += array->size; desc->page_index++; } nfs_readdir_release_array(desc->page); @@ -354,7 +403,8 @@ static int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct file *file, struct inode *inode) { - struct rpc_cred *cred = nfs_file_cred(file); + struct nfs_open_dir_context *ctx = file->private_data; + struct rpc_cred *cred = ctx->cred; unsigned long timestamp, gencount; int error; @@ -693,6 +743,20 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, int i = 0; int res = 0; struct nfs_cache_array *array = NULL; + struct nfs_open_dir_context *ctx = file->private_data; + + if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) { + if (printk_ratelimit()) { + pr_notice("NFS: directory %s/%s contains a readdir loop. " + "Please contact your server vendor. " + "Offending cookie: %llu\n", + file->f_dentry->d_parent->d_name.name, + file->f_dentry->d_name.name, + *desc->dir_cookie); + } + res = -ELOOP; + goto out; + } array = nfs_readdir_get_array(desc->page); if (IS_ERR(array)) { @@ -785,6 +849,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct inode *inode = dentry->d_inode; nfs_readdir_descriptor_t my_desc, *desc = &my_desc; + struct nfs_open_dir_context *dir_ctx = filp->private_data; int res; dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", @@ -801,7 +866,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; + desc->dir_cookie = &dir_ctx->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); @@ -853,6 +918,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; + struct nfs_open_dir_context *dir_ctx = filp->private_data; dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", dentry->d_parent->d_name.name, @@ -872,7 +938,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) } if (offset != filp->f_pos) { filp->f_pos = offset; - nfs_file_open_context(filp)->dir_cookie = 0; + dir_ctx->dir_cookie = 0; + dir_ctx->duped = 0; } out: mutex_unlock(&inode->i_mutex); @@ -1068,7 +1135,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) if (fhandle == NULL || fattr == NULL) goto out_error; - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); if (error) goto out_bad; if (nfs_compare_fh(NFS_FH(inode), fhandle)) @@ -1224,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ nfs_block_sillyrename(parent); - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); if (error == -ENOENT) goto no_entry; if (error < 0) { @@ -1562,7 +1629,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, if (dentry->d_inode) goto out; if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); if (error) goto out_error; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d85a534b15c..3ac5bd695e5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -326,6 +326,9 @@ nfs_file_fsync(struct file *file, int datasync) ret = xchg(&ctx->error, 0); if (!ret && status < 0) ret = status; + if (!ret && !datasync) + /* application has asked for meta-data sync */ + ret = pnfs_layoutcommit_inode(inode, true); return ret; } diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 1084792bc0f..dcb61548887 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -222,6 +222,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh, goto out; } + if (fattr->valid & NFS_ATTR_FATTR_FSID && + !nfs_fsid_equal(&server->fsid, &fattr->fsid)) + memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); + inode = nfs_fhget(sb, mntfh, fattr); if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 01768e5e2c9..57bb31ad7a5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -254,7 +254,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) struct inode *inode = ERR_PTR(-ENOENT); unsigned long hash; - if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) + nfs_attr_check_mountpoint(sb, fattr); + + if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) goto out_no_inode; if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) goto out_no_inode; @@ -298,8 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); /* Deal with crossing mountpoints */ - if ((fattr->valid & NFS_ATTR_FATTR_FSID) - && !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { + if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || + fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) inode->i_op = &nfs_referral_inode_operations; else @@ -639,7 +641,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cr ctx->mode = f_mode; ctx->flags = 0; ctx->error = 0; - ctx->dir_cookie = 0; nfs_init_lock_context(&ctx->lock_context); ctx->lock_context.open_context = ctx; INIT_LIST_HEAD(&ctx->list); @@ -1471,6 +1472,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) nfsi->delegation_state = 0; init_rwsem(&nfsi->rwsem); nfsi->layout = NULL; + atomic_set(&nfsi->commits_outstanding, 0); #endif } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 72e0bddf7a2..ce118ce885d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -39,6 +39,12 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp) return 0; } +static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr) +{ + if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid)) + fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT; +} + struct nfs_clone_mount { const struct super_block *sb; const struct dentry *dentry; @@ -214,6 +220,7 @@ extern const u32 nfs41_maxwrite_overhead; /* nfs4proc.c */ #ifdef CONFIG_NFS_V4 extern struct rpc_procinfo nfs4_procedures[]; +void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *); #endif extern int nfs4_init_ds_session(struct nfs_client *clp); @@ -276,11 +283,25 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, extern void nfs_read_prepare(struct rpc_task *task, void *calldata); /* write.c */ +extern void nfs_commit_free(struct nfs_write_data *p); extern int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops, int how); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); +extern int nfs_initiate_commit(struct nfs_write_data *data, + struct rpc_clnt *clnt, + const struct rpc_call_ops *call_ops, + int how); +extern void nfs_init_commit(struct nfs_write_data *data, + struct list_head *head, + struct pnfs_layout_segment *lseg); +void nfs_retry_commit(struct list_head *page_list, + struct pnfs_layout_segment *lseg); +void nfs_commit_clear_lock(struct nfs_inode *nfsi); +void nfs_commitdata_release(void *data); +void nfs_commit_release_pages(struct nfs_write_data *data); + #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, struct page *, struct page *); @@ -296,12 +317,14 @@ extern int nfs4_init_client(struct nfs_client *clp, rpc_authflavor_t authflavour, int noresvport); extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); -extern int _nfs4_call_sync(struct nfs_server *server, +extern int _nfs4_call_sync(struct rpc_clnt *clnt, + struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply); -extern int _nfs4_call_sync_session(struct nfs_server *server, +extern int _nfs4_call_sync_session(struct rpc_clnt *clnt, + struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index bf1c68009ff..ad92bf731ff 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -15,6 +15,7 @@ #include <linux/string.h> #include <linux/sunrpc/clnt.h> #include <linux/vfs.h> +#include <linux/sunrpc/gss_api.h> #include "internal.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -27,7 +28,8 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; static struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, - struct nfs_fattr *fattr); + struct nfs_fattr *fattr, + rpc_authflavor_t authflavor); /* * nfs_path - reconstruct the path given an arbitrary dentry @@ -116,6 +118,100 @@ Elong: return ERR_PTR(-ENAMETOOLONG); } +#ifdef CONFIG_NFS_V4 +static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors, struct inode *inode) +{ + struct gss_api_mech *mech; + struct xdr_netobj oid; + int i; + rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; + + for (i = 0; i < flavors->num_flavors; i++) { + struct nfs4_secinfo_flavor *flavor; + flavor = &flavors->flavors[i]; + + if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { + pseudoflavor = flavor->flavor; + break; + } else if (flavor->flavor == RPC_AUTH_GSS) { + oid.len = flavor->gss.sec_oid4.len; + oid.data = flavor->gss.sec_oid4.data; + mech = gss_mech_get_by_OID(&oid); + if (!mech) + continue; + pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); + gss_mech_put(mech); + break; + } + } + + return pseudoflavor; +} + +static rpc_authflavor_t nfs_negotiate_security(const struct dentry *parent, const struct dentry *dentry) +{ + int status = 0; + struct page *page; + struct nfs4_secinfo_flavors *flavors; + int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); + rpc_authflavor_t flavor = RPC_AUTH_UNIX; + + secinfo = NFS_PROTO(parent->d_inode)->secinfo; + if (secinfo != NULL) { + page = alloc_page(GFP_KERNEL); + if (!page) { + status = -ENOMEM; + goto out; + } + flavors = page_address(page); + status = secinfo(parent->d_inode, &dentry->d_name, flavors); + flavor = nfs_find_best_sec(flavors, dentry->d_inode); + put_page(page); + } + + return flavor; + +out: + status = -ENOMEM; + return status; +} + +static rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent, + struct dentry *dentry, struct path *path, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + rpc_authflavor_t flavor; + struct rpc_clnt *clone; + struct rpc_auth *auth; + int err; + + flavor = nfs_negotiate_security(parent, path->dentry); + if (flavor < 0) + goto out; + clone = rpc_clone_client(server->client); + auth = rpcauth_create(flavor, clone); + if (!auth) { + flavor = -EIO; + goto out; + } + err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode, + &path->dentry->d_name, + fh, fattr); + if (err < 0) + flavor = err; +out: + return flavor; +} +#else /* CONFIG_NFS_V4 */ +static inline rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, + struct dentry *parent, struct dentry *dentry, + struct path *path, struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + return -EPERM; +} +#endif /* CONFIG_NFS_V4 */ + /* * nfs_d_automount - Handle crossing a mountpoint on the server * @path - The mountpoint @@ -136,6 +232,7 @@ struct vfsmount *nfs_d_automount(struct path *path) struct nfs_fh *fh = NULL; struct nfs_fattr *fattr = NULL; int err; + rpc_authflavor_t flavor = 1; dprintk("--> nfs_d_automount()\n"); @@ -153,9 +250,16 @@ struct vfsmount *nfs_d_automount(struct path *path) /* Look it up again to get its attributes */ parent = dget_parent(path->dentry); - err = server->nfs_client->rpc_ops->lookup(parent->d_inode, + err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, &path->dentry->d_name, fh, fattr); + if (err == -EPERM) { + flavor = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr); + if (flavor < 0) + err = flavor; + else + err = 0; + } dput(parent); if (err != 0) { mnt = ERR_PTR(err); @@ -165,7 +269,7 @@ struct vfsmount *nfs_d_automount(struct path *path) if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) mnt = nfs_do_refmount(path->dentry); else - mnt = nfs_do_submount(path->dentry, fh, fattr); + mnt = nfs_do_submount(path->dentry, fh, fattr, flavor); if (IS_ERR(mnt)) goto out; @@ -232,17 +336,20 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, * @dentry - parent directory * @fh - filehandle for new root dentry * @fattr - attributes for new root inode + * @authflavor - security flavor to use when performing the mount * */ static struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, - struct nfs_fattr *fattr) + struct nfs_fattr *fattr, + rpc_authflavor_t authflavor) { struct nfs_clone_mount mountdata = { .sb = dentry->d_sb, .dentry = dentry, .fh = fh, .fattr = fattr, + .authflavor = authflavor, }; struct vfsmount *mnt = ERR_PTR(-ENOMEM); char *page = (char *) __get_free_page(GFP_USER); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d0c80d8b3f9..38053d823eb 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -141,7 +141,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs3_proc_lookup(struct inode *dir, struct qstr *name, +nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs3_diropargs arg = { diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c64be1cff08..e1c261ddd65 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -57,7 +57,8 @@ enum nfs4_session_state { struct nfs4_minor_version_ops { u32 minor_version; - int (*call_sync)(struct nfs_server *server, + int (*call_sync)(struct rpc_clnt *clnt, + struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, @@ -262,6 +263,8 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *); extern int nfs4_init_session(struct nfs_server *server); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); +extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, + bool sync); static inline bool is_ds_only_client(struct nfs_client *clp) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 42855846481..6f8192f4cfc 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -154,6 +154,23 @@ static int filelayout_read_done_cb(struct rpc_task *task, } /* + * We reference the rpc_cred of the first WRITE that triggers the need for + * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. + * rfc5661 is not clear about which credential should be used. + */ +static void +filelayout_set_layoutcommit(struct nfs_write_data *wdata) +{ + if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || + wdata->res.verf->committed == NFS_FILE_SYNC) + return; + + pnfs_set_layoutcommit(wdata); + dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, + (unsigned long) wdata->lseg->pls_end_pos); +} + +/* * Call ops for the async read/write cases * In the case of dense layouts, the offset needs to be reset to its * original value. @@ -210,6 +227,38 @@ static int filelayout_write_done_cb(struct rpc_task *task, return -EAGAIN; } + filelayout_set_layoutcommit(data); + return 0; +} + +/* Fake up some data that will cause nfs_commit_release to retry the writes. */ +static void prepare_to_resend_writes(struct nfs_write_data *data) +{ + struct nfs_page *first = nfs_list_entry(data->pages.next); + + data->task.tk_status = 0; + memcpy(data->verf.verifier, first->wb_verf.verifier, + sizeof(first->wb_verf.verifier)); + data->verf.verifier[0]++; /* ensure verifier mismatch */ +} + +static int filelayout_commit_done_cb(struct rpc_task *task, + struct nfs_write_data *data) +{ + int reset = 0; + + if (filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, &reset) == -EAGAIN) { + dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", + __func__, data->ds_clp, data->ds_clp->cl_session); + if (reset) { + prepare_to_resend_writes(data); + filelayout_set_lo_fail(data->lseg); + } else + nfs_restart_rpc(task, data->ds_clp); + return -EAGAIN; + } + return 0; } @@ -240,6 +289,16 @@ static void filelayout_write_release(void *data) wdata->mds_ops->rpc_release(data); } +static void filelayout_commit_release(void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + nfs_commit_release_pages(wdata); + if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) + nfs_commit_clear_lock(NFS_I(wdata->inode)); + nfs_commitdata_release(wdata); +} + struct rpc_call_ops filelayout_read_call_ops = { .rpc_call_prepare = filelayout_read_prepare, .rpc_call_done = filelayout_read_call_done, @@ -252,6 +311,12 @@ struct rpc_call_ops filelayout_write_call_ops = { .rpc_release = filelayout_write_release, }; +struct rpc_call_ops filelayout_commit_call_ops = { + .rpc_call_prepare = filelayout_write_prepare, + .rpc_call_done = filelayout_write_call_done, + .rpc_release = filelayout_commit_release, +}; + static enum pnfs_try_status filelayout_read_pagelist(struct nfs_read_data *data) { @@ -320,10 +385,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) data->inode->i_ino, sync, (size_t) data->args.count, offset, ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); - /* We can't handle commit to ds yet */ - if (!FILELAYOUT_LSEG(lseg)->commit_through_mds) - data->args.stable = NFS_FILE_SYNC; - data->write_done_cb = filelayout_write_done_cb; data->ds_clp = ds->ds_clp; fh = nfs4_fl_select_ds_fh(lseg, j); @@ -441,12 +502,33 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, struct nfs4_layoutget_res *lgr, struct nfs4_deviceid *id) { - uint32_t *p = (uint32_t *)lgr->layout.buf; + struct xdr_stream stream; + struct xdr_buf buf = { + .pages = lgr->layoutp->pages, + .page_len = lgr->layoutp->len, + .buflen = lgr->layoutp->len, + .len = lgr->layoutp->len, + }; + struct page *scratch; + __be32 *p; uint32_t nfl_util; int i; dprintk("%s: set_layout_map Begin\n", __func__); + scratch = alloc_page(GFP_KERNEL); + if (!scratch) + return -ENOMEM; + + xdr_init_decode(&stream, &buf, NULL); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + + /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), + * num_fh (4) */ + p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20); + if (unlikely(!p)) + goto out_err; + memcpy(id, p, sizeof(*id)); p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); print_deviceid(id); @@ -468,32 +550,57 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, __func__, nfl_util, fl->num_fh, fl->first_stripe_index, fl->pattern_offset); + if (!fl->num_fh) + goto out_err; + fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), GFP_KERNEL); if (!fl->fh_array) - return -ENOMEM; + goto out_err; for (i = 0; i < fl->num_fh; i++) { /* Do we want to use a mempool here? */ fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); - if (!fl->fh_array[i]) { - filelayout_free_fh_array(fl); - return -ENOMEM; - } + if (!fl->fh_array[i]) + goto out_err_free; + + p = xdr_inline_decode(&stream, 4); + if (unlikely(!p)) + goto out_err_free; fl->fh_array[i]->size = be32_to_cpup(p++); if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { printk(KERN_ERR "Too big fh %d received %d\n", i, fl->fh_array[i]->size); - filelayout_free_fh_array(fl); - return -EIO; + goto out_err_free; } + + p = xdr_inline_decode(&stream, fl->fh_array[i]->size); + if (unlikely(!p)) + goto out_err_free; memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); - p += XDR_QUADLEN(fl->fh_array[i]->size); dprintk("DEBUG: %s: fh len %d\n", __func__, fl->fh_array[i]->size); } + __free_page(scratch); return 0; + +out_err_free: + filelayout_free_fh_array(fl); +out_err: + __free_page(scratch); + return -EIO; +} + +static void +filelayout_free_lseg(struct pnfs_layout_segment *lseg) +{ + struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); + + dprintk("--> %s\n", __func__); + nfs4_fl_put_deviceid(fl->dsaddr); + kfree(fl->commit_buckets); + _filelayout_free_lseg(fl); } static struct pnfs_layout_segment * @@ -514,17 +621,28 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, _filelayout_free_lseg(fl); return NULL; } - return &fl->generic_hdr; -} -static void -filelayout_free_lseg(struct pnfs_layout_segment *lseg) -{ - struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); - - dprintk("--> %s\n", __func__); - nfs4_fl_put_deviceid(fl->dsaddr); - _filelayout_free_lseg(fl); + /* This assumes there is only one IOMODE_RW lseg. What + * we really want to do is have a layout_hdr level + * dictionary of <multipath_list4, fh> keys, each + * associated with a struct list_head, populated by calls + * to filelayout_write_pagelist(). + * */ + if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) { + int i; + int size = (fl->stripe_type == STRIPE_SPARSE) ? + fl->dsaddr->ds_num : fl->dsaddr->stripe_count; + + fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL); + if (!fl->commit_buckets) { + filelayout_free_lseg(&fl->generic_hdr); + return NULL; + } + fl->number_of_buckets = size; + for (i = 0; i < size; i++) + INIT_LIST_HEAD(&fl->commit_buckets[i]); + } + return &fl->generic_hdr; } /* @@ -552,6 +670,191 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return (p_stripe == r_stripe); } +static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) +{ + return !FILELAYOUT_LS |