diff options
Diffstat (limited to 'fs/nfs/nfs4proc.c')
-rw-r--r-- | fs/nfs/nfs4proc.c | 2786 |
1 files changed, 2786 insertions, 0 deletions
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c new file mode 100644 index 00000000000..1d5cb3e80c3 --- /dev/null +++ b/fs/nfs/nfs4proc.c @@ -0,0 +1,2786 @@ +/* + * fs/nfs/nfs4proc.c + * + * Client-side procedure declarations for NFSv4. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/mm.h> +#include <linux/utsname.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <linux/smp_lock.h> +#include <linux/namei.h> + +#include "delegation.h" + +#define NFSDBG_FACILITY NFSDBG_PROC + +#define NFS4_POLL_RETRY_MIN (1*HZ) +#define NFS4_POLL_RETRY_MAX (15*HZ) + +static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); +static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); +static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); +static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception); +extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); +extern struct rpc_procinfo nfs4_procedures[]; + +extern nfs4_stateid zero_stateid; + +/* Prevent leaks of NFSv4 errors into userland */ +int nfs4_map_errors(int err) +{ + if (err < -1000) { + dprintk("%s could not handle NFSv4 error %d\n", + __FUNCTION__, -err); + return -EIO; + } + return err; +} + +/* + * This is our standard bitmap for GETATTR requests. + */ +const u32 nfs4_fattr_bitmap[2] = { + FATTR4_WORD0_TYPE + | FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEID, + FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY +}; + +const u32 nfs4_statfs_bitmap[2] = { + FATTR4_WORD0_FILES_AVAIL + | FATTR4_WORD0_FILES_FREE + | FATTR4_WORD0_FILES_TOTAL, + FATTR4_WORD1_SPACE_AVAIL + | FATTR4_WORD1_SPACE_FREE + | FATTR4_WORD1_SPACE_TOTAL +}; + +u32 nfs4_pathconf_bitmap[2] = { + FATTR4_WORD0_MAXLINK + | FATTR4_WORD0_MAXNAME, + 0 +}; + +const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE + | FATTR4_WORD0_MAXREAD + | FATTR4_WORD0_MAXWRITE + | FATTR4_WORD0_LEASE_TIME, + 0 +}; + +static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, + struct nfs4_readdir_arg *readdir) +{ + u32 *start, *p; + + BUG_ON(readdir->count < 80); + if (cookie > 2) { + readdir->cookie = (cookie > 2) ? cookie : 0; + memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier)); + return; + } + + readdir->cookie = 0; + memset(&readdir->verifier, 0, sizeof(readdir->verifier)); + if (cookie == 2) + return; + + /* + * NFSv4 servers do not return entries for '.' and '..' + * Therefore, we fake these entries here. We let '.' + * have cookie 0 and '..' have cookie 1. Note that + * when talking to the server, we always send cookie 0 + * instead of 1 or 2. + */ + start = p = (u32 *)kmap_atomic(*readdir->pages, KM_USER0); + + if (cookie == 0) { + *p++ = xdr_one; /* next */ + *p++ = xdr_zero; /* cookie, first word */ + *p++ = xdr_one; /* cookie, second word */ + *p++ = xdr_one; /* entry len */ + memcpy(p, ".\0\0\0", 4); /* entry */ + p++; + *p++ = xdr_one; /* bitmap length */ + *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ + *p++ = htonl(8); /* attribute buffer length */ + p = xdr_encode_hyper(p, dentry->d_inode->i_ino); + } + + *p++ = xdr_one; /* next */ + *p++ = xdr_zero; /* cookie, first word */ + *p++ = xdr_two; /* cookie, second word */ + *p++ = xdr_two; /* entry len */ + memcpy(p, "..\0\0", 4); /* entry */ + p++; + *p++ = xdr_one; /* bitmap length */ + *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ + *p++ = htonl(8); /* attribute buffer length */ + p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino); + + readdir->pgbase = (char *)p - (char *)start; + readdir->count -= readdir->pgbase; + kunmap_atomic(start, KM_USER0); +} + +static void +renew_lease(struct nfs_server *server, unsigned long timestamp) +{ + struct nfs4_client *clp = server->nfs4_state; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); +} + +static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinfo) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (cinfo->before == nfsi->change_attr && cinfo->atomic) + nfsi->change_attr = cinfo->after; +} + +static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +{ + struct inode *inode = state->inode; + + open_flags &= (FMODE_READ|FMODE_WRITE); + /* Protect against nfs4_find_state() */ + spin_lock(&inode->i_lock); + state->state |= open_flags; + /* NB! List reordering - see the reclaim code for why. */ + if ((open_flags & FMODE_WRITE) && 0 == state->nwriters++) + list_move(&state->open_states, &state->owner->so_states); + if (open_flags & FMODE_READ) + state->nreaders++; + memcpy(&state->stateid, stateid, sizeof(state->stateid)); + spin_unlock(&inode->i_lock); +} + +/* + * OPEN_RECLAIM: + * reclaim state on the server after a reboot. + * Assumes caller is holding the sp->so_sem + */ +static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_delegation *delegation = NFS_I(inode)->delegation; + struct nfs_openargs o_arg = { + .fh = NFS_FH(inode), + .seqid = sp->so_seqid, + .id = sp->so_id, + .open_flags = state->state, + .clientid = server->nfs4_state->cl_clientid, + .claim = NFS4_OPEN_CLAIM_PREVIOUS, + .bitmask = server->attr_bitmask, + }; + struct nfs_openres o_res = { + .server = server, /* Grrr */ + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], + .rpc_argp = &o_arg, + .rpc_resp = &o_res, + .rpc_cred = sp->so_cred, + }; + int status; + + if (delegation != NULL) { + if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { + memcpy(&state->stateid, &delegation->stateid, + sizeof(state->stateid)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + return 0; + } + o_arg.u.delegation_type = delegation->type; + } + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_seqid(status, sp); + if (status == 0) { + memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); + if (o_res.delegation_type != 0) { + nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); + /* Did the server issue an immediate delegation recall? */ + if (o_res.do_recall) + nfs_async_inode_return_delegation(inode, &o_res.stateid); + } + } + clear_bit(NFS_DELEGATED_STATE, &state->flags); + /* Ensure we update the inode attributes */ + NFS_CACHEINV(inode); + return status; +} + +static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_open_reclaim(sp, state); + switch (err) { + case 0: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + return err; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +{ + struct nfs4_state_owner *sp = state->owner; + struct inode *inode = dentry->d_inode; + struct nfs_server *server = NFS_SERVER(inode); + struct dentry *parent = dget_parent(dentry); + struct nfs_openargs arg = { + .fh = NFS_FH(parent->d_inode), + .clientid = server->nfs4_state->cl_clientid, + .name = &dentry->d_name, + .id = sp->so_id, + .server = server, + .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_DELEGATE_CUR, + }; + struct nfs_openres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = sp->so_cred, + }; + int status = 0; + + down(&sp->so_sema); + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + goto out; + if (state->state == 0) + goto out; + arg.seqid = sp->so_seqid; + arg.open_flags = state->state; + memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_seqid(status, sp); + if (status >= 0) { + memcpy(state->stateid.data, res.stateid.data, + sizeof(state->stateid.data)); + clear_bit(NFS_DELEGATED_STATE, &state->flags); + } +out: + up(&sp->so_sema); + dput(parent); + return status; +} + +int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + int err; + do { + err = _nfs4_open_delegation_recall(dentry, state); + switch (err) { + case 0: + return err; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + /* Don't recall a delegation if it was lost */ + nfs4_schedule_state_recovery(server->nfs4_state); + return err; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) +{ + struct nfs_open_confirmargs arg = { + .fh = fh, + .seqid = sp->so_seqid, + .stateid = *stateid, + }; + struct nfs_open_confirmres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = sp->so_cred, + }; + int status; + + status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); + nfs4_increment_seqid(status, sp); + if (status >= 0) + memcpy(stateid, &res.stateid, sizeof(*stateid)); + return status; +} + +static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, struct nfs_openargs *o_arg, struct nfs_openres *o_res) +{ + struct nfs_server *server = NFS_SERVER(dir); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN], + .rpc_argp = o_arg, + .rpc_resp = o_res, + .rpc_cred = sp->so_cred, + }; + int status; + + /* Update sequence id. The caller must serialize! */ + o_arg->seqid = sp->so_seqid; + o_arg->id = sp->so_id; + o_arg->clientid = sp->so_client->cl_clientid; + + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_seqid(status, sp); + if (status != 0) + goto out; + update_changeattr(dir, &o_res->cinfo); + if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { + status = _nfs4_proc_open_confirm(server->client, &o_res->fh, + sp, &o_res->stateid); + if (status != 0) + goto out; + } + if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) + status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); +out: + return status; +} + +static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags) +{ + struct nfs_access_entry cache; + int mask = 0; + int status; + + if (openflags & FMODE_READ) + mask |= MAY_READ; + if (openflags & FMODE_WRITE) + mask |= MAY_WRITE; + status = nfs_access_get_cached(inode, cred, &cache); + if (status == 0) + goto out; + + /* Be clever: ask server to check for all possible rights */ + cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.cred = cred; + cache.jiffies = jiffies; + status = _nfs4_proc_access(inode, &cache); + if (status != 0) + return status; + nfs_access_add_cache(inode, &cache); +out: + if ((cache.mask & mask) == mask) + return 0; + return -EACCES; +} + +/* + * OPEN_EXPIRED: + * reclaim state on the server after a network partition. + * Assumes caller holds the appropriate lock + */ +static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +{ + struct dentry *parent = dget_parent(dentry); + struct inode *dir = parent->d_inode; + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_delegation *delegation = NFS_I(inode)->delegation; + struct nfs_fattr f_attr = { + .valid = 0, + }; + struct nfs_openargs o_arg = { + .fh = NFS_FH(dir), + .open_flags = state->state, + .name = &dentry->d_name, + .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_NULL, + }; + struct nfs_openres o_res = { + .f_attr = &f_attr, + .server = server, + }; + int status = 0; + + if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { + status = _nfs4_do_access(inode, sp->so_cred, state->state); + if (status < 0) + goto out; + memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + goto out; + } + status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); + if (status != 0) + goto out_nodeleg; + /* Check if files differ */ + if ((f_attr.mode & S_IFMT) != (inode->i_mode & S_IFMT)) + goto out_stale; + /* Has the file handle changed? */ + if (nfs_compare_fh(&o_res.fh, NFS_FH(inode)) != 0) { + /* Verify if the change attributes are the same */ + if (f_attr.change_attr != NFS_I(inode)->change_attr) + goto out_stale; + if (nfs_size_to_loff_t(f_attr.size) != inode->i_size) + goto out_stale; + /* Lets just pretend that this is the same file */ + nfs_copy_fh(NFS_FH(inode), &o_res.fh); + NFS_I(inode)->fileid = f_attr.fileid; + } + memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); + if (o_res.delegation_type != 0) { + if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) + nfs_inode_set_delegation(inode, sp->so_cred, &o_res); + else + nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); + } +out_nodeleg: + clear_bit(NFS_DELEGATED_STATE, &state->flags); +out: + dput(parent); + return status; +out_stale: + status = -ESTALE; + /* Invalidate the state owner so we don't ever use it again */ + nfs4_drop_state_owner(sp); + d_drop(dentry); + /* Should we be trying to close that stateid? */ + goto out_nodeleg; +} + +static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_open_context *ctx; + int status; + + spin_lock(&state->inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + if (ctx->state != state) + continue; + get_nfs_open_context(ctx); + spin_unlock(&state->inode->i_lock); + status = _nfs4_open_expired(sp, state, ctx->dentry); + put_nfs_open_context(ctx); + return status; + } + spin_unlock(&state->inode->i_lock); + return -ENOENT; +} + +/* + * Returns an nfs4_state + an extra reference to the inode + */ +static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) +{ + struct nfs_delegation *delegation; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_state_owner *sp = NULL; + struct nfs4_state *state = NULL; + int open_flags = flags & (FMODE_READ|FMODE_WRITE); + int err; + + /* Protect against reboot recovery - NOTE ORDER! */ + down_read(&clp->cl_sem); + /* Protect against delegation recall */ + down_read(&nfsi->rwsem); + delegation = NFS_I(inode)->delegation; + err = -ENOENT; + if (delegation == NULL || (delegation->type & open_flags) != open_flags) + goto out_err; + err = -ENOMEM; + if (!(sp = nfs4_get_state_owner(server, cred))) { + dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); + goto out_err; + } + down(&sp->so_sema); + state = nfs4_get_open_state(inode, sp); + if (state == NULL) + goto out_err; + + err = -ENOENT; + if ((state->state & open_flags) == open_flags) { + spin_lock(&inode->i_lock); + if (open_flags & FMODE_READ) + state->nreaders++; + if (open_flags & FMODE_WRITE) + state->nwriters++; + spin_unlock(&inode->i_lock); + goto out_ok; + } else if (state->state != 0) + goto out_err; + + lock_kernel(); + err = _nfs4_do_access(inode, cred, open_flags); + unlock_kernel(); + if (err != 0) + goto out_err; + set_bit(NFS_DELEGATED_STATE, &state->flags); + update_open_stateid(state, &delegation->stateid, open_flags); +out_ok: + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&nfsi->rwsem); + up_read(&clp->cl_sem); + igrab(inode); + *res = state; + return 0; +out_err: + if (sp != NULL) { + if (state != NULL) + nfs4_put_open_state(state); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + } + up_read(&nfsi->rwsem); + up_read(&clp->cl_sem); + return err; +} + +static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) +{ + struct nfs4_exception exception = { }; + struct nfs4_state *res; + int err; + + do { + err = _nfs4_open_delegated(inode, flags, cred, &res); + if (err == 0) + break; + res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode), + err, &exception)); + } while (exception.retry); + return res; +} + +/* + * Returns an nfs4_state + an referenced inode + */ +static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +{ + struct nfs4_state_owner *sp; + struct nfs4_state *state = NULL; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_client *clp = server->nfs4_state; + struct inode *inode = NULL; + int status; + struct nfs_fattr f_attr = { + .valid = 0, + }; + struct nfs_openargs o_arg = { + .fh = NFS_FH(dir), + .open_flags = flags, + .name = &dentry->d_name, + .server = server, + .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_NULL, + }; + struct nfs_openres o_res = { + .f_attr = &f_attr, + .server = server, + }; + + /* Protect against reboot recovery conflicts */ + down_read(&clp->cl_sem); + status = -ENOMEM; + if (!(sp = nfs4_get_state_owner(server, cred))) { + dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); + goto out_err; + } + if (flags & O_EXCL) { + u32 *p = (u32 *) o_arg.u.verifier.data; + p[0] = jiffies; + p[1] = current->pid; + } else + o_arg.u.attrs = sattr; + /* Serialization for the sequence id */ + down(&sp->so_sema); + + status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); + if (status != 0) + goto out_err; + + status = -ENOMEM; + inode = nfs_fhget(dir->i_sb, &o_res.fh, &f_attr); + if (!inode) + goto out_err; + state = nfs4_get_open_state(inode, sp); + if (!state) + goto out_err; + update_open_stateid(state, &o_res.stateid, flags); + if (o_res.delegation_type != 0) + nfs_inode_set_delegation(inode, cred, &o_res); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&clp->cl_sem); + *res = state; + return 0; +out_err: + if (sp != NULL) { + if (state != NULL) + nfs4_put_open_state(state); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + } + /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ + up_read(&clp->cl_sem); + if (inode != NULL) + iput(inode); + *res = NULL; + return status; +} + + +static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) +{ + struct nfs4_exception exception = { }; + struct nfs4_state *res; + int status; + + do { + status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res); + if (status == 0) + break; + /* NOTE: BAD_SEQID means the server and client disagree about the + * book-keeping w.r.t. state-changing operations + * (OPEN/CLOSE/LOCK/LOCKU...) + * It is actually a sign of a bug on the client or on the server. + * + * If we receive a BAD_SEQID error in the particular case of + * doing an OPEN, we assume that nfs4_increment_seqid() will + * have unhashed the old state_owner for us, and that we can + * therefore safely retry using a new one. We should still warn + * the user though... + */ + if (status == -NFS4ERR_BAD_SEQID) { + printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); + exception.retry = 1; + continue; + } + res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + status, &exception)); + } while (exception.retry); + return res; +} + +static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, + struct nfs_fh *fhandle, struct iattr *sattr, + struct nfs4_state *state) +{ + struct nfs_setattrargs arg = { + .fh = fhandle, + .iap = sattr, + .server = server, + .bitmask = server->attr_bitmask, + }; + struct nfs_setattrres res = { + .fattr = fattr, + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + + fattr->valid = 0; + + if (state != NULL) + msg.rpc_cred = state->owner->so_cred; + if (sattr->ia_valid & ATTR_SIZE) + nfs4_copy_stateid(&arg.stateid, state, NULL); + else + memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); + + return rpc_call_sync(server->client, &msg, 0); +} + +static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, + struct nfs_fh *fhandle, struct iattr *sattr, + struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_do_setattr(server, fattr, fhandle, sattr, + state), + &exception); + } while (exception.retry); + return err; +} + +struct nfs4_closedata { + struct inode *inode; + struct nfs4_state *state; + struct nfs_closeargs arg; + struct nfs_closeres res; +}; + +static void nfs4_close_done(struct rpc_task *task) +{ + struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; + struct nfs4_state *state = calldata->state; + struct nfs4_state_owner *sp = state->owner; + struct nfs_server *server = NFS_SERVER(calldata->inode); + + /* hmm. we are done with the inode, and in the process of freeing + * the state_owner. we keep this around to process errors + */ + nfs4_increment_seqid(task->tk_status, sp); + switch (task->tk_status) { + case 0: + memcpy(&state->stateid, &calldata->res.stateid, + sizeof(state->stateid)); + break; + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + state->state = calldata->arg.open_flags; + nfs4_schedule_state_recovery(server->nfs4_state); + break; + default: + if (nfs4_async_handle_error(task, server) == -EAGAIN) { + rpc_restart_call(task); + return; + } + } + state->state = calldata->arg.open_flags; + nfs4_put_open_state(state); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&server->nfs4_state->cl_sem); + kfree(calldata); +} + +static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], + .rpc_argp = &calldata->arg, + .rpc_resp = &calldata->res, + .rpc_cred = calldata->state->owner->so_cred, + }; + if (calldata->arg.open_flags != 0) + msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; + return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata); +} + +/* + * It is possible for data to be read/written from a mem-mapped file + * after the sys_close call (which hits the vfs layer as a flush). + * This means that we can't safely call nfsv4 close on a file until + * the inode is cleared. This in turn means that we are not good + * NFSv4 citizens - we do not indicate to the server to update the file's + * share state even when we are done with one of the three share + * stateid's in the inode. + * + * NOTE: Caller must be holding the sp->so_owner semaphore! + */ +int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) +{ + struct nfs4_closedata *calldata; + int status; + + /* Tell caller we're done */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + state->state = mode; + return 0; + } + calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + return -ENOMEM; + calldata->inode = inode; + calldata->state = state; + calldata->arg.fh = NFS_FH(inode); + /* Serialization for the sequence id */ + calldata->arg.seqid = state->owner->so_seqid; + calldata->arg.open_flags = mode; + memcpy(&calldata->arg.stateid, &state->stateid, + sizeof(calldata->arg.stateid)); + status = nfs4_close_call(NFS_SERVER(inode)->client, calldata); + /* + * Return -EINPROGRESS on success in order to indicate to the + * caller that an asynchronous RPC call has been launched, and + * that it will release the semaphores on completion. + */ + return (status == 0) ? -EINPROGRESS : status; +} + +struct inode * +nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct iattr attr; + struct rpc_cred *cred; + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { + attr.ia_mode = nd->intent.open.create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; + BUG_ON(nd->intent.open.flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + if (IS_ERR(cred)) + return (struct inode *)cred; + state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; + return state->inode; +} + +int +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +{ + struct rpc_cred *cred; + struct nfs4_state *state; + struct inode *inode; + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); + state = nfs4_open_delegated(dentry->d_inode, openflags, cred); + if (IS_ERR(state)) + state = nfs4_do_open(dir, dentry, openflags, NULL, cred); + put_rpccred(cred); + if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) + return 1; + if (IS_ERR(state)) + return 0; + inode = state->inode; + if (inode == dentry->d_inode) { + iput(inode); + return 1; + } + d_drop(dentry); + nfs4_close_state(state, openflags); + iput(inode); + return 0; +} + + +static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +{ + struct nfs4_server_caps_res res = {}; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS], + .rpc_argp = fhandle, + .rpc_resp = &res, + }; + int status; + + status = rpc_call_sync(server->client, &msg, 0); + if (status == 0) { + memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) + server->caps |= NFS_CAP_ACLS; + if (res.has_links != 0) + server->caps |= NFS_CAP_HARDLINKS; + if (res.has_symlinks != 0) + server->caps |= NFS_CAP_SYMLINKS; + server->acl_bitmask = res.acl_bitmask; + } + return status; +} + +static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_server_capabilities(server, fhandle), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs_fattr * fattr = info->fattr; + struct nfs4_lookup_root_arg args = { + .bitmask = nfs4_fattr_bitmap, + }; + struct nfs4_lookup_res res = { + .server = server, + .fattr = fattr, + .fh = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP_ROOT], + .rpc_argp = &args, + .rpc_resp = &res, + }; + fattr->valid = 0; + return rpc_call_sync(server->client, &msg, 0); +} + +static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_lookup_root(server, fhandle, info), + &exception); + } while (exception.retry); + return err; +} + +static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs_fattr * fattr = info->fattr; + unsigned char * p; + struct qstr q; + struct nfs4_lookup_arg args = { + .dir_fh = fhandle, + .name = &q, + .bitmask = nfs4_fattr_bitmap, + }; + struct nfs4_lookup_res res = { + .server = server, + .fattr = fattr, + .fh = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + /* + * Now we do a separate LOOKUP for each component of the mount path. + * The LOOKUPs are done separately so that we can conveniently + * catch an ERR_WRONGSEC if it occurs along the way... + */ + status = nfs4_lookup_root(server, fhandle, info); + if (status) + goto out; + + p = server->mnt_path; + for (;;) { + struct nfs4_exception exception = { }; + + while (*p == '/') + p++; + if (!*p) + break; + q.name = p; + while (*p && (*p != '/')) + p++; + q.len = p - q.name; + + do { + fattr->valid = 0; + status = nfs4_handle_exception(server, + rpc_call_sync(server->client, &msg, 0), + &exception); + } while (exception.retry); + if (status == 0) + continue; + if (status == -ENOENT) { + printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path); + printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n"); + } + break; + } + if (status == 0) + status = nfs4_server_capabilities(server, fhandle); + if (status == 0) + status = nfs4_do_fsinfo(server, fhandle, info); +out: + return status; +} + +static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs4_getattr_arg args = { + .fh = fhandle, + .bitmask = server->attr_bitmask, + }; + struct nfs4_getattr_res res = { + .fattr = fattr, + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + fattr->valid = 0; + return rpc_call_sync(server->client, &msg, 0); +} + +static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_proc_getattr(server, fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + +/* + * The file is not closed if it is opened due to the a request to change + * the size of the file. The open call will not be needed once the + * VFS layer lookup-intents are implemented. + * + * Close is called when the inode is destroyed. + * If we haven't opened the file for O_WRONLY, we + * need to in the size_change case to obtain a stateid. + * + * Got race? + * Because OPEN is always done by name in nfsv4, it is + * possible that we opened a different file by the same + * name. We can recognize this race condition, but we + * can't do anything about it besides returning an error. + * + * This will be fixed with VFS changes (lookup-intent). + */ +static int +nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, + struct iattr *sattr) +{ + struct inode * inode = dentry->d_inode; + int size_change = sattr->ia_valid & ATTR_SIZE; + struct nfs4_state *state = NULL; + int need_iput = 0; + int status; + + fattr->valid = 0; + + if (size_change) { + struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client-> |