diff options
32 files changed, 2701 insertions, 568 deletions
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX index 1716874a651..66eb6c8c533 100644 --- a/Documentation/filesystems/nfs/00-INDEX +++ b/Documentation/filesystems/nfs/00-INDEX @@ -20,3 +20,5 @@ rpc-cache.txt - introduction to the caching mechanisms in the sunrpc layer. idmapper.txt - information for configuring request-keys to be used by idmapper +knfsd-rpcgss.txt + - Information on GSS authentication support in the NFS Server diff --git a/Documentation/filesystems/nfs/rpc-server-gss.txt b/Documentation/filesystems/nfs/rpc-server-gss.txt new file mode 100644 index 00000000000..716f4be8e8b --- /dev/null +++ b/Documentation/filesystems/nfs/rpc-server-gss.txt @@ -0,0 +1,91 @@ + +rpcsec_gss support for kernel RPC servers +========================================= + +This document gives references to the standards and protocols used to +implement RPCGSS authentication in kernel RPC servers such as the NFS +server and the NFS client's NFSv4.0 callback server. (But note that +NFSv4.1 and higher don't require the client to act as a server for the +purposes of authentication.) + +RPCGSS is specified in a few IETF documents: + - RFC2203 v1: http://tools.ietf.org/rfc/rfc2203.txt + - RFC5403 v2: http://tools.ietf.org/rfc/rfc5403.txt +and there is a 3rd version being proposed: + - http://tools.ietf.org/id/draft-williams-rpcsecgssv3.txt + (At draft n. 02 at the time of writing) + +Background +---------- + +The RPCGSS Authentication method describes a way to perform GSSAPI +Authentication for NFS. Although GSSAPI is itself completely mechanism +agnostic, in many cases only the KRB5 mechanism is supported by NFS +implementations. + +The Linux kernel, at the moment, supports only the KRB5 mechanism, and +depends on GSSAPI extensions that are KRB5 specific. + +GSSAPI is a complex library, and implementing it completely in kernel is +unwarranted. However GSSAPI operations are fundementally separable in 2 +parts: +- initial context establishment +- integrity/privacy protection (signing and encrypting of individual + packets) + +The former is more complex and policy-independent, but less +performance-sensitive. The latter is simpler and needs to be very fast. + +Therefore, we perform per-packet integrity and privacy protection in the +kernel, but leave the initial context establishment to userspace. We +need upcalls to request userspace to perform context establishment. + +NFS Server Legacy Upcall Mechanism +---------------------------------- + +The classic upcall mechanism uses a custom text based upcall mechanism +to talk to a custom daemon called rpc.svcgssd that is provide by the +nfs-utils package. + +This upcall mechanism has 2 limitations: + +A) It can handle tokens that are no bigger than 2KiB + +In some Kerberos deployment GSSAPI tokens can be quite big, up and +beyond 64KiB in size due to various authorization extensions attacked to +the Kerberos tickets, that needs to be sent through the GSS layer in +order to perform context establishment. + +B) It does not properly handle creds where the user is member of more +than a few housand groups (the current hard limit in the kernel is 65K +groups) due to limitation on the size of the buffer that can be send +back to the kernel (4KiB). + +NFS Server New RPC Upcall Mechanism +----------------------------------- + +The newer upcall mechanism uses RPC over a unix socket to a daemon +called gss-proxy, implemented by a userspace program called Gssproxy. + +The gss_proxy RPC protocol is currently documented here: + + https://fedorahosted.org/gss-proxy/wiki/ProtocolDocumentation + +This upcall mechanism uses the kernel rpc client and connects to the gssproxy +userspace program over a regular unix socket. The gssproxy protocol does not +suffer from the size limitations of the legacy protocol. + +Negotiating Upcall Mechanisms +----------------------------- + +To provide backward compatibility, the kernel defaults to using the +legacy mechanism. To switch to the new mechanism, gss-proxy must bind +to /var/run/gssproxy.sock and then write "1" to +/proc/net/rpc/use-gss-proxy. If gss-proxy dies, it must repeat both +steps. + +Once the upcall mechanism is chosen, it cannot be changed. To prevent +locking into the legacy mechanisms, the above steps must be performed +before starting nfsd. Whoever starts nfsd can guarantee this by reading +from /proc/net/rpc/use-gss-proxy and checking that it contains a +"1"--the read will block until gss-proxy has done its write to the file. diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 87fd1410b73..d5c5b3e0026 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -82,6 +82,7 @@ int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); +int nfsd_reply_cache_stats_open(struct inode *, struct file *); #ifdef CONFIG_NFSD_V4 void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp); diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 1051bebff1b..849a7c3ced2 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -80,6 +80,7 @@ struct nfsd_net { */ struct list_head client_lru; struct list_head close_lru; + struct list_head del_recall_lru; struct delayed_work laundromat_work; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 99bc85ff021..7f05cd140de 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -37,6 +37,7 @@ #include "nfsd.h" #include "state.h" #include "netns.h" +#include "xdr4cb.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -53,30 +54,6 @@ enum { NFSPROC4_CLNT_CB_SEQUENCE, }; -#define NFS4_MAXTAGLEN 20 - -#define NFS4_enc_cb_null_sz 0 -#define NFS4_dec_cb_null_sz 0 -#define cb_compound_enc_hdr_sz 4 -#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) -#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) -#define cb_sequence_enc_sz (sessionid_sz + 4 + \ - 1 /* no referring calls list yet */) -#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) - -#define op_enc_sz 1 -#define op_dec_sz 2 -#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) -#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) -#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ - cb_sequence_enc_sz + \ - 1 + enc_stateid_sz + \ - enc_nfs4_fh_sz) - -#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ - cb_sequence_dec_sz + \ - op_dec_sz) - struct nfs4_cb_compound_hdr { /* args */ u32 ident; /* minorversion 0 only */ @@ -817,8 +794,7 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; u32 minorversion = clp->cl_minorversion; cb->cb_minorversion = minorversion; @@ -839,8 +815,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) static void nfsd4_cb_done(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; dprintk("%s: minorversion=%d\n", __func__, clp->cl_minorversion); @@ -863,7 +838,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *current_rpc_client = clp->cl_cb_client; nfsd4_cb_done(task, calldata); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ae73175e6e6..8ae5abfe6ba 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -191,9 +191,18 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) return nfserr_symlink; } +static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh *resfh) +{ + if (nfsd4_has_session(cstate)) + return; + fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, + &resfh->fh_handle); +} + static __be32 -do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { + struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *resfh; int accmode; __be32 status; @@ -252,9 +261,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o if (is_create_with_attrs(open) && open->op_acl != NULL) do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval); - /* set reply cache */ - fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, - &resfh->fh_handle); + nfsd4_set_open_owner_reply_cache(cstate, open, resfh); accmode = NFSD_MAY_NOP; if (open->op_created) accmode |= NFSD_MAY_OWNER_OVERRIDE; @@ -268,8 +275,9 @@ out: } static __be32 -do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { + struct svc_fh *current_fh = &cstate->current_fh; __be32 status; /* We don't know the target directory, and therefore can not @@ -278,9 +286,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); - /* set replay cache */ - fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, - ¤t_fh->fh_handle); + nfsd4_set_open_owner_reply_cache(cstate, open, current_fh); open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && (open->op_iattr.ia_size == 0); @@ -351,6 +357,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (status) goto out; + if (open->op_xdr_error) { + status = open->op_xdr_error; + goto out; + } status = nfsd4_check_open_attributes(rqstp, cstate, open); if (status) @@ -368,8 +378,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_NULL: - status = do_open_lookup(rqstp, &cstate->current_fh, - open); + status = do_open_lookup(rqstp, cstate, open); if (status) goto out; break; @@ -382,8 +391,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: - status = do_open_fhandle(rqstp, &cstate->current_fh, - open); + status = do_open_fhandle(rqstp, cstate, open); if (status) goto out; break; @@ -409,14 +417,33 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, WARN_ON(status && open->op_created); out: nfsd4_cleanup_open_state(open, status); - if (open->op_openowner) + if (open->op_openowner && !nfsd4_has_session(cstate)) cstate->replay_owner = &open->op_openowner->oo_owner; - else + nfsd4_bump_seqid(cstate, status); + if (!cstate->replay_owner) nfs4_unlock_state(); return status; } /* + * OPEN is the only seqid-mutating operation whose decoding can fail + * with a seqid-mutating error (specifically, decoding of user names in + * the attributes). Therefore we have to do some processing to look up + * the stateowner so that we can bump the seqid. + */ +static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_op *op) +{ + struct nfsd4_open *open = (struct nfsd4_open *)&op->u; + + if (!seqid_mutating_err(ntohl(op->status))) + return op->status; + if (nfsd4_has_session(cstate)) + return op->status; + open->op_xdr_error = op->status; + return nfsd4_open(rqstp, cstate, open); +} + +/* * filehandle-manipulating ops. */ static __be32 @@ -786,21 +813,11 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, rename->rn_snamelen, &cstate->current_fh, rename->rn_tname, rename->rn_tnamelen); - - /* the underlying filesystem returns different error's than required - * by NFSv4. both save_fh and current_fh have been verified.. */ - if (status == nfserr_isdir) - status = nfserr_exist; - else if ((status == nfserr_notdir) && - (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) && - S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode))) - status = nfserr_exist; - - if (!status) { - set_change_info(&rename->rn_sinfo, &cstate->current_fh); - set_change_info(&rename->rn_tinfo, &cstate->save_fh); - } - return status; + if (status) + return status; + set_change_info(&rename->rn_sinfo, &cstate->current_fh); + set_change_info(&rename->rn_tinfo, &cstate->save_fh); + return nfs_ok; } static __be32 @@ -931,14 +948,14 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, stateid, WR_STATE, &filp); - if (filp) - get_file(filp); - nfs4_unlock_state(); - if (status) { + nfs4_unlock_state(); dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; } + if (filp) + get_file(filp); + nfs4_unlock_state(); cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; @@ -1244,8 +1261,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * for example, if there is a miscellaneous XDR error * it will be set to nfserr_bad_xdr. */ - if (op->status) + if (op->status) { + if (op->opnum == OP_OPEN) + op->status = nfsd4_open_omfg(rqstp, cstate, op); goto encode_op; + } /* We must be able to encode a successful response to * this operation, with enough room left over to encode a @@ -1282,12 +1302,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (op->status) goto encode_op; - if (opdesc->op_func) { - if (opdesc->op_get_currentstateid) - opdesc->op_get_currentstateid(cstate, &op->u); - op->status = opdesc->op_func(rqstp, cstate, &op->u); - } else - BUG_ON(op->status == nfs_ok); + if (opdesc->op_get_currentstateid) + opdesc->op_get_currentstateid(cstate, &op->u); + op->status = opdesc->op_func(rqstp, cstate, &op->u); if (!op->status) { if (opdesc->op_set_currentstateid) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 417c8487774..316ec843dec 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -42,6 +42,7 @@ #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/addr.h> #include "xdr4.h" +#include "xdr4cb.h" #include "vfs.h" #include "current_stateid.h" @@ -94,17 +95,32 @@ nfs4_lock_state(void) mutex_lock(&client_mutex); } -static void free_session(struct kref *); +static void free_session(struct nfsd4_session *); -/* Must be called under the client_lock */ -static void nfsd4_put_session_locked(struct nfsd4_session *ses) +void nfsd4_put_session(struct nfsd4_session *ses) +{ + atomic_dec(&ses->se_ref); +} + +static bool is_session_dead(struct nfsd4_session *ses) +{ + return ses->se_flags & NFS4_SESSION_DEAD; +} + +static __be32 mark_session_dead_locked(struct nfsd4_session *ses) { - kref_put(&ses->se_ref, free_session); + if (atomic_read(&ses->se_ref)) + return nfserr_jukebox; + ses->se_flags |= NFS4_SESSION_DEAD; + return nfs_ok; } -static void nfsd4_get_session(struct nfsd4_session *ses) +static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) { - kref_get(&ses->se_ref); + if (is_session_dead(ses)) + return nfserr_badsession; + atomic_inc(&ses->se_ref); + return nfs_ok; } void @@ -113,6 +129,90 @@ nfs4_unlock_state(void) mutex_unlock(&client_mutex); } +static bool is_client_expired(struct nfs4_client *clp) +{ + return clp->cl_time == 0; +} + +static __be32 mark_client_expired_locked(struct nfs4_client *clp) +{ + if (atomic_read(&clp->cl_refcount)) + return nfserr_jukebox; + clp->cl_time = 0; + return nfs_ok; +} + +static __be32 mark_client_expired(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + __be32 ret; + + spin_lock(&nn->client_lock); + ret = mark_client_expired_locked(clp); + spin_unlock(&nn->client_lock); + return ret; +} + +static __be32 get_client_locked(struct nfs4_client *clp) +{ + if (is_client_expired(clp)) + return nfserr_expired; + atomic_inc(&clp->cl_refcount); + return nfs_ok; +} + +/* must be called under the client_lock */ +static inline void +renew_client_locked(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (is_client_expired(clp)) { + WARN_ON(1); + printk("%s: client (clientid %08x/%08x) already expired\n", + __func__, + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + return; + } + + dprintk("renewing client (clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + list_move_tail(&clp->cl_lru, &nn->client_lru); + clp->cl_time = get_seconds(); +} + +static inline void +renew_client(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + renew_client_locked(clp); + spin_unlock(&nn->client_lock); +} + +static void put_client_renew_locked(struct nfs4_client *clp) +{ + if (!atomic_dec_and_test(&clp->cl_refcount)) + return; + if (!is_client_expired(clp)) + renew_client_locked(clp); +} + +void put_client_renew(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) + return; + if (!is_client_expired(clp)) + renew_client_locked(clp); + spin_unlock(&nn->client_lock); +} + + static inline u32 opaque_hashval(const void *ptr, int nbytes) { @@ -126,8 +226,6 @@ opaque_hashval(const void *ptr, int nbytes) return x; } -static struct list_head del_recall_lru; - static void nfsd4_free_file(struct nfs4_file *f) { kmem_cache_free(file_slab, f); @@ -137,7 +235,7 @@ static inline void put_nfs4_file(struct nfs4_file *fi) { if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { - list_del(&fi->fi_hash); + hlist_del(&fi->fi_hash); spin_unlock(&recall_lock); iput(fi->fi_inode); nfsd4_free_file(fi); @@ -181,7 +279,7 @@ static unsigned int file_hashval(struct inode *ino) return hash_ptr(ino, FILE_HASH_BITS); } -static struct list_head file_hashtbl[FILE_HASH_SIZE]; +static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) { @@ -210,13 +308,7 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { if (atomic_dec_and_test(&fp->fi_access[oflag])) { nfs4_file_put_fd(fp, oflag); - /* - * It's also safe to get rid of the RDWR open *if* - * we no longer have need of the other kind of access - * or if we already have the other kind of open: - */ - if (fp->fi_fds[1-oflag] - || atomic_read(&fp->fi_access[1 - oflag]) == 0) + if (atomic_read(&fp->fi_access[1 - oflag]) == 0) nfs4_file_put_fd(fp, O_RDWR); } } @@ -262,7 +354,7 @@ kmem_cache *slab) */ return stid; out_free: - kfree(stid); + kmem_cache_free(slab, stid); return NULL; } @@ -313,21 +405,18 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv return dp; } -static void free_stid(struct nfs4_stid *s, struct kmem_cache *slab) +static void remove_stid(struct nfs4_stid *s) { struct idr *stateids = &s->sc_client->cl_stateids; idr_remove(stateids, s->sc_stateid.si_opaque.so_id); - kmem_cache_free(slab, s); } void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { - dprintk("NFSD: freeing dp %p\n",dp); - put_nfs4_file(dp->dl_file); - free_stid(&dp->dl_stid, deleg_slab); + kmem_cache_free(deleg_slab, dp); num_delegations--; } } @@ -351,16 +440,45 @@ static void unhash_stid(struct nfs4_stid *s) static void unhash_delegation(struct nfs4_delegation *dp) { - unhash_stid(&dp->dl_stid); list_del_init(&dp->dl_perclnt); spin_lock(&recall_lock); list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); spin_unlock(&recall_lock); nfs4_put_deleg_lease(dp->dl_file); + put_nfs4_file(dp->dl_file); + dp->dl_file = NULL; +} + + + +static void destroy_revoked_delegation(struct nfs4_delegation *dp) +{ + list_del_init(&dp->dl_recall_lru); + remove_stid(&dp->dl_stid); nfs4_put_delegation(dp); } +static void destroy_delegation(struct nfs4_delegation *dp) +{ + unhash_delegation(dp); + remove_stid(&dp->dl_stid); + nfs4_put_delegation(dp); +} + +static void revoke_delegation(struct nfs4_delegation *dp) +{ + struct nfs4_client *clp = dp->dl_stid.sc_client; + + if (clp->cl_minorversion == 0) + destroy_delegation(dp); + else { + unhash_delegation(dp); + dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; + list_add(&dp->dl_recall_lru, &clp->cl_revoked); + } +} + /* * SETCLIENTID state */ @@ -501,7 +619,8 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp) static void free_generic_stateid(struct nfs4_ol_stateid *stp) { - free_stid(&stp->st_stid, stateid_slab); + remove_stid(&stp->st_stid); + kmem_cache_free(stateid_slab, stp); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -617,6 +736,28 @@ dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) } #endif +/* + * Bump the seqid on cstate->replay_owner, and clear replay_owner if it + * won't be used for replay. + */ +void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr) +{ + struct nfs4_stateowner *so = cstate->replay_owner; + + if (nfserr == nfserr_replay_me) + return; + + if (!seqid_mutating_err(ntohl(nfserr))) { + cstate->replay_owner = NULL; + return; + } + if (!so) + return; + if (so->so_is_open_owner) + release_last_closed_stateid(openowner(so)); + so->so_seqid++; + return; +} static void gen_sessionid(struct nfsd4_session *ses) @@ -657,17 +798,15 @@ free_session_slots(struct nfsd4_session *ses) * We don't actually need to cache the rpc and session headers, so we * can allocate a little less for each slot: */ -static inline int slot_bytes(struct nfsd4_channel_attrs *ca) +static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) { - return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; -} + u32 size; -static int nfsd4_sanitize_slot_size(u32 size) -{ - size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */ - size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE); - - return size; + if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ) + size = 0; + else + size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; + return size + sizeof(struct nfsd4_slot); } /* @@ -675,12 +814,12 @@ static int nfsd4_sanitize_slot_size(u32 size) * re-negotiate active sessions and reduce their slot usage to make * room for new connections. For now we just fail the create session. */ -static int nfsd4_get_drc_mem(int slotsize, u32 num) +static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) { + u32 slotsize = slot_bytes(ca); + u32 num = ca->maxreqs; int avail; - num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); - spin_lock(&nfsd_drc_lock); avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, nfsd_drc_max_mem - nfsd_drc_mem_used); @@ -691,15 +830,19 @@ static int nfsd4_get_drc_mem(int slotsize, u32 num) return num; } -static void nfsd4_put_drc_mem(int slotsize, int num) +static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) { + int slotsize = slot_bytes(ca); + spin_lock(&nfsd_drc_lock); - nfsd_drc_mem_used -= slotsize * num; + nfsd_drc_mem_used -= slotsize * ca->maxreqs; spin_unlock(&nfsd_drc_lock); } -static struct nfsd4_session *__alloc_session(int slotsize, int numslots) +static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *attrs) { + int numslots = attrs->maxreqs; + int slotsize = slot_bytes(attrs); struct nfsd4_session *new; int mem, i; @@ -712,8 +855,7 @@ static struct nfsd4_session *__alloc_session(int slotsize, int numslots) return NULL; /* allocate each struct nfsd4_slot and data cache in one piece */ for (i = 0; i < numslots; i++) { - mem = sizeof(struct nfsd4_slot) + slotsize; - new->se_slots[i] = kzalloc(mem, GFP_KERNEL); + new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL); if (!new->se_slots[i]) goto out_free; } @@ -725,21 +867,6 @@ out_free: return NULL; } -static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, - struct nfsd4_channel_attrs *req, - int numslots, int slotsize, - struct nfsd_net *nn) -{ - u32 maxrpc = nn->nfsd_serv->sv_max_mesg; - - new->maxreqs = numslots; - new->maxresp_cached = min_t(u32, req->maxresp_cached, - slotsize + NFSD_MIN_HDR_SEQ_SZ); - new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); - new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); - new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); -} - static void free_conn(struct nfsd4_conn *c) { svc_xprt_put(c->cn_xprt); @@ -756,8 +883,8 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u) list_del(&c->cn_persession); free_conn(c); } - spin_unlock(&clp->cl_lock); nfsd4_probe_callback(clp); + spin_unlock(&clp->cl_lock); } static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) @@ -841,59 +968,20 @@ static void nfsd4_del_conns(struct nfsd4_session *s) static void __free_session(struct nfsd4_session *ses) { - nfsd4_put_drc_mem(slot_bytes(&ses->se_fchannel), ses->se_fchannel.maxreqs); free_session_slots(ses); kfree(ses); } -static void free_session(struct kref *kref) +static void free_session(struct nfsd4_session *ses) { - struct nfsd4_session *ses; - struct nfsd_net *nn; - - ses = container_of(kref, struct nfsd4_session, se_ref); - nn = net_generic(ses->se_client->net, nfsd_net_id); + struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); nfsd4_del_conns(ses); + nfsd4_put_drc_mem(&ses->se_fchannel); __free_session(ses); } -void nfsd4_put_session(struct nfsd4_session *ses) -{ - struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); - - spin_lock(&nn->client_lock); - nfsd4_put_session_locked(ses); - spin_unlock(&nn->client_lock); -} - -static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan, - struct nfsd_net *nn) -{ - struct nfsd4_session *new; - int numslots, slotsize; - /* - * Note decreasing slot size below client's request may - * make it difficult for client to function correctly, whereas |