diff options
Diffstat (limited to 'fs/nfsd')
| -rw-r--r-- | fs/nfsd/Kconfig | 2 | ||||
| -rw-r--r-- | fs/nfsd/acl.h | 28 | ||||
| -rw-r--r-- | fs/nfsd/auth.c | 10 | ||||
| -rw-r--r-- | fs/nfsd/cache.h | 8 | ||||
| -rw-r--r-- | fs/nfsd/export.c | 112 | ||||
| -rw-r--r-- | fs/nfsd/export.h | 110 | ||||
| -rw-r--r-- | fs/nfsd/fault_inject.c | 15 | ||||
| -rw-r--r-- | fs/nfsd/idmap.h | 4 | ||||
| -rw-r--r-- | fs/nfsd/netns.h | 1 | ||||
| -rw-r--r-- | fs/nfsd/nfs2acl.c | 84 | ||||
| -rw-r--r-- | fs/nfsd/nfs3acl.c | 68 | ||||
| -rw-r--r-- | fs/nfsd/nfs3xdr.c | 41 | ||||
| -rw-r--r-- | fs/nfsd/nfs4acl.c | 169 | ||||
| -rw-r--r-- | fs/nfsd/nfs4callback.c | 23 | ||||
| -rw-r--r-- | fs/nfsd/nfs4idmap.c | 52 | ||||
| -rw-r--r-- | fs/nfsd/nfs4proc.c | 280 | ||||
| -rw-r--r-- | fs/nfsd/nfs4recover.c | 12 | ||||
| -rw-r--r-- | fs/nfsd/nfs4state.c | 494 | ||||
| -rw-r--r-- | fs/nfsd/nfs4xdr.c | 2198 | ||||
| -rw-r--r-- | fs/nfsd/nfscache.c | 46 | ||||
| -rw-r--r-- | fs/nfsd/nfsctl.c | 6 | ||||
| -rw-r--r-- | fs/nfsd/nfsd.h | 19 | ||||
| -rw-r--r-- | fs/nfsd/nfsfh.c | 61 | ||||
| -rw-r--r-- | fs/nfsd/nfsfh.h | 77 | ||||
| -rw-r--r-- | fs/nfsd/nfssvc.c | 36 | ||||
| -rw-r--r-- | fs/nfsd/nfsxdr.c | 15 | ||||
| -rw-r--r-- | fs/nfsd/state.h | 5 | ||||
| -rw-r--r-- | fs/nfsd/stats.c | 1 | ||||
| -rw-r--r-- | fs/nfsd/stats.h | 43 | ||||
| -rw-r--r-- | fs/nfsd/vfs.c | 596 | ||||
| -rw-r--r-- | fs/nfsd/vfs.h | 20 | ||||
| -rw-r--r-- | fs/nfsd/xdr3.h | 3 | ||||
| -rw-r--r-- | fs/nfsd/xdr4.h | 29 | 
33 files changed, 2626 insertions, 2042 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index dc8f1ef665c..f994e750e0d 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -95,7 +95,7 @@ config NFSD_V4_SECURITY_LABEL  	Smack policies on NFSv4 files, say N.  	WARNING: there is still a chance of backwards-incompatible protocol changes. -	For now we recommend "Y" only for developers and testers." +	For now we recommend "Y" only for developers and testers.  config NFSD_FAULT_INJECTION  	bool "NFS server manual fault injection" diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index 8b186a4955c..a986ceb6fd0 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -35,23 +35,25 @@  #ifndef LINUX_NFS4_ACL_H  #define LINUX_NFS4_ACL_H -#include <linux/posix_acl.h> +struct nfs4_acl; +struct svc_fh; +struct svc_rqst; -/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to - * fit in a page: */ -#define NFS4_ACL_MAX 170 +/* + * Maximum ACL we'll accept from a client; chosen (somewhat + * arbitrarily) so that kmalloc'ing the ACL shouldn't require a + * high-order allocation.  
This allows 204 ACEs on x86_64: + */ +#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \ +			/ sizeof(struct nfs4_ace))  struct nfs4_acl *nfs4_acl_new(int);  int nfs4_acl_get_whotype(char *, u32); -int nfs4_acl_write_who(int who, char *p); - -#define NFS4_ACL_TYPE_DEFAULT	0x01 -#define NFS4_ACL_DIR		0x02 -#define NFS4_ACL_OWNER		0x04 +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); -struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct posix_acl *, -				struct posix_acl *, unsigned int flags); -int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *, struct posix_acl **, -				struct posix_acl **, unsigned int flags); +int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, +		struct nfs4_acl **acl); +__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, +		struct nfs4_acl *acl);  #endif /* LINUX_NFS4_ACL_H */ diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 06cddd57226..72f44823adb 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -1,7 +1,6 @@  /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */  #include <linux/sched.h> -#include <linux/user_namespace.h>  #include "nfsd.h"  #include "auth.h" @@ -25,7 +24,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)  	struct cred *new;  	int i;  	int flags = nfsexp_flags(rqstp, exp); -	int ret;  	validate_process_creds(); @@ -71,10 +69,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)  	if (gid_eq(new->fsgid, INVALID_GID))  		new->fsgid = exp->ex_anon_gid; -	ret = set_groups(new, gi); +	set_groups(new, gi);  	put_group_info(gi); -	if (ret < 0) -		goto error;  	if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID))  		new->cap_effective = cap_drop_nfsd_set(new->cap_effective); @@ -88,9 +84,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)  	return 0;  oom: -	ret = -ENOMEM; -error:  	abort_creds(new); -	return ret; +	return -ENOMEM;  } diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index d5c5b3e0026..b582f9ab6b2 100644 --- 
a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -84,12 +84,4 @@ int	nfsd_cache_lookup(struct svc_rqst *);  void	nfsd_cache_update(struct svc_rqst *, int, __be32 *);  int	nfsd_reply_cache_stats_open(struct inode *, struct file *); -#ifdef CONFIG_NFSD_V4 -void	nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp); -#else  /* CONFIG_NFSD_V4 */ -static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) -{ -} -#endif /* CONFIG_NFSD_V4 */ -  #endif /* NFSCACHE_H */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5f38ea36e26..13b85f94d9e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -17,17 +17,12 @@  #include <linux/exportfs.h>  #include <linux/sunrpc/svc_xprt.h> -#include <net/ipv6.h> -  #include "nfsd.h"  #include "nfsfh.h"  #include "netns.h"  #define NFSDDBG_FACILITY	NFSDDBG_EXPORT -typedef struct auth_domain	svc_client; -typedef struct svc_export	svc_export; -  /*   * We have two caches.   * One maps client+vfsmnt+dentry to export options - the export map @@ -73,7 +68,7 @@ static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_  static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)  { -	/* client fsidtype fsid [path] */ +	/* client fsidtype fsid expiry [path] */  	char *buf;  	int len;  	struct auth_domain *dom = NULL; @@ -295,13 +290,19 @@ svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new,  static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)  { +	struct nfsd4_fs_location *locations = fsloc->locations;  	int i; +	if (!locations) +		return; +  	for (i = 0; i < fsloc->locations_count; i++) { -		kfree(fsloc->locations[i].path); -		kfree(fsloc->locations[i].hosts); +		kfree(locations[i].path); +		kfree(locations[i].hosts);  	} -	kfree(fsloc->locations); + +	kfree(locations); +	fsloc->locations = NULL;  }  static void svc_export_put(struct kref *ref) @@ -388,6 +389,10 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)  	int len;  	int migrated, i, err; 
+	/* more than one fsloc */ +	if (fsloc->locations) +		return -EINVAL; +  	/* listsize */  	err = get_uint(mesg, &fsloc->locations_count);  	if (err) @@ -437,13 +442,18 @@ out_free_all:  static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)  { -	int listsize, err;  	struct exp_flavor_info *f; +	u32 listsize; +	int err; -	err = get_int(mesg, &listsize); +	/* more than one secinfo */ +	if (exp->ex_nflavors) +		return -EINVAL; + +	err = get_uint(mesg, &listsize);  	if (err)  		return err; -	if (listsize < 0 || listsize > MAX_SECINFO_LIST) +	if (listsize > MAX_SECINFO_LIST)  		return -EINVAL;  	for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { @@ -474,6 +484,27 @@ static inline int  secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }  #endif +static inline int +uuid_parse(char **mesg, char *buf, unsigned char **puuid) +{ +	int len; + +	/* more than one uuid */ +	if (*puuid) +		return -EINVAL; + +	/* expect a 16 byte uuid encoded as \xXXXX... 
*/ +	len = qword_get(mesg, buf, PAGE_SIZE); +	if (len != EX_UUID_LEN) +		return -EINVAL; + +	*puuid = kmemdup(buf, EX_UUID_LEN, GFP_KERNEL); +	if (*puuid == NULL) +		return -ENOMEM; + +	return 0; +} +  static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)  {  	/* client path expiry [flags anonuid anongid fsid] */ @@ -536,16 +567,12 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)  		if (err)  			goto out3;  		exp.ex_anon_uid= make_kuid(&init_user_ns, an_int); -		if (!uid_valid(exp.ex_anon_uid)) -			goto out3;  		/* anon gid */  		err = get_int(&mesg, &an_int);  		if (err)  			goto out3;  		exp.ex_anon_gid= make_kgid(&init_user_ns, an_int); -		if (!gid_valid(exp.ex_anon_gid)) -			goto out3;  		/* fsid */  		err = get_int(&mesg, &an_int); @@ -556,18 +583,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)  		while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {  			if (strcmp(buf, "fsloc") == 0)  				err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); -			else if (strcmp(buf, "uuid") == 0) { -				/* expect a 16 byte uuid encoded as \xXXXX... */ -				len = qword_get(&mesg, buf, PAGE_SIZE); -				if (len != 16) -					err  = -EINVAL; -				else { -					exp.ex_uuid = -						kmemdup(buf, 16, GFP_KERNEL); -					if (exp.ex_uuid == NULL) -						err = -ENOMEM; -				} -			} else if (strcmp(buf, "secinfo") == 0) +			else if (strcmp(buf, "uuid") == 0) +				err = uuid_parse(&mesg, buf, &exp.ex_uuid); +			else if (strcmp(buf, "secinfo") == 0)  				err = secinfo_parse(&mesg, buf, &exp);  			else  				/* quietly ignore unknown words and anything @@ -583,6 +601,26 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)  				   exp.ex_uuid);  		if (err)  			goto out4; +		/* +		 * No point caching this if it would immediately expire. 
+		 * Also, this protects exportfs's dummy export from the +		 * anon_uid/anon_gid checks: +		 */ +		if (exp.h.expiry_time < seconds_since_boot()) +			goto out4; +		/* +		 * For some reason exportfs has been passing down an +		 * invalid (-1) uid & gid on the "dummy" export which it +		 * uses to test export support.  To make sure exportfs +		 * sees errors from check_export we therefore need to +		 * delay these checks till after check_export: +		 */ +		err = -EINVAL; +		if (!uid_valid(exp.ex_anon_uid)) +			goto out4; +		if (!gid_valid(exp.ex_anon_gid)) +			goto out4; +		err = 0;  	}  	expp = svc_export_lookup(&exp); @@ -633,7 +671,7 @@ static int svc_export_show(struct seq_file *m,  		if (exp->ex_uuid) {  			int i;  			seq_puts(m, ",uuid="); -			for (i=0; i<16; i++) { +			for (i = 0; i < EX_UUID_LEN; i++) {  				if ((i&3) == 0 && i)  					seq_putc(m, ':');  				seq_printf(m, "%02x", exp->ex_uuid[i]); @@ -755,7 +793,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old)  static struct svc_expkey * -exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type, +exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type,  	     u32 *fsidv, struct cache_req *reqp)  {  	struct svc_expkey key, *ek; @@ -777,9 +815,9 @@ exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type,  	return ek;  } - -static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp, -				   const struct path *path, struct cache_req *reqp) +static struct svc_export * +exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp, +		const struct path *path, struct cache_req *reqp)  {  	struct svc_export *exp, key;  	int err; @@ -803,11 +841,11 @@ static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp,  /*   * Find the export entry for a given dentry.   
*/ -static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp, -				     struct path *path) +static struct svc_export * +exp_parent(struct cache_detail *cd, struct auth_domain *clp, struct path *path)  {  	struct dentry *saved = dget(path->dentry); -	svc_export *exp = exp_get_by_name(cd, clp, path, NULL); +	struct svc_export *exp = exp_get_by_name(cd, clp, path, NULL);  	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {  		struct dentry *parent = dget_parent(path->dentry); @@ -828,7 +866,7 @@ static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp,   * since its harder to fool a kernel module than a user space program.   */  int -exp_rootfh(struct net *net, svc_client *clp, char *name, +exp_rootfh(struct net *net, struct auth_domain *clp, char *name,  	   struct knfsd_fh *f, int maxsize)  {  	struct svc_export	*exp; diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h new file mode 100644 index 00000000000..cfeea85c5be --- /dev/null +++ b/fs/nfsd/export.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> + */ +#ifndef NFSD_EXPORT_H +#define NFSD_EXPORT_H + +#include <linux/sunrpc/cache.h> +#include <uapi/linux/nfsd/export.h> + +struct knfsd_fh; +struct svc_fh; +struct svc_rqst; + +/* + * FS Locations + */ + +#define MAX_FS_LOCATIONS	128 + +struct nfsd4_fs_location { +	char *hosts; /* colon separated list of hosts */ +	char *path;  /* slash separated list of path components */ +}; + +struct nfsd4_fs_locations { +	uint32_t locations_count; +	struct nfsd4_fs_location *locations; +/* If we're not actually serving this data ourselves (only providing a + * list of replicas that do serve it) then we set "migrated": */ +	int migrated; +}; + +/* + * We keep an array of pseudoflavors with the export, in order from most + * to least preferred.  
For the foreseeable future, we don't expect more + * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3, + * spkm3i, and spkm3p (and using all 8 at once should be rare). + */ +#define MAX_SECINFO_LIST	8 +#define EX_UUID_LEN		16 + +struct exp_flavor_info { +	u32	pseudoflavor; +	u32	flags; +}; + +struct svc_export { +	struct cache_head	h; +	struct auth_domain *	ex_client; +	int			ex_flags; +	struct path		ex_path; +	kuid_t			ex_anon_uid; +	kgid_t			ex_anon_gid; +	int			ex_fsid; +	unsigned char *		ex_uuid; /* 16 byte fsid */ +	struct nfsd4_fs_locations ex_fslocs; +	uint32_t		ex_nflavors; +	struct exp_flavor_info	ex_flavors[MAX_SECINFO_LIST]; +	struct cache_detail	*cd; +}; + +/* an "export key" (expkey) maps a filehandlefragement to an + * svc_export for a given client.  There can be several per export, + * for the different fsid types. + */ +struct svc_expkey { +	struct cache_head	h; + +	struct auth_domain *	ek_client; +	int			ek_fsidtype; +	u32			ek_fsid[6]; + +	struct path		ek_path; +}; + +#define EX_ISSYNC(exp)		(!((exp)->ex_flags & NFSEXP_ASYNC)) +#define EX_NOHIDE(exp)		((exp)->ex_flags & NFSEXP_NOHIDE) +#define EX_WGATHER(exp)		((exp)->ex_flags & NFSEXP_GATHERED_WRITES) + +int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); + +/* + * Function declarations + */ +int			nfsd_export_init(struct net *); +void			nfsd_export_shutdown(struct net *); +void			nfsd_export_flush(struct net *); +struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *, +					     struct path *); +struct svc_export *	rqst_exp_parent(struct svc_rqst *, +					struct path *); +struct svc_export *	rqst_find_fsidzero_export(struct svc_rqst *); +int			exp_rootfh(struct net *, struct auth_domain *, +					char *path, struct knfsd_fh *, int maxsize); +__be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *); +__be32			nfserrno(int errno); + +static inline void exp_put(struct svc_export *exp) +{ 
+	cache_put(&exp->h, exp->cd); +} + +static inline void exp_get(struct svc_export *exp) +{ +	cache_get(&exp->h); +} +struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); + +#endif /* NFSD_EXPORT_H */ diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index d620e7f8142..2ed05c3cd43 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -97,25 +97,14 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,  {  	static u64 val;  	char read_buf[25]; -	size_t size, ret; +	size_t size;  	loff_t pos = *ppos;  	if (!pos)  		nfsd_inject_get(file_inode(file)->i_private, &val);  	size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); -	if (pos < 0) -		return -EINVAL; -	if (pos >= size || !len) -		return 0; -	if (len > size - pos) -		len = size - pos; -	ret = copy_to_user(buf, read_buf + pos, len); -	if (ret == len) -		return -EFAULT; -	len -= ret; -	*ppos = pos + len; -	return len; +	return simple_read_from_buffer(buf, len, ppos, read_buf, size);  }  static ssize_t fault_inject_write(struct file *file, const char __user *buf, diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h index bf95f6b817a..a3f34900091 100644 --- a/fs/nfsd/idmap.h +++ b/fs/nfsd/idmap.h @@ -56,7 +56,7 @@ static inline void nfsd_idmap_shutdown(struct net *net)  __be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *);  __be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *); -int nfsd_map_uid_to_name(struct svc_rqst *, kuid_t, char *); -int nfsd_map_gid_to_name(struct svc_rqst *, kgid_t, char *); +__be32 nfsd4_encode_user(struct xdr_stream *, struct svc_rqst *, kuid_t); +__be32 nfsd4_encode_group(struct xdr_stream *, struct svc_rqst *, kgid_t);  #endif /* LINUX_NFSD_IDMAP_H */ diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 849a7c3ced2..d32b3aa6600 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -95,6 +95,7 @@ struct nfsd_net {  	time_t nfsd4_grace;  	bool nfsd_net_up; +	bool lockd_up;  	/*  	 * 
Time of server startup diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 95d76dc6c5d..12b023a7ab7 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -30,8 +30,9 @@ nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)  static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,  		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)  { -	svc_fh *fh;  	struct posix_acl *acl; +	struct inode *inode; +	svc_fh *fh;  	__be32 nfserr = 0;  	dprintk("nfsd: GETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh)); @@ -41,6 +42,8 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,  	if (nfserr)  		RETURN_STATUS(nfserr); +	inode = fh->fh_dentry->d_inode; +  	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))  		RETURN_STATUS(nfserr_inval);  	resp->mask = argp->mask; @@ -50,21 +53,13 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,  		goto fail;  	if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { -		acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); +		acl = get_acl(inode, ACL_TYPE_ACCESS);  		if (IS_ERR(acl)) { -			int err = PTR_ERR(acl); - -			if (err == -ENODATA || err == -EOPNOTSUPP) -				acl = NULL; -			else { -				nfserr = nfserrno(err); -				goto fail; -			} +			nfserr = nfserrno(PTR_ERR(acl)); +			goto fail;  		}  		if (acl == NULL) {  			/* Solaris returns the inode's minimum ACL. */ - -			struct inode *inode = fh->fh_dentry->d_inode;  			acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);  		}  		resp->acl_access = acl; @@ -72,17 +67,10 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,  	if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {  		/* Check how Solaris handles requests for the Default ACL  		   of a non-directory! 
*/ - -		acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); +		acl = get_acl(inode, ACL_TYPE_DEFAULT);  		if (IS_ERR(acl)) { -			int err = PTR_ERR(acl); - -			if (err == -ENODATA || err == -EOPNOTSUPP) -				acl = NULL; -			else { -				nfserr = nfserrno(err); -				goto fail; -			} +			nfserr = nfserrno(PTR_ERR(acl)); +			goto fail;  		}  		resp->acl_default = acl;  	} @@ -103,31 +91,51 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,  		struct nfsd3_setaclargs *argp,  		struct nfsd_attrstat *resp)  { +	struct inode *inode;  	svc_fh *fh;  	__be32 nfserr = 0; +	int error;  	dprintk("nfsd: SETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));  	fh = fh_copy(&resp->fh, &argp->fh);  	nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); +	if (nfserr) +		goto out; -	if (!nfserr) { -		nfserr = nfserrno( nfsd_set_posix_acl( -			fh, ACL_TYPE_ACCESS, argp->acl_access) ); -	} -	if (!nfserr) { -		nfserr = nfserrno( nfsd_set_posix_acl( -			fh, ACL_TYPE_DEFAULT, argp->acl_default) ); -	} -	if (!nfserr) { -		nfserr = fh_getattr(fh, &resp->stat); +	inode = fh->fh_dentry->d_inode; +	if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { +		error = -EOPNOTSUPP; +		goto out_errno;  	} +	error = fh_want_write(fh); +	if (error) +		goto out_errno; + +	error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); +	if (error) +		goto out_drop_write; +	error = inode->i_op->set_acl(inode, argp->acl_default, +				     ACL_TYPE_DEFAULT); +	if (error) +		goto out_drop_write; + +	fh_drop_write(fh); + +	nfserr = fh_getattr(fh, &resp->stat); + +out:  	/* argp->acl_{access,default} may have been allocated in  	   nfssvc_decode_setaclargs. 
*/  	posix_acl_release(argp->acl_access);  	posix_acl_release(argp->acl_default);  	return nfserr; +out_drop_write: +	fh_drop_write(fh); +out_errno: +	nfserr = nfserrno(error); +	goto out;  }  /* @@ -174,7 +182,8 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg  static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,  		struct nfsd3_getaclargs *argp)  { -	if (!(p = nfs2svc_decode_fh(p, &argp->fh))) +	p = nfs2svc_decode_fh(p, &argp->fh); +	if (!p)  		return 0;  	argp->mask = ntohl(*p); p++; @@ -189,7 +198,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,  	unsigned int base;  	int n; -	if (!(p = nfs2svc_decode_fh(p, &argp->fh))) +	p = nfs2svc_decode_fh(p, &argp->fh); +	if (!p)  		return 0;  	argp->mask = ntohl(*p++);  	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || @@ -210,7 +220,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,  static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,  		struct nfsd_fhandle *argp)  { -	if (!(p = nfs2svc_decode_fh(p, &argp->fh))) +	p = nfs2svc_decode_fh(p, &argp->fh); +	if (!p)  		return 0;  	return xdr_argsize_check(rqstp, p);  } @@ -218,7 +229,8 @@ static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,  static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,  		struct nfsd3_accessargs *argp)  { -	if (!(p = nfs2svc_decode_fh(p, &argp->fh))) +	p = nfs2svc_decode_fh(p, &argp->fh); +	if (!p)  		return 0;  	argp->access = ntohl(*p++); diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 9cbc1a841f8..2a514e21dc7 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -29,8 +29,9 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)  static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,  		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)  { -	svc_fh *fh;  	struct posix_acl *acl; +	struct inode *inode; +	svc_fh *fh;  	
__be32 nfserr = 0;  	fh = fh_copy(&resp->fh, &argp->fh); @@ -38,26 +39,20 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,  	if (nfserr)  		RETURN_STATUS(nfserr); +	inode = fh->fh_dentry->d_inode; +  	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))  		RETURN_STATUS(nfserr_inval);  	resp->mask = argp->mask;  	if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { -		acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); +		acl = get_acl(inode, ACL_TYPE_ACCESS);  		if (IS_ERR(acl)) { -			int err = PTR_ERR(acl); - -			if (err == -ENODATA || err == -EOPNOTSUPP) -				acl = NULL; -			else { -				nfserr = nfserrno(err); -				goto fail; -			} +			nfserr = nfserrno(PTR_ERR(acl)); +			goto fail;  		}  		if (acl == NULL) {  			/* Solaris returns the inode's minimum ACL. */ - -			struct inode *inode = fh->fh_dentry->d_inode;  			acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);  		}  		resp->acl_access = acl; @@ -65,17 +60,10 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,  	if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {  		/* Check how Solaris handles requests for the Default ACL  		   of a non-directory! 
*/ - -		acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); +		acl = get_acl(inode, ACL_TYPE_DEFAULT);  		if (IS_ERR(acl)) { -			int err = PTR_ERR(acl); - -			if (err == -ENODATA || err == -EOPNOTSUPP) -				acl = NULL; -			else { -				nfserr = nfserrno(err); -				goto fail; -			} +			nfserr = nfserrno(PTR_ERR(acl)); +			goto fail;  		}  		resp->acl_default = acl;  	} @@ -96,21 +84,37 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,  		struct nfsd3_setaclargs *argp,  		struct nfsd3_attrstat *resp)  { +	struct inode *inode;  	svc_fh *fh;  	__be32 nfserr = 0; +	int error;  	fh = fh_copy(&resp->fh, &argp->fh);  	nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); +	if (nfserr) +		goto out; -	if (!nfserr) { -		nfserr = nfserrno( nfsd_set_posix_acl( -			fh, ACL_TYPE_ACCESS, argp->acl_access) ); -	} -	if (!nfserr) { -		nfserr = nfserrno( nfsd_set_posix_acl( -			fh, ACL_TYPE_DEFAULT, argp->acl_default) ); +	inode = fh->fh_dentry->d_inode; +	if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { +		error = -EOPNOTSUPP; +		goto out_errno;  	} +	error = fh_want_write(fh); +	if (error) +		goto out_errno; + +	error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); +	if (error) +		goto out_drop_write; +	error = inode->i_op->set_acl(inode, argp->acl_default, +				     ACL_TYPE_DEFAULT); + +out_drop_write: +	fh_drop_write(fh); +out_errno: +	nfserr = nfserrno(error); +out:  	/* argp->acl_{access,default} may have been allocated in  	   nfs3svc_decode_setaclargs. 
*/  	posix_acl_release(argp->acl_access); @@ -124,7 +128,8 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,  static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,  		struct nfsd3_getaclargs *args)  { -	if (!(p = nfs3svc_decode_fh(p, &args->fh))) +	p = nfs3svc_decode_fh(p, &args->fh); +	if (!p)  		return 0;  	args->mask = ntohl(*p); p++; @@ -139,7 +144,8 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,  	unsigned int base;  	int n; -	if (!(p = nfs3svc_decode_fh(p, &args->fh))) +	p = nfs3svc_decode_fh(p, &args->fh); +	if (!p)  		return 0;  	args->mask = ntohl(*p++);  	if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 14d9ecb96cf..e6c01e80325 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -168,7 +168,7 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,  	      struct kstat *stat)  {  	*p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); -	*p++ = htonl((u32) stat->mode); +	*p++ = htonl((u32) (stat->mode & S_IALLUGO));  	*p++ = htonl((u32) stat->nlink);  	*p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));  	*p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); @@ -278,7 +278,8 @@ void fill_post_wcc(struct svc_fh *fhp)  int  nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)  { -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	return xdr_argsize_check(rqstp, p);  } @@ -287,7 +288,8 @@ int  nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,  					struct nfsd3_sattrargs *args)  { -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	p = decode_sattr3(p, &args->attrs); @@ -315,7 +317,8 @@ int  nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,  					struct nfsd3_accessargs *args)  { -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 
0;  	args->access = ntohl(*p++); @@ -330,7 +333,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,  	int v;  	u32 max_blocksize = svc_max_payload(rqstp); -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	p = xdr_decode_hyper(p, &args->offset); @@ -360,7 +364,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,  	unsigned int len, v, hdr, dlen;  	u32 max_blocksize = svc_max_payload(rqstp); -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	p = xdr_decode_hyper(p, &args->offset); @@ -535,7 +540,8 @@ int  nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,  					struct nfsd3_readlinkargs *args)  { -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	args->buffer = page_address(*(rqstp->rq_next_page++)); @@ -558,7 +564,8 @@ int  nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,  					struct nfsd3_readdirargs *args)  { -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	p = xdr_decode_hyper(p, &args->cookie);  	args->verf   = p; p += 2; @@ -580,7 +587,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,  	int len;  	u32 max_blocksize = svc_max_payload(rqstp); -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	p = xdr_decode_hyper(p, &args->cookie);  	args->verf     = p; p += 2; @@ -605,7 +613,8 @@ int  nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,  					struct nfsd3_commitargs *args)  { -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	p = xdr_decode_hyper(p, &args->offset);  	args->count = ntohl(*p++); @@ -842,21 +851,21 @@ out:  static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)  { -	struct svc_fh	fh; +	struct svc_fh	*fh = &cd->scratch;  	__be32 err; -	fh_init(&fh, NFS3_FHSIZE); 
-	err = compose_entry_fh(cd, &fh, name, namlen); +	fh_init(fh, NFS3_FHSIZE); +	err = compose_entry_fh(cd, fh, name, namlen);  	if (err) {  		*p++ = 0;  		*p++ = 0;  		goto out;  	} -	p = encode_post_op_attr(cd->rqstp, p, &fh); +	p = encode_post_op_attr(cd->rqstp, p, fh);  	*p++ = xdr_one;			/* yes, a file handle follows */ -	p = encode_fh(p, &fh); +	p = encode_fh(p, fh);  out: -	fh_put(&fh); +	fh_put(fh);  	return p;  } diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 8a50b3c1809..d714156a19f 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -36,9 +36,14 @@  #include <linux/slab.h>  #include <linux/nfs_fs.h> -#include <linux/export.h> +#include "nfsfh.h" +#include "nfsd.h"  #include "acl.h" +#include "vfs.h" +#define NFS4_ACL_TYPE_DEFAULT	0x01 +#define NFS4_ACL_DIR		0x02 +#define NFS4_ACL_OWNER		0x04  /* mode bit translations: */  #define NFS4_READ_MODE (NFS4_ACE_READ_DATA) @@ -130,36 +135,47 @@ static short ace2type(struct nfs4_ace *);  static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *,  				unsigned int); -struct nfs4_acl * -nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, -			unsigned int flags) +int +nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, +		struct nfs4_acl **acl)  { -	struct nfs4_acl *acl; +	struct inode *inode = dentry->d_inode; +	int error = 0; +	struct posix_acl *pacl = NULL, *dpacl = NULL; +	unsigned int flags = 0;  	int size = 0; -	if (pacl) { -		if (posix_acl_valid(pacl) < 0) -			return ERR_PTR(-EINVAL); -		size += 2*pacl->a_count; +	pacl = get_acl(inode, ACL_TYPE_ACCESS); +	if (!pacl) { +		pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); +		if (IS_ERR(pacl)) +			return PTR_ERR(pacl);  	} -	if (dpacl) { -		if (posix_acl_valid(dpacl) < 0) -			return ERR_PTR(-EINVAL); -		size += 2*dpacl->a_count; +	/* allocate for worst case: one (deny, allow) pair each: */ +	size += 2 * pacl->a_count; + +	if (S_ISDIR(inode->i_mode)) { +		flags = NFS4_ACL_DIR; +		dpacl = 
get_acl(inode, ACL_TYPE_DEFAULT); +		if (dpacl) +			size += 2 * dpacl->a_count;  	} -	/* Allocate for worst case: one (deny, allow) pair each: */ -	acl = nfs4_acl_new(size); -	if (acl == NULL) -		return ERR_PTR(-ENOMEM); +	*acl = nfs4_acl_new(size); +	if (*acl == NULL) { +		error = -ENOMEM; +		goto out; +	} -	if (pacl) -		_posix_to_nfsv4_one(pacl, acl, flags & ~NFS4_ACL_TYPE_DEFAULT); +	_posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT);  	if (dpacl) -		_posix_to_nfsv4_one(dpacl, acl, flags | NFS4_ACL_TYPE_DEFAULT); +		_posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT); -	return acl; + out: +	posix_acl_release(pacl); +	posix_acl_release(dpacl); +	return error;  }  struct posix_acl_summary { @@ -385,8 +401,10 @@ sort_pacl(struct posix_acl *pacl)  	 * by uid/gid. */  	int i, j; -	if (pacl->a_count <= 4) -		return; /* no users or groups */ +	/* no users or groups */ +	if (!pacl || pacl->a_count <= 4) +		return; +  	i = 1;  	while (pacl->a_entries[i].e_tag == ACL_USER)  		i++; @@ -513,19 +531,21 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)  	/*  	 * ACLs with no ACEs are treated differently in the inheritable -	 * and effective cases: when there are no inheritable ACEs, we -	 * set a zero-length default posix acl: +	 * and effective cases: when there are no inheritable ACEs, +	 * calls ->set_acl with a NULL ACL structure.  	 */ -	if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) { -		pacl = posix_acl_alloc(0, GFP_KERNEL); -		return pacl ? pacl : ERR_PTR(-ENOMEM); -	} +	if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) +		return NULL; +  	/*  	 * When there are no effective ACEs, the following will end  	 * up setting a 3-element effective posix ACL with all  	 * permissions zero.  	 
*/ -	nace = 4 + state->users->n + state->groups->n; +	if (!state->users->n && !state->groups->n) +		nace = 3; +	else /* Note we also include a MASK ACE in this case: */ +		nace = 4 + state->users->n + state->groups->n;  	pacl = posix_acl_alloc(nace, GFP_KERNEL);  	if (!pacl)  		return ERR_PTR(-ENOMEM); @@ -569,9 +589,11 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)  		add_to_mask(state, &state->groups->aces[i].perms);  	} -	pace++; -	pace->e_tag = ACL_MASK; -	low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); +	if (state->users->n || state->groups->n) { +		pace++; +		pace->e_tag = ACL_MASK; +		low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); +	}  	pace++;  	pace->e_tag = ACL_OTHER; @@ -719,8 +741,9 @@ static void process_one_v4_ace(struct posix_acl_state *state,  	}  } -int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, -			    struct posix_acl **dpacl, unsigned int flags) +static int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, +		struct posix_acl **pacl, struct posix_acl **dpacl, +		unsigned int flags)  {  	struct posix_acl_state effective_acl_state, default_acl_state;  	struct nfs4_ace *ace; @@ -780,6 +803,57 @@ out_estate:  	return ret;  } +__be32 +nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, +		struct nfs4_acl *acl) +{ +	__be32 error; +	int host_error; +	struct dentry *dentry; +	struct inode *inode; +	struct posix_acl *pacl = NULL, *dpacl = NULL; +	unsigned int flags = 0; + +	/* Get inode */ +	error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); +	if (error) +		return error; + +	dentry = fhp->fh_dentry; +	inode = dentry->d_inode; + +	if (!inode->i_op->set_acl || !IS_POSIXACL(inode)) +		return nfserr_attrnotsupp; + +	if (S_ISDIR(inode->i_mode)) +		flags = NFS4_ACL_DIR; + +	host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); +	if (host_error == -EINVAL) +		return nfserr_attrnotsupp; +	if (host_error < 0) +		goto out_nfserr; + +	host_error = 
inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS); +	if (host_error < 0) +		goto out_release; + +	if (S_ISDIR(inode->i_mode)) { +		host_error = inode->i_op->set_acl(inode, dpacl, +						  ACL_TYPE_DEFAULT); +	} + +out_release: +	posix_acl_release(pacl); +	posix_acl_release(dpacl); +out_nfserr: +	if (host_error == -EOPNOTSUPP) +		return nfserr_attrnotsupp; +	else +		return nfserrno(host_error); +} + +  static short  ace2type(struct nfs4_ace *ace)  { @@ -798,9 +872,6 @@ ace2type(struct nfs4_ace *ace)  	return -1;  } -EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4); -EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix); -  struct nfs4_acl *  nfs4_acl_new(int n)  { @@ -848,21 +919,21 @@ nfs4_acl_get_whotype(char *p, u32 len)  	return NFS4_ACL_WHO_NAMED;  } -int -nfs4_acl_write_who(int who, char *p) +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who)  { +	__be32 *p;  	int i;  	for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { -		if (s2t_map[i].type == who) { -			memcpy(p, s2t_map[i].string, s2t_map[i].stringlen); -			return s2t_map[i].stringlen; -		} +		if (s2t_map[i].type != who) +			continue; +		p = xdr_reserve_space(xdr, s2t_map[i].stringlen + 4); +		if (!p) +			return nfserr_resource; +		p = xdr_encode_opaque(p, s2t_map[i].string, +					s2t_map[i].stringlen); +		return 0;  	} -	BUG(); +	WARN_ON_ONCE(1);  	return -1;  } - -EXPORT_SYMBOL(nfs4_acl_new); -EXPORT_SYMBOL(nfs4_acl_get_whotype); -EXPORT_SYMBOL(nfs4_acl_write_who); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7f05cd140de..2c73cae9899 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -32,6 +32,7 @@   */  #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/xprt.h>  #include <linux/sunrpc/svc_xprt.h>  #include <linux/slab.h>  #include "nfsd.h" @@ -635,11 +636,29 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc  	}  } +static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) +{ +	struct rpc_xprt *xprt; + +	if (args->protocol != 
XPRT_TRANSPORT_BC_TCP) +		return rpc_create(args); + +	xprt = args->bc_xprt->xpt_bc_xprt; +	if (xprt) { +		xprt_get(xprt); +		return rpc_create_xprt(args, xprt); +	} + +	return rpc_create(args); +} +  static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)  { +	int maxtime = max_cb_time(clp->net);  	struct rpc_timeout	timeparms = { -		.to_initval	= max_cb_time(clp->net), +		.to_initval	= maxtime,  		.to_retries	= 0, +		.to_maxval	= maxtime,  	};  	struct rpc_create_args args = {  		.net		= clp->net, @@ -674,7 +693,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c  		args.authflavor = ses->se_cb_sec.flavor;  	}  	/* Create RPC client */ -	client = rpc_create(&args); +	client = create_backchannel_client(&args);  	if (IS_ERR(client)) {  		dprintk("NFSD: couldn't create callback client: %ld\n",  			PTR_ERR(client)); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 4832fd819f8..a0ab0a847d6 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -551,27 +551,45 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen  	return 0;  } -static int -idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name) +static __be32 encode_ascii_id(struct xdr_stream *xdr, u32 id) +{ +	char buf[11]; +	int len; +	__be32 *p; + +	len = sprintf(buf, "%u", id); +	p = xdr_reserve_space(xdr, len + 4); +	if (!p) +		return nfserr_resource; +	p = xdr_encode_opaque(p, buf, len); +	return 0; +} + +static __be32 idmap_id_to_name(struct xdr_stream *xdr, +			       struct svc_rqst *rqstp, int type, u32 id)  {  	struct ent *item, key = {  		.id = id,  		.type = type,  	}; +	__be32 *p;  	int ret;  	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);  	strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));  	ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);  	if (ret == -ENOENT) -		return sprintf(name, "%u", 
id); +		return encode_ascii_id(xdr, id);  	if (ret) -		return ret; +		return nfserrno(ret);  	ret = strlen(item->name); -	BUG_ON(ret > IDMAP_NAMESZ); -	memcpy(name, item->name, ret); +	WARN_ON_ONCE(ret > IDMAP_NAMESZ); +	p = xdr_reserve_space(xdr, ret + 4); +	if (!p) +		return nfserr_resource; +	p = xdr_encode_opaque(p, item->name, ret);  	cache_put(&item->h, nn->idtoname_cache); -	return ret; +	return 0;  }  static bool @@ -603,12 +621,12 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u  	return idmap_name_to_id(rqstp, type, name, namelen, id);  } -static int -do_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name) +static __be32 encode_name_from_id(struct xdr_stream *xdr, +				  struct svc_rqst *rqstp, int type, u32 id)  {  	if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) -		return sprintf(name, "%u", id); -	return idmap_id_to_name(rqstp, type, id, name); +		return encode_ascii_id(xdr, id); +	return idmap_id_to_name(xdr, rqstp, type, id);  }  __be32 @@ -637,16 +655,16 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,  	return status;  } -int -nfsd_map_uid_to_name(struct svc_rqst *rqstp, kuid_t uid, char *name) +__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp, +			 kuid_t uid)  {  	u32 id = from_kuid(&init_user_ns, uid); -	return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); +	return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id);  } -int -nfsd_map_gid_to_name(struct svc_rqst *rqstp, kgid_t gid, char *name) +__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp, +			  kgid_t gid)  {  	u32 id = from_kgid(&init_user_ns, gid); -	return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); +	return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id);  } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 419572f33b7..8f029db5d27 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -41,6 +41,7 @@  #include 
"vfs.h"  #include "current_stateid.h"  #include "netns.h" +#include "acl.h"  #ifdef CONFIG_NFSD_V4_SECURITY_LABEL  #include <linux/security.h> @@ -230,17 +231,16 @@ static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate  }  static __be32 -do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) +do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)  {  	struct svc_fh *current_fh = &cstate->current_fh; -	struct svc_fh *resfh;  	int accmode;  	__be32 status; -	resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); -	if (!resfh) +	*resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); +	if (!*resfh)  		return nfserr_jukebox; -	fh_init(resfh, NFS4_FHSIZE); +	fh_init(*resfh, NFS4_FHSIZE);  	open->op_truncate = 0;  	if (open->op_create) { @@ -265,12 +265,12 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru  		 */  		status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,  					open->op_fname.len, &open->op_iattr, -					resfh, open->op_createmode, +					*resfh, open->op_createmode,  					(u32 *)open->op_verf.data,  					&open->op_truncate, &open->op_created);  		if (!status && open->op_label.len) -			nfsd4_security_inode_setsecctx(resfh, &open->op_label, open->op_bmval); +			nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval);  		/*  		 * Following rfc 3530 14.2.16, use the returned bitmask @@ -280,31 +280,32 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru  		if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0)  			open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS |  							FATTR4_WORD1_TIME_MODIFY); -	} else { +	} else +		/* +		 * Note this may exit with the parent still locked. 
+		 * We will hold the lock until nfsd4_open's final +		 * lookup, to prevent renames or unlinks until we've had +		 * a chance to an acquire a delegation if appropriate. +		 */  		status = nfsd_lookup(rqstp, current_fh, -				     open->op_fname.data, open->op_fname.len, resfh); -		fh_unlock(current_fh); -	} +				     open->op_fname.data, open->op_fname.len, *resfh);  	if (status)  		goto out; -	status = nfsd_check_obj_isreg(resfh); +	status = nfsd_check_obj_isreg(*resfh);  	if (status)  		goto out;  	if (is_create_with_attrs(open) && open->op_acl != NULL) -		do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval); +		do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval); -	nfsd4_set_open_owner_reply_cache(cstate, open, resfh); +	nfsd4_set_open_owner_reply_cache(cstate, open, *resfh);  	accmode = NFSD_MAY_NOP;  	if (open->op_created ||  			open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)  		accmode |= NFSD_MAY_OWNER_OVERRIDE; -	status = do_open_permission(rqstp, resfh, open, accmode); +	status = do_open_permission(rqstp, *resfh, open, accmode);  	set_change_info(&open->op_cinfo, current_fh); -	fh_dup2(current_fh, resfh);  out: -	fh_put(resfh); -	kfree(resfh);  	return status;  } @@ -357,6 +358,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	   struct nfsd4_open *open)  {  	__be32 status; +	struct svc_fh *resfh = NULL;  	struct nfsd4_compoundres *resp;  	struct net *net = SVC_NET(rqstp);  	struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -423,26 +425,26 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	switch (open->op_claim_type) {  		case NFS4_OPEN_CLAIM_DELEGATE_CUR:  		case NFS4_OPEN_CLAIM_NULL: -			status = do_open_lookup(rqstp, cstate, open); +			status = do_open_lookup(rqstp, cstate, open, &resfh);  			if (status)  				goto out;  			break;  		case NFS4_OPEN_CLAIM_PREVIOUS: -			open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;  			status = 
nfs4_check_open_reclaim(&open->op_clientid,  							 cstate->minorversion,  							 nn);  			if (status)  				goto out; +			open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;  		case NFS4_OPEN_CLAIM_FH:  		case NFS4_OPEN_CLAIM_DELEG_CUR_FH:  			status = do_open_fhandle(rqstp, cstate, open);  			if (status)  				goto out; +			resfh = &cstate->current_fh;  			break;  		case NFS4_OPEN_CLAIM_DELEG_PREV_FH:               	case NFS4_OPEN_CLAIM_DELEGATE_PREV: -			open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;  			dprintk("NFSD: unsupported OPEN claim type %d\n",  				open->op_claim_type);  			status = nfserr_notsupp; @@ -458,9 +460,14 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	 * successful, it (1) truncates the file if open->op_truncate was  	 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.  	 */ -	status = nfsd4_process_open2(rqstp, &cstate->current_fh, open); +	status = nfsd4_process_open2(rqstp, resfh, open);  	WARN_ON(status && open->op_created);  out: +	if (resfh && resfh != &cstate->current_fh) { +		fh_dup2(&cstate->current_fh, resfh); +		fh_put(resfh); +		kfree(resfh); +	}  	nfsd4_cleanup_open_state(open, status);  	if (open->op_openowner && !nfsd4_has_session(cstate))  		cstate->replay_owner = &open->op_openowner->oo_owner; @@ -610,15 +617,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	switch (create->cr_type) {  	case NF4LNK: -		/* ugh! we have to null-terminate the linktext, or -		 * vfs_symlink() will choke.  it is always safe to -		 * null-terminate by brute force, since at worst we -		 * will overwrite the first byte of the create namelen -		 * in the XDR buffer, which has already been extracted -		 * during XDR decode. 
-		 */ -		create->cr_linkname[create->cr_linklen] = 0; -  		status = nfsd_symlink(rqstp, &cstate->current_fh,  				      create->cr_name, create->cr_namelen,  				      create->cr_linkname, create->cr_linklen, @@ -778,7 +776,6 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	if (!nfsd4_last_compound_op(rqstp))  		rqstp->rq_splice_ok = false; -	nfs4_lock_state();  	/* check stateid */  	if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),  						 cstate, &read->rd_stateid, @@ -786,11 +783,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");  		goto out;  	} -	if (read->rd_filp) -		get_file(read->rd_filp);  	status = nfs_ok;  out: -	nfs4_unlock_state();  	read->rd_rqstp = rqstp;  	read->rd_fhp = &cstate->current_fh;  	return status; @@ -929,10 +923,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	int err;  	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { -		nfs4_lock_state();  		status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,  			&setattr->sa_stateid, WR_STATE, NULL); -		nfs4_unlock_state();  		if (status) {  			dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");  			return status; @@ -998,17 +990,12 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	if (write->wr_offset >= OFFSET_MAX)  		return nfserr_inval; -	nfs4_lock_state();  	status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),  					cstate, stateid, WR_STATE, &filp);  	if (status) { -		nfs4_unlock_state();  		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");  		return status;  	} -	if (filp) -		get_file(filp); -	nfs4_unlock_state();  	cnt = write->wr_buflen;  	write->wr_how_written = write->wr_stable_how; @@ -1064,13 +1051,15 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  		return nfserr_jukebox;  	p = buf; -	status = nfsd4_encode_fattr(&cstate->current_fh, +	status = 
nfsd4_encode_fattr_to_buf(&p, count, &cstate->current_fh,  				    cstate->current_fh.fh_export, -				    cstate->current_fh.fh_dentry, &p, -				    count, verify->ve_bmval, +				    cstate->current_fh.fh_dentry, +				    verify->ve_bmval,  				    rqstp, 0); - -	/* this means that nfsd4_encode_fattr() ran out of space */ +	/* +	 * If nfsd4_encode_fattr() ran out of space, assume that's because +	 * the attributes are longer (hence different) than those given: +	 */  	if (status == nfserr_resource)  		status = nfserr_not_same;  	if (status) @@ -1172,9 +1161,7 @@ struct nfsd4_operation {  static struct nfsd4_operation nfsd4_ops[]; -#ifdef NFSD_DEBUG  static const char *nfsd4_op_name(unsigned opnum); -#endif  /*   * Enforce NFSv4.1 COMPOUND ordering rules: @@ -1216,6 +1203,8 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op)  bool nfsd4_cache_this_op(struct nfsd4_op *op)  { +	if (op->opnum == OP_ILLEGAL) +		return false;  	return OPDESC(op)->op_flags & OP_CACHEME;  } @@ -1252,6 +1241,25 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)  	return !(nextd->op_flags & OP_HANDLES_WRONGSEC);  } +static void svcxdr_init_encode(struct svc_rqst *rqstp, +			       struct nfsd4_compoundres *resp) +{ +	struct xdr_stream *xdr = &resp->xdr; +	struct xdr_buf *buf = &rqstp->rq_res; +	struct kvec *head = buf->head; + +	xdr->buf = buf; +	xdr->iov = head; +	xdr->p   = head->iov_base + head->iov_len; +	xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; +	/* Tail and page_len should be zero at this point: */ +	buf->len = buf->head[0].iov_len; +	xdr->scratch.iov_len = 0; +	xdr->page_ptr = buf->pages - 1; +	buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) +		- rqstp->rq_auth_slack; +} +  /*   * COMPOUND call.   
*/ @@ -1263,26 +1271,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,  	struct nfsd4_op	*op;  	struct nfsd4_operation *opdesc;  	struct nfsd4_compound_state *cstate = &resp->cstate; -	int		slack_bytes; -	u32		plen = 0; +	struct svc_fh *current_fh = &cstate->current_fh; +	struct svc_fh *save_fh = &cstate->save_fh;  	__be32		status; -	resp->xbuf = &rqstp->rq_res; -	resp->p = rqstp->rq_res.head[0].iov_base + -						rqstp->rq_res.head[0].iov_len; -	resp->tagp = resp->p; +	svcxdr_init_encode(rqstp, resp); +	resp->tagp = resp->xdr.p;  	/* reserve space for: taglen, tag, and opcnt */ -	resp->p += 2 + XDR_QUADLEN(args->taglen); -	resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; +	xdr_reserve_space(&resp->xdr, 8 + args->taglen);  	resp->taglen = args->taglen;  	resp->tag = args->tag; -	resp->opcnt = 0;  	resp->rqstp = rqstp; -	resp->cstate.minorversion = args->minorversion; -	resp->cstate.replay_owner = NULL; -	resp->cstate.session = NULL; -	fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); -	fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); +	cstate->minorversion = args->minorversion; +	fh_init(current_fh, NFS4_FHSIZE); +	fh_init(save_fh, NFS4_FHSIZE);  	/*  	 * Don't use the deferral mechanism for NFSv4; compounds make it  	 * too hard to avoid non-idempotency problems. @@ -1320,35 +1322,34 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,  			goto encode_op;  		} -		/* We must be able to encode a successful response to -		 * this operation, with enough room left over to encode a -		 * failed response to the next operation.  If we don't -		 * have enough room, fail with ERR_RESOURCE. 
-		 */ -		slack_bytes = (char *)resp->end - (char *)resp->p; -		if (slack_bytes < COMPOUND_SLACK_SPACE -				+ COMPOUND_ERR_SLACK_SPACE) { -			BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE); -			op->status = nfserr_resource; -			goto encode_op; -		} -  		opdesc = OPDESC(op); -		if (!cstate->current_fh.fh_dentry) { +		if (!current_fh->fh_dentry) {  			if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {  				op->status = nfserr_nofilehandle;  				goto encode_op;  			} -		} else if (cstate->current_fh.fh_export->ex_fslocs.migrated && +		} else if (current_fh->fh_export->ex_fslocs.migrated &&  			  !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {  			op->status = nfserr_moved;  			goto encode_op;  		} +		fh_clear_wcc(current_fh); +  		/* If op is non-idempotent */  		if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { -			plen = opdesc->op_rsize_bop(rqstp, op); +			/* +			 * Don't execute this op if we couldn't encode a +			 * succesful reply: +			 */ +			u32 plen = opdesc->op_rsize_bop(rqstp, op); +			/* +			 * Plus if there's another operation, make sure +			 * we'll have space to at least encode an error: +			 */ +			if (resp->opcnt < args->opcnt) +				plen += COMPOUND_ERR_SLACK_SPACE;  			op->status = nfsd4_check_resp_size(resp, plen);  		} @@ -1367,19 +1368,19 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,  				clear_current_stateid(cstate);  			if (need_wrongsec_check(rqstp)) -				op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); +				op->status = check_nfsd_access(current_fh->fh_export, rqstp);  		}  encode_op:  		/* Only from SEQUENCE */ -		if (resp->cstate.status == nfserr_replay_cache) { +		if (cstate->status == nfserr_replay_cache) {  			dprintk("%s NFS4.1 replay from cache\n", __func__);  			status = op->status;  			goto out;  		}  		if (op->status == nfserr_replay_me) {  			op->replay = &cstate->replay_owner->so_replay; -			nfsd4_encode_replay(resp, op); +			nfsd4_encode_replay(&resp->xdr, op);  			status = op->status = op->replay->rp_status;  		} 
else {  			nfsd4_encode_operation(resp, op); @@ -1401,10 +1402,10 @@ encode_op:  		nfsd4_increment_op_stats(op->opnum);  	} -	resp->cstate.status = status; -	fh_put(&resp->cstate.current_fh); -	fh_put(&resp->cstate.save_fh); -	BUG_ON(resp->cstate.replay_owner); +	cstate->status = status; +	fh_put(current_fh); +	fh_put(save_fh); +	BUG_ON(cstate->replay_owner);  out:  	/* Reset deferral mechanism for RPC deferrals */  	rqstp->rq_usedeferral = 1; @@ -1418,7 +1419,8 @@ out:  #define op_encode_change_info_maxsz	(5)  #define nfs4_fattr_bitmap_maxsz		(4) -#define op_encode_lockowner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ)) +/* We'll fall back on returning no lockowner if run out of space: */ +#define op_encode_lockowner_maxsz	(0)  #define op_encode_lock_denied_maxsz	(8 + op_encode_lockowner_maxsz)  #define nfs4_owner_maxsz		(1 + XDR_QUADLEN(IDMAP_NAMESZ)) @@ -1450,6 +1452,49 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op  		+ nfs4_fattr_bitmap_maxsz) * sizeof(__be32);  } +/* + * Note since this is an idempotent operation we won't insist on failing + * the op prematurely if the estimate is too large.  We may turn off splice + * reads unnecessarily. 
+ */ +static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, +				      struct nfsd4_op *op) +{ +	u32 *bmap = op->u.getattr.ga_bmval; +	u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2]; +	u32 ret = 0; + +	if (bmap0 & FATTR4_WORD0_ACL) +		return svc_max_payload(rqstp); +	if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) +		return svc_max_payload(rqstp); + +	if (bmap1 & FATTR4_WORD1_OWNER) { +		ret += IDMAP_NAMESZ + 4; +		bmap1 &= ~FATTR4_WORD1_OWNER; +	} +	if (bmap1 & FATTR4_WORD1_OWNER_GROUP) { +		ret += IDMAP_NAMESZ + 4; +		bmap1 &= ~FATTR4_WORD1_OWNER_GROUP; +	} +	if (bmap0 & FATTR4_WORD0_FILEHANDLE) { +		ret += NFS4_FHSIZE + 4; +		bmap0 &= ~FATTR4_WORD0_FILEHANDLE; +	} +	if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) { +		ret += NFSD4_MAX_SEC_LABEL_LEN + 12; +		bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL; +	} +	/* +	 * Largest of remaining attributes are 16 bytes (e.g., +	 * supported_attributes) +	 */ +	ret += 16 * (hweight32(bmap0) + hweight32(bmap1) + hweight32(bmap2)); +	/* bitmask, length */ +	ret += 20; +	return ret; +} +  static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)  {  	return (op_encode_hdr_size + op_encode_change_info_maxsz) @@ -1480,18 +1525,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)  	if (rlen > maxcount)  		rlen = maxcount; -	return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen; +	return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);  }  static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)  { +	u32 maxcount = svc_max_payload(rqstp);  	u32 rlen = op->u.readdir.rd_maxcount; -	if (rlen > PAGE_SIZE) -		rlen = PAGE_SIZE; +	if (rlen > maxcount) +		rlen = maxcount; -	return (op_encode_hdr_size + op_encode_verifier_maxsz) -		 * sizeof(__be32) + rlen; +	return (op_encode_hdr_size + op_encode_verifier_maxsz + +		XDR_QUADLEN(rlen)) * sizeof(__be32);  }  static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) @@ -1506,6 
+1552,12 @@ static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op  		+ op_encode_change_info_maxsz) * sizeof(__be32);  } +static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, +				       struct nfsd4_op *op) +{ +	return NFS4_MAX_SESSIONID_LEN + 20; +} +  static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)  {  	return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); @@ -1513,18 +1565,20 @@ static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o  static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)  { -	return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32); +	return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * +								sizeof(__be32);  }  static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)  { -	return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); +	return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32);  }  static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)  {  	return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ -		1 + 1 + 2 + /* eir_flags, spr_how, spo_must_enforce & _allow */\ +		1 + 1 + /* eir_flags, spr_how */\ +		4 + /* spo_must_enforce & _allow with bitmap */\  		2 + /*eir_server_owner.so_minor_id */\  		/* eir_server_owner.so_major_id<> */\  		XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ @@ -1585,6 +1639,7 @@ static struct nfsd4_operation nfsd4_ops[] = {  	[OP_GETATTR] = {  		.op_func = (nfsd4op_func)nfsd4_getattr,  		.op_flags = ALLOWED_ON_ABSENT_FS, +		.op_rsize_bop = nfsd4_getattr_rsize,  		.op_name = "OP_GETATTR",  	},  	[OP_GETFH] = { @@ -1654,37 +1709,32 @@ static struct nfsd4_operation nfsd4_ops[] = {  	[OP_PUTFH] = {  		.op_func = (nfsd4op_func)nfsd4_putfh,  		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS -				| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING -				| 
OP_CLEAR_STATEID, +				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,  		.op_name = "OP_PUTFH",  		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,  	},  	[OP_PUTPUBFH] = {  		.op_func = (nfsd4op_func)nfsd4_putrootfh,  		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS -				| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING -				| OP_CLEAR_STATEID, +				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,  		.op_name = "OP_PUTPUBFH",  		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,  	},  	[OP_PUTROOTFH] = {  		.op_func = (nfsd4op_func)nfsd4_putrootfh,  		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS -				| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING -				| OP_CLEAR_STATEID, +				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,  		.op_name = "OP_PUTROOTFH",  		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,  	},  	[OP_READ] = {  		.op_func = (nfsd4op_func)nfsd4_read, -		.op_flags = OP_MODIFIES_SOMETHING,  		.op_name = "OP_READ",  		.op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,  		.op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid,  	},  	[OP_READDIR] = {  		.op_func = (nfsd4op_func)nfsd4_readdir, -		.op_flags = OP_MODIFIES_SOMETHING,  		.op_name = "OP_READDIR",  		.op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize,  	}, @@ -1842,14 +1892,33 @@ static struct nfsd4_operation nfsd4_ops[] = {  	},  }; -#ifdef NFSD_DEBUG +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ +	struct nfsd4_operation *opdesc; +	nfsd4op_rsize estimator; + +	if (op->opnum == OP_ILLEGAL) +		return op_encode_hdr_size * sizeof(__be32); +	opdesc = OPDESC(op); +	estimator = opdesc->op_rsize_bop; +	return estimator ? 
estimator(rqstp, op) : PAGE_SIZE; +} + +void warn_on_nonidempotent_op(struct nfsd4_op *op) +{ +	if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { +		pr_err("unable to encode reply to nonidempotent op %d (%s)\n", +			op->opnum, nfsd4_op_name(op->opnum)); +		WARN_ON_ONCE(1); +	} +} +  static const char *nfsd4_op_name(unsigned opnum)  {  	if (opnum < ARRAY_SIZE(nfsd4_ops))  		return nfsd4_ops[opnum].op_name;  	return "unknown_operation";  } -#endif  #define nfsd4_voidres			nfsd4_voidargs  struct nfsd4_voidargs { int dummy; }; @@ -1881,6 +1950,7 @@ struct svc_version	nfsd_version4 = {  		.vs_proc	= nfsd_procedures4,  		.vs_dispatch	= nfsd_dispatch,  		.vs_xdrsize	= NFS4_SVC_XDRSIZE, +		.vs_rpcb_optnl	= 1,  };  /* diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index e0a65a9e37e..9c271f42604 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -385,8 +385,8 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)  	status = vfs_rmdir(parent->d_inode, child);  	if (status) -		printk("failed to remove client recovery directory %s\n", -				child->d_name.name); +		printk("failed to remove client recovery directory %pd\n", +				child);  	/* Keep trying, success or failure: */  	return 0;  } @@ -410,15 +410,15 @@ out:  	nfs4_release_reclaim(nn);  	if (status)  		printk("nfsd4: failed to purge old clients from recovery" -			" directory %s\n", nn->rec_file->f_path.dentry->d_name.name); +			" directory %pD\n", nn->rec_file);  }  static int  load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)  {  	if (child->d_name.len != HEXDIR_LEN - 1) { -		printk("nfsd4: illegal name %s in recovery directory\n", -				child->d_name.name); +		printk("nfsd4: illegal name %pd in recovery directory\n", +				child);  		/* Keep trying; maybe the others are OK: */  		return 0;  	} @@ -437,7 +437,7 @@ nfsd4_recdir_load(struct net *net) {  	status = nfsd4_list_rec_dir(load_recdir, nn);  	if (status)  		printk("nfsd4: failed 
loading clients from recovery" -			" directory %s\n", nn->rec_file->f_path.dentry->d_name.name); +			" directory %pD\n", nn->rec_file);  	return status;  } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0874998a49c..2204e1fe572 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -41,6 +41,7 @@  #include <linux/ratelimit.h>  #include <linux/sunrpc/svcauth_gss.h>  #include <linux/sunrpc/addr.h> +#include <linux/hash.h>  #include "xdr4.h"  #include "xdr4cb.h"  #include "vfs.h" @@ -81,13 +82,13 @@ static DEFINE_MUTEX(client_mutex);   * effort to decrease the scope of the client_mutex, this spinlock may   * eventually cover more:   */ -static DEFINE_SPINLOCK(recall_lock); +static DEFINE_SPINLOCK(state_lock); -static struct kmem_cache *openowner_slab = NULL; -static struct kmem_cache *lockowner_slab = NULL; -static struct kmem_cache *file_slab = NULL; -static struct kmem_cache *stateid_slab = NULL; -static struct kmem_cache *deleg_slab = NULL; +static struct kmem_cache *openowner_slab; +static struct kmem_cache *lockowner_slab; +static struct kmem_cache *file_slab; +static struct kmem_cache *stateid_slab; +static struct kmem_cache *deleg_slab;  void  nfs4_lock_state(void) @@ -235,9 +236,9 @@ static void nfsd4_free_file(struct nfs4_file *f)  static inline void  put_nfs4_file(struct nfs4_file *fi)  { -	if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { +	if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {  		hlist_del(&fi->fi_hash); -		spin_unlock(&recall_lock); +		spin_unlock(&state_lock);  		iput(fi->fi_inode);  		nfsd4_free_file(fi);  	} @@ -364,6 +365,79 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)  	return openlockstateid(nfs4_alloc_stid(clp, stateid_slab));  } +/* + * When we recall a delegation, we should be careful not to hand it + * out again straight away. + * To ensure this we keep a pair of bloom filters ('new' and 'old') + * in which the filehandles of recalled delegations are "stored". 
+ * If a filehandle appear in either filter, a delegation is blocked. + * When a delegation is recalled, the filehandle is stored in the "new" + * filter. + * Every 30 seconds we swap the filters and clear the "new" one, + * unless both are empty of course. + * + * Each filter is 256 bits.  We hash the filehandle to 32bit and use the + * low 3 bytes as hash-table indices. + * + * 'state_lock', which is always held when block_delegations() is called, + * is used to manage concurrent access.  Testing does not need the lock + * except when swapping the two filters. + */ +static struct bloom_pair { +	int	entries, old_entries; +	time_t	swap_time; +	int	new; /* index into 'set' */ +	DECLARE_BITMAP(set[2], 256); +} blocked_delegations; + +static int delegation_blocked(struct knfsd_fh *fh) +{ +	u32 hash; +	struct bloom_pair *bd = &blocked_delegations; + +	if (bd->entries == 0) +		return 0; +	if (seconds_since_boot() - bd->swap_time > 30) { +		spin_lock(&state_lock); +		if (seconds_since_boot() - bd->swap_time > 30) { +			bd->entries -= bd->old_entries; +			bd->old_entries = bd->entries; +			memset(bd->set[bd->new], 0, +			       sizeof(bd->set[0])); +			bd->new = 1-bd->new; +			bd->swap_time = seconds_since_boot(); +		} +		spin_unlock(&state_lock); +	} +	hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); +	if (test_bit(hash&255, bd->set[0]) && +	    test_bit((hash>>8)&255, bd->set[0]) && +	    test_bit((hash>>16)&255, bd->set[0])) +		return 1; + +	if (test_bit(hash&255, bd->set[1]) && +	    test_bit((hash>>8)&255, bd->set[1]) && +	    test_bit((hash>>16)&255, bd->set[1])) +		return 1; + +	return 0; +} + +static void block_delegations(struct knfsd_fh *fh) +{ +	u32 hash; +	struct bloom_pair *bd = &blocked_delegations; + +	hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + +	__set_bit(hash&255, bd->set[bd->new]); +	__set_bit((hash>>8)&255, bd->set[bd->new]); +	__set_bit((hash>>16)&255, bd->set[bd->new]); +	if (bd->entries == 0) +		bd->swap_time = seconds_since_boot(); +	
bd->entries += 1; +} +  static struct nfs4_delegation *  alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh)  { @@ -372,10 +446,11 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv  	dprintk("NFSD alloc_init_deleg\n");  	if (num_delegations > max_delegations)  		return NULL; +	if (delegation_blocked(&current_fh->fh_handle)) +		return NULL;  	dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));  	if (dp == NULL)  		return dp; -	dp->dl_stid.sc_type = NFS4_DELEG_STID;  	/*  	 * delegation seqid's are never incremented.  The 4.1 special  	 * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -402,17 +477,24 @@ static void remove_stid(struct nfs4_stid *s)  	idr_remove(stateids, s->sc_stateid.si_opaque.so_id);  } +static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) +{ +	kmem_cache_free(slab, s); +} +  void  nfs4_put_delegation(struct nfs4_delegation *dp)  {  	if (atomic_dec_and_test(&dp->dl_count)) { -		kmem_cache_free(deleg_slab, dp); +		nfs4_free_stid(deleg_slab, &dp->dl_stid);  		num_delegations--;  	}  }  static void nfs4_put_deleg_lease(struct nfs4_file *fp)  { +	if (!fp->fi_lease) +		return;  	if (atomic_dec_and_test(&fp->fi_delegees)) {  		vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);  		fp->fi_lease = NULL; @@ -426,18 +508,30 @@ static void unhash_stid(struct nfs4_stid *s)  	s->sc_type = 0;  } +static void +hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) +{ +	lockdep_assert_held(&state_lock); + +	dp->dl_stid.sc_type = NFS4_DELEG_STID; +	list_add(&dp->dl_perfile, &fp->fi_delegations); +	list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); +} +  /* Called under the state lock. 
*/  static void  unhash_delegation(struct nfs4_delegation *dp)  { +	spin_lock(&state_lock);  	list_del_init(&dp->dl_perclnt); -	spin_lock(&recall_lock);  	list_del_init(&dp->dl_perfile);  	list_del_init(&dp->dl_recall_lru); -	spin_unlock(&recall_lock); -	nfs4_put_deleg_lease(dp->dl_file); -	put_nfs4_file(dp->dl_file); -	dp->dl_file = NULL; +	spin_unlock(&state_lock); +	if (dp->dl_file) { +		nfs4_put_deleg_lease(dp->dl_file); +		put_nfs4_file(dp->dl_file); +		dp->dl_file = NULL; +	}  } @@ -610,7 +704,7 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp)  static void free_generic_stateid(struct nfs4_ol_stateid *stp)  {  	remove_stid(&stp->st_stid); -	kmem_cache_free(stateid_slab, stp); +	nfs4_free_stid(stateid_slab, &stp->st_stid);  }  static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -640,6 +734,12 @@ static void unhash_lockowner(struct nfs4_lockowner *lo)  	}  } +static void nfs4_free_lockowner(struct nfs4_lockowner *lo) +{ +	kfree(lo->lo_owner.so_owner.data); +	kmem_cache_free(lockowner_slab, lo); +} +  static void release_lockowner(struct nfs4_lockowner *lo)  {  	unhash_lockowner(lo); @@ -668,7 +768,6 @@ static void unhash_open_stateid(struct nfs4_ol_stateid *stp)  static void release_open_stateid(struct nfs4_ol_stateid *stp)  {  	unhash_open_stateid(stp); -	unhash_stid(&stp->st_stid);  	free_generic_stateid(stp);  } @@ -690,12 +789,17 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo)  	struct nfs4_ol_stateid *s = oo->oo_last_closed_stid;  	if (s) { -		unhash_stid(&s->st_stid);  		free_generic_stateid(s);  		oo->oo_last_closed_stid = NULL;  	}  } +static void nfs4_free_openowner(struct nfs4_openowner *oo) +{ +	kfree(oo->oo_owner.so_owner.data); +	kmem_cache_free(openowner_slab, oo); +} +  static void release_openowner(struct nfs4_openowner *oo)  {  	unhash_openowner(oo); @@ -829,10 +933,11 @@ static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)  	spin_unlock(&nfsd_drc_lock);  } -static struct nfsd4_session 
*alloc_session(struct nfsd4_channel_attrs *attrs) +static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, +					   struct nfsd4_channel_attrs *battrs)  { -	int numslots = attrs->maxreqs; -	int slotsize = slot_bytes(attrs); +	int numslots = fattrs->maxreqs; +	int slotsize = slot_bytes(fattrs);  	struct nfsd4_session *new;  	int mem, i; @@ -849,6 +954,10 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *attrs)  		if (!new->se_slots[i])  			goto out_free;  	} + +	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); +	memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs)); +  	return new;  out_free:  	while (i--) @@ -994,8 +1103,7 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru  	list_add(&new->se_perclnt, &clp->cl_sessions);  	spin_unlock(&clp->cl_lock);  	spin_unlock(&nn->client_lock); -	memcpy(&new->se_fchannel, &cses->fore_channel, -			sizeof(struct nfsd4_channel_attrs)); +  	if (cses->flags & SESSION4_BACK_CHAN) {  		struct sockaddr *sa = svc_addr(rqstp);  		/* @@ -1071,10 +1179,22 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)  		return NULL;  	}  	clp->cl_name.len = name.len; +	INIT_LIST_HEAD(&clp->cl_sessions); +	idr_init(&clp->cl_stateids); +	atomic_set(&clp->cl_refcount, 0); +	clp->cl_cb_state = NFSD4_CB_UNKNOWN; +	INIT_LIST_HEAD(&clp->cl_idhash); +	INIT_LIST_HEAD(&clp->cl_openowners); +	INIT_LIST_HEAD(&clp->cl_delegations); +	INIT_LIST_HEAD(&clp->cl_lru); +	INIT_LIST_HEAD(&clp->cl_callbacks); +	INIT_LIST_HEAD(&clp->cl_revoked); +	spin_lock_init(&clp->cl_lock); +	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");  	return clp;  } -static inline void +static void  free_client(struct nfs4_client *clp)  {  	struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); @@ -1088,6 +1208,7 @@ free_client(struct nfs4_client *clp)  		WARN_ON_ONCE(atomic_read(&ses->se_ref));  		free_session(ses);  	} +	
rpc_destroy_wait_queue(&clp->cl_cb_waitq);  	free_svc_cred(&clp->cl_cred);  	kfree(clp->cl_name.data);  	idr_destroy(&clp->cl_stateids); @@ -1116,17 +1237,22 @@ destroy_client(struct nfs4_client *clp)  	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);  	INIT_LIST_HEAD(&reaplist); -	spin_lock(&recall_lock); +	spin_lock(&state_lock);  	while (!list_empty(&clp->cl_delegations)) {  		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);  		list_del_init(&dp->dl_perclnt);  		list_move(&dp->dl_recall_lru, &reaplist);  	} -	spin_unlock(&recall_lock); +	spin_unlock(&state_lock);  	while (!list_empty(&reaplist)) {  		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);  		destroy_delegation(dp);  	} +	list_splice_init(&clp->cl_revoked, &reaplist); +	while (!list_empty(&reaplist)) { +		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); +		destroy_revoked_delegation(dp); +	}  	while (!list_empty(&clp->cl_openowners)) {  		oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);  		release_openowner(oo); @@ -1335,7 +1461,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name,  	if (clp == NULL)  		return NULL; -	INIT_LIST_HEAD(&clp->cl_sessions);  	ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);  	if (ret) {  		spin_lock(&nn->client_lock); @@ -1343,20 +1468,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name,  		spin_unlock(&nn->client_lock);  		return NULL;  	} -	idr_init(&clp->cl_stateids); -	atomic_set(&clp->cl_refcount, 0); -	clp->cl_cb_state = NFSD4_CB_UNKNOWN; -	INIT_LIST_HEAD(&clp->cl_idhash); -	INIT_LIST_HEAD(&clp->cl_openowners); -	INIT_LIST_HEAD(&clp->cl_delegations); -	INIT_LIST_HEAD(&clp->cl_lru); -	INIT_LIST_HEAD(&clp->cl_callbacks); -	INIT_LIST_HEAD(&clp->cl_revoked); -	spin_lock_init(&clp->cl_lock);  	nfsd4_init_callback(&clp->cl_cb_null);  	clp->cl_time = get_seconds();  	clear_bit(0, &clp->cl_cb_slot_busy); -	
rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");  	copy_verf(clp, verf);  	rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);  	gen_confirm(clp); @@ -1526,11 +1640,12 @@ out_err:  }  /* - * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size. + * Cache a reply. nfsd4_check_resp_size() has bounded the cache size.   */  void  nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)  { +	struct xdr_buf *buf = resp->xdr.buf;  	struct nfsd4_slot *slot = resp->cstate.slot;  	unsigned int base; @@ -1544,11 +1659,9 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)  		slot->sl_datalen = 0;  		return;  	} -	slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap; -	base = (char *)resp->cstate.datap - -					(char *)resp->xbuf->head[0].iov_base; -	if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data, -				    slot->sl_datalen)) +	base = resp->cstate.data_offset; +	slot->sl_datalen = buf->len - base; +	if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))  		WARN("%s: sessions DRC could not cache compound\n", __func__);  	return;  } @@ -1584,28 +1697,31 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,   * The sequence operation is not cached because we can use the slot and   * session values.   */ -__be32 +static __be32  nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,  			 struct nfsd4_sequence *seq)  {  	struct nfsd4_slot *slot = resp->cstate.slot; +	struct xdr_stream *xdr = &resp->xdr; +	__be32 *p;  	__be32 status;  	dprintk("--> %s slot %p\n", __func__, slot); -	/* Either returns 0 or nfserr_retry_uncached */  	status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); -	if (status == nfserr_retry_uncached_rep) +	if (status)  		return status; -	/* The sequence operation has been encoded, cstate->datap set. 
*/ -	memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); +	p = xdr_reserve_space(xdr, slot->sl_datalen); +	if (!p) { +		WARN_ON_ONCE(1); +		return nfserr_serverfault; +	} +	xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen); +	xdr_commit_encode(xdr);  	resp->opcnt = slot->sl_opcnt; -	resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); -	status = slot->sl_status; - -	return status; +	return slot->sl_status;  }  /* @@ -1843,6 +1959,11 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs  	return nfs_ok;  } +#define NFSD_CB_MAX_REQ_SZ	((NFS4_enc_cb_recall_sz + \ +				 RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32)) +#define NFSD_CB_MAX_RESP_SZ	((NFS4_dec_cb_recall_sz + \ +				 RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32)) +  static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)  {  	ca->headerpadsz = 0; @@ -1853,9 +1974,9 @@ static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)  	 * less than 1k.  Tighten up this estimate in the unlikely event  	 * it turns out to be a problem for some client:  	 */ -	if (ca->maxreq_sz < NFS4_enc_cb_recall_sz + RPC_MAX_HEADER_WITH_AUTH) +	if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ)  		return nfserr_toosmall; -	if (ca->maxresp_sz < NFS4_dec_cb_recall_sz + RPC_MAX_REPHEADER_WITH_AUTH) +	if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ)  		return nfserr_toosmall;  	ca->maxresp_cached = 0;  	if (ca->maxops < 2) @@ -1905,9 +2026,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,  		return status;  	status = check_backchannel_attrs(&cr_ses->back_channel);  	if (status) -		return status; +		goto out_release_drc_mem;  	status = nfserr_jukebox; -	new = alloc_session(&cr_ses->fore_channel); +	new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);  	if (!new)  		goto out_release_drc_mem;  	conn = alloc_conn_from_crses(rqstp, cr_ses); @@ -2172,11 +2293,13 @@ nfsd4_sequence(struct svc_rqst *rqstp,  	       struct nfsd4_sequence *seq)  {  	struct 
nfsd4_compoundres *resp = rqstp->rq_resp; +	struct xdr_stream *xdr = &resp->xdr;  	struct nfsd4_session *session;  	struct nfs4_client *clp;  	struct nfsd4_slot *slot;  	struct nfsd4_conn *conn;  	__be32 status; +	int buflen;  	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);  	if (resp->opcnt != 1) @@ -2245,6 +2368,16 @@ nfsd4_sequence(struct svc_rqst *rqstp,  	if (status)  		goto out_put_session; +	buflen = (seq->cachethis) ? +			session->se_fchannel.maxresp_cached : +			session->se_fchannel.maxresp_sz; +	status = (seq->cachethis) ? nfserr_rep_too_big_to_cache : +				    nfserr_rep_too_big; +	if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack)) +		goto out_put_session; +	svc_reserve(rqstp, buflen); + +	status = nfs_ok;  	/* Success! bump slot seqid */  	slot->sl_seqid = seq->seqid;  	slot->sl_flags |= NFSD4_SLOT_INUSE; @@ -2270,7 +2403,8 @@ out:  	if (!list_empty(&clp->cl_revoked))  		seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;  out_no_session: -	kfree(conn); +	if (conn) +		free_conn(conn);  	spin_unlock(&nn->client_lock);  	return status;  out_put_session: @@ -2481,28 +2615,19 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino)  	fp->fi_lease = NULL;  	memset(fp->fi_fds, 0, sizeof(fp->fi_fds));  	memset(fp->fi_access, 0, sizeof(fp->fi_access)); -	spin_lock(&recall_lock); +	spin_lock(&state_lock);  	hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); -	spin_unlock(&recall_lock); -} - -static void -nfsd4_free_slab(struct kmem_cache **slab) -{ -	if (*slab == NULL) -		return; -	kmem_cache_destroy(*slab); -	*slab = NULL; +	spin_unlock(&state_lock);  }  void  nfsd4_free_slabs(void)  { -	nfsd4_free_slab(&openowner_slab); -	nfsd4_free_slab(&lockowner_slab); -	nfsd4_free_slab(&file_slab); -	nfsd4_free_slab(&stateid_slab); -	nfsd4_free_slab(&deleg_slab); +	kmem_cache_destroy(openowner_slab); +	kmem_cache_destroy(lockowner_slab); +	kmem_cache_destroy(file_slab); +	kmem_cache_destroy(stateid_slab); +	
kmem_cache_destroy(deleg_slab);  }  int @@ -2511,42 +2636,38 @@ nfsd4_init_slabs(void)  	openowner_slab = kmem_cache_create("nfsd4_openowners",  			sizeof(struct nfs4_openowner), 0, 0, NULL);  	if (openowner_slab == NULL) -		goto out_nomem; +		goto out;  	lockowner_slab = kmem_cache_create("nfsd4_lockowners",  			sizeof(struct nfs4_lockowner), 0, 0, NULL);  	if (lockowner_slab == NULL) -		goto out_nomem; +		goto out_free_openowner_slab;  	file_slab = kmem_cache_create("nfsd4_files",  			sizeof(struct nfs4_file), 0, 0, NULL);  	if (file_slab == NULL) -		goto out_nomem; +		goto out_free_lockowner_slab;  	stateid_slab = kmem_cache_create("nfsd4_stateids",  			sizeof(struct nfs4_ol_stateid), 0, 0, NULL);  	if (stateid_slab == NULL) -		goto out_nomem; +		goto out_free_file_slab;  	deleg_slab = kmem_cache_create("nfsd4_delegations",  			sizeof(struct nfs4_delegation), 0, 0, NULL);  	if (deleg_slab == NULL) -		goto out_nomem; +		goto out_free_stateid_slab;  	return 0; -out_nomem: -	nfsd4_free_slabs(); + +out_free_stateid_slab: +	kmem_cache_destroy(stateid_slab); +out_free_file_slab: +	kmem_cache_destroy(file_slab); +out_free_lockowner_slab: +	kmem_cache_destroy(lockowner_slab); +out_free_openowner_slab: +	kmem_cache_destroy(openowner_slab); +out:  	dprintk("nfsd4: out of memory while initializing nfsv4\n");  	return -ENOMEM;  } -void nfs4_free_openowner(struct nfs4_openowner *oo) -{ -	kfree(oo->oo_owner.so_owner.data); -	kmem_cache_free(openowner_slab, oo); -} - -void nfs4_free_lockowner(struct nfs4_lockowner *lo) -{ -	kfree(lo->lo_owner.so_owner.data); -	kmem_cache_free(lockowner_slab, lo); -} -  static void init_nfs4_replay(struct nfs4_replay *rp)  {  	rp->rp_status = nfserr_serverfault; @@ -2667,15 +2788,15 @@ find_file(struct inode *ino)  	unsigned int hashval = file_hashval(ino);  	struct nfs4_file *fp; -	spin_lock(&recall_lock); +	spin_lock(&state_lock);  	hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {  		if (fp->fi_inode == ino) {  			
get_nfs4_file(fp); -			spin_unlock(&recall_lock); +			spin_unlock(&state_lock);  			return fp;  		}  	} -	spin_unlock(&recall_lock); +	spin_unlock(&state_lock);  	return NULL;  } @@ -2712,6 +2833,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)  	struct nfs4_client *clp = dp->dl_stid.sc_client;  	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); +	lockdep_assert_held(&state_lock);  	/* We're assuming the state code never drops its reference  	 * without first removing the lease.  Since we're in this lease  	 * callback (and since the lease code is serialized by the kernel @@ -2724,6 +2846,8 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)  	/* Only place dl_time is set; protected by i_lock: */  	dp->dl_time = get_seconds(); +	block_delegations(&dp->dl_fh); +  	nfsd4_cb_recall(dp);  } @@ -2748,11 +2872,11 @@ static void nfsd_break_deleg_cb(struct file_lock *fl)  	 */  	fl->fl_break_time = 0; -	spin_lock(&recall_lock); +	spin_lock(&state_lock);  	fp->fi_had_conflict = true;  	list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)  		nfsd_break_one_deleg(dp); -	spin_unlock(&recall_lock); +	spin_unlock(&state_lock);  }  static @@ -3008,7 +3132,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f  		return NULL;  	locks_init_lock(fl);  	fl->fl_lmops = &nfsd_lease_mng_ops; -	fl->fl_flags = FL_LEASE; +	fl->fl_flags = FL_DELEG;  	fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? 
F_RDLCK: F_WRLCK;  	fl->fl_end = OFFSET_MAX;  	fl->fl_owner = (fl_owner_t)(dp->dl_file); @@ -3026,49 +3150,38 @@ static int nfs4_setlease(struct nfs4_delegation *dp)  	if (!fl)  		return -ENOMEM;  	fl->fl_file = find_readable_file(fp); -	list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);  	status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); -	if (status) { -		list_del_init(&dp->dl_perclnt); -		locks_free_lock(fl); -		return status; -	} +	if (status) +		goto out_free;  	fp->fi_lease = fl;  	fp->fi_deleg_file = get_file(fl->fl_file);  	atomic_set(&fp->fi_delegees, 1); -	list_add(&dp->dl_perfile, &fp->fi_delegations); +	spin_lock(&state_lock); +	hash_delegation_locked(dp, fp); +	spin_unlock(&state_lock);  	return 0; +out_free: +	locks_free_lock(fl); +	return status;  }  static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp)  { -	int status; -  	if (fp->fi_had_conflict)  		return -EAGAIN;  	get_nfs4_file(fp);  	dp->dl_file = fp; -	if (!fp->fi_lease) { -		status = nfs4_setlease(dp); -		if (status) -			goto out_free; -		return 0; -	} -	spin_lock(&recall_lock); +	if (!fp->fi_lease) +		return nfs4_setlease(dp); +	spin_lock(&state_lock); +	atomic_inc(&fp->fi_delegees);  	if (fp->fi_had_conflict) { -		spin_unlock(&recall_lock); -		status = -EAGAIN; -		goto out_free; +		spin_unlock(&state_lock); +		return -EAGAIN;  	} -	atomic_inc(&fp->fi_delegees); -	list_add(&dp->dl_perfile, &fp->fi_delegations); -	spin_unlock(&recall_lock); -	list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); +	hash_delegation_locked(dp, fp); +	spin_unlock(&state_lock);  	return 0; -out_free: -	put_nfs4_file(fp); -	dp->dl_file = fp; -	return status;  }  static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) @@ -3117,6 +3230,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,  				goto out_no_deleg;  			break;  		case NFS4_OPEN_CLAIM_NULL: +		case NFS4_OPEN_CLAIM_FH:  			/*  			 * Let's not give out any delegations 
till everyone's  			 * had the chance to reclaim theirs.... @@ -3154,8 +3268,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,  	open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;  	return;  out_free: -	unhash_stid(&dp->dl_stid); -	nfs4_put_delegation(dp); +	destroy_delegation(dp);  out_no_deleg:  	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;  	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && @@ -3372,8 +3485,7 @@ nfs4_laundromat(struct nfsd_net *nn)  	struct nfs4_delegation *dp;  	struct list_head *pos, *next, reaplist;  	time_t cutoff = get_seconds() - nn->nfsd4_lease; -	time_t t, clientid_val = nn->nfsd4_lease; -	time_t u, test_val = nn->nfsd4_lease; +	time_t t, new_timeo = nn->nfsd4_lease;  	nfs4_lock_state(); @@ -3385,8 +3497,7 @@ nfs4_laundromat(struct nfsd_net *nn)  		clp = list_entry(pos, struct nfs4_client, cl_lru);  		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {  			t = clp->cl_time - cutoff; -			if (clientid_val > t) -				clientid_val = t; +			new_timeo = min(new_timeo, t);  			break;  		}  		if (mark_client_expired_locked(clp)) { @@ -3403,39 +3514,35 @@ nfs4_laundromat(struct nfsd_net *nn)  			clp->cl_clientid.cl_id);  		expire_client(clp);  	} -	spin_lock(&recall_lock); +	spin_lock(&state_lock);  	list_for_each_safe(pos, next, &nn->del_recall_lru) {  		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);  		if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn)  			continue;  		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { -			u = dp->dl_time - cutoff; -			if (test_val > u) -				test_val = u; +			t = dp->dl_time - cutoff; +			new_timeo = min(new_timeo, t);  			break;  		}  		list_move(&dp->dl_recall_lru, &reaplist);  	} -	spin_unlock(&recall_lock); +	spin_unlock(&state_lock);  	list_for_each_safe(pos, next, &reaplist) {  		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);  		revoke_delegation(dp);  	} -	test_val = nn->nfsd4_lease;  	
list_for_each_safe(pos, next, &nn->close_lru) {  		oo = container_of(pos, struct nfs4_openowner, oo_close_lru);  		if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { -			u = oo->oo_time - cutoff; -			if (test_val > u) -				test_val = u; +			t = oo->oo_time - cutoff; +			new_timeo = min(new_timeo, t);  			break;  		}  		release_openowner(oo);  	} -	if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) -		clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; +	new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);  	nfs4_unlock_state(); -	return clientid_val; +	return new_timeo;  }  static struct workqueue_struct *laundry_wq; @@ -3609,8 +3716,11 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask,  		return nfserr_bad_stateid;  	status = lookup_clientid(&stateid->si_opaque.so_clid, sessions,  							nn, &cl); -	if (status == nfserr_stale_clientid) +	if (status == nfserr_stale_clientid) { +		if (sessions) +			return nfserr_bad_stateid;  		return nfserr_stale_stateid; +	}  	if (status)  		return status;  	*s = find_stateid_by_type(cl, stateid, typemask); @@ -3632,6 +3742,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,  	struct svc_fh *current_fh = &cstate->current_fh;  	struct inode *ino = current_fh->fh_dentry->d_inode;  	struct nfsd_net *nn = net_generic(net, nfsd_net_id); +	struct file *file = NULL;  	__be32 status;  	if (filpp) @@ -3643,10 +3754,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,  	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))  		return check_special_stateids(net, current_fh, stateid, flags); +	nfs4_lock_state(); +  	status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,  				      &s, cstate->minorversion, nn);  	if (status) -		return status; +		goto out;  	status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));  	if (status)  		goto out; @@ -3657,8 +3770,8 @@ 
nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,  		if (status)  			goto out;  		if (filpp) { -			*filpp = dp->dl_file->fi_deleg_file; -			if (!*filpp) { +			file = dp->dl_file->fi_deleg_file; +			if (!file) {  				WARN_ON_ONCE(1);  				status = nfserr_serverfault;  				goto out; @@ -3679,25 +3792,36 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,  			goto out;  		if (filpp) {  			if (flags & RD_STATE) -				*filpp = find_readable_file(stp->st_file); +				file = find_readable_file(stp->st_file);  			else -				*filpp = find_writeable_file(stp->st_file); +				file = find_writeable_file(stp->st_file);  		}  		break;  	default: -		return nfserr_bad_stateid; +		status = nfserr_bad_stateid; +		goto out;  	}  	status = nfs_ok; +	if (file) +		*filpp = get_file(file);  out: +	nfs4_unlock_state();  	return status;  }  static __be32  nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp)  { -	if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner))) +	struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); + +	if (check_for_locks(stp->st_file, lo))  		return nfserr_locks_held; -	release_lock_stateid(stp); +	/* +	 * Currently there's a 1-1 lock stateid<->lockowner +	 * correspondance, and we have to delete the lockowner when we +	 * delete the lock stateid: +	 */ +	release_lockowner(lo);  	return nfs_ok;  } @@ -3843,9 +3967,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	struct nfs4_ol_stateid *stp;  	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); -	dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", -			(int)cstate->current_fh.fh_dentry->d_name.len, -			cstate->current_fh.fh_dentry->d_name.name); +	dprintk("NFSD: nfsd4_open_confirm on file %pd\n", +			cstate->current_fh.fh_dentry);  	status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);  	if (status) @@ -3922,9 +4045,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,  	struct nfs4_ol_stateid 
*stp;  	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); -	dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n",  -			(int)cstate->current_fh.fh_dentry->d_name.len, -			cstate->current_fh.fh_dentry->d_name.name); +	dprintk("NFSD: nfsd4_open_downgrade on file %pd\n",  +			cstate->current_fh.fh_dentry);  	/* We don't yet support WANT bits: */  	if (od->od_deleg_want) @@ -3980,9 +4102,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	struct net *net = SVC_NET(rqstp);  	struct nfsd_net *nn = net_generic(net, nfsd_net_id); -	dprintk("NFSD: nfsd4_close on file %.*s\n",  -			(int)cstate->current_fh.fh_dentry->d_name.len, -			cstate->current_fh.fh_dentry->d_name.name); +	dprintk("NFSD: nfsd4_close on file %pd\n",  +			cstate->current_fh.fh_dentry);  	nfs4_lock_state();  	status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, @@ -3998,10 +4119,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,  	nfsd4_close_open_stateid(stp); -	if (cstate->minorversion) { -		unhash_stid(&stp->st_stid); +	if (cstate->minorversion)  		free_generic_stateid(stp); -	} else +	else  		oo->oo_last_closed_stid = stp;  	if (list_empty(&oo->oo_owner.so_stateids)) { @@ -4141,6 +4261,10 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c  	if (!same_owner_str(&lo->lo_owner, owner, clid))  		return false; +	if (list_empty(&lo->lo_owner.so_stateids)) { +		WARN_ON_ONCE(1); +		return false; +	}  	lst = list_first_entry(&lo->lo_owner.so_stateids,  			       struct nfs4_ol_stateid, st_perstateowner);  	return lst->st_file->fi_inode == inode; @@ -4867,6 +4991,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,  	struct nfs4_delegation *dp, *next;  	u64 count = 0; +	lockdep_assert_held(&state_lock);  	list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {  		if (victims)  			list_move(&dp->dl_recall_lru, victims); @@ -4882,9 +5007,9 @@ u64 
nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max)  	LIST_HEAD(victims);  	u64 count; -	spin_lock(&recall_lock); +	spin_lock(&state_lock);  	count = nfsd_find_all_delegations(clp, max, &victims); -	spin_unlock(&recall_lock); +	spin_unlock(&state_lock);  	list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)  		revoke_delegation(dp); @@ -4898,11 +5023,11 @@ u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max)  	LIST_HEAD(victims);  	u64 count; -	spin_lock(&recall_lock); +	spin_lock(&state_lock);  	count = nfsd_find_all_delegations(clp, max, &victims);  	list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)  		nfsd_break_one_deleg(dp); -	spin_unlock(&recall_lock); +	spin_unlock(&state_lock);  	return count;  } @@ -4911,9 +5036,9 @@ u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max)  {  	u64 count = 0; -	spin_lock(&recall_lock); +	spin_lock(&state_lock);  	count = nfsd_find_all_delegations(clp, max, NULL); -	spin_unlock(&recall_lock); +	spin_unlock(&state_lock);  	nfsd_print_count(clp, count, "delegations");  	return count; @@ -4954,13 +5079,6 @@ struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_  #endif /* CONFIG_NFSD_FAULT_INJECTION */ -/* initialization to perform at module load time: */ - -void -nfs4_state_init(void) -{ -} -  /*   * Since the lifetime of a delegation isn't limited to that of an open, a   * client may quite reasonably hang on to a delegation as long as it has @@ -5048,7 +5166,6 @@ nfs4_state_destroy_net(struct net *net)  	int i;  	struct nfs4_client *clp = NULL;  	struct nfsd_net *nn = net_generic(net, nfsd_net_id); -	struct rb_node *node, *tmp;  	for (i = 0; i < CLIENT_HASH_SIZE; i++) {  		while (!list_empty(&nn->conf_id_hashtbl[i])) { @@ -5057,13 +5174,11 @@ nfs4_state_destroy_net(struct net *net)  		}  	} -	node = rb_first(&nn->unconf_name_tree); -	while (node != NULL) { -		tmp = node; -		node = rb_next(tmp); -		clp = rb_entry(tmp, struct nfs4_client, 
cl_namenode); -		rb_erase(tmp, &nn->unconf_name_tree); -		destroy_client(clp); +	for (i = 0; i < CLIENT_HASH_SIZE; i++) { +		while (!list_empty(&nn->unconf_id_hashtbl[i])) { +			clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); +			destroy_client(clp); +		}  	}  	kfree(nn->sessionid_hashtbl); @@ -5122,7 +5237,6 @@ out_recovery:  	return ret;  } -/* should be called with the state lock held */  void  nfs4_state_shutdown_net(struct net *net)  { @@ -5133,13 +5247,14 @@ nfs4_state_shutdown_net(struct net *net)  	cancel_delayed_work_sync(&nn->laundromat_work);  	locks_end_grace(&nn->nfsd4_manager); +	nfs4_lock_state();  	INIT_LIST_HEAD(&reaplist); -	spin_lock(&recall_lock); +	spin_lock(&state_lock);  	list_for_each_safe(pos, next, &nn->del_recall_lru) {  		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);  		list_move(&dp->dl_recall_lru, &reaplist);  	} -	spin_unlock(&recall_lock); +	spin_unlock(&state_lock);  	list_for_each_safe(pos, next, &reaplist) {  		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);  		destroy_delegation(dp); @@ -5147,6 +5262,7 @@ nfs4_state_shutdown_net(struct net *net)  	nfsd4_client_tracking_exit(net);  	nfs4_state_destroy_net(net); +	nfs4_unlock_state();  }  void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d9454fe5653..944275c8f56 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -98,16 +98,6 @@ xdr_error:					\  	status = nfserr_bad_xdr;		\  	goto out -#define READ32(x)         (x) = ntohl(*p++) -#define READ64(x)         do {			\ -	(x) = (u64)ntohl(*p++) << 32;		\ -	(x) |= ntohl(*p++);			\ -} while (0) -#define READTIME(x)       do {			\ -	p++;					\ -	(x) = ntohl(*p++);			\ -	p++;					\ -} while (0)  #define READMEM(x,nbytes) do {			\  	x = (char *)p;				\  	p += XDR_QUADLEN(nbytes);		\ @@ -141,8 +131,8 @@ xdr_error:					\  static void next_decode_page(struct nfsd4_compoundargs *argp)  { -	argp->pagelist++;  	argp->p = page_address(argp->pagelist[0]); +	
argp->pagelist++;  	if (argp->pagelen < PAGE_SIZE) {  		argp->end = argp->p + (argp->pagelen>>2);  		argp->pagelen = 0; @@ -190,6 +180,15 @@ static int zero_clientid(clientid_t *clid)  	return (clid->cl_boot == 0) && (clid->cl_id == 0);  } +/** + * defer_free - mark an allocation as deferred freed + * @argp: NFSv4 compound argument structure to be freed with + * @release: release callback to free @p, typically kfree() + * @p: pointer to be freed + * + * Marks @p to be freed when processing the compound operation + * described in @argp finishes. + */  static int  defer_free(struct nfsd4_compoundargs *argp,  		void (*release)(const void *), void *p) @@ -206,6 +205,16 @@ defer_free(struct nfsd4_compoundargs *argp,  	return 0;  } +/** + * savemem - duplicate a chunk of memory for later processing + * @argp: NFSv4 compound argument structure to be freed with + * @p: pointer to be duplicated + * @nbytes: length to be duplicated + * + * Returns a pointer to a copy of @nbytes bytes of memory at @p + * that are preserved until processing of the NFSv4 compound + * operation described by @argp finishes. 
+ */  static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)  {  	if (p == argp->tmp) { @@ -234,17 +243,17 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)  	bmval[2] = 0;  	READ_BUF(4); -	READ32(bmlen); +	bmlen = be32_to_cpup(p++);  	if (bmlen > 1000)  		goto xdr_error;  	READ_BUF(bmlen << 2);  	if (bmlen > 0) -		READ32(bmval[0]); +		bmval[0] = be32_to_cpup(p++);  	if (bmlen > 1) -		READ32(bmval[1]); +		bmval[1] = be32_to_cpup(p++);  	if (bmlen > 2) -		READ32(bmval[2]); +		bmval[2] = be32_to_cpup(p++);  	DECODE_TAIL;  } @@ -256,8 +265,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,  {  	int expected_len, len = 0;  	u32 dummy32; +	u64 sec;  	char *buf; -	int host_err;  	DECODE_HEAD;  	iattr->ia_valid = 0; @@ -265,12 +274,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,  		return status;  	READ_BUF(4); -	READ32(expected_len); +	expected_len = be32_to_cpup(p++);  	if (bmval[0] & FATTR4_WORD0_SIZE) {  		READ_BUF(8);  		len += 8; -		READ64(iattr->ia_size); +		p = xdr_decode_hyper(p, &iattr->ia_size);  		iattr->ia_valid |= ATTR_SIZE;  	}  	if (bmval[0] & FATTR4_WORD0_ACL) { @@ -278,25 +287,24 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,  		struct nfs4_ace *ace;  		READ_BUF(4); len += 4; -		READ32(nace); +		nace = be32_to_cpup(p++);  		if (nace > NFS4_ACL_MAX) -			return nfserr_resource; +			return nfserr_fbig;  		*acl = nfs4_acl_new(nace); -		if (*acl == NULL) { -			host_err = -ENOMEM; -			goto out_nfserr; -		} +		if (*acl == NULL) +			return nfserr_jukebox; +  		defer_free(argp, kfree, *acl);  		(*acl)->naces = nace;  		for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {  			READ_BUF(16); len += 16; -			READ32(ace->type); -			READ32(ace->flag); -			READ32(ace->access_mask); -			READ32(dummy32); +			ace->type = be32_to_cpup(p++); +			ace->flag = be32_to_cpup(p++); +			ace->access_mask = be32_to_cpup(p++); +			dummy32 = be32_to_cpup(p++);  			
READ_BUF(dummy32);  			len += XDR_QUADLEN(dummy32) << 2;  			READMEM(buf, dummy32); @@ -318,14 +326,14 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,  	if (bmval[1] & FATTR4_WORD1_MODE) {  		READ_BUF(4);  		len += 4; -		READ32(iattr->ia_mode); +		iattr->ia_mode = be32_to_cpup(p++);  		iattr->ia_mode &= (S_IFMT | S_IALLUGO);  		iattr->ia_valid |= ATTR_MODE;  	}  	if (bmval[1] & FATTR4_WORD1_OWNER) {  		READ_BUF(4);  		len += 4; -		READ32(dummy32); +		dummy32 = be32_to_cpup(p++);  		READ_BUF(dummy32);  		len += (XDR_QUADLEN(dummy32) << 2);  		READMEM(buf, dummy32); @@ -336,7 +344,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,  	if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {  		READ_BUF(4);  		len += 4; -		READ32(dummy32); +		dummy32 = be32_to_cpup(p++);  		READ_BUF(dummy32);  		len += (XDR_QUADLEN(dummy32) << 2);  		READMEM(buf, dummy32); @@ -347,15 +355,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,  	if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {  		READ_BUF(4);  		len += 4; -		READ32(dummy32); +		dummy32 = be32_to_cpup(p++);  		switch (dummy32) {  		case NFS4_SET_TO_CLIENT_TIME:  			/* We require the high 32 bits of 'seconds' to be 0, and we ignore  			   all 32 bits of 'nseconds'. 
*/  			READ_BUF(12);  			len += 12; -			READ64(iattr->ia_atime.tv_sec); -			READ32(iattr->ia_atime.tv_nsec); +			p = xdr_decode_hyper(p, &sec); +			iattr->ia_atime.tv_sec = (time_t)sec; +			iattr->ia_atime.tv_nsec = be32_to_cpup(p++);  			if (iattr->ia_atime.tv_nsec >= (u32)1000000000)  				return nfserr_inval;  			iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -370,15 +379,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,  	if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {  		READ_BUF(4);  		len += 4; -		READ32(dummy32); +		dummy32 = be32_to_cpup(p++);  		switch (dummy32) {  		case NFS4_SET_TO_CLIENT_TIME:  			/* We require the high 32 bits of 'seconds' to be 0, and we ignore  			   all 32 bits of 'nseconds'. */  			READ_BUF(12);  			len += 12; -			READ64(iattr->ia_mtime.tv_sec); -			READ32(iattr->ia_mtime.tv_nsec); +			p = xdr_decode_hyper(p, &sec); +			iattr->ia_mtime.tv_sec = sec; +			iattr->ia_mtime.tv_nsec = be32_to_cpup(p++);  			if (iattr->ia_mtime.tv_nsec >= (u32)1000000000)  				return nfserr_inval;  			iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -396,13 +406,13 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,  	if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {  		READ_BUF(4);  		len += 4; -		READ32(dummy32); /* lfs: we don't use it */ +		dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */  		READ_BUF(4);  		len += 4; -		READ32(dummy32); /* pi: we don't use it either */ +		dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */  		READ_BUF(4);  		len += 4; -		READ32(dummy32); +		dummy32 = be32_to_cpup(p++);  		READ_BUF(dummy32);  		if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN)  			return nfserr_badlabel; @@ -411,6 +421,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,  		label->data = kzalloc(dummy32 + 1, GFP_KERNEL);  		if (!label->data)  			return nfserr_jukebox; +		label->len = dummy32;  		defer_free(argp, kfree, label->data);  		memcpy(label->data, buf, dummy32);  	} @@ -424,10 +435,6 
@@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,  		goto xdr_error;  	DECODE_TAIL; - -out_nfserr: -	status = nfserrno(host_err); -	goto out;  }  static __be32 @@ -436,7 +443,7 @@ nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid)  	DECODE_HEAD;  	READ_BUF(sizeof(stateid_t)); -	READ32(sid->si_generation); +	sid->si_generation = be32_to_cpup(p++);  	COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t));  	DECODE_TAIL; @@ -448,7 +455,7 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access  	DECODE_HEAD;  	READ_BUF(4); -	READ32(access->ac_req_access); +	access->ac_req_access = be32_to_cpup(p++);  	DECODE_TAIL;  } @@ -463,7 +470,7 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_  	/* callback_sec_params4 */  	READ_BUF(4); -	READ32(nr_secflavs); +	nr_secflavs = be32_to_cpup(p++);  	if (nr_secflavs)  		cbs->flavor = (u32)(-1);  	else @@ -471,7 +478,7 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_  		cbs->flavor = 0;  	for (i = 0; i < nr_secflavs; ++i) {  		READ_BUF(4); -		READ32(dummy); +		dummy = be32_to_cpup(p++);  		switch (dummy) {  		case RPC_AUTH_NULL:  			/* Nothing to read */ @@ -481,21 +488,21 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_  		case RPC_AUTH_UNIX:  			READ_BUF(8);  			/* stamp */ -			READ32(dummy); +			dummy = be32_to_cpup(p++);  			/* machine name */ -			READ32(dummy); +			dummy = be32_to_cpup(p++);  			READ_BUF(dummy);  			SAVEMEM(machine_name, dummy);  			/* uid, gid */  			READ_BUF(8); -			READ32(uid); -			READ32(gid); +			uid = be32_to_cpup(p++); +			gid = be32_to_cpup(p++);  			/* more gids */  			READ_BUF(4); -			READ32(dummy); +			dummy = be32_to_cpup(p++);  			READ_BUF(dummy * 4);  			if (cbs->flavor == (u32)(-1)) {  				kuid_t kuid = make_kuid(&init_user_ns, uid); @@ -515,14 +522,14 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_  			
	"not supported!\n");  			READ_BUF(8);  			/* gcbp_service */ -			READ32(dummy); +			dummy = be32_to_cpup(p++);  			/* gcbp_handle_from_server */ -			READ32(dummy); +			dummy = be32_to_cpup(p++);  			READ_BUF(dummy);  			p += XDR_QUADLEN(dummy);  			/* gcbp_handle_from_client */  			READ_BUF(4); -			READ32(dummy); +			dummy = be32_to_cpup(p++);  			READ_BUF(dummy);  			break;  		default: @@ -538,7 +545,7 @@ static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, stru  	DECODE_HEAD;  	READ_BUF(4); -	READ32(bc->bc_cb_program); +	bc->bc_cb_program = be32_to_cpup(p++);  	nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);  	DECODE_TAIL; @@ -550,7 +557,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,  	READ_BUF(NFS4_MAX_SESSIONID_LEN + 8);  	COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); -	READ32(bcts->dir); +	bcts->dir = be32_to_cpup(p++);  	/* XXX: skipping ctsa_use_conn_in_rdma_mode.  Perhaps Tom Tucker  	 * could help us figure out we should be using it. 
*/  	DECODE_TAIL; @@ -562,7 +569,7 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)  	DECODE_HEAD;  	READ_BUF(4); -	READ32(close->cl_seqid); +	close->cl_seqid = be32_to_cpup(p++);  	return nfsd4_decode_stateid(argp, &close->cl_stateid);  	DECODE_TAIL; @@ -575,8 +582,8 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit  	DECODE_HEAD;  	READ_BUF(12); -	READ64(commit->co_offset); -	READ32(commit->co_count); +	p = xdr_decode_hyper(p, &commit->co_offset); +	commit->co_count = be32_to_cpup(p++);  	DECODE_TAIL;  } @@ -587,19 +594,30 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create  	DECODE_HEAD;  	READ_BUF(4); -	READ32(create->cr_type); +	create->cr_type = be32_to_cpup(p++);  	switch (create->cr_type) {  	case NF4LNK:  		READ_BUF(4); -		READ32(create->cr_linklen); +		create->cr_linklen = be32_to_cpup(p++);  		READ_BUF(create->cr_linklen); -		SAVEMEM(create->cr_linkname, create->cr_linklen); +		/* +		 * The VFS will want a null-terminated string, and +		 * null-terminating in place isn't safe since this might +		 * end on a page boundary: +		 */ +		create->cr_linkname = +				kmalloc(create->cr_linklen + 1, GFP_KERNEL); +		if (!create->cr_linkname) +			return nfserr_jukebox; +		memcpy(create->cr_linkname, p, create->cr_linklen); +		create->cr_linkname[create->cr_linklen] = '\0'; +		defer_free(argp, kfree, create->cr_linkname);  		break;  	case NF4BLK:  	case NF4CHR:  		READ_BUF(8); -		READ32(create->cr_specdata1); -		READ32(create->cr_specdata2); +		create->cr_specdata1 = be32_to_cpup(p++); +		create->cr_specdata2 = be32_to_cpup(p++);  		break;  	case NF4SOCK:  	case NF4FIFO: @@ -609,7 +627,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create  	}  	READ_BUF(4); -	READ32(create->cr_namelen); +	create->cr_namelen = be32_to_cpup(p++);  	READ_BUF(create->cr_namelen);  	SAVEMEM(create->cr_name, create->cr_namelen);  	if ((status = 
check_filename(create->cr_name, create->cr_namelen))) @@ -641,7 +659,7 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)  	DECODE_HEAD;  	READ_BUF(4); -	READ32(link->li_namelen); +	link->li_namelen = be32_to_cpup(p++);  	READ_BUF(link->li_namelen);  	SAVEMEM(link->li_name, link->li_namelen);  	if ((status = check_filename(link->li_name, link->li_namelen))) @@ -659,24 +677,24 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)  	* type, reclaim(boolean), offset, length, new_lock_owner(boolean)  	*/  	READ_BUF(28); -	READ32(lock->lk_type); +	lock->lk_type = be32_to_cpup(p++);  	if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))  		goto xdr_error; -	READ32(lock->lk_reclaim); -	READ64(lock->lk_offset); -	READ64(lock->lk_length); -	READ32(lock->lk_is_new); +	lock->lk_reclaim = be32_to_cpup(p++); +	p = xdr_decode_hyper(p, &lock->lk_offset); +	p = xdr_decode_hyper(p, &lock->lk_length); +	lock->lk_is_new = be32_to_cpup(p++);  	if (lock->lk_is_new) {  		READ_BUF(4); -		READ32(lock->lk_new_open_seqid); +		lock->lk_new_open_seqid = be32_to_cpup(p++);  		status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid);  		if (status)  			return status;  		READ_BUF(8 + sizeof(clientid_t)); -		READ32(lock->lk_new_lock_seqid); +		lock->lk_new_lock_seqid = be32_to_cpup(p++);  		COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); -		READ32(lock->lk_new_owner.len); +		lock->lk_new_owner.len = be32_to_cpup(p++);  		READ_BUF(lock->lk_new_owner.len);  		READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len);  	} else { @@ -684,7 +702,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)  		if (status)  			return status;  		READ_BUF(4); -		READ32(lock->lk_old_lock_seqid); +		lock->lk_old_lock_seqid = be32_to_cpup(p++);  	}  	DECODE_TAIL; @@ -696,13 +714,13 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)  	DECODE_HEAD;  	READ_BUF(32); -	
READ32(lockt->lt_type); +	lockt->lt_type = be32_to_cpup(p++);  	if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))  		goto xdr_error; -	READ64(lockt->lt_offset); -	READ64(lockt->lt_length); +	p = xdr_decode_hyper(p, &lockt->lt_offset); +	p = xdr_decode_hyper(p, &lockt->lt_length);  	COPYMEM(&lockt->lt_clientid, 8); -	READ32(lockt->lt_owner.len); +	lockt->lt_owner.len = be32_to_cpup(p++);  	READ_BUF(lockt->lt_owner.len);  	READMEM(lockt->lt_owner.data, lockt->lt_owner.len); @@ -715,16 +733,16 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)  	DECODE_HEAD;  	READ_BUF(8); -	READ32(locku->lu_type); +	locku->lu_type = be32_to_cpup(p++);  	if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))  		goto xdr_error; -	READ32(locku->lu_seqid); +	locku->lu_seqid = be32_to_cpup(p++);  	status = nfsd4_decode_stateid(argp, &locku->lu_stateid);  	if (status)  		return status;  	READ_BUF(16); -	READ64(locku->lu_offset); -	READ64(locku->lu_length); +	p = xdr_decode_hyper(p, &locku->lu_offset); +	p = xdr_decode_hyper(p, &locku->lu_length);  	DECODE_TAIL;  } @@ -735,7 +753,7 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup  	DECODE_HEAD;  	READ_BUF(4); -	READ32(lookup->lo_len); +	lookup->lo_len = be32_to_cpup(p++);  	READ_BUF(lookup->lo_len);  	SAVEMEM(lookup->lo_name, lookup->lo_len);  	if ((status = check_filename(lookup->lo_name, lookup->lo_len))) @@ -750,7 +768,7 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh  	u32 w;  	READ_BUF(4); -	READ32(w); +	w = be32_to_cpup(p++);  	*share_access = w & NFS4_SHARE_ACCESS_MASK;  	*deleg_want = w & NFS4_SHARE_WANT_MASK;  	if (deleg_when) @@ -802,7 +820,7 @@ static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x)  	__be32 *p;  	READ_BUF(4); -	READ32(*x); +	*x = be32_to_cpup(p++);  	/* Note: unlinke access bits, deny bits may be zero. 
*/  	if (*x & ~NFS4_SHARE_DENY_BOTH)  		return nfserr_bad_xdr; @@ -816,7 +834,7 @@ static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_ne  	__be32 *p;  	READ_BUF(4); -	READ32(o->len); +	o->len = be32_to_cpup(p++);  	if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT)  		return nfserr_bad_xdr; @@ -841,7 +859,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)  	open->op_xdr_error = 0;  	/* seqid, share_access, share_deny, clientid, ownerlen */  	READ_BUF(4); -	READ32(open->op_seqid); +	open->op_seqid = be32_to_cpup(p++);  	/* decode, yet ignore deleg_when until supported */  	status = nfsd4_decode_share_access(argp, &open->op_share_access,  					   &open->op_deleg_want, &dummy); @@ -856,13 +874,13 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)  	if (status)  		goto xdr_error;  	READ_BUF(4); -	READ32(open->op_create); +	open->op_create = be32_to_cpup(p++);  	switch (open->op_create) {  	case NFS4_OPEN_NOCREATE:  		break;  	case NFS4_OPEN_CREATE:  		READ_BUF(4); -		READ32(open->op_createmode); +		open->op_createmode = be32_to_cpup(p++);  		switch (open->op_createmode) {  		case NFS4_CREATE_UNCHECKED:  		case NFS4_CREATE_GUARDED: @@ -895,12 +913,12 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)  	/* open_claim */  	READ_BUF(4); -	READ32(open->op_claim_type); +	open->op_claim_type = be32_to_cpup(p++);  	switch (open->op_claim_type) {  	case NFS4_OPEN_CLAIM_NULL:  	case NFS4_OPEN_CLAIM_DELEGATE_PREV:  		READ_BUF(4); -		READ32(open->op_fname.len); +		open->op_fname.len = be32_to_cpup(p++);  		READ_BUF(open->op_fname.len);  		SAVEMEM(open->op_fname.data, open->op_fname.len);  		if ((status = check_filename(open->op_fname.data, open->op_fname.len))) @@ -908,14 +926,14 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)  		break;  	case NFS4_OPEN_CLAIM_PREVIOUS:  		READ_BUF(4); -		READ32(open->op_delegate_type); +		
open->op_delegate_type = be32_to_cpup(p++);  		break;  	case NFS4_OPEN_CLAIM_DELEGATE_CUR:  		status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);  		if (status)  			return status;  		READ_BUF(4); -		READ32(open->op_fname.len); +		open->op_fname.len = be32_to_cpup(p++);  		READ_BUF(open->op_fname.len);  		SAVEMEM(open->op_fname.data, open->op_fname.len);  		if ((status = check_filename(open->op_fname.data, open->op_fname.len))) @@ -945,13 +963,16 @@ static __be32  nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)  {  	DECODE_HEAD; -		     + +	if (argp->minorversion >= 1) +		return nfserr_notsupp; +  	status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid);  	if (status)  		return status;  	READ_BUF(4); -	READ32(open_conf->oc_seqid); -						         +	open_conf->oc_seqid = be32_to_cpup(p++); +  	DECODE_TAIL;  } @@ -964,7 +985,7 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d  	if (status)  		return status;  	READ_BUF(4); -	READ32(open_down->od_seqid); +	open_down->od_seqid = be32_to_cpup(p++);  	status = nfsd4_decode_share_access(argp, &open_down->od_share_access,  					   &open_down->od_deleg_want, NULL);  	if (status) @@ -981,7 +1002,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)  	DECODE_HEAD;  	READ_BUF(4); -	READ32(putfh->pf_fhlen); +	putfh->pf_fhlen = be32_to_cpup(p++);  	if (putfh->pf_fhlen > NFS4_FHSIZE)  		goto xdr_error;  	READ_BUF(putfh->pf_fhlen); @@ -991,6 +1012,14 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)  }  static __be32 +nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) +{ +	if (argp->minorversion == 0) +		return nfs_ok; +	return nfserr_notsupp; +} + +static __be32  nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)  {  	DECODE_HEAD; @@ -999,8 +1028,8 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)  	if 
(status)  		return status;  	READ_BUF(12); -	READ64(read->rd_offset); -	READ32(read->rd_length); +	p = xdr_decode_hyper(p, &read->rd_offset); +	read->rd_length = be32_to_cpup(p++);  	DECODE_TAIL;  } @@ -1011,10 +1040,10 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read  	DECODE_HEAD;  	READ_BUF(24); -	READ64(readdir->rd_cookie); +	p = xdr_decode_hyper(p, &readdir->rd_cookie);  	COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); -	READ32(readdir->rd_dircount);    /* just in case you needed a useless field... */ -	READ32(readdir->rd_maxcount); +	readdir->rd_dircount = be32_to_cpup(p++); +	readdir->rd_maxcount = be32_to_cpup(p++);  	if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval)))  		goto out; @@ -1027,7 +1056,7 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove  	DECODE_HEAD;  	READ_BUF(4); -	READ32(remove->rm_namelen); +	remove->rm_namelen = be32_to_cpup(p++);  	READ_BUF(remove->rm_namelen);  	SAVEMEM(remove->rm_name, remove->rm_namelen);  	if ((status = check_filename(remove->rm_name, remove->rm_namelen))) @@ -1042,10 +1071,10 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename  	DECODE_HEAD;  	READ_BUF(4); -	READ32(rename->rn_snamelen); +	rename->rn_snamelen = be32_to_cpup(p++);  	READ_BUF(rename->rn_snamelen + 4);  	SAVEMEM(rename->rn_sname, rename->rn_snamelen); -	READ32(rename->rn_tnamelen); +	rename->rn_tnamelen = be32_to_cpup(p++);  	READ_BUF(rename->rn_tnamelen);  	SAVEMEM(rename->rn_tname, rename->rn_tnamelen);  	if ((status = check_filename(rename->rn_sname, rename->rn_snamelen))) @@ -1061,6 +1090,9 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)  {  	DECODE_HEAD; +	if (argp->minorversion >= 1) +		return nfserr_notsupp; +  	READ_BUF(sizeof(clientid_t));  	COPYMEM(clientid, sizeof(clientid_t)); @@ -1074,7 +1106,7 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,  	DECODE_HEAD;  	READ_BUF(4); -	
READ32(secinfo->si_namelen); +	secinfo->si_namelen = be32_to_cpup(p++);  	READ_BUF(secinfo->si_namelen);  	SAVEMEM(secinfo->si_name, secinfo->si_namelen);  	status = check_filename(secinfo->si_name, secinfo->si_namelen); @@ -1090,7 +1122,7 @@ nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,  	DECODE_HEAD;  	READ_BUF(4); -	READ32(sin->sin_style); +	sin->sin_style = be32_to_cpup(p++);  	DECODE_TAIL;  } @@ -1111,6 +1143,9 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient  {  	DECODE_HEAD; +	if (argp->minorversion >= 1) +		return nfserr_notsupp; +  	READ_BUF(NFS4_VERIFIER_SIZE);  	COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE); @@ -1118,16 +1153,16 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient  	if (status)  		return nfserr_bad_xdr;  	READ_BUF(8); -	READ32(setclientid->se_callback_prog); -	READ32(setclientid->se_callback_netid_len); +	setclientid->se_callback_prog = be32_to_cpup(p++); +	setclientid->se_callback_netid_len = be32_to_cpup(p++);  	READ_BUF(setclientid->se_callback_netid_len + 4);  	SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); -	READ32(setclientid->se_callback_addr_len); +	setclientid->se_callback_addr_len = be32_to_cpup(p++);  	READ_BUF(setclientid->se_callback_addr_len + 4);  	SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); -	READ32(setclientid->se_callback_ident); +	setclientid->se_callback_ident = be32_to_cpup(p++);  	DECODE_TAIL;  } @@ -1137,6 +1172,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s  {  	DECODE_HEAD; +	if (argp->minorversion >= 1) +		return nfserr_notsupp; +  	READ_BUF(8 + NFS4_VERIFIER_SIZE);  	COPYMEM(&scd_c->sc_clientid, 8);  	COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE); @@ -1157,7 +1195,7 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify  	 * nfsd4_proc_verify */  	READ_BUF(4); -	
READ32(verify->ve_attrlen); +	verify->ve_attrlen = be32_to_cpup(p++);  	READ_BUF(verify->ve_attrlen);  	SAVEMEM(verify->ve_attrval, verify->ve_attrlen); @@ -1175,11 +1213,11 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)  	if (status)  		return status;  	READ_BUF(16); -	READ64(write->wr_offset); -	READ32(write->wr_stable_how); +	p = xdr_decode_hyper(p, &write->wr_offset); +	write->wr_stable_how = be32_to_cpup(p++);  	if (write->wr_stable_how > 2)  		goto xdr_error; -	READ32(write->wr_buflen); +	write->wr_buflen = be32_to_cpup(p++);  	/* Sorry .. no magic macros for this.. *  	 * READ_BUF(write->wr_buflen); @@ -1193,7 +1231,6 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)  	}  	write->wr_head.iov_base = p;  	write->wr_head.iov_len = avail; -	WARN_ON(avail != (XDR_QUADLEN(avail) << 2));  	write->wr_pagelist = argp->pagelist;  	len = XDR_QUADLEN(write->wr_buflen) << 2; @@ -1208,6 +1245,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)  		len -= pages * PAGE_SIZE;  		argp->p = (__be32 *)page_address(argp->pagelist[0]); +		argp->pagelist++;  		argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);  	}  	argp->p += XDR_QUADLEN(len); @@ -1220,9 +1258,12 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel  {  	DECODE_HEAD; +	if (argp->minorversion >= 1) +		return nfserr_notsupp; +  	READ_BUF(12);  	COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); -	READ32(rlockowner->rl_owner.len); +	rlockowner->rl_owner.len = be32_to_cpup(p++);  	READ_BUF(rlockowner->rl_owner.len);  	READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); @@ -1246,63 +1287,63 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,  		return nfserr_bad_xdr;  	READ_BUF(4); -	READ32(exid->flags); +	exid->flags = be32_to_cpup(p++);  	/* Ignore state_protect4_a */  	READ_BUF(4); -	READ32(exid->spa_how); +	exid->spa_how = be32_to_cpup(p++);  	switch (exid->spa_how) {  	
case SP4_NONE:  		break;  	case SP4_MACH_CRED:  		/* spo_must_enforce */  		READ_BUF(4); -		READ32(dummy); +		dummy = be32_to_cpup(p++);  		READ_BUF(dummy * 4);  		p += dummy;  		/* spo_must_allow */  		READ_BUF(4); -		READ32(dummy); +		dummy = be32_to_cpup(p++);  		READ_BUF(dummy * 4);  		p += dummy;  		break;  	case SP4_SSV:  		/* ssp_ops */  		READ_BUF(4); -		READ32(dummy); +		dummy = be32_to_cpup(p++);  		READ_BUF(dummy * 4);  		p += dummy;  		READ_BUF(4); -		READ32(dummy); +		dummy = be32_to_cpup(p++);  		READ_BUF(dummy * 4);  		p += dummy;  		/* ssp_hash_algs<> */  		READ_BUF(4); -		READ32(tmp); +		tmp = be32_to_cpup(p++);  		while (tmp--) {  			READ_BUF(4); -			READ32(dummy); +			dummy = be32_to_cpup(p++);  			READ_BUF(dummy);  			p += XDR_QUADLEN(dummy);  		}  		/* ssp_encr_algs<> */  		READ_BUF(4); -		READ32(tmp); +		tmp = be32_to_cpup(p++);  		while (tmp--) {  			READ_BUF(4); -			READ32(dummy); +			dummy = be32_to_cpup(p++);  			READ_BUF(dummy);  			p += XDR_QUADLEN(dummy);  		}  		/* ssp_window and ssp_num_gss_handles */  		READ_BUF(8); -		READ32(dummy); -		READ32(dummy); +		dummy = be32_to_cpup(p++); +		dummy = be32_to_cpup(p++);  		break;  	default:  		goto xdr_error; @@ -1310,7 +1351,7 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,  	/* Ignore Implementation ID */  	READ_BUF(4);    /* nfs_impl_id4 array length */ -	READ32(dummy); +	dummy = be32_to_cpup(p++);  	if (dummy > 1)  		goto xdr_error; @@ -1318,13 +1359,13 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,  	if (dummy == 1) {  		/* nii_domain */  		READ_BUF(4); -		READ32(dummy); +		dummy = be32_to_cpup(p++);  		READ_BUF(dummy);  		p += XDR_QUADLEN(dummy);  		/* nii_name */  		READ_BUF(4); -		READ32(dummy); +		dummy = be32_to_cpup(p++);  		READ_BUF(dummy);  		p += XDR_QUADLEN(dummy); @@ -1344,21 +1385,21 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,  	READ_BUF(16);  	COPYMEM(&sess->clientid, 8); -	READ32(sess->seqid); -	READ32(sess->flags); +	
sess->seqid = be32_to_cpup(p++); +	sess->flags = be32_to_cpup(p++);  	/* Fore channel attrs */  	READ_BUF(28); -	READ32(dummy); /* headerpadsz is always 0 */ -	READ32(sess->fore_channel.maxreq_sz); -	READ32(sess->fore_channel.maxresp_sz); -	READ32(sess->fore_channel.maxresp_cached); -	READ32(sess->fore_channel.maxops); -	READ32(sess->fore_channel.maxreqs); -	READ32(sess->fore_channel.nr_rdma_attrs); +	dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ +	sess->fore_channel.maxreq_sz = be32_to_cpup(p++); +	sess->fore_channel.maxresp_sz = be32_to_cpup(p++); +	sess->fore_channel.maxresp_cached = be32_to_cpup(p++); +	sess->fore_channel.maxops = be32_to_cpup(p++); +	sess->fore_channel.maxreqs = be32_to_cpup(p++); +	sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++);  	if (sess->fore_channel.nr_rdma_attrs == 1) {  		READ_BUF(4); -		READ32(sess->fore_channel.rdma_attrs); +		sess->fore_channel.rdma_attrs = be32_to_cpup(p++);  	} else if (sess->fore_channel.nr_rdma_attrs > 1) {  		dprintk("Too many fore channel attr bitmaps!\n");  		goto xdr_error; @@ -1366,23 +1407,23 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,  	/* Back channel attrs */  	READ_BUF(28); -	READ32(dummy); /* headerpadsz is always 0 */ -	READ32(sess->back_channel.maxreq_sz); -	READ32(sess->back_channel.maxresp_sz); -	READ32(sess->back_channel.maxresp_cached); -	READ32(sess->back_channel.maxops); -	READ32(sess->back_channel.maxreqs); -	READ32(sess->back_channel.nr_rdma_attrs); +	dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ +	sess->back_channel.maxreq_sz = be32_to_cpup(p++); +	sess->back_channel.maxresp_sz = be32_to_cpup(p++); +	sess->back_channel.maxresp_cached = be32_to_cpup(p++); +	sess->back_channel.maxops = be32_to_cpup(p++); +	sess->back_channel.maxreqs = be32_to_cpup(p++); +	sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++);  	if (sess->back_channel.nr_rdma_attrs == 1) {  		READ_BUF(4); -		READ32(sess->back_channel.rdma_attrs); +		sess->back_channel.rdma_attrs = 
be32_to_cpup(p++);  	} else if (sess->back_channel.nr_rdma_attrs > 1) {  		dprintk("Too many back channel attr bitmaps!\n");  		goto xdr_error;  	}  	READ_BUF(4); -	READ32(sess->callback_prog); +	sess->callback_prog = be32_to_cpup(p++);  	nfsd4_decode_cb_sec(argp, &sess->cb_sec);  	DECODE_TAIL;  } @@ -1405,7 +1446,7 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,  	DECODE_HEAD;  	READ_BUF(sizeof(stateid_t)); -	READ32(free_stateid->fr_stateid.si_generation); +	free_stateid->fr_stateid.si_generation = be32_to_cpup(p++);  	COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t));  	DECODE_TAIL; @@ -1419,10 +1460,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,  	READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);  	COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); -	READ32(seq->seqid); -	READ32(seq->slotid); -	READ32(seq->maxslots); -	READ32(seq->cachethis); +	seq->seqid = be32_to_cpup(p++); +	seq->slotid = be32_to_cpup(p++); +	seq->maxslots = be32_to_cpup(p++); +	seq->cachethis = be32_to_cpup(p++);  	DECODE_TAIL;  } @@ -1479,7 +1520,7 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str  	DECODE_HEAD;  	READ_BUF(4); -	READ32(rc->rca_one_fs); +	rc->rca_one_fs = be32_to_cpup(p++);  	DECODE_TAIL;  } @@ -1519,7 +1560,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {  	[OP_OPEN_CONFIRM]	= (nfsd4_dec)nfsd4_decode_open_confirm,  	[OP_OPEN_DOWNGRADE]	= (nfsd4_dec)nfsd4_decode_open_downgrade,  	[OP_PUTFH]		= (nfsd4_dec)nfsd4_decode_putfh, -	[OP_PUTPUBFH]		= (nfsd4_dec)nfsd4_decode_noop, +	[OP_PUTPUBFH]		= (nfsd4_dec)nfsd4_decode_putpubfh,  	[OP_PUTROOTFH]		= (nfsd4_dec)nfsd4_decode_noop,  	[OP_READ]		= (nfsd4_dec)nfsd4_decode_read,  	[OP_READDIR]		= (nfsd4_dec)nfsd4_decode_readdir, @@ -1536,46 +1577,6 @@ static nfsd4_dec nfsd4_dec_ops[] = {  	[OP_VERIFY]		= (nfsd4_dec)nfsd4_decode_verify,  	[OP_WRITE]		= (nfsd4_dec)nfsd4_decode_write,  	[OP_RELEASE_LOCKOWNER]	= (nfsd4_dec)nfsd4_decode_release_lockowner, -}; - -static 
nfsd4_dec nfsd41_dec_ops[] = { -	[OP_ACCESS]		= (nfsd4_dec)nfsd4_decode_access, -	[OP_CLOSE]		= (nfsd4_dec)nfsd4_decode_close, -	[OP_COMMIT]		= (nfsd4_dec)nfsd4_decode_commit, -	[OP_CREATE]		= (nfsd4_dec)nfsd4_decode_create, -	[OP_DELEGPURGE]		= (nfsd4_dec)nfsd4_decode_notsupp, -	[OP_DELEGRETURN]	= (nfsd4_dec)nfsd4_decode_delegreturn, -	[OP_GETATTR]		= (nfsd4_dec)nfsd4_decode_getattr, -	[OP_GETFH]		= (nfsd4_dec)nfsd4_decode_noop, -	[OP_LINK]		= (nfsd4_dec)nfsd4_decode_link, -	[OP_LOCK]		= (nfsd4_dec)nfsd4_decode_lock, -	[OP_LOCKT]		= (nfsd4_dec)nfsd4_decode_lockt, -	[OP_LOCKU]		= (nfsd4_dec)nfsd4_decode_locku, -	[OP_LOOKUP]		= (nfsd4_dec)nfsd4_decode_lookup, -	[OP_LOOKUPP]		= (nfsd4_dec)nfsd4_decode_noop, -	[OP_NVERIFY]		= (nfsd4_dec)nfsd4_decode_verify, -	[OP_OPEN]		= (nfsd4_dec)nfsd4_decode_open, -	[OP_OPENATTR]		= (nfsd4_dec)nfsd4_decode_notsupp, -	[OP_OPEN_CONFIRM]	= (nfsd4_dec)nfsd4_decode_notsupp, -	[OP_OPEN_DOWNGRADE]	= (nfsd4_dec)nfsd4_decode_open_downgrade, -	[OP_PUTFH]		= (nfsd4_dec)nfsd4_decode_putfh, -	[OP_PUTPUBFH]		= (nfsd4_dec)nfsd4_decode_notsupp, -	[OP_PUTROOTFH]		= (nfsd4_dec)nfsd4_decode_noop, -	[OP_READ]		= (nfsd4_dec)nfsd4_decode_read, -	[OP_READDIR]		= (nfsd4_dec)nfsd4_decode_readdir, -	[OP_READLINK]		= (nfsd4_dec)nfsd4_decode_noop, -	[OP_REMOVE]		= (nfsd4_dec)nfsd4_decode_remove, -	[OP_RENAME]		= (nfsd4_dec)nfsd4_decode_rename, -	[OP_RENEW]		= (nfsd4_dec)nfsd4_decode_notsupp, -	[OP_RESTOREFH]		= (nfsd4_dec)nfsd4_decode_noop, -	[OP_SAVEFH]		= (nfsd4_dec)nfsd4_decode_noop, -	[OP_SECINFO]		= (nfsd4_dec)nfsd4_decode_secinfo, -	[OP_SETATTR]		= (nfsd4_dec)nfsd4_decode_setattr, -	[OP_SETCLIENTID]	= (nfsd4_dec)nfsd4_decode_notsupp, -	[OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp, -	[OP_VERIFY]		= (nfsd4_dec)nfsd4_decode_verify, -	[OP_WRITE]		= (nfsd4_dec)nfsd4_decode_write, -	[OP_RELEASE_LOCKOWNER]	= (nfsd4_dec)nfsd4_decode_notsupp,  	/* new operations for NFSv4.1 */  	[OP_BACKCHANNEL_CTL]	= (nfsd4_dec)nfsd4_decode_backchannel_ctl, @@ 
-1599,32 +1600,39 @@ static nfsd4_dec nfsd41_dec_ops[] = {  	[OP_RECLAIM_COMPLETE]	= (nfsd4_dec)nfsd4_decode_reclaim_complete,  }; -struct nfsd4_minorversion_ops { -	nfsd4_dec *decoders; -	int nops; -}; - -static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { -	[0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, -	[1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, -	[2] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, -}; +static inline bool +nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) +{ +	if (op->opnum < FIRST_NFS4_OP) +		return false; +	else if (argp->minorversion == 0 && op->opnum > LAST_NFS40_OP) +		return false; +	else if (argp->minorversion == 1 && op->opnum > LAST_NFS41_OP) +		return false; +	else if (argp->minorversion == 2 && op->opnum > LAST_NFS42_OP) +		return false; +	return true; +}  static __be32  nfsd4_decode_compound(struct nfsd4_compoundargs *argp)  {  	DECODE_HEAD;  	struct nfsd4_op *op; -	struct nfsd4_minorversion_ops *ops;  	bool cachethis = false; +	int auth_slack= argp->rqstp->rq_auth_slack; +	int max_reply = auth_slack + 8; /* opcnt, status */ +	int readcount = 0; +	int readbytes = 0;  	int i;  	READ_BUF(4); -	READ32(argp->taglen); +	argp->taglen = be32_to_cpup(p++);  	READ_BUF(argp->taglen + 8);  	SAVEMEM(argp->tag, argp->taglen); -	READ32(argp->minorversion); -	READ32(argp->opcnt); +	argp->minorversion = be32_to_cpup(p++); +	argp->opcnt = be32_to_cpup(p++); +	max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2);  	if (argp->taglen > NFSD4_MAX_TAGLEN)  		goto xdr_error; @@ -1640,110 +1648,98 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)  		}  	} -	if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion)) +	if (argp->minorversion > NFSD_SUPPORTED_MINOR_VERSION)  		argp->opcnt = 0; -	ops = &nfsd4_minorversion[argp->minorversion];  	for (i = 0; i < argp->opcnt; i++) {  		op = &argp->ops[i];  		op->replay = NULL;  		READ_BUF(4); -		READ32(op->opnum); +		op->opnum = be32_to_cpup(p++); -		if 
(op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP) -			op->status = ops->decoders[op->opnum](argp, &op->u); +		if (nfsd4_opnum_in_range(argp, op)) +			op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);  		else {  			op->opnum = OP_ILLEGAL;  			op->status = nfserr_op_illegal;  		} - -		if (op->status) { -			argp->opcnt = i+1; -			break; -		}  		/*  		 * We'll try to cache the result in the DRC if any one  		 * op in the compound wants to be cached:  		 */  		cachethis |= nfsd4_cache_this_op(op); + +		if (op->opnum == OP_READ) { +			readcount++; +			readbytes += nfsd4_max_reply(argp->rqstp, op); +		} else +			max_reply += nfsd4_max_reply(argp->rqstp, op); + +		if (op->status) { +			argp->opcnt = i+1; +			break; +		}  	}  	/* Sessions make the DRC unnecessary: */  	if (argp->minorversion)  		cachethis = false; +	svc_reserve(argp->rqstp, max_reply + readbytes);  	argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; -	DECODE_TAIL; -} - -#define WRITE32(n)               *p++ = htonl(n) -#define WRITE64(n)               do {				\ -	*p++ = htonl((u32)((n) >> 32));				\ -	*p++ = htonl((u32)(n));					\ -} while (0) -#define WRITEMEM(ptr,nbytes)     do { if (nbytes > 0) {		\ -	*(p + XDR_QUADLEN(nbytes) -1) = 0;                      \ -	memcpy(p, ptr, nbytes);					\ -	p += XDR_QUADLEN(nbytes);				\ -}} while (0) +	if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) +		argp->rqstp->rq_splice_ok = false; -static void write32(__be32 **p, u32 n) -{ -	*(*p)++ = htonl(n); -} - -static void write64(__be32 **p, u64 n) -{ -	write32(p, (n >> 32)); -	write32(p, (u32)n); +	DECODE_TAIL;  } -static void write_change(__be32 **p, struct kstat *stat, struct inode *inode) +static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)  {  	if (IS_I_VERSION(inode)) { -		write64(p, inode->i_version); +		p = xdr_encode_hyper(p, inode->i_version);  	} else { -		write32(p, stat->ctime.tv_sec); -		write32(p, stat->ctime.tv_nsec); +		*p++ = 
cpu_to_be32(stat->ctime.tv_sec); +		*p++ = cpu_to_be32(stat->ctime.tv_nsec);  	} +	return p;  } -static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) +static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)  { -	write32(p, c->atomic); +	*p++ = cpu_to_be32(c->atomic);  	if (c->change_supported) { -		write64(p, c->before_change); -		write64(p, c->after_change); +		p = xdr_encode_hyper(p, c->before_change); +		p = xdr_encode_hyper(p, c->after_change);  	} else { -		write32(p, c->before_ctime_sec); -		write32(p, c->before_ctime_nsec); -		write32(p, c->after_ctime_sec); -		write32(p, c->after_ctime_nsec); +		*p++ = cpu_to_be32(c->before_ctime_sec); +		*p++ = cpu_to_be32(c->before_ctime_nsec); +		*p++ = cpu_to_be32(c->after_ctime_sec); +		*p++ = cpu_to_be32(c->after_ctime_nsec);  	} +	return p;  } -#define RESERVE_SPACE(nbytes)	do {				\ -	p = resp->p;						\ -	BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end);		\ -} while (0) -#define ADJUST_ARGS()		resp->p = p -  /* Encode as an array of strings the string given with components   * separated @sep, escaped with esc_enter and esc_exit.   
*/ -static __be32 nfsd4_encode_components_esc(char sep, char *components, -				   __be32 **pp, int *buflen, -				   char esc_enter, char esc_exit) +static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, +					  char *components, char esc_enter, +					  char esc_exit)  { -	__be32 *p = *pp; -	__be32 *countp = p; +	__be32 *p; +	__be32 pathlen; +	int pathlen_offset;  	int strlen, count=0;  	char *str, *end, *next;  	dprintk("nfsd4_encode_components(%s)\n", components); -	if ((*buflen -= 4) < 0) + +	pathlen_offset = xdr->buf->len; +	p = xdr_reserve_space(xdr, 4); +	if (!p)  		return nfserr_resource; -	WRITE32(0); /* We will fill this in with @count later */ +	p++; /* We will fill this in with @count later */ +  	end = str = components;  	while (*end) {  		bool found_esc = false; @@ -1765,59 +1761,57 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components,  		strlen = end - str;  		if (strlen) { -			if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) +			p = xdr_reserve_space(xdr, strlen + 4); +			if (!p)  				return nfserr_resource; -			WRITE32(strlen); -			WRITEMEM(str, strlen); +			p = xdr_encode_opaque(p, str, strlen);  			count++;  		}  		else  			end++;  		str = end;  	} -	*pp = p; -	p = countp; -	WRITE32(count); +	pathlen = htonl(xdr->buf->len - pathlen_offset); +	write_bytes_to_xdr_buf(xdr->buf, pathlen_offset, &pathlen, 4);  	return 0;  }  /* Encode as an array of strings the string given with components   * separated @sep.   
*/ -static __be32 nfsd4_encode_components(char sep, char *components, -				   __be32 **pp, int *buflen) +static __be32 nfsd4_encode_components(struct xdr_stream *xdr, char sep, +				      char *components)  { -	return nfsd4_encode_components_esc(sep, components, pp, buflen, 0, 0); +	return nfsd4_encode_components_esc(xdr, sep, components, 0, 0);  }  /*   * encode a location element of a fs_locations structure   */ -static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, -				    __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr, +					struct nfsd4_fs_location *location)  {  	__be32 status; -	__be32 *p = *pp; -	status = nfsd4_encode_components_esc(':', location->hosts, &p, buflen, +	status = nfsd4_encode_components_esc(xdr, ':', location->hosts,  						'[', ']');  	if (status)  		return status; -	status = nfsd4_encode_components('/', location->path, &p, buflen); +	status = nfsd4_encode_components(xdr, '/', location->path);  	if (status)  		return status; -	*pp = p;  	return 0;  }  /*   * Encode a path in RFC3530 'pathname4' format   */ -static __be32 nfsd4_encode_path(const struct path *root, -		const struct path *path, __be32 **pp, int *buflen) +static __be32 nfsd4_encode_path(struct xdr_stream *xdr, +				const struct path *root, +				const struct path *path)  {  	struct path cur = *path; -	__be32 *p = *pp; +	__be32 *p;  	struct dentry **components = NULL;  	unsigned int ncomponents = 0;  	__be32 err = nfserr_jukebox; @@ -1848,11 +1842,11 @@ static __be32 nfsd4_encode_path(const struct path *root,  		components[ncomponents++] = cur.dentry;  		cur.dentry = dget_parent(cur.dentry);  	} - -	*buflen -= 4; -	if (*buflen < 0) +	err = nfserr_resource; +	p = xdr_reserve_space(xdr, 4); +	if (!p)  		goto out_free; -	WRITE32(ncomponents); +	*p++ = cpu_to_be32(ncomponents);  	while (ncomponents) {  		struct dentry *dentry = components[ncomponents - 1]; @@ -1860,20 +1854,18 @@ static __be32 nfsd4_encode_path(const 
struct path *root,  		spin_lock(&dentry->d_lock);  		len = dentry->d_name.len; -		*buflen -= 4 + (XDR_QUADLEN(len) << 2); -		if (*buflen < 0) { +		p = xdr_reserve_space(xdr, len + 4); +		if (!p) {  			spin_unlock(&dentry->d_lock);  			goto out_free;  		} -		WRITE32(len); -		WRITEMEM(dentry->d_name.name, len); +		p = xdr_encode_opaque(p, dentry->d_name.name, len);  		dprintk("/%s", dentry->d_name.name);  		spin_unlock(&dentry->d_lock);  		dput(dentry);  		ncomponents--;  	} -	*pp = p;  	err = 0;  out_free:  	dprintk(")\n"); @@ -1884,8 +1876,8 @@ out_free:  	return err;  } -static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, -		const struct path *path, __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fsloc_fsroot(struct xdr_stream *xdr, +			struct svc_rqst *rqstp, const struct path *path)  {  	struct svc_export *exp_ps;  	__be32 res; @@ -1893,7 +1885,7 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp,  	exp_ps = rqst_find_fsidzero_export(rqstp);  	if (IS_ERR(exp_ps))  		return nfserrno(PTR_ERR(exp_ps)); -	res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen); +	res = nfsd4_encode_path(xdr, &exp_ps->ex_path, path);  	exp_put(exp_ps);  	return res;  } @@ -1901,28 +1893,26 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp,  /*   *  encode a fs_locations structure   */ -static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, -				     struct svc_export *exp, -				     __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr, +			struct svc_rqst *rqstp, struct svc_export *exp)  {  	__be32 status;  	int i; -	__be32 *p = *pp; +	__be32 *p;  	struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; -	status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen); +	status = nfsd4_encode_fsloc_fsroot(xdr, rqstp, &exp->ex_path);  	if (status)  		return status; -	if ((*buflen -= 4) < 0) +	p = xdr_reserve_space(xdr, 4); +	if (!p)  		return nfserr_resource; -	
WRITE32(fslocs->locations_count); +	*p++ = cpu_to_be32(fslocs->locations_count);  	for (i=0; i<fslocs->locations_count; i++) { -		status = nfsd4_encode_fs_location4(&fslocs->locations[i], -						   &p, buflen); +		status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]);  		if (status)  			return status;  	} -	*pp = p;  	return 0;  } @@ -1940,56 +1930,16 @@ static u32 nfs4_file_type(umode_t mode)  	};  } -static __be32 -nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, kuid_t uid, kgid_t gid, -			__be32 **p, int *buflen) -{ -	int status; - -	if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4) -		return nfserr_resource; -	if (whotype != NFS4_ACL_WHO_NAMED) -		status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1)); -	else if (gid_valid(gid)) -		status = nfsd_map_gid_to_name(rqstp, gid, (u8 *)(*p + 1)); -	else -		status = nfsd_map_uid_to_name(rqstp, uid, (u8 *)(*p + 1)); -	if (status < 0) -		return nfserrno(status); -	*p = xdr_encode_opaque(*p, NULL, status); -	*buflen -= (XDR_QUADLEN(status) << 2) + 4; -	BUG_ON(*buflen < 0); -	return 0; -} - -static inline __be32 -nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t user, __be32 **p, int *buflen) -{ -	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, user, INVALID_GID, -				 p, buflen); -} -  static inline __be32 -nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t group, __be32 **p, int *buflen) +nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, +		     struct nfs4_ace *ace)  { -	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, INVALID_UID, group, -				 p, buflen); -} - -static inline __be32 -nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace, -		__be32 **p, int *buflen) -{ -	kuid_t uid = INVALID_UID; -	kgid_t gid = INVALID_GID; - -	if (ace->whotype == NFS4_ACL_WHO_NAMED) { -		if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) -			gid = ace->who_gid; -		else -			uid = ace->who_uid; -	} -	return nfsd4_encode_name(rqstp, ace->whotype, uid, gid, p, buflen); +	if (ace->whotype != 
NFS4_ACL_WHO_NAMED) +		return nfs4_acl_write_who(xdr, ace->whotype); +	else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) +		return nfsd4_encode_group(xdr, rqstp, ace->who_gid); +	else +		return nfsd4_encode_user(xdr, rqstp, ace->who_uid);  }  #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ @@ -1998,31 +1948,28 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace,  #ifdef CONFIG_NFSD_V4_SECURITY_LABEL  static inline __be32 -nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, +			    void *context, int len)  { -	__be32 *p = *pp; +	__be32 *p; -	if (*buflen < ((XDR_QUADLEN(len) << 2) + 4 + 4 + 4)) +	p = xdr_reserve_space(xdr, len + 4 + 4 + 4); +	if (!p)  		return nfserr_resource;  	/*  	 * For now we use a 0 here to indicate the null translation; in  	 * the future we may place a call to translation code here.  	 */ -	if ((*buflen -= 8) < 0) -		return nfserr_resource; - -	WRITE32(0); /* lfs */ -	WRITE32(0); /* pi */ +	*p++ = cpu_to_be32(0); /* lfs */ +	*p++ = cpu_to_be32(0); /* pi */  	p = xdr_encode_opaque(p, context, len); -	*buflen -= (XDR_QUADLEN(len) << 2) + 4; - -	*pp = p;  	return 0;  }  #else  static inline __be32 -nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, +			    void *context, int len)  { return 0; }  #endif @@ -2061,26 +2008,26 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)  /*   * Note: @fhp can be NULL; in this case, we might have to compose the filehandle   * ourselves. 
- * - * countp is the buffer size in _words_   */ -__be32 -nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, -		struct dentry *dentry, __be32 **buffer, int count, u32 *bmval, +static __be32 +nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, +		struct svc_export *exp, +		struct dentry *dentry, u32 *bmval,  		struct svc_rqst *rqstp, int ignore_crossmnt)  {  	u32 bmval0 = bmval[0];  	u32 bmval1 = bmval[1];  	u32 bmval2 = bmval[2];  	struct kstat stat; -	struct svc_fh tempfh; +	struct svc_fh *tempfh = NULL;  	struct kstatfs statfs; -	int buflen = count << 2; -	__be32 *attrlenp; +	__be32 *p; +	int starting_len = xdr->buf->len; +	int attrlen_offset; +	__be32 attrlen;  	u32 dummy;  	u64 dummy64;  	u32 rdattr_err = 0; -	__be32 *p = *buffer;  	__be32 status;  	int err;  	int aclsupport = 0; @@ -2111,8 +2058,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,  	err = vfs_getattr(&path, &stat);  	if (err)  		goto out_nfserr; -	if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | -			FATTR4_WORD0_MAXNAME)) || +	if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | +			FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||  	    (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |  		       FATTR4_WORD1_SPACE_TOTAL))) {  		err = vfs_statfs(&path, &statfs); @@ -2120,11 +2067,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,  			goto out_nfserr;  	}  	if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { -		fh_init(&tempfh, NFS4_FHSIZE); -		status = fh_compose(&tempfh, exp, dentry, NULL); +		tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); +		status = nfserr_jukebox; +		if (!tempfh) +			goto out; +		fh_init(tempfh, NFS4_FHSIZE); +		status = fh_compose(tempfh, exp, dentry, NULL);  		if (status)  			goto out; -		fhp = &tempfh; +		fhp = tempfh;  	}  	if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT  			| FATTR4_WORD0_SUPPORTED_ATTRS)) { @@ -2157,25 
+2108,33 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,  #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */  	if (bmval2) { -		if ((buflen -= 16) < 0) +		p = xdr_reserve_space(xdr, 16); +		if (!p)  			goto out_resource; -		WRITE32(3); -		WRITE32(bmval0); -		WRITE32(bmval1); -		WRITE32(bmval2); +		*p++ = cpu_to_be32(3); +		*p++ = cpu_to_be32(bmval0); +		*p++ = cpu_to_be32(bmval1); +		*p++ = cpu_to_be32(bmval2);  	} else if (bmval1) { -		if ((buflen -= 12) < 0) +		p = xdr_reserve_space(xdr, 12); +		if (!p)  			goto out_resource; -		WRITE32(2); -		WRITE32(bmval0); -		WRITE32(bmval1); +		*p++ = cpu_to_be32(2); +		*p++ = cpu_to_be32(bmval0); +		*p++ = cpu_to_be32(bmval1);  	} else { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource; -		WRITE32(1); -		WRITE32(bmval0); +		*p++ = cpu_to_be32(1); +		*p++ = cpu_to_be32(bmval0);  	} -	attrlenp = p++;                /* to be backfilled later */ + +	attrlen_offset = xdr->buf->len; +	p = xdr_reserve_space(xdr, 4); +	if (!p) +		goto out_resource; +	p++;                /* to be backfilled later */  	if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {  		u32 word0 = nfsd_suppattrs0(minorversion); @@ -2187,302 +2146,343 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,  		if (!contextsupport)  			word2 &= ~FATTR4_WORD2_SECURITY_LABEL;  		if (!word2) { -			if ((buflen -= 12) < 0) +			p = xdr_reserve_space(xdr, 12); +			if (!p)  				goto out_resource; -			WRITE32(2); -			WRITE32(word0); -			WRITE32(word1); +			*p++ = cpu_to_be32(2); +			*p++ = cpu_to_be32(word0); +			*p++ = cpu_to_be32(word1);  		} else { -			if ((buflen -= 16) < 0) +			p = xdr_reserve_space(xdr, 16); +			if (!p)  				goto out_resource; -			WRITE32(3); -			WRITE32(word0); -			WRITE32(word1); -			WRITE32(word2); +			*p++ = cpu_to_be32(3); +			*p++ = cpu_to_be32(word0); +			*p++ = cpu_to_be32(word1); +			*p++ = cpu_to_be32(word2);  		}  	}  	if (bmval0 & FATTR4_WORD0_TYPE) { -		if ((buflen -= 4) < 0) +		p = 
xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource;  		dummy = nfs4_file_type(stat.mode); -		if (dummy == NF4BAD) -			goto out_serverfault; -		WRITE32(dummy); +		if (dummy == NF4BAD) { +			status = nfserr_serverfault; +			goto out; +		} +		*p++ = cpu_to_be32(dummy);  	}  	if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource;  		if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) -			WRITE32(NFS4_FH_PERSISTENT); +			*p++ = cpu_to_be32(NFS4_FH_PERSISTENT);  		else -			WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); +			*p++ = cpu_to_be32(NFS4_FH_PERSISTENT| +						NFS4_FH_VOL_RENAME);  	}  	if (bmval0 & FATTR4_WORD0_CHANGE) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource; -		write_change(&p, &stat, dentry->d_inode); +		p = encode_change(p, &stat, dentry->d_inode);  	}  	if (bmval0 & FATTR4_WORD0_SIZE) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource; -		WRITE64(stat.size); +		p = xdr_encode_hyper(p, stat.size);  	}  	if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(1); +		*p++ = cpu_to_be32(1);  	}  	if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(1); +		*p++ = cpu_to_be32(1);  	}  	if (bmval0 & FATTR4_WORD0_NAMED_ATTR) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(0); +		*p++ = cpu_to_be32(0);  	}  	if (bmval0 & FATTR4_WORD0_FSID) { -		if ((buflen -= 16) < 0) +		p = xdr_reserve_space(xdr, 16); +		if (!p)  			goto out_resource;  		if (exp->ex_fslocs.migrated) { -			WRITE64(NFS4_REFERRAL_FSID_MAJOR); -			WRITE64(NFS4_REFERRAL_FSID_MINOR); +			p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR); +			p = xdr_encode_hyper(p, 
NFS4_REFERRAL_FSID_MINOR);  		} else switch(fsid_source(fhp)) {  		case FSIDSOURCE_FSID: -			WRITE64((u64)exp->ex_fsid); -			WRITE64((u64)0); +			p = xdr_encode_hyper(p, (u64)exp->ex_fsid); +			p = xdr_encode_hyper(p, (u64)0);  			break;  		case FSIDSOURCE_DEV: -			WRITE32(0); -			WRITE32(MAJOR(stat.dev)); -			WRITE32(0); -			WRITE32(MINOR(stat.dev)); +			*p++ = cpu_to_be32(0); +			*p++ = cpu_to_be32(MAJOR(stat.dev)); +			*p++ = cpu_to_be32(0); +			*p++ = cpu_to_be32(MINOR(stat.dev));  			break;  		case FSIDSOURCE_UUID: -			WRITEMEM(exp->ex_uuid, 16); +			p = xdr_encode_opaque_fixed(p, exp->ex_uuid, +								EX_UUID_LEN);  			break;  		}  	}  	if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(0); +		*p++ = cpu_to_be32(0);  	}  	if (bmval0 & FATTR4_WORD0_LEASE_TIME) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(nn->nfsd4_lease); +		*p++ = cpu_to_be32(nn->nfsd4_lease);  	}  	if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(rdattr_err); +		*p++ = cpu_to_be32(rdattr_err);  	}  	if (bmval0 & FATTR4_WORD0_ACL) {  		struct nfs4_ace *ace;  		if (acl == NULL) { -			if ((buflen -= 4) < 0) +			p = xdr_reserve_space(xdr, 4); +			if (!p)  				goto out_resource; -			WRITE32(0); +			*p++ = cpu_to_be32(0);  			goto out_acl;  		} -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(acl->naces); +		*p++ = cpu_to_be32(acl->naces);  		for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { -			if ((buflen -= 4*3) < 0) -				goto out_resource; -			WRITE32(ace->type); -			WRITE32(ace->flag); -			WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); -			status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen); -			if (status == nfserr_resource) +			p = xdr_reserve_space(xdr, 4*3); +			if 
(!p)  				goto out_resource; +			*p++ = cpu_to_be32(ace->type); +			*p++ = cpu_to_be32(ace->flag); +			*p++ = cpu_to_be32(ace->access_mask & +							NFS4_ACE_MASK_ALL); +			status = nfsd4_encode_aclname(xdr, rqstp, ace);  			if (status)  				goto out;  		}  	}  out_acl:  	if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(aclsupport ? +		*p++ = cpu_to_be32(aclsupport ?  			ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);  	}  	if (bmval0 & FATTR4_WORD0_CANSETTIME) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(1); +		*p++ = cpu_to_be32(1);  	}  	if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(0); +		*p++ = cpu_to_be32(0);  	}  	if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(1); +		*p++ = cpu_to_be32(1);  	}  	if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(1); +		*p++ = cpu_to_be32(1);  	}  	if (bmval0 & FATTR4_WORD0_FILEHANDLE) { -		buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4; -		if (buflen < 0) +		p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); +		if (!p)  			goto out_resource; -		WRITE32(fhp->fh_handle.fh_size); -		WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); +		p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, +					fhp->fh_handle.fh_size);  	}  	if (bmval0 & FATTR4_WORD0_FILEID) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource; -		WRITE64(stat.ino); +		p = xdr_encode_hyper(p, stat.ino);  	}  	if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource; 
-		WRITE64((u64) statfs.f_ffree); +		p = xdr_encode_hyper(p, (u64) statfs.f_ffree);  	}  	if (bmval0 & FATTR4_WORD0_FILES_FREE) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource; -		WRITE64((u64) statfs.f_ffree); +		p = xdr_encode_hyper(p, (u64) statfs.f_ffree);  	}  	if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource; -		WRITE64((u64) statfs.f_files); +		p = xdr_encode_hyper(p, (u64) statfs.f_files);  	}  	if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { -		status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen); -		if (status == nfserr_resource) -			goto out_resource; +		status = nfsd4_encode_fs_locations(xdr, rqstp, exp);  		if (status)  			goto out;  	}  	if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(1); +		*p++ = cpu_to_be32(1);  	}  	if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource; -		WRITE64(~(u64)0); +		p = xdr_encode_hyper(p, exp->ex_path.mnt->mnt_sb->s_maxbytes);  	}  	if (bmval0 & FATTR4_WORD0_MAXLINK) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(255); +		*p++ = cpu_to_be32(255);  	}  	if (bmval0 & FATTR4_WORD0_MAXNAME) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(statfs.f_namelen); +		*p++ = cpu_to_be32(statfs.f_namelen);  	}  	if (bmval0 & FATTR4_WORD0_MAXREAD) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource; -		WRITE64((u64) svc_max_payload(rqstp)); +		p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp));  	}  	if (bmval0 & FATTR4_WORD0_MAXWRITE) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource; -		WRITE64((u64) 
svc_max_payload(rqstp)); +		p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp));  	}  	if (bmval1 & FATTR4_WORD1_MODE) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(stat.mode & S_IALLUGO); +		*p++ = cpu_to_be32(stat.mode & S_IALLUGO);  	}  	if (bmval1 & FATTR4_WORD1_NO_TRUNC) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(1); +		*p++ = cpu_to_be32(1);  	}  	if (bmval1 & FATTR4_WORD1_NUMLINKS) { -		if ((buflen -= 4) < 0) +		p = xdr_reserve_space(xdr, 4); +		if (!p)  			goto out_resource; -		WRITE32(stat.nlink); +		*p++ = cpu_to_be32(stat.nlink);  	}  	if (bmval1 & FATTR4_WORD1_OWNER) { -		status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); -		if (status == nfserr_resource) -			goto out_resource; +		status = nfsd4_encode_user(xdr, rqstp, stat.uid);  		if (status)  			goto out;  	}  	if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { -		status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); -		if (status == nfserr_resource) -			goto out_resource; +		status = nfsd4_encode_group(xdr, rqstp, stat.gid);  		if (status)  			goto out;  	}  	if (bmval1 & FATTR4_WORD1_RAWDEV) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource; -		WRITE32((u32) MAJOR(stat.rdev)); -		WRITE32((u32) MINOR(stat.rdev)); +		*p++ = cpu_to_be32((u32) MAJOR(stat.rdev)); +		*p++ = cpu_to_be32((u32) MINOR(stat.rdev));  	}  	if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource;  		dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize; -		WRITE64(dummy64); +		p = xdr_encode_hyper(p, dummy64);  	}  	if (bmval1 & FATTR4_WORD1_SPACE_FREE) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource;  		dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize; -		WRITE64(dummy64); +		p = xdr_encode_hyper(p, dummy64);  	}  	if (bmval1 & 
FATTR4_WORD1_SPACE_TOTAL) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource;  		dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize; -		WRITE64(dummy64); +		p = xdr_encode_hyper(p, dummy64);  	}  	if (bmval1 & FATTR4_WORD1_SPACE_USED) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)  			goto out_resource;  		dummy64 = (u64)stat.blocks << 9; -		WRITE64(dummy64); +		p = xdr_encode_hyper(p, dummy64);  	}  	if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { -		if ((buflen -= 12) < 0) +		p = xdr_reserve_space(xdr, 12); +		if (!p)  			goto out_resource; -		WRITE64((s64)stat.atime.tv_sec); -		WRITE32(stat.atime.tv_nsec); +		p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); +		*p++ = cpu_to_be32(stat.atime.tv_nsec);  	}  	if (bmval1 & FATTR4_WORD1_TIME_DELTA) { -		if ((buflen -= 12) < 0) +		p = xdr_reserve_space(xdr, 12); +		if (!p)  			goto out_resource; -		WRITE32(0); -		WRITE32(1); -		WRITE32(0); +		*p++ = cpu_to_be32(0); +		*p++ = cpu_to_be32(1); +		*p++ = cpu_to_be32(0);  	}  	if (bmval1 & FATTR4_WORD1_TIME_METADATA) { -		if ((buflen -= 12) < 0) +		p = xdr_reserve_space(xdr, 12); +		if (!p)  			goto out_resource; -		WRITE64((s64)stat.ctime.tv_sec); -		WRITE32(stat.ctime.tv_nsec); +		p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); +		*p++ = cpu_to_be32(stat.ctime.tv_nsec);  	}  	if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { -		if ((buflen -= 12) < 0) +		p = xdr_reserve_space(xdr, 12); +		if (!p)  			goto out_resource; -		WRITE64((s64)stat.mtime.tv_sec); -		WRITE32(stat.mtime.tv_nsec); +		p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); +		*p++ = cpu_to_be32(stat.mtime.tv_nsec);  	}  	if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { -		if ((buflen -= 8) < 0) +		p = xdr_reserve_space(xdr, 8); +		if (!p)                  	goto out_resource;  		/*  		 * Get parent's attributes if not ignoring crossmount @@ -2491,23 +2491,26 @@ out_acl:  		if (ignore_crossmnt == 0 &&  		    dentry == exp->ex_path.mnt->mnt_root)  			
get_parent_attributes(exp, &stat); -		WRITE64(stat.ino); +		p = xdr_encode_hyper(p, stat.ino);  	}  	if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { -		status = nfsd4_encode_security_label(rqstp, context, -				contextlen, &p, &buflen); +		status = nfsd4_encode_security_label(xdr, rqstp, context, +								contextlen);  		if (status)  			goto out;  	}  	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { -		WRITE32(3); -		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); -		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); -		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); +		p = xdr_reserve_space(xdr, 16); +		if (!p) +			goto out_resource; +		*p++ = cpu_to_be32(3); +		*p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0); +		*p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1); +		*p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2);  	} -	*attrlenp = htonl((char *)p - (char *)attrlenp - 4); -	*buffer = p; +	attrlen = htonl(xdr->buf->len - attrlen_offset - 4); +	write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);  	status = nfs_ok;  out: @@ -2516,8 +2519,12 @@ out:  		security_release_secctx(context, contextlen);  #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */  	kfree(acl); -	if (fhp == &tempfh) -		fh_put(&tempfh); +	if (tempfh) { +		fh_put(tempfh); +		kfree(tempfh); +	} +	if (status) +		xdr_truncate_encode(xdr, starting_len);  	return status;  out_nfserr:  	status = nfserrno(err); @@ -2525,9 +2532,37 @@ out_nfserr:  out_resource:  	status = nfserr_resource;  	goto out; -out_serverfault: -	status = nfserr_serverfault; -	goto out; +} + +static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr, +				struct xdr_buf *buf, __be32 *p, int bytes) +{ +	xdr->scratch.iov_len = 0; +	memset(buf, 0, sizeof(struct xdr_buf)); +	buf->head[0].iov_base = p; +	buf->head[0].iov_len = 0; +	buf->len = 0; +	xdr->buf = buf; +	xdr->iov = buf->head; +	xdr->p = p; +	xdr->end = (void *)p + bytes; +	buf->buflen = bytes; +} + +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, +			struct svc_fh *fhp, struct svc_export 
*exp, +			struct dentry *dentry, u32 *bmval, +			struct svc_rqst *rqstp, int ignore_crossmnt) +{ +	struct xdr_buf dummy; +	struct xdr_stream xdr; +	__be32 ret; + +	svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2); +	ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp, +							ignore_crossmnt); +	*p = xdr.p; +	return ret;  }  static inline int attributes_need_mount(u32 *bmval) @@ -2540,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval)  }  static __be32 -nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, -		const char *name, int namlen, __be32 **p, int buflen) +nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd, +			const char *name, int namlen)  {  	struct svc_export *exp = cd->rd_fhp->fh_export;  	struct dentry *dentry; @@ -2593,7 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,  	}  out_encode: -	nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, +	nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,  					cd->rd_rqstp, ignore_crossmnt);  out_put:  	dput(dentry); @@ -2602,19 +2637,19 @@ out_put:  }  static __be32 * -nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) +nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)  { -	__be32 *attrlenp; +	__be32 *p; -	if (buflen < 6) +	p = xdr_reserve_space(xdr, 20); +	if (!p)  		return NULL;  	*p++ = htonl(2);  	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */  	*p++ = htonl(0);			 /* bmval1 */ -	attrlenp = p++; +	*p++ = htonl(4);     /* attribute length */  	*p++ = nfserr;       /* no htonl */ -	*attrlenp = htonl((char *)p - (char *)attrlenp - 4);  	return p;  } @@ -2624,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,  {  	struct readdir_cd *ccd = ccdv;  	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); -	int buflen; -	__be32 *p = cd->buffer; -	__be32 *cookiep; +	struct xdr_stream *xdr = cd->xdr; +	int start_offset = xdr->buf->len; 
+	int cookie_offset; +	int entry_bytes;  	__be32 nfserr = nfserr_toosmall; +	__be64 wire_offset; +	__be32 *p;  	/* In nfsv4, "." and ".." never make it onto the wire.. */  	if (name && isdotent(name, namlen)) { @@ -2635,19 +2673,24 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,  		return 0;  	} -	if (cd->offset) -		xdr_encode_hyper(cd->offset, (u64) offset); +	if (cd->cookie_offset) { +		wire_offset = cpu_to_be64(offset); +		write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset, +							&wire_offset, 8); +	} -	buflen = cd->buflen - 4 - XDR_QUADLEN(namlen); -	if (buflen < 0) +	p = xdr_reserve_space(xdr, 4); +	if (!p)  		goto fail; -  	*p++ = xdr_one;                             /* mark entry present */ -	cookiep = p; +	cookie_offset = xdr->buf->len; +	p = xdr_reserve_space(xdr, 3*4 + namlen); +	if (!p) +		goto fail;  	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */  	p = xdr_encode_array(p, name, namlen);      /* name length & name */ -	nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen); +	nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);  	switch (nfserr) {  	case nfs_ok:  		break; @@ -2655,6 +2698,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,  		nfserr = nfserr_toosmall;  		goto fail;  	case nfserr_noent: +		xdr_truncate_encode(xdr, start_offset);  		goto skip_entry;  	default:  		/* @@ -2666,59 +2710,74 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,  		 */  		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))  			goto fail; -		p = nfsd4_encode_rdattr_error(p, buflen, nfserr); +		p = nfsd4_encode_rdattr_error(xdr, nfserr);  		if (p == NULL) {  			nfserr = nfserr_toosmall;  			goto fail;  		}  	} -	cd->buflen -= (p - cd->buffer); -	cd->buffer = p; -	cd->offset = cookiep; +	nfserr = nfserr_toosmall; +	entry_bytes = xdr->buf->len - start_offset; +	if (entry_bytes > cd->rd_maxcount) +		goto fail; +	cd->rd_maxcount -= entry_bytes; +	if (!cd->rd_dircount) +		goto fail; +	
cd->rd_dircount--; +	cd->cookie_offset = cookie_offset;  skip_entry:  	cd->common.err = nfs_ok;  	return 0;  fail: +	xdr_truncate_encode(xdr, start_offset);  	cd->common.err = nfserr;  	return -EINVAL;  } -static void -nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) +static __be32 +nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)  {  	__be32 *p; -	RESERVE_SPACE(sizeof(stateid_t)); -	WRITE32(sid->si_generation); -	WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); -	ADJUST_ARGS(); +	p = xdr_reserve_space(xdr, sizeof(stateid_t)); +	if (!p) +		return nfserr_resource; +	*p++ = cpu_to_be32(sid->si_generation); +	p = xdr_encode_opaque_fixed(p, &sid->si_opaque, +					sizeof(stateid_opaque_t)); +	return 0;  }  static __be32  nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	if (!nfserr) { -		RESERVE_SPACE(8); -		WRITE32(access->ac_supported); -		WRITE32(access->ac_resp_access); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, 8); +		if (!p) +			return nfserr_resource; +		*p++ = cpu_to_be32(access->ac_supported); +		*p++ = cpu_to_be32(access->ac_resp_access);  	}  	return nfserr;  }  static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	if (!nfserr) { -		RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8); -		WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); -		WRITE32(bcts->dir); +		p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); +		if (!p) +			return nfserr_resource; +		p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, +						NFS4_MAX_SESSIONID_LEN); +		*p++ = cpu_to_be32(bcts->dir);  		/* Sorry, we do not yet support RDMA over 4.1: */ -		WRITE32(0); -		ADJUST_ARGS(); +		*p++ = cpu_to_be32(0);  	}  	return nfserr;  } @@ -2726,8 +2785,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct 
nfsd4_compoundres *resp,  static __be32  nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)  { +	struct xdr_stream *xdr = &resp->xdr; +  	if (!nfserr) -		nfsd4_encode_stateid(resp, &close->cl_stateid); +		nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid);  	return nfserr;  } @@ -2736,12 +2797,15 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c  static __be32  nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	if (!nfserr) { -		RESERVE_SPACE(NFS4_VERIFIER_SIZE); -		WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); +		if (!p) +			return nfserr_resource; +		p = xdr_encode_opaque_fixed(p, commit->co_verf.data, +						NFS4_VERIFIER_SIZE);  	}  	return nfserr;  } @@ -2749,15 +2813,17 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_  static __be32  nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	if (!nfserr) { -		RESERVE_SPACE(32); -		write_cinfo(&p, &create->cr_cinfo); -		WRITE32(2); -		WRITE32(create->cr_bmval[0]); -		WRITE32(create->cr_bmval[1]); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, 32); +		if (!p) +			return nfserr_resource; +		p = encode_cinfo(p, &create->cr_cinfo); +		*p++ = cpu_to_be32(2); +		*p++ = cpu_to_be32(create->cr_bmval[0]); +		*p++ = cpu_to_be32(create->cr_bmval[1]);  	}  	return nfserr;  } @@ -2766,14 +2832,13 @@ static __be32  nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr)  {  	struct svc_fh *fhp = getattr->ga_fhp; -	int buflen; +	struct xdr_stream *xdr = &resp->xdr;  	if (nfserr)  		return nfserr; -	buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); -	nfserr = nfsd4_encode_fattr(fhp, 
fhp->fh_export, fhp->fh_dentry, -				    &resp->p, buflen, getattr->ga_bmval, +	nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, +				    getattr->ga_bmval,  				    resp->rqstp, 0);  	return nfserr;  } @@ -2781,16 +2846,17 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4  static __be32  nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)  { +	struct xdr_stream *xdr = &resp->xdr;  	struct svc_fh *fhp = *fhpp;  	unsigned int len;  	__be32 *p;  	if (!nfserr) {  		len = fhp->fh_handle.fh_size; -		RESERVE_SPACE(len + 4); -		WRITE32(len); -		WRITEMEM(&fhp->fh_handle.fh_base, len); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, len + 4); +		if (!p) +			return nfserr_resource; +		p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len);  	}  	return nfserr;  } @@ -2799,35 +2865,50 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh  * Including all fields other than the name, a LOCK4denied structure requires  *   8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes.  
*/ -static void -nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) +static __be32 +nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld)  {  	struct xdr_netobj *conf = &ld->ld_owner;  	__be32 *p; -	RESERVE_SPACE(32 + XDR_LEN(conf->len)); -	WRITE64(ld->ld_start); -	WRITE64(ld->ld_length); -	WRITE32(ld->ld_type); +again: +	p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len)); +	if (!p) { +		/* +		 * Don't fail to return the result just because we can't +		 * return the conflicting open: +		 */ +		if (conf->len) { +			kfree(conf->data); +			conf->len = 0; +			conf->data = NULL; +			goto again; +		} +		return nfserr_resource; +	} +	p = xdr_encode_hyper(p, ld->ld_start); +	p = xdr_encode_hyper(p, ld->ld_length); +	*p++ = cpu_to_be32(ld->ld_type);  	if (conf->len) { -		WRITEMEM(&ld->ld_clientid, 8); -		WRITE32(conf->len); -		WRITEMEM(conf->data, conf->len); +		p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8); +		p = xdr_encode_opaque(p, conf->data, conf->len);  		kfree(conf->data);  	}  else {  /* non - nfsv4 lock in conflict, no clientid nor owner */ -		WRITE64((u64)0); /* clientid */ -		WRITE32(0); /* length of owner name */ +		p = xdr_encode_hyper(p, (u64)0); /* clientid */ +		*p++ = cpu_to_be32(0); /* length of owner name */  	} -	ADJUST_ARGS(); +	return nfserr_denied;  }  static __be32  nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)  { +	struct xdr_stream *xdr = &resp->xdr; +  	if (!nfserr) -		nfsd4_encode_stateid(resp, &lock->lk_resp_stateid); +		nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid);  	else if (nfserr == nfserr_denied) -		nfsd4_encode_lock_denied(resp, &lock->lk_denied); +		nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied);  	return nfserr;  } @@ -2835,16 +2916,20 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo  static __be32  nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, 
struct nfsd4_lockt *lockt)  { +	struct xdr_stream *xdr = &resp->xdr; +  	if (nfserr == nfserr_denied) -		nfsd4_encode_lock_denied(resp, &lockt->lt_denied); +		nfsd4_encode_lock_denied(xdr, &lockt->lt_denied);  	return nfserr;  }  static __be32  nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)  { +	struct xdr_stream *xdr = &resp->xdr; +  	if (!nfserr) -		nfsd4_encode_stateid(resp, &locku->lu_stateid); +		nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid);  	return nfserr;  } @@ -2853,12 +2938,14 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l  static __be32  nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	if (!nfserr) { -		RESERVE_SPACE(20); -		write_cinfo(&p, &link->li_cinfo); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, 20); +		if (!p) +			return nfserr_resource; +		p = encode_cinfo(p, &link->li_cinfo);  	}  	return nfserr;  } @@ -2867,72 +2954,86 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li  static __be32  nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	if (nfserr)  		goto out; -	nfsd4_encode_stateid(resp, &open->op_stateid); -	RESERVE_SPACE(40); -	write_cinfo(&p, &open->op_cinfo); -	WRITE32(open->op_rflags); -	WRITE32(2); -	WRITE32(open->op_bmval[0]); -	WRITE32(open->op_bmval[1]); -	WRITE32(open->op_delegate_type); -	ADJUST_ARGS(); +	nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); +	if (nfserr) +		goto out; +	p = xdr_reserve_space(xdr, 40); +	if (!p) +		return nfserr_resource; +	p = encode_cinfo(p, &open->op_cinfo); +	*p++ = cpu_to_be32(open->op_rflags); +	*p++ = cpu_to_be32(2); +	*p++ = cpu_to_be32(open->op_bmval[0]); +	*p++ = cpu_to_be32(open->op_bmval[1]); +	*p++ = cpu_to_be32(open->op_delegate_type);  	switch (open->op_delegate_type) 
{  	case NFS4_OPEN_DELEGATE_NONE:  		break;  	case NFS4_OPEN_DELEGATE_READ: -		nfsd4_encode_stateid(resp, &open->op_delegate_stateid); -		RESERVE_SPACE(20); -		WRITE32(open->op_recall); +		nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); +		if (nfserr) +			return nfserr; +		p = xdr_reserve_space(xdr, 20); +		if (!p) +			return nfserr_resource; +		*p++ = cpu_to_be32(open->op_recall);  		/*  		 * TODO: ACE's in delegations  		 */ -		WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); -		WRITE32(0); -		WRITE32(0); -		WRITE32(0);   /* XXX: is NULL principal ok? */ -		ADJUST_ARGS(); +		*p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); +		*p++ = cpu_to_be32(0); +		*p++ = cpu_to_be32(0); +		*p++ = cpu_to_be32(0);   /* XXX: is NULL principal ok? */  		break;  	case NFS4_OPEN_DELEGATE_WRITE: -		nfsd4_encode_stateid(resp, &open->op_delegate_stateid); -		RESERVE_SPACE(32); -		WRITE32(0); +		nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); +		if (nfserr) +			return nfserr; +		p = xdr_reserve_space(xdr, 32); +		if (!p) +			return nfserr_resource; +		*p++ = cpu_to_be32(0);  		/*  		 * TODO: space_limit's in delegations  		 */ -		WRITE32(NFS4_LIMIT_SIZE); -		WRITE32(~(u32)0); -		WRITE32(~(u32)0); +		*p++ = cpu_to_be32(NFS4_LIMIT_SIZE); +		*p++ = cpu_to_be32(~(u32)0); +		*p++ = cpu_to_be32(~(u32)0);  		/*  		 * TODO: ACE's in delegations  		 */ -		WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); -		WRITE32(0); -		WRITE32(0); -		WRITE32(0);   /* XXX: is NULL principal ok? */ -		ADJUST_ARGS(); +		*p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); +		*p++ = cpu_to_be32(0); +		*p++ = cpu_to_be32(0); +		*p++ = cpu_to_be32(0);   /* XXX: is NULL principal ok? 
*/  		break;  	case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */  		switch (open->op_why_no_deleg) {  		case WND4_CONTENTION:  		case WND4_RESOURCE: -			RESERVE_SPACE(8); -			WRITE32(open->op_why_no_deleg); -			WRITE32(0);	/* deleg signaling not supported yet */ +			p = xdr_reserve_space(xdr, 8); +			if (!p) +				return nfserr_resource; +			*p++ = cpu_to_be32(open->op_why_no_deleg); +			/* deleg signaling not supported yet: */ +			*p++ = cpu_to_be32(0);  			break;  		default: -			RESERVE_SPACE(4); -			WRITE32(open->op_why_no_deleg); +			p = xdr_reserve_space(xdr, 4); +			if (!p) +				return nfserr_resource; +			*p++ = cpu_to_be32(open->op_why_no_deleg);  		} -		ADJUST_ARGS();  		break;  	default:  		BUG(); @@ -2945,8 +3046,10 @@ out:  static __be32  nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)  { +	struct xdr_stream *xdr = &resp->xdr; +  	if (!nfserr) -		nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); +		nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid);  	return nfserr;  } @@ -2954,127 +3057,233 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct  static __be32  nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)  { +	struct xdr_stream *xdr = &resp->xdr; +  	if (!nfserr) -		nfsd4_encode_stateid(resp, &od->od_stateid); +		nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid);  	return nfserr;  } -static __be32 -nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, -		  struct nfsd4_read *read) +static __be32 nfsd4_encode_splice_read( +				struct nfsd4_compoundres *resp, +				struct nfsd4_read *read, +				struct file *file, unsigned long maxcount)  { +	struct xdr_stream *xdr = &resp->xdr; +	struct xdr_buf *buf = xdr->buf;  	u32 eof; -	int v; -	struct page *page; -	unsigned long maxcount;  -	long len; -	__be32 *p; +	int space_left; +	__be32 nfserr; +	__be32 *p = xdr->p - 2; -	if (nfserr) -		return nfserr; -	if 
(resp->xbuf->page_len) +	/* +	 * Don't inline pages unless we know there's room for eof, +	 * count, and possible padding: +	 */ +	if (xdr->end - xdr->p < 3)  		return nfserr_resource; -	RESERVE_SPACE(8); /* eof flag and byte count */ +	nfserr = nfsd_splice_read(read->rd_rqstp, file, +				  read->rd_offset, &maxcount); +	if (nfserr) { +		/* +		 * nfsd_splice_actor may have already messed with the +		 * page length; reset it so as not to confuse +		 * xdr_truncate_encode: +		 */ +		buf->page_len = 0; +		return nfserr; +	} + +	eof = (read->rd_offset + maxcount >= +	       read->rd_fhp->fh_dentry->d_inode->i_size); -	maxcount = svc_max_payload(resp->rqstp); -	if (maxcount > read->rd_length) -		maxcount = read->rd_length; +	*(p++) = htonl(eof); +	*(p++) = htonl(maxcount); + +	buf->page_len = maxcount; +	buf->len += maxcount; +	xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; + +	/* Use rest of head for padding and remaining ops: */ +	buf->tail[0].iov_base = xdr->p; +	buf->tail[0].iov_len = 0; +	xdr->iov = buf->tail; +	if (maxcount&3) { +		int pad = 4 - (maxcount&3); + +		*(xdr->p++) = 0; + +		buf->tail[0].iov_base += maxcount&3; +		buf->tail[0].iov_len = pad; +		buf->len += pad; +	} + +	space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, +				buf->buflen - buf->len); +	buf->buflen = buf->len + space_left; +	xdr->end = (__be32 *)((void *)xdr->end + space_left); + +	return 0; +} + +static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, +				 struct nfsd4_read *read, +				 struct file *file, unsigned long maxcount) +{ +	struct xdr_stream *xdr = &resp->xdr; +	u32 eof; +	int v; +	int starting_len = xdr->buf->len - 8; +	long len; +	int thislen; +	__be32 nfserr; +	__be32 tmp; +	__be32 *p; +	u32 zzz = 0; +	int pad;  	len = maxcount;  	v = 0; -	while (len > 0) { -		page = *(resp->rqstp->rq_next_page); -		if (!page) { /* ran out of pages */ -			maxcount -= len; -			break; -		} -		resp->rqstp->rq_vec[v].iov_base = page_address(page); -		
resp->rqstp->rq_vec[v].iov_len = -			len < PAGE_SIZE ? len : PAGE_SIZE; -		resp->rqstp->rq_next_page++; + +	thislen = (void *)xdr->end - (void *)xdr->p; +	if (len < thislen) +		thislen = len; +	p = xdr_reserve_space(xdr, (thislen+3)&~3); +	WARN_ON_ONCE(!p); +	resp->rqstp->rq_vec[v].iov_base = p; +	resp->rqstp->rq_vec[v].iov_len = thislen; +	v++; +	len -= thislen; + +	while (len) { +		thislen = min_t(long, len, PAGE_SIZE); +		p = xdr_reserve_space(xdr, (thislen+3)&~3); +		WARN_ON_ONCE(!p); +		resp->rqstp->rq_vec[v].iov_base = p; +		resp->rqstp->rq_vec[v].iov_len = thislen;  		v++; -		len -= PAGE_SIZE; +		len -= thislen;  	}  	read->rd_vlen = v; -	nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp, -			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, -			&maxcount); - +	nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, +			read->rd_vlen, &maxcount);  	if (nfserr)  		return nfserr; +	xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); +  	eof = (read->rd_offset + maxcount >=  	       read->rd_fhp->fh_dentry->d_inode->i_size); -	WRITE32(eof); -	WRITE32(maxcount); -	ADJUST_ARGS(); -	resp->xbuf->head[0].iov_len = (char*)p -					- (char*)resp->xbuf->head[0].iov_base; -	resp->xbuf->page_len = maxcount; +	tmp = htonl(eof); +	write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4); +	tmp = htonl(maxcount); +	write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); -	/* Use rest of head for padding and remaining ops: */ -	resp->xbuf->tail[0].iov_base = p; -	resp->xbuf->tail[0].iov_len = 0; -	if (maxcount&3) { -		RESERVE_SPACE(4); -		WRITE32(0); -		resp->xbuf->tail[0].iov_base += maxcount&3; -		resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); -		ADJUST_ARGS(); -	} +	pad = (maxcount&3) ? 
4 - (maxcount&3) : 0; +	write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, +								&zzz, pad);  	return 0; +  }  static __be32 -nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, +		  struct nfsd4_read *read)  { -	int maxcount; -	char *page; +	unsigned long maxcount; +	struct xdr_stream *xdr = &resp->xdr; +	struct file *file = read->rd_filp; +	int starting_len = xdr->buf->len; +	struct raparms *ra;  	__be32 *p; +	__be32 err;  	if (nfserr)  		return nfserr; -	if (resp->xbuf->page_len) + +	p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ +	if (!p) { +		WARN_ON_ONCE(resp->rqstp->rq_splice_ok);  		return nfserr_resource; -	if (!*resp->rqstp->rq_next_page) +	} +	if (resp->xdr.buf->page_len && resp->rqstp->rq_splice_ok) { +		WARN_ON_ONCE(1);  		return nfserr_resource; +	} +	xdr_commit_encode(xdr); + +	maxcount = svc_max_payload(resp->rqstp); +	if (maxcount > xdr->buf->buflen - xdr->buf->len) +		maxcount = xdr->buf->buflen - xdr->buf->len; +	if (maxcount > read->rd_length) +		maxcount = read->rd_length; + +	if (!read->rd_filp) { +		err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, +						&file, &ra); +		if (err) +			goto err_truncate; +	} + +	if (file->f_op->splice_read && resp->rqstp->rq_splice_ok) +		err = nfsd4_encode_splice_read(resp, read, file, maxcount); +	else +		err = nfsd4_encode_readv(resp, read, file, maxcount); + +	if (!read->rd_filp) +		nfsd_put_tmp_read_open(file, ra); + +err_truncate: +	if (err) +		xdr_truncate_encode(xdr, starting_len); +	return err; +} -	page = page_address(*(resp->rqstp->rq_next_page++)); +static __be32 +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +{ +	int maxcount; +	__be32 wire_count; +	int zero = 0; +	struct xdr_stream *xdr = &resp->xdr; +	int length_offset = xdr->buf->len; +	__be32 *p; + +	if (nfserr) +		return nfserr; +	p = 
xdr_reserve_space(xdr, 4); +	if (!p) +		return nfserr_resource;  	maxcount = PAGE_SIZE; -	RESERVE_SPACE(4); +	p = xdr_reserve_space(xdr, maxcount); +	if (!p) +		return nfserr_resource;  	/*  	 * XXX: By default, the ->readlink() VFS op will truncate symlinks  	 * if they would overflow the buffer.  Is this kosher in NFSv4?  If  	 * not, one easy fix is: if ->readlink() precisely fills the buffer,  	 * assume that truncation occurred, and return NFS4ERR_RESOURCE.  	 */ -	nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); +	nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, +						(char *)p, &maxcount);  	if (nfserr == nfserr_isdir) -		return nfserr_inval; -	if (nfserr) +		nfserr = nfserr_inval; +	if (nfserr) { +		xdr_truncate_encode(xdr, length_offset);  		return nfserr; - -	WRITE32(maxcount); -	ADJUST_ARGS(); -	resp->xbuf->head[0].iov_len = (char*)p -				- (char*)resp->xbuf->head[0].iov_base; -	resp->xbuf->page_len = maxcount; - -	/* Use rest of head for padding and remaining ops: */ -	resp->xbuf->tail[0].iov_base = p; -	resp->xbuf->tail[0].iov_len = 0; -	if (maxcount&3) { -		RESERVE_SPACE(4); -		WRITE32(0); -		resp->xbuf->tail[0].iov_base += maxcount&3; -		resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); -		ADJUST_ARGS();  	} + +	wire_count = htonl(maxcount); +	write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); +	xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4)); +	if (maxcount & 3) +		write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, +						&zero, 4 - (maxcount&3));  	return 0;  } @@ -3082,47 +3291,52 @@ static __be32  nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)  {  	int maxcount; +	int bytes_left;  	loff_t offset; -	__be32 *page, *savep, *tailbase; +	__be64 wire_offset; +	struct xdr_stream *xdr = &resp->xdr; +	int starting_len = xdr->buf->len;  	__be32 *p;  	if (nfserr)  		return nfserr; -	if (resp->xbuf->page_len) -		return 
nfserr_resource; -	if (!*resp->rqstp->rq_next_page) -		return nfserr_resource; -	RESERVE_SPACE(NFS4_VERIFIER_SIZE); -	savep = p; +	p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); +	if (!p) +		return nfserr_resource;  	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ -	WRITE32(0); -	WRITE32(0); -	ADJUST_ARGS(); -	resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; -	tailbase = p; - -	maxcount = PAGE_SIZE; -	if (maxcount > readdir->rd_maxcount) -		maxcount = readdir->rd_maxcount; +	*p++ = cpu_to_be32(0); +	*p++ = cpu_to_be32(0); +	resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) +				- (char *)resp->xdr.buf->head[0].iov_base;  	/* -	 * Convert from bytes to words, account for the two words already -	 * written, make sure to leave two words at the end for the next -	 * pointer and eof field. +	 * Number of bytes left for directory entries allowing for the +	 * final 8 bytes of the readdir and a following failed op:  	 */ -	maxcount = (maxcount >> 2) - 4; -	if (maxcount < 0) { -		nfserr =  nfserr_toosmall; +	bytes_left = xdr->buf->buflen - xdr->buf->len +			- COMPOUND_ERR_SLACK_SPACE - 8; +	if (bytes_left < 0) { +		nfserr = nfserr_resource; +		goto err_no_verf; +	} +	maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX); +	/* +	 * Note the rfc defines rd_maxcount as the size of the +	 * READDIR4resok structure, which includes the verifier above +	 * and the 8 bytes encoded at the end of this function: +	 */ +	if (maxcount < 16) { +		nfserr = nfserr_toosmall;  		goto err_no_verf;  	} +	maxcount = min_t(int, maxcount-16, bytes_left); -	page = page_address(*(resp->rqstp->rq_next_page++)); +	readdir->xdr = xdr; +	readdir->rd_maxcount = maxcount;  	readdir->common.err = 0; -	readdir->buflen = maxcount; -	readdir->buffer = page; -	readdir->offset = NULL; +	readdir->cookie_offset = 0;  	offset = readdir->rd_cookie;  	nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, @@ -3130,42 +3344,49 @@ 
nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4  			      &readdir->common, nfsd4_encode_dirent);  	if (nfserr == nfs_ok &&  	    readdir->common.err == nfserr_toosmall && -	    readdir->buffer == page)  -		nfserr = nfserr_toosmall; +	    xdr->buf->len == starting_len + 8) { +		/* nothing encoded; which limit did we hit?: */ +		if (maxcount - 16 < bytes_left) +			/* It was the fault of rd_maxcount: */ +			nfserr = nfserr_toosmall; +		else +			/* We ran out of buffer space: */ +			nfserr = nfserr_resource; +	}  	if (nfserr)  		goto err_no_verf; -	if (readdir->offset) -		xdr_encode_hyper(readdir->offset, offset); +	if (readdir->cookie_offset) { +		wire_offset = cpu_to_be64(offset); +		write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset, +							&wire_offset, 8); +	} -	p = readdir->buffer; +	p = xdr_reserve_space(xdr, 8); +	if (!p) { +		WARN_ON_ONCE(1); +		goto err_no_verf; +	}  	*p++ = 0;	/* no more entries */  	*p++ = htonl(readdir->common.err == nfserr_eof); -	resp->xbuf->page_len = ((char*)p) - -		(char*)page_address(*(resp->rqstp->rq_next_page-1)); - -	/* Use rest of head for padding and remaining ops: */ -	resp->xbuf->tail[0].iov_base = tailbase; -	resp->xbuf->tail[0].iov_len = 0; -	resp->p = resp->xbuf->tail[0].iov_base; -	resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4;  	return 0;  err_no_verf: -	p = savep; -	ADJUST_ARGS(); +	xdr_truncate_encode(xdr, starting_len);  	return nfserr;  }  static __be32  nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	if (!nfserr) { -		RESERVE_SPACE(20); -		write_cinfo(&p, &remove->rm_cinfo); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, 20); +		if (!p) +			return nfserr_resource; +		p = encode_cinfo(p, &remove->rm_cinfo);  	}  	return nfserr;  } @@ -3173,19 +3394,21 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_  static __be32  
nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	if (!nfserr) { -		RESERVE_SPACE(40); -		write_cinfo(&p, &rename->rn_sinfo); -		write_cinfo(&p, &rename->rn_tinfo); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, 40); +		if (!p) +			return nfserr_resource; +		p = encode_cinfo(p, &rename->rn_sinfo); +		p = encode_cinfo(p, &rename->rn_tinfo);  	}  	return nfserr;  }  static __be32 -nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, +nfsd4_do_encode_secinfo(struct xdr_stream *xdr,  			 __be32 nfserr, struct svc_export *exp)  {  	u32 i, nflavs, supported; @@ -3196,6 +3419,7 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,  	if (nfserr)  		goto out; +	nfserr = nfserr_resource;  	if (exp->ex_nflavors) {  		flavs = exp->ex_flavors;  		nflavs = exp->ex_nflavors; @@ -3217,9 +3441,10 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,  	}  	supported = 0; -	RESERVE_SPACE(4); +	p = xdr_reserve_space(xdr, 4); +	if (!p) +		goto out;  	flavorsp = p++;		/* to be backfilled later */ -	ADJUST_ARGS();  	for (i = 0; i < nflavs; i++) {  		rpc_authflavor_t pf = flavs[i].pseudoflavor; @@ -3227,18 +3452,20 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,  		if (rpcauth_get_gssinfo(pf, &info) == 0) {  			supported++; -			RESERVE_SPACE(4 + 4 + info.oid.len + 4 + 4); -			WRITE32(RPC_AUTH_GSS); -			WRITE32(info.oid.len); -			WRITEMEM(info.oid.data, info.oid.len); -			WRITE32(info.qop); -			WRITE32(info.service); -			ADJUST_ARGS(); +			p = xdr_reserve_space(xdr, 4 + 4 + +					      XDR_LEN(info.oid.len) + 4 + 4); +			if (!p) +				goto out; +			*p++ = cpu_to_be32(RPC_AUTH_GSS); +			p = xdr_encode_opaque(p,  info.oid.data, info.oid.len); +			*p++ = cpu_to_be32(info.qop); +			*p++ = cpu_to_be32(info.service);  		} else if (pf < RPC_AUTH_MAXFLAVOR) {  			supported++; -			RESERVE_SPACE(4); -			WRITE32(pf); -			ADJUST_ARGS(); +			p = xdr_reserve_space(xdr, 4); +			
if (!p) +				goto out; +			*p++ = cpu_to_be32(pf);  		} else {  			if (report)  				pr_warn("NFS: SECINFO: security flavor %u " @@ -3249,7 +3476,7 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,  	if (nflavs != supported)  		report = false;  	*flavorsp = htonl(supported); - +	nfserr = 0;  out:  	if (exp)  		exp_put(exp); @@ -3260,14 +3487,18 @@ static __be32  nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,  		     struct nfsd4_secinfo *secinfo)  { -	return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp); +	struct xdr_stream *xdr = &resp->xdr; + +	return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp);  }  static __be32  nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,  		     struct nfsd4_secinfo_no_name *secinfo)  { -	return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp); +	struct xdr_stream *xdr = &resp->xdr; + +	return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp);  }  /* @@ -3277,41 +3508,47 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,  static __be32  nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p; -	RESERVE_SPACE(16); +	p = xdr_reserve_space(xdr, 16); +	if (!p) +		return nfserr_resource;  	if (nfserr) { -		WRITE32(3); -		WRITE32(0); -		WRITE32(0); -		WRITE32(0); +		*p++ = cpu_to_be32(3); +		*p++ = cpu_to_be32(0); +		*p++ = cpu_to_be32(0); +		*p++ = cpu_to_be32(0);  	}  	else { -		WRITE32(3); -		WRITE32(setattr->sa_bmval[0]); -		WRITE32(setattr->sa_bmval[1]); -		WRITE32(setattr->sa_bmval[2]); +		*p++ = cpu_to_be32(3); +		*p++ = cpu_to_be32(setattr->sa_bmval[0]); +		*p++ = cpu_to_be32(setattr->sa_bmval[1]); +		*p++ = cpu_to_be32(setattr->sa_bmval[2]);  	} -	ADJUST_ARGS();  	return nfserr;  }  static __be32  nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)  { +	struct xdr_stream *xdr = 
&resp->xdr;  	__be32 *p;  	if (!nfserr) { -		RESERVE_SPACE(8 + NFS4_VERIFIER_SIZE); -		WRITEMEM(&scd->se_clientid, 8); -		WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE); +		if (!p) +			return nfserr_resource; +		p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8); +		p = xdr_encode_opaque_fixed(p, &scd->se_confirm, +						NFS4_VERIFIER_SIZE);  	}  	else if (nfserr == nfserr_clid_inuse) { -		RESERVE_SPACE(8); -		WRITE32(0); -		WRITE32(0); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, 8); +		if (!p) +			return nfserr_resource; +		*p++ = cpu_to_be32(0); +		*p++ = cpu_to_be32(0);  	}  	return nfserr;  } @@ -3319,14 +3556,17 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n  static __be32  nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	if (!nfserr) { -		RESERVE_SPACE(16); -		WRITE32(write->wr_bytes_written); -		WRITE32(write->wr_how_written); -		WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, 16); +		if (!p) +			return nfserr_resource; +		*p++ = cpu_to_be32(write->wr_bytes_written); +		*p++ = cpu_to_be32(write->wr_how_written); +		p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, +							NFS4_VERIFIER_SIZE);  	}  	return nfserr;  } @@ -3343,6 +3583,7 @@ static __be32  nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,  			 struct nfsd4_exchange_id *exid)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	char *major_id;  	char *server_scope; @@ -3358,52 +3599,61 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,  	server_scope = utsname()->nodename;  	server_scope_sz = strlen(server_scope); -	RESERVE_SPACE( +	p = xdr_reserve_space(xdr,  		8 /* eir_clientid */ +  		4 /* eir_sequenceid */ +  		4 /* eir_flags */ + -		4 /* spr_how */ + -		8 /* 
spo_must_enforce, spo_must_allow */ + -		8 /* so_minor_id */ + -		4 /* so_major_id.len */ + -		(XDR_QUADLEN(major_id_sz) * 4) + -		4 /* eir_server_scope.len */ + -		(XDR_QUADLEN(server_scope_sz) * 4) + -		4 /* eir_server_impl_id.count (0) */); +		4 /* spr_how */); +	if (!p) +		return nfserr_resource; + +	p = xdr_encode_opaque_fixed(p, &exid->clientid, 8); +	*p++ = cpu_to_be32(exid->seqid); +	*p++ = cpu_to_be32(exid->flags); -	WRITEMEM(&exid->clientid, 8); -	WRITE32(exid->seqid); -	WRITE32(exid->flags); +	*p++ = cpu_to_be32(exid->spa_how); -	WRITE32(exid->spa_how);  	switch (exid->spa_how) {  	case SP4_NONE:  		break;  	case SP4_MACH_CRED: +		/* spo_must_enforce, spo_must_allow */ +		p = xdr_reserve_space(xdr, 16); +		if (!p) +			return nfserr_resource; +  		/* spo_must_enforce bitmap: */ -		WRITE32(2); -		WRITE32(nfs4_minimal_spo_must_enforce[0]); -		WRITE32(nfs4_minimal_spo_must_enforce[1]); +		*p++ = cpu_to_be32(2); +		*p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[0]); +		*p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[1]);  		/* empty spo_must_allow bitmap: */ -		WRITE32(0); +		*p++ = cpu_to_be32(0); +  		break;  	default:  		WARN_ON_ONCE(1);  	} +	p = xdr_reserve_space(xdr, +		8 /* so_minor_id */ + +		4 /* so_major_id.len */ + +		(XDR_QUADLEN(major_id_sz) * 4) + +		4 /* eir_server_scope.len */ + +		(XDR_QUADLEN(server_scope_sz) * 4) + +		4 /* eir_server_impl_id.count (0) */); +	if (!p) +		return nfserr_resource; +  	/* The server_owner struct */ -	WRITE64(minor_id);      /* Minor id */ +	p = xdr_encode_hyper(p, minor_id);      /* Minor id */  	/* major id */ -	WRITE32(major_id_sz); -	WRITEMEM(major_id, major_id_sz); +	p = xdr_encode_opaque(p, major_id, major_id_sz);  	/* Server scope */ -	WRITE32(server_scope_sz); -	WRITEMEM(server_scope, server_scope_sz); +	p = xdr_encode_opaque(p, server_scope, server_scope_sz);  	/* Implementation id */ -	WRITE32(0);	/* zero length nfs_impl_id4 array */ -	ADJUST_ARGS(); +	*p++ = cpu_to_be32(0);	/* zero length 
nfs_impl_id4 array */  	return 0;  } @@ -3411,93 +3661,81 @@ static __be32  nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,  			    struct nfsd4_create_session *sess)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	if (nfserr)  		return nfserr; -	RESERVE_SPACE(24); -	WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); -	WRITE32(sess->seqid); -	WRITE32(sess->flags); -	ADJUST_ARGS(); - -	RESERVE_SPACE(28); -	WRITE32(0); /* headerpadsz */ -	WRITE32(sess->fore_channel.maxreq_sz); -	WRITE32(sess->fore_channel.maxresp_sz); -	WRITE32(sess->fore_channel.maxresp_cached); -	WRITE32(sess->fore_channel.maxops); -	WRITE32(sess->fore_channel.maxreqs); -	WRITE32(sess->fore_channel.nr_rdma_attrs); -	ADJUST_ARGS(); +	p = xdr_reserve_space(xdr, 24); +	if (!p) +		return nfserr_resource; +	p = xdr_encode_opaque_fixed(p, sess->sessionid.data, +					NFS4_MAX_SESSIONID_LEN); +	*p++ = cpu_to_be32(sess->seqid); +	*p++ = cpu_to_be32(sess->flags); + +	p = xdr_reserve_space(xdr, 28); +	if (!p) +		return nfserr_resource; +	*p++ = cpu_to_be32(0); /* headerpadsz */ +	*p++ = cpu_to_be32(sess->fore_channel.maxreq_sz); +	*p++ = cpu_to_be32(sess->fore_channel.maxresp_sz); +	*p++ = cpu_to_be32(sess->fore_channel.maxresp_cached); +	*p++ = cpu_to_be32(sess->fore_channel.maxops); +	*p++ = cpu_to_be32(sess->fore_channel.maxreqs); +	*p++ = cpu_to_be32(sess->fore_channel.nr_rdma_attrs);  	if (sess->fore_channel.nr_rdma_attrs) { -		RESERVE_SPACE(4); -		WRITE32(sess->fore_channel.rdma_attrs); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, 4); +		if (!p) +			return nfserr_resource; +		*p++ = cpu_to_be32(sess->fore_channel.rdma_attrs);  	} -	RESERVE_SPACE(28); -	WRITE32(0); /* headerpadsz */ -	WRITE32(sess->back_channel.maxreq_sz); -	WRITE32(sess->back_channel.maxresp_sz); -	WRITE32(sess->back_channel.maxresp_cached); -	WRITE32(sess->back_channel.maxops); -	WRITE32(sess->back_channel.maxreqs); -	WRITE32(sess->back_channel.nr_rdma_attrs); -	ADJUST_ARGS(); +	p = 
xdr_reserve_space(xdr, 28); +	if (!p) +		return nfserr_resource; +	*p++ = cpu_to_be32(0); /* headerpadsz */ +	*p++ = cpu_to_be32(sess->back_channel.maxreq_sz); +	*p++ = cpu_to_be32(sess->back_channel.maxresp_sz); +	*p++ = cpu_to_be32(sess->back_channel.maxresp_cached); +	*p++ = cpu_to_be32(sess->back_channel.maxops); +	*p++ = cpu_to_be32(sess->back_channel.maxreqs); +	*p++ = cpu_to_be32(sess->back_channel.nr_rdma_attrs);  	if (sess->back_channel.nr_rdma_attrs) { -		RESERVE_SPACE(4); -		WRITE32(sess->back_channel.rdma_attrs); -		ADJUST_ARGS(); +		p = xdr_reserve_space(xdr, 4); +		if (!p) +			return nfserr_resource; +		*p++ = cpu_to_be32(sess->back_channel.rdma_attrs);  	}  	return 0;  }  static __be32 -nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, __be32 nfserr, -			     struct nfsd4_destroy_session *destroy_session) -{ -	return nfserr; -} - -static __be32 -nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, -			  struct nfsd4_free_stateid *free_stateid) -{ -	__be32 *p; - -	if (nfserr) -		return nfserr; - -	RESERVE_SPACE(4); -	*p++ = nfserr; -	ADJUST_ARGS(); -	return nfserr; -} - -static __be32  nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,  		      struct nfsd4_sequence *seq)  { +	struct xdr_stream *xdr = &resp->xdr;  	__be32 *p;  	if (nfserr)  		return nfserr; -	RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20); -	WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); -	WRITE32(seq->seqid); -	WRITE32(seq->slotid); +	p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); +	if (!p) +		return nfserr_resource; +	p = xdr_encode_opaque_fixed(p, seq->sessionid.data, +					NFS4_MAX_SESSIONID_LEN); +	*p++ = cpu_to_be32(seq->seqid); +	*p++ = cpu_to_be32(seq->slotid);  	/* Note slotid's are numbered from zero: */ -	WRITE32(seq->maxslots - 1); /* sr_highest_slotid */ -	WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ -	WRITE32(seq->status_flags); +	*p++ = cpu_to_be32(seq->maxslots - 1); /* sr_highest_slotid */ +	
*p++ = cpu_to_be32(seq->maxslots - 1); /* sr_target_highest_slotid */ +	*p++ = cpu_to_be32(seq->status_flags); -	ADJUST_ARGS(); -	resp->cstate.datap = p; /* DRC cache data pointer */ +	resp->cstate.data_offset = xdr->buf->len; /* DRC cache data pointer */  	return 0;  } @@ -3505,17 +3743,22 @@ static __be32  nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,  			  struct nfsd4_test_stateid *test_stateid)  { +	struct xdr_stream *xdr = &resp->xdr;  	struct nfsd4_test_stateid_id *stateid, *next;  	__be32 *p; -	RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids)); +	if (nfserr) +		return nfserr; + +	p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids)); +	if (!p) +		return nfserr_resource;  	*p++ = htonl(test_stateid->ts_num_ids);  	list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) {  		*p++ = stateid->ts_id_status;  	} -	ADJUST_ARGS();  	return nfserr;  } @@ -3576,8 +3819,8 @@ static nfsd4_enc nfsd4_enc_ops[] = {  	[OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session,  	[OP_EXCHANGE_ID]	= (nfsd4_enc)nfsd4_encode_exchange_id,  	[OP_CREATE_SESSION]	= (nfsd4_enc)nfsd4_encode_create_session, -	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_destroy_session, -	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_free_stateid, +	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_noop, +	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_noop,  	[OP_GET_DIR_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,  	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_noop,  	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop, @@ -3594,83 +3837,99 @@ static nfsd4_enc nfsd4_enc_ops[] = {  };  /* - * Calculate the total amount of memory that the compound response has taken - * after encoding the current operation with pad. + * Calculate whether we still have space to encode repsize bytes. 
+ * There are two considerations: + *     - For NFS versions >=4.1, the size of the reply must stay within + *       session limits + *     - For all NFS versions, we must stay within limited preallocated + *       buffer space.   * - * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop() - *      which was specified at nfsd4_operation, else pad is zero. - * - * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached. - * - * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so - * will be at least a page and will therefore hold the xdr_buf head. + * This is called before the operation is processed, so can only provide + * an upper estimate.  For some nonidempotent operations (such as + * getattr), it's not necessarily a problem if that estimate is wrong, + * as we can fail it after processing without significant side effects.   */ -__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) +__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize)  { -	struct xdr_buf *xb = &resp->rqstp->rq_res; -	struct nfsd4_session *session = NULL; +	struct xdr_buf *buf = &resp->rqstp->rq_res;  	struct nfsd4_slot *slot = resp->cstate.slot; -	u32 length, tlen = 0; +	if (buf->len + respsize <= buf->buflen) +		return nfs_ok;  	if (!nfsd4_has_session(&resp->cstate)) -		return 0; - -	session = resp->cstate.session; -	if (session == NULL) -		return 0; - -	if (xb->page_len == 0) { -		length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; -	} else { -		if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0) -			tlen = (char *)resp->p - (char *)xb->tail[0].iov_base; - -		length = xb->head[0].iov_len + xb->page_len + tlen + pad; -	} -	dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, -		length, xb->page_len, tlen, pad); - -	if (length > session->se_fchannel.maxresp_sz) -		return nfserr_rep_too_big; - -	if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && -	    length > 
session->se_fchannel.maxresp_cached) +		return nfserr_resource; +	if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) { +		WARN_ON_ONCE(1);  		return nfserr_rep_too_big_to_cache; - -	return 0; +	} +	return nfserr_rep_too_big;  }  void  nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)  { +	struct xdr_stream *xdr = &resp->xdr;  	struct nfs4_stateowner *so = resp->cstate.replay_owner; -	__be32 *statp; +	struct svc_rqst *rqstp = resp->rqstp; +	int post_err_offset; +	nfsd4_enc encoder;  	__be32 *p; -	RESERVE_SPACE(8); -	WRITE32(op->opnum); -	statp = p++;	/* to be backfilled at the end */ -	ADJUST_ARGS(); +	p = xdr_reserve_space(xdr, 8); +	if (!p) { +		WARN_ON_ONCE(1); +		return; +	} +	*p++ = cpu_to_be32(op->opnum); +	post_err_offset = xdr->buf->len;  	if (op->opnum == OP_ILLEGAL)  		goto status;  	BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||  	       !nfsd4_enc_ops[op->opnum]); -	op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); -	/* nfsd4_check_drc_limit guarantees enough room for error status */ -	if (!op->status) -		op->status = nfsd4_check_resp_size(resp, 0); +	encoder = nfsd4_enc_ops[op->opnum]; +	op->status = encoder(resp, op->status, &op->u); +	xdr_commit_encode(xdr); + +	/* nfsd4_check_resp_size guarantees enough room for error status */ +	if (!op->status) { +		int space_needed = 0; +		if (!nfsd4_last_compound_op(rqstp)) +			space_needed = COMPOUND_ERR_SLACK_SPACE; +		op->status = nfsd4_check_resp_size(resp, space_needed); +	} +	if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) { +		struct nfsd4_slot *slot = resp->cstate.slot; + +		if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) +			op->status = nfserr_rep_too_big_to_cache; +		else +			op->status = nfserr_rep_too_big; +	} +	if (op->status == nfserr_resource || +	    op->status == nfserr_rep_too_big || +	    op->status == nfserr_rep_too_big_to_cache) { +		/* +		 * The operation may have already been encoded or +		 * partially encoded.  
No op returns anything additional +		 * in the case of one of these three errors, so we can +		 * just truncate back to after the status.  But it's a +		 * bug if we had to do this on a non-idempotent op: +		 */ +		warn_on_nonidempotent_op(op); +		xdr_truncate_encode(xdr, post_err_offset); +	}  	if (so) { +		int len = xdr->buf->len - post_err_offset; +  		so->so_replay.rp_status = op->status; -		so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1); -		memcpy(so->so_replay.rp_buf, statp+1, so->so_replay.rp_buflen); +		so->so_replay.rp_buflen = len; +		read_bytes_from_xdr_buf(xdr->buf, post_err_offset, +						so->so_replay.rp_buf, len);  	}  status: -	/* -	 * Note: We write the status directly, instead of using WRITE32(), -	 * since it is already in network byte order. -	 */ -	*statp = op->status; +	/* Note that op->status is already in network byte order: */ +	write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4);  }  /*  @@ -3682,21 +3941,22 @@ status:   * called with nfs4_lock_state() held   */  void -nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)  {  	__be32 *p;  	struct nfs4_replay *rp = op->replay;  	BUG_ON(!rp); -	RESERVE_SPACE(8); -	WRITE32(op->opnum); +	p = xdr_reserve_space(xdr, 8 + rp->rp_buflen); +	if (!p) { +		WARN_ON_ONCE(1); +		return; +	} +	*p++ = cpu_to_be32(op->opnum);  	*p++ = rp->rp_status;  /* already xdr'ed */ -	ADJUST_ARGS(); -	RESERVE_SPACE(rp->rp_buflen); -	WRITEMEM(rp->rp_buf, rp->rp_buflen); -	ADJUST_ARGS(); +	p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen);  }  int @@ -3728,6 +3988,12 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)  int  nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)  { +	if (rqstp->rq_arg.head[0].iov_len % 4) { +		/* client is nuts */ +		dprintk("%s: compound not properly padded! 
(peeraddr=%pISc xid=0x%x)", +			__func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid)); +		return 0; +	}  	args->p = p;  	args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;  	args->pagelist = rqstp->rq_arg.pages; @@ -3747,19 +4013,19 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo  	 * All that remains is to write the tag and operation count...  	 */  	struct nfsd4_compound_state *cs = &resp->cstate; -	struct kvec *iov; +	struct xdr_buf *buf = resp->xdr.buf; + +	WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + +				 buf->tail[0].iov_len); + +	rqstp->rq_next_page = resp->xdr.page_ptr + 1; +  	p = resp->tagp;  	*p++ = htonl(resp->taglen);  	memcpy(p, resp->tag, resp->taglen);  	p += XDR_QUADLEN(resp->taglen);  	*p++ = htonl(resp->opcnt); -	if (rqstp->rq_res.page_len)  -		iov = &rqstp->rq_res.tail[0]; -	else -		iov = &rqstp->rq_res.head[0]; -	iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; -	BUG_ON(iov->iov_len > PAGE_SIZE);  	if (nfsd4_has_session(cs)) {  		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);  		struct nfs4_client *clp = cs->session->se_client; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 9186c7ce0b1..6040da8830f 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -224,13 +224,6 @@ hash_refile(struct svc_cacherep *rp)  	hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits));  } -static inline bool -nfsd_cache_entry_expired(struct svc_cacherep *rp) -{ -	return rp->c_state != RC_INPROG && -	       time_after(jiffies, rp->c_timestamp + RC_EXPIRE); -} -  /*   * Walk the LRU list and prune off entries that are older than RC_EXPIRE.   * Also prune the oldest ones when the total exceeds the max number of entries. 
@@ -242,8 +235,14 @@ prune_cache_entries(void)  	long freed = 0;  	list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { -		if (!nfsd_cache_entry_expired(rp) && -		    num_drc_entries <= max_drc_entries) +		/* +		 * Don't free entries attached to calls that are still +		 * in-progress, but do keep scanning the list. +		 */ +		if (rp->c_state == RC_INPROG) +			continue; +		if (num_drc_entries <= max_drc_entries && +		    time_before(jiffies, rp->c_timestamp + RC_EXPIRE))  			break;  		nfsd_reply_cache_free_locked(rp);  		freed++; @@ -409,22 +408,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)  	/*  	 * Since the common case is a cache miss followed by an insert, -	 * preallocate an entry. First, try to reuse the first entry on the LRU -	 * if it works, then go ahead and prune the LRU list. +	 * preallocate an entry.  	 */ -	spin_lock(&cache_lock); -	if (!list_empty(&lru_head)) { -		rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); -		if (nfsd_cache_entry_expired(rp) || -		    num_drc_entries >= max_drc_entries) { -			lru_put_end(rp); -			prune_cache_entries(); -			goto search_cache; -		} -	} - -	/* No expired ones available, allocate a new one. */ -	spin_unlock(&cache_lock);  	rp = nfsd_reply_cache_alloc();  	spin_lock(&cache_lock);  	if (likely(rp)) { @@ -432,7 +417,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)  		drc_mem_usage += sizeof(*rp);  	} -search_cache: +	/* go ahead and prune the cache */ +	prune_cache_entries(); +  	found = nfsd_cache_search(rqstp, csum);  	if (found) {  		if (likely(rp)) @@ -446,15 +433,6 @@ search_cache:  		goto out;  	} -	/* -	 * We're keeping the one we just allocated. Are we now over the -	 * limit? Prune one off the tip of the LRU in trade for the one we -	 * just allocated if so. 
-	 */ -	if (num_drc_entries >= max_drc_entries) -		nfsd_reply_cache_free_locked(list_first_entry(&lru_head, -						struct svc_cacherep, c_lru)); -  	nfsdstats.rcmisses++;  	rqstp->rq_cacherep = rp;  	rp->c_state = RC_INPROG; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7f555179bf8..51844048937 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -699,6 +699,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net)  	if (err != 0 || fd < 0)  		return -EINVAL; +	if (svc_alien_sock(net, fd)) { +		printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__); +		return -EINVAL; +	} +  	err = nfsd_create_serv(net);  	if (err != 0)  		return err; @@ -1174,7 +1179,6 @@ static int __init init_nfsd(void)  	retval = nfsd4_init_slabs();  	if (retval)  		goto out_unregister_pernet; -	nfs4_state_init();  	retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */  	if (retval)  		goto out_free_slabs; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 30f34ab0213..847daf37e56 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -15,11 +15,20 @@  #include <linux/nfs2.h>  #include <linux/nfs3.h>  #include <linux/nfs4.h> +#include <linux/sunrpc/svc.h>  #include <linux/sunrpc/msg_prot.h> -#include <linux/nfsd/debug.h> -#include <linux/nfsd/export.h> -#include <linux/nfsd/stats.h> +#include <uapi/linux/nfsd/debug.h> + +#include "stats.h" +#include "export.h" + +#undef ifdebug +#ifdef NFSD_DEBUG +# define ifdebug(flag)		if (nfsd_debug & NFSDDBG_##flag) +#else +# define ifdebug(flag)		if (0) +#endif  /*   * nfsd version @@ -106,7 +115,6 @@ static inline int nfsd_v4client(struct svc_rqst *rq)   */  #ifdef CONFIG_NFSD_V4  extern unsigned long max_delegations; -void nfs4_state_init(void);  int nfsd4_init_slabs(void);  void nfsd4_free_slabs(void);  int nfs4_state_start(void); @@ -117,7 +125,6 @@ void nfs4_reset_lease(time_t leasetime);  int nfs4_reset_recoverydir(char *recdir);  char * nfs4_recoverydir(void);  #else -static inline void 
nfs4_state_init(void) { }  static inline int nfsd4_init_slabs(void) { return 0; }  static inline void nfsd4_free_slabs(void) { }  static inline int nfs4_state_start(void) { return 0; } @@ -282,7 +289,7 @@ void		nfsd_lockd_shutdown(void);   * reason.   */  #define	COMPOUND_SLACK_SPACE		140    /* OP_GETFH */ -#define COMPOUND_ERR_SLACK_SPACE	12     /* OP_SETATTR */ +#define COMPOUND_ERR_SLACK_SPACE	16     /* OP_SETATTR */  #define NFSD_LAUNDROMAT_MINTIMEOUT      1   /* seconds */ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 814afaa4458..ec839341815 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)  		tdentry = parent;  	}  	if (tdentry != exp->ex_path.dentry) -		dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name); +		dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry);  	rv = (tdentry == exp->ex_path.dentry);  	dput(tdentry);  	return rv; @@ -88,9 +88,8 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,  	/* Check if the request originated from a secure port. 
*/  	if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {  		RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); -		dprintk(KERN_WARNING -		       "nfsd: request from insecure port %s!\n", -		       svc_print_addr(rqstp, buf, sizeof(buf))); +		dprintk("nfsd: request from insecure port %s!\n", +		        svc_print_addr(rqstp, buf, sizeof(buf)));  		return nfserr_perm;  	} @@ -169,8 +168,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)  		data_left -= len;  		if (data_left < 0)  			return error; -		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth); -		fid = (struct fid *)(fh->fh_auth + len); +		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); +		fid = (struct fid *)(fh->fh_fsid + len);  	} else {  		__u32 tfh[2];  		dev_t xdev; @@ -253,8 +252,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)  	if (S_ISDIR(dentry->d_inode->i_mode) &&  			(dentry->d_flags & DCACHE_DISCONNECTED)) { -		printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", -				dentry->d_parent->d_name.name, dentry->d_name.name); +		printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n", +				dentry);  	}  	fhp->fh_dentry = dentry; @@ -361,10 +360,9 @@ skip_pseudoflavor_check:  	error = nfsd_permission(rqstp, exp, dentry, access);  	if (error) { -		dprintk("fh_verify: %s/%s permission failure, " +		dprintk("fh_verify: %pd2 permission failure, "  			"acc=%x, error=%d\n", -			dentry->d_parent->d_name.name, -			dentry->d_name.name, +			dentry,  			access, ntohl(error));  	}  out: @@ -386,7 +384,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,  {  	if (dentry != exp->ex_path.dentry) {  		struct fid *fid = (struct fid *) -			(fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); +			(fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1);  		int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;  		int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); @@ 
-514,14 +512,12 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,  	 */  	struct inode * inode = dentry->d_inode; -	struct dentry *parent = dentry->d_parent; -	__u32 *datap;  	dev_t ex_dev = exp_sb(exp)->s_dev; -	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", +	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",  		MAJOR(ex_dev), MINOR(ex_dev),  		(long) exp->ex_path.dentry->d_inode->i_ino, -		parent->d_name.name, dentry->d_name.name, +		dentry,  		(inode ? inode->i_ino : 0));  	/* Choose filehandle version and fsid type based on @@ -534,13 +530,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,  		fh_put(ref_fh);  	if (fhp->fh_locked || fhp->fh_dentry) { -		printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", -		       parent->d_name.name, dentry->d_name.name); +		printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", +		       dentry);  	}  	if (fhp->fh_maxsize < NFS_FHSIZE) -		printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n", +		printk(KERN_ERR "fh_compose: called with maxsize %d! 
%pd2\n",  		       fhp->fh_maxsize, -		       parent->d_name.name, dentry->d_name.name); +		       dentry);  	fhp->fh_dentry = dget(dentry); /* our internal copy */  	fhp->fh_export = exp; @@ -559,17 +555,16 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,  		if (inode)  			_fh_update_old(dentry, exp, &fhp->fh_handle);  	} else { -		int len; +		fhp->fh_handle.fh_size = +			key_len(fhp->fh_handle.fh_fsid_type) + 4;  		fhp->fh_handle.fh_auth_type = 0; -		datap = fhp->fh_handle.fh_auth+0; -		mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev, + +		mk_fsid(fhp->fh_handle.fh_fsid_type, +			fhp->fh_handle.fh_fsid, +			ex_dev,  			exp->ex_path.dentry->d_inode->i_ino,  			exp->ex_fsid, exp->ex_uuid); -		len = key_len(fhp->fh_handle.fh_fsid_type); -		datap += len/4; -		fhp->fh_handle.fh_size = 4 + len; -  		if (inode)  			_fh_update(fhp, exp, dentry);  		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { @@ -600,22 +595,20 @@ fh_update(struct svc_fh *fhp)  		_fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);  	} else {  		if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) -			goto out; +			return 0;  		_fh_update(fhp, fhp->fh_export, dentry);  		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)  			return nfserr_opnotsupp;  	} -out:  	return 0; -  out_bad:  	printk(KERN_ERR "fh_update: fh not verified!\n"); -	goto out; +	return nfserr_serverfault;  out_negative: -	printk(KERN_ERR "fh_update: %s/%s still negative!\n", -		dentry->d_parent->d_name.name, dentry->d_name.name); -	goto out; +	printk(KERN_ERR "fh_update: %pd2 still negative!\n", +		dentry); +	return nfserr_serverfault;  }  /* diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index e5e6707ba68..2e89e70ac15 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -1,9 +1,58 @@ -/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */ +/* + * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> + * + * This file describes the layout of the file handles as 
passed + * over the wire. + */ +#ifndef _LINUX_NFSD_NFSFH_H +#define _LINUX_NFSD_NFSFH_H + +#include <linux/sunrpc/svc.h> +#include <uapi/linux/nfsd/nfsfh.h> + +static inline __u32 ino_t_to_u32(ino_t ino) +{ +	return (__u32) ino; +} -#ifndef _LINUX_NFSD_FH_INT_H -#define _LINUX_NFSD_FH_INT_H +static inline ino_t u32_to_ino_t(__u32 uino) +{ +	return (ino_t) uino; +} + +/* + * This is the internal representation of an NFS handle used in knfsd. + * pre_mtime/post_version will be used to support wcc_attr's in NFSv3. + */ +typedef struct svc_fh { +	struct knfsd_fh		fh_handle;	/* FH data */ +	struct dentry *		fh_dentry;	/* validated dentry */ +	struct svc_export *	fh_export;	/* export pointer */ +	int			fh_maxsize;	/* max size for fh_handle */ -#include <linux/nfsd/nfsfh.h> +	unsigned char		fh_locked;	/* inode locked by us */ +	unsigned char		fh_want_write;	/* remount protection taken */ + +#ifdef CONFIG_NFSD_V3 +	unsigned char		fh_post_saved;	/* post-op attrs saved */ +	unsigned char		fh_pre_saved;	/* pre-op attrs saved */ + +	/* Pre-op attributes saved during fh_lock */ +	__u64			fh_pre_size;	/* size before operation */ +	struct timespec		fh_pre_mtime;	/* mtime before oper */ +	struct timespec		fh_pre_ctime;	/* ctime before oper */ +	/* +	 * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) +	 *  to find out if it is valid. +	 */ +	u64			fh_pre_change; + +	/* Post-op attributes saved in fh_unlock */ +	struct kstat		fh_post_attr;	/* full attrs after operation */ +	u64			fh_post_change; /* nfsv4 change; see above */ +#endif /* CONFIG_NFSD_V3 */ + +} svc_fh;  enum nfsd_fsid {  	FSID_DEV = 0, @@ -133,6 +182,17 @@ fh_init(struct svc_fh *fhp, int maxsize)  #ifdef CONFIG_NFSD_V3  /* + * The wcc data stored in current_fh should be cleared + * between compound ops. 
+ */ +static inline void +fh_clear_wcc(struct svc_fh *fhp) +{ +	fhp->fh_post_saved = 0; +	fhp->fh_pre_saved = 0; +} + +/*   * Fill in the pre_op attr for the wcc data   */  static inline void @@ -152,7 +212,8 @@ fill_pre_wcc(struct svc_fh *fhp)  extern void fill_post_wcc(struct svc_fh *);  #else -#define	fill_pre_wcc(ignored) +#define fh_clear_wcc(ignored) +#define fill_pre_wcc(ignored)  #define fill_post_wcc(notused)  #endif /* CONFIG_NFSD_V3 */ @@ -173,8 +234,8 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)  	BUG_ON(!dentry);  	if (fhp->fh_locked) { -		printk(KERN_WARNING "fh_lock: %s/%s already locked!\n", -			dentry->d_parent->d_name.name, dentry->d_name.name); +		printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", +			dentry);  		return;  	} @@ -203,4 +264,4 @@ fh_unlock(struct svc_fh *fhp)  	}  } -#endif /* _LINUX_NFSD_FH_INT_H */ +#endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 760c85a6f53..1879e43f286 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -241,6 +241,15 @@ static void nfsd_shutdown_generic(void)  	nfsd_racache_shutdown();  } +static bool nfsd_needs_lockd(void) +{ +#if defined(CONFIG_NFSD_V3) +	return (nfsd_versions[2] != NULL) || (nfsd_versions[3] != NULL); +#else +	return (nfsd_versions[2] != NULL); +#endif +} +  static int nfsd_startup_net(int nrservs, struct net *net)  {  	struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -255,9 +264,14 @@ static int nfsd_startup_net(int nrservs, struct net *net)  	ret = nfsd_init_socks(net);  	if (ret)  		goto out_socks; -	ret = lockd_up(net); -	if (ret) -		goto out_socks; + +	if (nfsd_needs_lockd() && !nn->lockd_up) { +		ret = lockd_up(net); +		if (ret) +			goto out_socks; +		nn->lockd_up = 1; +	} +  	ret = nfs4_state_start_net(net);  	if (ret)  		goto out_lockd; @@ -266,7 +280,10 @@ static int nfsd_startup_net(int nrservs, struct net *net)  	return 0;  out_lockd: -	lockd_down(net); +	if (nn->lockd_up) { +		lockd_down(net); +		nn->lockd_up 
= 0; +	}  out_socks:  	nfsd_shutdown_generic();  	return ret; @@ -277,7 +294,10 @@ static void nfsd_shutdown_net(struct net *net)  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);  	nfs4_state_shutdown_net(net); -	lockd_down(net); +	if (nn->lockd_up) { +		lockd_down(net); +		nn->lockd_up = 0; +	}  	nn->nfsd_net_up = false;  	nfsd_shutdown_generic();  } @@ -571,12 +591,6 @@ nfsd(void *vrqstp)  	nfsdstats.th_cnt++;  	mutex_unlock(&nfsd_mutex); -	/* -	 * We want less throttling in balance_dirty_pages() so that nfs to -	 * localhost doesn't cause nfsd to lock up due to all the client's -	 * dirty pages. -	 */ -	current->flags |= PF_LESS_THROTTLE;  	set_freezable();  	/* diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 9c769a47ac5..1ac306b769d 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -214,7 +214,8 @@ nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)  int  nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)  { -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	return xdr_argsize_check(rqstp, p);  } @@ -248,7 +249,8 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,  {  	unsigned int len;  	int v; -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	args->offset    = ntohl(*p++); @@ -281,7 +283,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,  	unsigned int len, hdr, dlen;  	int v; -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	p++;				/* beginoffset */ @@ -355,7 +358,8 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,  int  nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args)  { -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	args->buffer = page_address(*(rqstp->rq_next_page++)); @@ -391,7 +395,8 @@ int  nfssvc_decode_readdirargs(struct svc_rqst 
*rqstp, __be32 *p,  					struct nfsd_readdirargs *args)  { -	if (!(p = decode_fh(p, &args->fh))) +	p = decode_fh(p, &args->fh); +	if (!p)  		return 0;  	args->cookie = ntohl(*p++);  	args->count  = ntohl(*p++); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 424d8f5f231..374c66283ac 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -37,7 +37,6 @@  #include <linux/idr.h>  #include <linux/sunrpc/svc_xprt.h> -#include <linux/nfsd/nfsfh.h>  #include "nfsfh.h"  typedef struct { @@ -123,7 +122,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)  /* Maximum number of operations per session compound */  #define NFSD_MAX_OPS_PER_COMPOUND	16  /* Maximum  session per slot cache size */ -#define NFSD_SLOT_CACHE_SIZE		1024 +#define NFSD_SLOT_CACHE_SIZE		2048  /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */  #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION	32  #define NFSD_MAX_MEM_PER_SESSION  \ @@ -464,8 +463,6 @@ extern void nfs4_release_reclaim(struct nfsd_net *);  extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,  							struct nfsd_net *nn);  extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); -extern void nfs4_free_openowner(struct nfs4_openowner *); -extern void nfs4_free_lockowner(struct nfs4_lockowner *);  extern int set_callback_cred(void);  extern void nfsd4_init_callback(struct nfsd4_callback *);  extern void nfsd4_probe_callback(struct nfs4_client *clp); diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 6d4521feb6e..cd90878a76a 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -24,7 +24,6 @@  #include <linux/seq_file.h>  #include <linux/module.h>  #include <linux/sunrpc/stats.h> -#include <linux/nfsd/stats.h>  #include <net/net_namespace.h>  #include "nfsd.h" diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h new file mode 100644 index 00000000000..a5c944b771c --- /dev/null +++ b/fs/nfsd/stats.h @@ -0,0 +1,43 @@ +/* + * Statistics for NFS server. 
+ * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ +#ifndef _NFSD_STATS_H +#define _NFSD_STATS_H + +#include <uapi/linux/nfsd/stats.h> + + +struct nfsd_stats { +	unsigned int	rchits;		/* repcache hits */ +	unsigned int	rcmisses;	/* repcache hits */ +	unsigned int	rcnocache;	/* uncached reqs */ +	unsigned int	fh_stale;	/* FH stale error */ +	unsigned int	fh_lookup;	/* dentry cached */ +	unsigned int	fh_anon;	/* anon file dentry returned */ +	unsigned int	fh_nocache_dir;	/* filehandle not found in dcache */ +	unsigned int	fh_nocache_nondir;	/* filehandle not found in dcache */ +	unsigned int	io_read;	/* bytes returned to read requests */ +	unsigned int	io_write;	/* bytes passed in write requests */ +	unsigned int	th_cnt;		/* number of available threads */ +	unsigned int	th_usage[10];	/* number of ticks during which n perdeciles +					 * of available threads were in use */ +	unsigned int	th_fullcnt;	/* number of times last free thread was used */ +	unsigned int	ra_size;	/* size of ra cache */ +	unsigned int	ra_depth[11];	/* number of times ra entry was found that deep +					 * in the cache (10percentiles). 
[10] = not found */ +#ifdef CONFIG_NFSD_V4 +	unsigned int	nfs4_opcount[LAST_NFS4_OP + 1];	/* count of individual nfsv4 operations */ +#endif + +}; + + +extern struct nfsd_stats	nfsdstats; +extern struct svc_stat		nfsd_svcstats; + +void	nfsd_stat_init(void); +void	nfsd_stat_shutdown(void); + +#endif /* _NFSD_STATS_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c827acb0e94..140c496f612 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -207,7 +207,12 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,  				goto out_nfserr;  		}  	} else { -		fh_lock(fhp); +		/* +		 * In the nfsd4_open() case, this may be held across +		 * subsequent open and delegation acquisition which may +		 * need to take the child's i_mutex: +		 */ +		fh_lock_nested(fhp, I_MUTEX_PARENT);  		dentry = lookup_one_len(name, dparent, len);  		host_err = PTR_ERR(dentry);  		if (IS_ERR(dentry)) @@ -273,13 +278,6 @@ out:  	return err;  } -static int nfsd_break_lease(struct inode *inode) -{ -	if (!S_ISREG(inode->i_mode)) -		return 0; -	return break_lease(inode, O_WRONLY | O_NONBLOCK); -} -  /*   * Commit metadata changes to stable storage.   */ @@ -298,41 +296,12 @@ commit_metadata(struct svc_fh *fhp)  }  /* - * Set various file attributes. - * N.B. After this call fhp needs an fh_put + * Go over the attributes and take care of the small differences between + * NFS semantics and what Linux expects.   
*/ -__be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, -	     int check_guard, time_t guardtime) +static void +nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)  { -	struct dentry	*dentry; -	struct inode	*inode; -	int		accmode = NFSD_MAY_SATTR; -	umode_t		ftype = 0; -	__be32		err; -	int		host_err; -	int		size_change = 0; - -	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) -		accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; -	if (iap->ia_valid & ATTR_SIZE) -		ftype = S_IFREG; - -	/* Get inode */ -	err = fh_verify(rqstp, fhp, ftype, accmode); -	if (err) -		goto out; - -	dentry = fhp->fh_dentry; -	inode = dentry->d_inode; - -	/* Ignore any mode updates on symlinks */ -	if (S_ISLNK(inode->i_mode)) -		iap->ia_valid &= ~ATTR_MODE; - -	if (!iap->ia_valid) -		goto out; -  	/*  	 * NFSv2 does not differentiate between "set-[ac]time-to-now"  	 * which only requires access, and "set-[ac]time-to-X" which @@ -342,8 +311,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,  	 * convert to "set to now" instead of "set to explicit time"  	 *  	 * We only call inode_change_ok as the last test as technically -	 * it is not an interface that we should be using.  It is only -	 * valid if the filesystem does not define it's own i_op->setattr. +	 * it is not an interface that we should be using.  	 */  #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)  #define	MAX_TOUCH_TIME_ERROR (30*60) @@ -369,30 +337,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,  			iap->ia_valid &= ~BOTH_TIME_SET;  		}  	} -	     -	/* -	 * The size case is special. -	 * It changes the file as well as the attributes. 
-	 */ -	if (iap->ia_valid & ATTR_SIZE) { -		if (iap->ia_size < inode->i_size) { -			err = nfsd_permission(rqstp, fhp->fh_export, dentry, -					NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); -			if (err) -				goto out; -		} - -		host_err = get_write_access(inode); -		if (host_err) -			goto out_nfserr; - -		size_change = 1; -		host_err = locks_verify_truncate(inode, NULL, iap->ia_size); -		if (host_err) { -			put_write_access(inode); -			goto out_nfserr; -		} -	}  	/* sanitize the mode change */  	if (iap->ia_valid & ATTR_MODE) { @@ -402,8 +346,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,  	/* Revoke setuid/setgid on chown */  	if (!S_ISDIR(inode->i_mode) && -	    (((iap->ia_valid & ATTR_UID) && !uid_eq(iap->ia_uid, inode->i_uid)) || -	     ((iap->ia_valid & ATTR_GID) && !gid_eq(iap->ia_gid, inode->i_gid)))) { +	    ((iap->ia_valid & ATTR_UID) || (iap->ia_valid & ATTR_GID))) {  		iap->ia_valid |= ATTR_KILL_PRIV;  		if (iap->ia_valid & ATTR_MODE) {  			/* we're setting mode too, just clear the s*id bits */ @@ -415,186 +358,118 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,  			iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);  		}  	} - -	/* Change the attributes. 
*/ - -	iap->ia_valid |= ATTR_CTIME; - -	err = nfserr_notsync; -	if (!check_guard || guardtime == inode->i_ctime.tv_sec) { -		host_err = nfsd_break_lease(inode); -		if (host_err) -			goto out_nfserr; -		fh_lock(fhp); - -		host_err = notify_change(dentry, iap); -		err = nfserrno(host_err); -		fh_unlock(fhp); -	} -	if (size_change) -		put_write_access(inode); -	if (!err) -		commit_metadata(fhp); -out: -	return err; - -out_nfserr: -	err = nfserrno(host_err); -	goto out;  } -#if defined(CONFIG_NFSD_V2_ACL) || \ -    defined(CONFIG_NFSD_V3_ACL) || \ -    defined(CONFIG_NFSD_V4) -static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) +static __be32 +nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, +		struct iattr *iap)  { -	ssize_t buflen; -	ssize_t ret; - -	buflen = vfs_getxattr(dentry, key, NULL, 0); -	if (buflen <= 0) -		return buflen; +	struct inode *inode = fhp->fh_dentry->d_inode; +	int host_err; -	*buf = kmalloc(buflen, GFP_KERNEL); -	if (!*buf) -		return -ENOMEM; +	if (iap->ia_size < inode->i_size) { +		__be32 err; -	ret = vfs_getxattr(dentry, key, *buf, buflen); -	if (ret < 0) -		kfree(*buf); -	return ret; -} -#endif +		err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, +				NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); +		if (err) +			return err; +	} -#if defined(CONFIG_NFSD_V4) -static int -set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) -{ -	int len; -	size_t buflen; -	char *buf = NULL; -	int error = 0; - -	buflen = posix_acl_xattr_size(pacl->a_count); -	buf = kmalloc(buflen, GFP_KERNEL); -	error = -ENOMEM; -	if (buf == NULL) -		goto out; +	host_err = get_write_access(inode); +	if (host_err) +		goto out_nfserrno; -	len = posix_acl_to_xattr(&init_user_ns, pacl, buf, buflen); -	if (len < 0) { -		error = len; -		goto out; -	} +	host_err = locks_verify_truncate(inode, NULL, iap->ia_size); +	if (host_err) +		goto out_put_write_access; +	return 0; -	error = vfs_setxattr(dentry, key, buf, len, 
0); -out: -	kfree(buf); -	return error; +out_put_write_access: +	put_write_access(inode); +out_nfserrno: +	return nfserrno(host_err);  } +/* + * Set various file attributes.  After this call fhp needs an fh_put. + */  __be32 -nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, -    struct nfs4_acl *acl) +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, +	     int check_guard, time_t guardtime)  { -	__be32 error; -	int host_error; -	struct dentry *dentry; -	struct inode *inode; -	struct posix_acl *pacl = NULL, *dpacl = NULL; -	unsigned int flags = 0; +	struct dentry	*dentry; +	struct inode	*inode; +	int		accmode = NFSD_MAY_SATTR; +	umode_t		ftype = 0; +	__be32		err; +	int		host_err; +	bool		get_write_count; +	int		size_change = 0; + +	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) +		accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; +	if (iap->ia_valid & ATTR_SIZE) +		ftype = S_IFREG; + +	/* Callers that do fh_verify should do the fh_want_write: */ +	get_write_count = !fhp->fh_dentry;  	/* Get inode */ -	error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); -	if (error) -		return error; +	err = fh_verify(rqstp, fhp, ftype, accmode); +	if (err) +		goto out; +	if (get_write_count) { +		host_err = fh_want_write(fhp); +		if (host_err) +			return nfserrno(host_err); +	}  	dentry = fhp->fh_dentry;  	inode = dentry->d_inode; -	if (S_ISDIR(inode->i_mode)) -		flags = NFS4_ACL_DIR; -	host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); -	if (host_error == -EINVAL) { -		return nfserr_attrnotsupp; -	} else if (host_error < 0) -		goto out_nfserr; +	/* Ignore any mode updates on symlinks */ +	if (S_ISLNK(inode->i_mode)) +		iap->ia_valid &= ~ATTR_MODE; -	host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); -	if (host_error < 0) -		goto out_release; +	if (!iap->ia_valid) +		goto out; -	if (S_ISDIR(inode->i_mode)) -		host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); +	
nfsd_sanitize_attrs(inode, iap); -out_release: -	posix_acl_release(pacl); -	posix_acl_release(dpacl); -out_nfserr: -	if (host_error == -EOPNOTSUPP) -		return nfserr_attrnotsupp; -	else -		return nfserrno(host_error); -} +	/* +	 * The size case is special, it changes the file in addition to the +	 * attributes. +	 */ +	if (iap->ia_valid & ATTR_SIZE) { +		err = nfsd_get_write_access(rqstp, fhp, iap); +		if (err) +			goto out; +		size_change = 1; +	} -static struct posix_acl * -_get_posix_acl(struct dentry *dentry, char *key) -{ -	void *buf = NULL; -	struct posix_acl *pacl = NULL; -	int buflen; - -	buflen = nfsd_getxattr(dentry, key, &buf); -	if (!buflen) -		buflen = -ENODATA; -	if (buflen <= 0) -		return ERR_PTR(buflen); - -	pacl = posix_acl_from_xattr(&init_user_ns, buf, buflen); -	kfree(buf); -	return pacl; -} +	iap->ia_valid |= ATTR_CTIME; -int -nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) -{ -	struct inode *inode = dentry->d_inode; -	int error = 0; -	struct posix_acl *pacl = NULL, *dpacl = NULL; -	unsigned int flags = 0; - -	pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS); -	if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) -		pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); -	if (IS_ERR(pacl)) { -		error = PTR_ERR(pacl); -		pacl = NULL; -		goto out; +	if (check_guard && guardtime != inode->i_ctime.tv_sec) { +		err = nfserr_notsync; +		goto out_put_write_access;  	} -	if (S_ISDIR(inode->i_mode)) { -		dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT); -		if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) -			dpacl = NULL; -		else if (IS_ERR(dpacl)) { -			error = PTR_ERR(dpacl); -			dpacl = NULL; -			goto out; -		} -		flags = NFS4_ACL_DIR; -	} +	fh_lock(fhp); +	host_err = notify_change(dentry, iap, NULL); +	fh_unlock(fhp); +	err = nfserrno(host_err); -	*acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags); -	if (IS_ERR(*acl)) { -		error = PTR_ERR(*acl); -		*acl = NULL; -	} - out: -	posix_acl_release(pacl); -	
posix_acl_release(dpacl); -	return error; +out_put_write_access: +	if (size_change) +		put_write_access(inode); +	if (!err) +		commit_metadata(fhp); +out: +	return err;  } +#if defined(CONFIG_NFSD_V4)  /*   * NFS junction information is stored in an extended attribute.   */ @@ -945,51 +820,54 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,  	return __splice_from_pipe(pipe, sd, nfsd_splice_actor);  } -static __be32 -nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, -              loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err)  { -	mm_segment_t	oldfs; -	__be32		err; -	int		host_err; - -	err = nfserr_perm; - -	if (file->f_op->splice_read && rqstp->rq_splice_ok) { -		struct splice_desc sd = { -			.len		= 0, -			.total_len	= *count, -			.pos		= offset, -			.u.data		= rqstp, -		}; - -		rqstp->rq_next_page = rqstp->rq_respages + 1; -		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); -	} else { -		oldfs = get_fs(); -		set_fs(KERNEL_DS); -		host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); -		set_fs(oldfs); -	} -  	if (host_err >= 0) {  		nfsdstats.io_read += host_err;  		*count = host_err; -		err = 0;  		fsnotify_access(file); +		return 0;  	} else  -		err = nfserrno(host_err); -	return err; +		return nfserrno(host_err);  } -static void kill_suid(struct dentry *dentry) +int nfsd_splice_read(struct svc_rqst *rqstp, +		     struct file *file, loff_t offset, unsigned long *count)  { -	struct iattr	ia; -	ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; +	struct splice_desc sd = { +		.len		= 0, +		.total_len	= *count, +		.pos		= offset, +		.u.data		= rqstp, +	}; +	int host_err; -	mutex_lock(&dentry->d_inode->i_mutex); -	notify_change(dentry, &ia); -	mutex_unlock(&dentry->d_inode->i_mutex); +	rqstp->rq_next_page = rqstp->rq_respages + 1; +	host_err = 
splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); +	return nfsd_finish_read(file, count, host_err); +} + +int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, +		unsigned long *count) +{ +	mm_segment_t oldfs; +	int host_err; + +	oldfs = get_fs(); +	set_fs(KERNEL_DS); +	host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); +	set_fs(oldfs); +	return nfsd_finish_read(file, count, host_err); +} + +static __be32 +nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file, +	      loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +{ +	if (file->f_op->splice_read && rqstp->rq_splice_ok) +		return nfsd_splice_read(rqstp, file, offset, count); +	else +		return nfsd_readv(file, offset, vec, vlen, count);  }  /* @@ -1043,6 +921,16 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,  	int			stable = *stablep;  	int			use_wgather;  	loff_t			pos = offset; +	unsigned int		pflags = current->flags; + +	if (rqstp->rq_local) +		/* +		 * We want less throttling in balance_dirty_pages() +		 * and shrink_inactive_list() so that nfs to +		 * localhost doesn't cause nfsd to lock up due to all +		 * the client's dirty pages or its congested queue. +		 */ +		current->flags |= PF_LESS_THROTTLE;  	dentry = file->f_path.dentry;  	inode = dentry->d_inode; @@ -1063,10 +951,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,  	nfsdstats.io_write += host_err;  	fsnotify_modify(file); -	/* clear setuid/setgid flag after write */ -	if (inode->i_mode & (S_ISUID | S_ISGID)) -		kill_suid(dentry); -  	if (stable) {  		if (use_wgather)  			host_err = wait_for_concurrent_writes(file); @@ -1080,36 +964,33 @@ out_nfserr:  		err = 0;  	else  		err = nfserrno(host_err); +	if (rqstp->rq_local) +		tsk_restore_flags(current, pflags, PF_LESS_THROTTLE);  	return err;  } -/* - * Read data from a file. count must contain the requested read count - * on entry. 
On return, *count contains the number of bytes actually read. - * N.B. After this call fhp needs an fh_put - */ -__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, -	loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp, +		struct file **file, struct raparms **ra)  { -	struct file *file;  	struct inode *inode; -	struct raparms	*ra;  	__be32 err; -	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); +	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file);  	if (err)  		return err; -	inode = file_inode(file); +	inode = file_inode(*file);  	/* Get readahead parameters */ -	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); - -	if (ra && ra->p_set) -		file->f_ra = ra->p_ra; +	*ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); -	err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); +	if (*ra && (*ra)->p_set) +		(*file)->f_ra = (*ra)->p_ra; +	return nfs_ok; +} +void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra) +{  	/* Write back readahead params */  	if (ra) {  		struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; @@ -1119,28 +1000,29 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,  		ra->p_count--;  		spin_unlock(&rab->pb_lock);  	} -  	nfsd_close(file); -	return err;  } -/* As above, but use the provided file descriptor. */ -__be32 -nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, -		loff_t offset, struct kvec *vec, int vlen, -		unsigned long *count) +/* + * Read data from a file. count must contain the requested read count + * on entry. On return, *count contains the number of bytes actually read. + * N.B. 
After this call fhp needs an fh_put + */ +__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, +	loff_t offset, struct kvec *vec, int vlen, unsigned long *count)  { -	__be32		err; +	struct file *file; +	struct raparms	*ra; +	__be32 err; + +	err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra); +	if (err) +		return err; + +	err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); + +	nfsd_put_tmp_read_open(file, ra); -	if (file) { -		err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, -				NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); -		if (err) -			goto out; -		err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); -	} else /* Note file may still be NULL in NFSv4 special stateid case: */ -		err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); -out:  	return err;  } @@ -1317,9 +1199,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,  		if (!fhp->fh_locked) {  			/* not actually possible */  			printk(KERN_ERR -				"nfsd_create: parent %s/%s not locked!\n", -				dentry->d_parent->d_name.name, -				dentry->d_name.name); +				"nfsd_create: parent %pd2 not locked!\n", +				dentry);  			err = nfserr_io;  			goto out;  		} @@ -1329,8 +1210,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,  	 */  	err = nfserr_exist;  	if (dchild->d_inode) { -		dprintk("nfsd_create: dentry %s/%s not negative!\n", -			dentry->d_name.name, dchild->d_name.name); +		dprintk("nfsd_create: dentry %pd/%pd not negative!\n", +			dentry, dchild);  		goto out;   	} @@ -1732,12 +1613,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,  	err = nfserr_noent;  	if (!dold->d_inode)  		goto out_dput; -	host_err = nfsd_break_lease(dold->d_inode); -	if (host_err) { -		err = nfserrno(host_err); -		goto out_dput; -	} -	host_err = vfs_link(dold, dirp, dnew); +	host_err = vfs_link(dold, dirp, dnew, NULL);  	if (!host_err) {  		err = nfserrno(commit_metadata(ffhp));  		if (!err) @@ -1830,15 +1706,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, 
char *fname, int flen,  	if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)  		goto out_dput_new; -	host_err = nfsd_break_lease(odentry->d_inode); -	if (host_err) -		goto out_dput_new; -	if (ndentry->d_inode) { -		host_err = nfsd_break_lease(ndentry->d_inode); -		if (host_err) -			goto out_dput_new; -	} -	host_err = vfs_rename(fdir, odentry, tdir, ndentry); +	host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);  	if (!host_err) {  		host_err = commit_metadata(tfhp);  		if (!host_err) @@ -1850,10 +1718,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,  	dput(odentry);   out_nfserr:  	err = nfserrno(host_err); - -	/* we cannot reply on fh_unlock on the two filehandles, +	/* +	 * We cannot rely on fh_unlock on the two filehandles,  	 * as that would do the wrong thing if the two directories -	 * were the same, so again we do it by hand +	 * were the same, so again we do it by hand.  	 */  	fill_post_wcc(ffhp);  	fill_post_wcc(tfhp); @@ -1907,16 +1775,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,  	if (!type)  		type = rdentry->d_inode->i_mode & S_IFMT; -	host_err = nfsd_break_lease(rdentry->d_inode); -	if (host_err) -		goto out_put;  	if (type != S_IFDIR) -		host_err = vfs_unlink(dirp, rdentry); +		host_err = vfs_unlink(dirp, rdentry, NULL);  	else  		host_err = vfs_rmdir(dirp, rdentry);  	if (!host_err)  		host_err = commit_metadata(fhp); -out_put:  	dput(rdentry);  out_nfserr: @@ -2256,93 +2120,3 @@ out_nomem:  	nfsd_racache_shutdown();  	return -ENOMEM;  } - -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) -struct posix_acl * -nfsd_get_posix_acl(struct svc_fh *fhp, int type) -{ -	struct inode *inode = fhp->fh_dentry->d_inode; -	char *name; -	void *value = NULL; -	ssize_t size; -	struct posix_acl *acl; - -	if (!IS_POSIXACL(inode)) -		return ERR_PTR(-EOPNOTSUPP); - -	switch (type) { -	case ACL_TYPE_ACCESS: -		name = POSIX_ACL_XATTR_ACCESS; -		break; -	case 
ACL_TYPE_DEFAULT: -		name = POSIX_ACL_XATTR_DEFAULT; -		break; -	default: -		return ERR_PTR(-EOPNOTSUPP); -	} - -	size = nfsd_getxattr(fhp->fh_dentry, name, &value); -	if (size < 0) -		return ERR_PTR(size); - -	acl = posix_acl_from_xattr(&init_user_ns, value, size); -	kfree(value); -	return acl; -} - -int -nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) -{ -	struct inode *inode = fhp->fh_dentry->d_inode; -	char *name; -	void *value = NULL; -	size_t size; -	int error; - -	if (!IS_POSIXACL(inode) || -	    !inode->i_op->setxattr || !inode->i_op->removexattr) -		return -EOPNOTSUPP; -	switch(type) { -		case ACL_TYPE_ACCESS: -			name = POSIX_ACL_XATTR_ACCESS; -			break; -		case ACL_TYPE_DEFAULT: -			name = POSIX_ACL_XATTR_DEFAULT; -			break; -		default: -			return -EOPNOTSUPP; -	} - -	if (acl && acl->a_count) { -		size = posix_acl_xattr_size(acl->a_count); -		value = kmalloc(size, GFP_KERNEL); -		if (!value) -			return -ENOMEM; -		error = posix_acl_to_xattr(&init_user_ns, acl, value, size); -		if (error < 0) -			goto getout; -		size = error; -	} else -		size = 0; - -	error = fh_want_write(fhp); -	if (error) -		goto getout; -	if (size) -		error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); -	else { -		if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) -			error = 0; -		else { -			error = vfs_removexattr(fhp->fh_dentry, name); -			if (error == -ENODATA) -				error = 0; -		} -	} -	fh_drop_write(fhp); - -getout: -	kfree(value); -	return error; -} -#endif  /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index a4be2e38967..91b6ae3f658 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -52,9 +52,6 @@ __be32		nfsd_setattr(struct svc_rqst *, struct svc_fh *,  				struct iattr *, int, time_t);  int nfsd_mountpoint(struct dentry *, struct svc_export *);  #ifdef CONFIG_NFSD_V4 -__be32          nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, -                    struct nfs4_acl *); 
-int             nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);  __be32          nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,  		    struct xdr_netobj *);  #endif /* CONFIG_NFSD_V4 */ @@ -73,10 +70,16 @@ __be32		nfsd_commit(struct svc_rqst *, struct svc_fh *,  __be32		nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,  				int, struct file **);  void		nfsd_close(struct file *); +struct raparms; +__be32		nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, +				struct file **, struct raparms **); +void		nfsd_put_tmp_read_open(struct file *, struct raparms *); +int		nfsd_splice_read(struct svc_rqst *, +				struct file *, loff_t, unsigned long *); +int		nfsd_readv(struct file *, loff_t, struct kvec *, int, +				unsigned long *);  __be32 		nfsd_read(struct svc_rqst *, struct svc_fh *,  				loff_t, struct kvec *, int, unsigned long *); -__be32 		nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *, -				loff_t, struct kvec *, int, unsigned long *);  __be32 		nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,  				loff_t, struct kvec *,int, unsigned long *, int *);  __be32		nfsd_readlink(struct svc_rqst *, struct svc_fh *, @@ -89,8 +92,6 @@ __be32		nfsd_link(struct svc_rqst *, struct svc_fh *,  __be32		nfsd_rename(struct svc_rqst *,  				struct svc_fh *, char *, int,  				struct svc_fh *, char *, int); -__be32		nfsd_remove(struct svc_rqst *, -				struct svc_fh *, char *, int);  __be32		nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,  				char *name, int len);  __be32		nfsd_readdir(struct svc_rqst *, struct svc_fh *, @@ -101,11 +102,6 @@ __be32		nfsd_statfs(struct svc_rqst *, struct svc_fh *,  __be32		nfsd_permission(struct svc_rqst *, struct svc_export *,  				struct dentry *, int); -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) -struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); -int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); -#endif -  static 
inline int fh_want_write(struct svc_fh *fh)  {  	int ret = mnt_want_write(fh->fh_export->ex_path.mnt); diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index b6d5542a4ac..335e04aaf7d 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -174,6 +174,9 @@ struct nfsd3_linkres {  struct nfsd3_readdirres {  	__be32			status;  	struct svc_fh		fh; +	/* Just to save kmalloc on every readdirplus entry (svc_fh is a +	 * little large for the stack): */ +	struct svc_fh		scratch;  	int			count;  	__be32			verf[2]; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index b3ed6446ed8..18cbb6d9c8a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -58,7 +58,7 @@ struct nfsd4_compound_state {  	/* For sessions DRC */  	struct nfsd4_session	*session;  	struct nfsd4_slot	*slot; -	__be32			*datap; +	int			data_offset;  	size_t			iovlen;  	u32			minorversion;  	__be32			status; @@ -228,7 +228,7 @@ struct nfsd4_open {  	u32		op_create;     	    /* request */  	u32		op_createmode;      /* request */  	u32		op_bmval[3];        /* request */ -	struct iattr	iattr;              /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ +	struct iattr	op_iattr;           /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */  	nfs4_verifier	op_verf __attribute__((aligned(32)));  					    /* EXCLUSIVE4 */  	clientid_t	op_clientid;        /* request */ @@ -250,7 +250,6 @@ struct nfsd4_open {  	struct nfs4_acl *op_acl;  	struct xdr_netobj op_label;  }; -#define op_iattr	iattr  struct nfsd4_open_confirm {  	stateid_t	oc_req_stateid		/* request */; @@ -288,9 +287,8 @@ struct nfsd4_readdir {  	struct svc_fh * rd_fhp;             /* response */  	struct readdir_cd	common; -	__be32 *		buffer; -	int			buflen; -	__be32 *		offset; +	struct xdr_stream	*xdr; +	int			cookie_offset;  };  struct nfsd4_release_lockowner { @@ -374,7 +372,6 @@ struct nfsd4_test_stateid {  struct nfsd4_free_stateid {  	stateid_t	fr_stateid;         /* request */ -	__be32		fr_status;          /* response */  };  /* also used for NVERIFY */ @@ -508,9 +505,7 @@ struct 
nfsd4_compoundargs {  struct nfsd4_compoundres {  	/* scratch variables for XDR encode */ -	__be32 *			p; -	__be32 *			end; -	struct xdr_buf *		xbuf; +	struct xdr_stream		xdr;  	struct svc_rqst *		rqstp;  	u32				taglen; @@ -540,6 +535,9 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp)  	return argp->opcnt == resp->opcnt;  } +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op); +void warn_on_nonidempotent_op(struct nfsd4_op *op); +  #define NFS4_SVC_XDRSIZE		sizeof(struct nfsd4_compoundargs)  static inline void @@ -565,10 +563,11 @@ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,  		struct nfsd4_compoundres *);  __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);  void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); -void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); -__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, -		       struct dentry *dentry, __be32 **buffer, int countp, -		       u32 *bmval, struct svc_rqst *, int ignore_crossmnt); +void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, +		struct svc_fh *fhp, struct svc_export *exp, +		struct dentry *dentry, +		u32 *bmval, struct svc_rqst *, int ignore_crossmnt);  extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,  		struct nfsd4_compound_state *,  		struct nfsd4_setclientid *setclid); @@ -576,8 +575,6 @@ extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,  		struct nfsd4_compound_state *,  		struct nfsd4_setclientid_confirm *setclientid_confirm);  extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); -extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, -		struct nfsd4_sequence *seq);  extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,  		struct nfsd4_compound_state *, struct nfsd4_exchange_id *);  extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, 
struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);  | 
