diff options
Diffstat (limited to 'fs/nfsd')
38 files changed, 4258 insertions, 2868 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 8df1ea4a6ff..f994e750e0d 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -65,8 +65,8 @@ config NFSD_V3_ACL If unsure, say N. config NFSD_V4 - bool "NFS server support for NFS version 4 (EXPERIMENTAL)" - depends on NFSD && PROC_FS && EXPERIMENTAL + bool "NFS server support for NFS version 4" + depends on NFSD && PROC_FS select NFSD_V3 select FS_POSIX_ACL select SUNRPC_GSS @@ -81,6 +81,22 @@ config NFSD_V4 If unsure, say N. +config NFSD_V4_SECURITY_LABEL + bool "Provide Security Label support for NFSv4 server" + depends on NFSD_V4 && SECURITY + help + + Say Y here if you want enable fine-grained security label attribute + support for NFS version 4. Security labels allow security modules like + SELinux and Smack to label files to facilitate enforcement of their policies. + Without this an NFSv4 mount will have the same label on each file. + + If you do not wish to enable fine-grained security labels SELinux or + Smack policies on NFSv4 files, say N. + + WARNING: there is still a chance of backwards-incompatible protocol changes. + For now we recommend "Y" only for developers and testers. + config NFSD_FAULT_INJECTION bool "NFS server manual fault injection" depends on NFSD_V4 && DEBUG_KERNEL diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index 34e5c40af5e..a986ceb6fd0 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -35,25 +35,25 @@ #ifndef LINUX_NFS4_ACL_H #define LINUX_NFS4_ACL_H -#include <linux/posix_acl.h> +struct nfs4_acl; +struct svc_fh; +struct svc_rqst; -/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to - * fit in a page: */ -#define NFS4_ACL_MAX 170 +/* + * Maximum ACL we'll accept from a client; chosen (somewhat + * arbitrarily) so that kmalloc'ing the ACL shouldn't require a + * high-order allocation. This allows 204 ACEs on x86_64: + */ +#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \ + / sizeof(struct nfs4_ace)) struct nfs4_acl *nfs4_acl_new(int); int nfs4_acl_get_whotype(char *, u32); -int nfs4_acl_write_who(int who, char *p); -int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, - uid_t who, u32 mask); - -#define NFS4_ACL_TYPE_DEFAULT 0x01 -#define NFS4_ACL_DIR 0x02 -#define NFS4_ACL_OWNER 0x04 +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); -struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct posix_acl *, - struct posix_acl *, unsigned int flags); -int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *, struct posix_acl **, - struct posix_acl **, unsigned int flags); +int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, + struct nfs4_acl **acl); +__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl); #endif /* LINUX_NFS4_ACL_H */ diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 34a10d78b83..72f44823adb 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -1,7 +1,6 @@ /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ #include <linux/sched.h> -#include <linux/user_namespace.h> #include "nfsd.h" #include "auth.h" @@ -25,7 +24,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) struct cred *new; int i; int flags = nfsexp_flags(rqstp, exp); - int ret; validate_process_creds(); @@ -47,9 +45,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) if (!gi) goto oom; } else if (flags & NFSEXP_ROOTSQUASH) { - if (!new->fsuid) + if (uid_eq(new->fsuid, GLOBAL_ROOT_UID)) new->fsuid = exp->ex_anon_uid; - if (!new->fsgid) + if (gid_eq(new->fsgid, GLOBAL_ROOT_GID)) new->fsgid = exp->ex_anon_gid; gi = groups_alloc(rqgi->ngroups); @@ -58,7 +56,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) for (i = 0; i < rqgi->ngroups; i++) { if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i))) - GROUP_AT(gi, i) = make_kgid(&init_user_ns, exp->ex_anon_gid); + GROUP_AT(gi, i) = exp->ex_anon_gid; else GROUP_AT(gi, i) = GROUP_AT(rqgi, i); } @@ -66,17 +64,15 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) gi = get_group_info(rqgi); } - if (new->fsuid == (uid_t) -1) + if (uid_eq(new->fsuid, INVALID_UID)) new->fsuid = exp->ex_anon_uid; - if (new->fsgid == (gid_t) -1) + if (gid_eq(new->fsgid, INVALID_GID)) new->fsgid = exp->ex_anon_gid; - ret = set_groups(new, gi); + set_groups(new, gi); put_group_info(gi); - if (ret < 0) - goto error; - if (new->fsuid) + if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID)) new->cap_effective = cap_drop_nfsd_set(new->cap_effective); else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, @@ -88,9 +84,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) return 0; oom: - ret = -ENOMEM; -error: abort_creds(new); - return ret; + return -ENOMEM; } diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h index 78b3c0e9382..53325a12ba6 100644 --- a/fs/nfsd/auth.h +++ b/fs/nfsd/auth.h @@ -1,6 +1,5 @@ /* * nfsd-specific authentication stuff. - * uid/gid mapping not yet implemented. * * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ @@ -8,11 +7,6 @@ #ifndef LINUX_NFSD_AUTH_H #define LINUX_NFSD_AUTH_H -#define nfsd_luid(rq, uid) ((u32)(uid)) -#define nfsd_lgid(rq, gid) ((u32)(gid)) -#define nfsd_ruid(rq, uid) ((u32)(uid)) -#define nfsd_rgid(rq, gid) ((u32)(gid)) - /* * Set the current process's fsuid/fsgid etc to those of the NFS * client user diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 93cc9d34c45..b582f9ab6b2 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -12,6 +12,10 @@ /* * Representation of a reply cache entry. + * + * Note that we use a sockaddr_in6 to hold the address instead of the more + * typical sockaddr_storage. This is for space reasons, since sockaddr_storage + * is much larger than a sockaddr_in6. */ struct svc_cacherep { struct hlist_node c_hash; @@ -20,11 +24,13 @@ struct svc_cacherep { unsigned char c_state, /* unused, inprog, done */ c_type, /* status, buffer */ c_secure : 1; /* req came from port < 1024 */ - struct sockaddr_in c_addr; + struct sockaddr_in6 c_addr; __be32 c_xid; u32 c_prot; u32 c_proc; u32 c_vers; + unsigned int c_len; + __wsum c_csum; unsigned long c_timestamp; union { struct kvec u_vec; @@ -46,8 +52,7 @@ enum { enum { RC_DROPIT, RC_REPLY, - RC_DOIT, - RC_INTR + RC_DOIT }; /* @@ -67,17 +72,16 @@ enum { */ #define RC_DELAY (HZ/5) +/* Cache entries expire after this time period */ +#define RC_EXPIRE (120 * HZ) + +/* Checksum this amount of the request */ +#define RC_CSUMLEN (256U) + int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); - -#ifdef CONFIG_NFSD_V4 -void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp); -#else /* CONFIG_NFSD_V4 */ -static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) -{ -} -#endif /* CONFIG_NFSD_V4 */ +int nfsd_reply_cache_stats_open(struct inode *, struct file *); #endif /* NFSCACHE_H */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index a3946cf13fc..13b85f94d9e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -17,17 +17,12 @@ #include <linux/exportfs.h> #include <linux/sunrpc/svc_xprt.h> -#include <net/ipv6.h> - #include "nfsd.h" #include "nfsfh.h" #include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_EXPORT -typedef struct auth_domain svc_client; -typedef struct svc_export svc_export; - /* * We have two caches. * One maps client+vfsmnt+dentry to export options - the export map @@ -67,18 +62,13 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, expkey_request); -} - static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *); static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) { - /* client fsidtype fsid [path] */ + /* client fsidtype fsid expiry [path] */ char *buf; int len; struct auth_domain *dom = NULL; @@ -245,7 +235,7 @@ static struct cache_detail svc_expkey_cache_template = { .hash_size = EXPKEY_HASHMAX, .name = "nfsd.fh", .cache_put = expkey_put, - .cache_upcall = expkey_upcall, + .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, .match = expkey_match, @@ -300,13 +290,19 @@ svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) { + struct nfsd4_fs_location *locations = fsloc->locations; int i; + if (!locations) + return; + for (i = 0; i < fsloc->locations_count; i++) { - kfree(fsloc->locations[i].path); - kfree(fsloc->locations[i].hosts); + kfree(locations[i].path); + kfree(locations[i].hosts); } - kfree(fsloc->locations); + + kfree(locations); + fsloc->locations = NULL; } static void svc_export_put(struct kref *ref) @@ -315,6 +311,7 @@ static void svc_export_put(struct kref *ref) path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); + kfree(exp->ex_uuid); kfree(exp); } @@ -337,11 +334,6 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); -} - static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); @@ -397,6 +389,10 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) int len; int migrated, i, err; + /* more than one fsloc */ + if (fsloc->locations) + return -EINVAL; + /* listsize */ err = get_uint(mesg, &fsloc->locations_count); if (err) @@ -446,13 +442,18 @@ out_free_all: static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { - int listsize, err; struct exp_flavor_info *f; + u32 listsize; + int err; + + /* more than one secinfo */ + if (exp->ex_nflavors) + return -EINVAL; - err = get_int(mesg, &listsize); + err = get_uint(mesg, &listsize); if (err) return err; - if (listsize < 0 || listsize > MAX_SECINFO_LIST) + if (listsize > MAX_SECINFO_LIST) return -EINVAL; for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { @@ -483,6 +484,27 @@ static inline int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } #endif +static inline int +uuid_parse(char **mesg, char *buf, unsigned char **puuid) +{ + int len; + + /* more than one uuid */ + if (*puuid) + return -EINVAL; + + /* expect a 16 byte uuid encoded as \xXXXX... */ + len = qword_get(mesg, buf, PAGE_SIZE); + if (len != EX_UUID_LEN) + return -EINVAL; + + *puuid = kmemdup(buf, EX_UUID_LEN, GFP_KERNEL); + if (*puuid == NULL) + return -ENOMEM; + + return 0; +} + static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client path expiry [flags anonuid anongid fsid] */ @@ -544,13 +566,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) err = get_int(&mesg, &an_int); if (err) goto out3; - exp.ex_anon_uid= an_int; + exp.ex_anon_uid= make_kuid(&init_user_ns, an_int); /* anon gid */ err = get_int(&mesg, &an_int); if (err) goto out3; - exp.ex_anon_gid= an_int; + exp.ex_anon_gid= make_kgid(&init_user_ns, an_int); /* fsid */ err = get_int(&mesg, &an_int); @@ -561,18 +583,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { if (strcmp(buf, "fsloc") == 0) err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); - else if (strcmp(buf, "uuid") == 0) { - /* expect a 16 byte uuid encoded as \xXXXX... */ - len = qword_get(&mesg, buf, PAGE_SIZE); - if (len != 16) - err = -EINVAL; - else { - exp.ex_uuid = - kmemdup(buf, 16, GFP_KERNEL); - if (exp.ex_uuid == NULL) - err = -ENOMEM; - } - } else if (strcmp(buf, "secinfo") == 0) + else if (strcmp(buf, "uuid") == 0) + err = uuid_parse(&mesg, buf, &exp.ex_uuid); + else if (strcmp(buf, "secinfo") == 0) err = secinfo_parse(&mesg, buf, &exp); else /* quietly ignore unknown words and anything @@ -588,6 +601,26 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) exp.ex_uuid); if (err) goto out4; + /* + * No point caching this if it would immediately expire. + * Also, this protects exportfs's dummy export from the + * anon_uid/anon_gid checks: + */ + if (exp.h.expiry_time < seconds_since_boot()) + goto out4; + /* + * For some reason exportfs has been passing down an + * invalid (-1) uid & gid on the "dummy" export which it + * uses to test export support. To make sure exportfs + * sees errors from check_export we therefore need to + * delay these checks till after check_export: + */ + err = -EINVAL; + if (!uid_valid(exp.ex_anon_uid)) + goto out4; + if (!gid_valid(exp.ex_anon_gid)) + goto out4; + err = 0; } expp = svc_export_lookup(&exp); @@ -613,7 +646,7 @@ out: } static void exp_flags(struct seq_file *m, int flag, int fsid, - uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); + kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fslocs); static void show_secinfo(struct seq_file *m, struct svc_export *exp); static int svc_export_show(struct seq_file *m, @@ -638,7 +671,7 @@ static int svc_export_show(struct seq_file *m, if (exp->ex_uuid) { int i; seq_puts(m, ",uuid="); - for (i=0; i<16; i++) { + for (i = 0; i < EX_UUID_LEN; i++) { if ((i&3) == 0 && i) seq_putc(m, ':'); seq_printf(m, "%02x", exp->ex_uuid[i]); @@ -670,6 +703,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; + new->ex_uuid = NULL; new->cd = item->cd; } @@ -711,7 +745,7 @@ static struct cache_detail svc_export_cache_template = { .hash_size = EXPORT_HASHMAX, .name = "nfsd.export", .cache_put = svc_export_put, - .cache_upcall = svc_export_upcall, + .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, .match = svc_export_match, @@ -759,7 +793,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old) static struct svc_expkey * -exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type, +exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) { struct svc_expkey key, *ek; @@ -781,9 +815,9 @@ exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type, return ek; } - -static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp, - const struct path *path, struct cache_req *reqp) +static struct svc_export * +exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp, + const struct path *path, struct cache_req *reqp) { struct svc_export *exp, key; int err; @@ -807,11 +841,11 @@ static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp, /* * Find the export entry for a given dentry. */ -static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp, - struct path *path) +static struct svc_export * +exp_parent(struct cache_detail *cd, struct auth_domain *clp, struct path *path) { struct dentry *saved = dget(path->dentry); - svc_export *exp = exp_get_by_name(cd, clp, path, NULL); + struct svc_export *exp = exp_get_by_name(cd, clp, path, NULL); while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { struct dentry *parent = dget_parent(path->dentry); @@ -832,7 +866,7 @@ static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp, * since its harder to fool a kernel module than a user space program. */ int -exp_rootfh(struct net *net, svc_client *clp, char *name, +exp_rootfh(struct net *net, struct auth_domain *clp, char *name, struct knfsd_fh *f, int maxsize) { struct svc_export *exp; @@ -1179,15 +1213,17 @@ static void show_secinfo(struct seq_file *m, struct svc_export *exp) } static void exp_flags(struct seq_file *m, int flag, int fsid, - uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) + kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc) { show_expflags(m, flag, NFSEXP_ALLFLAGS); if (flag & NFSEXP_FSID) seq_printf(m, ",fsid=%d", fsid); - if (anonu != (uid_t)-2 && anonu != (0x10000-2)) - seq_printf(m, ",anonuid=%u", anonu); - if (anong != (gid_t)-2 && anong != (0x10000-2)) - seq_printf(m, ",anongid=%u", anong); + if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) && + !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2))) + seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu)); + if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) && + !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2))) + seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong)); if (fsloc && fsloc->locations_count > 0) { char *loctype = (fsloc->migrated) ? "refer" : "replicas"; int i; diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h new file mode 100644 index 00000000000..cfeea85c5be --- /dev/null +++ b/fs/nfsd/export.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> + */ +#ifndef NFSD_EXPORT_H +#define NFSD_EXPORT_H + +#include <linux/sunrpc/cache.h> +#include <uapi/linux/nfsd/export.h> + +struct knfsd_fh; +struct svc_fh; +struct svc_rqst; + +/* + * FS Locations + */ + +#define MAX_FS_LOCATIONS 128 + +struct nfsd4_fs_location { + char *hosts; /* colon separated list of hosts */ + char *path; /* slash separated list of path components */ +}; + +struct nfsd4_fs_locations { + uint32_t locations_count; + struct nfsd4_fs_location *locations; +/* If we're not actually serving this data ourselves (only providing a + * list of replicas that do serve it) then we set "migrated": */ + int migrated; +}; + +/* + * We keep an array of pseudoflavors with the export, in order from most + * to least preferred. For the foreseeable future, we don't expect more + * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3, + * spkm3i, and spkm3p (and using all 8 at once should be rare). + */ +#define MAX_SECINFO_LIST 8 +#define EX_UUID_LEN 16 + +struct exp_flavor_info { + u32 pseudoflavor; + u32 flags; +}; + +struct svc_export { + struct cache_head h; + struct auth_domain * ex_client; + int ex_flags; + struct path ex_path; + kuid_t ex_anon_uid; + kgid_t ex_anon_gid; + int ex_fsid; + unsigned char * ex_uuid; /* 16 byte fsid */ + struct nfsd4_fs_locations ex_fslocs; + uint32_t ex_nflavors; + struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; + struct cache_detail *cd; +}; + +/* an "export key" (expkey) maps a filehandlefragement to an + * svc_export for a given client. There can be several per export, + * for the different fsid types. + */ +struct svc_expkey { + struct cache_head h; + + struct auth_domain * ek_client; + int ek_fsidtype; + u32 ek_fsid[6]; + + struct path ek_path; +}; + +#define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) +#define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) +#define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) + +int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); + +/* + * Function declarations + */ +int nfsd_export_init(struct net *); +void nfsd_export_shutdown(struct net *); +void nfsd_export_flush(struct net *); +struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, + struct path *); +struct svc_export * rqst_exp_parent(struct svc_rqst *, + struct path *); +struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); +int exp_rootfh(struct net *, struct auth_domain *, + char *path, struct knfsd_fh *, int maxsize); +__be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); +__be32 nfserrno(int errno); + +static inline void exp_put(struct svc_export *exp) +{ + cache_put(&exp->h, exp->cd); +} + +static inline void exp_get(struct svc_export *exp) +{ + cache_get(&exp->h); +} +struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); + +#endif /* NFSD_EXPORT_H */ diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index e761ee95617..2ed05c3cd43 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -9,7 +9,7 @@ #include <linux/debugfs.h> #include <linux/module.h> #include <linux/nsproxy.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <asm/uaccess.h> #include "state.h" @@ -97,25 +97,14 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf, { static u64 val; char read_buf[25]; - size_t size, ret; + size_t size; loff_t pos = *ppos; if (!pos) - nfsd_inject_get(file->f_dentry->d_inode->i_private, &val); + nfsd_inject_get(file_inode(file)->i_private, &val); size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); - if (pos < 0) - return -EINVAL; - if (pos >= size || !len) - return 0; - if (len > size - pos) - len = size - pos; - ret = copy_to_user(buf, read_buf + pos, len); - if (ret == len) - return -EFAULT; - len -= ret; - *ppos = pos + len; - return len; + return simple_read_from_buffer(buf, len, ppos, read_buf, size); } static ssize_t fault_inject_write(struct file *file, const char __user *buf, @@ -133,10 +122,10 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf, size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); if (size > 0) - nfsd_inject_set_client(file->f_dentry->d_inode->i_private, &sa, size); + nfsd_inject_set_client(file_inode(file)->i_private, &sa, size); else { val = simple_strtoll(write_buf, NULL, 0); - nfsd_inject_set(file->f_dentry->d_inode->i_private, val); + nfsd_inject_set(file_inode(file)->i_private, val); } return len; /* on success, claim we got the whole input */ } diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h index 9d513efc01b..a3f34900091 100644 --- a/fs/nfsd/idmap.h +++ b/fs/nfsd/idmap.h @@ -54,9 +54,9 @@ static inline void nfsd_idmap_shutdown(struct net *net) } #endif -__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); -__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); -int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); -int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); +__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *); +__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *); +__be32 nfsd4_encode_user(struct xdr_stream *, struct svc_rqst *, kuid_t); +__be32 nfsd4_encode_group(struct xdr_stream *, struct svc_rqst *, kgid_t); #endif /* LINUX_NFSD_IDMAP_H */ diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 1051bebff1b..d32b3aa6600 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -80,6 +80,7 @@ struct nfsd_net { */ struct list_head client_lru; struct list_head close_lru; + struct list_head del_recall_lru; struct delayed_work laundromat_work; @@ -94,6 +95,7 @@ struct nfsd_net { time_t nfsd4_grace; bool nfsd_net_up; + bool lockd_up; /* * Time of server startup diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 9170861c804..12b023a7ab7 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -30,8 +30,9 @@ nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) { - svc_fh *fh; struct posix_acl *acl; + struct inode *inode; + svc_fh *fh; __be32 nfserr = 0; dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); @@ -41,26 +42,24 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, if (nfserr) RETURN_STATUS(nfserr); + inode = fh->fh_dentry->d_inode; + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) RETURN_STATUS(nfserr_inval); resp->mask = argp->mask; + nfserr = fh_getattr(fh, &resp->stat); + if (nfserr) + goto fail; + if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { - acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + acl = get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } if (acl == NULL) { /* Solaris returns the inode's minimum ACL. */ - - struct inode *inode = fh->fh_dentry->d_inode; acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } resp->acl_access = acl; @@ -68,17 +67,10 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { /* Check how Solaris handles requests for the Default ACL of a non-directory! */ - - acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + acl = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } resp->acl_default = acl; } @@ -99,28 +91,51 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, struct nfsd3_setaclargs *argp, struct nfsd_attrstat *resp) { + struct inode *inode; svc_fh *fh; __be32 nfserr = 0; + int error; dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); fh = fh_copy(&resp->fh, &argp->fh); nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); + if (nfserr) + goto out; - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_ACCESS, argp->acl_access) ); - } - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + inode = fh->fh_dentry->d_inode; + if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { + error = -EOPNOTSUPP; + goto out_errno; } + error = fh_want_write(fh); + if (error) + goto out_errno; + + error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + if (error) + goto out_drop_write; + error = inode->i_op->set_acl(inode, argp->acl_default, + ACL_TYPE_DEFAULT); + if (error) + goto out_drop_write; + + fh_drop_write(fh); + + nfserr = fh_getattr(fh, &resp->stat); + +out: /* argp->acl_{access,default} may have been allocated in nfssvc_decode_setaclargs. */ posix_acl_release(argp->acl_access); posix_acl_release(argp->acl_default); return nfserr; +out_drop_write: + fh_drop_write(fh); +out_errno: + nfserr = nfserrno(error); + goto out; } /* @@ -129,10 +144,15 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) { + __be32 nfserr; dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (nfserr) + return nfserr; + nfserr = fh_getattr(&resp->fh, &resp->stat); + return nfserr; } /* @@ -150,6 +170,9 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg fh_copy(&resp->fh, &argp->fh); resp->access = argp->access; nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + if (nfserr) + return nfserr; + nfserr = fh_getattr(&resp->fh, &resp->stat); return nfserr; } @@ -159,7 +182,8 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->mask = ntohl(*p); p++; @@ -174,7 +198,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, unsigned int base; int n; - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->mask = ntohl(*p++); if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || @@ -195,7 +220,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -203,7 +229,8 @@ static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->access = ntohl(*p++); @@ -243,7 +270,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, return 0; inode = dentry->d_inode; - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); *p++ = htonl(resp->mask); if (!xdr_ressize_check(rqstp, p)) return 0; @@ -274,7 +301,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p, struct nfsd_attrstat *resp) { - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } @@ -282,7 +309,7 @@ static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessres *resp) { - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); *p++ = htonl(resp->access); return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 9cbc1a841f8..2a514e21dc7 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -29,8 +29,9 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) { - svc_fh *fh; struct posix_acl *acl; + struct inode *inode; + svc_fh *fh; __be32 nfserr = 0; fh = fh_copy(&resp->fh, &argp->fh); @@ -38,26 +39,20 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, if (nfserr) RETURN_STATUS(nfserr); + inode = fh->fh_dentry->d_inode; + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) RETURN_STATUS(nfserr_inval); resp->mask = argp->mask; if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { - acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + acl = get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } if (acl == NULL) { /* Solaris returns the inode's minimum ACL. */ - - struct inode *inode = fh->fh_dentry->d_inode; acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } resp->acl_access = acl; @@ -65,17 +60,10 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { /* Check how Solaris handles requests for the Default ACL of a non-directory! */ - - acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + acl = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } resp->acl_default = acl; } @@ -96,21 +84,37 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, struct nfsd3_setaclargs *argp, struct nfsd3_attrstat *resp) { + struct inode *inode; svc_fh *fh; __be32 nfserr = 0; + int error; fh = fh_copy(&resp->fh, &argp->fh); nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); + if (nfserr) + goto out; - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_ACCESS, argp->acl_access) ); - } - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + inode = fh->fh_dentry->d_inode; + if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { + error = -EOPNOTSUPP; + goto out_errno; } + error = fh_want_write(fh); + if (error) + goto out_errno; + + error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + if (error) + goto out_drop_write; + error = inode->i_op->set_acl(inode, argp->acl_default, + ACL_TYPE_DEFAULT); + +out_drop_write: + fh_drop_write(fh); +out_errno: + nfserr = nfserrno(error); +out: /* argp->acl_{access,default} may have been allocated in nfs3svc_decode_setaclargs. */ posix_acl_release(argp->acl_access); @@ -124,7 +128,8 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *args) { - if (!(p = nfs3svc_decode_fh(p, &args->fh))) + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) return 0; args->mask = ntohl(*p); p++; @@ -139,7 +144,8 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, unsigned int base; int n; - if (!(p = nfs3svc_decode_fh(p, &args->fh))) + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) return 0; args->mask = ntohl(*p++); if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 1fc02dfdc5c..40128991313 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -43,7 +43,6 @@ static __be32 nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, struct nfsd3_attrstat *resp) { - int err; __be32 nfserr; dprintk("nfsd: GETATTR(3) %s\n", @@ -55,9 +54,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, if (nfserr) RETURN_STATUS(nfserr); - err = vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, &resp->stat); - nfserr = nfserrno(err); + nfserr = fh_getattr(&resp->fh, &resp->stat); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 324c0baf7cd..e6c01e80325 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -11,6 +11,7 @@ #include "xdr3.h" #include "auth.h" #include "netns.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -105,12 +106,14 @@ decode_sattr3(__be32 *p, struct iattr *iap) iap->ia_mode = ntohl(*p++); } if (*p++) { - iap->ia_valid |= ATTR_UID; - iap->ia_uid = ntohl(*p++); + iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++)); + if (uid_valid(iap->ia_uid)) + iap->ia_valid |= ATTR_UID; } if (*p++) { - iap->ia_valid |= ATTR_GID; - iap->ia_gid = ntohl(*p++); + iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++)); + if (gid_valid(iap->ia_gid)) + iap->ia_valid |= ATTR_GID; } if (*p++) { u64 newsize; @@ -165,10 +168,10 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); - *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) (stat->mode & S_IALLUGO)); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); + *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { @@ -204,10 +207,10 @@ encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; if (dentry && dentry->d_inode) { - int err; + __be32 err; struct kstat stat; - err = vfs_getattr(fhp->fh_export->ex_path.mnt, dentry, &stat); + err = fh_getattr(fhp, &stat); if (!err) { *p++ = xdr_one; /* attributes follow */ lease_get_mtime(dentry->d_inode, &stat.mtime); @@ -254,13 +257,12 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) */ void fill_post_wcc(struct svc_fh *fhp) { - int err; + __be32 err; if (fhp->fh_post_saved) printk("nfsd: inode locked twice during operation.\n"); - err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, - &fhp->fh_post_attr); + err = fh_getattr(fhp, &fhp->fh_post_attr); fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; if (err) { fhp->fh_post_saved = 0; @@ -276,7 +278,8 @@ void fill_post_wcc(struct svc_fh *fhp) int nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -285,7 +288,8 @@ int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_sattrargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = decode_sattr3(p, &args->attrs); @@ -313,7 +317,8 @@ int nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->access = ntohl(*p++); @@ -328,7 +333,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, int v; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); @@ -358,7 +364,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); @@ -533,7 +540,8 @@ int nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readlinkargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); @@ -556,7 +564,8 @@ int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; @@ -578,7 +587,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, int len; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; @@ -603,7 +613,8 @@ int nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_commitargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); args->count = ntohl(*p++); @@ -840,21 +851,21 @@ out: static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) { - struct svc_fh fh; + struct svc_fh *fh = &cd->scratch; __be32 err; - fh_init(&fh, NFS3_FHSIZE); - err = compose_entry_fh(cd, &fh, name, namlen); + fh_init(fh, NFS3_FHSIZE); + err = compose_entry_fh(cd, fh, name, namlen); if (err) { *p++ = 0; *p++ = 0; goto out; } - p = encode_post_op_attr(cd->rqstp, p, &fh); + p = encode_post_op_attr(cd->rqstp, p, fh); *p++ = xdr_one; /* yes, a file handle follows */ - p = encode_fh(p, &fh); + p = encode_fh(p, fh); out: - fh_put(&fh); + fh_put(fh); return p; } diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 9c51aff02ae..d714156a19f 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -36,9 +36,14 @@ #include <linux/slab.h> #include <linux/nfs_fs.h> -#include <linux/export.h> +#include "nfsfh.h" +#include "nfsd.h" #include "acl.h" +#include "vfs.h" +#define NFS4_ACL_TYPE_DEFAULT 0x01 +#define NFS4_ACL_DIR 0x02 +#define NFS4_ACL_OWNER 0x04 /* mode bit translations: */ #define NFS4_READ_MODE (NFS4_ACE_READ_DATA) @@ -130,36 +135,47 @@ static short ace2type(struct nfs4_ace *); static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); -struct nfs4_acl * -nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, - unsigned int flags) +int +nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, + struct nfs4_acl **acl) { - struct nfs4_acl *acl; + struct inode *inode = dentry->d_inode; + int error = 0; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; int size = 0; - if (pacl) { - if (posix_acl_valid(pacl) < 0) - return ERR_PTR(-EINVAL); - size += 2*pacl->a_count; + pacl = get_acl(inode, ACL_TYPE_ACCESS); + if (!pacl) { + pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(pacl)) + return PTR_ERR(pacl); } - if (dpacl) { - if (posix_acl_valid(dpacl) < 0) - return ERR_PTR(-EINVAL); - size += 2*dpacl->a_count; + /* allocate for worst case: one (deny, allow) pair each: */ + size += 2 * pacl->a_count; + + if (S_ISDIR(inode->i_mode)) { + flags = NFS4_ACL_DIR; + dpacl = get_acl(inode, ACL_TYPE_DEFAULT); + if (dpacl) + size += 2 * dpacl->a_count; } - /* Allocate for worst case: one (deny, allow) pair each: */ - acl = nfs4_acl_new(size); - if (acl == NULL) - return ERR_PTR(-ENOMEM); + *acl = nfs4_acl_new(size); + if (*acl == NULL) { + error = -ENOMEM; + goto out; + } - if (pacl) - _posix_to_nfsv4_one(pacl, acl, flags & ~NFS4_ACL_TYPE_DEFAULT); + _posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT); if (dpacl) - _posix_to_nfsv4_one(dpacl, acl, flags | NFS4_ACL_TYPE_DEFAULT); + _posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT); - return acl; + out: + posix_acl_release(pacl); + posix_acl_release(dpacl); + return error; } struct posix_acl_summary { @@ -264,7 +280,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, ace->flag = eflag; ace->access_mask = deny_mask_from_posix(deny, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_uid = pa->e_uid; ace++; acl->naces++; } @@ -273,7 +289,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_uid = pa->e_uid; ace++; acl->naces++; pa++; @@ -300,7 +316,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_gid = pa->e_gid; ace++; acl->naces++; pa++; @@ -329,7 +345,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; ace->access_mask = deny_mask_from_posix(deny, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_gid = pa->e_gid; ace++; acl->naces++; } @@ -345,6 +361,18 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, acl->naces++; } +static bool +pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2) +{ + if (pace1->e_tag != pace2->e_tag) + return pace1->e_tag > pace2->e_tag; + if (pace1->e_tag == ACL_USER) + return uid_gt(pace1->e_uid, pace2->e_uid); + if (pace1->e_tag == ACL_GROUP) + return gid_gt(pace1->e_gid, pace2->e_gid); + return false; +} + static void sort_pacl_range(struct posix_acl *pacl, int start, int end) { int sorted = 0, i; @@ -355,8 +383,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) { while (!sorted) { sorted = 1; for (i = start; i < end; i++) { - if (pacl->a_entries[i].e_id - > pacl->a_entries[i+1].e_id) { + if (pace_gt(&pacl->a_entries[i], + &pacl->a_entries[i+1])) { sorted = 0; tmp = pacl->a_entries[i]; pacl->a_entries[i] = pacl->a_entries[i+1]; @@ -373,8 +401,10 @@ sort_pacl(struct posix_acl *pacl) * by uid/gid. */ int i, j; - if (pacl->a_count <= 4) - return; /* no users or groups */ + /* no users or groups */ + if (!pacl || pacl->a_count <= 4) + return; + i = 1; while (pacl->a_entries[i].e_tag == ACL_USER) i++; @@ -398,7 +428,10 @@ struct posix_ace_state { }; struct posix_user_ace_state { - uid_t uid; + union { + kuid_t uid; + kgid_t gid; + }; struct posix_ace_state perms; }; @@ -498,19 +531,21 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) /* * ACLs with no ACEs are treated differently in the inheritable - * and effective cases: when there are no inheritable ACEs, we - * set a zero-length default posix acl: + * and effective cases: when there are no inheritable ACEs, + * calls ->set_acl with a NULL ACL structure. */ - if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) { - pacl = posix_acl_alloc(0, GFP_KERNEL); - return pacl ? pacl : ERR_PTR(-ENOMEM); - } + if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) + return NULL; + /* * When there are no effective ACEs, the following will end * up setting a 3-element effective posix ACL with all * permissions zero. */ - nace = 4 + state->users->n + state->groups->n; + if (!state->users->n && !state->groups->n) + nace = 3; + else /* Note we also include a MASK ACE in this case: */ + nace = 4 + state->users->n + state->groups->n; pacl = posix_acl_alloc(nace, GFP_KERNEL); if (!pacl) return ERR_PTR(-ENOMEM); @@ -521,7 +556,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) if (error) goto out_err; low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; for (i=0; i < state->users->n; i++) { pace++; @@ -531,7 +565,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) goto out_err; low_mode_from_nfs4(state->users->aces[i].perms.allow, &pace->e_perm, flags); - pace->e_id = state->users->aces[i].uid; + pace->e_uid = state->users->aces[i].uid; add_to_mask(state, &state->users->aces[i].perms); } @@ -541,7 +575,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) if (error) goto out_err; low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; add_to_mask(state, &state->group); for (i=0; i < state->groups->n; i++) { @@ -552,14 +585,15 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) goto out_err; low_mode_from_nfs4(state->groups->aces[i].perms.allow, &pace->e_perm, flags); - pace->e_id = state->groups->aces[i].uid; + pace->e_gid = state->groups->aces[i].gid; add_to_mask(state, &state->groups->aces[i].perms); } - pace++; - pace->e_tag = ACL_MASK; - low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; + if (state->users->n || state->groups->n) { + pace++; + pace->e_tag = ACL_MASK; + low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); + } pace++; pace->e_tag = ACL_OTHER; @@ -567,7 +601,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) if (error) goto out_err; low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; return pacl; out_err: @@ -587,12 +620,13 @@ static inline void deny_bits(struct posix_ace_state *astate, u32 mask) astate->deny |= mask & ~astate->allow; } -static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid) +static int find_uid(struct posix_acl_state *state, kuid_t uid) { + struct posix_ace_state_array *a = state->users; int i; for (i = 0; i < a->n; i++) - if (a->aces[i].uid == uid) + if (uid_eq(a->aces[i].uid, uid)) return i; /* Not found: */ a->n++; @@ -603,6 +637,23 @@ static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array return i; } +static int find_gid(struct posix_acl_state *state, kgid_t gid) +{ + struct posix_ace_state_array *a = state->groups; + int i; + + for (i = 0; i < a->n; i++) + if (gid_eq(a->aces[i].gid, gid)) + return i; + /* Not found: */ + a->n++; + a->aces[i].gid = gid; + a->aces[i].perms.allow = state->everyone.allow; + a->aces[i].perms.deny = state->everyone.deny; + + return i; +} + static void deny_bits_array(struct posix_ace_state_array *a, u32 mask) { int i; @@ -636,7 +687,7 @@ static void process_one_v4_ace(struct posix_acl_state *state, } break; case ACL_USER: - i = find_uid(state, state->users, ace->who); + i = find_uid(state, ace->who_uid); if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { allow_bits(&state->users->aces[i].perms, mask); } else { @@ -658,7 +709,7 @@ static void process_one_v4_ace(struct posix_acl_state *state, } break; case ACL_GROUP: - i = find_uid(state, state->groups, ace->who); + i = find_gid(state, ace->who_gid); if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { allow_bits(&state->groups->aces[i].perms, mask); } else { @@ -690,8 +741,9 @@ static void process_one_v4_ace(struct posix_acl_state *state, } } -int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, - struct posix_acl **dpacl, unsigned int flags) +static int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, + struct posix_acl **pacl, struct posix_acl **dpacl, + unsigned int flags) { struct posix_acl_state effective_acl_state, default_acl_state; struct nfs4_ace *ace; @@ -751,6 +803,57 @@ out_estate: return ret; } +__be32 +nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl) +{ + __be32 error; + int host_error; + struct dentry *dentry; + struct inode *inode; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; + + /* Get inode */ + error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); + if (error) + return error; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + if (!inode->i_op->set_acl || !IS_POSIXACL(inode)) + return nfserr_attrnotsupp; + + if (S_ISDIR(inode->i_mode)) + flags = NFS4_ACL_DIR; + + host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); + if (host_error == -EINVAL) + return nfserr_attrnotsupp; + if (host_error < 0) + goto out_nfserr; + + host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS); + if (host_error < 0) + goto out_release; + + if (S_ISDIR(inode->i_mode)) { + host_error = inode->i_op->set_acl(inode, dpacl, + ACL_TYPE_DEFAULT); + } + +out_release: + posix_acl_release(pacl); + posix_acl_release(dpacl); +out_nfserr: + if (host_error == -EOPNOTSUPP) + return nfserr_attrnotsupp; + else + return nfserrno(host_error); +} + + static short ace2type(struct nfs4_ace *ace) { @@ -769,9 +872,6 @@ ace2type(struct nfs4_ace *ace) return -1; } -EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4); -EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix); - struct nfs4_acl * nfs4_acl_new(int n) { @@ -819,21 +919,21 @@ nfs4_acl_get_whotype(char *p, u32 len) return NFS4_ACL_WHO_NAMED; } -int -nfs4_acl_write_who(int who, char *p) +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who) { + __be32 *p; int i; for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { - if (s2t_map[i].type == who) { - memcpy(p, s2t_map[i].string, s2t_map[i].stringlen); - return s2t_map[i].stringlen; - } + if (s2t_map[i].type != who) + continue; + p = xdr_reserve_space(xdr, s2t_map[i].stringlen + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, s2t_map[i].string, + s2t_map[i].stringlen); + return 0; } - BUG(); + WARN_ON_ONCE(1); return -1; } - -EXPORT_SYMBOL(nfs4_acl_new); -EXPORT_SYMBOL(nfs4_acl_get_whotype); -EXPORT_SYMBOL(nfs4_acl_write_who); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 99bc85ff021..2c73cae9899 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -32,11 +32,13 @@ */ #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/xprt.h> #include <linux/sunrpc/svc_xprt.h> #include <linux/slab.h> #include "nfsd.h" #include "state.h" #include "netns.h" +#include "xdr4cb.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -53,30 +55,6 @@ enum { NFSPROC4_CLNT_CB_SEQUENCE, }; -#define NFS4_MAXTAGLEN 20 - -#define NFS4_enc_cb_null_sz 0 -#define NFS4_dec_cb_null_sz 0 -#define cb_compound_enc_hdr_sz 4 -#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) -#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) -#define cb_sequence_enc_sz (sessionid_sz + 4 + \ - 1 /* no referring calls list yet */) -#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) - -#define op_enc_sz 1 -#define op_dec_sz 2 -#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) -#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) -#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ - cb_sequence_enc_sz + \ - 1 + enc_stateid_sz + \ - enc_nfs4_fh_sz) - -#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ - cb_sequence_dec_sz + \ - op_dec_sz) - struct nfs4_cb_compound_hdr { /* args */ u32 ident; /* minorversion 0 only */ @@ -658,11 +636,29 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc } } +static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) +{ + struct rpc_xprt *xprt; + + if (args->protocol != XPRT_TRANSPORT_BC_TCP) + return rpc_create(args); + + xprt = args->bc_xprt->xpt_bc_xprt; + if (xprt) { + xprt_get(xprt); + return rpc_create_xprt(args, xprt); + } + + return rpc_create(args); +} + static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { + int maxtime = max_cb_time(clp->net); struct rpc_timeout timeparms = { - .to_initval = max_cb_time(clp->net), + .to_initval = maxtime, .to_retries = 0, + .to_maxval = maxtime, }; struct rpc_create_args args = { .net = clp->net, @@ -697,7 +693,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ - client = rpc_create(&args); + client = create_backchannel_client(&args); if (IS_ERR(client)) { dprintk("NFSD: couldn't create callback client: %ld\n", PTR_ERR(client)); @@ -817,8 +813,7 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; u32 minorversion = clp->cl_minorversion; cb->cb_minorversion = minorversion; @@ -839,8 +834,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) static void nfsd4_cb_done(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; dprintk("%s: minorversion=%d\n", __func__, clp->cl_minorversion); @@ -863,7 +857,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *current_rpc_client = clp->cl_cb_client; nfsd4_cb_done(task, calldata); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index a1f10c0a625..a0ab0a847d6 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -65,7 +65,7 @@ MODULE_PARM_DESC(nfs4_disable_idmapping, struct ent { struct cache_head h; int type; /* User / Group */ - uid_t id; + u32 id; char name[IDMAP_NAMESZ]; char authname[IDMAP_NAMESZ]; }; @@ -140,12 +140,6 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); -} - -static int idtoname_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -192,7 +186,7 @@ static struct cache_detail idtoname_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.idtoname", .cache_put = ent_put, - .cache_upcall = idtoname_upcall, + .cache_request = idtoname_request, .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, @@ -321,12 +315,6 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); -} - -static int nametoid_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -365,7 +353,7 @@ static struct cache_detail nametoid_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.nametoid", .cache_put = ent_put, - .cache_upcall = nametoid_upcall, + .cache_request = nametoid_request, .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, @@ -540,7 +528,7 @@ rqst_authname(struct svc_rqst *rqstp) static __be32 idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, - uid_t *id) + u32 *id) { struct ent *item, key = { .type = type, @@ -563,31 +551,49 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen return 0; } -static int -idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) +static __be32 encode_ascii_id(struct xdr_stream *xdr, u32 id) +{ + char buf[11]; + int len; + __be32 *p; + + len = sprintf(buf, "%u", id); + p = xdr_reserve_space(xdr, len + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, buf, len); + return 0; +} + +static __be32 idmap_id_to_name(struct xdr_stream *xdr, + struct svc_rqst *rqstp, int type, u32 id) { struct ent *item, key = { .id = id, .type = type, }; + __be32 *p; int ret; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); if (ret == -ENOENT) - return sprintf(name, "%u", id); + return encode_ascii_id(xdr, id); if (ret) - return ret; + return nfserrno(ret); ret = strlen(item->name); - BUG_ON(ret > IDMAP_NAMESZ); - memcpy(name, item->name, ret); + WARN_ON_ONCE(ret > IDMAP_NAMESZ); + p = xdr_reserve_space(xdr, ret + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, item->name, ret); cache_put(&item->h, nn->idtoname_cache); - return ret; + return 0; } static bool -numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) +numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id) { int ret; char buf[11]; @@ -603,7 +609,7 @@ numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namel } static __be32 -do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) +do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id) { if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) if (numeric_name_to_id(rqstp, type, name, namelen, id)) @@ -615,36 +621,50 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u return idmap_name_to_id(rqstp, type, name, namelen, id); } -static int -do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) +static __be32 encode_name_from_id(struct xdr_stream *xdr, + struct svc_rqst *rqstp, int type, u32 id) { if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) - return sprintf(name, "%u", id); - return idmap_id_to_name(rqstp, type, id, name); + return encode_ascii_id(xdr, id); + return idmap_id_to_name(xdr, rqstp, type, id); } __be32 nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, - __u32 *id) + kuid_t *uid) { - return do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); + __be32 status; + u32 id = -1; + status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id); + *uid = make_kuid(&init_user_ns, id); + if (!uid_valid(*uid)) + status = nfserr_badowner; + return status; } __be32 nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, - __u32 *id) + kgid_t *gid) { - return do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); + __be32 status; + u32 id = -1; + status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id); + *gid = make_kgid(&init_user_ns, id); + if (!gid_valid(*gid)) + status = nfserr_badowner; + return status; } -int -nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp, + kuid_t uid) { - return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); + u32 id = from_kuid(&init_user_ns, uid); + return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id); } -int -nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp, + kgid_t gid) { - return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); + u32 id = from_kgid(&init_user_ns, gid); + return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9d1c5dba2bb..8f029db5d27 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -41,6 +41,37 @@ #include "vfs.h" #include "current_stateid.h" #include "netns.h" +#include "acl.h" + +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include <linux/security.h> + +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ + struct inode *inode = resfh->fh_dentry->d_inode; + int status; + + mutex_lock(&inode->i_mutex); + status = security_inode_setsecctx(resfh->fh_dentry, + label->data, label->len); + mutex_unlock(&inode->i_mutex); + + if (status) + /* + * XXX: We should really fail the whole open, but we may + * already have created a new file, so it may be too + * late. For now this seems the least of evils: + */ + bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + + return; +} +#else +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ } +#endif #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -191,17 +222,25 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) return nfserr_symlink; } +static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh *resfh) +{ + if (nfsd4_has_session(cstate)) + return; + fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, + &resfh->fh_handle); +} + static __be32 -do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh) { - struct svc_fh *resfh; + struct svc_fh *current_fh = &cstate->current_fh; int accmode; __be32 status; - resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); - if (!resfh) + *resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); + if (!*resfh) return nfserr_jukebox; - fh_init(resfh, NFS4_FHSIZE); + fh_init(*resfh, NFS4_FHSIZE); open->op_truncate = 0; if (open->op_create) { @@ -226,10 +265,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o */ status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, - resfh, open->op_createmode, + *resfh, open->op_createmode, (u32 *)open->op_verf.data, &open->op_truncate, &open->op_created); + if (!status && open->op_label.len) + nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); + /* * Following rfc 3530 14.2.16, use the returned bitmask * to indicate which attributes we used to store the @@ -238,39 +280,41 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_MODIFY); - } else { + } else + /* + * Note this may exit with the parent still locked. + * We will hold the lock until nfsd4_open's final + * lookup, to prevent renames or unlinks until we've had + * a chance to an acquire a delegation if appropriate. + */ status = nfsd_lookup(rqstp, current_fh, - open->op_fname.data, open->op_fname.len, resfh); - fh_unlock(current_fh); - } + open->op_fname.data, open->op_fname.len, *resfh); if (status) goto out; - status = nfsd_check_obj_isreg(resfh); + status = nfsd_check_obj_isreg(*resfh); if (status) goto out; if (is_create_with_attrs(open) && open->op_acl != NULL) - do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval); + do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval); - /* set reply cache */ - fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, - &resfh->fh_handle); + nfsd4_set_open_owner_reply_cache(cstate, open, *resfh); accmode = NFSD_MAY_NOP; - if (open->op_created) + if (open->op_created || + open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) accmode |= NFSD_MAY_OWNER_OVERRIDE; - status = do_open_permission(rqstp, resfh, open, accmode); + status = do_open_permission(rqstp, *resfh, open, accmode); set_change_info(&open->op_cinfo, current_fh); - fh_dup2(current_fh, resfh); out: - fh_put(resfh); - kfree(resfh); return status; } static __be32 -do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { + struct svc_fh *current_fh = &cstate->current_fh; __be32 status; + int accmode = 0; /* We don't know the target directory, and therefore can not * set the change info @@ -278,15 +322,23 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); - /* set replay cache */ - fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, - ¤t_fh->fh_handle); + nfsd4_set_open_owner_reply_cache(cstate, open, current_fh); open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && (open->op_iattr.ia_size == 0); + /* + * In the delegation case, the client is telling us about an + * open that it *already* performed locally, some time ago. We + * should let it succeed now if possible. + * + * In the case of a CLAIM_FH open, on the other hand, the client + * may be counting on us to enforce permissions (the Linux 4.1 + * client uses this for normal opens, for example). + */ + if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH) + accmode = NFSD_MAY_OWNER_OVERRIDE; - status = do_open_permission(rqstp, current_fh, open, - NFSD_MAY_OWNER_OVERRIDE); + status = do_open_permission(rqstp, current_fh, open, accmode); return status; } @@ -306,6 +358,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { __be32 status; + struct svc_fh *resfh = NULL; struct nfsd4_compoundres *resp; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -351,6 +404,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (status) goto out; + if (open->op_xdr_error) { + status = open->op_xdr_error; + goto out; + } status = nfsd4_check_open_attributes(rqstp, cstate, open); if (status) @@ -368,28 +425,26 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_NULL: - status = do_open_lookup(rqstp, &cstate->current_fh, - open); + status = do_open_lookup(rqstp, cstate, open, &resfh); if (status) goto out; break; case NFS4_OPEN_CLAIM_PREVIOUS: - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; status = nfs4_check_open_reclaim(&open->op_clientid, cstate->minorversion, nn); if (status) goto out; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: - status = do_open_fhandle(rqstp, &cstate->current_fh, - open); + status = do_open_fhandle(rqstp, cstate, open); if (status) goto out; + resfh = &cstate->current_fh; break; case NFS4_OPEN_CLAIM_DELEG_PREV_FH: case NFS4_OPEN_CLAIM_DELEGATE_PREV: - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; dprintk("NFSD: unsupported OPEN claim type %d\n", open->op_claim_type); status = nfserr_notsupp; @@ -405,18 +460,42 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * successful, it (1) truncates the file if open->op_truncate was * set, (2) sets open->op_stateid, (3) sets open->op_delegation. */ - status = nfsd4_process_open2(rqstp, &cstate->current_fh, open); + status = nfsd4_process_open2(rqstp, resfh, open); WARN_ON(status && open->op_created); out: + if (resfh && resfh != &cstate->current_fh) { + fh_dup2(&cstate->current_fh, resfh); + fh_put(resfh); + kfree(resfh); + } nfsd4_cleanup_open_state(open, status); - if (open->op_openowner) + if (open->op_openowner && !nfsd4_has_session(cstate)) cstate->replay_owner = &open->op_openowner->oo_owner; - else + nfsd4_bump_seqid(cstate, status); + if (!cstate->replay_owner) nfs4_unlock_state(); return status; } /* + * OPEN is the only seqid-mutating operation whose decoding can fail + * with a seqid-mutating error (specifically, decoding of user names in + * the attributes). Therefore we have to do some processing to look up + * the stateowner so that we can bump the seqid. + */ +static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_op *op) +{ + struct nfsd4_open *open = (struct nfsd4_open *)&op->u; + + if (!seqid_mutating_err(ntohl(op->status))) + return op->status; + if (nfsd4_has_session(cstate)) + return op->status; + open->op_xdr_error = op->status; + return nfsd4_open(rqstp, cstate, open); +} + +/* * filehandle-manipulating ops. */ static __be32 @@ -538,15 +617,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (create->cr_type) { case NF4LNK: - /* ugh! we have to null-terminate the linktext, or - * vfs_symlink() will choke. it is always safe to - * null-terminate by brute force, since at worst we - * will overwrite the first byte of the create namelen - * in the XDR buffer, which has already been extracted - * during XDR decode. - */ - create->cr_linkname[create->cr_linklen] = 0; - status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, create->cr_linkname, create->cr_linklen, @@ -599,6 +669,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; + if (create->cr_label.len) + nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); + if (create->cr_acl != NULL) do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, create->cr_bmval); @@ -703,7 +776,6 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!nfsd4_last_compound_op(rqstp)) rqstp->rq_splice_ok = false; - nfs4_lock_state(); /* check stateid */ if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, &read->rd_stateid, @@ -711,11 +783,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; } - if (read->rd_filp) - get_file(read->rd_filp); status = nfs_ok; out: - nfs4_unlock_state(); read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -786,21 +855,11 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, rename->rn_snamelen, &cstate->current_fh, rename->rn_tname, rename->rn_tnamelen); - - /* the underlying filesystem returns different error's than required - * by NFSv4. both save_fh and current_fh have been verified.. */ - if (status == nfserr_isdir) - status = nfserr_exist; - else if ((status == nfserr_notdir) && - (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) && - S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode))) - status = nfserr_exist; - - if (!status) { - set_change_info(&rename->rn_sinfo, &cstate->current_fh); - set_change_info(&rename->rn_tinfo, &cstate->save_fh); - } - return status; + if (status) + return status; + set_change_info(&rename->rn_sinfo, &cstate->current_fh); + set_change_info(&rename->rn_tinfo, &cstate->save_fh); + return nfs_ok; } static __be32 @@ -864,10 +923,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, int err; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { - nfs4_lock_state(); status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, &setattr->sa_stateid, WR_STATE, NULL); - nfs4_unlock_state(); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; @@ -888,6 +945,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, setattr->sa_acl); if (status) goto out; + if (setattr->sa_label.len) + status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, + &setattr->sa_label); + if (status) + goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 0, (time_t)0); out: @@ -928,13 +990,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (write->wr_offset >= OFFSET_MAX) return nfserr_inval; - nfs4_lock_state(); status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, stateid, WR_STATE, &filp); - if (filp) - get_file(filp); - nfs4_unlock_state(); - if (status) { dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; @@ -993,14 +1050,17 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!buf) return nfserr_jukebox; - status = nfsd4_encode_fattr(&cstate->current_fh, + p = buf; + status = nfsd4_encode_fattr_to_buf(&p, count, &cstate->current_fh, cstate->current_fh.fh_export, - cstate->current_fh.fh_dentry, buf, - &count, verify->ve_bmval, + cstate->current_fh.fh_dentry, + verify->ve_bmval, rqstp, 0); - - /* this means that nfsd4_encode_fattr() ran out of space */ - if (status == nfserr_resource && count == 0) + /* + * If nfsd4_encode_fattr() ran out of space, assume that's because + * the attributes are longer (hence different) than those given: + */ + if (status == nfserr_resource) status = nfserr_not_same; if (status) goto out_kfree; @@ -1101,9 +1161,7 @@ struct nfsd4_operation { static struct nfsd4_operation nfsd4_ops[]; -#ifdef NFSD_DEBUG static const char *nfsd4_op_name(unsigned opnum); -#endif /* * Enforce NFSv4.1 COMPOUND ordering rules: @@ -1145,6 +1203,8 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) bool nfsd4_cache_this_op(struct nfsd4_op *op) { + if (op->opnum == OP_ILLEGAL) + return false; return OPDESC(op)->op_flags & OP_CACHEME; } @@ -1181,6 +1241,25 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) return !(nextd->op_flags & OP_HANDLES_WRONGSEC); } +static void svcxdr_init_encode(struct svc_rqst *rqstp, + struct nfsd4_compoundres *resp) +{ + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *head = buf->head; + + xdr->buf = buf; + xdr->iov = head; + xdr->p = head->iov_base + head->iov_len; + xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + /* Tail and page_len should be zero at this point: */ + buf->len = buf->head[0].iov_len; + xdr->scratch.iov_len = 0; + xdr->page_ptr = buf->pages - 1; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) + - rqstp->rq_auth_slack; +} + /* * COMPOUND call. */ @@ -1192,26 +1271,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, struct nfsd4_op *op; struct nfsd4_operation *opdesc; struct nfsd4_compound_state *cstate = &resp->cstate; - int slack_bytes; - u32 plen = 0; + struct svc_fh *current_fh = &cstate->current_fh; + struct svc_fh *save_fh = &cstate->save_fh; __be32 status; - resp->xbuf = &rqstp->rq_res; - resp->p = rqstp->rq_res.head[0].iov_base + - rqstp->rq_res.head[0].iov_len; - resp->tagp = resp->p; + svcxdr_init_encode(rqstp, resp); + resp->tagp = resp->xdr.p; /* reserve space for: taglen, tag, and opcnt */ - resp->p += 2 + XDR_QUADLEN(args->taglen); - resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; + xdr_reserve_space(&resp->xdr, 8 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; - resp->opcnt = 0; resp->rqstp = rqstp; - resp->cstate.minorversion = args->minorversion; - resp->cstate.replay_owner = NULL; - resp->cstate.session = NULL; - fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); - fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); + cstate->minorversion = args->minorversion; + fh_init(current_fh, NFS4_FHSIZE); + fh_init(save_fh, NFS4_FHSIZE); /* * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. @@ -1222,7 +1295,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * According to RFC3010, this takes precedence over all other errors. */ status = nfserr_minor_vers_mismatch; - if (args->minorversion > nfsd_supported_minorversion) + if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0) goto out; status = nfs41_check_op_ordering(args); @@ -1243,50 +1316,49 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * for example, if there is a miscellaneous XDR error * it will be set to nfserr_bad_xdr. */ - if (op->status) - goto encode_op; - - /* We must be able to encode a successful response to - * this operation, with enough room left over to encode a - * failed response to the next operation. If we don't - * have enough room, fail with ERR_RESOURCE. - */ - slack_bytes = (char *)resp->end - (char *)resp->p; - if (slack_bytes < COMPOUND_SLACK_SPACE - + COMPOUND_ERR_SLACK_SPACE) { - BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE); - op->status = nfserr_resource; + if (op->status) { + if (op->opnum == OP_OPEN) + op->status = nfsd4_open_omfg(rqstp, cstate, op); goto encode_op; } opdesc = OPDESC(op); - if (!cstate->current_fh.fh_dentry) { + if (!current_fh->fh_dentry) { if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { op->status = nfserr_nofilehandle; goto encode_op; } - } else if (cstate->current_fh.fh_export->ex_fslocs.migrated && + } else if (current_fh->fh_export->ex_fslocs.migrated && !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { op->status = nfserr_moved; goto encode_op; } + fh_clear_wcc(current_fh); + /* If op is non-idempotent */ if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { - plen = opdesc->op_rsize_bop(rqstp, op); + /* + * Don't execute this op if we couldn't encode a + * succesful reply: + */ + u32 plen = opdesc->op_rsize_bop(rqstp, op); + /* + * Plus if there's another operation, make sure + * we'll have space to at least encode an error: + */ + if (resp->opcnt < args->opcnt) + plen += COMPOUND_ERR_SLACK_SPACE; op->status = nfsd4_check_resp_size(resp, plen); } if (op->status) goto encode_op; - if (opdesc->op_func) { - if (opdesc->op_get_currentstateid) - opdesc->op_get_currentstateid(cstate, &op->u); - op->status = opdesc->op_func(rqstp, cstate, &op->u); - } else - BUG_ON(op->status == nfs_ok); + if (opdesc->op_get_currentstateid) + opdesc->op_get_currentstateid(cstate, &op->u); + op->status = opdesc->op_func(rqstp, cstate, &op->u); if (!op->status) { if (opdesc->op_set_currentstateid) @@ -1296,19 +1368,19 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, clear_current_stateid(cstate); if (need_wrongsec_check(rqstp)) - op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); + op->status = check_nfsd_access(current_fh->fh_export, rqstp); } encode_op: /* Only from SEQUENCE */ - if (resp->cstate.status == nfserr_replay_cache) { + if (cstate->status == nfserr_replay_cache) { dprintk("%s NFS4.1 replay from cache\n", __func__); status = op->status; goto out; } if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; - nfsd4_encode_replay(resp, op); + nfsd4_encode_replay(&resp->xdr, op); status = op->status = op->replay->rp_status; } else { nfsd4_encode_operation(resp, op); @@ -1330,10 +1402,10 @@ encode_op: nfsd4_increment_op_stats(op->opnum); } - resp->cstate.status = status; - fh_put(&resp->cstate.current_fh); - fh_put(&resp->cstate.save_fh); - BUG_ON(resp->cstate.replay_owner); + cstate->status = status; + fh_put(current_fh); + fh_put(save_fh); + BUG_ON(cstate->replay_owner); out: /* Reset deferral mechanism for RPC deferrals */ rqstp->rq_usedeferral = 1; @@ -1347,7 +1419,8 @@ out: #define op_encode_change_info_maxsz (5) #define nfs4_fattr_bitmap_maxsz (4) -#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +/* We'll fall back on returning no lockowner if run out of space: */ +#define op_encode_lockowner_maxsz (0) #define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz) #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) @@ -1379,6 +1452,49 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); } +/* + * Note since this is an idempotent operation we won't insist on failing + * the op prematurely if the estimate is too large. We may turn off splice + * reads unnecessarily. + */ +static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + u32 *bmap = op->u.getattr.ga_bmval; + u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2]; + u32 ret = 0; + + if (bmap0 & FATTR4_WORD0_ACL) + return svc_max_payload(rqstp); + if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) + return svc_max_payload(rqstp); + + if (bmap1 & FATTR4_WORD1_OWNER) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER; + } + if (bmap1 & FATTR4_WORD1_OWNER_GROUP) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER_GROUP; + } + if (bmap0 & FATTR4_WORD0_FILEHANDLE) { + ret += NFS4_FHSIZE + 4; + bmap0 &= ~FATTR4_WORD0_FILEHANDLE; + } + if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) { + ret += NFSD4_MAX_SEC_LABEL_LEN + 12; + bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL; + } + /* + * Largest of remaining attributes are 16 bytes (e.g., + * supported_attributes) + */ + ret += 16 * (hweight32(bmap0) + hweight32(bmap1) + hweight32(bmap2)); + /* bitmask, length */ + ret += 20; + return ret; +} + static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) @@ -1409,18 +1525,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) if (rlen > maxcount) rlen = maxcount; - return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen; + return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { + u32 maxcount = svc_max_payload(rqstp); u32 rlen = op->u.readdir.rd_maxcount; - if (rlen > PAGE_SIZE) - rlen = PAGE_SIZE; + if (rlen > maxcount) + rlen = maxcount; - return (op_encode_hdr_size + op_encode_verifier_maxsz) - * sizeof(__be32) + rlen; + return (op_encode_hdr_size + op_encode_verifier_maxsz + + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) @@ -1435,6 +1552,12 @@ static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op + op_encode_change_info_maxsz) * sizeof(__be32); } +static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return NFS4_MAX_SESSIONID_LEN + 20; +} + static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); @@ -1442,18 +1565,20 @@ static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32); + return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * + sizeof(__be32); } static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); + return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32); } static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ - 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\ + 1 + 1 + /* eir_flags, spr_how */\ + 4 + /* spo_must_enforce & _allow with bitmap */\ 2 + /*eir_server_owner.so_minor_id */\ /* eir_server_owner.so_major_id<> */\ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ @@ -1514,6 +1639,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_GETATTR] = { .op_func = (nfsd4op_func)nfsd4_getattr, .op_flags = ALLOWED_ON_ABSENT_FS, + .op_rsize_bop = nfsd4_getattr_rsize, .op_name = "OP_GETATTR", }, [OP_GETFH] = { @@ -1583,37 +1709,32 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTPUBFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTPUBFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTROOTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_READ] = { .op_func = (nfsd4op_func)nfsd4_read, - .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READ", .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, .op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid, }, [OP_READDIR] = { .op_func = (nfsd4op_func)nfsd4_readdir, - .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READDIR", .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize, }, @@ -1771,14 +1892,33 @@ static struct nfsd4_operation nfsd4_ops[] = { }, }; -#ifdef NFSD_DEBUG +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + struct nfsd4_operation *opdesc; + nfsd4op_rsize estimator; + + if (op->opnum == OP_ILLEGAL) + return op_encode_hdr_size * sizeof(__be32); + opdesc = OPDESC(op); + estimator = opdesc->op_rsize_bop; + return estimator ? estimator(rqstp, op) : PAGE_SIZE; +} + +void warn_on_nonidempotent_op(struct nfsd4_op *op) +{ + if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { + pr_err("unable to encode reply to nonidempotent op %d (%s)\n", + op->opnum, nfsd4_op_name(op->opnum)); + WARN_ON_ONCE(1); + } +} + static const char *nfsd4_op_name(unsigned opnum) { if (opnum < ARRAY_SIZE(nfsd4_ops)) return nfsd4_ops[opnum].op_name; return "unknown_operation"; } -#endif #define nfsd4_voidres nfsd4_voidargs struct nfsd4_voidargs { int dummy; }; @@ -1810,6 +1950,7 @@ struct svc_version nfsd_version4 = { .vs_proc = nfsd_procedures4, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS4_SVC_XDRSIZE, + .vs_rpcb_optnl = 1, }; /* diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index ba6fdd4a045..9c271f42604 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -73,8 +73,8 @@ nfs4_save_creds(const struct cred **original_creds) if (!new) return -ENOMEM; - new->fsuid = 0; - new->fsgid = 0; + new->fsuid = GLOBAL_ROOT_UID; + new->fsgid = GLOBAL_ROOT_GID; *original_creds = override_creds(new); put_cred(new); return 0; @@ -146,7 +146,7 @@ out_no_tfm: * then disable recovery tracking. */ static void -legacy_recdir_name_error(int error) +legacy_recdir_name_error(struct nfs4_client *clp, int error) { printk(KERN_ERR "NFSD: unable to generate recoverydir " "name (%d).\n", error); @@ -159,9 +159,7 @@ legacy_recdir_name_error(int error) if (error == -ENOENT) { printk(KERN_ERR "NFSD: disabling legacy clientid tracking. " "Reboot recovery will not function correctly!\n"); - - /* the argument is ignored by the legacy exit function */ - nfsd4_client_tracking_exit(NULL); + nfsd4_client_tracking_exit(clp->net); } } @@ -175,8 +173,6 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) int status; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); - if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; if (!nn->rec_file) @@ -184,7 +180,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) status = nfs4_make_rec_clidname(dname, &clp->cl_name); if (status) - return legacy_recdir_name_error(status); + return legacy_recdir_name_error(clp, status); status = nfs4_save_creds(&original_cred); if (status < 0) @@ -242,11 +238,16 @@ struct name_list { struct list_head list; }; +struct nfs4_dir_ctx { + struct dir_context ctx; + struct list_head names; +}; + static int nfsd4_build_namelist(void *arg, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { - struct list_head *names = arg; + struct nfs4_dir_ctx *ctx = arg; struct name_list *entry; if (namlen != HEXDIR_LEN - 1) @@ -256,7 +257,7 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen, return -ENOMEM; memcpy(entry->name, name, HEXDIR_LEN - 1); entry->name[HEXDIR_LEN - 1] = '\0'; - list_add(&entry->list, names); + list_add(&entry->list, &ctx->names); return 0; } @@ -265,7 +266,10 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { const struct cred *original_cred; struct dentry *dir = nn->rec_file->f_path.dentry; - LIST_HEAD(names); + struct nfs4_dir_ctx ctx = { + .ctx.actor = nfsd4_build_namelist, + .names = LIST_HEAD_INIT(ctx.names) + }; int status; status = nfs4_save_creds(&original_cred); @@ -278,11 +282,11 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) return status; } - status = vfs_readdir(nn->rec_file, nfsd4_build_namelist, &names); + status = iterate_dir(nn->rec_file, &ctx.ctx); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - while (!list_empty(&names)) { + while (!list_empty(&ctx.names)) { struct name_list *entry; - entry = list_entry(names.next, struct name_list, list); + entry = list_entry(ctx.names.next, struct name_list, list); if (!status) { struct dentry *dentry; dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); @@ -341,7 +345,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) status = nfs4_make_rec_clidname(dname, &clp->cl_name); if (status) - return legacy_recdir_name_error(status); + return legacy_recdir_name_error(clp, status); status = mnt_want_write_file(nn->rec_file); if (status) @@ -381,8 +385,8 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) status = vfs_rmdir(parent->d_inode, child); if (status) - printk("failed to remove client recovery directory %s\n", - child->d_name.name); + printk("failed to remove client recovery directory %pd\n", + child); /* Keep trying, success or failure: */ return 0; } @@ -406,15 +410,15 @@ out: nfs4_release_reclaim(nn); if (status) printk("nfsd4: failed to purge old clients from recovery" - " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); + " directory %pD\n", nn->rec_file); } static int load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { if (child->d_name.len != HEXDIR_LEN - 1) { - printk("nfsd4: illegal name %s in recovery directory\n", - child->d_name.name); + printk("nfsd4: illegal name %pd in recovery directory\n", + child); /* Keep trying; maybe the others are OK: */ return 0; } @@ -433,7 +437,7 @@ nfsd4_recdir_load(struct net *net) { status = nfsd4_list_rec_dir(load_recdir, nn); if (status) printk("nfsd4: failed loading clients from recovery" - " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); + " directory %pD\n", nn->rec_file); return status; } @@ -601,7 +605,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp) status = nfs4_make_rec_clidname(dname, &clp->cl_name); if (status) { - legacy_recdir_name_error(status); + legacy_recdir_name_error(clp, status); return status; } @@ -1185,6 +1189,12 @@ bin_to_hex_dup(const unsigned char *src, int srclen) static int nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) { + /* XXX: The usermode helper s not working in container yet. */ + if (net != &init_net) { + WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " + "tracking in a container!\n"); + return -EINVAL; + } return nfsd4_umh_cltrack_upcall("init", NULL, NULL); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ac8ed96c419..2204e1fe572 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -40,8 +40,10 @@ #include <linux/pagemap.h> #include <linux/ratelimit.h> #include <linux/sunrpc/svcauth_gss.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> +#include <linux/hash.h> #include "xdr4.h" +#include "xdr4cb.h" #include "vfs.h" #include "current_stateid.h" @@ -80,13 +82,13 @@ static DEFINE_MUTEX(client_mutex); * effort to decrease the scope of the client_mutex, this spinlock may * eventually cover more: */ -static DEFINE_SPINLOCK(recall_lock); +static DEFINE_SPINLOCK(state_lock); -static struct kmem_cache *openowner_slab = NULL; -static struct kmem_cache *lockowner_slab = NULL; -static struct kmem_cache *file_slab = NULL; -static struct kmem_cache *stateid_slab = NULL; -static struct kmem_cache *deleg_slab = NULL; +static struct kmem_cache *openowner_slab; +static struct kmem_cache *lockowner_slab; +static struct kmem_cache *file_slab; +static struct kmem_cache *stateid_slab; +static struct kmem_cache *deleg_slab; void nfs4_lock_state(void) @@ -94,17 +96,33 @@ nfs4_lock_state(void) mutex_lock(&client_mutex); } -static void free_session(struct kref *); +static void free_session(struct nfsd4_session *); -/* Must be called under the client_lock */ -static void nfsd4_put_session_locked(struct nfsd4_session *ses) +static bool is_session_dead(struct nfsd4_session *ses) { - kref_put(&ses->se_ref, free_session); + return ses->se_flags & NFS4_SESSION_DEAD; } -static void nfsd4_get_session(struct nfsd4_session *ses) +void nfsd4_put_session(struct nfsd4_session *ses) +{ + if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) + free_session(ses); +} + +static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) +{ + if (atomic_read(&ses->se_ref) > ref_held_by_me) + return nfserr_jukebox; + ses->se_flags |= NFS4_SESSION_DEAD; + return nfs_ok; +} + +static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) { - kref_get(&ses->se_ref); + if (is_session_dead(ses)) + return nfserr_badsession; + atomic_inc(&ses->se_ref); + return nfs_ok; } void @@ -113,6 +131,90 @@ nfs4_unlock_state(void) mutex_unlock(&client_mutex); } +static bool is_client_expired(struct nfs4_client *clp) +{ + return clp->cl_time == 0; +} + +static __be32 mark_client_expired_locked(struct nfs4_client *clp) +{ + if (atomic_read(&clp->cl_refcount)) + return nfserr_jukebox; + clp->cl_time = 0; + return nfs_ok; +} + +static __be32 mark_client_expired(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + __be32 ret; + + spin_lock(&nn->client_lock); + ret = mark_client_expired_locked(clp); + spin_unlock(&nn->client_lock); + return ret; +} + +static __be32 get_client_locked(struct nfs4_client *clp) +{ + if (is_client_expired(clp)) + return nfserr_expired; + atomic_inc(&clp->cl_refcount); + return nfs_ok; +} + +/* must be called under the client_lock */ +static inline void +renew_client_locked(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (is_client_expired(clp)) { + WARN_ON(1); + printk("%s: client (clientid %08x/%08x) already expired\n", + __func__, + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + return; + } + + dprintk("renewing client (clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + list_move_tail(&clp->cl_lru, &nn->client_lru); + clp->cl_time = get_seconds(); +} + +static inline void +renew_client(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + renew_client_locked(clp); + spin_unlock(&nn->client_lock); +} + +static void put_client_renew_locked(struct nfs4_client *clp) +{ + if (!atomic_dec_and_test(&clp->cl_refcount)) + return; + if (!is_client_expired(clp)) + renew_client_locked(clp); +} + +void put_client_renew(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) + return; + if (!is_client_expired(clp)) + renew_client_locked(clp); + spin_unlock(&nn->client_lock); +} + + static inline u32 opaque_hashval(const void *ptr, int nbytes) { @@ -126,8 +228,6 @@ opaque_hashval(const void *ptr, int nbytes) return x; } -static struct list_head del_recall_lru; - static void nfsd4_free_file(struct nfs4_file *f) { kmem_cache_free(file_slab, f); @@ -136,9 +236,9 @@ static void nfsd4_free_file(struct nfs4_file *f) static inline void put_nfs4_file(struct nfs4_file *fi) { - if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { - list_del(&fi->fi_hash); - spin_unlock(&recall_lock); + if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { + hlist_del(&fi->fi_hash); + spin_unlock(&state_lock); iput(fi->fi_inode); nfsd4_free_file(fi); } @@ -151,7 +251,7 @@ get_nfs4_file(struct nfs4_file *fi) } static int num_delegations; -unsigned int max_delegations; +unsigned long max_delegations; /* * Open owner state (share locks) @@ -181,7 +281,7 @@ static unsigned int file_hashval(struct inode *ino) return hash_ptr(ino, FILE_HASH_BITS); } -static struct list_head file_hashtbl[FILE_HASH_SIZE]; +static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) { @@ -210,13 +310,7 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { if (atomic_dec_and_test(&fp->fi_access[oflag])) { nfs4_file_put_fd(fp, oflag); - /* - * It's also safe to get rid of the RDWR open *if* - * we no longer have need of the other kind of access - * or if we already have the other kind of open: - */ - if (fp->fi_fds[1-oflag] - || atomic_read(&fp->fi_access[1 - oflag]) == 0) + if (atomic_read(&fp->fi_access[1 - oflag]) == 0) nfs4_file_put_fd(fp, O_RDWR); } } @@ -230,21 +324,27 @@ static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) __nfs4_file_put_access(fp, oflag); } -static inline int get_new_stid(struct nfs4_stid *stid) +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct +kmem_cache *slab) { - static int min_stateid = 0; - struct idr *stateids = &stid->sc_client->cl_stateids; - int new_stid; - int error; + struct idr *stateids = &cl->cl_stateids; + struct nfs4_stid *stid; + int new_id; + + stid = kmem_cache_alloc(slab, GFP_KERNEL); + if (!stid) + return NULL; + + new_id = idr_alloc_cyclic(stateids, stid, 0, 0, GFP_KERNEL); + if (new_id < 0) + goto out_free; + stid->sc_client = cl; + stid->sc_type = 0; + stid->sc_stateid.si_opaque.so_id = new_id; + stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; + /* Will be incremented before return to client: */ + stid->sc_stateid.si_generation = 0; - error = idr_get_new_above(stateids, stid, min_stateid, &new_stid); - /* - * Note: the necessary preallocation was done in - * nfs4_alloc_stateid(). The idr code caps the number of - * preallocations that can exist at a time, but the state lock - * prevents anyone from using ours before we get here: - */ - WARN_ON_ONCE(error); /* * It shouldn't be a problem to reuse an opaque stateid value. * I don't think it is for 4.1. But with 4.0 I worry that, for @@ -254,69 +354,103 @@ static inline int get_new_stid(struct nfs4_stid *stid) * amount of time until an id is reused, by ensuring they always * "increase" (mod INT_MAX): */ - - min_stateid = new_stid+1; - if (min_stateid == INT_MAX) - min_stateid = 0; - return new_stid; + return stid; +out_free: + kmem_cache_free(slab, stid); + return NULL; } -static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type) +static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) { - stateid_t *s = &stid->sc_stateid; - int new_id; - - stid->sc_type = type; - stid->sc_client = cl; - s->si_opaque.so_clid = cl->cl_clientid; - new_id = get_new_stid(stid); - s->si_opaque.so_id = (u32)new_id; - /* Will be incremented before return to client: */ - s->si_generation = 0; + return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); } -static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) +/* + * When we recall a delegation, we should be careful not to hand it + * out again straight away. + * To ensure this we keep a pair of bloom filters ('new' and 'old') + * in which the filehandles of recalled delegations are "stored". + * If a filehandle appear in either filter, a delegation is blocked. + * When a delegation is recalled, the filehandle is stored in the "new" + * filter. + * Every 30 seconds we swap the filters and clear the "new" one, + * unless both are empty of course. + * + * Each filter is 256 bits. We hash the filehandle to 32bit and use the + * low 3 bytes as hash-table indices. + * + * 'state_lock', which is always held when block_delegations() is called, + * is used to manage concurrent access. Testing does not need the lock + * except when swapping the two filters. + */ +static struct bloom_pair { + int entries, old_entries; + time_t swap_time; + int new; /* index into 'set' */ + DECLARE_BITMAP(set[2], 256); +} blocked_delegations; + +static int delegation_blocked(struct knfsd_fh *fh) { - struct idr *stateids = &cl->cl_stateids; + u32 hash; + struct bloom_pair *bd = &blocked_delegations; - if (!idr_pre_get(stateids, GFP_KERNEL)) - return NULL; - /* - * Note: if we fail here (or any time between now and the time - * we actually get the new idr), we won't need to undo the idr - * preallocation, since the idr code caps the number of - * preallocated entries. - */ - return kmem_cache_alloc(slab, GFP_KERNEL); + if (bd->entries == 0) + return 0; + if (seconds_since_boot() - bd->swap_time > 30) { + spin_lock(&state_lock); + if (seconds_since_boot() - bd->swap_time > 30) { + bd->entries -= bd->old_entries; + bd->old_entries = bd->entries; + memset(bd->set[bd->new], 0, + sizeof(bd->set[0])); + bd->new = 1-bd->new; + bd->swap_time = seconds_since_boot(); + } + spin_unlock(&state_lock); + } + hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + if (test_bit(hash&255, bd->set[0]) && + test_bit((hash>>8)&255, bd->set[0]) && + test_bit((hash>>16)&255, bd->set[0])) + return 1; + + if (test_bit(hash&255, bd->set[1]) && + test_bit((hash>>8)&255, bd->set[1]) && + test_bit((hash>>16)&255, bd->set[1])) + return 1; + + return 0; } -static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) +static void block_delegations(struct knfsd_fh *fh) { - return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); + u32 hash; + struct bloom_pair *bd = &blocked_delegations; + + hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + + __set_bit(hash&255, bd->set[bd->new]); + __set_bit((hash>>8)&255, bd->set[bd->new]); + __set_bit((hash>>16)&255, bd->set[bd->new]); + if (bd->entries == 0) + bd->swap_time = seconds_since_boot(); + bd->entries += 1; } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type) +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; - struct nfs4_file *fp = stp->st_file; dprintk("NFSD alloc_init_deleg\n"); - /* - * Major work on the lease subsystem (for example, to support - * calbacks on stat) will be required before we can support - * write delegations properly. - */ - if (type != NFS4_OPEN_DELEGATE_READ) - return NULL; - if (fp->fi_had_conflict) - return NULL; if (num_delegations > max_delegations) return NULL; + if (delegation_blocked(¤t_fh->fh_handle)) + return NULL; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) return dp; - init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID); /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -327,9 +461,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); - get_nfs4_file(fp); - dp->dl_file = fp; - dp->dl_type = type; + dp->dl_file = NULL; + dp->dl_type = NFS4_OPEN_DELEGATE_READ; fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); @@ -337,19 +470,31 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv return dp; } +static void remove_stid(struct nfs4_stid *s) +{ + struct idr *stateids = &s->sc_client->cl_stateids; + + idr_remove(stateids, s->sc_stateid.si_opaque.so_id); +} + +static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) +{ + kmem_cache_free(slab, s); +} + void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { - dprintk("NFSD: freeing dp %p\n",dp); - put_nfs4_file(dp->dl_file); - kmem_cache_free(deleg_slab, dp); + nfs4_free_stid(deleg_slab, &dp->dl_stid); num_delegations--; } } static void nfs4_put_deleg_lease(struct nfs4_file *fp) { + if (!fp->fi_lease) + return; if (atomic_dec_and_test(&fp->fi_delegees)) { vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); fp->fi_lease = NULL; @@ -360,25 +505,64 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp) static void unhash_stid(struct nfs4_stid *s) { - struct idr *stateids = &s->sc_client->cl_stateids; + s->sc_type = 0; +} - idr_remove(stateids, s->sc_stateid.si_opaque.so_id); +static void +hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) +{ + lockdep_assert_held(&state_lock); + + dp->dl_stid.sc_type = NFS4_DELEG_STID; + list_add(&dp->dl_perfile, &fp->fi_delegations); + list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } /* Called under the state lock. */ static void unhash_delegation(struct nfs4_delegation *dp) { - unhash_stid(&dp->dl_stid); + spin_lock(&state_lock); list_del_init(&dp->dl_perclnt); - spin_lock(&recall_lock); list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); - spin_unlock(&recall_lock); - nfs4_put_deleg_lease(dp->dl_file); + spin_unlock(&state_lock); + if (dp->dl_file) { + nfs4_put_deleg_lease(dp->dl_file); + put_nfs4_file(dp->dl_file); + dp->dl_file = NULL; + } +} + + + +static void destroy_revoked_delegation(struct nfs4_delegation *dp) +{ + list_del_init(&dp->dl_recall_lru); + remove_stid(&dp->dl_stid); + nfs4_put_delegation(dp); +} + +static void destroy_delegation(struct nfs4_delegation *dp) +{ + unhash_delegation(dp); + remove_stid(&dp->dl_stid); nfs4_put_delegation(dp); } +static void revoke_delegation(struct nfs4_delegation *dp) +{ + struct nfs4_client *clp = dp->dl_stid.sc_client; + + if (clp->cl_minorversion == 0) + destroy_delegation(dp); + else { + unhash_delegation(dp); + dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; + list_add(&dp->dl_recall_lru, &clp->cl_revoked); + } +} + /* * SETCLIENTID state */ @@ -519,7 +703,8 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp) static void free_generic_stateid(struct nfs4_ol_stateid *stp) { - kmem_cache_free(stateid_slab, stp); + remove_stid(&stp->st_stid); + nfs4_free_stid(stateid_slab, &stp->st_stid); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -549,6 +734,12 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) } } +static void nfs4_free_lockowner(struct nfs4_lockowner *lo) +{ + kfree(lo->lo_owner.so_owner.data); + kmem_cache_free(lockowner_slab, lo); +} + static void release_lockowner(struct nfs4_lockowner *lo) { unhash_lockowner(lo); @@ -577,7 +768,6 @@ static void unhash_open_stateid(struct nfs4_ol_stateid *stp) static void release_open_stateid(struct nfs4_ol_stateid *stp) { unhash_open_stateid(stp); - unhash_stid(&stp->st_stid); free_generic_stateid(stp); } @@ -599,12 +789,17 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; if (s) { - unhash_stid(&s->st_stid); free_generic_stateid(s); oo->oo_last_closed_stid = NULL; } } +static void nfs4_free_openowner(struct nfs4_openowner *oo) +{ + kfree(oo->oo_owner.so_owner.data); + kmem_cache_free(openowner_slab, oo); +} + static void release_openowner(struct nfs4_openowner *oo) { unhash_openowner(oo); @@ -635,6 +830,28 @@ dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) } #endif +/* + * Bump the seqid on cstate->replay_owner, and clear replay_owner if it + * won't be used for replay. + */ +void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr) +{ + struct nfs4_stateowner *so = cstate->replay_owner; + + if (nfserr == nfserr_replay_me) + return; + + if (!seqid_mutating_err(ntohl(nfserr))) { + cstate->replay_owner = NULL; + return; + } + if (!so) + return; + if (so->so_is_open_owner) + release_last_closed_stateid(openowner(so)); + so->so_seqid++; + return; +} static void gen_sessionid(struct nfsd4_session *ses) @@ -675,17 +892,15 @@ free_session_slots(struct nfsd4_session *ses) * We don't actually need to cache the rpc and session headers, so we * can allocate a little less for each slot: */ -static inline int slot_bytes(struct nfsd4_channel_attrs *ca) +static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) { - return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; -} + u32 size; -static int nfsd4_sanitize_slot_size(u32 size) -{ - size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */ - size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE); - - return size; + if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ) + size = 0; + else + size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; + return size + sizeof(struct nfsd4_slot); } /* @@ -693,15 +908,15 @@ static int nfsd4_sanitize_slot_size(u32 size) * re-negotiate active sessions and reduce their slot usage to make * room for new connections. For now we just fail the create session. */ -static int nfsd4_get_drc_mem(int slotsize, u32 num) +static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) { + u32 slotsize = slot_bytes(ca); + u32 num = ca->maxreqs; int avail; - num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); - spin_lock(&nfsd_drc_lock); - avail = min_t(int, NFSD_MAX_MEM_PER_SESSION, - nfsd_drc_max_mem - nfsd_drc_mem_used); + avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, + nfsd_drc_max_mem - nfsd_drc_mem_used); num = min_t(int, num, avail / slotsize); nfsd_drc_mem_used += num * slotsize; spin_unlock(&nfsd_drc_lock); @@ -709,15 +924,20 @@ static int nfsd4_get_drc_mem(int slotsize, u32 num) return num; } -static void nfsd4_put_drc_mem(int slotsize, int num) +static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) { + int slotsize = slot_bytes(ca); + spin_lock(&nfsd_drc_lock); - nfsd_drc_mem_used -= slotsize * num; + nfsd_drc_mem_used -= slotsize * ca->maxreqs; spin_unlock(&nfsd_drc_lock); } -static struct nfsd4_session *__alloc_session(int slotsize, int numslots) +static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, + struct nfsd4_channel_attrs *battrs) { + int numslots = fattrs->maxreqs; + int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; int mem, i; @@ -730,11 +950,14 @@ static struct nfsd4_session *__alloc_session(int slotsize, int numslots) return NULL; /* allocate each struct nfsd4_slot and data cache in one piece */ for (i = 0; i < numslots; i++) { - mem = sizeof(struct nfsd4_slot) + slotsize; - new->se_slots[i] = kzalloc(mem, GFP_KERNEL); + new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL); if (!new->se_slots[i]) goto out_free; } + + memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); + memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs)); + return new; out_free: while (i--) @@ -743,21 +966,6 @@ out_free: return NULL; } -static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, - struct nfsd4_channel_attrs *req, - int numslots, int slotsize, - struct nfsd_net *nn) -{ - u32 maxrpc = nn->nfsd_serv->sv_max_mesg; - - new->maxreqs = numslots; - new->maxresp_cached = min_t(u32, req->maxresp_cached, - slotsize + NFSD_MIN_HDR_SEQ_SZ); - new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); - new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); - new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); -} - static void free_conn(struct nfsd4_conn *c) { svc_xprt_put(c->cn_xprt); @@ -774,8 +982,8 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u) list_del(&c->cn_persession); free_conn(c); } - spin_unlock(&clp->cl_lock); nfsd4_probe_callback(clp); + spin_unlock(&clp->cl_lock); } static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) @@ -859,59 +1067,20 @@ static void nfsd4_del_conns(struct nfsd4_session *s) static void __free_session(struct nfsd4_session *ses) { - nfsd4_put_drc_mem(slot_bytes(&ses->se_fchannel), ses->se_fchannel.maxreqs); free_session_slots(ses); kfree(ses); } -static void free_session(struct kref *kref) +static void free_session(struct nfsd4_session *ses) { - struct nfsd4_session *ses; - struct nfsd_net *nn; - - ses = container_of(kref, struct nfsd4_session, se_ref); - nn = net_generic(ses->se_client->net, nfsd_net_id); + struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); nfsd4_del_conns(ses); + nfsd4_put_drc_mem(&ses->se_fchannel); __free_session(ses); } -void nfsd4_put_session(struct nfsd4_session *ses) -{ - struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); - - spin_lock(&nn->client_lock); - nfsd4_put_session_locked(ses); - spin_unlock(&nn->client_lock); -} - -static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan, - struct nfsd_net *nn) -{ - struct nfsd4_session *new; - int numslots, slotsize; - /* - * Note decreasing slot size below client's request may - * make it difficult for client to function correctly, whereas - * decreasing the number of slots will (just?) affect - * performance. When short on memory we therefore prefer to - * decrease number of slots instead of their size. - */ - slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached); - numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs); - if (numslots < 1) - return NULL; - - new = __alloc_session(slotsize, numslots); - if (!new) { - nfsd4_put_drc_mem(slotsize, fchan->maxreqs); - return NULL; - } - init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn); - return new; -} - static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) { int idx; @@ -926,7 +1095,7 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru new->se_flags = cses->flags; new->se_cb_prog = cses->callback_prog; new->se_cb_sec = cses->cb_sec; - kref_init(&new->se_ref); + atomic_set(&new->se_ref, 0); idx = hash_sessionid(&new->se_sessionid); spin_lock(&nn->client_lock); list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); @@ -981,38 +1150,6 @@ unhash_session(struct nfsd4_session *ses) spin_unlock(&ses->se_client->cl_lock); } -/* must be called under the client_lock */ -static inline void -renew_client_locked(struct nfs4_client *clp) -{ - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - - if (is_client_expired(clp)) { - WARN_ON(1); - printk("%s: client (clientid %08x/%08x) already expired\n", - __func__, - clp->cl_clientid.cl_boot, - clp->cl_clientid.cl_id); - return; - } - - dprintk("renewing client (clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, - clp->cl_clientid.cl_id); - list_move_tail(&clp->cl_lru, &nn->client_lru); - clp->cl_time = get_seconds(); -} - -static inline void -renew_client(struct nfs4_client *clp) -{ - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - - spin_lock(&nn->client_lock); - renew_client_locked(clp); - spin_unlock(&nn->client_lock); -} - /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ static int STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) @@ -1042,13 +1179,25 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) return NULL; } clp->cl_name.len = name.len; + INIT_LIST_HEAD(&clp->cl_sessions); + idr_init(&clp->cl_stateids); + atomic_set(&clp->cl_refcount, 0); + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + INIT_LIST_HEAD(&clp->cl_idhash); + INIT_LIST_HEAD(&clp->cl_openowners); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_lru); + INIT_LIST_HEAD(&clp->cl_callbacks); + INIT_LIST_HEAD(&clp->cl_revoked); + spin_lock_init(&clp->cl_lock); + rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; } -static inline void +static void free_client(struct nfs4_client *clp) { - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); while (!list_empty(&clp->cl_sessions)) { @@ -1056,36 +1205,22 @@ free_client(struct nfs4_client *clp) ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, se_perclnt); list_del(&ses->se_perclnt); - nfsd4_put_session_locked(ses); + WARN_ON_ONCE(atomic_read(&ses->se_ref)); + free_session(ses); } + rpc_destroy_wait_queue(&clp->cl_cb_waitq); free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); + idr_destroy(&clp->cl_stateids); kfree(clp); } -void -release_session_client(struct nfsd4_session *session) -{ - struct nfs4_client *clp = session->se_client; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - - if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) - return; - if (is_client_expired(clp)) { - free_client(clp); - session->se_client = NULL; - } else - renew_client_locked(clp); - spin_unlock(&nn->client_lock); -} - /* must be called under the client_lock */ static inline void unhash_client_locked(struct nfs4_client *clp) { struct nfsd4_session *ses; - mark_client_expired(clp); list_del(&clp->cl_lru); spin_lock(&clp->cl_lock); list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) @@ -1102,16 +1237,21 @@ destroy_client(struct nfs4_client *clp) struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); INIT_LIST_HEAD(&reaplist); - spin_lock(&recall_lock); + spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); list_del_init(&dp->dl_perclnt); list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - unhash_delegation(dp); + destroy_delegation(dp); + } + list_splice_init(&clp->cl_revoked, &reaplist); + while (!list_empty(&reaplist)) { + dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); + destroy_revoked_delegation(dp); } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); @@ -1127,8 +1267,8 @@ destroy_client(struct nfs4_client *clp) rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); spin_lock(&nn->client_lock); unhash_client_locked(clp); - if (atomic_read(&clp->cl_refcount) == 0) - free_client(clp); + WARN_ON_ONCE(atomic_read(&clp->cl_refcount)); + free_client(clp); spin_unlock(&nn->client_lock); } @@ -1164,6 +1304,9 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) target->cr_gid = source->cr_gid; target->cr_group_info = source->cr_group_info; get_group_info(target->cr_group_info); + target->cr_gss_mech = source->cr_gss_mech; + if (source->cr_gss_mech) + gss_mech_get(source->cr_gss_mech); return 0; } @@ -1202,7 +1345,7 @@ static bool groups_equal(struct group_info *g1, struct group_info *g2) if (g1->ngroups != g2->ngroups) return false; for (i=0; i<g1->ngroups; i++) - if (GROUP_AT(g1, i) != GROUP_AT(g2, i)) + if (!gid_eq(GROUP_AT(g1, i), GROUP_AT(g2, i))) return false; return true; } @@ -1227,8 +1370,8 @@ static bool same_creds(struct svc_cred *cr1, struct svc_cred *cr2) { if ((is_gss_cred(cr1) != is_gss_cred(cr2)) - || (cr1->cr_uid != cr2->cr_uid) - || (cr1->cr_gid != cr2->cr_gid) + || (!uid_eq(cr1->cr_uid, cr2->cr_uid)) + || (!gid_eq(cr1->cr_gid, cr2->cr_gid)) || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) return false; if (cr1->cr_principal == cr2->cr_principal) @@ -1238,6 +1381,33 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); } +static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + u32 service; + + if (!cr->cr_gss_mech) + return false; + service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor); + return service == RPC_GSS_SVC_INTEGRITY || + service == RPC_GSS_SVC_PRIVACY; +} + +static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + + if (!cl->cl_mach_cred) + return true; + if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech) + return false; + if (!svc_rqst_integrity_protected(rqstp)) + return false; + if (!cr->cr_principal) + return false; + return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); +} + static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { static u32 current_clientid = 1; @@ -1258,7 +1428,12 @@ static void gen_confirm(struct nfs4_client *clp) static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) { - return idr_find(&cl->cl_stateids, t->si_opaque.so_id); + struct nfs4_stid *ret; + + ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id); + if (!ret || !ret->sc_type) + return NULL; + return ret; } static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) @@ -1286,7 +1461,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, if (clp == NULL) return NULL; - INIT_LIST_HEAD(&clp->cl_sessions); ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); if (ret) { spin_lock(&nn->client_lock); @@ -1294,19 +1468,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, spin_unlock(&nn->client_lock); return NULL; } - idr_init(&clp->cl_stateids); - atomic_set(&clp->cl_refcount, 0); - clp->cl_cb_state = NFSD4_CB_UNKNOWN; - INIT_LIST_HEAD(&clp->cl_idhash); - INIT_LIST_HEAD(&clp->cl_openowners); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_lru); - INIT_LIST_HEAD(&clp->cl_callbacks); - spin_lock_init(&clp->cl_lock); nfsd4_init_callback(&clp->cl_cb_null); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); - rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); copy_verf(clp, verf); rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); gen_confirm(clp); @@ -1383,12 +1547,12 @@ move_to_confirmed(struct nfs4_client *clp) } static struct nfs4_client * -find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) +find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions) { struct nfs4_client *clp; unsigned int idhashval = clientid_hashval(clid->cl_id); - list_for_each_entry(clp, &nn->conf_id_hashtbl[idhashval], cl_idhash) { + list_for_each_entry(clp, &tbl[idhashval], cl_idhash) { if (same_clid(&clp->cl_clientid, clid)) { if ((bool)clp->cl_minorversion != sessions) return NULL; @@ -1400,19 +1564,19 @@ find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) } static struct nfs4_client * +find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) +{ + struct list_head *tbl = nn->conf_id_hashtbl; + + return find_client_in_id_table(tbl, clid, sessions); +} + +static struct nfs4_client * find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { - struct nfs4_client *clp; - unsigned int idhashval = clientid_hashval(clid->cl_id); + struct list_head *tbl = nn->unconf_id_hashtbl; - list_for_each_entry(clp, &nn->unconf_id_hashtbl[idhashval], cl_idhash) { - if (same_clid(&clp->cl_clientid, clid)) { - if ((bool)clp->cl_minorversion != sessions) - return NULL; - return clp; - } - } - return NULL; + return find_client_in_id_table(tbl, clid, sessions); } static bool clp_used_exchangeid(struct nfs4_client *clp) @@ -1476,11 +1640,12 @@ out_err: } /* - * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size. + * Cache a reply. nfsd4_check_resp_size() has bounded the cache size. */ void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { + struct xdr_buf *buf = resp->xdr.buf; struct nfsd4_slot *slot = resp->cstate.slot; unsigned int base; @@ -1494,11 +1659,9 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) slot->sl_datalen = 0; return; } - slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap; - base = (char *)resp->cstate.datap - - (char *)resp->xbuf->head[0].iov_base; - if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data, - slot->sl_datalen)) + base = resp->cstate.data_offset; + slot->sl_datalen = buf->len - base; + if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) WARN("%s: sessions DRC could not cache compound\n", __func__); return; } @@ -1534,28 +1697,31 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, * The sequence operation is not cached because we can use the slot and * session values. */ -__be32 +static __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { struct nfsd4_slot *slot = resp->cstate.slot; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; __be32 status; dprintk("--> %s slot %p\n", __func__, slot); - /* Either returns 0 or nfserr_retry_uncached */ status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); - if (status == nfserr_retry_uncached_rep) + if (status) return status; - /* The sequence operation has been encoded, cstate->datap set. */ - memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); + p = xdr_reserve_space(xdr, slot->sl_datalen); + if (!p) { + WARN_ON_ONCE(1); + return nfserr_serverfault; + } + xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen); + xdr_commit_encode(xdr); resp->opcnt = slot->sl_opcnt; - resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); - status = slot->sl_status; - - return status; + return slot->sl_status; } /* @@ -1609,15 +1775,16 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; - /* Currently only support SP4_NONE */ switch (exid->spa_how) { + case SP4_MACH_CRED: + if (!svc_rqst_integrity_protected(rqstp)) + return nfserr_inval; case SP4_NONE: break; default: /* checked by xdr code */ WARN_ON_ONCE(1); case SP4_SSV: - case SP4_MACH_CRED: - return nfserr_serverfault; /* no excuse :-/ */ + return nfserr_encr_alg_unsupp; } /* Cases below refer to rfc 5661 section 18.35.4: */ @@ -1632,6 +1799,10 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, status = nfserr_inval; goto out; } + if (!mach_creds_match(conf, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } if (!creds_match) { /* case 9 */ status = nfserr_perm; goto out; @@ -1678,7 +1849,8 @@ out_new: status = nfserr_jukebox; goto out; } - new->cl_minorversion = 1; + new->cl_minorversion = cstate->minorversion; + new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); gen_clid(new, nn); add_to_unconfirmed(new); @@ -1757,10 +1929,78 @@ nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, /* seqid, slotID, slotID, slotID, status */ \ 5 ) * sizeof(__be32)) -static bool check_forechannel_attrs(struct nfsd4_channel_attrs fchannel) +static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) +{ + u32 maxrpc = nn->nfsd_serv->sv_max_mesg; + + if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ) + return nfserr_toosmall; + if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ) + return nfserr_toosmall; + ca->headerpadsz = 0; + ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc); + ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc); + ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND); + ca->maxresp_cached = min_t(u32, ca->maxresp_cached, + NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); + ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); + /* + * Note decreasing slot size below client's request may make it + * difficult for client to function correctly, whereas + * decreasing the number of slots will (just?) affect + * performance. When short on memory we therefore prefer to + * decrease number of slots instead of their size. Clients that + * request larger slots than they need will get poor results: + */ + ca->maxreqs = nfsd4_get_drc_mem(ca); + if (!ca->maxreqs) + return nfserr_jukebox; + + return nfs_ok; +} + +#define NFSD_CB_MAX_REQ_SZ ((NFS4_enc_cb_recall_sz + \ + RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32)) +#define NFSD_CB_MAX_RESP_SZ ((NFS4_dec_cb_recall_sz + \ + RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32)) + +static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) +{ + ca->headerpadsz = 0; + + /* + * These RPC_MAX_HEADER macros are overkill, especially since we + * don't even do gss on the backchannel yet. But this is still + * less than 1k. Tighten up this estimate in the unlikely event + * it turns out to be a problem for some client: + */ + if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ) + return nfserr_toosmall; + if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ) + return nfserr_toosmall; + ca->maxresp_cached = 0; + if (ca->maxops < 2) + return nfserr_toosmall; + + return nfs_ok; +} + +static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs) { - return fchannel.maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ - || fchannel.maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ; + switch (cbs->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + return nfs_ok; + default: + /* + * GSS case: the spec doesn't allow us to return this + * error. But it also doesn't allow us not to support + * GSS. + * I'd rather this fail hard than return some error the + * client might think it can already handle: + */ + return nfserr_encr_alg_unsupp; + } } __be32 @@ -1778,12 +2018,19 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; - if (check_forechannel_attrs(cr_ses->fore_channel)) - return nfserr_toosmall; - new = alloc_session(&cr_ses->fore_channel, nn); - if (!new) - return nfserr_jukebox; + status = nfsd4_check_cb_sec(&cr_ses->cb_sec); + if (status) + return status; + status = check_forechannel_attrs(&cr_ses->fore_channel, nn); + if (status) + return status; + status = check_backchannel_attrs(&cr_ses->back_channel); + if (status) + goto out_release_drc_mem; status = nfserr_jukebox; + new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel); + if (!new) + goto out_release_drc_mem; conn = alloc_conn_from_crses(rqstp, cr_ses); if (!conn) goto out_free_session; @@ -1791,8 +2038,12 @@ nfsd4_create_session(struct svc_rqst *rqstp, nfs4_lock_state(); unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); conf = find_confirmed_client(&cr_ses->clientid, true, nn); + WARN_ON_ONCE(conf && unconf); if (conf) { + status = nfserr_wrong_cred; + if (!mach_creds_match(conf, rqstp)) + goto out_free_conn; cs_slot = &conf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status == nfserr_replay_cache) { @@ -1809,6 +2060,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, status = nfserr_clid_inuse; goto out_free_conn; } + status = nfserr_wrong_cred; + if (!mach_creds_match(unconf, rqstp)) + goto out_free_conn; cs_slot = &unconf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { @@ -1817,8 +2071,12 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; } old = find_confirmed_client_by_name(&unconf->cl_name, nn); - if (old) + if (old) { + status = mark_client_expired(old); + if (status) + goto out_free_conn; expire_client(old); + } move_to_confirmed(unconf); conf = unconf; } else { @@ -1837,22 +2095,21 @@ nfsd4_create_session(struct svc_rqst *rqstp, memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); - memcpy(&cr_ses->fore_channel, &new->se_fchannel, - sizeof(struct nfsd4_channel_attrs)); cs_slot->sl_seqid++; cr_ses->seqid = cs_slot->sl_seqid; /* cache solo and embedded create sessions under the state lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); -out: nfs4_unlock_state(); - dprintk("%s returns %d\n", __func__, ntohl(status)); return status; out_free_conn: + nfs4_unlock_state(); free_conn(conn); out_free_session: __free_session(new); - goto out; +out_release_drc_mem: + nfsd4_put_drc_mem(&cr_ses->fore_channel); + return status; } static __be32 nfsd4_map_bcts_dir(u32 *dir) @@ -1873,7 +2130,11 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state { struct nfsd4_session *session = cstate->session; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + __be32 status; + status = nfsd4_check_cb_sec(&bc->bc_cb_sec); + if (status) + return status; spin_lock(&nn->client_lock); session->se_cb_prog = bc->bc_cb_program; session->se_cb_sec = bc->bc_cb_sec; @@ -1890,30 +2151,33 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, { __be32 status; struct nfsd4_conn *conn; + struct nfsd4_session *session; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (!nfsd4_last_compound_op(rqstp)) return nfserr_not_only_op; + nfs4_lock_state(); spin_lock(&nn->client_lock); - cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); - /* Sorta weird: we only need the refcnt'ing because new_conn acquires - * client_lock iself: */ - if (cstate->session) { - nfsd4_get_session(cstate->session); - atomic_inc(&cstate->session->se_client->cl_refcount); - } + session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); spin_unlock(&nn->client_lock); - if (!cstate->session) - return nfserr_badsession; - + status = nfserr_badsession; + if (!session) + goto out; + status = nfserr_wrong_cred; + if (!mach_creds_match(session->se_client, rqstp)) + goto out; status = nfsd4_map_bcts_dir(&bcts->dir); if (status) - return status; + goto out; conn = alloc_conn(rqstp, bcts->dir); + status = nfserr_jukebox; if (!conn) - return nfserr_jukebox; - nfsd4_init_conn(rqstp, conn, cstate->session); - return nfs_ok; + goto out; + nfsd4_init_conn(rqstp, conn, session); + status = nfs_ok; +out: + nfs4_unlock_state(); + return status; } static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) @@ -1929,42 +2193,43 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_destroy_session *sessionid) { struct nfsd4_session *ses; - __be32 status = nfserr_badsession; + __be32 status; + int ref_held_by_me = 0; struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); - /* Notes: - * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid - * - Should we return nfserr_back_chan_busy if waiting for - * callbacks on to-be-destroyed session? - * - Do we need to clear any callback info from previous session? - */ - + nfs4_lock_state(); + status = nfserr_not_only_op; if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { if (!nfsd4_last_compound_op(r)) - return nfserr_not_only_op; + goto out; + ref_held_by_me++; } dump_sessionid(__func__, &sessionid->sessionid); spin_lock(&nn->client_lock); ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r)); - if (!ses) { - spin_unlock(&nn->client_lock); - goto out; - } - + status = nfserr_badsession; + if (!ses) + goto out_client_lock; + status = nfserr_wrong_cred; + if (!mach_creds_match(ses->se_client, r)) + goto out_client_lock; + nfsd4_get_session_locked(ses); + status = mark_session_dead_locked(ses, 1 + ref_held_by_me); + if (status) + goto out_put_session; unhash_session(ses); spin_unlock(&nn->client_lock); - nfs4_lock_state(); nfsd4_probe_callback_sync(ses->se_client); - nfs4_unlock_state(); spin_lock(&nn->client_lock); - nfsd4_del_conns(ses); - nfsd4_put_session_locked(ses); - spin_unlock(&nn->client_lock); status = nfs_ok; +out_put_session: + nfsd4_put_session(ses); +out_client_lock: + spin_unlock(&nn->client_lock); out: - dprintk("%s returns %d\n", __func__, ntohl(status)); + nfs4_unlock_state(); return status; } @@ -1980,26 +2245,31 @@ static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_s return NULL; } -static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) +static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd4_conn *c; + __be32 status = nfs_ok; int ret; spin_lock(&clp->cl_lock); c = __nfsd4_find_conn(new->cn_xprt, ses); - if (c) { - spin_unlock(&clp->cl_lock); - free_conn(new); - return; - } + if (c) + goto out_free; + status = nfserr_conn_not_bound_to_session; + if (clp->cl_mach_cred) + goto out_free; __nfsd4_hash_conn(new, ses); spin_unlock(&clp->cl_lock); ret = nfsd4_register_conn(new); if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&new->cn_xpt_user); - return; + return nfs_ok; +out_free: + spin_unlock(&clp->cl_lock); + free_conn(new); + return status; } static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) @@ -2023,10 +2293,13 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_sequence *seq) { struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct xdr_stream *xdr = &resp->xdr; struct nfsd4_session *session; + struct nfs4_client *clp; struct nfsd4_slot *slot; struct nfsd4_conn *conn; __be32 status; + int buflen; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (resp->opcnt != 1) @@ -2044,19 +2317,26 @@ nfsd4_sequence(struct svc_rqst *rqstp, status = nfserr_badsession; session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp)); if (!session) - goto out; + goto out_no_session; + clp = session->se_client; + status = get_client_locked(clp); + if (status) + goto out_no_session; + status = nfsd4_get_session_locked(session); + if (status) + goto out_put_client; status = nfserr_too_many_ops; if (nfsd4_session_too_many_ops(rqstp, session)) - goto out; + goto out_put_session; status = nfserr_req_too_big; if (nfsd4_request_too_big(rqstp, session)) - goto out; + goto out_put_session; status = nfserr_badslot; if (seq->slotid >= session->se_fchannel.maxreqs) - goto out; + goto out_put_session; slot = session->se_slots[seq->slotid]; dprintk("%s: slotid %d\n", __func__, seq->slotid); @@ -2071,7 +2351,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status == nfserr_replay_cache) { status = nfserr_seq_misordered; if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) - goto out; + goto out_put_session; cstate->slot = slot; cstate->session = session; /* Return the cached reply status and set cstate->status @@ -2081,11 +2361,23 @@ nfsd4_sequence(struct svc_rqst *rqstp, goto out; } if (status) - goto out; + goto out_put_session; - nfsd4_sequence_check_conn(conn, session); + status = nfsd4_sequence_check_conn(conn, session); conn = NULL; + if (status) + goto out_put_session; + + buflen = (seq->cachethis) ? + session->se_fchannel.maxresp_cached : + session->se_fchannel.maxresp_sz; + status = (seq->cachethis) ? nfserr_rep_too_big_to_cache : + nfserr_rep_too_big; + if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack)) + goto out_put_session; + svc_reserve(rqstp, buflen); + status = nfs_ok; /* Success! bump slot seqid */ slot->sl_seqid = seq->seqid; slot->sl_flags |= NFSD4_SLOT_INUSE; @@ -2098,27 +2390,28 @@ nfsd4_sequence(struct svc_rqst *rqstp, cstate->session = session; out: - /* Hold a session reference until done processing the compound. */ - if (cstate->session) { - struct nfs4_client *clp = session->se_client; - - nfsd4_get_session(cstate->session); - atomic_inc(&clp->cl_refcount); - switch (clp->cl_cb_state) { - case NFSD4_CB_DOWN: - seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; - break; - case NFSD4_CB_FAULT: - seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; - break; - default: - seq->status_flags = 0; - } + switch (clp->cl_cb_state) { + case NFSD4_CB_DOWN: + seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; + break; + case NFSD4_CB_FAULT: + seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; + break; + default: + seq->status_flags = 0; } - kfree(conn); + if (!list_empty(&clp->cl_revoked)) + seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; +out_no_session: + if (conn) + free_conn(conn); spin_unlock(&nn->client_lock); - dprintk("%s: return %d\n", __func__, ntohl(status)); return status; +out_put_session: + nfsd4_put_session(session); +out_put_client: + put_client_renew_locked(clp); + goto out_no_session; } __be32 @@ -2131,17 +2424,12 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta nfs4_lock_state(); unconf = find_unconfirmed_client(&dc->clientid, true, nn); conf = find_confirmed_client(&dc->clientid, true, nn); + WARN_ON_ONCE(conf && unconf); if (conf) { clp = conf; - if (!is_client_expired(conf) && client_has_state(conf)) { - status = nfserr_clientid_busy; - goto out; - } - - /* rfc5661 18.50.3 */ - if (cstate->session && conf == cstate->session->se_client) { + if (client_has_state(conf)) { status = nfserr_clientid_busy; goto out; } @@ -2151,11 +2439,13 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta status = nfserr_stale_clientid; goto out; } - + if (!mach_creds_match(clp, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } expire_client(clp); out: nfs4_unlock_state(); - dprintk("%s return %d\n", __func__, ntohl(status)); return status; } @@ -2293,8 +2583,12 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, expire_client(unconf); } else { /* case 3: normal case; new or rebooted client */ conf = find_confirmed_client_by_name(&unconf->cl_name, nn); - if (conf) + if (conf) { + status = mark_client_expired(conf); + if (status) + goto out; expire_client(conf); + } move_to_confirmed(unconf); nfsd4_probe_callback(unconf); } @@ -2314,7 +2608,6 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) unsigned int hashval = file_hashval(ino); atomic_set(&fp->fi_ref, 1); - INIT_LIST_HEAD(&fp->fi_hash); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); fp->fi_inode = igrab(ino); @@ -2322,28 +2615,19 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) fp->fi_lease = NULL; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); - spin_lock(&recall_lock); - list_add(&fp->fi_hash, &file_hashtbl[hashval]); - spin_unlock(&recall_lock); -} - -static void -nfsd4_free_slab(struct kmem_cache **slab) -{ - if (*slab == NULL) - return; - kmem_cache_destroy(*slab); - *slab = NULL; + spin_lock(&state_lock); + hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); + spin_unlock(&state_lock); } void nfsd4_free_slabs(void) { - nfsd4_free_slab(&openowner_slab); - nfsd4_free_slab(&lockowner_slab); - nfsd4_free_slab(&file_slab); - nfsd4_free_slab(&stateid_slab); - nfsd4_free_slab(&deleg_slab); + kmem_cache_destroy(openowner_slab); + kmem_cache_destroy(lockowner_slab); + kmem_cache_destroy(file_slab); + kmem_cache_destroy(stateid_slab); + kmem_cache_destroy(deleg_slab); } int @@ -2352,42 +2636,38 @@ nfsd4_init_slabs(void) openowner_slab = kmem_cache_create("nfsd4_openowners", sizeof(struct nfs4_openowner), 0, 0, NULL); if (openowner_slab == NULL) - goto out_nomem; + goto out; lockowner_slab = kmem_cache_create("nfsd4_lockowners", sizeof(struct nfs4_lockowner), 0, 0, NULL); if (lockowner_slab == NULL) - goto out_nomem; + goto out_free_openowner_slab; file_slab = kmem_cache_create("nfsd4_files", sizeof(struct nfs4_file), 0, 0, NULL); if (file_slab == NULL) - goto out_nomem; + goto out_free_lockowner_slab; stateid_slab = kmem_cache_create("nfsd4_stateids", sizeof(struct nfs4_ol_stateid), 0, 0, NULL); if (stateid_slab == NULL) - goto out_nomem; + goto out_free_file_slab; deleg_slab = kmem_cache_create("nfsd4_delegations", sizeof(struct nfs4_delegation), 0, 0, NULL); if (deleg_slab == NULL) - goto out_nomem; + goto out_free_stateid_slab; return 0; -out_nomem: - nfsd4_free_slabs(); + +out_free_stateid_slab: + kmem_cache_destroy(stateid_slab); +out_free_file_slab: + kmem_cache_destroy(file_slab); +out_free_lockowner_slab: + kmem_cache_destroy(lockowner_slab); +out_free_openowner_slab: + kmem_cache_destroy(openowner_slab); +out: dprintk("nfsd4: out of memory while initializing nfsv4\n"); return -ENOMEM; } -void nfs4_free_openowner(struct nfs4_openowner *oo) -{ - kfree(oo->oo_owner.so_owner.data); - kmem_cache_free(openowner_slab, oo); -} - -void nfs4_free_lockowner(struct nfs4_lockowner *lo) -{ - kfree(lo->lo_owner.so_owner.data); - kmem_cache_free(lockowner_slab, lo); -} - static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; @@ -2443,9 +2723,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; - struct nfs4_client *clp = oo->oo_owner.so_client; - init_stid(&stp->st_stid, clp, NFS4_OPEN_STID); + stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_lockowners); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); @@ -2509,15 +2788,15 @@ find_file(struct inode *ino) unsigned int hashval = file_hashval(ino); struct nfs4_file *fp; - spin_lock(&recall_lock); - list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { + spin_lock(&state_lock); + hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { if (fp->fi_inode == ino) { get_nfs4_file(fp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return fp; } } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return NULL; } @@ -2533,8 +2812,6 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) struct nfs4_ol_stateid *stp; __be32 ret; - dprintk("NFSD: nfs4_share_conflict\n"); - fp = find_file(ino); if (!fp) return nfs_ok; @@ -2553,6 +2830,10 @@ out: static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { + struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&state_lock); /* We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the kernel @@ -2560,15 +2841,17 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * it's safe to take a reference: */ atomic_inc(&dp->dl_count); - list_add_tail(&dp->dl_recall_lru, &del_recall_lru); + list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); - /* only place dl_time is set. protected by lock_flocks*/ + /* Only place dl_time is set; protected by i_lock: */ dp->dl_time = get_seconds(); + block_delegations(&dp->dl_fh); + nfsd4_cb_recall(dp); } -/* Called from break_lease() with lock_flocks() held. */ +/* Called from break_lease() with i_lock held. */ static void nfsd_break_deleg_cb(struct file_lock *fl) { struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; @@ -2589,11 +2872,11 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&recall_lock); + spin_lock(&state_lock); fp->fi_had_conflict = true; list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) nfsd_break_one_deleg(dp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); } static @@ -2706,7 +2989,7 @@ static bool nfsd4_is_deleg_cur(struct nfsd4_open *open) } static __be32 -nfs4_check_deleg(struct nfs4_client *cl, struct nfs4_file *fp, struct nfsd4_open *open, +nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, struct nfs4_delegation **dp) { int flags; @@ -2849,7 +3132,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f return NULL; locks_init_lock(fl); fl->fl_lmops = &nfsd_lease_mng_ops; - fl->fl_flags = FL_LEASE; + fl->fl_flags = FL_DELEG; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_end = OFFSET_MAX; fl->fl_owner = (fl_owner_t)(dp->dl_file); @@ -2857,45 +3140,47 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f return fl; } -static int nfs4_setlease(struct nfs4_delegation *dp, int flag) +static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_file; struct file_lock *fl; int status; - fl = nfs4_alloc_init_lease(dp, flag); + fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); if (!fl) return -ENOMEM; fl->fl_file = find_readable_file(fp); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); - if (status) { - list_del_init(&dp->dl_perclnt); - locks_free_lock(fl); - return -ENOMEM; - } + if (status) + goto out_free; fp->fi_lease = fl; fp->fi_deleg_file = get_file(fl->fl_file); atomic_set(&fp->fi_delegees, 1); - list_add(&dp->dl_perfile, &fp->fi_delegations); + spin_lock(&state_lock); + hash_delegation_locked(dp, fp); + spin_unlock(&state_lock); return 0; +out_free: + locks_free_lock(fl); + return status; } -static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) +static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) { - struct nfs4_file *fp = dp->dl_file; - + if (fp->fi_had_conflict) + return -EAGAIN; + get_nfs4_file(fp); + dp->dl_file = fp; if (!fp->fi_lease) - return nfs4_setlease(dp, flag); - spin_lock(&recall_lock); + return nfs4_setlease(dp); + spin_lock(&state_lock); + atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return -EAGAIN; } - atomic_inc(&fp->fi_delegees); - list_add(&dp->dl_perfile, &fp->fi_delegations); - spin_unlock(&recall_lock); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + hash_delegation_locked(dp, fp); + spin_unlock(&state_lock); return 0; } @@ -2922,6 +3207,9 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) /* * Attempt to hand out a delegation. + * + * Note we don't support write delegations, and won't until the vfs has + * proper support for them. */ static void nfs4_open_delegation(struct net *net, struct svc_fh *fh, @@ -2930,39 +3218,46 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, struct nfs4_delegation *dp; struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); int cb_up; - int status = 0, flag = 0; + int status = 0; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); - flag = NFS4_OPEN_DELEGATE_NONE; open->op_recall = 0; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_PREVIOUS: if (!cb_up) open->op_recall = 1; - flag = open->op_delegate_type; - if (flag == NFS4_OPEN_DELEGATE_NONE) - goto out; + if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ) + goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: - /* Let's not give out any delegations till everyone's - * had the chance to reclaim theirs.... */ + case NFS4_OPEN_CLAIM_FH: + /* + * Let's not give out any delegations till everyone's + * had the chance to reclaim theirs.... + */ if (locks_in_grace(net)) - goto out; + goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) - goto out; + goto out_no_deleg; + /* + * Also, if the file was opened for write or + * create, there's a good chance the client's + * about to write to it, resulting in an + * immediate recall (since we don't support + * write delegations): + */ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - flag = NFS4_OPEN_DELEGATE_WRITE; - else - flag = NFS4_OPEN_DELEGATE_READ; + goto out_no_deleg; + if (open->op_create == NFS4_OPEN_CREATE) + goto out_no_deleg; break; default: - goto out; + goto out_no_deleg; } - - dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag); + dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); if (dp == NULL) goto out_no_deleg; - status = nfs4_set_delegation(dp, flag); + status = nfs4_set_delegation(dp, stp->st_file); if (status) goto out_free; @@ -2970,24 +3265,22 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", STATEID_VAL(&dp->dl_stid.sc_stateid)); -out: - open->op_delegate_type = flag; - if (flag == NFS4_OPEN_DELEGATE_NONE) { - if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && - open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) - dprintk("NFSD: WARNING: refusing delegation reclaim\n"); - - /* 4.1 client asking for a delegation? */ - if (open->op_deleg_want) - nfsd4_open_deleg_none_ext(open, status); - } + open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; return; out_free: - unhash_stid(&dp->dl_stid); - nfs4_put_delegation(dp); + destroy_delegation(dp); out_no_deleg: - flag = NFS4_OPEN_DELEGATE_NONE; - goto out; + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; + if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && + open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { + dprintk("NFSD: WARNING: refusing delegation reclaim\n"); + open->op_recall = 1; + } + + /* 4.1 client asking for a delegation? */ + if (open->op_deleg_want) + nfsd4_open_deleg_none_ext(open, status); + return; } static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, @@ -3031,7 +3324,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (fp) { if ((status = nfs4_check_open(fp, open, &stp))) goto out; - status = nfs4_check_deleg(cl, fp, open, &dp); + status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; } else { @@ -3192,8 +3485,7 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_delegation *dp; struct list_head *pos, *next, reaplist; time_t cutoff = get_seconds() - nn->nfsd4_lease; - time_t t, clientid_val = nn->nfsd4_lease; - time_t u, test_val = nn->nfsd4_lease; + time_t t, new_timeo = nn->nfsd4_lease; nfs4_lock_state(); @@ -3205,17 +3497,15 @@ nfs4_laundromat(struct nfsd_net *nn) clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { t = clp->cl_time - cutoff; - if (clientid_val > t) - clientid_val = t; + new_timeo = min(new_timeo, t); break; } - if (atomic_read(&clp->cl_refcount)) { + if (mark_client_expired_locked(clp)) { dprintk("NFSD: client in use (clientid %08x)\n", clp->cl_clientid.cl_id); continue; } - unhash_client_locked(clp); - list_add(&clp->cl_lru, &reaplist); + list_move(&clp->cl_lru, &reaplist); } spin_unlock(&nn->client_lock); list_for_each_safe(pos, next, &reaplist) { @@ -3224,39 +3514,35 @@ nfs4_laundromat(struct nfsd_net *nn) clp->cl_clientid.cl_id); expire_client(clp); } - spin_lock(&recall_lock); - list_for_each_safe(pos, next, &del_recall_lru) { + spin_lock(&state_lock); + list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) continue; if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { - u = dp->dl_time - cutoff; - if (test_val > u) - test_val = u; + t = dp->dl_time - cutoff; + new_timeo = min(new_timeo, t); break; } list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - unhash_delegation(dp); + revoke_delegation(dp); } - test_val = nn->nfsd4_lease; list_for_each_safe(pos, next, &nn->close_lru) { oo = container_of(pos, struct nfs4_openowner, oo_close_lru); if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { - u = oo->oo_time - cutoff; - if (test_val > u) - test_val = u; + t = oo->oo_time - cutoff; + new_timeo = min(new_timeo, t); break; } release_openowner(oo); } - if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) - clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; + new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); nfs4_unlock_state(); - return clientid_val; + return new_timeo; } static struct workqueue_struct *laundry_wq; @@ -3283,16 +3569,6 @@ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *s return nfs_ok; } -static int -STALE_STATEID(stateid_t *stateid, struct nfsd_net *nn) -{ - if (stateid->si_opaque.so_clid.cl_boot == nn->boot_time) - return 0; - dprintk("NFSD: stale stateid " STATEID_FMT "!\n", - STATEID_VAL(stateid)); - return 1; -} - static inline int access_permit_read(struct nfs4_ol_stateid *stp) { @@ -3355,7 +3631,7 @@ grace_disallows_io(struct net *net, struct inode *inode) /* Returns true iff a is later than b: */ static bool stateid_generation_after(stateid_t *a, stateid_t *b) { - return (s32)a->si_generation - (s32)b->si_generation > 0; + return (s32)(a->si_generation - b->si_generation) > 0; } static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) @@ -3409,13 +3685,24 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) status = check_stateid_generation(stateid, &s->sc_stateid, 1); if (status) return status; - if (!(s->sc_type & (NFS4_OPEN_STID | NFS4_LOCK_STID))) + switch (s->sc_type) { + case NFS4_DELEG_STID: return nfs_ok; - ols = openlockstateid(s); - if (ols->st_stateowner->so_is_open_owner - && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) + case NFS4_REVOKED_DELEG_STID: + return nfserr_deleg_revoked; + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + ols = openlockstateid(s); + if (ols->st_stateowner->so_is_open_owner + && !(openowner(ols->st_stateowner)->oo_flags + & NFS4_OO_CONFIRMED)) + return nfserr_bad_stateid; + return nfs_ok; + default: + printk("unknown stateid type %x\n", s->sc_type); + case NFS4_CLOSED_STID: return nfserr_bad_stateid; - return nfs_ok; + } } static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, @@ -3423,19 +3710,23 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfsd_net *nn) { struct nfs4_client *cl; + __be32 status; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; - if (STALE_STATEID(stateid, nn)) + status = lookup_clientid(&stateid->si_opaque.so_clid, sessions, + nn, &cl); + if (status == nfserr_stale_clientid) { + if (sessions) + return nfserr_bad_stateid; return nfserr_stale_stateid; - cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions, nn); - if (!cl) - return nfserr_expired; + } + if (status) + return status; *s = find_stateid_by_type(cl, stateid, typemask); if (!*s) return nfserr_bad_stateid; return nfs_ok; - } /* @@ -3451,6 +3742,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, struct svc_fh *current_fh = &cstate->current_fh; struct inode *ino = current_fh->fh_dentry->d_inode; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct file *file = NULL; __be32 status; if (filpp) @@ -3462,10 +3754,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return check_special_stateids(net, current_fh, stateid, flags); + nfs4_lock_state(); + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, cstate->minorversion, nn); if (status) - return status; + goto out; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); if (status) goto out; @@ -3476,8 +3770,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (status) goto out; if (filpp) { - *filpp = dp->dl_file->fi_deleg_file; - if (!*filpp) { + file = dp->dl_file->fi_deleg_file; + if (!file) { WARN_ON_ONCE(1); status = nfserr_serverfault; goto out; @@ -3498,25 +3792,36 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, goto out; if (filpp) { if (flags & RD_STATE) - *filpp = find_readable_file(stp->st_file); + file = find_readable_file(stp->st_file); else - *filpp = find_writeable_file(stp->st_file); + file = find_writeable_file(stp->st_file); } break; default: - return nfserr_bad_stateid; + status = nfserr_bad_stateid; + goto out; } status = nfs_ok; + if (file) + *filpp = get_file(file); out: + nfs4_unlock_state(); return status; } static __be32 nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) { - if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner))) + struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); + + if (check_for_locks(stp->st_file, lo)) return nfserr_locks_held; - release_lock_stateid(stp); + /* + * Currently there's a 1-1 lock stateid<->lockowner + * correspondance, and we have to delete the lockowner when we + * delete the lock stateid: + */ + release_lockowner(lo); return nfs_ok; } @@ -3545,6 +3850,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { stateid_t *stateid = &free_stateid->fr_stateid; struct nfs4_stid *s; + struct nfs4_delegation *dp; struct nfs4_client *cl = cstate->session->se_client; __be32 ret = nfserr_bad_stateid; @@ -3566,6 +3872,11 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, else ret = nfserr_locks_held; break; + case NFS4_REVOKED_DELEG_STID: + dp = delegstateid(s); + destroy_revoked_delegation(dp); + ret = nfs_ok; + break; default: ret = nfserr_bad_stateid; } @@ -3590,10 +3901,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status; - if (stp->st_stid.sc_type == NFS4_CLOSED_STID) + if (stp->st_stid.sc_type == NFS4_CLOSED_STID + || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID) /* * "Closed" stateid's exist *only* to return - * nfserr_replay_me from the previous step. + * nfserr_replay_me from the previous step, and + * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); @@ -3623,7 +3936,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, if (status) return status; *stpp = openlockstateid(s); - cstate->replay_owner = (*stpp)->st_stateowner; + if (!nfsd4_has_session(cstate)) + cstate->replay_owner = (*stpp)->st_stateowner; return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); } @@ -3653,9 +3967,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_open_confirm on file %pd\n", + cstate->current_fh.fh_dentry); status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status) @@ -3681,6 +3994,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; out: + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); return status; @@ -3731,9 +4045,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_open_downgrade on file %pd\n", + cstate->current_fh.fh_dentry); /* We don't yet support WANT bits: */ if (od->od_deleg_want) @@ -3764,31 +4077,12 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; out: + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); return status; } -void nfsd4_purge_closed_stateid(struct nfs4_stateowner *so) -{ - struct nfs4_openowner *oo; - struct nfs4_ol_stateid *s; - - if (!so->so_is_open_owner) - return; - oo = openowner(so); - s = oo->oo_last_closed_stid; - if (!s) - return; - if (!(oo->oo_flags & NFS4_OO_PURGE_CLOSE)) { - /* Release the last_closed_stid on the next seqid bump: */ - oo->oo_flags |= NFS4_OO_PURGE_CLOSE; - return; - } - oo->oo_flags &= ~NFS4_OO_PURGE_CLOSE; - release_last_closed_stateid(oo); -} - static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { unhash_open_stateid(s); @@ -3808,37 +4102,37 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("NFSD: nfsd4_close on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_close on file %pd\n", + cstate->current_fh.fh_dentry); nfs4_lock_state(); status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, &close->cl_stateid, NFS4_OPEN_STID|NFS4_CLOSED_STID, &stp, nn); + nfsd4_bump_seqid(cstate, status); if (status) goto out; oo = openowner(stp->st_stateowner); - status = nfs_ok; update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); nfsd4_close_open_stateid(stp); - release_last_closed_stateid(oo); - oo->oo_last_closed_stid = stp; + + if (cstate->minorversion) + free_generic_stateid(stp); + else + oo->oo_last_closed_stid = stp; if (list_empty(&oo->oo_owner.so_stateids)) { - if (cstate->minorversion) { + if (cstate->minorversion) release_openowner(oo); - cstate->replay_owner = NULL; - } else { + else { /* * In the 4.0 case we need to keep the owners around a * little while to handle CLOSE replay. */ - if (list_empty(&oo->oo_owner.so_stateids)) - move_to_close_lru(oo, SVC_NET(rqstp)); + move_to_close_lru(oo, SVC_NET(rqstp)); } } out: @@ -3870,7 +4164,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - unhash_delegation(dp); + destroy_delegation(dp); out: nfs4_unlock_state(); @@ -3967,6 +4261,10 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c if (!same_owner_str(&lo->lo_owner, owner, clid)) return false; + if (list_empty(&lo->lo_owner.so_stateids)) { + WARN_ON_ONCE(1); + return false; + } lst = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); return lst->st_file->fi_inode == inode; @@ -4031,7 +4329,7 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct stp = nfs4_alloc_stateid(clp); if (stp == NULL) return NULL; - init_stid(&stp->st_stid, clp, NFS4_LOCK_STID); + stp->st_stid.sc_type = NFS4_LOCK_STID; list_add(&stp->st_perfile, &fp->fi_stateids); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; @@ -4248,6 +4546,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, out: if (status && new_state) release_lockowner(lock_sop); + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); if (file_lock) @@ -4402,21 +4701,16 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, locku->lu_length); nfs4_transform_lock_offset(file_lock); - /* - * Try to unlock the file in the VFS. - */ err = vfs_lock_file(filp, F_SETLK, file_lock, NULL); if (err) { dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); goto out_nfserr; } - /* - * OK, unlock succeeded; the only thing left to do is update the stateid. - */ update_stateid(&stp->st_stid.sc_stateid); memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); out: + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); if (file_lock) @@ -4440,7 +4734,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) struct inode *inode = filp->fi_inode; int status = 0; - lock_flocks(); + spin_lock(&inode->i_lock); for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { if ((*flpp)->fl_owner == (fl_owner_t)lowner) { status = 1; @@ -4448,7 +4742,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) } } out: - unlock_flocks(); + spin_unlock(&inode->i_lock); return status; } @@ -4609,6 +4903,8 @@ nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn) u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) { + if (mark_client_expired(clp)) + return 0; expire_client(clp); return 1; } @@ -4695,6 +4991,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, struct nfs4_delegation *dp, *next; u64 count = 0; + lockdep_assert_held(&state_lock); list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { if (victims) list_move(&dp->dl_recall_lru, victims); @@ -4710,12 +5007,12 @@ u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) LIST_HEAD(victims); u64 count; - spin_lock(&recall_lock); + spin_lock(&state_lock); count = nfsd_find_all_delegations(clp, max, &victims); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) - unhash_delegation(dp); + revoke_delegation(dp); return count; } @@ -4726,11 +5023,11 @@ u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) LIST_HEAD(victims); u64 count; - spin_lock(&recall_lock); + spin_lock(&state_lock); count = nfsd_find_all_delegations(clp, max, &victims); list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) nfsd_break_one_deleg(dp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return count; } @@ -4739,9 +5036,9 @@ u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) { u64 count = 0; - spin_lock(&recall_lock); + spin_lock(&state_lock); count = nfsd_find_all_delegations(clp, max, NULL); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); nfsd_print_count(clp, count, "delegations"); return count; @@ -4782,19 +5079,6 @@ struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_ #endif /* CONFIG_NFSD_FAULT_INJECTION */ -/* initialization to perform at module load time: */ - -void -nfs4_state_init(void) -{ - int i; - - for (i = 0; i < FILE_HASH_SIZE; i++) { - INIT_LIST_HEAD(&file_hashtbl[i]); - } - INIT_LIST_HEAD(&del_recall_lru); -} - /* * Since the lifetime of a delegation isn't limited to that of an open, a * client may quite reasonably hang on to a delegation as long as it has @@ -4856,6 +5140,7 @@ static int nfs4_state_create_net(struct net *net) nn->unconf_name_tree = RB_ROOT; INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); + INIT_LIST_HEAD(&nn->del_recall_lru); spin_lock_init(&nn->client_lock); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); @@ -4881,7 +5166,6 @@ nfs4_state_destroy_net(struct net *net) int i; struct nfs4_client *clp = NULL; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct rb_node *node, *tmp; for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&nn->conf_id_hashtbl[i])) { @@ -4890,13 +5174,11 @@ nfs4_state_destroy_net(struct net *net) } } - node = rb_first(&nn->unconf_name_tree); - while (node != NULL) { - tmp = node; - node = rb_next(tmp); - clp = rb_entry(tmp, struct nfs4_client, cl_namenode); - rb_erase(tmp, &nn->unconf_name_tree); - destroy_client(clp); + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&nn->unconf_id_hashtbl[i])) { + clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + destroy_client(clp); + } } kfree(nn->sessionid_hashtbl); @@ -4913,16 +5195,6 @@ nfs4_state_start_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; - /* - * FIXME: For now, we hang most of the pernet global stuff off of - * init_net until nfsd is fully containerized. Eventually, we'll - * need to pass a net pointer into this function, take a reference - * to that instead and then do most of the rest of this on a per-net - * basis. - */ - if (net != &init_net) - return -EINVAL; - ret = nfs4_state_create_net(net); if (ret) return ret; @@ -4965,7 +5237,6 @@ out_recovery: return ret; } -/* should be called with the state lock held */ void nfs4_state_shutdown_net(struct net *net) { @@ -4976,22 +5247,22 @@ nfs4_state_shutdown_net(struct net *net) cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); + nfs4_lock_state(); INIT_LIST_HEAD(&reaplist); - spin_lock(&recall_lock); - list_for_each_safe(pos, next, &del_recall_lru) { + spin_lock(&state_lock); + list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - if (dp->dl_stid.sc_client->net != net) - continue; list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - unhash_delegation(dp); + destroy_delegation(dp); } nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); + nfs4_unlock_state(); } void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0dc11586682..944275c8f56 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -55,6 +55,11 @@ #include "cache.h" #include "netns.h" +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include <linux/security.h> +#endif + + #define NFSDDBG_FACILITY NFSDDBG_XDR /* @@ -93,16 +98,6 @@ xdr_error: \ status = nfserr_bad_xdr; \ goto out -#define READ32(x) (x) = ntohl(*p++) -#define READ64(x) do { \ - (x) = (u64)ntohl(*p++) << 32; \ - (x) |= ntohl(*p++); \ -} while (0) -#define READTIME(x) do { \ - p++; \ - (x) = ntohl(*p++); \ - p++; \ -} while (0) #define READMEM(x,nbytes) do { \ x = (char *)p; \ p += XDR_QUADLEN(nbytes); \ @@ -134,6 +129,19 @@ xdr_error: \ } \ } while (0) +static void next_decode_page(struct nfsd4_compoundargs *argp) +{ + argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen < PAGE_SIZE) { + argp->end = argp->p + (argp->pagelen>>2); + argp->pagelen = 0; + } else { + argp->end = argp->p + (PAGE_SIZE>>2); + argp->pagelen -= PAGE_SIZE; + } +} + static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) { /* We want more bytes than seem to be available. @@ -161,16 +169,7 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) * guarantee p points to at least nbytes bytes. */ memcpy(p, argp->p, avail); - /* step to next page */ - argp->p = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); - argp->pagelen = 0; - } else { - argp->end = argp->p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } + next_decode_page(argp); memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); argp->p += XDR_QUADLEN(nbytes - avail); return p; @@ -181,6 +180,15 @@ static int zero_clientid(clientid_t *clid) return (clid->cl_boot == 0) && (clid->cl_id == 0); } +/** + * defer_free - mark an allocation as deferred freed + * @argp: NFSv4 compound argument structure to be freed with + * @release: release callback to free @p, typically kfree() + * @p: pointer to be freed + * + * Marks @p to be freed when processing the compound operation + * described in @argp finishes. + */ static int defer_free(struct nfsd4_compoundargs *argp, void (*release)(const void *), void *p) @@ -197,6 +205,16 @@ defer_free(struct nfsd4_compoundargs *argp, return 0; } +/** + * savemem - duplicate a chunk of memory for later processing + * @argp: NFSv4 compound argument structure to be freed with + * @p: pointer to be duplicated + * @nbytes: length to be duplicated + * + * Returns a pointer to a copy of @nbytes bytes of memory at @p + * that are preserved until processing of the NFSv4 compound + * operation described by @argp finishes. + */ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { if (p == argp->tmp) { @@ -225,29 +243,30 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) bmval[2] = 0; READ_BUF(4); - READ32(bmlen); + bmlen = be32_to_cpup(p++); if (bmlen > 1000) goto xdr_error; READ_BUF(bmlen << 2); if (bmlen > 0) - READ32(bmval[0]); + bmval[0] = be32_to_cpup(p++); if (bmlen > 1) - READ32(bmval[1]); + bmval[1] = be32_to_cpup(p++); if (bmlen > 2) - READ32(bmval[2]); + bmval[2] = be32_to_cpup(p++); DECODE_TAIL; } static __be32 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, - struct iattr *iattr, struct nfs4_acl **acl) + struct iattr *iattr, struct nfs4_acl **acl, + struct xdr_netobj *label) { int expected_len, len = 0; u32 dummy32; + u64 sec; char *buf; - int host_err; DECODE_HEAD; iattr->ia_valid = 0; @@ -255,51 +274,50 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return status; READ_BUF(4); - READ32(expected_len); + expected_len = be32_to_cpup(p++); if (bmval[0] & FATTR4_WORD0_SIZE) { READ_BUF(8); len += 8; - READ64(iattr->ia_size); + p = xdr_decode_hyper(p, &iattr->ia_size); iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - int nace; + u32 nace; struct nfs4_ace *ace; READ_BUF(4); len += 4; - READ32(nace); + nace = be32_to_cpup(p++); if (nace > NFS4_ACL_MAX) - return nfserr_resource; + return nfserr_fbig; *acl = nfs4_acl_new(nace); - if (*acl == NULL) { - host_err = -ENOMEM; - goto out_nfserr; - } + if (*acl == NULL) + return nfserr_jukebox; + defer_free(argp, kfree, *acl); (*acl)->naces = nace; for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { READ_BUF(16); len += 16; - READ32(ace->type); - READ32(ace->flag); - READ32(ace->access_mask); - READ32(dummy32); + ace->type = be32_to_cpup(p++); + ace->flag = be32_to_cpup(p++); + ace->access_mask = be32_to_cpup(p++); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += XDR_QUADLEN(dummy32) << 2; READMEM(buf, dummy32); ace->whotype = nfs4_acl_get_whotype(buf, dummy32); status = nfs_ok; if (ace->whotype != NFS4_ACL_WHO_NAMED) - ace->who = 0; + ; else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) status = nfsd_map_name_to_gid(argp->rqstp, - buf, dummy32, &ace->who); + buf, dummy32, &ace->who_gid); else status = nfsd_map_name_to_uid(argp->rqstp, - buf, dummy32, &ace->who); + buf, dummy32, &ace->who_uid); if (status) return status; } @@ -308,14 +326,14 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_MODE) { READ_BUF(4); len += 4; - READ32(iattr->ia_mode); + iattr->ia_mode = be32_to_cpup(p++); iattr->ia_mode &= (S_IFMT | S_IALLUGO); iattr->ia_valid |= ATTR_MODE; } if (bmval[1] & FATTR4_WORD1_OWNER) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); @@ -326,7 +344,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); @@ -337,18 +355,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: /* We require the high 32 bits of 'seconds' to be 0, and we ignore all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_atime.tv_sec); - READ32(iattr->ia_atime.tv_nsec); + p = xdr_decode_hyper(p, &sec); + iattr->ia_atime.tv_sec = (time_t)sec; + iattr->ia_atime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -363,18 +379,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: /* We require the high 32 bits of 'seconds' to be 0, and we ignore all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_mtime.tv_sec); - READ32(iattr->ia_mtime.tv_nsec); + p = xdr_decode_hyper(p, &sec); + iattr->ia_mtime.tv_sec = sec; + iattr->ia_mtime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -386,6 +400,33 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, goto xdr_error; } } + + label->len = 0; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */ + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */ + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); + READ_BUF(dummy32); + if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) + return nfserr_badlabel; + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + label->data = kzalloc(dummy32 + 1, GFP_KERNEL); + if (!label->data) + return nfserr_jukebox; + label->len = dummy32; + defer_free(argp, kfree, label->data); + memcpy(label->data, buf, dummy32); + } +#endif + if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) @@ -394,10 +435,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, goto xdr_error; DECODE_TAIL; - -out_nfserr: - status = nfserrno(host_err); - goto out; } static __be32 @@ -406,7 +443,7 @@ nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) DECODE_HEAD; READ_BUF(sizeof(stateid_t)); - READ32(sid->si_generation); + sid->si_generation = be32_to_cpup(p++); COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); DECODE_TAIL; @@ -418,7 +455,7 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access DECODE_HEAD; READ_BUF(4); - READ32(access->ac_req_access); + access->ac_req_access = be32_to_cpup(p++); DECODE_TAIL; } @@ -433,11 +470,15 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ /* callback_sec_params4 */ READ_BUF(4); - READ32(nr_secflavs); - cbs->flavor = (u32)(-1); + nr_secflavs = be32_to_cpup(p++); + if (nr_secflavs) + cbs->flavor = (u32)(-1); + else + /* Is this legal? Be generous, take it to mean AUTH_NONE: */ + cbs->flavor = 0; for (i = 0; i < nr_secflavs; ++i) { READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); switch (dummy) { case RPC_AUTH_NULL: /* Nothing to read */ @@ -447,26 +488,33 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ case RPC_AUTH_UNIX: READ_BUF(8); /* stamp */ - READ32(dummy); + dummy = be32_to_cpup(p++); /* machine name */ - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); SAVEMEM(machine_name, dummy); /* uid, gid */ READ_BUF(8); - READ32(uid); - READ32(gid); + uid = be32_to_cpup(p++); + gid = be32_to_cpup(p++); /* more gids */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); if (cbs->flavor == (u32)(-1)) { - cbs->uid = uid; - cbs->gid = gid; - cbs->flavor = RPC_AUTH_UNIX; + kuid_t kuid = make_kuid(&init_user_ns, uid); + kgid_t kgid = make_kgid(&init_user_ns, gid); + if (uid_valid(kuid) && gid_valid(kgid)) { + cbs->uid = kuid; + cbs->gid = kgid; + cbs->flavor = RPC_AUTH_UNIX; + } else { + dprintk("RPC_AUTH_UNIX with invalid" + "uid or gid ignoring!\n"); + } } break; case RPC_AUTH_GSS: @@ -474,14 +522,14 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ "not supported!\n"); READ_BUF(8); /* gcbp_service */ - READ32(dummy); + dummy = be32_to_cpup(p++); /* gcbp_handle_from_server */ - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); /* gcbp_handle_from_client */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); break; default: @@ -497,7 +545,7 @@ static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, stru DECODE_HEAD; READ_BUF(4); - READ32(bc->bc_cb_program); + bc->bc_cb_program = be32_to_cpup(p++); nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); DECODE_TAIL; @@ -509,7 +557,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); - READ32(bcts->dir); + bcts->dir = be32_to_cpup(p++); /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker * could help us figure out we should be using it. */ DECODE_TAIL; @@ -521,7 +569,7 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) DECODE_HEAD; READ_BUF(4); - READ32(close->cl_seqid); + close->cl_seqid = be32_to_cpup(p++); return nfsd4_decode_stateid(argp, &close->cl_stateid); DECODE_TAIL; @@ -534,8 +582,8 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit DECODE_HEAD; READ_BUF(12); - READ64(commit->co_offset); - READ32(commit->co_count); + p = xdr_decode_hyper(p, &commit->co_offset); + commit->co_count = be32_to_cpup(p++); DECODE_TAIL; } @@ -546,19 +594,30 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create DECODE_HEAD; READ_BUF(4); - READ32(create->cr_type); + create->cr_type = be32_to_cpup(p++); switch (create->cr_type) { case NF4LNK: READ_BUF(4); - READ32(create->cr_linklen); + create->cr_linklen = be32_to_cpup(p++); READ_BUF(create->cr_linklen); - SAVEMEM(create->cr_linkname, create->cr_linklen); + /* + * The VFS will want a null-terminated string, and + * null-terminating in place isn't safe since this might + * end on a page boundary: + */ + create->cr_linkname = + kmalloc(create->cr_linklen + 1, GFP_KERNEL); + if (!create->cr_linkname) + return nfserr_jukebox; + memcpy(create->cr_linkname, p, create->cr_linklen); + create->cr_linkname[create->cr_linklen] = '\0'; + defer_free(argp, kfree, create->cr_linkname); break; case NF4BLK: case NF4CHR: READ_BUF(8); - READ32(create->cr_specdata1); - READ32(create->cr_specdata2); + create->cr_specdata1 = be32_to_cpup(p++); + create->cr_specdata2 = be32_to_cpup(p++); break; case NF4SOCK: case NF4FIFO: @@ -568,14 +627,14 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create } READ_BUF(4); - READ32(create->cr_namelen); + create->cr_namelen = be32_to_cpup(p++); READ_BUF(create->cr_namelen); SAVEMEM(create->cr_name, create->cr_namelen); if ((status = check_filename(create->cr_name, create->cr_namelen))) return status; status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, - &create->cr_acl); + &create->cr_acl, &create->cr_label); if (status) goto out; @@ -600,7 +659,7 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) DECODE_HEAD; READ_BUF(4); - READ32(link->li_namelen); + link->li_namelen = be32_to_cpup(p++); READ_BUF(link->li_namelen); SAVEMEM(link->li_name, link->li_namelen); if ((status = check_filename(link->li_name, link->li_namelen))) @@ -618,24 +677,24 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) * type, reclaim(boolean), offset, length, new_lock_owner(boolean) */ READ_BUF(28); - READ32(lock->lk_type); + lock->lk_type = be32_to_cpup(p++); if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) goto xdr_error; - READ32(lock->lk_reclaim); - READ64(lock->lk_offset); - READ64(lock->lk_length); - READ32(lock->lk_is_new); + lock->lk_reclaim = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &lock->lk_offset); + p = xdr_decode_hyper(p, &lock->lk_length); + lock->lk_is_new = be32_to_cpup(p++); if (lock->lk_is_new) { READ_BUF(4); - READ32(lock->lk_new_open_seqid); + lock->lk_new_open_seqid = be32_to_cpup(p++); status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); if (status) return status; READ_BUF(8 + sizeof(clientid_t)); - READ32(lock->lk_new_lock_seqid); + lock->lk_new_lock_seqid = be32_to_cpup(p++); COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); - READ32(lock->lk_new_owner.len); + lock->lk_new_owner.len = be32_to_cpup(p++); READ_BUF(lock->lk_new_owner.len); READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); } else { @@ -643,7 +702,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) if (status) return status; READ_BUF(4); - READ32(lock->lk_old_lock_seqid); + lock->lk_old_lock_seqid = be32_to_cpup(p++); } DECODE_TAIL; @@ -655,13 +714,13 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) DECODE_HEAD; READ_BUF(32); - READ32(lockt->lt_type); + lockt->lt_type = be32_to_cpup(p++); if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) goto xdr_error; - READ64(lockt->lt_offset); - READ64(lockt->lt_length); + p = xdr_decode_hyper(p, &lockt->lt_offset); + p = xdr_decode_hyper(p, &lockt->lt_length); COPYMEM(&lockt->lt_clientid, 8); - READ32(lockt->lt_owner.len); + lockt->lt_owner.len = be32_to_cpup(p++); READ_BUF(lockt->lt_owner.len); READMEM(lockt->lt_owner.data, lockt->lt_owner.len); @@ -674,16 +733,16 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) DECODE_HEAD; READ_BUF(8); - READ32(locku->lu_type); + locku->lu_type = be32_to_cpup(p++); if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) goto xdr_error; - READ32(locku->lu_seqid); + locku->lu_seqid = be32_to_cpup(p++); status = nfsd4_decode_stateid(argp, &locku->lu_stateid); if (status) return status; READ_BUF(16); - READ64(locku->lu_offset); - READ64(locku->lu_length); + p = xdr_decode_hyper(p, &locku->lu_offset); + p = xdr_decode_hyper(p, &locku->lu_length); DECODE_TAIL; } @@ -694,7 +753,7 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup DECODE_HEAD; READ_BUF(4); - READ32(lookup->lo_len); + lookup->lo_len = be32_to_cpup(p++); READ_BUF(lookup->lo_len); SAVEMEM(lookup->lo_name, lookup->lo_len); if ((status = check_filename(lookup->lo_name, lookup->lo_len))) @@ -709,7 +768,7 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh u32 w; READ_BUF(4); - READ32(w); + w = be32_to_cpup(p++); *share_access = w & NFS4_SHARE_ACCESS_MASK; *deleg_want = w & NFS4_SHARE_WANT_MASK; if (deleg_when) @@ -761,7 +820,7 @@ static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) __be32 *p; READ_BUF(4); - READ32(*x); + *x = be32_to_cpup(p++); /* Note: unlinke access bits, deny bits may be zero. */ if (*x & ~NFS4_SHARE_DENY_BOTH) return nfserr_bad_xdr; @@ -775,7 +834,7 @@ static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_ne __be32 *p; READ_BUF(4); - READ32(o->len); + o->len = be32_to_cpup(p++); if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT) return nfserr_bad_xdr; @@ -797,9 +856,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) open->op_iattr.ia_valid = 0; open->op_openowner = NULL; + open->op_xdr_error = 0; /* seqid, share_access, share_deny, clientid, ownerlen */ READ_BUF(4); - READ32(open->op_seqid); + open->op_seqid = be32_to_cpup(p++); /* decode, yet ignore deleg_when until supported */ status = nfsd4_decode_share_access(argp, &open->op_share_access, &open->op_deleg_want, &dummy); @@ -814,18 +874,18 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) if (status) goto xdr_error; READ_BUF(4); - READ32(open->op_create); + open->op_create = be32_to_cpup(p++); switch (open->op_create) { case NFS4_OPEN_NOCREATE: break; case NFS4_OPEN_CREATE: READ_BUF(4); - READ32(open->op_createmode); + open->op_createmode = be32_to_cpup(p++); switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl, &open->op_label); if (status) goto out; break; @@ -839,7 +899,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ_BUF(NFS4_VERIFIER_SIZE); COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl, &open->op_label); if (status) goto out; break; @@ -853,12 +913,12 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) /* open_claim */ READ_BUF(4); - READ32(open->op_claim_type); + open->op_claim_type = be32_to_cpup(p++); switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_PREV: READ_BUF(4); - READ32(open->op_fname.len); + open->op_fname.len = be32_to_cpup(p++); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); if ((status = check_filename(open->op_fname.data, open->op_fname.len))) @@ -866,14 +926,14 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) break; case NFS4_OPEN_CLAIM_PREVIOUS: READ_BUF(4); - READ32(open->op_delegate_type); + open->op_delegate_type = be32_to_cpup(p++); break; case NFS4_OPEN_CLAIM_DELEGATE_CUR: status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); if (status) return status; READ_BUF(4); - READ32(open->op_fname.len); + open->op_fname.len = be32_to_cpup(p++); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); if ((status = check_filename(open->op_fname.data, open->op_fname.len))) @@ -903,13 +963,16 @@ static __be32 nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) { DECODE_HEAD; - + + if (argp->minorversion >= 1) + return nfserr_notsupp; + status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); if (status) return status; READ_BUF(4); - READ32(open_conf->oc_seqid); - + open_conf->oc_seqid = be32_to_cpup(p++); + DECODE_TAIL; } @@ -922,7 +985,7 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d if (status) return status; READ_BUF(4); - READ32(open_down->od_seqid); + open_down->od_seqid = be32_to_cpup(p++); status = nfsd4_decode_share_access(argp, &open_down->od_share_access, &open_down->od_deleg_want, NULL); if (status) @@ -939,7 +1002,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) DECODE_HEAD; READ_BUF(4); - READ32(putfh->pf_fhlen); + putfh->pf_fhlen = be32_to_cpup(p++); if (putfh->pf_fhlen > NFS4_FHSIZE) goto xdr_error; READ_BUF(putfh->pf_fhlen); @@ -949,6 +1012,14 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) } static __be32 +nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) +{ + if (argp->minorversion == 0) + return nfs_ok; + return nfserr_notsupp; +} + +static __be32 nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) { DECODE_HEAD; @@ -957,8 +1028,8 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) if (status) return status; READ_BUF(12); - READ64(read->rd_offset); - READ32(read->rd_length); + p = xdr_decode_hyper(p, &read->rd_offset); + read->rd_length = be32_to_cpup(p++); DECODE_TAIL; } @@ -969,10 +1040,10 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read DECODE_HEAD; READ_BUF(24); - READ64(readdir->rd_cookie); + p = xdr_decode_hyper(p, &readdir->rd_cookie); COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); - READ32(readdir->rd_dircount); /* just in case you needed a useless field... */ - READ32(readdir->rd_maxcount); + readdir->rd_dircount = be32_to_cpup(p++); + readdir->rd_maxcount = be32_to_cpup(p++); if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) goto out; @@ -985,7 +1056,7 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove DECODE_HEAD; READ_BUF(4); - READ32(remove->rm_namelen); + remove->rm_namelen = be32_to_cpup(p++); READ_BUF(remove->rm_namelen); SAVEMEM(remove->rm_name, remove->rm_namelen); if ((status = check_filename(remove->rm_name, remove->rm_namelen))) @@ -1000,10 +1071,10 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename DECODE_HEAD; READ_BUF(4); - READ32(rename->rn_snamelen); + rename->rn_snamelen = be32_to_cpup(p++); READ_BUF(rename->rn_snamelen + 4); SAVEMEM(rename->rn_sname, rename->rn_snamelen); - READ32(rename->rn_tnamelen); + rename->rn_tnamelen = be32_to_cpup(p++); READ_BUF(rename->rn_tnamelen); SAVEMEM(rename->rn_tname, rename->rn_tnamelen); if ((status = check_filename(rename->rn_sname, rename->rn_snamelen))) @@ -1019,6 +1090,9 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(sizeof(clientid_t)); COPYMEM(clientid, sizeof(clientid_t)); @@ -1032,7 +1106,7 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(4); - READ32(secinfo->si_namelen); + secinfo->si_namelen = be32_to_cpup(p++); READ_BUF(secinfo->si_namelen); SAVEMEM(secinfo->si_name, secinfo->si_namelen); status = check_filename(secinfo->si_name, secinfo->si_namelen); @@ -1048,7 +1122,7 @@ nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(4); - READ32(sin->sin_style); + sin->sin_style = be32_to_cpup(p++); DECODE_TAIL; } @@ -1061,7 +1135,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta if (status) return status; return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, - &setattr->sa_acl); + &setattr->sa_acl, &setattr->sa_label); } static __be32 @@ -1069,6 +1143,9 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(NFS4_VERIFIER_SIZE); COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE); @@ -1076,16 +1153,16 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient if (status) return nfserr_bad_xdr; READ_BUF(8); - READ32(setclientid->se_callback_prog); - READ32(setclientid->se_callback_netid_len); + setclientid->se_callback_prog = be32_to_cpup(p++); + setclientid->se_callback_netid_len = be32_to_cpup(p++); READ_BUF(setclientid->se_callback_netid_len + 4); SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); - READ32(setclientid->se_callback_addr_len); + setclientid->se_callback_addr_len = be32_to_cpup(p++); READ_BUF(setclientid->se_callback_addr_len + 4); SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); - READ32(setclientid->se_callback_ident); + setclientid->se_callback_ident = be32_to_cpup(p++); DECODE_TAIL; } @@ -1095,6 +1172,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(8 + NFS4_VERIFIER_SIZE); COPYMEM(&scd_c->sc_clientid, 8); COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE); @@ -1115,7 +1195,7 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify * nfsd4_proc_verify */ READ_BUF(4); - READ32(verify->ve_attrlen); + verify->ve_attrlen = be32_to_cpup(p++); READ_BUF(verify->ve_attrlen); SAVEMEM(verify->ve_attrval, verify->ve_attrlen); @@ -1133,11 +1213,11 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) if (status) return status; READ_BUF(16); - READ64(write->wr_offset); - READ32(write->wr_stable_how); + p = xdr_decode_hyper(p, &write->wr_offset); + write->wr_stable_how = be32_to_cpup(p++); if (write->wr_stable_how > 2) goto xdr_error; - READ32(write->wr_buflen); + write->wr_buflen = be32_to_cpup(p++); /* Sorry .. no magic macros for this.. * * READ_BUF(write->wr_buflen); @@ -1151,7 +1231,6 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) } write->wr_head.iov_base = p; write->wr_head.iov_len = avail; - WARN_ON(avail != (XDR_QUADLEN(avail) << 2)); write->wr_pagelist = argp->pagelist; len = XDR_QUADLEN(write->wr_buflen) << 2; @@ -1166,6 +1245,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) len -= pages * PAGE_SIZE; argp->p = (__be32 *)page_address(argp->pagelist[0]); + argp->pagelist++; argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); } argp->p += XDR_QUADLEN(len); @@ -1178,9 +1258,12 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(12); COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); - READ32(rlockowner->rl_owner.len); + rlockowner->rl_owner.len = be32_to_cpup(p++); READ_BUF(rlockowner->rl_owner.len); READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); @@ -1204,63 +1287,63 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, return nfserr_bad_xdr; READ_BUF(4); - READ32(exid->flags); + exid->flags = be32_to_cpup(p++); /* Ignore state_protect4_a */ READ_BUF(4); - READ32(exid->spa_how); + exid->spa_how = be32_to_cpup(p++); switch (exid->spa_how) { case SP4_NONE: break; case SP4_MACH_CRED: /* spo_must_enforce */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; /* spo_must_allow */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; break; case SP4_SSV: /* ssp_ops */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; /* ssp_hash_algs<> */ READ_BUF(4); - READ32(tmp); + tmp = be32_to_cpup(p++); while (tmp--) { READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); } /* ssp_encr_algs<> */ READ_BUF(4); - READ32(tmp); + tmp = be32_to_cpup(p++); while (tmp--) { READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); } /* ssp_window and ssp_num_gss_handles */ READ_BUF(8); - READ32(dummy); - READ32(dummy); + dummy = be32_to_cpup(p++); + dummy = be32_to_cpup(p++); break; default: goto xdr_error; @@ -1268,7 +1351,7 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, /* Ignore Implementation ID */ READ_BUF(4); /* nfs_impl_id4 array length */ - READ32(dummy); + dummy = be32_to_cpup(p++); if (dummy > 1) goto xdr_error; @@ -1276,13 +1359,13 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, if (dummy == 1) { /* nii_domain */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); /* nii_name */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); @@ -1302,21 +1385,21 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, READ_BUF(16); COPYMEM(&sess->clientid, 8); - READ32(sess->seqid); - READ32(sess->flags); + sess->seqid = be32_to_cpup(p++); + sess->flags = be32_to_cpup(p++); /* Fore channel attrs */ READ_BUF(28); - READ32(dummy); /* headerpadsz is always 0 */ - READ32(sess->fore_channel.maxreq_sz); - READ32(sess->fore_channel.maxresp_sz); - READ32(sess->fore_channel.maxresp_cached); - READ32(sess->fore_channel.maxops); - READ32(sess->fore_channel.maxreqs); - READ32(sess->fore_channel.nr_rdma_attrs); + dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ + sess->fore_channel.maxreq_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_cached = be32_to_cpup(p++); + sess->fore_channel.maxops = be32_to_cpup(p++); + sess->fore_channel.maxreqs = be32_to_cpup(p++); + sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++); if (sess->fore_channel.nr_rdma_attrs == 1) { READ_BUF(4); - READ32(sess->fore_channel.rdma_attrs); + sess->fore_channel.rdma_attrs = be32_to_cpup(p++); } else if (sess->fore_channel.nr_rdma_attrs > 1) { dprintk("Too many fore channel attr bitmaps!\n"); goto xdr_error; @@ -1324,23 +1407,23 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, /* Back channel attrs */ READ_BUF(28); - READ32(dummy); /* headerpadsz is always 0 */ - READ32(sess->back_channel.maxreq_sz); - READ32(sess->back_channel.maxresp_sz); - READ32(sess->back_channel.maxresp_cached); - READ32(sess->back_channel.maxops); - READ32(sess->back_channel.maxreqs); - READ32(sess->back_channel.nr_rdma_attrs); + dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ + sess->back_channel.maxreq_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_cached = be32_to_cpup(p++); + sess->back_channel.maxops = be32_to_cpup(p++); + sess->back_channel.maxreqs = be32_to_cpup(p++); + sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++); if (sess->back_channel.nr_rdma_attrs == 1) { READ_BUF(4); - READ32(sess->back_channel.rdma_attrs); + sess->back_channel.rdma_attrs = be32_to_cpup(p++); } else if (sess->back_channel.nr_rdma_attrs > 1) { dprintk("Too many back channel attr bitmaps!\n"); goto xdr_error; } READ_BUF(4); - READ32(sess->callback_prog); + sess->callback_prog = be32_to_cpup(p++); nfsd4_decode_cb_sec(argp, &sess->cb_sec); DECODE_TAIL; } @@ -1363,7 +1446,7 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(sizeof(stateid_t)); - READ32(free_stateid->fr_stateid.si_generation); + free_stateid->fr_stateid.si_generation = be32_to_cpup(p++); COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t)); DECODE_TAIL; @@ -1377,10 +1460,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); - READ32(seq->seqid); - READ32(seq->slotid); - READ32(seq->maxslots); - READ32(seq->cachethis); + seq->seqid = be32_to_cpup(p++); + seq->slotid = be32_to_cpup(p++); + seq->maxslots = be32_to_cpup(p++); + seq->cachethis = be32_to_cpup(p++); DECODE_TAIL; } @@ -1437,7 +1520,7 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str DECODE_HEAD; READ_BUF(4); - READ32(rc->rca_one_fs); + rc->rca_one_fs = be32_to_cpup(p++); DECODE_TAIL; } @@ -1477,7 +1560,7 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, [OP_READ] = (nfsd4_dec)nfsd4_decode_read, [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, @@ -1494,46 +1577,6 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, -}; - -static nfsd4_dec nfsd41_dec_ops[] = { - [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, - [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, - [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, - [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, - [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, - [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, - [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, - [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, - [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, - [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, - [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, - [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, - [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, - [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, - [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_READ] = (nfsd4_dec)nfsd4_decode_read, - [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, - [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, - [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, - [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, - [OP_RENEW] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, - [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, - [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp, - [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp, /* new operations for NFSv4.1 */ [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, @@ -1557,31 +1600,39 @@ static nfsd4_dec nfsd41_dec_ops[] = { [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, }; -struct nfsd4_minorversion_ops { - nfsd4_dec *decoders; - int nops; -}; - -static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { - [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, - [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, -}; +static inline bool +nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) +{ + if (op->opnum < FIRST_NFS4_OP) + return false; + else if (argp->minorversion == 0 && op->opnum > LAST_NFS40_OP) + return false; + else if (argp->minorversion == 1 && op->opnum > LAST_NFS41_OP) + return false; + else if (argp->minorversion == 2 && op->opnum > LAST_NFS42_OP) + return false; + return true; +} static __be32 nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { DECODE_HEAD; struct nfsd4_op *op; - struct nfsd4_minorversion_ops *ops; bool cachethis = false; + int auth_slack= argp->rqstp->rq_auth_slack; + int max_reply = auth_slack + 8; /* opcnt, status */ + int readcount = 0; + int readbytes = 0; int i; READ_BUF(4); - READ32(argp->taglen); + argp->taglen = be32_to_cpup(p++); READ_BUF(argp->taglen + 8); SAVEMEM(argp->tag, argp->taglen); - READ32(argp->minorversion); - READ32(argp->opcnt); + argp->minorversion = be32_to_cpup(p++); + argp->opcnt = be32_to_cpup(p++); + max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); if (argp->taglen > NFSD4_MAX_TAGLEN) goto xdr_error; @@ -1597,140 +1648,98 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) } } - if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion)) + if (argp->minorversion > NFSD_SUPPORTED_MINOR_VERSION) argp->opcnt = 0; - ops = &nfsd4_minorversion[argp->minorversion]; for (i = 0; i < argp->opcnt; i++) { op = &argp->ops[i]; op->replay = NULL; READ_BUF(4); - READ32(op->opnum); + op->opnum = be32_to_cpup(p++); - if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP) - op->status = ops->decoders[op->opnum](argp, &op->u); + if (nfsd4_opnum_in_range(argp, op)) + op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); else { op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } - - if (op->status) { - argp->opcnt = i+1; - break; - } /* * We'll try to cache the result in the DRC if any one * op in the compound wants to be cached: */ cachethis |= nfsd4_cache_this_op(op); + + if (op->opnum == OP_READ) { + readcount++; + readbytes += nfsd4_max_reply(argp->rqstp, op); + } else + max_reply += nfsd4_max_reply(argp->rqstp, op); + + if (op->status) { + argp->opcnt = i+1; + break; + } } /* Sessions make the DRC unnecessary: */ if (argp->minorversion) cachethis = false; + svc_reserve(argp->rqstp, max_reply + readbytes); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; - DECODE_TAIL; -} + if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) + argp->rqstp->rq_splice_ok = false; -#define WRITE32(n) *p++ = htonl(n) -#define WRITE64(n) do { \ - *p++ = htonl((u32)((n) >> 32)); \ - *p++ = htonl((u32)(n)); \ -} while (0) -#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \ - *(p + XDR_QUADLEN(nbytes) -1) = 0; \ - memcpy(p, ptr, nbytes); \ - p += XDR_QUADLEN(nbytes); \ -}} while (0) - -static void write32(__be32 **p, u32 n) -{ - *(*p)++ = htonl(n); -} - -static void write64(__be32 **p, u64 n) -{ - write32(p, (n >> 32)); - write32(p, (u32)n); + DECODE_TAIL; } -static void write_change(__be32 **p, struct kstat *stat, struct inode *inode) +static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode) { if (IS_I_VERSION(inode)) { - write64(p, inode->i_version); + p = xdr_encode_hyper(p, inode->i_version); } else { - write32(p, stat->ctime.tv_sec); - write32(p, stat->ctime.tv_nsec); + *p++ = cpu_to_be32(stat->ctime.tv_sec); + *p++ = cpu_to_be32(stat->ctime.tv_nsec); } + return p; } -static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) +static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) { - write32(p, c->atomic); + *p++ = cpu_to_be32(c->atomic); if (c->change_supported) { - write64(p, c->before_change); - write64(p, c->after_change); + p = xdr_encode_hyper(p, c->before_change); + p = xdr_encode_hyper(p, c->after_change); } else { - write32(p, c->before_ctime_sec); - write32(p, c->before_ctime_nsec); - write32(p, c->after_ctime_sec); - write32(p, c->after_ctime_nsec); - } -} - -#define RESERVE_SPACE(nbytes) do { \ - p = resp->p; \ - BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \ -} while (0) -#define ADJUST_ARGS() resp->p = p - -/* - * Header routine to setup seqid operation replay cache - */ -#define ENCODE_SEQID_OP_HEAD \ - __be32 *save; \ - \ - save = resp->p; - -/* - * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This - * is where sequence id's are incremented, and the replay cache is filled. - * Note that we increment sequence id's here, at the last moment, so we're sure - * we know whether the error to be returned is a sequence id mutating error. - */ - -static void encode_seqid_op_tail(struct nfsd4_compoundres *resp, __be32 *save, __be32 nfserr) -{ - struct nfs4_stateowner *stateowner = resp->cstate.replay_owner; - - if (seqid_mutating_err(ntohl(nfserr)) && stateowner) { - stateowner->so_seqid++; - stateowner->so_replay.rp_status = nfserr; - stateowner->so_replay.rp_buflen = - (char *)resp->p - (char *)save; - memcpy(stateowner->so_replay.rp_buf, save, - stateowner->so_replay.rp_buflen); - nfsd4_purge_closed_stateid(stateowner); + *p++ = cpu_to_be32(c->before_ctime_sec); + *p++ = cpu_to_be32(c->before_ctime_nsec); + *p++ = cpu_to_be32(c->after_ctime_sec); + *p++ = cpu_to_be32(c->after_ctime_nsec); } + return p; } /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. */ -static __be32 nfsd4_encode_components_esc(char sep, char *components, - __be32 **pp, int *buflen, - char esc_enter, char esc_exit) +static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, + char *components, char esc_enter, + char esc_exit) { - __be32 *p = *pp; - __be32 *countp = p; + __be32 *p; + __be32 pathlen; + int pathlen_offset; int strlen, count=0; char *str, *end, *next; dprintk("nfsd4_encode_components(%s)\n", components); - if ((*buflen -= 4) < 0) + + pathlen_offset = xdr->buf->len; + p = xdr_reserve_space(xdr, 4); + if (!p) return nfserr_resource; - WRITE32(0); /* We will fill this in with @count later */ + p++; /* We will fill this in with @count later */ + end = str = components; while (*end) { bool found_esc = false; @@ -1752,62 +1761,57 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components, strlen = end - str; if (strlen) { - if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) + p = xdr_reserve_space(xdr, strlen + 4); + if (!p) return nfserr_resource; - WRITE32(strlen); - WRITEMEM(str, strlen); + p = xdr_encode_opaque(p, str, strlen); count++; } else end++; str = end; } - *pp = p; - p = countp; - WRITE32(count); + pathlen = htonl(xdr->buf->len - pathlen_offset); + write_bytes_to_xdr_buf(xdr->buf, pathlen_offset, &pathlen, 4); return 0; } /* Encode as an array of strings the string given with components * separated @sep. */ -static __be32 nfsd4_encode_components(char sep, char *components, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_components(struct xdr_stream *xdr, char sep, + char *components) { - return nfsd4_encode_components_esc(sep, components, pp, buflen, 0, 0); + return nfsd4_encode_components_esc(xdr, sep, components, 0, 0); } /* * encode a location element of a fs_locations structure */ -static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr, + struct nfsd4_fs_location *location) { __be32 status; - __be32 *p = *pp; - status = nfsd4_encode_components_esc(':', location->hosts, &p, buflen, + status = nfsd4_encode_components_esc(xdr, ':', location->hosts, '[', ']'); if (status) return status; - status = nfsd4_encode_components('/', location->path, &p, buflen); + status = nfsd4_encode_components(xdr, '/', location->path); if (status) return status; - *pp = p; return 0; } /* * Encode a path in RFC3530 'pathname4' format */ -static __be32 nfsd4_encode_path(const struct path *root, - const struct path *path, __be32 **pp, int *buflen) +static __be32 nfsd4_encode_path(struct xdr_stream *xdr, + const struct path *root, + const struct path *path) { - struct path cur = { - .mnt = path->mnt, - .dentry = path->dentry, - }; - __be32 *p = *pp; + struct path cur = *path; + __be32 *p; struct dentry **components = NULL; unsigned int ncomponents = 0; __be32 err = nfserr_jukebox; @@ -1838,27 +1842,30 @@ static __be32 nfsd4_encode_path(const struct path *root, components[ncomponents++] = cur.dentry; cur.dentry = dget_parent(cur.dentry); } - - *buflen -= 4; - if (*buflen < 0) + err = nfserr_resource; + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_free; - WRITE32(ncomponents); + *p++ = cpu_to_be32(ncomponents); while (ncomponents) { struct dentry *dentry = components[ncomponents - 1]; - unsigned int len = dentry->d_name.len; + unsigned int len; - *buflen -= 4 + (XDR_QUADLEN(len) << 2); - if (*buflen < 0) + spin_lock(&dentry->d_lock); + len = dentry->d_name.len; + p = xdr_reserve_space(xdr, len + 4); + if (!p) { + spin_unlock(&dentry->d_lock); goto out_free; - WRITE32(len); - WRITEMEM(dentry->d_name.name, len); + } + p = xdr_encode_opaque(p, dentry->d_name.name, len); dprintk("/%s", dentry->d_name.name); + spin_unlock(&dentry->d_lock); dput(dentry); ncomponents--; } - *pp = p; err = 0; out_free: dprintk(")\n"); @@ -1869,8 +1876,8 @@ out_free: return err; } -static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, - const struct path *path, __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fsloc_fsroot(struct xdr_stream *xdr, + struct svc_rqst *rqstp, const struct path *path) { struct svc_export *exp_ps; __be32 res; @@ -1878,7 +1885,7 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, exp_ps = rqst_find_fsidzero_export(rqstp); if (IS_ERR(exp_ps)) return nfserrno(PTR_ERR(exp_ps)); - res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen); + res = nfsd4_encode_path(xdr, &exp_ps->ex_path, path); exp_put(exp_ps); return res; } @@ -1886,28 +1893,26 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, /* * encode a fs_locations structure */ -static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, - struct svc_export *exp, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr, + struct svc_rqst *rqstp, struct svc_export *exp) { __be32 status; int i; - __be32 *p = *pp; + __be32 *p; struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; - status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen); + status = nfsd4_encode_fsloc_fsroot(xdr, rqstp, &exp->ex_path); if (status) return status; - if ((*buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) return nfserr_resource; - WRITE32(fslocs->locations_count); + *p++ = cpu_to_be32(fslocs->locations_count); for (i=0; i<fslocs->locations_count; i++) { - status = nfsd4_encode_fs_location4(&fslocs->locations[i], - &p, buflen); + status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]); if (status) return status; } - *pp = p; return 0; } @@ -1925,50 +1930,48 @@ static u32 nfs4_file_type(umode_t mode) }; } -static __be32 -nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, - __be32 **p, int *buflen) +static inline __be32 +nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, + struct nfs4_ace *ace) { - int status; - - if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4) - return nfserr_resource; - if (whotype != NFS4_ACL_WHO_NAMED) - status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1)); - else if (group) - status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1)); + if (ace->whotype != NFS4_ACL_WHO_NAMED) + return nfs4_acl_write_who(xdr, ace->whotype); + else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) + return nfsd4_encode_group(xdr, rqstp, ace->who_gid); else - status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1)); - if (status < 0) - return nfserrno(status); - *p = xdr_encode_opaque(*p, NULL, status); - *buflen -= (XDR_QUADLEN(status) << 2) + 4; - BUG_ON(*buflen < 0); - return 0; + return nfsd4_encode_user(xdr, rqstp, ace->who_uid); } -static inline __be32 -nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen) -{ - return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen); -} +#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ + FATTR4_WORD0_RDATTR_ERROR) +#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 -nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen) +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, + void *context, int len) { - return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen); -} + __be32 *p; -static inline __be32 -nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, - __be32 **p, int *buflen) -{ - return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); -} + p = xdr_reserve_space(xdr, len + 4 + 4 + 4); + if (!p) + return nfserr_resource; -#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ - FATTR4_WORD0_RDATTR_ERROR) -#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID + /* + * For now we use a 0 here to indicate the null translation; in + * the future we may place a call to translation code here. + */ + *p++ = cpu_to_be32(0); /* lfs */ + *p++ = cpu_to_be32(0); /* pi */ + p = xdr_encode_opaque(p, context, len); + return 0; +} +#else +static inline __be32 +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, + void *context, int len) +{ return 0; } +#endif static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) { @@ -1997,7 +2000,7 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) if (path.dentry != path.mnt->mnt_root) break; } - err = vfs_getattr(path.mnt, path.dentry, stat); + err = vfs_getattr(&path, stat); path_put(&path); return err; } @@ -2005,31 +2008,33 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. - * - * @countp is the buffer size in _words_; upon successful return this becomes - * replaced with the number of words written. */ -__be32 -nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, +static __be32 +nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, + struct svc_export *exp, + struct dentry *dentry, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { u32 bmval0 = bmval[0]; u32 bmval1 = bmval[1]; u32 bmval2 = bmval[2]; struct kstat stat; - struct svc_fh tempfh; + struct svc_fh *tempfh = NULL; struct kstatfs statfs; - int buflen = *countp << 2; - __be32 *attrlenp; + __be32 *p; + int starting_len = xdr->buf->len; + int attrlen_offset; + __be32 attrlen; u32 dummy; u64 dummy64; u32 rdattr_err = 0; - __be32 *p = buffer; __be32 status; int err; int aclsupport = 0; struct nfs4_acl *acl = NULL; + void *context = NULL; + int contextlen; + bool contextsupport = false; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; struct path path = { @@ -2050,11 +2055,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out; } - err = vfs_getattr(exp->ex_path.mnt, dentry, &stat); + err = vfs_getattr(&path, &stat); if (err) goto out_nfserr; - if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | - FATTR4_WORD0_MAXNAME)) || + if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | + FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { err = vfs_statfs(&path, &statfs); @@ -2062,11 +2067,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out_nfserr; } if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { - fh_init(&tempfh, NFS4_FHSIZE); - status = fh_compose(&tempfh, exp, dentry, NULL); + tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); + status = nfserr_jukebox; + if (!tempfh) + goto out; + fh_init(tempfh, NFS4_FHSIZE); + status = fh_compose(tempfh, exp, dentry, NULL); if (status) goto out; - fhp = &tempfh; + fhp = tempfh; } if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_SUPPORTED_ATTRS)) { @@ -2083,26 +2092,49 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, } } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || + bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + err = security_inode_getsecctx(dentry->d_inode, + &context, &contextlen); + contextsupport = (err == 0); + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + if (err == -EOPNOTSUPP) + bmval2 &= ~FATTR4_WORD2_SECURITY_LABEL; + else if (err) + goto out_nfserr; + } + } +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ + if (bmval2) { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; - WRITE32(3); - WRITE32(bmval0); - WRITE32(bmval1); - WRITE32(bmval2); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); + *p++ = cpu_to_be32(bmval2); } else if (bmval1) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(2); - WRITE32(bmval0); - WRITE32(bmval1); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); } else { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE32(1); - WRITE32(bmval0); + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(bmval0); } - attrlenp = p++; /* to be backfilled later */ + + attrlen_offset = xdr->buf->len; + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out_resource; + p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { u32 word0 = nfsd_suppattrs0(minorversion); @@ -2111,308 +2143,346 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (!aclsupport) word0 &= ~FATTR4_WORD0_ACL; + if (!contextsupport) + word2 &= ~FATTR4_WORD2_SECURITY_LABEL; if (!word2) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(2); - WRITE32(word0); - WRITE32(word1); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(word0); + *p++ = cpu_to_be32(word1); } else { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; - WRITE32(3); - WRITE32(word0); - WRITE32(word1); - WRITE32(word2); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(word0); + *p++ = cpu_to_be32(word1); + *p++ = cpu_to_be32(word2); } } if (bmval0 & FATTR4_WORD0_TYPE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; dummy = nfs4_file_type(stat.mode); - if (dummy == NF4BAD) - goto out_serverfault; - WRITE32(dummy); + if (dummy == NF4BAD) { + status = nfserr_serverfault; + goto out; + } + *p++ = cpu_to_be32(dummy); } if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) - WRITE32(NFS4_FH_PERSISTENT); + *p++ = cpu_to_be32(NFS4_FH_PERSISTENT); else - WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); + *p++ = cpu_to_be32(NFS4_FH_PERSISTENT| + NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - write_change(&p, &stat, dentry->d_inode); + p = encode_change(p, &stat, dentry->d_inode); } if (bmval0 & FATTR4_WORD0_SIZE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64(stat.size); + p = xdr_encode_hyper(p, stat.size); } if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_NAMED_ATTR) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_FSID) { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; if (exp->ex_fslocs.migrated) { - WRITE64(NFS4_REFERRAL_FSID_MAJOR); - WRITE64(NFS4_REFERRAL_FSID_MINOR); + p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR); + p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MINOR); } else switch(fsid_source(fhp)) { case FSIDSOURCE_FSID: - WRITE64((u64)exp->ex_fsid); - WRITE64((u64)0); + p = xdr_encode_hyper(p, (u64)exp->ex_fsid); + p = xdr_encode_hyper(p, (u64)0); break; case FSIDSOURCE_DEV: - WRITE32(0); - WRITE32(MAJOR(stat.dev)); - WRITE32(0); - WRITE32(MINOR(stat.dev)); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(MAJOR(stat.dev)); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(MINOR(stat.dev)); break; case FSIDSOURCE_UUID: - WRITEMEM(exp->ex_uuid, 16); + p = xdr_encode_opaque_fixed(p, exp->ex_uuid, + EX_UUID_LEN); break; } } if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_LEASE_TIME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(nn->nfsd4_lease); + *p++ = cpu_to_be32(nn->nfsd4_lease); } if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(rdattr_err); + *p++ = cpu_to_be32(rdattr_err); } if (bmval0 & FATTR4_WORD0_ACL) { struct nfs4_ace *ace; if (acl == NULL) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); goto out_acl; } - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(acl->naces); + *p++ = cpu_to_be32(acl->naces); for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { - if ((buflen -= 4*3) < 0) - goto out_resource; - WRITE32(ace->type); - WRITE32(ace->flag); - WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); - status = nfsd4_encode_aclname(rqstp, ace->whotype, - ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP, - &p, &buflen); - if (status == nfserr_resource) + p = xdr_reserve_space(xdr, 4*3); + if (!p) goto out_resource; + *p++ = cpu_to_be32(ace->type); + *p++ = cpu_to_be32(ace->flag); + *p++ = cpu_to_be32(ace->access_mask & + NFS4_ACE_MASK_ALL); + status = nfsd4_encode_aclname(xdr, rqstp, ace); if (status) goto out; } } out_acl: if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(aclsupport ? + *p++ = cpu_to_be32(aclsupport ? ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); } if (bmval0 & FATTR4_WORD0_CANSETTIME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_FILEHANDLE) { - buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4; - if (buflen < 0) + p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); + if (!p) goto out_resource; - WRITE32(fhp->fh_handle.fh_size); - WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, + fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64(stat.ino); + p = xdr_encode_hyper(p, stat.ino); } if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) statfs.f_ffree); + p = xdr_encode_hyper(p, (u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_FREE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) statfs.f_ffree); + p = xdr_encode_hyper(p, (u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) statfs.f_files); + p = xdr_encode_hyper(p, (u64) statfs.f_files); } if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { - status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen); - if (status == nfserr_resource) - goto out_resource; + status = nfsd4_encode_fs_locations(xdr, rqstp, exp); if (status) goto out; } if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64(~(u64)0); + p = xdr_encode_hyper(p, exp->ex_path.mnt->mnt_sb->s_maxbytes); } if (bmval0 & FATTR4_WORD0_MAXLINK) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(255); + *p++ = cpu_to_be32(255); } if (bmval0 & FATTR4_WORD0_MAXNAME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(statfs.f_namelen); + *p++ = cpu_to_be32(statfs.f_namelen); } if (bmval0 & FATTR4_WORD0_MAXREAD) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) svc_max_payload(rqstp)); + p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); } if (bmval0 & FATTR4_WORD0_MAXWRITE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) svc_max_payload(rqstp)); + p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); } if (bmval1 & FATTR4_WORD1_MODE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(stat.mode & S_IALLUGO); + *p++ = cpu_to_be32(stat.mode & S_IALLUGO); } if (bmval1 & FATTR4_WORD1_NO_TRUNC) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval1 & FATTR4_WORD1_NUMLINKS) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(stat.nlink); + *p++ = cpu_to_be32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { - status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); - if (status == nfserr_resource) - goto out_resource; + status = nfsd4_encode_user(xdr, rqstp, stat.uid); if (status) goto out; } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); - if (status == nfserr_resource) - goto out_resource; + status = nfsd4_encode_group(xdr, rqstp, stat.gid); if (status) goto out; } if (bmval1 & FATTR4_WORD1_RAWDEV) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE32((u32) MAJOR(stat.rdev)); - WRITE32((u32) MINOR(stat.rdev)); + *p++ = cpu_to_be32((u32) MAJOR(stat.rdev)); + *p++ = cpu_to_be32((u32) MINOR(stat.rdev)); } if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_FREE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_USED) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)stat.blocks << 9; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(stat.atime.tv_sec); - WRITE32(stat.atime.tv_nsec); + p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); + *p++ = cpu_to_be32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(1); - WRITE32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(0); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(stat.ctime.tv_sec); - WRITE32(stat.ctime.tv_nsec); + p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); + *p++ = cpu_to_be32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(stat.mtime.tv_sec); - WRITE32(stat.mtime.tv_nsec); + p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); + *p++ = cpu_to_be32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; /* * Get parent's attributes if not ignoring crossmount @@ -2421,34 +2491,78 @@ out_acl: if (ignore_crossmnt == 0 && dentry == exp->ex_path.mnt->mnt_root) get_parent_attributes(exp, &stat); - WRITE64(stat.ino); + p = xdr_encode_hyper(p, stat.ino); + } + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + status = nfsd4_encode_security_label(xdr, rqstp, context, + contextlen); + if (status) + goto out; } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { - WRITE32(3); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); + p = xdr_reserve_space(xdr, 16); + if (!p) + goto out_resource; + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2); } - *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - *countp = p - buffer; + attrlen = htonl(xdr->buf->len - attrlen_offset - 4); + write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); status = nfs_ok; out: +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if (context) + security_release_secctx(context, contextlen); +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ kfree(acl); - if (fhp == &tempfh) - fh_put(&tempfh); + if (tempfh) { + fh_put(tempfh); + kfree(tempfh); + } + if (status) + xdr_truncate_encode(xdr, starting_len); return status; out_nfserr: status = nfserrno(err); goto out; out_resource: - *countp = 0; status = nfserr_resource; goto out; -out_serverfault: - status = nfserr_serverfault; - goto out; +} + +static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr, + struct xdr_buf *buf, __be32 *p, int bytes) +{ + xdr->scratch.iov_len = 0; + memset(buf, 0, sizeof(struct xdr_buf)); + buf->head[0].iov_base = p; + buf->head[0].iov_len = 0; + buf->len = 0; + xdr->buf = buf; + xdr->iov = buf->head; + xdr->p = p; + xdr->end = (void *)p + bytes; + buf->buflen = bytes; +} + +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, + struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, u32 *bmval, + struct svc_rqst *rqstp, int ignore_crossmnt) +{ + struct xdr_buf dummy; + struct xdr_stream xdr; + __be32 ret; + + svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2); + ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp, + ignore_crossmnt); + *p = xdr.p; + return ret; } static inline int attributes_need_mount(u32 *bmval) @@ -2461,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval) } static __be32 -nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, - const char *name, int namlen, __be32 *p, int *buflen) +nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd, + const char *name, int namlen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; @@ -2514,7 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, } out_encode: - nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, + nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval, cd->rd_rqstp, ignore_crossmnt); out_put: dput(dentry); @@ -2523,19 +2637,19 @@ out_put: } static __be32 * -nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) +nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr) { - __be32 *attrlenp; + __be32 *p; - if (buflen < 6) + p = xdr_reserve_space(xdr, 20); + if (!p) return NULL; *p++ = htonl(2); *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ *p++ = htonl(0); /* bmval1 */ - attrlenp = p++; + *p++ = htonl(4); /* attribute length */ *p++ = nfserr; /* no htonl */ - *attrlenp = htonl((char *)p - (char *)attrlenp - 4); return p; } @@ -2545,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, { struct readdir_cd *ccd = ccdv; struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); - int buflen; - __be32 *p = cd->buffer; - __be32 *cookiep; + struct xdr_stream *xdr = cd->xdr; + int start_offset = xdr->buf->len; + int cookie_offset; + int entry_bytes; __be32 nfserr = nfserr_toosmall; + __be64 wire_offset; + __be32 *p; /* In nfsv4, "." and ".." never make it onto the wire.. */ if (name && isdotent(name, namlen)) { @@ -2556,27 +2673,32 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, return 0; } - if (cd->offset) - xdr_encode_hyper(cd->offset, (u64) offset); + if (cd->cookie_offset) { + wire_offset = cpu_to_be64(offset); + write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset, + &wire_offset, 8); + } - buflen = cd->buflen - 4 - XDR_QUADLEN(namlen); - if (buflen < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto fail; - *p++ = xdr_one; /* mark entry present */ - cookiep = p; + cookie_offset = xdr->buf->len; + p = xdr_reserve_space(xdr, 3*4 + namlen); + if (!p) + goto fail; p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ - nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen); + nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen); switch (nfserr) { case nfs_ok: - p += buflen; break; case nfserr_resource: nfserr = nfserr_toosmall; goto fail; case nfserr_noent: + xdr_truncate_encode(xdr, start_offset); goto skip_entry; default: /* @@ -2588,59 +2710,74 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, */ if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) goto fail; - p = nfsd4_encode_rdattr_error(p, buflen, nfserr); + p = nfsd4_encode_rdattr_error(xdr, nfserr); if (p == NULL) { nfserr = nfserr_toosmall; goto fail; } } - cd->buflen -= (p - cd->buffer); - cd->buffer = p; - cd->offset = cookiep; + nfserr = nfserr_toosmall; + entry_bytes = xdr->buf->len - start_offset; + if (entry_bytes > cd->rd_maxcount) + goto fail; + cd->rd_maxcount -= entry_bytes; + if (!cd->rd_dircount) + goto fail; + cd->rd_dircount--; + cd->cookie_offset = cookie_offset; skip_entry: cd->common.err = nfs_ok; return 0; fail: + xdr_truncate_encode(xdr, start_offset); cd->common.err = nfserr; return -EINVAL; } -static void -nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) +static __be32 +nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) { __be32 *p; - RESERVE_SPACE(sizeof(stateid_t)); - WRITE32(sid->si_generation); - WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, sizeof(stateid_t)); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(sid->si_generation); + p = xdr_encode_opaque_fixed(p, &sid->si_opaque, + sizeof(stateid_opaque_t)); + return 0; } static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(8); - WRITE32(access->ac_supported); - WRITE32(access->ac_resp_access); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(access->ac_supported); + *p++ = cpu_to_be32(access->ac_resp_access); } return nfserr; } static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8); - WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(bcts->dir); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(bcts->dir); /* Sorry, we do not yet support RDMA over 4.1: */ - WRITE32(0); - ADJUST_ARGS(); + *p++ = cpu_to_be32(0); } return nfserr; } @@ -2648,12 +2785,11 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; if (!nfserr) - nfsd4_encode_stateid(resp, &close->cl_stateid); + nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2661,12 +2797,15 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(NFS4_VERIFIER_SIZE); - WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, commit->co_verf.data, + NFS4_VERIFIER_SIZE); } return nfserr; } @@ -2674,15 +2813,17 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(32); - write_cinfo(&p, &create->cr_cinfo); - WRITE32(2); - WRITE32(create->cr_bmval[0]); - WRITE32(create->cr_bmval[1]); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 32); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &create->cr_cinfo); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(create->cr_bmval[0]); + *p++ = cpu_to_be32(create->cr_bmval[1]); } return nfserr; } @@ -2691,33 +2832,31 @@ static __be32 nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) { struct svc_fh *fhp = getattr->ga_fhp; - int buflen; + struct xdr_stream *xdr = &resp->xdr; if (nfserr) return nfserr; - buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); - nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - resp->p, &buflen, getattr->ga_bmval, + nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, + getattr->ga_bmval, resp->rqstp, 0); - if (!nfserr) - resp->p += buflen; return nfserr; } static __be32 nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) { + struct xdr_stream *xdr = &resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; __be32 *p; if (!nfserr) { len = fhp->fh_handle.fh_size; - RESERVE_SPACE(len + 4); - WRITE32(len); - WRITEMEM(&fhp->fh_handle.fh_base, len); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, len + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); } return nfserr; } @@ -2726,59 +2865,72 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh * Including all fields other than the name, a LOCK4denied structure requires * 8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes. */ -static void -nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) +static __be32 +nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) { struct xdr_netobj *conf = &ld->ld_owner; __be32 *p; - RESERVE_SPACE(32 + XDR_LEN(conf->len)); - WRITE64(ld->ld_start); - WRITE64(ld->ld_length); - WRITE32(ld->ld_type); +again: + p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len)); + if (!p) { + /* + * Don't fail to return the result just because we can't + * return the conflicting open: + */ + if (conf->len) { + kfree(conf->data); + conf->len = 0; + conf->data = NULL; + goto again; + } + return nfserr_resource; + } + p = xdr_encode_hyper(p, ld->ld_start); + p = xdr_encode_hyper(p, ld->ld_length); + *p++ = cpu_to_be32(ld->ld_type); if (conf->len) { - WRITEMEM(&ld->ld_clientid, 8); - WRITE32(conf->len); - WRITEMEM(conf->data, conf->len); + p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8); + p = xdr_encode_opaque(p, conf->data, conf->len); kfree(conf->data); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ - WRITE64((u64)0); /* clientid */ - WRITE32(0); /* length of owner name */ + p = xdr_encode_hyper(p, (u64)0); /* clientid */ + *p++ = cpu_to_be32(0); /* length of owner name */ } - ADJUST_ARGS(); + return nfserr_denied; } static __be32 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; if (!nfserr) - nfsd4_encode_stateid(resp, &lock->lk_resp_stateid); + nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); else if (nfserr == nfserr_denied) - nfsd4_encode_lock_denied(resp, &lock->lk_denied); + nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } static __be32 nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) { + struct xdr_stream *xdr = &resp->xdr; + if (nfserr == nfserr_denied) - nfsd4_encode_lock_denied(resp, &lockt->lt_denied); + nfsd4_encode_lock_denied(xdr, &lockt->lt_denied); return nfserr; } static __be32 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; if (!nfserr) - nfsd4_encode_stateid(resp, &locku->lu_stateid); + nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2786,12 +2938,14 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(20); - write_cinfo(&p, &link->li_cinfo); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &link->li_cinfo); } return nfserr; } @@ -2800,222 +2954,336 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; - ENCODE_SEQID_OP_HEAD; if (nfserr) goto out; - nfsd4_encode_stateid(resp, &open->op_stateid); - RESERVE_SPACE(40); - write_cinfo(&p, &open->op_cinfo); - WRITE32(open->op_rflags); - WRITE32(2); - WRITE32(open->op_bmval[0]); - WRITE32(open->op_bmval[1]); - WRITE32(open->op_delegate_type); - ADJUST_ARGS(); + nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); + if (nfserr) + goto out; + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &open->op_cinfo); + *p++ = cpu_to_be32(open->op_rflags); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(open->op_bmval[0]); + *p++ = cpu_to_be32(open->op_bmval[1]); + *p++ = cpu_to_be32(open->op_delegate_type); switch (open->op_delegate_type) { case NFS4_OPEN_DELEGATE_NONE: break; case NFS4_OPEN_DELEGATE_READ: - nfsd4_encode_stateid(resp, &open->op_delegate_stateid); - RESERVE_SPACE(20); - WRITE32(open->op_recall); + nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); + if (nfserr) + return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(open->op_recall); /* * TODO: ACE's in delegations */ - WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - WRITE32(0); - WRITE32(0); - WRITE32(0); /* XXX: is NULL principal ok? */ - ADJUST_ARGS(); + *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ break; case NFS4_OPEN_DELEGATE_WRITE: - nfsd4_encode_stateid(resp, &open->op_delegate_stateid); - RESERVE_SPACE(32); - WRITE32(0); + nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); + if (nfserr) + return nfserr; + p = xdr_reserve_space(xdr, 32); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); /* * TODO: space_limit's in delegations */ - WRITE32(NFS4_LIMIT_SIZE); - WRITE32(~(u32)0); - WRITE32(~(u32)0); + *p++ = cpu_to_be32(NFS4_LIMIT_SIZE); + *p++ = cpu_to_be32(~(u32)0); + *p++ = cpu_to_be32(~(u32)0); /* * TODO: ACE's in delegations */ - WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - WRITE32(0); - WRITE32(0); - WRITE32(0); /* XXX: is NULL principal ok? */ - ADJUST_ARGS(); + *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ break; case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */ switch (open->op_why_no_deleg) { case WND4_CONTENTION: case WND4_RESOURCE: - RESERVE_SPACE(8); - WRITE32(open->op_why_no_deleg); - WRITE32(0); /* deleg signaling not supported yet */ + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(open->op_why_no_deleg); + /* deleg signaling not supported yet: */ + *p++ = cpu_to_be32(0); break; default: - RESERVE_SPACE(4); - WRITE32(open->op_why_no_deleg); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(open->op_why_no_deleg); } - ADJUST_ARGS(); break; default: BUG(); } /* XXX save filehandle here */ out: - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } static __be32 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; if (!nfserr) - nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); + nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } static __be32 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; if (!nfserr) - nfsd4_encode_stateid(resp, &od->od_stateid); + nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } -static __be32 -nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) +static __be32 nfsd4_encode_splice_read( + struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + struct file *file, unsigned long maxcount) { + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = xdr->buf; u32 eof; - int v; - struct page *page; - unsigned long maxcount; - long len; - __be32 *p; + int space_left; + __be32 nfserr; + __be32 *p = xdr->p - 2; - if (nfserr) - return nfserr; - if (resp->xbuf->page_len) + /* + * Don't inline pages unless we know there's room for eof, + * count, and possible padding: + */ + if (xdr->end - xdr->p < 3) return nfserr_resource; - RESERVE_SPACE(8); /* eof flag and byte count */ + nfserr = nfsd_splice_read(read->rd_rqstp, file, + read->rd_offset, &maxcount); + if (nfserr) { + /* + * nfsd_splice_actor may have already messed with the + * page length; reset it so as not to confuse + * xdr_truncate_encode: + */ + buf->page_len = 0; + return nfserr; + } - maxcount = svc_max_payload(resp->rqstp); - if (maxcount > read->rd_length) - maxcount = read->rd_length; + eof = (read->rd_offset + maxcount >= + read->rd_fhp->fh_dentry->d_inode->i_size); + + *(p++) = htonl(eof); + *(p++) = htonl(maxcount); + + buf->page_len = maxcount; + buf->len += maxcount; + xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; + + /* Use rest of head for padding and remaining ops: */ + buf->tail[0].iov_base = xdr->p; + buf->tail[0].iov_len = 0; + xdr->iov = buf->tail; + if (maxcount&3) { + int pad = 4 - (maxcount&3); + + *(xdr->p++) = 0; + + buf->tail[0].iov_base += maxcount&3; + buf->tail[0].iov_len = pad; + buf->len += pad; + } + + space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, + buf->buflen - buf->len); + buf->buflen = buf->len + space_left; + xdr->end = (__be32 *)((void *)xdr->end + space_left); + + return 0; +} + +static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + struct file *file, unsigned long maxcount) +{ + struct xdr_stream *xdr = &resp->xdr; + u32 eof; + int v; + int starting_len = xdr->buf->len - 8; + long len; + int thislen; + __be32 nfserr; + __be32 tmp; + __be32 *p; + u32 zzz = 0; + int pad; len = maxcount; v = 0; - while (len > 0) { - page = *(resp->rqstp->rq_next_page); - if (!page) { /* ran out of pages */ - maxcount -= len; - break; - } - resp->rqstp->rq_vec[v].iov_base = page_address(page); - resp->rqstp->rq_vec[v].iov_len = - len < PAGE_SIZE ? len : PAGE_SIZE; - resp->rqstp->rq_next_page++; + + thislen = (void *)xdr->end - (void *)xdr->p; + if (len < thislen) + thislen = len; + p = xdr_reserve_space(xdr, (thislen+3)&~3); + WARN_ON_ONCE(!p); + resp->rqstp->rq_vec[v].iov_base = p; + resp->rqstp->rq_vec[v].iov_len = thislen; + v++; + len -= thislen; + + while (len) { + thislen = min_t(long, len, PAGE_SIZE); + p = xdr_reserve_space(xdr, (thislen+3)&~3); + WARN_ON_ONCE(!p); + resp->rqstp->rq_vec[v].iov_base = p; + resp->rqstp->rq_vec[v].iov_len = thislen; v++; - len -= PAGE_SIZE; + len -= thislen; } read->rd_vlen = v; - nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp, - read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, - &maxcount); - + nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, + read->rd_vlen, &maxcount); if (nfserr) return nfserr; + xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); + eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); - WRITE32(eof); - WRITE32(maxcount); - ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = (char*)p - - (char*)resp->xbuf->head[0].iov_base; - resp->xbuf->page_len = maxcount; + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); + tmp = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); - /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = p; - resp->xbuf->tail[0].iov_len = 0; - if (maxcount&3) { - RESERVE_SPACE(4); - WRITE32(0); - resp->xbuf->tail[0].iov_base += maxcount&3; - resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); - ADJUST_ARGS(); - } + pad = (maxcount&3) ? 4 - (maxcount&3) : 0; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, + &zzz, pad); return 0; + } static __be32 -nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_read *read) { - int maxcount; - char *page; + unsigned long maxcount; + struct xdr_stream *xdr = &resp->xdr; + struct file *file = read->rd_filp; + int starting_len = xdr->buf->len; + struct raparms *ra; __be32 *p; + __be32 err; if (nfserr) return nfserr; - if (resp->xbuf->page_len) + + p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ + if (!p) { + WARN_ON_ONCE(resp->rqstp->rq_splice_ok); return nfserr_resource; - if (!*resp->rqstp->rq_next_page) + } + if (resp->xdr.buf->page_len && resp->rqstp->rq_splice_ok) { + WARN_ON_ONCE(1); return nfserr_resource; + } + xdr_commit_encode(xdr); + + maxcount = svc_max_payload(resp->rqstp); + if (maxcount > xdr->buf->buflen - xdr->buf->len) + maxcount = xdr->buf->buflen - xdr->buf->len; + if (maxcount > read->rd_length) + maxcount = read->rd_length; + + if (!read->rd_filp) { + err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, + &file, &ra); + if (err) + goto err_truncate; + } - page = page_address(*(resp->rqstp->rq_next_page++)); + if (file->f_op->splice_read && resp->rqstp->rq_splice_ok) + err = nfsd4_encode_splice_read(resp, read, file, maxcount); + else + err = nfsd4_encode_readv(resp, read, file, maxcount); + + if (!read->rd_filp) + nfsd_put_tmp_read_open(file, ra); + +err_truncate: + if (err) + xdr_truncate_encode(xdr, starting_len); + return err; +} +static __be32 +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +{ + int maxcount; + __be32 wire_count; + int zero = 0; + struct xdr_stream *xdr = &resp->xdr; + int length_offset = xdr->buf->len; + __be32 *p; + + if (nfserr) + return nfserr; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; maxcount = PAGE_SIZE; - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, maxcount); + if (!p) + return nfserr_resource; /* * XXX: By default, the ->readlink() VFS op will truncate symlinks * if they would overflow the buffer. Is this kosher in NFSv4? If * not, one easy fix is: if ->readlink() precisely fills the buffer, * assume that truncation occurred, and return NFS4ERR_RESOURCE. */ - nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); + nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, + (char *)p, &maxcount); if (nfserr == nfserr_isdir) - return nfserr_inval; - if (nfserr) + nfserr = nfserr_inval; + if (nfserr) { + xdr_truncate_encode(xdr, length_offset); return nfserr; - - WRITE32(maxcount); - ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = (char*)p - - (char*)resp->xbuf->head[0].iov_base; - resp->xbuf->page_len = maxcount; - - /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = p; - resp->xbuf->tail[0].iov_len = 0; - if (maxcount&3) { - RESERVE_SPACE(4); - WRITE32(0); - resp->xbuf->tail[0].iov_base += maxcount&3; - resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); - ADJUST_ARGS(); } + + wire_count = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); + xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4)); + if (maxcount & 3) + write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, + &zero, 4 - (maxcount&3)); return 0; } @@ -3023,47 +3291,52 @@ static __be32 nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) { int maxcount; + int bytes_left; loff_t offset; - __be32 *page, *savep, *tailbase; + __be64 wire_offset; + struct xdr_stream *xdr = &resp->xdr; + int starting_len = xdr->buf->len; __be32 *p; if (nfserr) return nfserr; - if (resp->xbuf->page_len) - return nfserr_resource; - if (!*resp->rqstp->rq_next_page) - return nfserr_resource; - RESERVE_SPACE(NFS4_VERIFIER_SIZE); - savep = p; + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ - WRITE32(0); - WRITE32(0); - ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; - tailbase = p; - - maxcount = PAGE_SIZE; - if (maxcount > readdir->rd_maxcount) - maxcount = readdir->rd_maxcount; + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) + - (char *)resp->xdr.buf->head[0].iov_base; /* - * Convert from bytes to words, account for the two words already - * written, make sure to leave two words at the end for the next - * pointer and eof field. + * Number of bytes left for directory entries allowing for the + * final 8 bytes of the readdir and a following failed op: + */ + bytes_left = xdr->buf->buflen - xdr->buf->len + - COMPOUND_ERR_SLACK_SPACE - 8; + if (bytes_left < 0) { + nfserr = nfserr_resource; + goto err_no_verf; + } + maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX); + /* + * Note the rfc defines rd_maxcount as the size of the + * READDIR4resok structure, which includes the verifier above + * and the 8 bytes encoded at the end of this function: */ - maxcount = (maxcount >> 2) - 4; - if (maxcount < 0) { - nfserr = nfserr_toosmall; + if (maxcount < 16) { + nfserr = nfserr_toosmall; goto err_no_verf; } + maxcount = min_t(int, maxcount-16, bytes_left); - page = page_address(*(resp->rqstp->rq_next_page++)); + readdir->xdr = xdr; + readdir->rd_maxcount = maxcount; readdir->common.err = 0; - readdir->buflen = maxcount; - readdir->buffer = page; - readdir->offset = NULL; + readdir->cookie_offset = 0; offset = readdir->rd_cookie; nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, @@ -3071,42 +3344,49 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 &readdir->common, nfsd4_encode_dirent); if (nfserr == nfs_ok && readdir->common.err == nfserr_toosmall && - readdir->buffer == page) - nfserr = nfserr_toosmall; + xdr->buf->len == starting_len + 8) { + /* nothing encoded; which limit did we hit?: */ + if (maxcount - 16 < bytes_left) + /* It was the fault of rd_maxcount: */ + nfserr = nfserr_toosmall; + else + /* We ran out of buffer space: */ + nfserr = nfserr_resource; + } if (nfserr) goto err_no_verf; - if (readdir->offset) - xdr_encode_hyper(readdir->offset, offset); + if (readdir->cookie_offset) { + wire_offset = cpu_to_be64(offset); + write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset, + &wire_offset, 8); + } - p = readdir->buffer; + p = xdr_reserve_space(xdr, 8); + if (!p) { + WARN_ON_ONCE(1); + goto err_no_verf; + } *p++ = 0; /* no more entries */ *p++ = htonl(readdir->common.err == nfserr_eof); - resp->xbuf->page_len = ((char*)p) - - (char*)page_address(*(resp->rqstp->rq_next_page-1)); - - /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = tailbase; - resp->xbuf->tail[0].iov_len = 0; - resp->p = resp->xbuf->tail[0].iov_base; - resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4; return 0; err_no_verf: - p = savep; - ADJUST_ARGS(); + xdr_truncate_encode(xdr, starting_len); return nfserr; } static __be32 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(20); - write_cinfo(&p, &remove->rm_cinfo); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &remove->rm_cinfo); } return nfserr; } @@ -3114,29 +3394,32 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(40); - write_cinfo(&p, &rename->rn_sinfo); - write_cinfo(&p, &rename->rn_tinfo); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &rename->rn_sinfo); + p = encode_cinfo(p, &rename->rn_tinfo); } return nfserr; } static __be32 -nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, - __be32 nfserr,struct svc_export *exp) +nfsd4_do_encode_secinfo(struct xdr_stream *xdr, + __be32 nfserr, struct svc_export *exp) { - int i = 0; - u32 nflavs; + u32 i, nflavs, supported; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; - __be32 *p; + __be32 *p, *flavorsp; + static bool report = true; if (nfserr) goto out; + nfserr = nfserr_resource; if (exp->ex_nflavors) { flavs = exp->ex_flavors; nflavs = exp->ex_nflavors; @@ -3157,34 +3440,43 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, } } - RESERVE_SPACE(4); - WRITE32(nflavs); - ADJUST_ARGS(); + supported = 0; + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out; + flavorsp = p++; /* to be backfilled later */ + for (i = 0; i < nflavs; i++) { - u32 flav = flavs[i].pseudoflavor; - struct gss_api_mech *gm = gss_mech_get_by_pseudoflavor(flav); - - if (gm) { - RESERVE_SPACE(4); - WRITE32(RPC_AUTH_GSS); - ADJUST_ARGS(); - RESERVE_SPACE(4 + gm->gm_oid.len); - WRITE32(gm->gm_oid.len); - WRITEMEM(gm->gm_oid.data, gm->gm_oid.len); - ADJUST_ARGS(); - RESERVE_SPACE(4); - WRITE32(0); /* qop */ - ADJUST_ARGS(); - RESERVE_SPACE(4); - WRITE32(gss_pseudoflavor_to_service(gm, flav)); - ADJUST_ARGS(); - gss_mech_put(gm); + rpc_authflavor_t pf = flavs[i].pseudoflavor; + struct rpcsec_gss_info info; + + if (rpcauth_get_gssinfo(pf, &info) == 0) { + supported++; + p = xdr_reserve_space(xdr, 4 + 4 + + XDR_LEN(info.oid.len) + 4 + 4); + if (!p) + goto out; + *p++ = cpu_to_be32(RPC_AUTH_GSS); + p = xdr_encode_opaque(p, info.oid.data, info.oid.len); + *p++ = cpu_to_be32(info.qop); + *p++ = cpu_to_be32(info.service); + } else if (pf < RPC_AUTH_MAXFLAVOR) { + supported++; + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out; + *p++ = cpu_to_be32(pf); } else { - RESERVE_SPACE(4); - WRITE32(flav); - ADJUST_ARGS(); + if (report) + pr_warn("NFS: SECINFO: security flavor %u " + "is not supported\n", pf); } } + + if (nflavs != supported) + report = false; + *flavorsp = htonl(supported); + nfserr = 0; out: if (exp) exp_put(exp); @@ -3195,14 +3487,18 @@ static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo *secinfo) { - return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp); + struct xdr_stream *xdr = &resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp); } static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo_no_name *secinfo) { - return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp); + struct xdr_stream *xdr = &resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp); } /* @@ -3212,39 +3508,47 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; - RESERVE_SPACE(12); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; if (nfserr) { - WRITE32(2); - WRITE32(0); - WRITE32(0); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); } else { - WRITE32(2); - WRITE32(setattr->sa_bmval[0]); - WRITE32(setattr->sa_bmval[1]); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(setattr->sa_bmval[0]); + *p++ = cpu_to_be32(setattr->sa_bmval[1]); + *p++ = cpu_to_be32(setattr->sa_bmval[2]); } - ADJUST_ARGS(); return nfserr; } static __be32 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(8 + NFS4_VERIFIER_SIZE); - WRITEMEM(&scd->se_clientid, 8); - WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8); + p = xdr_encode_opaque_fixed(p, &scd->se_confirm, + NFS4_VERIFIER_SIZE); } else if (nfserr == nfserr_clid_inuse) { - RESERVE_SPACE(8); - WRITE32(0); - WRITE32(0); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); } return nfserr; } @@ -3252,22 +3556,34 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n static __be32 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(16); - WRITE32(write->wr_bytes_written); - WRITE32(write->wr_how_written); - WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(write->wr_bytes_written); + *p++ = cpu_to_be32(write->wr_how_written); + p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, + NFS4_VERIFIER_SIZE); } return nfserr; } +static const u32 nfs4_minimal_spo_must_enforce[2] = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) +}; + static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_exchange_id *exid) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; char *major_id; char *server_scope; @@ -3283,39 +3599,61 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, server_scope = utsname()->nodename; server_scope_sz = strlen(server_scope); - RESERVE_SPACE( + p = xdr_reserve_space(xdr, 8 /* eir_clientid */ + 4 /* eir_sequenceid */ + 4 /* eir_flags */ + - 4 /* spr_how (SP4_NONE) */ + + 4 /* spr_how */); + if (!p) + return nfserr_resource; + + p = xdr_encode_opaque_fixed(p, &exid->clientid, 8); + *p++ = cpu_to_be32(exid->seqid); + *p++ = cpu_to_be32(exid->flags); + + *p++ = cpu_to_be32(exid->spa_how); + + switch (exid->spa_how) { + case SP4_NONE: + break; + case SP4_MACH_CRED: + /* spo_must_enforce, spo_must_allow */ + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; + + /* spo_must_enforce bitmap: */ + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[0]); + *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[1]); + /* empty spo_must_allow bitmap: */ + *p++ = cpu_to_be32(0); + + break; + default: + WARN_ON_ONCE(1); + } + + p = xdr_reserve_space(xdr, 8 /* so_minor_id */ + 4 /* so_major_id.len */ + (XDR_QUADLEN(major_id_sz) * 4) + 4 /* eir_server_scope.len */ + (XDR_QUADLEN(server_scope_sz) * 4) + 4 /* eir_server_impl_id.count (0) */); - - WRITEMEM(&exid->clientid, 8); - WRITE32(exid->seqid); - WRITE32(exid->flags); - - /* state_protect4_r. Currently only support SP4_NONE */ - BUG_ON(exid->spa_how != SP4_NONE); - WRITE32(exid->spa_how); + if (!p) + return nfserr_resource; /* The server_owner struct */ - WRITE64(minor_id); /* Minor id */ + p = xdr_encode_hyper(p, minor_id); /* Minor id */ /* major id */ - WRITE32(major_id_sz); - WRITEMEM(major_id, major_id_sz); + p = xdr_encode_opaque(p, major_id, major_id_sz); /* Server scope */ - WRITE32(server_scope_sz); - WRITEMEM(server_scope, server_scope_sz); + p = xdr_encode_opaque(p, server_scope, server_scope_sz); /* Implementation id */ - WRITE32(0); /* zero length nfs_impl_id4 array */ - ADJUST_ARGS(); + *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ return 0; } @@ -3323,93 +3661,81 @@ static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create_session *sess) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (nfserr) return nfserr; - RESERVE_SPACE(24); - WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(sess->seqid); - WRITE32(sess->flags); - ADJUST_ARGS(); - - RESERVE_SPACE(28); - WRITE32(0); /* headerpadsz */ - WRITE32(sess->fore_channel.maxreq_sz); - WRITE32(sess->fore_channel.maxresp_sz); - WRITE32(sess->fore_channel.maxresp_cached); - WRITE32(sess->fore_channel.maxops); - WRITE32(sess->fore_channel.maxreqs); - WRITE32(sess->fore_channel.nr_rdma_attrs); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 24); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, sess->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(sess->seqid); + *p++ = cpu_to_be32(sess->flags); + + p = xdr_reserve_space(xdr, 28); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); /* headerpadsz */ + *p++ = cpu_to_be32(sess->fore_channel.maxreq_sz); + *p++ = cpu_to_be32(sess->fore_channel.maxresp_sz); + *p++ = cpu_to_be32(sess->fore_channel.maxresp_cached); + *p++ = cpu_to_be32(sess->fore_channel.maxops); + *p++ = cpu_to_be32(sess->fore_channel.maxreqs); + *p++ = cpu_to_be32(sess->fore_channel.nr_rdma_attrs); if (sess->fore_channel.nr_rdma_attrs) { - RESERVE_SPACE(4); - WRITE32(sess->fore_channel.rdma_attrs); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(sess->fore_channel.rdma_attrs); } - RESERVE_SPACE(28); - WRITE32(0); /* headerpadsz */ - WRITE32(sess->back_channel.maxreq_sz); - WRITE32(sess->back_channel.maxresp_sz); - WRITE32(sess->back_channel.maxresp_cached); - WRITE32(sess->back_channel.maxops); - WRITE32(sess->back_channel.maxreqs); - WRITE32(sess->back_channel.nr_rdma_attrs); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 28); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); /* headerpadsz */ + *p++ = cpu_to_be32(sess->back_channel.maxreq_sz); + *p++ = cpu_to_be32(sess->back_channel.maxresp_sz); + *p++ = cpu_to_be32(sess->back_channel.maxresp_cached); + *p++ = cpu_to_be32(sess->back_channel.maxops); + *p++ = cpu_to_be32(sess->back_channel.maxreqs); + *p++ = cpu_to_be32(sess->back_channel.nr_rdma_attrs); if (sess->back_channel.nr_rdma_attrs) { - RESERVE_SPACE(4); - WRITE32(sess->back_channel.rdma_attrs); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(sess->back_channel.rdma_attrs); } return 0; } static __be32 -nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_destroy_session *destroy_session) -{ - return nfserr; -} - -static __be32 -nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_free_stateid *free_stateid) -{ - __be32 *p; - - if (nfserr) - return nfserr; - - RESERVE_SPACE(4); - *p++ = nfserr; - ADJUST_ARGS(); - return nfserr; -} - -static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_sequence *seq) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (nfserr) return nfserr; - RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20); - WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(seq->seqid); - WRITE32(seq->slotid); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, seq->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(seq->seqid); + *p++ = cpu_to_be32(seq->slotid); /* Note slotid's are numbered from zero: */ - WRITE32(seq->maxslots - 1); /* sr_highest_slotid */ - WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ - WRITE32(seq->status_flags); + *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_highest_slotid */ + *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_target_highest_slotid */ + *p++ = cpu_to_be32(seq->status_flags); - ADJUST_ARGS(); - resp->cstate.datap = p; /* DRC cache data pointer */ + resp->cstate.data_offset = xdr->buf->len; /* DRC cache data pointer */ return 0; } @@ -3417,17 +3743,22 @@ static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid *test_stateid) { + struct xdr_stream *xdr = &resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; - RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids)); + if (nfserr) + return nfserr; + + p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids)); + if (!p) + return nfserr_resource; *p++ = htonl(test_stateid->ts_num_ids); list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) { *p++ = stateid->ts_id_status; } - ADJUST_ARGS(); return nfserr; } @@ -3488,8 +3819,8 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, - [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, - [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_free_stateid, + [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, @@ -3506,77 +3837,99 @@ static nfsd4_enc nfsd4_enc_ops[] = { }; /* - * Calculate the total amount of memory that the compound response has taken - * after encoding the current operation with pad. - * - * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop() - * which was specified at nfsd4_operation, else pad is zero. - * - * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached. + * Calculate whether we still have space to encode repsize bytes. + * There are two considerations: + * - For NFS versions >=4.1, the size of the reply must stay within + * session limits + * - For all NFS versions, we must stay within limited preallocated + * buffer space. * - * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so - * will be at least a page and will therefore hold the xdr_buf head. + * This is called before the operation is processed, so can only provide + * an upper estimate. For some nonidempotent operations (such as + * getattr), it's not necessarily a problem if that estimate is wrong, + * as we can fail it after processing without significant side effects. */ -__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) +__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) { - struct xdr_buf *xb = &resp->rqstp->rq_res; - struct nfsd4_session *session = NULL; + struct xdr_buf *buf = &resp->rqstp->rq_res; struct nfsd4_slot *slot = resp->cstate.slot; - u32 length, tlen = 0; + if (buf->len + respsize <= buf->buflen) + return nfs_ok; if (!nfsd4_has_session(&resp->cstate)) - return 0; - - session = resp->cstate.session; - if (session == NULL) - return 0; - - if (xb->page_len == 0) { - length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; - } else { - if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0) - tlen = (char *)resp->p - (char *)xb->tail[0].iov_base; - - length = xb->head[0].iov_len + xb->page_len + tlen + pad; - } - dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, - length, xb->page_len, tlen, pad); - - if (length > session->se_fchannel.maxresp_sz) - return nfserr_rep_too_big; - - if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && - length > session->se_fchannel.maxresp_cached) + return nfserr_resource; + if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) { + WARN_ON_ONCE(1); return nfserr_rep_too_big_to_cache; - - return 0; + } + return nfserr_rep_too_big; } void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { - __be32 *statp; + struct xdr_stream *xdr = &resp->xdr; + struct nfs4_stateowner *so = resp->cstate.replay_owner; + struct svc_rqst *rqstp = resp->rqstp; + int post_err_offset; + nfsd4_enc encoder; __be32 *p; - RESERVE_SPACE(8); - WRITE32(op->opnum); - statp = p++; /* to be backfilled at the end */ - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 8); + if (!p) { + WARN_ON_ONCE(1); + return; + } + *p++ = cpu_to_be32(op->opnum); + post_err_offset = xdr->buf->len; if (op->opnum == OP_ILLEGAL) goto status; BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || !nfsd4_enc_ops[op->opnum]); - op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); - /* nfsd4_check_drc_limit guarantees enough room for error status */ - if (!op->status) - op->status = nfsd4_check_resp_size(resp, 0); + encoder = nfsd4_enc_ops[op->opnum]; + op->status = encoder(resp, op->status, &op->u); + xdr_commit_encode(xdr); + + /* nfsd4_check_resp_size guarantees enough room for error status */ + if (!op->status) { + int space_needed = 0; + if (!nfsd4_last_compound_op(rqstp)) + space_needed = COMPOUND_ERR_SLACK_SPACE; + op->status = nfsd4_check_resp_size(resp, space_needed); + } + if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) { + struct nfsd4_slot *slot = resp->cstate.slot; + + if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) + op->status = nfserr_rep_too_big_to_cache; + else + op->status = nfserr_rep_too_big; + } + if (op->status == nfserr_resource || + op->status == nfserr_rep_too_big || + op->status == nfserr_rep_too_big_to_cache) { + /* + * The operation may have already been encoded or + * partially encoded. No op returns anything additional + * in the case of one of these three errors, so we can + * just truncate back to after the status. But it's a + * bug if we had to do this on a non-idempotent op: + */ + warn_on_nonidempotent_op(op); + xdr_truncate_encode(xdr, post_err_offset); + } + if (so) { + int len = xdr->buf->len - post_err_offset; + + so->so_replay.rp_status = op->status; + so->so_replay.rp_buflen = len; + read_bytes_from_xdr_buf(xdr->buf, post_err_offset, + so->so_replay.rp_buf, len); + } status: - /* - * Note: We write the status directly, instead of using WRITE32(), - * since it is already in network byte order. - */ - *statp = op->status; + /* Note that op->status is already in network byte order: */ + write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4); } /* @@ -3588,21 +3941,22 @@ status: * called with nfs4_lock_state() held */ void -nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) { __be32 *p; struct nfs4_replay *rp = op->replay; BUG_ON(!rp); - RESERVE_SPACE(8); - WRITE32(op->opnum); + p = xdr_reserve_space(xdr, 8 + rp->rp_buflen); + if (!p) { + WARN_ON_ONCE(1); + return; + } + *p++ = cpu_to_be32(op->opnum); *p++ = rp->rp_status; /* already xdr'ed */ - ADJUST_ARGS(); - RESERVE_SPACE(rp->rp_buflen); - WRITEMEM(rp->rp_buf, rp->rp_buflen); - ADJUST_ARGS(); + p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen); } int @@ -3634,6 +3988,12 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp) int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args) { + if (rqstp->rq_arg.head[0].iov_len % 4) { + /* client is nuts */ + dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)", + __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid)); + return 0; + } args->p = p; args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; args->pagelist = rqstp->rq_arg.pages; @@ -3653,27 +4013,31 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo * All that remains is to write the tag and operation count... */ struct nfsd4_compound_state *cs = &resp->cstate; - struct kvec *iov; + struct xdr_buf *buf = resp->xdr.buf; + + WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + + buf->tail[0].iov_len); + + rqstp->rq_next_page = resp->xdr.page_ptr + 1; + p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); - if (rqstp->rq_res.page_len) - iov = &rqstp->rq_res.tail[0]; - else - iov = &rqstp->rq_res.head[0]; - iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; - BUG_ON(iov->iov_len > PAGE_SIZE); if (nfsd4_has_session(cs)) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_client *clp = cs->session->se_client; if (cs->status != nfserr_replay_cache) { nfsd4_store_cache_entry(resp); cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; } /* Renew the clientid on success and on replay */ - release_session_client(cs->session); + spin_lock(&nn->client_lock); nfsd4_put_session(cs->session); + spin_unlock(&nn->client_lock); + put_client_renew(clp); } return 1; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 2cbac34a55d..6040da8830f 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -9,34 +9,66 @@ */ #include <linux/slab.h> +#include <linux/sunrpc/addr.h> +#include <linux/highmem.h> +#include <linux/log2.h> +#include <linux/hash.h> +#include <net/checksum.h> #include "nfsd.h" #include "cache.h" -/* Size of reply cache. Common values are: - * 4.3BSD: 128 - * 4.4BSD: 256 - * Solaris2: 1024 - * DEC Unix: 512-4096 +#define NFSDDBG_FACILITY NFSDDBG_REPCACHE + +/* + * We use this value to determine the number of hash buckets from the max + * cache size, the idea being that when the cache is at its maximum number + * of entries, then this should be the average number of entries per bucket. */ -#define CACHESIZE 1024 -#define HASHSIZE 64 +#define TARGET_BUCKET_SIZE 64 static struct hlist_head * cache_hash; static struct list_head lru_head; -static int cache_disabled = 1; +static struct kmem_cache *drc_slab; + +/* max number of entries allowed in the cache */ +static unsigned int max_drc_entries; + +/* number of significant bits in the hash value */ +static unsigned int maskbits; /* - * Calculate the hash index from an XID. + * Stats and other tracking of on the duplicate reply cache. All of these and + * the "rc" fields in nfsdstats are protected by the cache_lock */ -static inline u32 request_hash(u32 xid) -{ - u32 h = xid; - h ^= (xid >> 24); - return h & (HASHSIZE-1); -} + +/* total number of entries */ +static unsigned int num_drc_entries; + +/* cache misses due only to checksum comparison failures */ +static unsigned int payload_misses; + +/* amount of memory (in bytes) currently consumed by the DRC */ +static unsigned int drc_mem_usage; + +/* longest hash chain seen */ +static unsigned int longest_chain; + +/* size of cache when we saw the longest hash chain */ +static unsigned int longest_chain_cachesize; static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); +static void cache_cleaner_func(struct work_struct *unused); +static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); + +static struct shrinker nfsd_reply_cache_shrinker = { + .scan_objects = nfsd_reply_cache_scan, + .count_objects = nfsd_reply_cache_count, + .seeks = 1, +}; /* * locking for the reply cache: @@ -44,30 +76,104 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); * Otherwise, it when accessing _prev or _next, the lock must be held. */ static DEFINE_SPINLOCK(cache_lock); +static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); -int nfsd_reply_cache_init(void) +/* + * Put a cap on the size of the DRC based on the amount of available + * low memory in the machine. + * + * 64MB: 8192 + * 128MB: 11585 + * 256MB: 16384 + * 512MB: 23170 + * 1GB: 32768 + * 2GB: 46340 + * 4GB: 65536 + * 8GB: 92681 + * 16GB: 131072 + * + * ...with a hard cap of 256k entries. In the worst case, each entry will be + * ~1k, so the above numbers should give a rough max of the amount of memory + * used in k. + */ +static unsigned int +nfsd_cache_size_limit(void) +{ + unsigned int limit; + unsigned long low_pages = totalram_pages - totalhigh_pages; + + limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10); + return min_t(unsigned int, limit, 256*1024); +} + +/* + * Compute the number of hash buckets we need. Divide the max cachesize by + * the "target" max bucket size, and round up to next power of two. + */ +static unsigned int +nfsd_hashsize(unsigned int limit) +{ + return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); +} + +static struct svc_cacherep * +nfsd_reply_cache_alloc(void) { struct svc_cacherep *rp; - int i; - INIT_LIST_HEAD(&lru_head); - i = CACHESIZE; - while (i) { - rp = kmalloc(sizeof(*rp), GFP_KERNEL); - if (!rp) - goto out_nomem; - list_add(&rp->c_lru, &lru_head); + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); + if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; + INIT_LIST_HEAD(&rp->c_lru); INIT_HLIST_NODE(&rp->c_hash); - i--; } + return rp; +} + +static void +nfsd_reply_cache_free_locked(struct svc_cacherep *rp) +{ + if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { + drc_mem_usage -= rp->c_replvec.iov_len; + kfree(rp->c_replvec.iov_base); + } + if (!hlist_unhashed(&rp->c_hash)) + hlist_del(&rp->c_hash); + list_del(&rp->c_lru); + --num_drc_entries; + drc_mem_usage -= sizeof(*rp); + kmem_cache_free(drc_slab, rp); +} - cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); +static void +nfsd_reply_cache_free(struct svc_cacherep *rp) +{ + spin_lock(&cache_lock); + nfsd_reply_cache_free_locked(rp); + spin_unlock(&cache_lock); +} + +int nfsd_reply_cache_init(void) +{ + unsigned int hashsize; + + INIT_LIST_HEAD(&lru_head); + max_drc_entries = nfsd_cache_size_limit(); + num_drc_entries = 0; + hashsize = nfsd_hashsize(max_drc_entries); + maskbits = ilog2(hashsize); + + register_shrinker(&nfsd_reply_cache_shrinker); + drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), + 0, 0, NULL); + if (!drc_slab) + goto out_nomem; + + cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); if (!cache_hash) goto out_nomem; - cache_disabled = 0; return 0; out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); @@ -79,27 +185,33 @@ void nfsd_reply_cache_shutdown(void) { struct svc_cacherep *rp; + unregister_shrinker(&nfsd_reply_cache_shrinker); + cancel_delayed_work_sync(&cache_cleaner); + while (!list_empty(&lru_head)) { rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); - if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) - kfree(rp->c_replvec.iov_base); - list_del(&rp->c_lru); - kfree(rp); + nfsd_reply_cache_free_locked(rp); } - cache_disabled = 1; - kfree (cache_hash); cache_hash = NULL; + + if (drc_slab) { + kmem_cache_destroy(drc_slab); + drc_slab = NULL; + } } /* - * Move cache entry to end of LRU list + * Move cache entry to end of LRU list, and queue the cleaner to run if it's + * not already scheduled. */ static void lru_put_end(struct svc_cacherep *rp) { + rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &lru_head); + schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } /* @@ -109,89 +221,236 @@ static void hash_refile(struct svc_cacherep *rp) { hlist_del_init(&rp->c_hash); - hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid)); + hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); +} + +/* + * Walk the LRU list and prune off entries that are older than RC_EXPIRE. + * Also prune the oldest ones when the total exceeds the max number of entries. + */ +static long +prune_cache_entries(void) +{ + struct svc_cacherep *rp, *tmp; + long freed = 0; + + list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { + /* + * Don't free entries attached to calls that are still + * in-progress, but do keep scanning the list. + */ + if (rp->c_state == RC_INPROG) + continue; + if (num_drc_entries <= max_drc_entries && + time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) + break; + nfsd_reply_cache_free_locked(rp); + freed++; + } + + /* + * Conditionally rearm the job. If we cleaned out the list, then + * cancel any pending run (since there won't be any work to do). + * Otherwise, we rearm the job or modify the existing one to run in + * RC_EXPIRE since we just ran the pruner. + */ + if (list_empty(&lru_head)) + cancel_delayed_work(&cache_cleaner); + else + mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); + return freed; +} + +static void +cache_cleaner_func(struct work_struct *unused) +{ + spin_lock(&cache_lock); + prune_cache_entries(); + spin_unlock(&cache_lock); +} + +static unsigned long +nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long num; + + spin_lock(&cache_lock); + num = num_drc_entries; + spin_unlock(&cache_lock); + + return num; +} + +static unsigned long +nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long freed; + + spin_lock(&cache_lock); + freed = prune_cache_entries(); + spin_unlock(&cache_lock); + return freed; +} +/* + * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes + */ +static __wsum +nfsd_cache_csum(struct svc_rqst *rqstp) +{ + int idx; + unsigned int base; + __wsum csum; + struct xdr_buf *buf = &rqstp->rq_arg; + const unsigned char *p = buf->head[0].iov_base; + size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len, + RC_CSUMLEN); + size_t len = min(buf->head[0].iov_len, csum_len); + + /* rq_arg.head first */ + csum = csum_partial(p, len, 0); + csum_len -= len; + + /* Continue into page array */ + idx = buf->page_base / PAGE_SIZE; + base = buf->page_base & ~PAGE_MASK; + while (csum_len) { + p = page_address(buf->pages[idx]) + base; + len = min_t(size_t, PAGE_SIZE - base, csum_len); + csum = csum_partial(p, len, csum); + csum_len -= len; + base = 0; + ++idx; + } + return csum; +} + +static bool +nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) +{ + /* Check RPC header info first */ + if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || + rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers || + rqstp->rq_arg.len != rp->c_len || + !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || + rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) + return false; + + /* compare checksum of NFS data */ + if (csum != rp->c_csum) { + ++payload_misses; + return false; + } + + return true; +} + +/* + * Search the request hash for an entry that matches the given rqstp. + * Must be called with cache_lock held. Returns the found entry or + * NULL on failure. + */ +static struct svc_cacherep * +nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) +{ + struct svc_cacherep *rp, *ret = NULL; + struct hlist_head *rh; + unsigned int entries = 0; + + rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)]; + hlist_for_each_entry(rp, rh, c_hash) { + ++entries; + if (nfsd_cache_match(rqstp, csum, rp)) { + ret = rp; + break; + } + } + + /* tally hash chain length stats */ + if (entries > longest_chain) { + longest_chain = entries; + longest_chain_cachesize = num_drc_entries; + } else if (entries == longest_chain) { + /* prefer to keep the smallest cachesize possible here */ + longest_chain_cachesize = min(longest_chain_cachesize, + num_drc_entries); + } + + return ret; } /* * Try to find an entry matching the current call in the cache. When none - * is found, we grab the oldest unlocked entry off the LRU list. - * Note that no operation within the loop may sleep. + * is found, we try to grab the oldest expired entry off the LRU list. If + * a suitable one isn't there, then drop the cache_lock and allocate a + * new one, then search again in case one got inserted while this thread + * didn't hold the lock. */ int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct hlist_node *hn; - struct hlist_head *rh; - struct svc_cacherep *rp; + struct svc_cacherep *rp, *found; __be32 xid = rqstp->rq_xid; u32 proto = rqstp->rq_prot, vers = rqstp->rq_vers, proc = rqstp->rq_proc; + __wsum csum; unsigned long age; int type = rqstp->rq_cachetype; - int rtn; + int rtn = RC_DOIT; rqstp->rq_cacherep = NULL; - if (cache_disabled || type == RC_NOCACHE) { + if (type == RC_NOCACHE) { nfsdstats.rcnocache++; - return RC_DOIT; + return rtn; } - spin_lock(&cache_lock); - rtn = RC_DOIT; + csum = nfsd_cache_csum(rqstp); - rh = &cache_hash[request_hash(xid)]; - hlist_for_each_entry(rp, hn, rh, c_hash) { - if (rp->c_state != RC_UNUSED && - xid == rp->c_xid && proc == rp->c_proc && - proto == rp->c_prot && vers == rp->c_vers && - time_before(jiffies, rp->c_timestamp + 120*HZ) && - memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) { - nfsdstats.rchits++; - goto found_entry; - } + /* + * Since the common case is a cache miss followed by an insert, + * preallocate an entry. + */ + rp = nfsd_reply_cache_alloc(); + spin_lock(&cache_lock); + if (likely(rp)) { + ++num_drc_entries; + drc_mem_usage += sizeof(*rp); } - nfsdstats.rcmisses++; - /* This loop shouldn't take more than a few iterations normally */ - { - int safe = 0; - list_for_each_entry(rp, &lru_head, c_lru) { - if (rp->c_state != RC_INPROG) - break; - if (safe++ > CACHESIZE) { - printk("nfsd: loop in repcache LRU list\n"); - cache_disabled = 1; - goto out; - } - } - } + /* go ahead and prune the cache */ + prune_cache_entries(); - /* All entries on the LRU are in-progress. This should not happen */ - if (&rp->c_lru == &lru_head) { - static int complaints; + found = nfsd_cache_search(rqstp, csum); + if (found) { + if (likely(rp)) + nfsd_reply_cache_free_locked(rp); + rp = found; + goto found_entry; + } - printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); - if (++complaints > 5) { - printk(KERN_WARNING "nfsd: disabling repcache.\n"); - cache_disabled = 1; - } + if (!rp) { + dprintk("nfsd: unable to allocate DRC entry!\n"); goto out; } + nfsdstats.rcmisses++; rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; rp->c_xid = xid; rp->c_proc = proc; - memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr)); + rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp)); + rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); rp->c_prot = proto; rp->c_vers = vers; - rp->c_timestamp = jiffies; + rp->c_len = rqstp->rq_arg.len; + rp->c_csum = csum; hash_refile(rp); + lru_put_end(rp); /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { + drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); rp->c_replvec.iov_base = NULL; } @@ -201,9 +460,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) return rtn; found_entry: + nfsdstats.rchits++; /* We found a matching entry which is either in progress or done. */ age = jiffies - rp->c_timestamp; - rp->c_timestamp = jiffies; lru_put_end(rp); rtn = RC_DROPIT; @@ -232,7 +491,7 @@ found_entry: break; default: printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free_locked(rp); } goto out; @@ -257,11 +516,12 @@ found_entry: void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { - struct svc_cacherep *rp; + struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; int len; + size_t bufsize = 0; - if (!(rp = rqstp->rq_cacherep) || cache_disabled) + if (!rp) return; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); @@ -269,7 +529,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free(rp); return; } @@ -281,23 +541,25 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) break; case RC_REPLBUFF: cachv = &rp->c_replvec; - cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); + bufsize = len << 2; + cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); if (!cachv->iov_base) { - spin_lock(&cache_lock); - rp->c_state = RC_UNUSED; - spin_unlock(&cache_lock); + nfsd_reply_cache_free(rp); return; } - cachv->iov_len = len << 2; - memcpy(cachv->iov_base, statp, len << 2); + cachv->iov_len = bufsize; + memcpy(cachv->iov_base, statp, bufsize); break; + case RC_NOCACHE: + nfsd_reply_cache_free(rp); + return; } spin_lock(&cache_lock); + drc_mem_usage += bufsize; lru_put_end(rp); rp->c_secure = rqstp->rq_secure; rp->c_type = cachetype; rp->c_state = RC_DONE; - rp->c_timestamp = jiffies; spin_unlock(&cache_lock); return; } @@ -321,3 +583,30 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) vec->iov_len += data->iov_len; return 1; } + +/* + * Note that fields may be added, removed or reordered in the future. Programs + * scraping this file for info should test the labels to ensure they're + * getting the correct field. + */ +static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) +{ + spin_lock(&cache_lock); + seq_printf(m, "max entries: %u\n", max_drc_entries); + seq_printf(m, "num entries: %u\n", num_drc_entries); + seq_printf(m, "hash buckets: %u\n", 1 << maskbits); + seq_printf(m, "mem usage: %u\n", drc_mem_usage); + seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); + seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); + seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); + seq_printf(m, "payload misses: %u\n", payload_misses); + seq_printf(m, "longest chain len: %u\n", longest_chain); + seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); + spin_unlock(&cache_lock); + return 0; +} + +int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, nfsd_reply_cache_stats_show, NULL); +} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 74934284d9a..51844048937 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -10,7 +10,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/lockd/lockd.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/gss_krb5_enctypes.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -35,6 +35,7 @@ enum { NFSD_Threads, NFSD_Pool_Threads, NFSD_Pool_Stats, + NFSD_Reply_Cache_Stats, NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, @@ -85,7 +86,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) { - ino_t ino = file->f_path.dentry->d_inode->i_ino; + ino_t ino = file_inode(file)->i_ino; char *data; ssize_t rv; @@ -125,11 +126,11 @@ static const struct file_operations transaction_ops = { .llseek = default_llseek, }; -static int exports_open(struct inode *inode, struct file *file) +static int exports_net_open(struct net *net, struct file *file) { int err; struct seq_file *seq; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); err = seq_open(file, &nfs_exports_op); if (err) @@ -140,8 +141,26 @@ static int exports_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations exports_operations = { - .open = exports_open, +static int exports_proc_open(struct inode *inode, struct file *file) +{ + return exports_net_open(current->nsproxy->net_ns, file); +} + +static const struct file_operations exports_proc_operations = { + .open = exports_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + +static int exports_nfsd_open(struct inode *inode, struct file *file) +{ + return exports_net_open(inode->i_sb->s_fs_info, file); +} + +static const struct file_operations exports_nfsd_operations = { + .open = exports_nfsd_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -159,7 +178,7 @@ static int export_features_open(struct inode *inode, struct file *file) return single_open(file, export_features_show, NULL); } -static struct file_operations export_features_operations = { +static const struct file_operations export_features_operations = { .open = export_features_open, .read = seq_read, .llseek = seq_lseek, @@ -178,7 +197,7 @@ static int supported_enctypes_open(struct inode *inode, struct file *file) return single_open(file, supported_enctypes_show, NULL); } -static struct file_operations supported_enctypes_ops = { +static const struct file_operations supported_enctypes_ops = { .open = supported_enctypes_open, .read = seq_read, .llseek = seq_lseek, @@ -194,6 +213,13 @@ static const struct file_operations pool_stats_operations = { .owner = THIS_MODULE, }; +static struct file_operations reply_cache_stats_operations = { + .open = nfsd_reply_cache_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /*----------------------------------------------------------------------------*/ /* * payload - write methods @@ -220,6 +246,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) struct sockaddr *sap = (struct sockaddr *)&address; size_t salen = sizeof(address); char *fo_path; + struct net *net = file->f_dentry->d_sb->s_fs_info; /* sanity check */ if (size == 0) @@ -232,7 +259,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0) + if (rpc_pton(net, fo_path, size, sap, salen) == 0) return -EINVAL; return nlmsvc_unlock_all_by_ip(sap); @@ -317,6 +344,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) int len; struct auth_domain *dom; struct knfsd_fh fh; + struct net *net = file->f_dentry->d_sb->s_fs_info; if (size == 0) return -EINVAL; @@ -352,7 +380,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (!dom) return -ENOMEM; - len = exp_rootfh(&init_net, dom, path, &fh, maxsize); + len = exp_rootfh(net, dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; @@ -396,7 +424,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) { char *mesg = buf; int rv; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; if (size > 0) { int newthreads; @@ -447,7 +475,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int len; int npools; int *nthreads; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); npools = nfsd_nrpools(net); @@ -510,7 +538,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) unsigned minor; ssize_t tlen = 0; char *sep; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size>0) { @@ -534,7 +562,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) else num = simple_strtol(vers, &minorp, 0); if (*minorp == '.') { - if (num < 4) + if (num != 4) return -EINVAL; minor = simple_strtoul(minorp+1, NULL, 0); if (minor == 0) @@ -671,6 +699,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net) if (err != 0 || fd < 0) return -EINVAL; + if (svc_alien_sock(net, fd)) { + printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__); + return -EINVAL; + } + err = nfsd_create_serv(net); if (err != 0) return err; @@ -792,7 +825,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size, static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); rv = __write_ports(file, buf, size, net); @@ -827,7 +860,7 @@ int nfsd_max_blksize; static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size > 0) { @@ -923,7 +956,8 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } @@ -939,7 +973,8 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) */ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) { - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } @@ -995,7 +1030,8 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) { ssize_t rv; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); rv = __write_recoverydir(file, buf, size, nn); @@ -1013,7 +1049,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr nfsd_files[] = { - [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, + [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", @@ -1024,6 +1060,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, + [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, @@ -1037,21 +1074,37 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) #endif /* last one */ {""} }; - return simple_fill_super(sb, 0x6e667364, nfsd_files); + struct net *net = data; + int ret; + + ret = simple_fill_super(sb, 0x6e667364, nfsd_files); + if (ret) + return ret; + sb->s_fs_info = get_net(net); + return 0; } static struct dentry *nfsd_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_single(fs_type, flags, data, nfsd_fill_super); + return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super); +} + +static void nfsd_umount(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + + kill_litter_super(sb); + put_net(net); } static struct file_system_type nfsd_fs_type = { .owner = THIS_MODULE, .name = "nfsd", .mount = nfsd_mount, - .kill_sb = kill_litter_super, + .kill_sb = nfsd_umount, }; +MODULE_ALIAS_FS("nfsd"); #ifdef CONFIG_PROC_FS static int create_proc_exports_entry(void) @@ -1061,9 +1114,12 @@ static int create_proc_exports_entry(void) entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; - entry = proc_create("exports", 0, entry, &exports_operations); - if (!entry) + entry = proc_create("exports", 0, entry, + &exports_proc_operations); + if (!entry) { + remove_proc_entry("fs/nfs", NULL); return -ENOMEM; + } return 0; } #else /* CONFIG_PROC_FS */ @@ -1123,7 +1179,6 @@ static int __init init_nfsd(void) retval = nfsd4_init_slabs(); if (retval) goto out_unregister_pernet; - nfs4_state_init(); retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ if (retval) goto out_free_slabs; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index de23db255c6..847daf37e56 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -15,16 +15,25 @@ #include <linux/nfs2.h> #include <linux/nfs3.h> #include <linux/nfs4.h> +#include <linux/sunrpc/svc.h> #include <linux/sunrpc/msg_prot.h> -#include <linux/nfsd/debug.h> -#include <linux/nfsd/export.h> -#include <linux/nfsd/stats.h> +#include <uapi/linux/nfsd/debug.h> + +#include "stats.h" +#include "export.h" + +#undef ifdebug +#ifdef NFSD_DEBUG +# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) +#else +# define ifdebug(flag) if (0) +#endif /* * nfsd version */ -#define NFSD_SUPPORTED_MINOR_VERSION 1 +#define NFSD_SUPPORTED_MINOR_VERSION 2 /* * Maximum blocksizes supported by daemon under various circumstances. */ @@ -53,11 +62,10 @@ struct readdir_cd { extern struct svc_program nfsd_program; extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; -extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern spinlock_t nfsd_drc_lock; -extern unsigned int nfsd_drc_max_mem; -extern unsigned int nfsd_drc_mem_used; +extern unsigned long nfsd_drc_max_mem; +extern unsigned long nfsd_drc_mem_used; extern const struct seq_operations nfs_exports_op; @@ -106,8 +114,7 @@ static inline int nfsd_v4client(struct svc_rqst *rq) * NFSv4 State */ #ifdef CONFIG_NFSD_V4 -extern unsigned int max_delegations; -void nfs4_state_init(void); +extern unsigned long max_delegations; int nfsd4_init_slabs(void); void nfsd4_free_slabs(void); int nfs4_state_start(void); @@ -118,7 +125,6 @@ void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); char * nfs4_recoverydir(void); #else -static inline void nfs4_state_init(void) { } static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } static inline int nfs4_state_start(void) { return 0; } @@ -243,6 +249,12 @@ void nfsd_lockd_shutdown(void); #define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG) #define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT) #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) +#define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) +#define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) +#define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) +#define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) +#define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) +#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) /* error codes for internal use */ /* if a request fails due to kmalloc failure, it gets dropped. @@ -277,7 +289,7 @@ void nfsd_lockd_shutdown(void); * reason. */ #define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ -#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ +#define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */ #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ @@ -322,6 +334,13 @@ void nfsd_lockd_shutdown(void); #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) +#else +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 +#endif + static inline u32 nfsd_suppattrs0(u32 minorversion) { return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 @@ -336,8 +355,11 @@ static inline u32 nfsd_suppattrs1(u32 minorversion) static inline u32 nfsd_suppattrs2(u32 minorversion) { - return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2 - : NFSD4_SUPPORTED_ATTRS_WORD2; + switch (minorversion) { + default: return NFSD4_2_SUPPORTED_ATTRS_WORD2; + case 1: return NFSD4_1_SUPPORTED_ATTRS_WORD2; + case 0: return NFSD4_SUPPORTED_ATTRS_WORD2; + } } /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ @@ -350,7 +372,11 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL +#else #define NFSD_WRITEABLE_ATTRS_WORD2 0 +#endif #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ NFSD_WRITEABLE_ATTRS_WORD0 diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 814afaa4458..ec839341815 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) tdentry = parent; } if (tdentry != exp->ex_path.dentry) - dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name); + dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry); rv = (tdentry == exp->ex_path.dentry); dput(tdentry); return rv; @@ -88,9 +88,8 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, /* Check if the request originated from a secure port. */ if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) { RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - dprintk(KERN_WARNING - "nfsd: request from insecure port %s!\n", - svc_print_addr(rqstp, buf, sizeof(buf))); + dprintk("nfsd: request from insecure port %s!\n", + svc_print_addr(rqstp, buf, sizeof(buf))); return nfserr_perm; } @@ -169,8 +168,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) data_left -= len; if (data_left < 0) return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth); - fid = (struct fid *)(fh->fh_auth + len); + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + fid = (struct fid *)(fh->fh_fsid + len); } else { __u32 tfh[2]; dev_t xdev; @@ -253,8 +252,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (S_ISDIR(dentry->d_inode->i_mode) && (dentry->d_flags & DCACHE_DISCONNECTED)) { - printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n", + dentry); } fhp->fh_dentry = dentry; @@ -361,10 +360,9 @@ skip_pseudoflavor_check: error = nfsd_permission(rqstp, exp, dentry, access); if (error) { - dprintk("fh_verify: %s/%s permission failure, " + dprintk("fh_verify: %pd2 permission failure, " "acc=%x, error=%d\n", - dentry->d_parent->d_name.name, - dentry->d_name.name, + dentry, access, ntohl(error)); } out: @@ -386,7 +384,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, { if (dentry != exp->ex_path.dentry) { struct fid *fid = (struct fid *) - (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); + (fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1); int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); @@ -514,14 +512,12 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ struct inode * inode = dentry->d_inode; - struct dentry *parent = dentry->d_parent; - __u32 *datap; dev_t ex_dev = exp_sb(exp)->s_dev; - dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", + dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n", MAJOR(ex_dev), MINOR(ex_dev), (long) exp->ex_path.dentry->d_inode->i_ino, - parent->d_name.name, dentry->d_name.name, + dentry, (inode ? inode->i_ino : 0)); /* Choose filehandle version and fsid type based on @@ -534,13 +530,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fh_put(ref_fh); if (fhp->fh_locked || fhp->fh_dentry) { - printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", - parent->d_name.name, dentry->d_name.name); + printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", + dentry); } if (fhp->fh_maxsize < NFS_FHSIZE) - printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n", + printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n", fhp->fh_maxsize, - parent->d_name.name, dentry->d_name.name); + dentry); fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp; @@ -559,17 +555,16 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (inode) _fh_update_old(dentry, exp, &fhp->fh_handle); } else { - int len; + fhp->fh_handle.fh_size = + key_len(fhp->fh_handle.fh_fsid_type) + 4; fhp->fh_handle.fh_auth_type = 0; - datap = fhp->fh_handle.fh_auth+0; - mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev, + + mk_fsid(fhp->fh_handle.fh_fsid_type, + fhp->fh_handle.fh_fsid, + ex_dev, exp->ex_path.dentry->d_inode->i_ino, exp->ex_fsid, exp->ex_uuid); - len = key_len(fhp->fh_handle.fh_fsid_type); - datap += len/4; - fhp->fh_handle.fh_size = 4 + len; - if (inode) _fh_update(fhp, exp, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { @@ -600,22 +595,20 @@ fh_update(struct svc_fh *fhp) _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); } else { if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) - goto out; + return 0; _fh_update(fhp, fhp->fh_export, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) return nfserr_opnotsupp; } -out: return 0; - out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); - goto out; + return nfserr_serverfault; out_negative: - printk(KERN_ERR "fh_update: %s/%s still negative!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; + printk(KERN_ERR "fh_update: %pd2 still negative!\n", + dentry); + return nfserr_serverfault; } /* diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index e5e6707ba68..2e89e70ac15 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -1,9 +1,58 @@ -/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */ +/* + * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> + * + * This file describes the layout of the file handles as passed + * over the wire. + */ +#ifndef _LINUX_NFSD_NFSFH_H +#define _LINUX_NFSD_NFSFH_H + +#include <linux/sunrpc/svc.h> +#include <uapi/linux/nfsd/nfsfh.h> + +static inline __u32 ino_t_to_u32(ino_t ino) +{ + return (__u32) ino; +} -#ifndef _LINUX_NFSD_FH_INT_H -#define _LINUX_NFSD_FH_INT_H +static inline ino_t u32_to_ino_t(__u32 uino) +{ + return (ino_t) uino; +} + +/* + * This is the internal representation of an NFS handle used in knfsd. + * pre_mtime/post_version will be used to support wcc_attr's in NFSv3. + */ +typedef struct svc_fh { + struct knfsd_fh fh_handle; /* FH data */ + struct dentry * fh_dentry; /* validated dentry */ + struct svc_export * fh_export; /* export pointer */ + int fh_maxsize; /* max size for fh_handle */ -#include <linux/nfsd/nfsfh.h> + unsigned char fh_locked; /* inode locked by us */ + unsigned char fh_want_write; /* remount protection taken */ + +#ifdef CONFIG_NFSD_V3 + unsigned char fh_post_saved; /* post-op attrs saved */ + unsigned char fh_pre_saved; /* pre-op attrs saved */ + + /* Pre-op attributes saved during fh_lock */ + __u64 fh_pre_size; /* size before operation */ + struct timespec fh_pre_mtime; /* mtime before oper */ + struct timespec fh_pre_ctime; /* ctime before oper */ + /* + * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) + * to find out if it is valid. + */ + u64 fh_pre_change; + + /* Post-op attributes saved in fh_unlock */ + struct kstat fh_post_attr; /* full attrs after operation */ + u64 fh_post_change; /* nfsv4 change; see above */ +#endif /* CONFIG_NFSD_V3 */ + +} svc_fh; enum nfsd_fsid { FSID_DEV = 0, @@ -133,6 +182,17 @@ fh_init(struct svc_fh *fhp, int maxsize) #ifdef CONFIG_NFSD_V3 /* + * The wcc data stored in current_fh should be cleared + * between compound ops. + */ +static inline void +fh_clear_wcc(struct svc_fh *fhp) +{ + fhp->fh_post_saved = 0; + fhp->fh_pre_saved = 0; +} + +/* * Fill in the pre_op attr for the wcc data */ static inline void @@ -152,7 +212,8 @@ fill_pre_wcc(struct svc_fh *fhp) extern void fill_post_wcc(struct svc_fh *); #else -#define fill_pre_wcc(ignored) +#define fh_clear_wcc(ignored) +#define fill_pre_wcc(ignored) #define fill_post_wcc(notused) #endif /* CONFIG_NFSD_V3 */ @@ -173,8 +234,8 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) BUG_ON(!dentry); if (fhp->fh_locked) { - printk(KERN_WARNING "fh_lock: %s/%s already locked!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", + dentry); return; } @@ -203,4 +264,4 @@ fh_unlock(struct svc_fh *fhp) } } -#endif /* _LINUX_NFSD_FH_INT_H */ +#endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index aad6d457b9e..54c6b3d3cc7 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -26,17 +26,13 @@ static __be32 nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp) { if (err) return err; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } static __be32 nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp) { if (err) return err; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } /* * Get a file's attributes @@ -150,9 +146,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, &resp->count); if (nfserr) return nfserr; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } /* diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index cee62ab9d4a..1879e43f286 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -59,8 +59,8 @@ DEFINE_MUTEX(nfsd_mutex); * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. */ spinlock_t nfsd_drc_lock; -unsigned int nfsd_drc_max_mem; -unsigned int nfsd_drc_mem_used; +unsigned long nfsd_drc_max_mem; +unsigned long nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; @@ -116,7 +116,10 @@ struct svc_program nfsd_program = { }; -u32 nfsd_supported_minorversion; +static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { + [0] = 1, + [1] = 1, +}; int nfsd_vers(int vers, enum vers_op change) { @@ -151,15 +154,13 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change) return -1; switch(change) { case NFSD_SET: - nfsd_supported_minorversion = minorversion; + nfsd_supported_minorversions[minorversion] = true; break; case NFSD_CLEAR: - if (minorversion == 0) - return -1; - nfsd_supported_minorversion = minorversion - 1; + nfsd_supported_minorversions[minorversion] = false; break; case NFSD_TEST: - return minorversion <= nfsd_supported_minorversion; + return nfsd_supported_minorversions[minorversion]; case NFSD_AVAIL: return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; } @@ -240,6 +241,15 @@ static void nfsd_shutdown_generic(void) nfsd_racache_shutdown(); } +static bool nfsd_needs_lockd(void) +{ +#if defined(CONFIG_NFSD_V3) + return (nfsd_versions[2] != NULL) || (nfsd_versions[3] != NULL); +#else + return (nfsd_versions[2] != NULL); +#endif +} + static int nfsd_startup_net(int nrservs, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -254,9 +264,14 @@ static int nfsd_startup_net(int nrservs, struct net *net) ret = nfsd_init_socks(net); if (ret) goto out_socks; - ret = lockd_up(net); - if (ret) - goto out_socks; + + if (nfsd_needs_lockd() && !nn->lockd_up) { + ret = lockd_up(net); + if (ret) + goto out_socks; + nn->lockd_up = 1; + } + ret = nfs4_state_start_net(net); if (ret) goto out_lockd; @@ -265,7 +280,10 @@ static int nfsd_startup_net(int nrservs, struct net *net) return 0; out_lockd: - lockd_down(net); + if (nn->lockd_up) { + lockd_down(net); + nn->lockd_up = 0; + } out_socks: nfsd_shutdown_generic(); return ret; @@ -276,7 +294,10 @@ static void nfsd_shutdown_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfs4_state_shutdown_net(net); - lockd_down(net); + if (nn->lockd_up) { + lockd_down(net); + nn->lockd_up = 0; + } nn->nfsd_net_up = false; nfsd_shutdown_generic(); } @@ -342,7 +363,7 @@ static void set_max_drc(void) >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; nfsd_drc_mem_used = 0; spin_lock_init(&nfsd_drc_lock); - dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); + dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); } static int nfsd_get_default_max_blksize(void) @@ -570,12 +591,6 @@ nfsd(void *vrqstp) nfsdstats.th_cnt++; mutex_unlock(&nfsd_mutex); - /* - * We want less throttling in balance_dirty_pages() so that nfs to - * localhost doesn't cause nfsd to lock up due to all the client's - * dirty pages. - */ - current->flags |= PF_LESS_THROTTLE; set_freezable(); /* @@ -652,7 +667,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /* Check whether we have this call in the cache. */ switch (nfsd_cache_lookup(rqstp)) { - case RC_INTR: case RC_DROPIT: return 0; case RC_REPLY: @@ -703,8 +717,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) int nfsd_pool_stats_open(struct inode *inode, struct file *file) { int ret; - struct net *net = &init_net; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv == NULL) { @@ -721,7 +734,7 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) int nfsd_pool_stats_release(struct inode *inode, struct file *file) { int ret = seq_release(inode, file); - struct net *net = &init_net; + struct net *net = inode->i_sb->s_fs_info; mutex_lock(&nfsd_mutex); /* this function really, really should have been called svc_put() */ diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 979b4210697..1ac306b769d 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -4,6 +4,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ +#include "vfs.h" #include "xdr.h" #include "auth.h" @@ -100,12 +101,14 @@ decode_sattr(__be32 *p, struct iattr *iap) iap->ia_mode = tmp; } if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_valid |= ATTR_UID; - iap->ia_uid = tmp; + iap->ia_uid = make_kuid(&init_user_ns, tmp); + if (uid_valid(iap->ia_uid)) + iap->ia_valid |= ATTR_UID; } if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_valid |= ATTR_GID; - iap->ia_gid = tmp; + iap->ia_gid = make_kgid(&init_user_ns, tmp); + if (gid_valid(iap->ia_gid)) + iap->ia_valid |= ATTR_GID; } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_SIZE; @@ -151,8 +154,8 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, *p++ = htonl(nfs_ftypes[type >> 12]); *p++ = htonl((u32) stat->mode); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); + *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); @@ -194,11 +197,9 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, } /* Helper function for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { - struct kstat stat; - vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &stat); - return encode_fattr(rqstp, p, fhp, &stat); + return encode_fattr(rqstp, p, fhp, stat); } /* @@ -213,7 +214,8 @@ nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) int nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -247,7 +249,8 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, { unsigned int len; int v; - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->offset = ntohl(*p++); @@ -280,7 +283,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, unsigned int len, hdr, dlen; int v; - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p++; /* beginoffset */ @@ -354,7 +358,8 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p, int nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); @@ -390,7 +395,8 @@ int nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readdirargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->cookie = ntohl(*p++); args->count = ntohl(*p++); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index d1c229feed5..374c66283ac 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -37,7 +37,6 @@ #include <linux/idr.h> #include <linux/sunrpc/svc_xprt.h> -#include <linux/nfsd/nfsfh.h> #include "nfsfh.h" typedef struct { @@ -79,6 +78,8 @@ struct nfs4_stid { #define NFS4_DELEG_STID 4 /* For an open stateid kept around *only* to process close replays: */ #define NFS4_CLOSED_STID 8 +/* For a deleg stateid kept around only to process free_stateid's: */ +#define NFS4_REVOKED_DELEG_STID 16 unsigned char sc_type; stateid_t sc_stateid; struct nfs4_client *sc_client; @@ -121,7 +122,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 16 /* Maximum session per slot cache size */ -#define NFSD_SLOT_CACHE_SIZE 1024 +#define NFSD_SLOT_CACHE_SIZE 2048 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 #define NFSD_MAX_MEM_PER_SESSION \ @@ -152,8 +153,8 @@ struct nfsd4_channel_attrs { struct nfsd4_cb_sec { u32 flavor; /* (u32)(-1) used to mean "no valid flavor" */ - u32 uid; - u32 gid; + kuid_t uid; + kgid_t gid; }; struct nfsd4_create_session { @@ -194,9 +195,11 @@ struct nfsd4_conn { }; struct nfsd4_session { - struct kref se_ref; + atomic_t se_ref; struct list_head se_hash; /* hash by sessionid */ struct list_head se_perclnt; +/* See SESSION4_PERSIST, etc. for standard flags; this is internal-only: */ +#define NFS4_SESSION_DEAD 0x010 u32 se_flags; struct nfs4_client *se_client; struct nfs4_sessionid se_sessionid; @@ -236,11 +239,13 @@ struct nfs4_client { struct list_head cl_openowners; struct idr cl_stateids; /* stateid lookup */ struct list_head cl_delegations; + struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */ struct list_head cl_lru; /* tail queue */ struct xdr_netobj cl_name; /* id generated by client */ nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ + bool cl_mach_cred; /* SP4_MACH_CRED in force */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ @@ -286,18 +291,6 @@ struct nfs4_client { struct net *net; }; -static inline void -mark_client_expired(struct nfs4_client *clp) -{ - clp->cl_time = 0; -} - -static inline bool -is_client_expired(struct nfs4_client *clp) -{ - return clp->cl_time == 0; -} - /* struct nfs4_client_reset * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl * upon lease reset, or from upcall to state_daemon (to read in state @@ -365,7 +358,6 @@ struct nfs4_openowner { struct nfs4_ol_stateid *oo_last_closed_stid; time_t oo_time; /* time of placement on so_close_lru */ #define NFS4_OO_CONFIRMED 1 -#define NFS4_OO_PURGE_CLOSE 2 #define NFS4_OO_NEW 4 unsigned char oo_flags; }; @@ -373,7 +365,7 @@ struct nfs4_openowner { struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ struct list_head lo_owner_ino_hash; /* hash by owner,file */ - struct list_head lo_perstateid; /* for lockowners only */ + struct list_head lo_perstateid; struct list_head lo_list; /* for temporary uses */ }; @@ -390,7 +382,7 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) /* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */ struct nfs4_file { atomic_t fi_ref; - struct list_head fi_hash; /* hash by "struct inode *" */ + struct hlist_node fi_hash; /* hash by "struct inode *" */ struct list_head fi_stateids; struct list_head fi_delegations; /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ @@ -471,8 +463,6 @@ extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn); extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); -extern void nfs4_free_openowner(struct nfs4_openowner *); -extern void nfs4_free_lockowner(struct nfs4_lockowner *); extern int set_callback_cred(void); extern void nfsd4_init_callback(struct nfsd4_callback *); extern void nfsd4_probe_callback(struct nfs4_client *clp); @@ -486,8 +476,7 @@ extern void nfs4_put_delegation(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); -extern void release_session_client(struct nfsd4_session *); -extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *); +extern void put_client_renew(struct nfs4_client *clp); /* nfs4recover operations */ extern int nfsd4_client_tracking_init(struct net *net); diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 6d4521feb6e..cd90878a76a 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -24,7 +24,6 @@ #include <linux/seq_file.h> #include <linux/module.h> #include <linux/sunrpc/stats.h> -#include <linux/nfsd/stats.h> #include <net/net_namespace.h> #include "nfsd.h" diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h new file mode 100644 index 00000000000..a5c944b771c --- /dev/null +++ b/fs/nfsd/stats.h @@ -0,0 +1,43 @@ +/* + * Statistics for NFS server. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ +#ifndef _NFSD_STATS_H +#define _NFSD_STATS_H + +#include <uapi/linux/nfsd/stats.h> + + +struct nfsd_stats { + unsigned int rchits; /* repcache hits */ + unsigned int rcmisses; /* repcache hits */ + unsigned int rcnocache; /* uncached reqs */ + unsigned int fh_stale; /* FH stale error */ + unsigned int fh_lookup; /* dentry cached */ + unsigned int fh_anon; /* anon file dentry returned */ + unsigned int fh_nocache_dir; /* filehandle not found in dcache */ + unsigned int fh_nocache_nondir; /* filehandle not found in dcache */ + unsigned int io_read; /* bytes returned to read requests */ + unsigned int io_write; /* bytes passed in write requests */ + unsigned int th_cnt; /* number of available threads */ + unsigned int th_usage[10]; /* number of ticks during which n perdeciles + * of available threads were in use */ + unsigned int th_fullcnt; /* number of times last free thread was used */ + unsigned int ra_size; /* size of ra cache */ + unsigned int ra_depth[11]; /* number of times ra entry was found that deep + * in the cache (10percentiles). [10] = not found */ +#ifdef CONFIG_NFSD_V4 + unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */ +#endif + +}; + + +extern struct nfsd_stats nfsdstats; +extern struct svc_stat nfsd_svcstats; + +void nfsd_stat_init(void); +void nfsd_stat_shutdown(void); + +#endif /* _NFSD_STATS_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d586117fa94..140c496f612 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -28,6 +28,7 @@ #include <asm/uaccess.h> #include <linux/exportfs.h> #include <linux/writeback.h> +#include <linux/security.h> #ifdef CONFIG_NFSD_V3 #include "xdr3.h" @@ -206,7 +207,12 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } } else { - fh_lock(fhp); + /* + * In the nfsd4_open() case, this may be held across + * subsequent open and delegation acquisition which may + * need to take the child's i_mutex: + */ + fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = lookup_one_len(name, dparent, len); host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) @@ -272,13 +278,6 @@ out: return err; } -static int nfsd_break_lease(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 0; - return break_lease(inode, O_WRONLY | O_NONBLOCK); -} - /* * Commit metadata changes to stable storage. */ @@ -297,41 +296,12 @@ commit_metadata(struct svc_fh *fhp) } /* - * Set various file attributes. - * N.B. After this call fhp needs an fh_put + * Go over the attributes and take care of the small differences between + * NFS semantics and what Linux expects. */ -__be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) +static void +nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { - struct dentry *dentry; - struct inode *inode; - int accmode = NFSD_MAY_SATTR; - umode_t ftype = 0; - __be32 err; - int host_err; - int size_change = 0; - - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) - accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) - ftype = S_IFREG; - - /* Get inode */ - err = fh_verify(rqstp, fhp, ftype, accmode); - if (err) - goto out; - - dentry = fhp->fh_dentry; - inode = dentry->d_inode; - - /* Ignore any mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; - - if (!iap->ia_valid) - goto out; - /* * NFSv2 does not differentiate between "set-[ac]time-to-now" * which only requires access, and "set-[ac]time-to-X" which @@ -341,8 +311,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * convert to "set to now" instead of "set to explicit time" * * We only call inode_change_ok as the last test as technically - * it is not an interface that we should be using. It is only - * valid if the filesystem does not define it's own i_op->setattr. + * it is not an interface that we should be using. */ #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) #define MAX_TOUCH_TIME_ERROR (30*60) @@ -368,30 +337,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~BOTH_TIME_SET; } } - - /* - * The size case is special. - * It changes the file as well as the attributes. - */ - if (iap->ia_valid & ATTR_SIZE) { - if (iap->ia_size < inode->i_size) { - err = nfsd_permission(rqstp, fhp->fh_export, dentry, - NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - } - - host_err = get_write_access(inode); - if (host_err) - goto out_nfserr; - - size_change = 1; - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) { - put_write_access(inode); - goto out_nfserr; - } - } /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { @@ -401,8 +346,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* Revoke setuid/setgid on chown */ if (!S_ISDIR(inode->i_mode) && - (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || - ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) { + ((iap->ia_valid & ATTR_UID) || (iap->ia_valid & ATTR_GID))) { iap->ia_valid |= ATTR_KILL_PRIV; if (iap->ia_valid & ATTR_MODE) { /* we're setting mode too, just clear the s*id bits */ @@ -414,186 +358,118 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); } } - - /* Change the attributes. */ - - iap->ia_valid |= ATTR_CTIME; - - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime.tv_sec) { - host_err = nfsd_break_lease(inode); - if (host_err) - goto out_nfserr; - fh_lock(fhp); - - host_err = notify_change(dentry, iap); - err = nfserrno(host_err); - fh_unlock(fhp); - } - if (size_change) - put_write_access(inode); - if (!err) - commit_metadata(fhp); -out: - return err; - -out_nfserr: - err = nfserrno(host_err); - goto out; } -#if defined(CONFIG_NFSD_V2_ACL) || \ - defined(CONFIG_NFSD_V3_ACL) || \ - defined(CONFIG_NFSD_V4) -static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) +static __be32 +nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct iattr *iap) { - ssize_t buflen; - ssize_t ret; - - buflen = vfs_getxattr(dentry, key, NULL, 0); - if (buflen <= 0) - return buflen; + struct inode *inode = fhp->fh_dentry->d_inode; + int host_err; - *buf = kmalloc(buflen, GFP_KERNEL); - if (!*buf) - return -ENOMEM; + if (iap->ia_size < inode->i_size) { + __be32 err; - ret = vfs_getxattr(dentry, key, *buf, buflen); - if (ret < 0) - kfree(*buf); - return ret; -} -#endif + err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); + if (err) + return err; + } -#if defined(CONFIG_NFSD_V4) -static int -set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) -{ - int len; - size_t buflen; - char *buf = NULL; - int error = 0; - - buflen = posix_acl_xattr_size(pacl->a_count); - buf = kmalloc(buflen, GFP_KERNEL); - error = -ENOMEM; - if (buf == NULL) - goto out; + host_err = get_write_access(inode); + if (host_err) + goto out_nfserrno; - len = posix_acl_to_xattr(&init_user_ns, pacl, buf, buflen); - if (len < 0) { - error = len; - goto out; - } + host_err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (host_err) + goto out_put_write_access; + return 0; - error = vfs_setxattr(dentry, key, buf, len, 0); -out: - kfree(buf); - return error; +out_put_write_access: + put_write_access(inode); +out_nfserrno: + return nfserrno(host_err); } +/* + * Set various file attributes. After this call fhp needs an fh_put. + */ __be32 -nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl) +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + int check_guard, time_t guardtime) { - __be32 error; - int host_error; - struct dentry *dentry; - struct inode *inode; - struct posix_acl *pacl = NULL, *dpacl = NULL; - unsigned int flags = 0; + struct dentry *dentry; + struct inode *inode; + int accmode = NFSD_MAY_SATTR; + umode_t ftype = 0; + __be32 err; + int host_err; + bool get_write_count; + int size_change = 0; + + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; + + /* Callers that do fh_verify should do the fh_want_write: */ + get_write_count = !fhp->fh_dentry; /* Get inode */ - error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); - if (error) - return error; + err = fh_verify(rqstp, fhp, ftype, accmode); + if (err) + goto out; + if (get_write_count) { + host_err = fh_want_write(fhp); + if (host_err) + return nfserrno(host_err); + } dentry = fhp->fh_dentry; inode = dentry->d_inode; - if (S_ISDIR(inode->i_mode)) - flags = NFS4_ACL_DIR; - host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); - if (host_error == -EINVAL) { - return nfserr_attrnotsupp; - } else if (host_error < 0) - goto out_nfserr; + /* Ignore any mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; - host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); - if (host_error < 0) - goto out_release; + if (!iap->ia_valid) + goto out; - if (S_ISDIR(inode->i_mode)) - host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); + nfsd_sanitize_attrs(inode, iap); -out_release: - posix_acl_release(pacl); - posix_acl_release(dpacl); -out_nfserr: - if (host_error == -EOPNOTSUPP) - return nfserr_attrnotsupp; - else - return nfserrno(host_error); -} + /* + * The size case is special, it changes the file in addition to the + * attributes. + */ + if (iap->ia_valid & ATTR_SIZE) { + err = nfsd_get_write_access(rqstp, fhp, iap); + if (err) + goto out; + size_change = 1; + } -static struct posix_acl * -_get_posix_acl(struct dentry *dentry, char *key) -{ - void *buf = NULL; - struct posix_acl *pacl = NULL; - int buflen; - - buflen = nfsd_getxattr(dentry, key, &buf); - if (!buflen) - buflen = -ENODATA; - if (buflen <= 0) - return ERR_PTR(buflen); - - pacl = posix_acl_from_xattr(&init_user_ns, buf, buflen); - kfree(buf); - return pacl; -} + iap->ia_valid |= ATTR_CTIME; -int -nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) -{ - struct inode *inode = dentry->d_inode; - int error = 0; - struct posix_acl *pacl = NULL, *dpacl = NULL; - unsigned int flags = 0; - - pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS); - if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) - pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); - if (IS_ERR(pacl)) { - error = PTR_ERR(pacl); - pacl = NULL; - goto out; + if (check_guard && guardtime != inode->i_ctime.tv_sec) { + err = nfserr_notsync; + goto out_put_write_access; } - if (S_ISDIR(inode->i_mode)) { - dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT); - if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) - dpacl = NULL; - else if (IS_ERR(dpacl)) { - error = PTR_ERR(dpacl); - dpacl = NULL; - goto out; - } - flags = NFS4_ACL_DIR; - } + fh_lock(fhp); + host_err = notify_change(dentry, iap, NULL); + fh_unlock(fhp); + err = nfserrno(host_err); - *acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags); - if (IS_ERR(*acl)) { - error = PTR_ERR(*acl); - *acl = NULL; - } - out: - posix_acl_release(pacl); - posix_acl_release(dpacl); - return error; +out_put_write_access: + if (size_change) + put_write_access(inode); + if (!err) + commit_metadata(fhp); +out: + return err; } +#if defined(CONFIG_NFSD_V4) /* * NFS junction information is stored in an extended attribute. */ @@ -621,6 +497,33 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; return 1; } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) +{ + __be32 error; + int host_error; + struct dentry *dentry; + + error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); + if (error) + return error; + + dentry = fhp->fh_dentry; + + mutex_lock(&dentry->d_inode->i_mutex); + host_error = security_inode_setsecctx(dentry, label->data, label->len); + mutex_unlock(&dentry->d_inode->i_mutex); + return nfserrno(host_error); +} +#else +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) +{ + return nfserr_notsupp; +} +#endif + #endif /* defined(CONFIG_NFSD_V4) */ #ifdef CONFIG_NFSD_V3 @@ -802,9 +705,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, flags = O_WRONLY|O_LARGEFILE; } *filp = dentry_open(&path, flags, current_cred()); - if (IS_ERR(*filp)) + if (IS_ERR(*filp)) { host_err = PTR_ERR(*filp); - else { + *filp = NULL; + } else { host_err = ima_file_check(*filp, may_flags); if (may_flags & NFSD_MAY_64BIT_COOKIE) @@ -916,51 +820,54 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } -static __be32 -nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) { - mm_segment_t oldfs; - __be32 err; - int host_err; - - err = nfserr_perm; - - if (file->f_op->splice_read && rqstp->rq_splice_ok) { - struct splice_desc sd = { - .len = 0, - .total_len = *count, - .pos = offset, - .u.data = rqstp, - }; - - rqstp->rq_next_page = rqstp->rq_respages + 1; - host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); - } else { - oldfs = get_fs(); - set_fs(KERNEL_DS); - host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); - set_fs(oldfs); - } - if (host_err >= 0) { nfsdstats.io_read += host_err; *count = host_err; - err = 0; fsnotify_access(file); + return 0; } else - err = nfserrno(host_err); - return err; + return nfserrno(host_err); } -static void kill_suid(struct dentry *dentry) +int nfsd_splice_read(struct svc_rqst *rqstp, + struct file *file, loff_t offset, unsigned long *count) { - struct iattr ia; - ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; + struct splice_desc sd = { + .len = 0, + .total_len = *count, + .pos = offset, + .u.data = rqstp, + }; + int host_err; - mutex_lock(&dentry->d_inode->i_mutex); - notify_change(dentry, &ia); - mutex_unlock(&dentry->d_inode->i_mutex); + rqstp->rq_next_page = rqstp->rq_respages + 1; + host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); + return nfsd_finish_read(file, count, host_err); +} + +int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, + unsigned long *count) +{ + mm_segment_t oldfs; + int host_err; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); + set_fs(oldfs); + return nfsd_finish_read(file, count, host_err); +} + +static __be32 +nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +{ + if (file->f_op->splice_read && rqstp->rq_splice_ok) + return nfsd_splice_read(rqstp, file, offset, count); + else + return nfsd_readv(file, offset, vec, vlen, count); } /* @@ -979,7 +886,7 @@ static void kill_suid(struct dentry *dentry) */ static int wait_for_concurrent_writes(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); static ino_t last_ino; static dev_t last_dev; int err = 0; @@ -1013,6 +920,17 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, int host_err; int stable = *stablep; int use_wgather; + loff_t pos = offset; + unsigned int pflags = current->flags; + + if (rqstp->rq_local) + /* + * We want less throttling in balance_dirty_pages() + * and shrink_inactive_list() so that nfs to + * localhost doesn't cause nfsd to lock up due to all + * the client's dirty pages or its congested queue. + */ + current->flags |= PF_LESS_THROTTLE; dentry = file->f_path.dentry; inode = dentry->d_inode; @@ -1025,7 +943,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); + host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); set_fs(oldfs); if (host_err < 0) goto out_nfserr; @@ -1033,10 +951,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, nfsdstats.io_write += host_err; fsnotify_modify(file); - /* clear setuid/setgid flag after write */ - if (inode->i_mode & (S_ISUID | S_ISGID)) - kill_suid(dentry); - if (stable) { if (use_wgather) host_err = wait_for_concurrent_writes(file); @@ -1050,36 +964,33 @@ out_nfserr: err = 0; else err = nfserrno(host_err); + if (rqstp->rq_local) + tsk_restore_flags(current, pflags, PF_LESS_THROTTLE); return err; } -/* - * Read data from a file. count must contain the requested read count - * on entry. On return, *count contains the number of bytes actually read. - * N.B. After this call fhp needs an fh_put - */ -__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file **file, struct raparms **ra) { - struct file *file; struct inode *inode; - struct raparms *ra; __be32 err; - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file); if (err) return err; - inode = file->f_path.dentry->d_inode; + inode = file_inode(*file); /* Get readahead parameters */ - ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); - - if (ra && ra->p_set) - file->f_ra = ra->p_ra; + *ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); - err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); + if (*ra && (*ra)->p_set) + (*file)->f_ra = (*ra)->p_ra; + return nfs_ok; +} +void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra) +{ /* Write back readahead params */ if (ra) { struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; @@ -1089,28 +1000,29 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, ra->p_count--; spin_unlock(&rab->pb_lock); } - nfsd_close(file); - return err; } -/* As above, but use the provided file descriptor. */ -__be32 -nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, - unsigned long *count) +/* + * Read data from a file. count must contain the requested read count + * on entry. On return, *count contains the number of bytes actually read. + * N.B. After this call fhp needs an fh_put + */ +__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { - __be32 err; + struct file *file; + struct raparms *ra; + __be32 err; + + err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra); + if (err) + return err; + + err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); + + nfsd_put_tmp_read_open(file, ra); - if (file) { - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); - } else /* Note file may still be NULL in NFSv4 special stateid case: */ - err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); -out: return err; } @@ -1205,7 +1117,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, * send along the gid on create when it tries to implement * setgid directories via NFS: */ - if (current_fsuid() != 0) + if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) iap->ia_valid &= ~(ATTR_UID|ATTR_GID); if (iap->ia_valid) return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); @@ -1287,9 +1199,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!fhp->fh_locked) { /* not actually possible */ printk(KERN_ERR - "nfsd_create: parent %s/%s not locked!\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + "nfsd_create: parent %pd2 not locked!\n", + dentry); err = nfserr_io; goto out; } @@ -1299,8 +1210,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ err = nfserr_exist; if (dchild->d_inode) { - dprintk("nfsd_create: dentry %s/%s not negative!\n", - dentry->d_name.name, dchild->d_name.name); + dprintk("nfsd_create: dentry %pd/%pd not negative!\n", + dentry, dchild); goto out; } @@ -1702,12 +1613,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, err = nfserr_noent; if (!dold->d_inode) goto out_dput; - host_err = nfsd_break_lease(dold->d_inode); - if (host_err) { - err = nfserrno(host_err); - goto out_dput; - } - host_err = vfs_link(dold, dirp, dnew); + host_err = vfs_link(dold, dirp, dnew, NULL); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); if (!err) @@ -1757,10 +1663,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, tdentry = tfhp->fh_dentry; tdir = tdentry->d_inode; - err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; - if (ffhp->fh_export != tfhp->fh_export) - goto out; - err = nfserr_perm; if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; @@ -1801,16 +1703,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, host_err = -EXDEV; if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) goto out_dput_new; - - host_err = nfsd_break_lease(odentry->d_inode); - if (host_err) + if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) goto out_dput_new; - if (ndentry->d_inode) { - host_err = nfsd_break_lease(ndentry->d_inode); - if (host_err) - goto out_dput_new; - } - host_err = vfs_rename(fdir, odentry, tdir, ndentry); + + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); if (!host_err) { host_err = commit_metadata(tfhp); if (!host_err) @@ -1822,10 +1718,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, dput(odentry); out_nfserr: err = nfserrno(host_err); - - /* we cannot reply on fh_unlock on the two filehandles, + /* + * We cannot rely on fh_unlock on the two filehandles, * as that would do the wrong thing if the two directories - * were the same, so again we do it by hand + * were the same, so again we do it by hand. */ fill_post_wcc(ffhp); fill_post_wcc(tfhp); @@ -1879,16 +1775,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!type) type = rdentry->d_inode->i_mode & S_IFMT; - host_err = nfsd_break_lease(rdentry->d_inode); - if (host_err) - goto out_put; if (type != S_IFDIR) - host_err = vfs_unlink(dirp, rdentry); + host_err = vfs_unlink(dirp, rdentry, NULL); else host_err = vfs_rmdir(dirp, rdentry); if (!host_err) host_err = commit_metadata(fhp); -out_put: dput(rdentry); out_nfserr: @@ -1913,6 +1805,7 @@ struct buffered_dirent { }; struct readdir_data { + struct dir_context ctx; char *dirent; size_t used; int full; @@ -1944,27 +1837,29 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, struct readdir_cd *cdp, loff_t *offsetp) { - struct readdir_data buf; struct buffered_dirent *de; int host_err; int size; loff_t offset; + struct readdir_data buf = { + .ctx.actor = nfsd_buffered_filldir, + .dirent = (void *)__get_free_page(GFP_KERNEL) + }; - buf.dirent = (void *)__get_free_page(GFP_KERNEL); if (!buf.dirent) return nfserrno(-ENOMEM); offset = *offsetp; while (1) { - struct inode *dir_inode = file->f_path.dentry->d_inode; + struct inode *dir_inode = file_inode(file); unsigned int reclen; cdp->err = nfserr_eof; /* will be cleared on successful read */ buf.used = 0; buf.full = 0; - host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); + host_err = iterate_dir(file, &buf.ctx); if (buf.full) host_err = 0; @@ -2150,7 +2045,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, * with NFSv3. */ if ((acc & NFSD_MAY_OWNER_OVERRIDE) && - inode->i_uid == current_fsuid()) + uid_eq(inode->i_uid, current_fsuid())) return 0; /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ @@ -2225,93 +2120,3 @@ out_nomem: nfsd_racache_shutdown(); return -ENOMEM; } - -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) -struct posix_acl * -nfsd_get_posix_acl(struct svc_fh *fhp, int type) -{ - struct inode *inode = fhp->fh_dentry->d_inode; - char *name; - void *value = NULL; - ssize_t size; - struct posix_acl *acl; - - if (!IS_POSIXACL(inode)) - return ERR_PTR(-EOPNOTSUPP); - - switch (type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - return ERR_PTR(-EOPNOTSUPP); - } - - size = nfsd_getxattr(fhp->fh_dentry, name, &value); - if (size < 0) - return ERR_PTR(size); - - acl = posix_acl_from_xattr(&init_user_ns, value, size); - kfree(value); - return acl; -} - -int -nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) -{ - struct inode *inode = fhp->fh_dentry->d_inode; - char *name; - void *value = NULL; - size_t size; - int error; - - if (!IS_POSIXACL(inode) || - !inode->i_op->setxattr || !inode->i_op->removexattr) - return -EOPNOTSUPP; - switch(type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - return -EOPNOTSUPP; - } - - if (acl && acl->a_count) { - size = posix_acl_xattr_size(acl->a_count); - value = kmalloc(size, GFP_KERNEL); - if (!value) - return -ENOMEM; - error = posix_acl_to_xattr(&init_user_ns, acl, value, size); - if (error < 0) - goto getout; - size = error; - } else - size = 0; - - error = fh_want_write(fhp); - if (error) - goto getout; - if (size) - error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); - else { - if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) - error = 0; - else { - error = vfs_removexattr(fhp->fh_dentry, name); - if (error == -ENODATA) - error = 0; - } - } - fh_drop_write(fhp); - -getout: - kfree(value); - return error; -} -#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 359594c393d..91b6ae3f658 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -6,6 +6,7 @@ #define LINUX_NFSD_VFS_H #include "nfsfh.h" +#include "nfsd.h" /* * Flags for nfsd_permission @@ -38,7 +39,6 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); /* nfsd/vfs.c */ -int fh_lock_parent(struct svc_fh *, struct dentry *); int nfsd_racache_init(int); void nfsd_racache_shutdown(void); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, @@ -52,9 +52,8 @@ __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, struct iattr *, int, time_t); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 -__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, - struct nfs4_acl *); -int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); +__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, + struct xdr_netobj *); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, @@ -71,10 +70,16 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); void nfsd_close(struct file *); +struct raparms; +__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, + struct file **, struct raparms **); +void nfsd_put_tmp_read_open(struct file *, struct raparms *); +int nfsd_splice_read(struct svc_rqst *, + struct file *, loff_t, unsigned long *); +int nfsd_readv(struct file *, loff_t, struct kvec *, int, + unsigned long *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); -__be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *, - loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, loff_t, struct kvec *,int, unsigned long *, int *); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, @@ -87,26 +92,15 @@ __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, __be32 nfsd_rename(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *, char *, int); -__be32 nfsd_remove(struct svc_rqst *, - struct svc_fh *, char *, int); __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, char *name, int len); -int nfsd_truncate(struct svc_rqst *, struct svc_fh *, - unsigned long size); __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, loff_t *, struct readdir_cd *, filldir_t); __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *, int access); -int nfsd_notify_change(struct inode *, struct iattr *); __be32 nfsd_permission(struct svc_rqst *, struct svc_export *, struct dentry *, int); -int nfsd_sync_dir(struct dentry *dp); - -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) -struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); -int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); -#endif static inline int fh_want_write(struct svc_fh *fh) { @@ -125,4 +119,11 @@ static inline void fh_drop_write(struct svc_fh *fh) } } +static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) +{ + struct path p = {.mnt = fh->fh_export->ex_path.mnt, + .dentry = fh->fh_dentry}; + return nfserrno(vfs_getattr(&p, stat)); +} + #endif /* LINUX_NFSD_VFS_H */ diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 53b1863dd8f..4f0481d6380 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -167,7 +167,7 @@ int nfssvc_encode_entry(void *, const char *name, int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); /* Helper functions for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp); +__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat); __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp); #endif /* LINUX_NFSD_H */ diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 7df980eb056..335e04aaf7d 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -136,6 +136,7 @@ struct nfsd3_accessres { __be32 status; struct svc_fh fh; __u32 access; + struct kstat stat; }; struct nfsd3_readlinkres { @@ -173,6 +174,9 @@ struct nfsd3_linkres { struct nfsd3_readdirres { __be32 status; struct svc_fh fh; + /* Just to save kmalloc on every readdirplus entry (svc_fh is a + * little large for the stack): */ + struct svc_fh scratch; int count; __be32 verf[2]; @@ -225,6 +229,7 @@ struct nfsd3_getaclres { int mask; struct posix_acl *acl_access; struct posix_acl *acl_default; + struct kstat stat; }; /* dummy type for release */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 0889bfb43dc..18cbb6d9c8a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -40,6 +40,7 @@ #include "state.h" #include "nfsd.h" +#define NFSD4_MAX_SEC_LABEL_LEN 2048 #define NFSD4_MAX_TAGLEN 128 #define XDR_LEN(n) (((n) + 3) & ~3) @@ -57,7 +58,7 @@ struct nfsd4_compound_state { /* For sessions DRC */ struct nfsd4_session *session; struct nfsd4_slot *slot; - __be32 *datap; + int data_offset; size_t iovlen; u32 minorversion; __be32 status; @@ -118,6 +119,7 @@ struct nfsd4_create { struct iattr cr_iattr; /* request */ struct nfsd4_change_info cr_cinfo; /* response */ struct nfs4_acl *cr_acl; + struct xdr_netobj cr_label; }; #define cr_linklen u.link.namelen #define cr_linkname u.link.name @@ -184,7 +186,6 @@ struct nfsd4_lock { #define lk_old_lock_stateid v.old.lock_stateid #define lk_old_lock_seqid v.old.lock_seqid -#define lk_rflags u.ok.rflags #define lk_resp_stateid u.ok.stateid #define lk_denied u.denied @@ -227,7 +228,7 @@ struct nfsd4_open { u32 op_create; /* request */ u32 op_createmode; /* request */ u32 op_bmval[3]; /* request */ - struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ + struct iattr op_iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ nfs4_verifier op_verf __attribute__((aligned(32))); /* EXCLUSIVE4 */ clientid_t op_clientid; /* request */ @@ -237,6 +238,7 @@ struct nfsd4_open { u32 op_share_deny; /* request */ u32 op_deleg_want; /* request */ stateid_t op_stateid; /* response */ + __be32 op_xdr_error; /* see nfsd4_open_omfg() */ u32 op_recall; /* recall */ struct nfsd4_change_info op_cinfo; /* response */ u32 op_rflags; /* response */ @@ -246,8 +248,8 @@ struct nfsd4_open { struct nfs4_file *op_file; /* used during processing */ struct nfs4_ol_stateid *op_stp; /* used during processing */ struct nfs4_acl *op_acl; + struct xdr_netobj op_label; }; -#define op_iattr iattr struct nfsd4_open_confirm { stateid_t oc_req_stateid /* request */; @@ -285,9 +287,8 @@ struct nfsd4_readdir { struct svc_fh * rd_fhp; /* response */ struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; + struct xdr_stream *xdr; + int cookie_offset; }; struct nfsd4_release_lockowner { @@ -330,6 +331,7 @@ struct nfsd4_setattr { u32 sa_bmval[3]; /* request */ struct iattr sa_iattr; /* request */ struct nfs4_acl *sa_acl; + struct xdr_netobj sa_label; }; struct nfsd4_setclientid { @@ -370,7 +372,6 @@ struct nfsd4_test_stateid { struct nfsd4_free_stateid { stateid_t fr_stateid; /* request */ - __be32 fr_status; /* response */ }; /* also used for NVERIFY */ @@ -504,9 +505,7 @@ struct nfsd4_compoundargs { struct nfsd4_compoundres { /* scratch variables for XDR encode */ - __be32 * p; - __be32 * end; - struct xdr_buf * xbuf; + struct xdr_stream xdr; struct svc_rqst * rqstp; u32 taglen; @@ -536,6 +535,9 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) return argp->opcnt == resp->opcnt; } +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op); +void warn_on_nonidempotent_op(struct nfsd4_op *op); + #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) static inline void @@ -561,10 +563,11 @@ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, struct nfsd4_compoundres *); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); -void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); -__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, - u32 *bmval, struct svc_rqst *, int ignore_crossmnt); +void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, + struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, + u32 *bmval, struct svc_rqst *, int ignore_crossmnt); extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid *setclid); @@ -572,8 +575,6 @@ extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid_confirm *setclientid_confirm); extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); -extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, - struct nfsd4_sequence *seq); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_exchange_id *); extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); @@ -623,6 +624,7 @@ extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_test_stateid *test_stateid); extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid); +extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); #endif /* diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h new file mode 100644 index 00000000000..c5c55dfb91a --- /dev/null +++ b/fs/nfsd/xdr4cb.h @@ -0,0 +1,23 @@ +#define NFS4_MAXTAGLEN 20 + +#define NFS4_enc_cb_null_sz 0 +#define NFS4_dec_cb_null_sz 0 +#define cb_compound_enc_hdr_sz 4 +#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) +#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) +#define cb_sequence_enc_sz (sessionid_sz + 4 + \ + 1 /* no referring calls list yet */) +#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) + +#define op_enc_sz 1 +#define op_dec_sz 2 +#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) +#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) +#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + enc_stateid_sz + \ + enc_nfs4_fh_sz) + +#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) |
